diff --git a/.clang-tidy b/.clang-tidy index ddd0ee6d911..0400b500e5c 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -33,6 +33,8 @@ Checks: '-*, performance-no-automatic-move, performance-trivially-destructible, performance-unnecessary-copy-initialization, + performance-noexcept-move-constructor, + performance-move-const-arg, readability-avoid-const-params-in-decls, readability-const-return-type, @@ -206,3 +208,5 @@ CheckOptions: value: CamelCase - key: modernize-loop-convert.UseCxx20ReverseRanges value: false + - key: performance-move-const-arg.CheckTriviallyCopyableMove + value: false diff --git a/.github/codecov.yml b/.github/codecov.yml deleted file mode 100644 index f185c5e2dcc..00000000000 --- a/.github/codecov.yml +++ /dev/null @@ -1,17 +0,0 @@ -codecov: - max_report_age: "off" - strict_yaml_branch: "master" - -ignore: - - "contrib" - - "docs" - - "benchmark" - - "tests" - - "docker" - - "debian" - - "cmake" - -comment: false - -github_checks: - annotations: false diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml deleted file mode 100644 index 9f3f944c696..00000000000 --- a/.github/workflows/anchore-analysis.yml +++ /dev/null @@ -1,43 +0,0 @@ -# This workflow checks out code, performs an Anchore container image -# vulnerability and compliance scan, and integrates the results with -# GitHub Advanced Security code scanning feature. For more information on -# the Anchore scan action usage and parameters, see -# https://github.com/anchore/scan-action. For more information on -# Anchore container image scanning in general, see -# https://docs.anchore.com. - -name: Docker Container Scan (clickhouse-server) - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -"on": - pull_request: - paths: - - docker/server/Dockerfile - - .github/workflows/anchore-analysis.yml - schedule: - - cron: '0 21 * * *' - -jobs: - Anchore-Build-Scan: - runs-on: ubuntu-latest - steps: - - name: Checkout the code - uses: actions/checkout@v2 - - name: Build the Docker image - run: | - cd docker/server - perl -pi -e 's|=\$version||g' Dockerfile - docker build . 
--file Dockerfile --tag localbuild/testimage:latest - - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled - uses: anchore/scan-action@v2 - id: scan - with: - image: "localbuild/testimage:latest" - acs-report-enable: true - - name: Upload Anchore Scan Report - uses: github/codeql-action/upload-sarif@v1 - with: - sarif_file: ${{ steps.scan.outputs.sarif }} diff --git a/.gitmodules b/.gitmodules index 2a1859d5d26..6c9e66f9cbc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "contrib/poco"] path = contrib/poco - url = https://github.com/ClickHouse-Extras/poco.git + url = https://github.com/ClickHouse/poco.git branch = clickhouse [submodule "contrib/zstd"] path = contrib/zstd @@ -10,13 +10,13 @@ url = https://github.com/lz4/lz4.git [submodule "contrib/librdkafka"] path = contrib/librdkafka - url = https://github.com/ClickHouse-Extras/librdkafka.git + url = https://github.com/ClickHouse/librdkafka.git [submodule "contrib/cctz"] path = contrib/cctz - url = https://github.com/ClickHouse-Extras/cctz.git + url = https://github.com/ClickHouse/cctz.git [submodule "contrib/zlib-ng"] path = contrib/zlib-ng - url = https://github.com/ClickHouse-Extras/zlib-ng.git + url = https://github.com/ClickHouse/zlib-ng.git branch = clickhouse-2.0.x [submodule "contrib/googletest"] path = contrib/googletest @@ -32,51 +32,51 @@ url = https://github.com/google/re2.git [submodule "contrib/llvm"] path = contrib/llvm - url = https://github.com/ClickHouse-Extras/llvm + url = https://github.com/ClickHouse/llvm [submodule "contrib/mariadb-connector-c"] path = contrib/mariadb-connector-c - url = https://github.com/ClickHouse-Extras/mariadb-connector-c.git + url = https://github.com/ClickHouse/mariadb-connector-c.git [submodule "contrib/jemalloc"] path = contrib/jemalloc url = https://github.com/jemalloc/jemalloc.git [submodule "contrib/unixodbc"] path = contrib/unixodbc - url = https://github.com/ClickHouse-Extras/UnixODBC.git + url = https://github.com/ClickHouse/UnixODBC.git [submodule "contrib/protobuf"] path = contrib/protobuf - url = https://github.com/ClickHouse-Extras/protobuf.git + url = https://github.com/ClickHouse/protobuf.git branch = v3.13.0.1 [submodule "contrib/boost"] path = contrib/boost - url = https://github.com/ClickHouse-Extras/boost.git + url = https://github.com/ClickHouse/boost.git [submodule "contrib/base64"] path = contrib/base64 - url = https://github.com/ClickHouse-Extras/Turbo-Base64.git + url = https://github.com/ClickHouse/Turbo-Base64.git [submodule "contrib/arrow"] path = contrib/arrow - url = https://github.com/ClickHouse-Extras/arrow.git + url = https://github.com/ClickHouse/arrow.git branch = blessed/release-6.0.1 [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift.git [submodule "contrib/libhdfs3"] path = contrib/libhdfs3 - url = https://github.com/ClickHouse-Extras/libhdfs3.git + url = https://github.com/ClickHouse/libhdfs3.git [submodule "contrib/libxml2"] path = contrib/libxml2 url = https://github.com/GNOME/libxml2.git [submodule "contrib/libgsasl"] path = contrib/libgsasl - url = https://github.com/ClickHouse-Extras/libgsasl.git + url = https://github.com/ClickHouse/libgsasl.git [submodule "contrib/libcxx"] path = contrib/libcxx - url = https://github.com/ClickHouse-Extras/libcxx.git + url = https://github.com/ClickHouse/libcxx.git [submodule "contrib/libcxxabi"] path = contrib/libcxxabi - url = https://github.com/ClickHouse-Extras/libcxxabi.git + url = 
https://github.com/ClickHouse/libcxxabi.git [submodule "contrib/snappy"] path = contrib/snappy - url = https://github.com/ClickHouse-Extras/snappy.git + url = https://github.com/ClickHouse/snappy.git [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git @@ -85,95 +85,95 @@ url = https://github.com/google/brotli.git [submodule "contrib/h3"] path = contrib/h3 - url = https://github.com/ClickHouse-Extras/h3 + url = https://github.com/ClickHouse/h3 [submodule "contrib/hyperscan"] path = contrib/hyperscan - url = https://github.com/ClickHouse-Extras/hyperscan.git + url = https://github.com/ClickHouse/hyperscan.git [submodule "contrib/libunwind"] path = contrib/libunwind - url = https://github.com/ClickHouse-Extras/libunwind.git + url = https://github.com/ClickHouse/libunwind.git [submodule "contrib/simdjson"] path = contrib/simdjson url = https://github.com/simdjson/simdjson.git [submodule "contrib/rapidjson"] path = contrib/rapidjson - url = https://github.com/ClickHouse-Extras/rapidjson + url = https://github.com/ClickHouse/rapidjson [submodule "contrib/fastops"] path = contrib/fastops - url = https://github.com/ClickHouse-Extras/fastops + url = https://github.com/ClickHouse/fastops [submodule "contrib/orc"] path = contrib/orc - url = https://github.com/ClickHouse-Extras/orc + url = https://github.com/ClickHouse/orc [submodule "contrib/sparsehash-c11"] path = contrib/sparsehash-c11 url = https://github.com/sparsehash/sparsehash-c11.git [submodule "contrib/grpc"] path = contrib/grpc - url = https://github.com/ClickHouse-Extras/grpc.git + url = https://github.com/ClickHouse/grpc.git branch = v1.33.2 [submodule "contrib/aws"] path = contrib/aws - url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git + url = https://github.com/ClickHouse/aws-sdk-cpp.git [submodule "aws-c-event-stream"] path = contrib/aws-c-event-stream - url = https://github.com/ClickHouse-Extras/aws-c-event-stream.git + url = https://github.com/ClickHouse/aws-c-event-stream.git [submodule "aws-c-common"] path = contrib/aws-c-common - url = https://github.com/ClickHouse-Extras/aws-c-common.git + url = https://github.com/ClickHouse/aws-c-common.git [submodule "aws-checksums"] path = contrib/aws-checksums - url = https://github.com/ClickHouse-Extras/aws-checksums.git + url = https://github.com/ClickHouse/aws-checksums.git [submodule "contrib/curl"] path = contrib/curl url = https://github.com/curl/curl.git [submodule "contrib/icudata"] path = contrib/icudata - url = https://github.com/ClickHouse-Extras/icudata.git + url = https://github.com/ClickHouse/icudata.git [submodule "contrib/icu"] path = contrib/icu url = https://github.com/unicode-org/icu.git [submodule "contrib/flatbuffers"] path = contrib/flatbuffers - url = https://github.com/ClickHouse-Extras/flatbuffers.git + url = https://github.com/ClickHouse/flatbuffers.git [submodule "contrib/replxx"] path = contrib/replxx - url = https://github.com/ClickHouse-Extras/replxx.git + url = https://github.com/ClickHouse/replxx.git [submodule "contrib/avro"] path = contrib/avro - url = https://github.com/ClickHouse-Extras/avro.git + url = https://github.com/ClickHouse/avro.git ignore = untracked [submodule "contrib/msgpack-c"] path = contrib/msgpack-c url = https://github.com/msgpack/msgpack-c [submodule "contrib/libcpuid"] path = contrib/libcpuid - url = https://github.com/ClickHouse-Extras/libcpuid.git + url = https://github.com/ClickHouse/libcpuid.git [submodule "contrib/openldap"] path = contrib/openldap - url = 
https://github.com/ClickHouse-Extras/openldap.git + url = https://github.com/ClickHouse/openldap.git [submodule "contrib/AMQP-CPP"] path = contrib/AMQP-CPP - url = https://github.com/ClickHouse-Extras/AMQP-CPP.git + url = https://github.com/ClickHouse/AMQP-CPP.git [submodule "contrib/cassandra"] path = contrib/cassandra - url = https://github.com/ClickHouse-Extras/cpp-driver.git + url = https://github.com/ClickHouse/cpp-driver.git branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv - url = https://github.com/ClickHouse-Extras/libuv.git + url = https://github.com/ClickHouse/libuv.git branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/ClickHouse-Extras/sentry-native.git + url = https://github.com/ClickHouse/sentry-native.git [submodule "contrib/krb5"] path = contrib/krb5 - url = https://github.com/ClickHouse-Extras/krb5 + url = https://github.com/ClickHouse/krb5 [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl - url = https://github.com/ClickHouse-Extras/cyrus-sasl + url = https://github.com/ClickHouse/cyrus-sasl branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring @@ -184,7 +184,7 @@ url = https://github.com/danlark1/miniselect [submodule "contrib/rocksdb"] path = contrib/rocksdb - url = https://github.com/ClickHouse-Extras/rocksdb.git + url = https://github.com/ClickHouse/rocksdb.git [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz @@ -194,53 +194,53 @@ branch = lts_2021_11_02 [submodule "contrib/dragonbox"] path = contrib/dragonbox - url = https://github.com/ClickHouse-Extras/dragonbox.git + url = https://github.com/ClickHouse/dragonbox.git [submodule "contrib/fast_float"] path = contrib/fast_float url = https://github.com/fastfloat/fast_float [submodule "contrib/libpq"] path = contrib/libpq - url = https://github.com/ClickHouse-Extras/libpq + url = https://github.com/ClickHouse/libpq [submodule "contrib/boringssl"] path = contrib/boringssl - url = https://github.com/ClickHouse-Extras/boringssl.git + url = https://github.com/ClickHouse/boringssl.git branch = MergeWithUpstream [submodule "contrib/NuRaft"] path = contrib/NuRaft - url = https://github.com/ClickHouse-Extras/NuRaft.git + url = https://github.com/ClickHouse/NuRaft.git [submodule "contrib/nanodbc"] path = contrib/nanodbc - url = https://github.com/ClickHouse-Extras/nanodbc.git + url = https://github.com/ClickHouse/nanodbc.git [submodule "contrib/datasketches-cpp"] path = contrib/datasketches-cpp - url = https://github.com/ClickHouse-Extras/datasketches-cpp.git + url = https://github.com/ClickHouse/datasketches-cpp.git [submodule "contrib/yaml-cpp"] path = contrib/yaml-cpp - url = https://github.com/ClickHouse-Extras/yaml-cpp.git + url = https://github.com/ClickHouse/yaml-cpp.git [submodule "contrib/cld2"] path = contrib/cld2 - url = https://github.com/ClickHouse-Extras/cld2.git + url = https://github.com/ClickHouse/cld2.git [submodule "contrib/libstemmer_c"] path = contrib/libstemmer_c - url = https://github.com/ClickHouse-Extras/libstemmer_c.git + url = https://github.com/ClickHouse/libstemmer_c.git [submodule "contrib/wordnet-blast"] path = contrib/wordnet-blast - url = https://github.com/ClickHouse-Extras/wordnet-blast.git + url = https://github.com/ClickHouse/wordnet-blast.git [submodule "contrib/lemmagen-c"] path = contrib/lemmagen-c - url = 
https://github.com/ClickHouse-Extras/lemmagen-c.git + url = https://github.com/ClickHouse/lemmagen-c.git [submodule "contrib/libpqxx"] path = contrib/libpqxx - url = https://github.com/ClickHouse-Extras/libpqxx.git + url = https://github.com/ClickHouse/libpqxx.git [submodule "contrib/sqlite-amalgamation"] path = contrib/sqlite-amalgamation url = https://github.com/azadkuh/sqlite-amalgamation [submodule "contrib/s2geometry"] path = contrib/s2geometry - url = https://github.com/ClickHouse-Extras/s2geometry.git + url = https://github.com/ClickHouse/s2geometry.git [submodule "contrib/bzip2"] path = contrib/bzip2 - url = https://github.com/ClickHouse-Extras/bzip2.git + url = https://github.com/ClickHouse/bzip2.git [submodule "contrib/magic_enum"] path = contrib/magic_enum url = https://github.com/Neargye/magic_enum @@ -249,16 +249,16 @@ url = https://github.com/google/libprotobuf-mutator [submodule "contrib/sysroot"] path = contrib/sysroot - url = https://github.com/ClickHouse-Extras/sysroot.git + url = https://github.com/ClickHouse/sysroot.git [submodule "contrib/nlp-data"] path = contrib/nlp-data - url = https://github.com/ClickHouse-Extras/nlp-data.git + url = https://github.com/ClickHouse/nlp-data.git [submodule "contrib/hive-metastore"] path = contrib/hive-metastore - url = https://github.com/ClickHouse-Extras/hive-metastore + url = https://github.com/ClickHouse/hive-metastore [submodule "contrib/azure"] path = contrib/azure - url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git + url = https://github.com/ClickHouse/azure-sdk-for-cpp.git [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng diff --git a/CMakeLists.txt b/CMakeLists.txt index c0b6604c8f9..9649fc32d74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,7 +248,9 @@ endif() if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") set(USE_DEBUG_HELPERS ON) endif() + option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) +option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF) # Create BuildID when using lld. For other linkers it is created by default. if (LINKER_NAME MATCHES "lld$") @@ -263,6 +265,11 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) set (USE_BINARY_HASH 1) endif () +# Allows to build stripped binary in a separate directory +if (OBJCOPY_PATH AND READELF_PATH) + set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory") +endif() + cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd @@ -285,8 +292,13 @@ include(cmake/cpu_features.cmake) set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables") # Reproducible builds -set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") -set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") +# If turned `ON`, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). +option(ENABLE_BUILD_PATH_MAPPING "Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. 
See https://reproducible-builds.org/docs/build-path" ON) + +if (ENABLE_BUILD_PATH_MAPPING) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") + set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") +endif() if (${CMAKE_VERSION} VERSION_LESS "3.12.4") # CMake < 3.12 doesn't support setting 20 as a C++ standard version. diff --git a/README.md b/README.md index f433b457861..e6dc9f1e6fc 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,6 @@ ClickHouse® is an open-source column-oriented database management system that a * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. -* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. +* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any. diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 3e6f174c6dc..8a1ca6064cb 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -17,6 +17,7 @@ set (SRCS terminalColors.cpp errnoToString.cpp StringRef.cpp + safeExit.cpp ) if (ENABLE_REPLXX) diff --git a/base/base/StringRef.h b/base/base/StringRef.h index eefc87121fc..f300a2d63df 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -19,6 +19,12 @@ #if defined(__SSE4_2__) #include #include + #define CRC_INT _mm_crc32_u64 +#endif + +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + #include + #define CRC_INT __crc32cd #endif @@ -40,9 +46,9 @@ struct StringRef constexpr StringRef(const char * data_, size_t size_) : data(data_), size(size_) {} - StringRef(const std::string & s) : data(s.data()), size(s.size()) {} + StringRef(const std::string & s) : data(s.data()), size(s.size()) {} /// NOLINT constexpr explicit StringRef(std::string_view s) : data(s.data()), size(s.size()) {} - constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {} + constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {} /// NOLINT constexpr StringRef() = default; std::string toString() const { return std::string(data, size); } @@ -205,7 +211,7 @@ struct StringRefHash64 } }; -#if defined(__SSE4_2__) +#if defined(CRC_INT) /// Parts are taken from CityHash. @@ -281,13 +287,13 @@ struct CRC32Hash do { UInt64 word = unalignedLoad(pos); - res = _mm_crc32_u64(res, word); + res = CRC_INT(res, word); pos += 8; } while (pos + 8 < end); UInt64 word = unalignedLoad(end - 8); /// I'm not sure if this is normal. 
- res = _mm_crc32_u64(res, word); + res = CRC_INT(res, word); return res; } diff --git a/base/base/insertAtEnd.h b/base/base/insertAtEnd.h index c4fef664511..abb2aa7d563 100644 --- a/base/base/insertAtEnd.h +++ b/base/base/insertAtEnd.h @@ -26,3 +26,27 @@ void insertAtEnd(std::vector & dest, std::vector && src) dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end())); src.clear(); } + +template +void insertAtEnd(Container & dest, const Container & src) +{ + if (src.empty()) + return; + + dest.insert(dest.end(), src.begin(), src.end()); +} + +template +void insertAtEnd(Container & dest, Container && src) +{ + if (src.empty()) + return; + if (dest.empty()) + { + dest.swap(src); + return; + } + + dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end())); + src.clear(); +} diff --git a/base/base/safeExit.cpp b/base/base/safeExit.cpp new file mode 100644 index 00000000000..4ccfee80643 --- /dev/null +++ b/base/base/safeExit.cpp @@ -0,0 +1,18 @@ +#if defined(OS_LINUX) +# include +#endif +#include +#include +#include + +[[noreturn]] void safeExit(int code) +{ +#if defined(THREAD_SANITIZER) && defined(OS_LINUX) + /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, + /// while connection handling threads are still run. + (void)syscall(SYS_exit_group, code); + __builtin_unreachable(); +#else + _exit(code); +#endif +} diff --git a/base/base/safeExit.h b/base/base/safeExit.h new file mode 100644 index 00000000000..f999ccfac18 --- /dev/null +++ b/base/base/safeExit.h @@ -0,0 +1,4 @@ +#pragma once + +/// _exit() with a workaround for TSan. +[[noreturn]] void safeExit(int code); diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 6996b63b5dd..0260c6380f4 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -18,7 +18,7 @@ #include "Common/config_version.h" #include -#if USE_SENTRY +#if USE_SENTRY && !defined(KEEPER_STANDALONE_BUILD) # include # include diff --git a/base/loggers/CMakeLists.txt b/base/loggers/CMakeLists.txt index 22be002e069..148c4f84f68 100644 --- a/base/loggers/CMakeLists.txt +++ b/base/loggers/CMakeLists.txt @@ -1,5 +1,13 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(loggers .) + +# Standard version depends on DBMS and works with text log add_library(loggers ${loggers_sources} ${loggers_headers}) +target_compile_definitions(loggers PUBLIC WITH_TEXT_LOG=1) target_link_libraries(loggers PRIVATE dbms clickhouse_common_io) target_include_directories(loggers PUBLIC ..) + +# Lightweight version doesn't work with textlog and also doesn't depend on DBMS +add_library(loggers_no_text_log ${loggers_sources} ${loggers_headers}) +target_link_libraries(loggers_no_text_log PRIVATE clickhouse_common_io) +target_include_directories(loggers PUBLIC ..) 
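For context: the StringRef.h hunk above replaces direct `_mm_crc32_u64` calls with a `CRC_INT` macro so that the same CRC32-based hash compiles on both x86-64 (SSE4.2) and AArch64 (ARMv8 CRC extension). Below is a minimal standalone sketch of that dispatch pattern, not taken from the diff — the function name `crc32Hash` and the tail handling are illustrative only.

```cpp
#include <cstddef>
#include <cstdint>
#include <cstring>

#if defined(__SSE4_2__)
    #include <nmmintrin.h>
    #define CRC_INT _mm_crc32_u64   /// x86-64: SSE4.2 crc32 instruction
#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    #include <arm_acle.h>
    #define CRC_INT __crc32cd       /// AArch64: ARMv8 CRC32 extension
#endif

#if defined(CRC_INT)
/// Hash a buffer 8 bytes at a time with whichever hardware CRC32 instruction
/// CRC_INT resolves to. Inputs shorter than 8 bytes would need a separate
/// scalar path (the real CRC32Hash in StringRef.h provides one).
inline uint64_t crc32Hash(const char * data, size_t size)
{
    uint64_t res = UINT64_MAX;
    const char * pos = data;
    const char * end = data + size;

    while (pos + 8 <= end)
    {
        uint64_t word;
        std::memcpy(&word, pos, 8);        /// unaligned load
        res = CRC_INT(res, word);
        pos += 8;
    }

    if (size >= 8 && pos != end)
    {
        uint64_t word;
        std::memcpy(&word, end - 8, 8);    /// final word, overlapping the previous one
        res = CRC_INT(res, word);
    }

    return res;
}
#endif
```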
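Likewise for the new generic `insertAtEnd` overloads added in `base/base/insertAtEnd.h` above: they append one container to another, and the rvalue overload degenerates into a cheap `swap` when the destination is empty. The template headers did not survive the rendering of that hunk, so here is a self-contained sketch (assuming `template <typename Container>`, which is what the function bodies imply) together with a trivial usage; treat it as an approximation of the patch, not a verbatim copy.

```cpp
#include <iterator>
#include <string>
#include <utility>
#include <vector>

template <typename Container>
void insertAtEnd(Container & dest, const Container & src)
{
    if (src.empty())
        return;

    dest.insert(dest.end(), src.begin(), src.end());
}

template <typename Container>
void insertAtEnd(Container & dest, Container && src)
{
    if (src.empty())
        return;
    if (dest.empty())
    {
        /// Moving into an empty destination is just a swap: no per-element work.
        dest.swap(src);
        return;
    }

    dest.insert(dest.end(), std::make_move_iterator(src.begin()), std::make_move_iterator(src.end()));
    src.clear();
}

int main()
{
    std::vector<std::string> dest;
    std::vector<std::string> src{"a", "b", "c"};

    insertAtEnd(dest, std::move(src));   /// swap path: dest == {"a", "b", "c"}

    std::vector<std::string> more{"d", "e"};
    insertAtEnd(dest, more);             /// copy overload: dest == {"a", "b", "c", "d", "e"}
}
```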
diff --git a/base/loggers/Loggers.cpp b/base/loggers/Loggers.cpp index 2f2eadea28f..7c627ad2272 100644 --- a/base/loggers/Loggers.cpp +++ b/base/loggers/Loggers.cpp @@ -9,7 +9,11 @@ #include #include #include -#include + +#ifdef WITH_TEXT_LOG + #include +#endif + #include namespace fs = std::filesystem; @@ -30,17 +34,21 @@ static std::string createDirectory(const std::string & file) return path; }; +#ifdef WITH_TEXT_LOG void Loggers::setTextLog(std::shared_ptr log, int max_priority) { text_log = log; text_log_max_priority = max_priority; } +#endif void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { +#ifdef WITH_TEXT_LOG if (split) if (auto log = text_log.lock()) split->addTextLog(log, text_log_max_priority); +#endif auto current_logger = config.getString("logger", ""); if (config_logger == current_logger) //-V1051 diff --git a/base/loggers/Loggers.h b/base/loggers/Loggers.h index a859c32fa89..22b2b5e2c69 100644 --- a/base/loggers/Loggers.h +++ b/base/loggers/Loggers.h @@ -7,10 +7,12 @@ #include #include "OwnSplitChannel.h" +#ifdef WITH_TEXT_LOG namespace DB { class TextLog; } +#endif namespace Poco::Util { @@ -27,7 +29,9 @@ public: /// Close log files. On next log write files will be reopened. void closeLogs(Poco::Logger & logger); +#ifdef WITH_TEXT_LOG void setTextLog(std::shared_ptr log, int max_priority); +#endif private: Poco::AutoPtr log_file; @@ -37,8 +41,10 @@ private: /// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed. std::string config_logger; +#ifdef WITH_TEXT_LOG std::weak_ptr text_log; int text_log_max_priority = -1; +#endif Poco::AutoPtr split; }; diff --git a/base/loggers/OwnSplitChannel.cpp b/base/loggers/OwnSplitChannel.cpp index 2267b8f425d..b255d89f124 100644 --- a/base/loggers/OwnSplitChannel.cpp +++ b/base/loggers/OwnSplitChannel.cpp @@ -20,10 +20,13 @@ namespace DB { void OwnSplitChannel::log(const Poco::Message & msg) { + +#ifdef WITH_TEXT_LOG auto logs_queue = CurrentThread::getInternalTextLogsQueue(); if (channels.empty() && (logs_queue == nullptr || msg.getPriority() > logs_queue->max_priority)) return; +#endif if (auto * masker = SensitiveDataMasker::getInstance()) { @@ -86,6 +89,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) channel.first->log(msg); // ordinary child } +#ifdef WITH_TEXT_LOG auto logs_queue = CurrentThread::getInternalTextLogsQueue(); /// Log to "TCP queue" if message is not too noisy @@ -137,6 +141,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) if (text_log_locked) text_log_locked->add(elem); } +#endif } @@ -145,12 +150,14 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr channel, const std channels.emplace(name, ExtendedChannelPtrPair(std::move(channel), dynamic_cast(channel.get()))); } +#ifdef WITH_TEXT_LOG void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_priority) { std::lock_guard lock(text_log_mutex); text_log = log; text_log_max_priority.store(max_priority, std::memory_order_relaxed); } +#endif void OwnSplitChannel::setLevel(const std::string & name, int level) { diff --git a/base/loggers/OwnSplitChannel.h b/base/loggers/OwnSplitChannel.h index 364a6346ede..72027f66afd 100644 --- a/base/loggers/OwnSplitChannel.h +++ b/base/loggers/OwnSplitChannel.h @@ -7,10 +7,12 @@ #include #include "ExtendedLogChannel.h" +#ifdef WITH_TEXT_LOG namespace DB { class TextLog; } +#endif namespace DB { @@ -25,7 +27,9 @@ public: /// Adds a child channel void 
addChannel(Poco::AutoPtr channel, const std::string & name); +#ifdef WITH_TEXT_LOG void addTextLog(std::shared_ptr log, int max_priority); +#endif void setLevel(const std::string & name, int level); @@ -40,8 +44,10 @@ private: std::mutex text_log_mutex; +#ifdef WITH_TEXT_LOG std::weak_ptr text_log; std::atomic text_log_max_priority = -1; +#endif }; } diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index 69e05cf804b..f6206d0257c 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -11,10 +11,6 @@ DATASET="${TABLE}_v1.tar.xz" QUERIES_FILE="queries.sql" TRIES=3 -AMD64_BIN_URL="https://builds.clickhouse.com/master/amd64/clickhouse" -AARCH64_BIN_URL="https://builds.clickhouse.com/master/aarch64/clickhouse" -POWERPC64_BIN_URL="https://builds.clickhouse.com/master/ppc64le/clickhouse" - # Note: on older Ubuntu versions, 'axel' does not support IPv6. If you are using IPv6-only servers on very old Ubuntu, just don't install 'axel'. FASTER_DOWNLOAD=wget @@ -33,20 +29,60 @@ fi mkdir -p clickhouse-benchmark-$SCALE pushd clickhouse-benchmark-$SCALE -if [[ ! -f clickhouse ]]; then - CPU=$(uname -m) - if [[ ($CPU == x86_64) || ($CPU == amd64) ]]; then - $FASTER_DOWNLOAD "$AMD64_BIN_URL" - elif [[ $CPU == aarch64 ]]; then - $FASTER_DOWNLOAD "$AARCH64_BIN_URL" - elif [[ $CPU == powerpc64le ]]; then - $FASTER_DOWNLOAD "$POWERPC64_BIN_URL" - else - echo "Unsupported CPU type: $CPU" - exit 1 +OS=$(uname -s) +ARCH=$(uname -m) + +DIR= + +if [ "${OS}" = "Linux" ] +then + if [ "${ARCH}" = "x86_64" ] + then + DIR="amd64" + elif [ "${ARCH}" = "aarch64" ] + then + DIR="aarch64" + elif [ "${ARCH}" = "powerpc64le" ] + then + DIR="powerpc64le" + fi +elif [ "${OS}" = "FreeBSD" ] +then + if [ "${ARCH}" = "x86_64" ] + then + DIR="freebsd" + elif [ "${ARCH}" = "aarch64" ] + then + DIR="freebsd-aarch64" + elif [ "${ARCH}" = "powerpc64le" ] + then + DIR="freebsd-powerpc64le" + fi +elif [ "${OS}" = "Darwin" ] +then + if [ "${ARCH}" = "x86_64" ] + then + DIR="macos" + elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] + then + DIR="macos-aarch64" fi fi +if [ -z "${DIR}" ] +then + echo "The '${OS}' operating system with the '${ARCH}' architecture is not supported." + exit 1 +fi + +URL="https://builds.clickhouse.com/master/${DIR}/clickhouse" +echo +echo "Will download ${URL}" +echo +curl -O "${URL}" && chmod a+x clickhouse || exit 1 +echo +echo "Successfully downloaded the ClickHouse binary" + chmod a+x clickhouse if [[ ! -f $QUERIES_FILE ]]; then @@ -88,7 +124,12 @@ echo cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + if [ "${OS}" = "Darwin" ] + then + sudo purge > /dev/null + else + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + fi echo -n "[" for i in $(seq 1 $TRIES); do @@ -104,27 +145,45 @@ echo echo "Benchmark complete. System info:" echo -echo '----Version, build id-----------' -./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())" -./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw -./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" -echo '----CPU-------------------------' -cat /proc/cpuinfo | grep -i -F 'model name' | uniq -lscpu -echo '----Block Devices---------------' -lsblk -echo '----Disk Free and Total--------' -df -h . 
-echo '----Memory Free and Total-------' -free -h -echo '----Physical Memory Amount------' -cat /proc/meminfo | grep MemTotal -echo '----RAID Info-------------------' -cat /proc/mdstat -#echo '----PCI-------------------------' -#lspci -#echo '----All Hardware Info-----------' -#lshw -echo '--------------------------------' - +if [ "${OS}" = "Darwin" ] +then + echo '----Version, build id-----------' + ./clickhouse local --query "SELECT format('Version: {}', version())" + sw_vers | grep BuildVersion + ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw + ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" + echo '----CPU-------------------------' + sysctl hw.model + sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' + echo '----Disk Free and Total--------' + df -h . + echo '----Memory Free and Total-------' + vm_stat + echo '----Physical Memory Amount------' + ls -l /var/vm + echo '--------------------------------' +else + echo '----Version, build id-----------' + ./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())" + ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw + ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" + echo '----CPU-------------------------' + cat /proc/cpuinfo | grep -i -F 'model name' | uniq + lscpu + echo '----Block Devices---------------' + lsblk + echo '----Disk Free and Total--------' + df -h . + echo '----Memory Free and Total-------' + free -h + echo '----Physical Memory Amount------' + cat /proc/meminfo | grep MemTotal + echo '----RAID Info-------------------' + cat /proc/mdstat + #echo '----PCI-------------------------' + #lspci + #echo '----All Hardware Info-----------' + #lshw + echo '--------------------------------' +fi echo diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index b85260e6c76..96c6b75bc43 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -55,5 +55,5 @@ endif () if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) message(STATUS "${CMAKE_CURRENT_SOURCE_DIR}: Have ${AVAILABLE_PHYSICAL_MEMORY} megabytes of memory. 
- Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS}") + Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") endif () diff --git a/cmake/strip.sh b/cmake/strip.sh new file mode 100755 index 00000000000..de596887159 --- /dev/null +++ b/cmake/strip.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +BINARY_PATH=$1 +BINARY_NAME=$(basename $BINARY_PATH) +DESTINATION_STRIPPED_DIR=$2 +OBJCOPY_PATH=${3:objcopy} +READELF_PATH=${4:readelf} + +BUILD_ID=$($READELF_PATH -n $1 | sed -n '/Build ID/ { s/.*: //p; q; }') +BUILD_ID_PREFIX=${BUILD_ID:0:2} +BUILD_ID_SUFFIX=${BUILD_ID:2} +TEMP_BINARY_PATH="${BINARY_PATH}_temp" + +DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id" +DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin" + +mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX" +mkdir -p "$DESTINATION_STRIP_BINARY_DIR" + +$OBJCOPY_PATH --only-keep-debug "$BINARY_PATH" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" + +touch "$TEMP_BINARY_PATH" +$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$BINARY_PATH" "$TEMP_BINARY_PATH" +$OBJCOPY_PATH --strip-all "$TEMP_BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" +rm -f "$TEMP_BINARY_PATH" diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake new file mode 100644 index 00000000000..e430807772d --- /dev/null +++ b/cmake/strip_binary.cmake @@ -0,0 +1,26 @@ +macro(clickhouse_strip_binary) + set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH) + + cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN}) + + if (NOT DEFINED STRIP_TARGET) + message(FATAL_ERROR "A target name must be provided for stripping binary") + endif() + + if (NOT DEFINED STRIP_BINARY_PATH) + message(FATAL_ERROR "A binary path name must be provided for stripping binary") + endif() + + + if (NOT DEFINED STRIP_DESTINATION_DIR) + message(FATAL_ERROR "Destination directory for stripped binary must be provided") + endif() + + add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD + COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH} + COMMENT "Stripping clickhouse binary" VERBATIM + ) + + install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) +endmacro() diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 69a37304f58..d6fddd0509e 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -169,3 +169,33 @@ if (OBJCOPY_PATH) else () message (FATAL_ERROR "Cannot find objcopy.") endif () + +# Readelf (FIXME copypaste) + +if (COMPILER_GCC) + find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf") +else () + find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf") +endif () + +if (NOT READELF_PATH AND OS_DARWIN) + find_program (BREW_PATH NAMES "brew") + if (BREW_PATH) + execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) + if (LLVM_PREFIX) + find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) + endif () + if (NOT READELF_PATH) + execute_process (COMMAND 
${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) + if (BINUTILS_PREFIX) + find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) + endif () + endif () + endif () +endif () + +if (READELF_PATH) + message (STATUS "Using readelf: ${READELF_PATH}") +else () + message (FATAL_ERROR "Cannot find readelf.") +endif () diff --git a/contrib/icu b/contrib/icu index faa2f9f9e1f..a56dde820dc 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit faa2f9f9e1fe74c5ed00eba371d2830134cdbea1 +Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668 diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index ae19ef20e38..9c34228e2a0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -212,7 +212,9 @@ set(ICUUC_SOURCES "${ICU_SOURCE_DIR}/common/ubiditransform.cpp" "${ICU_SOURCE_DIR}/common/pluralmap.cpp" "${ICU_SOURCE_DIR}/common/static_unicode_sets.cpp" -"${ICU_SOURCE_DIR}/common/restrace.cpp") +"${ICU_SOURCE_DIR}/common/restrace.cpp" +"${ICU_SOURCE_DIR}/common/emojiprops.cpp" +"${ICU_SOURCE_DIR}/common/lstmbe.cpp") set(ICUI18N_SOURCES "${ICU_SOURCE_DIR}/i18n/ucln_in.cpp" @@ -398,7 +400,6 @@ set(ICUI18N_SOURCES "${ICU_SOURCE_DIR}/i18n/sharedbreakiterator.cpp" "${ICU_SOURCE_DIR}/i18n/scientificnumberformatter.cpp" "${ICU_SOURCE_DIR}/i18n/dayperiodrules.cpp" -"${ICU_SOURCE_DIR}/i18n/nounit.cpp" "${ICU_SOURCE_DIR}/i18n/number_affixutils.cpp" "${ICU_SOURCE_DIR}/i18n/number_compact.cpp" "${ICU_SOURCE_DIR}/i18n/number_decimalquantity.cpp" @@ -446,12 +447,21 @@ set(ICUI18N_SOURCES "${ICU_SOURCE_DIR}/i18n/formattedvalue.cpp" "${ICU_SOURCE_DIR}/i18n/formattedval_iterimpl.cpp" "${ICU_SOURCE_DIR}/i18n/formattedval_sbimpl.cpp" -"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp") +"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp" +"${ICU_SOURCE_DIR}/i18n/measunit_extra.cpp" +"${ICU_SOURCE_DIR}/i18n/number_symbolswrapper.cpp" +"${ICU_SOURCE_DIR}/i18n/number_usageprefs.cpp" +"${ICU_SOURCE_DIR}/i18n/numrange_capi.cpp" +"${ICU_SOURCE_DIR}/i18n/pluralranges.cpp" +"${ICU_SOURCE_DIR}/i18n/units_complexconverter.cpp" +"${ICU_SOURCE_DIR}/i18n/units_converter.cpp" +"${ICU_SOURCE_DIR}/i18n/units_data.cpp" +"${ICU_SOURCE_DIR}/i18n/units_router.cpp") file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) set(ICUDATA_SOURCES - "${ICUDATA_SOURCE_DIR}/icudt66l_dat.S" + "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" # Without this cmake can incorrectly detects library type (OBJECT) instead of SHARED/STATIC ) diff --git a/contrib/icudata b/contrib/icudata index f020820388e..72d9a4a7feb 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit f020820388e3faafb44cc643574a2d563dfde572 +Subproject commit 72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5 diff --git a/contrib/jemalloc b/contrib/jemalloc index ca709c3139f..78b58379c85 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit ca709c3139f77f4c00a903cdee46d71e9028f6c6 +Subproject commit 78b58379c854a639df79beb3289351129d863d4b diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h index d06243c5239..64c4f4956b6 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h @@ -4,12 +4,21 @@ extern "C" { #endif +#if !defined(__clang__) +#pragma GCC diagnostic push 
+#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif + #include #include #include #include #include +#if !defined(__clang__) +#pragma GCC diagnostic pop +#endif + #ifdef __cplusplus } #endif diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-cmake/CMakeLists.txt index e859df65b6f..6ff07f0e016 100644 --- a/contrib/llvm-cmake/CMakeLists.txt +++ b/contrib/llvm-cmake/CMakeLists.txt @@ -1,4 +1,8 @@ -if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") +# During cross-compilation in our CI we have to use llvm-tblgen and other building tools +# tools to be build for host architecture and everything else for target architecture (e.g. AArch64) +# Possible workaround is to use llvm-tblgen from some package... +# But lets just enable LLVM for native builds +if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) @@ -22,9 +26,6 @@ set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") set (REQUIRED_LLVM_LIBRARIES LLVMExecutionEngine LLVMRuntimeDyld - LLVMX86CodeGen - LLVMX86Desc - LLVMX86Info LLVMAsmPrinter LLVMDebugInfoDWARF LLVMGlobalISel @@ -56,6 +57,12 @@ set (REQUIRED_LLVM_LIBRARIES LLVMDemangle ) +if (ARCH_AMD64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) +elseif (ARCH_AARCH64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) +endif () + #function(llvm_libs_all REQUIRED_LLVM_LIBRARIES) # llvm_map_components_to_libnames (result all) # if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result) diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index 5b10d1fc490..5b7990ab030 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -4,7 +4,7 @@ FROM ubuntu:20.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" +ARG repository="deb https://packages.clickhouse.com/deb stable main" ARG version=22.1.1.* # set non-empty deb_location_url url to create a docker image @@ -58,7 +58,7 @@ RUN groupadd -r clickhouse --gid=101 \ wget \ tzdata \ && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ + && apt-key adv --keyserver keyserver.ubuntu.com --recv 8919F6BD2B48D754 \ && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ && if [ -n "$deb_location_url" ]; then \ echo "installing from custom url with deb packages: $deb_location_url" \ diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 24168cea330..bd1e0292636 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -263,9 +263,20 @@ function run_tests if [[ $NPROC == 0 ]]; then NPROC=1 fi - time clickhouse-test --hung-check -j "${NPROC}" --order=random \ - --fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \ - -- "$FASTTEST_FOCUS" 2>&1 \ + + local test_opts=( + --hung-check + --fast-tests-only + --no-long + --testname + --shard + --zookeeper + --check-zookeeper-session + --order random + --print-time + --jobs "${NPROC}" + ) + time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee "$FASTTEST_OUTPUT/test_result.txt" set -e diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index fa6e4bf6313..391f9a5e22f 100644 --- 
a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -42,6 +42,9 @@ COPY prepare_hive_data.sh / COPY demo_data.txt / ENV PATH=/apache-hive-2.3.9-bin/bin:/hadoop-3.1.0/bin:/hadoop-3.1.0/sbin:$PATH - +RUN service ssh start && sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml && hdfs namenode -format +RUN apt install -y python3 python3-pip +RUN pip3 install flask requests +COPY http_api_server.py / COPY start.sh / diff --git a/docker/test/integration/hive_server/http_api_server.py b/docker/test/integration/hive_server/http_api_server.py new file mode 100644 index 00000000000..4818b785c89 --- /dev/null +++ b/docker/test/integration/hive_server/http_api_server.py @@ -0,0 +1,70 @@ +import os +import subprocess +import datetime +from flask import Flask, flash, request, redirect, url_for + +def run_command(command, wait=False): + print("{} - execute shell command:{}".format(datetime.datetime.now(), command)) + lines = [] + p = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + shell=True) + if wait: + for l in iter(p.stdout.readline, b''): + lines.append(l) + p.poll() + return (lines, p.returncode) + else: + return(iter(p.stdout.readline, b''), 0) + + +UPLOAD_FOLDER = './' +ALLOWED_EXTENSIONS = {'txt', 'sh'} +app = Flask(__name__) +app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER + +@app.route('/') +def hello_world(): + return 'Hello World' + + +def allowed_file(filename): + return '.' in filename and \ + filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS + + +@app.route('/upload', methods=['GET', 'POST']) +def upload_file(): + if request.method == 'POST': + # check if the post request has the file part + if 'file' not in request.files: + flash('No file part') + return redirect(request.url) + file = request.files['file'] + # If the user does not select a file, the browser submits an + # empty file without a filename. + if file.filename == '': + flash('No selected file') + return redirect(request.url) + if file and allowed_file(file.filename): + filename = file.filename + file.save(os.path.join(app.config['UPLOAD_FOLDER'], filename)) + return redirect(url_for('upload_file', name=filename)) + return ''' + + Upload new File +

+    <h1>Upload new File</h1>
+    <form method=post enctype=multipart/form-data>
+      <input type=file name=file>
+      <input type=submit value=Upload>
+    </form>
+ ''' +@app.route('/run', methods=['GET', 'POST']) +def parse_request(): + data = request.data # data is empty + run_command(data, wait=True) + return 'Ok' + +if __name__ == '__main__': + app.run(port=5011) diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh index afecbb91c5d..8126b975612 100755 --- a/docker/test/integration/hive_server/prepare_hive_data.sh +++ b/docker/test/integration/hive_server/prepare_hive_data.sh @@ -2,5 +2,9 @@ hive -e "create database test" hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " +hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " - hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" + +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" +hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" diff --git a/docker/test/integration/hive_server/start.sh b/docker/test/integration/hive_server/start.sh index e01f28542af..4224b8126e6 100755 --- a/docker/test/integration/hive_server/start.sh +++ b/docker/test/integration/hive_server/start.sh @@ -1,6 +1,5 @@ service ssh start sed s/HOSTNAME/$HOSTNAME/ /hadoop-3.1.0/etc/hadoop/core-site.xml.template > /hadoop-3.1.0/etc/hadoop/core-site.xml -hadoop namenode -format start-all.sh service mysql start mysql -u root -e "CREATE USER \"test\"@\"localhost\" IDENTIFIED BY \"test\"" @@ -9,4 +8,4 @@ schematool -initSchema -dbType mysql #nohup hiveserver2 & nohup hive --service metastore & bash /prepare_hive_data.sh -while true; do sleep 1000; done +python3 http_api_server.py diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 7c16e69a99b..93e7cebb857 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -13,6 +13,17 @@ COPY s3downloader /s3downloader ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" ENV DATASETS="hits visits" +ENV EXPORT_S3_STORAGE_POLICIES=1 + +# Download Minio-related binaries +RUN arch=${TARGETARCH:-amd64} \ + && 
wget "https://dl.min.io/server/minio/release/linux-${arch}/minio" \ + && chmod +x ./minio \ + && wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \ + && chmod +x ./mc +ENV MINIO_ROOT_USER="clickhouse" +ENV MINIO_ROOT_PASSWORD="clickhouse" +COPY setup_minio.sh / COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 3f3240a0b7f..77dc61e6cd0 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -17,6 +17,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # install test configs /usr/share/clickhouse-test/config/install.sh +./setup_minio.sh + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -93,6 +95,8 @@ else clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash 
UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/docker/test/stateful/setup_minio.sh b/docker/test/stateful/setup_minio.sh new file mode 100755 index 00000000000..5758d905197 --- /dev/null +++ b/docker/test/stateful/setup_minio.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# TODO: Make this file shared with stateless tests +# +# Usage for local run: +# +# ./docker/test/stateful/setup_minio.sh ./tests/ +# + +set -e -x -a -u + +ls -lha + +mkdir -p ./minio_data + +if [ ! -f ./minio ]; then + echo 'MinIO binary not found, downloading...' + + BINARY_TYPE=$(uname -s | tr '[:upper:]' '[:lower:]') + + wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-amd64/minio" \ + && chmod +x ./minio \ + && wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-amd64/mc" \ + && chmod +x ./mc +fi + +MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse} +MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse} + +./minio server --address ":11111" ./minio_data & + +while ! curl -v --silent http://localhost:11111 2>&1 | grep AccessDenied +do + echo "Trying to connect to minio" + sleep 1 +done + +lsof -i :11111 + +sleep 5 + +./mc alias set clickminio http://localhost:11111 clickhouse clickhouse +./mc admin user add clickminio test testtest +./mc admin policy set clickminio readwrite user=test +./mc mb clickminio/test + + +# Upload data to Minio. By default after unpacking all tests will in +# /usr/share/clickhouse-test/queries + +TEST_PATH=${1:-/usr/share/clickhouse-test} +MINIO_DATA_PATH=${TEST_PATH}/queries/1_stateful/data_minio + +# Iterating over globs will cause redudant FILE variale to be a path to a file, not a filename +# shellcheck disable=SC2045 +for FILE in $(ls "${MINIO_DATA_PATH}"); do + echo "$FILE"; + ./mc cp "${MINIO_DATA_PATH}"/"$FILE" clickminio/test/"$FILE"; +done + +mkdir -p ~/.aws +cat <> ~/.aws/credentials +[default] +aws_access_key_id=clickhouse +aws_secret_access_key=clickhouse +EOT diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index bfc6763e8c5..68c08c23b3f 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -60,6 +60,7 @@ RUN arch=${TARGETARCH:-amd64} \ ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" +ENV EXPORT_S3_STORAGE_POLICIES=1 COPY run.sh / COPY setup_minio.sh / diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 4e0b6741061..1f39202e743 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -29,5 +29,6 @@ COPY run.sh / ENV DATASETS="hits visits" ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" +ENV EXPORT_S3_STORAGE_POLICIES=1 CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index de77dec03b9..65c5fb9e40f 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -173,6 +173,8 @@ quit configure +./setup_minio.sh + start # shellcheck disable=SC2086 # No quotes because I want to split it into words. 
@@ -188,6 +190,8 @@ clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" +clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" +clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" clickhouse-client --query "SHOW TABLES FROM test" ./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" \ diff --git a/docs/README.md 
b/docs/README.md index cd5c1af0cbd..b328a3ee125 100644 --- a/docs/README.md +++ b/docs/README.md @@ -38,7 +38,7 @@ Writing the docs is extremely useful for project's users and developers, and gro The documentation contains information about all the aspects of the ClickHouse lifecycle: developing, testing, installing, operating, and using. The base language of the documentation is English. The English version is the most actual. All other languages are supported as much as they can by contributors from different countries. -At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, Japanese, and Farsi. We store the documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs). +At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, Japanese. We store the documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs). Each language lays in the corresponding folder. Files that are not translated from English are the symbolic links to the English ones. diff --git a/docs/_includes/install/deb.sh b/docs/_includes/install/deb.sh index 21106e9fc47..0daf12a132f 100644 --- a/docs/_includes/install/deb.sh +++ b/docs/_includes/install/deb.sh @@ -1,11 +1,11 @@ -sudo apt-get install apt-transport-https ca-certificates dirmngr -sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 +sudo apt-get install -y apt-transport-https ca-certificates dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 -echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ +echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \ /etc/apt/sources.list.d/clickhouse.list sudo apt-get update sudo apt-get install -y clickhouse-server clickhouse-client sudo service clickhouse-server start -clickhouse-client # or "clickhouse-client --password" if you set up a password. +clickhouse-client # or "clickhouse-client --password" if you've set up a password. diff --git a/docs/_includes/install/deb_repo.sh b/docs/_includes/install/deb_repo.sh new file mode 100644 index 00000000000..21106e9fc47 --- /dev/null +++ b/docs/_includes/install/deb_repo.sh @@ -0,0 +1,11 @@ +sudo apt-get install apt-transport-https ca-certificates dirmngr +sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv E0C56BD4 + +echo "deb https://repo.clickhouse.com/deb/stable/ main/" | sudo tee \ + /etc/apt/sources.list.d/clickhouse.list +sudo apt-get update + +sudo apt-get install -y clickhouse-server clickhouse-client + +sudo service clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/rpm.sh b/docs/_includes/install/rpm.sh index e3fd1232047..ff99018f872 100644 --- a/docs/_includes/install/rpm.sh +++ b/docs/_includes/install/rpm.sh @@ -1,7 +1,6 @@ -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo -sudo yum install clickhouse-server clickhouse-client +sudo yum install -y yum-utils +sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo +sudo yum install -y clickhouse-server clickhouse-client sudo /etc/init.d/clickhouse-server start clickhouse-client # or "clickhouse-client --password" if you set up a password. 
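Either of the deb or rpm snippets above ends with starting the server and connecting with `clickhouse-client`. A minimal sketch of a post-install sanity check, assuming the server listens on the default local port and the default user has no password:

``` bash
# Confirm the service is up and the client can reach it.
sudo service clickhouse-server status
clickhouse-client --query "SELECT version()"
```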
diff --git a/docs/_includes/install/rpm_repo.sh b/docs/_includes/install/rpm_repo.sh new file mode 100644 index 00000000000..e3fd1232047 --- /dev/null +++ b/docs/_includes/install/rpm_repo.sh @@ -0,0 +1,7 @@ +sudo yum install yum-utils +sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG +sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/clickhouse.repo +sudo yum install clickhouse-server clickhouse-client + +sudo /etc/init.d/clickhouse-server start +clickhouse-client # or "clickhouse-client --password" if you set up a password. diff --git a/docs/_includes/install/tgz.sh b/docs/_includes/install/tgz.sh index 0994510755b..4ba5890b32b 100644 --- a/docs/_includes/install/tgz.sh +++ b/docs/_includes/install/tgz.sh @@ -1,19 +1,20 @@ -export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ +LATEST_VERSION=$(curl -s https://packages.clickhouse.com/tgz/stable/ | \ grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz +export LATEST_VERSION +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz" +curl -O "https://packages.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz" -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-common-static-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-$LATEST_VERSION/install/doinst.sh" -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-common-static-dbg-$LATEST_VERSION.tgz" +sudo "clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh" -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-server-$LATEST_VERSION.tgz" +sudo "clickhouse-server-$LATEST_VERSION/install/doinst.sh" sudo /etc/init.d/clickhouse-server start -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +tar -xzvf "clickhouse-client-$LATEST_VERSION.tgz" +sudo "clickhouse-client-$LATEST_VERSION/install/doinst.sh" diff --git a/docs/_includes/install/tgz_repo.sh b/docs/_includes/install/tgz_repo.sh new file mode 100644 index 00000000000..0994510755b --- /dev/null +++ b/docs/_includes/install/tgz_repo.sh @@ -0,0 +1,19 @@ +export LATEST_VERSION=$(curl -s https://repo.clickhouse.com/tgz/stable/ | \ + grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | sort -V -r | head -n 1) +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz +curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz + +tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz +sudo 
clickhouse-common-static-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz +sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh + +tar -xzvf clickhouse-server-$LATEST_VERSION.tgz +sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh +sudo /etc/init.d/clickhouse-server start + +tar -xzvf clickhouse-client-$LATEST_VERSION.tgz +sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh diff --git a/docs/en/development/browse-code.md b/docs/en/development/browse-code.md index fa57d2289b3..0fe8a46873c 100644 --- a/docs/en/development/browse-code.md +++ b/docs/en/development/browse-code.md @@ -5,7 +5,7 @@ toc_title: Source Code Browser # Browse ClickHouse Source Code {#browse-clickhouse-source-code} -You can use **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/html_report/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. +You can use **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation and semantic highlighting, search and indexing. The code snapshot is updated daily. Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual. diff --git a/docs/en/development/build.md b/docs/en/development/build.md index aaa3bdfd043..5379fc37937 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -156,14 +156,6 @@ $ cd ClickHouse $ ./release ``` -## Faster builds for development - -Normally all tools of the ClickHouse bundle, such as `clickhouse-server`, `clickhouse-client` etc., are linked into a single static executable, `clickhouse`. This executable must be re-linked on every change, which might be slow. One common way to improve build time is to use the 'split' build configuration, which builds a separate binary for every tool, and further splits the code into several shared libraries. To enable this tweak, pass the following flags to `cmake`: - -``` --DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 -``` - ## You Don’t Have to Build ClickHouse {#you-dont-have-to-build-clickhouse} ClickHouse is available in pre-built binaries and packages. Binaries are portable and can be run on any Linux flavour. @@ -172,9 +164,9 @@ They are built for stable, prestable and testing releases as long as for every c To find the freshest build from `master`, go to [commits page](https://github.com/ClickHouse/ClickHouse/commits/master), click on the first green checkmark or red cross near commit, and click to the “Details” link right after “ClickHouse Build Check”. -## Split build configuration {#split-build} +## Faster builds for development: Split build configuration {#split-build} -Normally ClickHouse is statically linked into a single static `clickhouse` binary with minimal dependencies. This is convenient for distribution, but it means that on every change the entire binary is linked again, which is slow and may be inconvenient for development. There is an alternative configuration which creates dynamically loaded shared libraries instead, allowing faster incremental builds. To use it, add the following flags to your `cmake` invocation: +Normally, ClickHouse is statically linked into a single static `clickhouse` binary with minimal dependencies. 
This is convenient for distribution, but it means that on every change the entire binary needs to be linked, which is slow and may be inconvenient for development. There is an alternative configuration which instead creates dynamically loaded shared libraries and separate binaries `clickhouse-server`, `clickhouse-client` etc., allowing for faster incremental builds. To use it, add the following flags to your `cmake` invocation: ``` -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 -DCLICKHOUSE_SPLIT_BINARY=1 ``` diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index b5b558464ba..81887eb8b8e 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -190,15 +190,3 @@ Runs randomly generated queries to catch program errors. If it fails, ask a main ## Performance Tests Measure changes in query performance. This is the longest check that takes just below 6 hours to run. The performance test report is described in detail [here](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report). - - - -# QA - -> What is a `Task (private network)` item on status pages? - -It's a link to the Yandex's internal job system. Yandex employees can see the check's start time and its more verbose status. - -> Where the tests are run - -Somewhere on Yandex internal infrastructure. diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 07969f8ef6a..6c12a3d9055 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -40,8 +40,8 @@ The list of third-party libraries: | grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) | | h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) | | hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) | -| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) | -| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) | +| icu | [Public Domain](https://github.com/unicode-org/icu/blob/a56dde820dc35665a66f2e9ee8ba58e75049b668/icu4c/LICENSE) | +| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5/LICENSE) | | jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) | | krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) | | libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) | diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index f7d7100d181..9d1836b0ff2 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -243,7 +243,7 @@ List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3A ## Test Data {#test-data} -Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. 
It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks. +Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data of web analytics. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks. sudo apt install wget xz-utils @@ -270,7 +270,7 @@ Navigate to your fork repository in GitHub’s UI. If you have been developing i A pull request can be created even if the work is not completed yet. In this case please put the word “WIP” (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelogs. -Testing will commence as soon as Yandex employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. +Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 1c863d6b914..03121880555 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -404,9 +404,9 @@ enum class CompressionMethod }; ``` -**15.** All names must be in English. Transliteration of Russian words is not allowed. +**15.** All names must be in English. Transliteration of Hebrew words is not allowed. - not Stroka + not T_PAAMAYIM_NEKUDOTAYIM **16.** Abbreviations are acceptable if they are well known (when you can easily find the meaning of the abbreviation in Wikipedia or in a search engine). diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 44a552d2a61..be9fc7907af 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -11,7 +11,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference. -Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and it is available to general public. +Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. 
Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse, and it is available to the general public. Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. @@ -133,44 +133,6 @@ If the system clickhouse-server is already running and you do not want to stop i `clickhouse` binary has almost no dependencies and works across wide range of Linux distributions. To quick and dirty test your changes on a server, you can simply `scp` your fresh built `clickhouse` binary to your server and then run it as in examples above. -## Testing Environment {#testing-environment} - -Before publishing release as stable we deploy it on testing environment. Testing environment is a cluster that process 1/39 part of [Yandex.Metrica](https://metrica.yandex.com/) data. We share our testing environment with Yandex.Metrica team. ClickHouse is upgraded without downtime on top of existing data. We look at first that data is processed successfully without lagging from realtime, the replication continue to work and there is no issues visible to Yandex.Metrica team. First check can be done in the following way: - -``` sql -SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h; -``` - -In some cases we also deploy to testing environment of our friend teams in Yandex: Market, Cloud, etc. Also we have some hardware servers that are used for development purposes. - -## Load Testing {#load-testing} - -After deploying to testing environment we run load testing with queries from production cluster. This is done manually. - -Make sure you have enabled `query_log` on your production cluster. - -Collect query log for a day or more: - -``` bash -$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv -``` - -This is a way complicated example. `type = 2` will filter queries that are executed successfully. `query LIKE '%ym:%'` is to select relevant queries from Yandex.Metrica. `is_initial_query` is to select only queries that are initiated by client, not by ClickHouse itself (as parts of distributed query processing). - -`scp` this log to your testing cluster and run it as following: - -``` bash -$ clickhouse benchmark --concurrency 16 < queries.tsv -``` - -(probably you also want to specify a `--user`) - -Then leave it for a night or weekend and go take a rest. - -You should check that `clickhouse-server` does not crash, memory footprint is bounded and performance not degrading over time. - -Precise query execution timings are not recorded and not compared due to high variability of queries and environment. - ## Build Tests {#build-tests} Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well.
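As a sketch of what the `.sql` flavour of a stateless functional test boils down to (file names and contents here are hypothetical, not part of the test suite):

``` bash
# A hypothetical test pair: the query file and the expected output.
cat > 99999_example.sql <<'SQL'
SELECT 1 + 1;
SQL
echo '2' > 99999_example.reference

# Run it the same way the harness does and diff against the reference.
clickhouse-client --multiquery --testmode < 99999_example.sql | diff - 99999_example.reference
```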
@@ -259,13 +221,13 @@ Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuz ## Security Audit -People from Yandex Security Team did some basic overview of ClickHouse capabilities from the security standpoint. +Our Security Team did some basic overview of ClickHouse capabilities from the security standpoint. ## Static Analyzers {#static-analyzers} We run `clang-tidy` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks. -We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/). +We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box. @@ -310,12 +272,6 @@ Alternatively you can try `uncrustify` tool to reformat your code. Configuration We also use `codespell` to find typos in code. It is automated as well. -## Metrica B2B Tests {#metrica-b2b-tests} - -Each ClickHouse release is tested with Yandex Metrica and AppMetrica engines. Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of Metrica engine that is processing fixed sample of input data. Then results of two instances of Metrica engine are compared together. - -These tests are automated by separate team. Due to high number of moving parts, tests are fail most of the time by completely unrelated reasons, that are very difficult to figure out. Most likely these tests have negative value for us. Nevertheless these tests was proved to be useful in about one or two times out of hundreds. - ## Test Coverage {#test-coverage} We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on daily basis. diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index bcb026aa0dc..d7dcf21cb02 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -36,6 +36,7 @@ ENGINE = MaterializedMySQL('host:port', ['database' | database], 'user', 'passwo - `max_flush_data_time` — Maximum number of milliseconds that data is allowed to cache in memory (for database and the cache data unable to query). When this time is exceeded, the data will be materialized. Default: `1000`. - `max_wait_time_when_mysql_unavailable` — Retry interval when MySQL is not available (milliseconds). Negative value disables retry. Default: `1000`. - `allows_query_when_mysql_lost` — Allows to query a materialized table when MySQL is lost. Default: `0` (`false`). +- `materialized_mysql_tables_list` — a comma-separated list of MySQL database tables that will be replicated by the MaterializedMySQL database engine. Default value: empty list — means that all tables will be replicated.
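A hedged sketch of how the new `materialized_mysql_tables_list` setting could be combined with the `CREATE DATABASE` example that follows; the host, credentials, and table names are placeholders, and on versions where the engine is still experimental the corresponding `allow_experimental_*` setting has to be enabled as well:

``` bash
# Replicate only two tables from the source MySQL database (placeholders).
clickhouse-client --allow_experimental_database_materialized_mysql=1 --query "
    CREATE DATABASE mysql_repl
    ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***')
    SETTINGS materialized_mysql_tables_list = 'orders,customers'"
```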
```sql CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', '***') @@ -75,7 +76,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree]( | FLOAT | [Float32](../../sql-reference/data-types/float.md) | | DOUBLE | [Float64](../../sql-reference/data-types/float.md) | | DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) | -| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | +| DATE, NEWDATE | [Date32](../../sql-reference/data-types/date32.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | | YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) | diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index c5a1bba44b2..df4965b1f8c 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -49,6 +49,8 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') All other MySQL data types are converted into [String](../../sql-reference/data-types/string.md). +Because the ClickHouse `Date` type has a different range than the MySQL `DATE` type, you can use the `mysql_datatypes_support_level` setting to change how the MySQL date type is mapped to a ClickHouse date type when MySQL values fall outside the ClickHouse `Date` range: `date2Date32` (convert MySQL `DATE` to ClickHouse `Date32`), `date2String` (convert MySQL `DATE` to ClickHouse `String`; usually used when your MySQL data contains dates earlier than 1925), or `default` (convert MySQL `DATE` to ClickHouse `Date`). + [Nullable](../../sql-reference/data-types/nullable.md) is supported.
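A minimal, hedged way to inspect and override that mapping; the database and table names are placeholders, and whether a per-query override is picked up when the table structure is fetched depends on the server version, so the second command is only a sketch:

``` bash
# Show the current mapping mode.
clickhouse-client --query "SELECT name, value FROM system.settings WHERE name = 'mysql_datatypes_support_level'"

# Sketch: ask for the DATE -> Date32 mapping when reading a MySQL-backed table (placeholders).
clickhouse-client --mysql_datatypes_support_level='date2Date32' --query "DESCRIBE TABLE mysql_db.some_table"
```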
## Global Variables Support {#global-variables-support} diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 691666cffef..c7301a55bf0 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -26,7 +26,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) - ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip') + ENGINE=S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip') SETTINGS input_format_with_names_use_header = 0; INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); @@ -75,19 +75,19 @@ Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.cs ``` sql CREATE TABLE big_table (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/my_folder/file-{000..999}.csv', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/my_folder/file-{000..999}.csv', 'CSV'); ``` **Example with wildcards 2** Suppose we have several files in CSV format with the following URIs on S3: -- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_1.csv' -- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_2.csv' -- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_3.csv' -- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_1.csv' -- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_2.csv' -- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_3.csv' There are several ways to make a table consisting of all six files: @@ -96,21 +96,21 @@ There are several ways to make a table consisting of all six files: ``` sql CREATE TABLE table_with_range (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/some_file_{1..3}', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/some_file_{1..3}', 'CSV'); ``` 2. Take all files with `some_file_` prefix (there should be no extra files with such prefix in both folders): ``` sql CREATE TABLE table_with_question_mark (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/some_file_?', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/some_file_?', 'CSV'); ``` 3. 
Take all the files in both folders (all files should satisfy format and schema described in query): ``` sql CREATE TABLE table_with_asterisk (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/*', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/*', 'CSV'); ``` ## S3-related Settings {#settings} @@ -142,7 +142,7 @@ The following settings can be specified in configuration file for given endpoint ``` xml - https://storage.yandexcloud.net/my-test-bucket-768/ + https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 5ac2105e9fd..b58e90a3d92 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -55,27 +55,28 @@ WHERE table = 'visits' ``` ``` text -┌─partition─┬─name───────────┬─active─┐ -│ 201901 │ 201901_1_3_1 │ 0 │ -│ 201901 │ 201901_1_9_2 │ 1 │ -│ 201901 │ 201901_8_8_0 │ 0 │ -│ 201901 │ 201901_9_9_0 │ 0 │ -│ 201902 │ 201902_4_6_1 │ 1 │ -│ 201902 │ 201902_10_10_0 │ 1 │ -│ 201902 │ 201902_11_11_0 │ 1 │ -└───────────┴────────────────┴────────┘ +┌─partition─┬─name──────────────┬─active─┐ +│ 201901 │ 201901_1_3_1 │ 0 │ +│ 201901 │ 201901_1_9_2_11 │ 1 │ +│ 201901 │ 201901_8_8_0 │ 0 │ +│ 201901 │ 201901_9_9_0 │ 0 │ +│ 201902 │ 201902_4_6_1_11 │ 1 │ +│ 201902 │ 201902_10_10_0_11 │ 1 │ +│ 201902 │ 201902_11_11_0_11 │ 1 │ +└───────────┴───────────────────┴────────┘ ``` The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries. The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query. -Let’s break down the name of the first part: `201901_1_3_1`: +Let’s break down the name of the part: `201901_1_9_2_11`: - `201901` is the partition name. - `1` is the minimum number of the data block. -- `3` is the maximum number of the data block. -- `1` is the chunk level (the depth of the merge tree it is formed from). +- `9` is the maximum number of the data block. +- `2` is the chunk level (the depth of the merge tree it is formed from). +- `11` is the mutation version (if a part mutated) !!! info "Info" The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level). 
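The same numbers that make up a part name can be read back from `system.parts`, which gives a convenient cross-check of the breakdown above (a sketch, using the `visits` table from the earlier listing):

``` bash
# The part name is partition_minblock_maxblock_level (plus the mutation version when present);
# the individual components are also exposed as separate columns.
clickhouse-client --query "
    SELECT name, partition, min_block_number, max_block_number, level, active
    FROM system.parts
    WHERE table = 'visits'
    FORMAT PrettyCompact"
```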
@@ -89,16 +90,16 @@ OPTIMIZE TABLE visits PARTITION 201902; ``` ``` text -┌─partition─┬─name───────────┬─active─┐ -│ 201901 │ 201901_1_3_1 │ 0 │ -│ 201901 │ 201901_1_9_2 │ 1 │ -│ 201901 │ 201901_8_8_0 │ 0 │ -│ 201901 │ 201901_9_9_0 │ 0 │ -│ 201902 │ 201902_4_6_1 │ 0 │ -│ 201902 │ 201902_4_11_2 │ 1 │ -│ 201902 │ 201902_10_10_0 │ 0 │ -│ 201902 │ 201902_11_11_0 │ 0 │ -└───────────┴────────────────┴────────┘ +┌─partition─┬─name─────────────┬─active─┐ +│ 201901 │ 201901_1_3_1 │ 0 │ +│ 201901 │ 201901_1_9_2_11 │ 1 │ +│ 201901 │ 201901_8_8_0 │ 0 │ +│ 201901 │ 201901_9_9_0 │ 0 │ +│ 201902 │ 201902_4_6_1 │ 0 │ +│ 201902 │ 201902_4_11_2_11 │ 1 │ +│ 201902 │ 201902_10_10_0 │ 0 │ +│ 201902 │ 201902_11_11_0 │ 0 │ +└───────────┴──────────────────┴────────┘ ``` Inactive parts will be deleted approximately 10 minutes after merging. @@ -109,12 +110,12 @@ Another way to view a set of parts and partitions is to go into the directory of /var/lib/clickhouse/data/default/visits$ ls -l total 40 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 201901_1_3_1 -drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2 +drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2_11 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_8_8_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_9_9_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_10_10_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_11_11_0 -drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2 +drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2_11 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 10373761869..a0acda5d5c6 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -802,7 +802,7 @@ Configuration markup: s3 - https://storage.yandexcloud.net/my-bucket/root-path/ + https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ your_access_key_id your_secret_access_key @@ -856,7 +856,7 @@ S3 disk can be configured as `main` or `cold` storage: s3 - https://storage.yandexcloud.net/my-bucket/root-path/ + https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ your_access_key_id your_secret_access_key diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 7cd58d35362..d574bd9449e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -97,7 +97,7 @@ ZooKeeper is not used in `SELECT` queries because replication does not affect th For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it does not create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. 
The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data. -For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers). +For very large clusters, you can use different ZooKeeper clusters for different shards. However, from our experience this has not proven necessary based on production clusters with approximately 300 servers. Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting. @@ -111,7 +111,7 @@ Data blocks are deduplicated. For multiple writes of the same data block (data b During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.) -You can have any number of replicas of the same data. Yandex.Metrica uses double replication in production. Each server uses RAID-5 or RAID-6, and RAID-10 in some cases. This is a relatively reliable and convenient solution. +You can have any number of replicas of the same data. Based on our experiences, a relatively reliable and convenient solution could use double replication in production, with each server using RAID-5 or RAID-6 (and RAID-10 in some cases). The system monitors data synchronicity on replicas and is able to recover after a failure. Failover is automatic (for small differences in data) or semi-automatic (when data differs too much, which may indicate a configuration error). @@ -163,7 +163,7 @@ Example: 05 02 - example05-02-1.yandex.ru + example05-02-1 ``` @@ -172,7 +172,7 @@ In this case, the path consists of the following parts: `/clickhouse/tables/` is the common prefix. We recommend using exactly this one. -`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the Yandex.Metrica cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier. +`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the example cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier. `table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it does not change after a RENAME query. *HINT*: you could add a database name in front of `table_name` as well. E.g. 
`db_name.table_name` diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 4d2454298f2..5072465687e 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -197,7 +197,7 @@ A simple remainder from the division is a limited solution for sharding and isn You should be concerned about the sharding scheme in the following cases: - Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient. -- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries. +- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries. Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. 
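Since inserts into a `Distributed` table are first queued on the local file system as described above, a hedged sketch for checking and draining that queue (the database and table names are placeholders):

``` bash
# Files still waiting to be forwarded to the remote shards, if any.
ls -l /var/lib/clickhouse/data/default/my_distributed_table/

# Ask the server to send the pending data right away.
clickhouse-client --query "SYSTEM FLUSH DISTRIBUTED default.my_distributed_table"
```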
diff --git a/docs/en/faq/general/columnar-database.md b/docs/en/faq/general/columnar-database.md index cbc5f77d0ba..11bbd2e63f6 100644 --- a/docs/en/faq/general/columnar-database.md +++ b/docs/en/faq/general/columnar-database.md @@ -22,4 +22,4 @@ Here is the illustration of the difference between traditional row-oriented syst **Columnar** ![Columnar](https://clickhouse.com/docs/en/images/column-oriented.gif#) -A columnar database is a preferred choice for analytical applications because it allows to have many columns in a table just in case, but do not pay the cost for unused columns on read query execution time. Column-oriented databases are designed for big data processing because and data warehousing, they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. +A columnar database is a preferred choice for analytical applications because it allows having many columns in a table just in case, without paying the cost for unused columns at read query execution time. Column-oriented databases are designed for big data processing and data warehousing, because they often natively scale using distributed clusters of low-cost hardware to increase throughput. ClickHouse does it with a combination of [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables. diff --git a/docs/en/faq/general/mapreduce.md b/docs/en/faq/general/mapreduce.md index 7d25d308d14..30cae65cba2 100644 --- a/docs/en/faq/general/mapreduce.md +++ b/docs/en/faq/general/mapreduce.md @@ -6,7 +6,7 @@ toc_priority: 110 # Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce} -We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT. +We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Large IT companies often have proprietary in-house solutions. These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks.
diff --git a/docs/en/faq/general/ne-tormozit.md b/docs/en/faq/general/ne-tormozit.md index 26ae741216d..e8dc7388eff 100644 --- a/docs/en/faq/general/ne-tormozit.md +++ b/docs/en/faq/general/ne-tormozit.md @@ -9,7 +9,7 @@ toc_priority: 11 This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. -Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. +Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, Yandex. That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it. diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index 6dae6c20073..d4c9bab2441 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -11,7 +11,7 @@ This section describes how to obtain example datasets and import them into Click The list of documented datasets: - [GitHub Events](../../getting-started/example-datasets/github-events.md) -- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md) +- [Anonymized Web Analytics Dataset](../../getting-started/example-datasets/metrica.md) - [Recipes](../../getting-started/example-datasets/recipes.md) - [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) - [WikiStat](../../getting-started/example-datasets/wikistat.md) diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md index 483220d12ee..d9d8beb0181 100644 --- a/docs/en/getting-started/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -1,11 +1,11 @@ --- toc_priority: 15 -toc_title: Yandex.Metrica Data +toc_title: Web Analytics Data --- -# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data} +# Anonymized Web Analytics Data {#anonymized-web-analytics-data} -Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section. +Dataset consists of two tables containing anonymized web analytics data with hits (`hits_v1`) and visits (`visits_v1`). 
The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://datasets.clickhouse.com/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz. @@ -73,6 +73,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" ## Example Queries {#example-queries} -[ClickHouse tutorial](../../getting-started/tutorial.md) is based on Yandex.Metrica dataset and the recommended way to get started with this dataset is to just go through tutorial. +[The ClickHouse tutorial](../../getting-started/tutorial.md) is based on this web analytics dataset, and the recommended way to get started with this dataset is to go through the tutorial. Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there). diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 64810d3fa37..a7825988695 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -375,7 +375,7 @@ Q3: 0.051 sec. Q4: 0.072 sec. In this case, the query processing time is determined above all by network latency. -We ran queries using a client located in a Yandex datacenter in Finland on a cluster in Russia, which added about 20 ms of latency. +We ran queries using a client located in a different datacenter than where the cluster was located, which added about 20 ms of latency. ## Summary {#summary} diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index c03daf45b02..cd734d4dc8b 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -27,9 +27,17 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun {% include 'install/deb.sh' %} ``` +
+ +Deprecated Method for installing deb-packages +``` bash +{% include 'install/deb_repo.sh' %} +``` +
+ You can replace `stable` with `lts` or `testing` to use different [release trains](../faq/operations/production.md) based on your needs. -You can also download and install packages manually from [here](https://repo.clickhouse.com/deb/stable/main/). +You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable). #### Packages {#packages} @@ -49,11 +57,17 @@ It is recommended to use official pre-compiled `rpm` packages for CentOS, RedHat First, you need to add the official repository: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` +
+ +Deprecated Method for installing rpm-packages +``` bash +{% include 'install/rpm_repo.sh' %} +``` +
+ If you want to use the most recent version, replace `stable` with `testing` (this is recommended for your testing environments). `prestable` is sometimes also available. Then run these commands to install packages: @@ -62,36 +76,27 @@ Then run these commands to install packages: sudo yum install clickhouse-server clickhouse-client ``` -You can also download and install packages manually from [here](https://repo.clickhouse.com/rpm/stable/x86_64). +You can also download and install packages manually from [here](https://packages.clickhouse.com/rpm/stable). ### From Tgz Archives {#from-tgz-archives} It is recommended to use official pre-compiled `tgz` archives for all Linux distributions, where installation of `deb` or `rpm` packages is not possible. -The required version can be downloaded with `curl` or `wget` from repository https://repo.clickhouse.com/tgz/. +The required version can be downloaded with `curl` or `wget` from repository https://packages.clickhouse.com/tgz/. After that downloaded archives should be unpacked and installed with installation scripts. Example for the latest stable version: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` +
+ +Deprecated Method for installing tgz archives +``` bash +{% include 'install/tgz_repo.sh' %} +``` +
+ For production environments, it’s recommended to use the latest `stable`-version. You can find its number on GitHub page https://github.com/ClickHouse/ClickHouse/tags with postfix `-stable`. ### From Docker Image {#from-docker-image} @@ -215,6 +220,6 @@ SELECT 1 **Congratulations, the system works!** -To continue experimenting, you can download one of the test data sets or go through [tutorial](https://clickhouse.com/tutorial.html). +To continue experimenting, you can download one of the test data sets or go through [tutorial](./tutorial.md). [Original article](https://clickhouse.com/docs/en/getting_started/install/) diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md index 90e3eedb764..6c44f250242 100644 --- a/docs/en/getting-started/playground.md +++ b/docs/en/getting-started/playground.md @@ -5,11 +5,12 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} +!!! warning "Warning" + This service is deprecated and will be replaced in foreseeable future. + [ClickHouse Playground](https://play.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. Several example datasets are available in Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with. -ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md). - You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). ## Credentials {#credentials} @@ -56,11 +57,3 @@ TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" ``` - -## Implementation Details {#implementation-details} - -ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md). -The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground, both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add an extra layer of protection and improved global connectivity. - -!!! warning "Warning" - Exposing the ClickHouse server to the public internet in any other situation is **strongly not recommended**. Make sure it listens only on a private network and is covered by a properly configured firewall. diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index e08b319f2a4..9f43cc8769d 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv ## Import Sample Dataset {#import-sample-dataset} -Now it’s time to fill our ClickHouse server with some sample data. 
In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. +Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use some anonymized web analytics data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. ### Download and Extract Table Data {#download-and-extract-table-data} @@ -105,7 +105,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r 2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md). 3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed. -Yandex.Metrica is a web analytics service, and sample dataset does not cover its full functionality, so there are only two tables to create: +There are two tables to create: - `hits` is a table with each action done by all users on all websites covered by the service. - `visits` is a table that contains pre-built sessions instead of individual actions. @@ -533,19 +533,19 @@ Example config for a cluster with three shards, one replica each: - example-perftest01j.yandex.ru + example-perftest01j 9000 - example-perftest02j.yandex.ru + example-perftest02j 9000 - example-perftest03j.yandex.ru + example-perftest03j 9000 @@ -591,15 +591,15 @@ Example config for a cluster of one shard containing three replicas: - example-perftest01j.yandex.ru + example-perftest01j 9000 - example-perftest02j.yandex.ru + example-perftest02j 9000 - example-perftest03j.yandex.ru + example-perftest03j 9000 @@ -617,15 +617,15 @@ ZooKeeper locations are specified in the configuration file: ``` xml - zoo01.yandex.ru + zoo01 2181 - zoo02.yandex.ru + zoo02 2181 - zoo03.yandex.ru + zoo03 2181 diff --git a/docs/en/guides/apply-catboost-model.md b/docs/en/guides/apply-catboost-model.md index 9fd48fcc62d..859703a31df 100644 --- a/docs/en/guides/apply-catboost-model.md +++ b/docs/en/guides/apply-catboost-model.md @@ -5,7 +5,7 @@ toc_title: Applying CatBoost Models # Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse} -[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at [Yandex](https://yandex.com/company/) for machine learning. +[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at Yandex for machine learning. With this instruction, you will learn to apply pre-trained models in ClickHouse by running model inference from SQL. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index f266d0e6354..058c9b6fd4a 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -300,7 +300,7 @@ Result: Search phrase Count 8267016 bathroom interior design 2166 - yandex 1655 + clickhouse 1655 spring 2014 fashion 1549 freeform photos 1480 @@ -371,7 +371,7 @@ Similar to TabSeparated, but outputs a value in name=value format. 
Names are esc ``` text SearchPhrase= count()=8267016 SearchPhrase=bathroom interior design count()=2166 -SearchPhrase=yandex count()=1655 +SearchPhrase=clickhouse count()=1655 SearchPhrase=2014 spring fashion count()=1549 SearchPhrase=freeform photos count()=1480 SearchPhrase=angelina jolie count()=1245 @@ -1060,7 +1060,7 @@ XML format is suitable only for output, not for parsing. Example: 2166 - yandex + clickhouse 1655 diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 7b73cec22a0..e747b93a1a6 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -12,7 +12,7 @@ ClickHouse provides three network interfaces (they can be optionally wrapped in - [Native TCP](../interfaces/tcp.md), which has less overhead. - [gRPC](grpc.md). -In most cases it is recommended to use appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following: +In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. The following are officially supported by ClickHouse: - [Command-line client](../interfaces/cli.md) - [JDBC driver](../interfaces/jdbc.md) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 4a4eee770dc..3aac78f0878 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -6,7 +6,7 @@ toc_title: Integrations # Integration Libraries from Third-party Developers {#integration-libraries-from-third-party-developers} !!! warning "Disclaimer" - Yandex does **not** maintain the tools and libraries listed below and haven’t done any extensive testing to ensure their quality. + ClickHouse, Inc. does **not** maintain the tools and libraries listed below and haven’t done extensive testing to ensure their quality. ## Infrastructure Products {#infrastructure-products} diff --git a/docs/en/introduction/performance.md b/docs/en/introduction/performance.md index 6ae37086181..684b4ee4179 100644 --- a/docs/en/introduction/performance.md +++ b/docs/en/introduction/performance.md @@ -5,7 +5,7 @@ toc_title: Performance # Performance {#performance} -According to internal testing results at Yandex, ClickHouse shows the best performance (both the highest throughput for long queries and the lowest latency on short queries) for comparable operating scenarios among systems of its class that were available for testing. You can view the test results on a [separate page](https://clickhouse.com/benchmark/dbms/). +ClickHouse shows the best performance (both the highest throughput for long queries and the lowest latency on short queries) for comparable operating scenarios among systems of its class that were available for testing. You can view the test results on a [separate page](https://clickhouse.com/benchmark/dbms/). Numerous independent benchmarks came to similar conclusions. They are not difficult to find using an internet search, or you can see [our small collection of related links](https://clickhouse.com/#independent-benchmarks). 
diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md new file mode 100644 index 00000000000..dce7938f98b --- /dev/null +++ b/docs/en/operations/named-collections.md @@ -0,0 +1,229 @@ +--- +toc_priority: 69 +toc_title: "Named connections" +--- + +# Storing details for connecting to external sources in configuration files {#named-collections} + +Details for connecting to external sources (dictionaries, tables, table functions) can be saved +in configuration files and thus simplify the creation of objects and hide credentials +from users with only SQL access. + +Parameters can be set in XML `CSV` and overridden in SQL `, format = 'TSV'`. +The parameters in SQL can be overridden using format `key` = `value`: `compression_method = 'gzip'`. + +Named connections are stored in the `config.xml` file of the ClickHouse server in the `` section and are applied when ClickHouse starts. + +Example of configuration: +```xml +$ cat /etc/clickhouse-server/config.d/named_collections.xml + + + ... + + +``` + +## Named connections for accessing S3. + +The description of parameters see [s3 Table Function](../sql-reference/table-functions/s3.md). + +Example of configuration: +```xml + + + + AKIAIOSFODNN7EXAMPLE + wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + CSV + + + +``` + +### Example of using named connections with the s3 function + +```sql +INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', + format = 'TSV', structure = 'number UInt64', compression_method = 'gzip') +SELECT * FROM numbers(10000); + +SELECT count() +FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz') + +┌─count()─┐ +│ 10000 │ +└─────────┘ +1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.) +``` + +### Example of using named connections with an S3 table + +```sql +CREATE TABLE s3_engine_table (number Int64) +ENGINE=S3(s3_mydata, url='https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', format = 'TSV') +SETTINGS input_format_with_names_use_header = 0; + +SELECT * FROM s3_engine_table LIMIT 3; +┌─number─┐ +│ 0 │ +│ 1 │ +│ 2 │ +└────────┘ +``` + +## Named connections for accessing MySQL database + +The description of parameters see [mysql](../sql-reference/table-functions/mysql.md). 
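+As a rough sketch of how such a collection can be deployed, the snippet below writes a `config.d` override and restarts the server. The file name, the `systemctl` unit name and the exact set of MySQL elements are assumptions here — adjust them to your installation.
+
+```bash
+# Hypothetical sketch: deploy a named collection for MySQL as a config.d override.
+# The element names are assumed to mirror the mysql() table function parameters.
+sudo tee /etc/clickhouse-server/config.d/named_collections.xml >/dev/null <<'XML'
+<clickhouse>
+    <named_collections>
+        <mymysql>
+            <user>myuser</user>
+            <password>mypass</password>
+            <host>127.0.0.1</host>
+            <port>3306</port>
+            <database>test</database>
+            <connection_pool_size>8</connection_pool_size>
+        </mymysql>
+    </named_collections>
+</clickhouse>
+XML
+
+# Named collections are read at server start, so restart (or start) the server afterwards.
+sudo systemctl restart clickhouse-server
+
+# Smoke test: the collection name now replaces explicit credentials in queries.
+clickhouse-client --query "SELECT count() FROM mysql(mymysql, table = 'test')"
+```
+
+The XML payload above mirrors the configuration example that follows.
+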
+ +Example of configuration: +```xml + + + + myuser + mypass + 127.0.0.1 + 3306 + test + 8 + 1 + 1 + + + +``` + +### Example of using named connections with the mysql function + +```sql +SELECT count() FROM mysql(mymysql, table = 'test'); + +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +### Example of using named connections with an MySQL table + +```sql +CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0); +SELECT count() FROM mytable; + +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +### Example of using named connections with database with engine MySQL + +```sql +CREATE DATABASE mydatabase ENGINE = MySQL(mymysql); + +SHOW TABLES FROM mydatabase; + +┌─name───┐ +│ source │ +│ test │ +└────────┘ +``` + +### Example of using named connections with an external dictionary with source MySQL + +```sql +CREATE DICTIONARY dict (A Int64, B String) +PRIMARY KEY A +SOURCE(MYSQL(NAME mymysql TABLE 'source')) +LIFETIME(MIN 1 MAX 2) +LAYOUT(HASHED()); + +SELECT dictGet('dict', 'B', 2); + +┌─dictGet('dict', 'B', 2)─┐ +│ two │ +└─────────────────────────┘ +``` + +## Named connections for accessing PostgreSQL database + +The description of parameters see [postgresql](../sql-reference/table-functions/postgresql.md). + +Example of configuration: +```xml + + + + pguser + jw8s0F4 + 127.0.0.1 + 5432 + test + test_schema + 8 + + + +``` + +### Example of using named connections with the postgresql function + +```sql +SELECT * FROM postgresql(mypg, table = 'test'); + +┌─a─┬─b───┐ +│ 2 │ two │ +│ 1 │ one │ +└───┴─────┘ + + +SELECT * FROM postgresql(mypg, table = 'test', schema = 'public'); + +┌─a─┐ +│ 1 │ +│ 2 │ +│ 3 │ +└───┘ +``` + + +### Example of using named connections with database with engine PostgreSQL + +```sql +CREATE TABLE mypgtable (a Int64) ENGINE = PostgreSQL(mypg, table = 'test', schema = 'public'); + +SELECT * FROM mypgtable; + +┌─a─┐ +│ 1 │ +│ 2 │ +│ 3 │ +└───┘ +``` + +### Example of using named connections with database with engine PostgreSQL + +```sql +CREATE DATABASE mydatabase ENGINE = PostgreSQL(mypg); + +SHOW TABLES FROM mydatabase + +┌─name─┐ +│ test │ +└──────┘ +``` + +### Example of using named connections with an external dictionary with source POSTGRESQL + +```sql +CREATE DICTIONARY dict (a Int64, b String) +PRIMARY KEY a +SOURCE(POSTGRESQL(NAME mypg TABLE test)) +LIFETIME(MIN 1 MAX 2) +LAYOUT(HASHED()); + +SELECT dictGet('dict', 'b', 2); + +┌─dictGet('dict', 'b', 2)─┐ +│ two │ +└─────────────────────────┘ +``` diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md index a220575cb3c..e410b1b2dfd 100644 --- a/docs/en/operations/performance-test.md +++ b/docs/en/operations/performance-test.md @@ -38,6 +38,18 @@ Alternatively you can perform benchmark in the following steps. 
wget https://builds.clickhouse.com/master/amd64/clickhouse # For aarch64: wget https://builds.clickhouse.com/master/aarch64/clickhouse +# For powerpc64le: +wget https://builds.clickhouse.com/master/powerpc64le/clickhouse +# For freebsd: +wget https://builds.clickhouse.com/master/freebsd/clickhouse +# For freebsd-aarch64: +wget https://builds.clickhouse.com/master/freebsd-aarch64/clickhouse +# For freebsd-powerpc64le: +wget https://builds.clickhouse.com/master/freebsd-powerpc64le/clickhouse +# For macos: +wget https://builds.clickhouse.com/master/macos/clickhouse +# For macos-aarch64: +wget https://builds.clickhouse.com/master/macos-aarch64/clickhouse # Then do: chmod a+x clickhouse ``` @@ -47,7 +59,7 @@ wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/cl chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql ``` -3. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows). +3. Download the [web analytics dataset](../getting-started/example-datasets/metrica.md) (“hits” table containing 100 million rows). ```bash wget https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz tar xvf hits_100m_obfuscated_v1.tar.xz -C . @@ -66,6 +78,6 @@ mv hits_100m_obfuscated_v1/* . ```bash ./benchmark-new.sh hits_100m_obfuscated ``` -7. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com +7. Send the numbers and the info about your hardware configuration to feedback@clickhouse.com All the results are published here: https://clickhouse.com/benchmark/hardware/ diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index eec8961b595..6c6fbbf9cfb 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -101,7 +101,7 @@ Quotas can use the “quota key” feature to report on resources for multiple k diff --git a/docs/ja/getting-started/playground.md b/docs/ja/getting-started/playground.md index 905a26d6570..4e35096aa4b 100644 --- a/docs/ja/getting-started/playground.md +++ b/docs/ja/getting-started/playground.md @@ -5,11 +5,12 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} +!!! warning "Warning" + This service is deprecated and will be replaced in foreseeable future. + [ClickHouse Playground](https://play.clickhouse.com) では、サーバーやクラスタを設定することなく、即座にクエリを実行して ClickHouse を試すことができます。 いくつかの例のデータセットは、Playground だけでなく、ClickHouse の機能を示すサンプルクエリとして利用可能です. 
また、 ClickHouse の LTS リリースで試すこともできます。 -ClickHouse Playground は、[Yandex.Cloud](https://cloud.yandex.com/)にホストされている m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) インスタンス(4 vCPU, 32 GB RAM) で提供されています。クラウドプロバイダの詳細情報については[こちら](../commercial/cloud.md)。 - 任意の HTTP クライアントを使用してプレイグラウンドへのクエリを作成することができます。例えば[curl](https://curl.haxx.se)、[wget](https://www.gnu.org/software/wget/)、[JDBC](../interfaces/jdbc.md)または[ODBC](../interfaces/odbc.md)ドライバを使用して接続を設定します。 ClickHouse をサポートするソフトウェア製品の詳細情報は[こちら](../interfaces/index.md)をご覧ください。 @@ -59,14 +60,3 @@ curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&us ``` bash clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" ``` - -## 実装の詳細 {#implementation-details} - -ClickHouse PlaygroundのWebインタフェースは、ClickHouse [HTTP API](../interfaces/http.md)を介してリクエストを行います。 -Playgroundのバックエンドは、追加のサーバーサイドのアプリケーションを伴わない、ただのClickHouseクラスタです。 -上記のように, ClickHouse HTTPSとTCP/TLSのエンドポイントは Playground の一部としても公開されており、 -いずれも、上記の保護とよりよいグローバルな接続のためのレイヤを追加するために、[Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) を介してプロキシされています。 - -!!! warning "注意" - いかなる場合においても、インターネットにClickHouseサーバを公開することは **非推奨です**。 - プライベートネットワーク上でのみ接続を待機し、適切に設定されたファイアウォールによって保護されていることを確認してください。 diff --git a/docs/ja/getting-started/tutorial.md b/docs/ja/getting-started/tutorial.md index f80343cbad6..69ea68956e1 100644 --- a/docs/ja/getting-started/tutorial.md +++ b/docs/ja/getting-started/tutorial.md @@ -547,19 +547,19 @@ ClickHouseクラスタは均質なクラスタ(homogenous cluster)です。セ - example-perftest01j.yandex.ru + example-perftest01j 9000 - example-perftest02j.yandex.ru + example-perftest02j 9000 - example-perftest03j.yandex.ru + example-perftest03j 9000 @@ -607,15 +607,15 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1; - example-perftest01j.yandex.ru + example-perftest01j 9000 - example-perftest02j.yandex.ru + example-perftest02j 9000 - example-perftest03j.yandex.ru + example-perftest03j 9000 @@ -637,15 +637,15 @@ ZooKeeperの場所は設定ファイルで指定します: ``` xml - zoo01.yandex.ru + zoo01 2181 - zoo02.yandex.ru + zoo02 2181 - zoo03.yandex.ru + zoo03 2181 diff --git a/docs/ja/interfaces/third-party/integrations.md b/docs/ja/interfaces/third-party/integrations.md index 58ff96c728d..eaa77df681f 100644 --- a/docs/ja/interfaces/third-party/integrations.md +++ b/docs/ja/interfaces/third-party/integrations.md @@ -8,7 +8,7 @@ toc_title: "\u7D71\u5408" # サードパーティ開発者からの統合ライブラリ {#integration-libraries-from-third-party-developers} !!! 
warning "免責事項" - Yandexのは **ない** 以下のツールとライブラリを維持し、その品質を確保するための広範なテストを行っていません。 + ClickHouse, Inc.のは **ない** 以下のツールとライブラリを維持し、その品質を確保するための広範なテストを行っていません。 ## インフラ製品 {#infrastructure-products} diff --git a/docs/ja/operations/performance-test.md b/docs/ja/operations/performance-test.md index 068eb4fbc04..8c05acaf60b 100644 --- a/docs/ja/operations/performance-test.md +++ b/docs/ja/operations/performance-test.md @@ -20,9 +20,21 @@ toc_title: "\u30CF\u30FC\u30C9\u30A6\u30A7\u30A2\u8A66\u9A13" # For amd64: - wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578163263_binary/clickhouse + wget https://builds.clickhouse.com/master/amd64/clickhouse # For aarch64: - wget https://clickhouse-builds.s3.yandex.net/0/00ba767f5d2a929394ea3be193b1f79074a1c4bc/1578161264_binary/clickhouse + wget https://builds.clickhouse.com/master/aarch64/clickhouse + # For powerpc64le: + wget https://builds.clickhouse.com/master/powerpc64le/clickhouse + # For freebsd: + wget https://builds.clickhouse.com/master/freebsd/clickhouse + # For freebsd-aarch64: + wget https://builds.clickhouse.com/master/freebsd-aarch64/clickhouse + # For freebsd-powerpc64le: + wget https://builds.clickhouse.com/master/freebsd-powerpc64le/clickhouse + # For macos: + wget https://builds.clickhouse.com/master/macos/clickhouse + # For macos-aarch64: + wget https://builds.clickhouse.com/master/macos-aarch64/clickhouse # Then do: chmod a+x clickhouse diff --git a/docs/ja/operations/server-configuration-parameters/settings.md b/docs/ja/operations/server-configuration-parameters/settings.md index 7e0d5ebcc22..3e5a643eb6a 100644 --- a/docs/ja/operations/server-configuration-parameters/settings.md +++ b/docs/ja/operations/server-configuration-parameters/settings.md @@ -694,7 +694,7 @@ UTCタイムゾーンまたは地理的位置(たとえば、Africa/Abidjan)のI **例** ``` xml -Europe/Moscow +Asia/Istanbul ``` ## tcp_port {#server_configuration_parameters-tcp_port} diff --git a/docs/ja/sql-reference/data-types/datetime.md b/docs/ja/sql-reference/data-types/datetime.md index 5db7288634f..01f5e76762c 100644 --- a/docs/ja/sql-reference/data-types/datetime.md +++ b/docs/ja/sql-reference/data-types/datetime.md @@ -40,7 +40,7 @@ ClickHouseにデータを挿入するときは、データの値に応じて、 ``` sql CREATE TABLE dt ( - `timestamp` DateTime('Europe/Moscow'), + `timestamp` DateTime('Asia/Istanbul'), `event_id` UInt8 ) ENGINE = TinyLog; @@ -61,13 +61,13 @@ SELECT * FROM dt; └─────────────────────┴──────────┘ ``` -- Datetimeを整数として挿入する場合は、Unix Timestamp(UTC)として扱われます。 `1546300800` を表す `'2019-01-01 00:00:00'` UTC しかし、 `timestamp` 列は `Europe/Moscow` (UTC+3)タイムゾーンが指定されている場合、文字列として出力すると、値は次のように表示されます `'2019-01-01 03:00:00'` -- 文字列値をdatetimeとして挿入すると、列タイムゾーンにあるものとして扱われます。 `'2019-01-01 00:00:00'` であるとして扱われます `Europe/Moscow` タイムゾーンとして保存 `1546290000`. +- Datetimeを整数として挿入する場合は、Unix Timestamp(UTC)として扱われます。 `1546300800` を表す `'2019-01-01 00:00:00'` UTC しかし、 `timestamp` 列は `Asia/Istanbul` (UTC+3)タイムゾーンが指定されている場合、文字列として出力すると、値は次のように表示されます `'2019-01-01 03:00:00'` +- 文字列値をdatetimeとして挿入すると、列タイムゾーンにあるものとして扱われます。 `'2019-01-01 00:00:00'` であるとして扱われます `Asia/Istanbul` タイムゾーンとして保存 `1546290000`. 
**2.** フィルタリング `DateTime` 値 ``` sql -SELECT * FROM dt WHERE timestamp = toDateTime('2019-01-01 00:00:00', 'Europe/Moscow') +SELECT * FROM dt WHERE timestamp = toDateTime('2019-01-01 00:00:00', 'Asia/Istanbul') ``` ``` text @@ -91,12 +91,12 @@ SELECT * FROM dt WHERE timestamp = '2019-01-01 00:00:00' **3.** Aのタイムゾーンの取得 `DateTime`-タイプ列: ``` sql -SELECT toDateTime(now(), 'Europe/Moscow') AS column, toTypeName(column) AS x +SELECT toDateTime(now(), 'Asia/Istanbul') AS column, toTypeName(column) AS x ``` ``` text ┌──────────────column─┬─x─────────────────────────┐ -│ 2019-10-16 04:12:04 │ DateTime('Europe/Moscow') │ +│ 2019-10-16 04:12:04 │ DateTime('Asia/Istanbul') │ └─────────────────────┴───────────────────────────┘ ``` @@ -105,7 +105,7 @@ SELECT toDateTime(now(), 'Europe/Moscow') AS column, toTypeName(column) AS x ``` sql SELECT toDateTime(timestamp, 'Europe/London') as lon_time, -toDateTime(timestamp, 'Europe/Moscow') as mos_time +toDateTime(timestamp, 'Asia/Istanbul') as mos_time FROM dt ``` diff --git a/docs/ja/sql-reference/data-types/datetime64.md b/docs/ja/sql-reference/data-types/datetime64.md index 7d1e560654d..ff575c3d493 100644 --- a/docs/ja/sql-reference/data-types/datetime64.md +++ b/docs/ja/sql-reference/data-types/datetime64.md @@ -28,7 +28,7 @@ DateTime64(precision, [timezone]) ``` sql CREATE TABLE dt ( - `timestamp` DateTime64(3, 'Europe/Moscow'), + `timestamp` DateTime64(3, 'Asia/Istanbul'), `event_id` UInt8 ) ENGINE = TinyLog @@ -49,13 +49,13 @@ SELECT * FROM dt └─────────────────────────┴──────────┘ ``` -- Datetimeを整数として挿入する場合、適切にスケーリングされたUnixタイムスタンプ(UTC)として扱われます。 `1546300800000` (精度3で)を表します `'2019-01-01 00:00:00'` UTC しかし、 `timestamp` 列は `Europe/Moscow` (UTC+3)タイムゾーンが指定されている場合、文字列として出力すると、値は次のように表示されます `'2019-01-01 03:00:00'` -- 文字列値をdatetimeとして挿入すると、列タイムゾーンにあるものとして扱われます。 `'2019-01-01 00:00:00'` であるとして扱われます `Europe/Moscow` タイムゾーンとして保存 `1546290000000`. +- Datetimeを整数として挿入する場合、適切にスケーリングされたUnixタイムスタンプ(UTC)として扱われます。 `1546300800000` (精度3で)を表します `'2019-01-01 00:00:00'` UTC しかし、 `timestamp` 列は `Asia/Istanbul` (UTC+3)タイムゾーンが指定されている場合、文字列として出力すると、値は次のように表示されます `'2019-01-01 03:00:00'` +- 文字列値をdatetimeとして挿入すると、列タイムゾーンにあるものとして扱われます。 `'2019-01-01 00:00:00'` であるとして扱われます `Asia/Istanbul` タイムゾーンとして保存 `1546290000000`. 
**2.** フィルタリング `DateTime64` 値 ``` sql -SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow') +SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') ``` ``` text @@ -69,12 +69,12 @@ SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europ **3.** Aのタイムゾーンの取得 `DateTime64`-タイプ値: ``` sql -SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x +SELECT toDateTime64(now(), 3, 'Asia/Istanbul') AS column, toTypeName(column) AS x ``` ``` text ┌──────────────────column─┬─x──────────────────────────────┐ -│ 2019-10-16 04:12:04.000 │ DateTime64(3, 'Europe/Moscow') │ +│ 2019-10-16 04:12:04.000 │ DateTime64(3, 'Asia/Istanbul') │ └─────────────────────────┴────────────────────────────────┘ ``` @@ -83,7 +83,7 @@ SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS ``` sql SELECT toDateTime64(timestamp, 3, 'Europe/London') as lon_time, -toDateTime64(timestamp, 3, 'Europe/Moscow') as mos_time +toDateTime64(timestamp, 3, 'Asia/Istanbul') as mos_time FROM dt ``` diff --git a/docs/ja/sql-reference/functions/type-conversion-functions.md b/docs/ja/sql-reference/functions/type-conversion-functions.md index a16bca0c1f9..4462f0fa25d 100644 --- a/docs/ja/sql-reference/functions/type-conversion-functions.md +++ b/docs/ja/sql-reference/functions/type-conversion-functions.md @@ -460,7 +460,7 @@ AS parseDateTimeBestEffort; クエリ: ``` sql -SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') +SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Asia/Istanbul') AS parseDateTimeBestEffort ``` diff --git a/docs/ko/images/column-oriented.gif b/docs/ko/images/column-oriented.gif deleted file mode 100644 index d5ac7c82848..00000000000 Binary files a/docs/ko/images/column-oriented.gif and /dev/null differ diff --git a/docs/ko/images/logo.svg b/docs/ko/images/logo.svg deleted file mode 100644 index b5ab923ff65..00000000000 --- a/docs/ko/images/logo.svg +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/docs/ko/images/play.png b/docs/ko/images/play.png deleted file mode 100644 index b75aebe4089..00000000000 Binary files a/docs/ko/images/play.png and /dev/null differ diff --git a/docs/ko/images/row-oriented.gif b/docs/ko/images/row-oriented.gif deleted file mode 100644 index 41395b5693e..00000000000 Binary files a/docs/ko/images/row-oriented.gif and /dev/null differ diff --git a/docs/ko/index.md b/docs/ko/index.md deleted file mode 100644 index f2a6396c069..00000000000 --- a/docs/ko/index.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -toc_priority: 0 -toc_title: 목차 ---- - -# ClickHouse란? {#what-is-clickhouse} - -ClickHouse® 는 query의 온라인 분석 처리(OLAP)를 위한 열 지향(column-oriented) 데이터베이스 관리 시스템(DBMS)입니다. - -"보통의" 행 지향(row-oriented) DMBS에서는 데이터가 다음과 같은 순서로 저장됩니다. - -| row | WatchID | JavaEnable | Title | GoodEvent | EventTime | -|-----|-------------|------------|--------------------|-----------|---------------------| -| #0 | 89354350662 | 1 | Investor Relations | 1 | 2016-05-18 05:19:20 | -| #1 | 90329509958 | 0 | Contact us | 1 | 2016-05-18 08:10:20 | -| #2 | 89953706054 | 1 | Mission | 1 | 2016-05-18 07:38:00 | -| #N | … | … | … | … | … | - -즉, 행과 관련된 모든 값들은 물리적으로 나란히 저장됩니다. - -행 지향(row-oriented) DMBS의 예시로는 MySQL, Postgres, 그리고 MS SQL 서버 등이 있습니다. 
- -열 지향 (column-oriented) DBMS에서는 데이터가 아래와 같은 방식으로 저장됩니다: - -| Row: | #0 | #1 | #2 | #N | -|-------------|---------------------|---------------------|---------------------|-----| -| WatchID: | 89354350662 | 90329509958 | 89953706054 | … | -| JavaEnable: | 1 | 0 | 1 | … | -| Title: | Investor Relations | Contact us | Mission | … | -| GoodEvent: | 1 | 1 | 1 | … | -| EventTime: | 2016-05-18 05:19:20 | 2016-05-18 08:10:20 | 2016-05-18 07:38:00 | … | - -이 예에서는 데이터가 정렬된 순서만을 보여줍니다. 다른 열의 값들은 서로 분리되어 저장되고, 같은 열의 정보들은 함께 저장됩니다. - -열 지향(column-oriented) DBMS 의 종류는 Vertica, Paraccel (Actian Matrix and Amazon Redshift), Sybase IQ, Exasol, Infobright, InfiniDB, MonetDB (VectorWise and Actian Vector), LucidDB, SAP HANA, Google Dremel, Google PowerDrill, Druid, 그리고 kdb+ 등이 있습니다. - -데이터를 저장하기 위한 서로 다른 순서는 다른 시나리오에 더 적합합니다. 데이터 접근 시나리오는 쿼리가 수행되는 빈도, 비율 및 비율을 나타내거나, 각 쿼리 유형(행, 열 및 바이트)에 대해 읽은 데이터의 양 데이터 읽기와 업데이트 사이의 관계, 데이터의 작업 크기 및 로컬에서 사용되는 방법 트랜잭션이 사용되는지 여부, 트랜잭션이 얼마나 격리되어 있는지, 데이터 복제 및 논리적 무결성에 대한 요구 사항, 각 쿼리 유형에 대한 대기 시간 및 처리량 요구 사항 등이 있습니다. - -시스템의 부하가 높을수록 사용 시나리오의 요구 사항에 맞게 시스템 설정을 사용자 지정하는 것이 더 중요하며 이 사용자 지정은 더욱 세분화됩니다. 상당히 다른 시나리오에 똑같이 적합한 시스템은 없습니다. 만약 높은 부하에서 시스템이 넓은 시나리오 집합에 대해 적응한다면 시스템은 모든 시나리오를 모두 제대로 처리하지 못하거나 가능한 시나리오 중 하나 또는 몇 개에 대해서만 잘 작동할 것입니다. - -## OLAP 시나리오의 중요 속성들 {#key-properties-of-olap-scenario} - -- 요청(request)의 대부분은 읽기 접근에 관한 것입니다. -- 데이터는 단일 행이 아니라 상당히 큰 일괄 처리(\> 1000개 행)로 업데이트됩니다. 또는 전혀 업데이트되지 않습니다. -- 데이터는 DB에 추가되지만 수정되지는 않습니다. -- 읽기의 경우 DB에서 상당히 많은 수의 행이 추출되지만 열은 일부만 추출됩니다. -- 테이블은 "넓습니다". 이는 열의 수가 많다는 것을 의미합니다. -- 쿼리는 상대적으로 드뭅니다(일반적으로 서버당 수백 또는 초당 쿼리 미만). -- 간단한 쿼리의 경우 약 50ms의 대기 시간이 허용됩니다. -- 열 값은 숫자와 짧은 문자열(예: URL당 60바이트)과 같이 상당히 작습니다 -- 단일 쿼리를 처리할 때 높은 처리량이 필요합니다(서버당 초당 최대 수십억 행). -- 트랜잭션이 필요하지 않습니다. -- 데이터 일관성에 대한 요구 사항이 낮습니다. -- 쿼리당 하나의 큰 테이블이 존재하고 하나를 제외한 모든 테이블은 작습니다. -- 쿼리 결과가 원본 데이터보다 훨씬 작습니다. 즉, 데이터가 필터링되거나 집계되므로 결과가 단일 서버의 RAM에 꼭 들어맞습니다. - -OLAP 시나리오가 다른 일반적인 시나리오(OLTP 또는 키-값 액세스와 같은)와 매우 다르다는 것을 쉽게 알 수 있습니다. 따라서 적절한 성능을 얻으려면 분석 쿼리를 처리하기 위해 OLTP 또는 키-값 DB를 사용하는 것은 의미가 없습니다. 예를 들어 분석에 MongoDB나 Redis를 사용하려고 하면 OLAP 데이터베이스에 비해 성능이 매우 저하됩니다. - -## 왜 열 지향 데이터베이스가 OLAP 시나리오에 적합한가{#why-column-oriented-databases-work-better-in-the-olap-scenario} - -열 지향(column-oriented) 데이터베이스는 OLAP 시나리오에 더 적합합니다. 대부분의 쿼리를 처리하는 데 있어서 행 지향(row-oriented) 데이터베이스보다 100배 이상 빠릅니다. 그 이유는 아래에 자세히 설명되어 있지만 사실은 시각적으로 더 쉽게 설명할 수 있습니다. - -**행 지향 DBMS** - -![Row-oriented](images/row-oriented.gif#) - -**열 지향 DBMS** - -![Column-oriented](images/column-oriented.gif#) - -차이가 보이시나요? - -### 입출력 {#inputoutput} - -1. 분석 쿼리의 경우 적은 수의 테이블 열만 읽어야 합니다. 열 지향 데이터베이스에서는 필요한 데이터만 읽을 수 있습니다. 예를 들어 100개 중 5개의 열이 필요한 경우 I/O가 20배 감소할 것으로 예상할 수 있습니다. -2. 데이터는 패킷으로 읽히므로 압축하기가 더 쉽습니다. 열의 데이터도 압축하기 쉽습니다. 이것은 I/O의 볼륨을 더욱 감소시킵니다. -3. 감소된 I/O로 인해 시스템 캐시에 더 많은 데이터가 들어갑니다. - -예를 들어, "각 광고 플랫폼에 대한 레코드 수 계산" 쿼리는 압축되지 않은 1바이트를 차지하는 하나의 "광고 플랫폼 ID" 열을 읽어야 합니다. 트래픽의 대부분이 광고 플랫폼에서 발생하지 않은 경우 이 열의 최소 10배 압축을 기대할 수 있습니다. 빠른 압축 알고리즘을 사용하면 초당 최소 몇 기가바이트의 압축되지 않은 데이터의 속도로 데이터 압축 해제가 가능합니다. 즉, 이 쿼리는 단일 서버에서 초당 약 수십억 행의 속도로 처리될 수 있습니다. 이 속도는 정말 실제로 달성됩니다. - -### CPU {#cpu} - -쿼리를 수행하려면 많은 행을 처리해야 하므로 별도의 행이 아닌 전체 벡터에 대한 모든 연산을 디스패치하거나 쿼리 엔진을 구현하여 디스패치 비용이 거의 들지 않습니다. 반쯤 괜찮은 디스크 하위 시스템에서 이렇게 하지 않으면 쿼리 인터프리터가 불가피하게 CPU를 정지시킵니다. 데이터를 열에 저장하고 가능한 경우 열별로 처리하는 것이 좋습니다. - -이를 수행하기위한 두가지 방법이 있습니다. - -1. 벡터 엔진. 모든 연산은 별도의 값 대신 벡터에 대해 작성됩니다. 즉, 작업을 자주 호출할 필요가 없으며 파견 비용도 무시할 수 있습니다. 작업 코드에는 최적화된 내부 주기가 포함되어 있습니다. -2. 코드 생성. 쿼리에 대해 생성된 코드에는 모든 간접 호출이 있습니다. - -이것은 단순한 쿼리를 실행할 때 의미가 없기 때문에 "일반" 데이터베이스에서는 수행되지 않습니다. 그러나 예외가 있습니다. 
예를 들어 MemSQL은 코드 생성을 사용하여 SQL 쿼리를 처리할 때 대기 시간을 줄입니다. (비교되게, 분석 DBMS는 대기 시간이 아닌 처리량 최적화가 필요합니다.) - -CPU 효율성을 위해 쿼리 언어는 선언적(SQL 또는 MDX)이거나 최소한 벡터(J, K)여야 합니다. 쿼리는 최적화를 허용하는 암시적 루프만 포함해야 합니다. - -{## [원문](https://clickhouse.com/docs/en/) ##} diff --git a/docs/redirects.txt b/docs/redirects.txt index d0d4d4d6c2c..949b9d48ca8 100644 --- a/docs/redirects.txt +++ b/docs/redirects.txt @@ -6,6 +6,7 @@ changelog/2017.md whats-new/changelog/2017.md changelog/2018.md whats-new/changelog/2018.md changelog/2019.md whats-new/changelog/2019.md changelog/index.md whats-new/changelog/index.md +commercial/cloud.md https://clickhouse.com/cloud/ data_types/array.md sql-reference/data-types/array.md data_types/boolean.md sql-reference/data-types/boolean.md data_types/date.md sql-reference/data-types/date.md diff --git a/docs/ru/development/browse-code.md b/docs/ru/development/browse-code.md index 26b3f491599..730e97aed27 100644 --- a/docs/ru/development/browse-code.md +++ b/docs/ru/development/browse-code.md @@ -6,7 +6,7 @@ toc_title: "Навигация по коду ClickHouse" # Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse} -Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/html_report///ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. +Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse). diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md index 8508ba18d9e..af55b7cf419 100644 --- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -53,15 +53,15 @@ WHERE table = 'visits' ``` ``` text -┌─partition─┬─name───────────┬─active─┐ -│ 201901 │ 201901_1_3_1 │ 0 │ -│ 201901 │ 201901_1_9_2 │ 1 │ -│ 201901 │ 201901_8_8_0 │ 0 │ -│ 201901 │ 201901_9_9_0 │ 0 │ -│ 201902 │ 201902_4_6_1 │ 1 │ -│ 201902 │ 201902_10_10_0 │ 1 │ -│ 201902 │ 201902_11_11_0 │ 1 │ -└───────────┴────────────────┴────────┘ +┌─partition─┬─name──────────────┬─active─┐ +│ 201901 │ 201901_1_3_1 │ 0 │ +│ 201901 │ 201901_1_9_2_11 │ 1 │ +│ 201901 │ 201901_8_8_0 │ 0 │ +│ 201901 │ 201901_9_9_0 │ 0 │ +│ 201902 │ 201902_4_6_1_11 │ 1 │ +│ 201902 │ 201902_10_10_0_11 │ 1 │ +│ 201902 │ 201902_11_11_0_11 │ 1 │ +└───────────┴───────────────────┴────────┘ ``` Столбец `partition` содержит имена всех партиций таблицы. Таблица `visits` из нашего примера содержит две партиции: `201901` и `201902`. Используйте значения из этого столбца в запросах [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md). @@ -70,12 +70,13 @@ WHERE table = 'visits' Столбец `active` отображает состояние куска. `1` означает, что кусок активен; `0` – неактивен. К неактивным можно отнести куски, оставшиеся после слияния данных. Поврежденные куски также отображаются как неактивные. Неактивные куски удаляются приблизительно через 10 минут после того, как было выполнено слияние. 
-Рассмотрим детальнее имя первого куска `201901_1_3_1`: +Рассмотрим детальнее имя куска `201901_1_9_2_11`: - `201901` имя партиции; - `1` – минимальный номер блока данных; -- `3` – максимальный номер блока данных; -- `1` – уровень куска (глубина дерева слияний, которыми этот кусок образован). +- `9` – максимальный номер блока данных; +- `2` – уровень куска (глубина дерева слияний, которыми этот кусок образован). +- `11` - версия мутации (если парт мутировал) !!! info "Info" Названия кусков для таблиц старого типа образуются следующим образом: `20190117_20190123_2_2_0` (минимальная дата _ максимальная дата _ номер минимального блока _ номер максимального блока _ уровень). @@ -89,16 +90,16 @@ OPTIMIZE TABLE visits PARTITION 201902; ``` ``` text -┌─partition─┬─name───────────┬─active─┐ -│ 201901 │ 201901_1_3_1 │ 0 │ -│ 201901 │ 201901_1_9_2 │ 1 │ -│ 201901 │ 201901_8_8_0 │ 0 │ -│ 201901 │ 201901_9_9_0 │ 0 │ -│ 201902 │ 201902_4_6_1 │ 0 │ -│ 201902 │ 201902_4_11_2 │ 1 │ -│ 201902 │ 201902_10_10_0 │ 0 │ -│ 201902 │ 201902_11_11_0 │ 0 │ -└───────────┴────────────────┴────────┘ +┌─partition─┬─name─────────────┬─active─┐ +│ 201901 │ 201901_1_3_1 │ 0 │ +│ 201901 │ 201901_1_9_2_11 │ 1 │ +│ 201901 │ 201901_8_8_0 │ 0 │ +│ 201901 │ 201901_9_9_0 │ 0 │ +│ 201902 │ 201902_4_6_1 │ 0 │ +│ 201902 │ 201902_4_11_2_11 │ 1 │ +│ 201902 │ 201902_10_10_0 │ 0 │ +│ 201902 │ 201902_11_11_0 │ 0 │ +└───────────┴──────────────────┴────────┘ ``` Неактивные куски будут удалены примерно через 10 минут после слияния. @@ -109,12 +110,12 @@ OPTIMIZE TABLE visits PARTITION 201902; /var/lib/clickhouse/data/default/visits$ ls -l total 40 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 201901_1_3_1 -drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2 +drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2_11 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_8_8_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_9_9_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_10_10_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_11_11_0 -drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2 +drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2_11 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached ``` diff --git a/docs/ru/engines/table-engines/special/distributed.md b/docs/ru/engines/table-engines/special/distributed.md index 3d7b8cf32d3..b82703e6991 100644 --- a/docs/ru/engines/table-engines/special/distributed.md +++ b/docs/ru/engines/table-engines/special/distributed.md @@ -128,7 +128,7 @@ logs - имя кластера в конфигурационном файле с Беспокоиться о схеме шардирования имеет смысл в следующих случаях: - используются запросы, требующие соединение данных (IN, JOIN) по определённому ключу - тогда если данные шардированы по этому ключу, то можно использовать локальные IN, JOIN вместо GLOBAL IN, GLOBAL JOIN, что кардинально более эффективно. 
-- используется большое количество серверов (сотни и больше) и большое количество маленьких запросов (запросы отдельных клиентов - сайтов, рекламодателей, партнёров) - тогда, для того, чтобы маленькие запросы не затрагивали весь кластер, имеет смысл располагать данные одного клиента на одном шарде, или (вариант, который используется в Яндекс.Метрике) сделать двухуровневое шардирование: разбить весь кластер на «слои», где слой может состоять из нескольких шардов; данные для одного клиента располагаются на одном слое, но в один слой можно по мере необходимости добавлять шарды, в рамках которых данные распределены произвольным образом; создаются распределённые таблицы на каждый слой и одна общая распределённая таблица для глобальных запросов. +- используется большое количество серверов (сотни и больше) и большое количество маленьких запросов (запросы отдельных клиентов - сайтов, рекламодателей, партнёров) - тогда, для того, чтобы маленькие запросы не затрагивали весь кластер, имеет смысл располагать данные одного клиента на одном шарде, или сделать двухуровневое шардирование: разбить весь кластер на «слои», где слой может состоять из нескольких шардов; данные для одного клиента располагаются на одном слое, но в один слой можно по мере необходимости добавлять шарды, в рамках которых данные распределены произвольным образом; создаются распределённые таблицы на каждый слой и одна общая распределённая таблица для глобальных запросов. Запись данных осуществляется полностью асинхронно. При вставке в таблицу, блок данных сначала записывается в файловую систему. Затем, в фоновом режиме отправляются на удалённые серверы при первой возможности. Период отправки регулируется настройками [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) и [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms). Движок таблиц `Distributed` отправляет каждый файл со вставленными данными отдельно, но можно включить пакетную отправку данных настройкой [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts). Эта настройка улучшает производительность кластера за счет более оптимального использования ресурсов сервера-отправителя и сети. Необходимо проверять, что данные отправлены успешно, для этого проверьте список файлов (данных, ожидающих отправки) в каталоге таблицы `/var/lib/clickhouse/data/database/table/`. Количество потоков для выполнения фоновых задач можно задать с помощью настройки [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size). diff --git a/docs/ru/faq/general/mapreduce.md b/docs/ru/faq/general/mapreduce.md index 2e7520aef1a..764ad9045f0 100644 --- a/docs/ru/faq/general/mapreduce.md +++ b/docs/ru/faq/general/mapreduce.md @@ -6,7 +6,7 @@ toc_priority: 110 # Почему бы не использовать системы типа MapReduce? {#why-not-use-something-like-mapreduce} -Системами типа MapReduce будем называть системы распределённых вычислений, в которых операция свёртки реализована на основе распределённой сортировки. Наиболее распространённое решение с открытым кодом в данном классе — [Apache Hadoop](http://hadoop.apache.org). Яндекс пользуется собственным решением — YT. +Системами типа MapReduce будем называть системы распределённых вычислений, в которых операция свёртки реализована на основе распределённой сортировки. 
Наиболее распространённое решение с открытым кодом в данном классе — [Apache Hadoop](http://hadoop.apache.org). В крупных IT компаниях вроде Google или Яндекс часто используются собственные закрытые решения. Такие системы не подходят для онлайн запросов в силу слишком большой задержки. То есть не могут быть использованы в качестве бэкенда для веб-интерфейса. Также эти системы не подходят для обновления данных в реальном времени. Распределённая сортировка является не оптимальным способом для выполнения операции свёртки в случае запросов, выполняющихся в режиме онлайн, потому что результат выполнения операции и все промежуточные результаты (если такие есть) помещаются в оперативную память на одном сервере. В таком случае оптимальным способом выполнения операции свёртки является хеш-таблица. Частым способом оптимизации "map-reduce" задач является предагрегация (частичная свёртка) с использованием хеш-таблицы в оперативной памяти. Пользователь делает эту оптимизацию в ручном режиме. Распределённая сортировка — основная причина тормозов при выполнении несложных задач типа "map-reduce". diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 9c3ef6b0652..8ff91d0e6a8 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -380,7 +380,7 @@ Q3: 0.051 sec. Q4: 0.072 sec. В этом случае, время выполнения запросов определяется в первую очередь сетевыми задержками. -Мы выполняли запросы с помощью клиента, расположенного в дата-центре Яндекса в Мянтсяля (Финляндия), на кластер в России, что добавляет порядка 20 мс задержки. +Мы выполняли запросы с помощью клиента, расположенного в другом дата-центре, не там где кластер, что добавляет порядка 20 мс задержки. ## Резюме {#reziume} diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index a12773a75b0..8b35b8a836d 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -27,11 +27,17 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su {% include 'install/deb.sh' %} ``` -Также эти пакеты можно скачать и установить вручную отсюда: https://repo.clickhouse.com/deb/stable/main/. +
+
+<details markdown="1">
+
+<summary>Устаревший способ установки deb-пакетов</summary>
+
+``` bash
+{% include 'install/deb_repo.sh' %}
+```
+
+</details>
+
Чтобы использовать различные [версии ClickHouse](../faq/operations/production.md) в зависимости от ваших потребностей, вы можете заменить `stable` на `lts` или `testing`. -Также вы можете вручную скачать и установить пакеты из [репозитория](https://repo.clickhouse.com/deb/stable/main/). +Также вы можете вручную скачать и установить пакеты из [репозитория](https://packages.clickhouse.com/deb/pool/stable). #### Пакеты {#packages} @@ -51,11 +57,17 @@ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not su Сначала нужно подключить официальный репозиторий: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` +
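+For reference, the `install/rpm.sh` include above essentially registers the ClickHouse rpm repository; a rough sketch is shown below (the exact `.repo` URL is an assumption and should be verified). The packages themselves are then installed with `yum` as described further down.
+
+``` bash
+# Rough equivalent of the rpm.sh include: register the ClickHouse rpm repository.
+sudo yum install -y yum-utils
+sudo yum-config-manager --add-repo https://packages.clickhouse.com/rpm/clickhouse.repo
+```
+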
+
+<details markdown="1">
+
+<summary>Устаревший способ установки rpm-пакетов</summary>
+
+``` bash
+{% include 'install/rpm_repo.sh' %}
+```
+
+</details>
+
+ Для использования наиболее свежих версий нужно заменить `stable` на `testing` (рекомендуется для тестовых окружений). Также иногда доступен `prestable`. Для, собственно, установки пакетов необходимо выполнить следующие команды: @@ -64,36 +76,27 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -Также есть возможность установить пакеты вручную, скачав отсюда: https://repo.clickhouse.com/rpm/stable/x86_64. +Также есть возможность установить пакеты вручную, скачав отсюда: https://packages.clickhouse.com/rpm/stable. ### Из Tgz архивов {#from-tgz-archives} Команда ClickHouse в Яндексе рекомендует использовать предкомпилированные бинарники из `tgz` архивов для всех дистрибутивов, где невозможна установка `deb` и `rpm` пакетов. -Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://repo.clickhouse.com/tgz/. +Интересующую версию архивов можно скачать вручную с помощью `curl` или `wget` из репозитория https://packages.clickhouse.com/tgz/. После этого архивы нужно распаковать и воспользоваться скриптами установки. Пример установки самой свежей версии: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` +
+
+<details markdown="1">
+
+<summary>Устаревший способ установки из архивов tgz</summary>
+
+``` bash
+{% include 'install/tgz_repo.sh' %}
+```
+
+</details>
+
+ Для production окружений рекомендуется использовать последнюю `stable`-версию. Её номер также можно найти на github с на вкладке https://github.com/ClickHouse/ClickHouse/tags c постфиксом `-stable`. ### Из Docker образа {#from-docker-image} @@ -195,4 +198,4 @@ SELECT 1 **Поздравляем, система работает!** -Для дальнейших экспериментов можно попробовать загрузить один из тестовых наборов данных или пройти [пошаговое руководство для начинающих](https://clickhouse.com/tutorial.html). +Для дальнейших экспериментов можно попробовать загрузить один из тестовых наборов данных или пройти [пошаговое руководство для начинающих](./tutorial.md). diff --git a/docs/ru/getting-started/playground.md b/docs/ru/getting-started/playground.md index 029fa706576..d9f65e192b5 100644 --- a/docs/ru/getting-started/playground.md +++ b/docs/ru/getting-started/playground.md @@ -5,11 +5,12 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} +!!! warning "Warning" + This service is deprecated and will be replaced in foreseeable future. + [ClickHouse Playground](https://play.clickhouse.com) позволяет пользователям экспериментировать с ClickHouse, мгновенно выполняя запросы без настройки своего сервера или кластера. В Playground доступны несколько тестовых массивов данных, а также примеры запросов, которые показывают возможности ClickHouse. Кроме того, вы можете выбрать LTS релиз ClickHouse, который хотите протестировать. -ClickHouse Playground дает возможность поработать с [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) в конфигурации m2.small (4 vCPU, 32 ГБ ОЗУ), которую предосталяет [Яндекс.Облако](https://cloud.yandex.com/). Дополнительную информацию об облачных провайдерах читайте в разделе [Поставщики облачных услуг ClickHouse](../commercial/cloud.md). - Вы можете отправлять запросы к Playground с помощью любого HTTP-клиента, например [curl](https://curl.haxx.se) или [wget](https://www.gnu.org/software/wget/), также можно установить соединение с помощью драйверов [JDBC](../interfaces/jdbc.md) или [ODBC](../interfaces/odbc.md). Более подробная информация о программных продуктах, поддерживающих ClickHouse, доступна [здесь](../interfaces/index.md). ## Параметры доступа {#credentials} @@ -54,11 +55,3 @@ curl "https://play-api.clickhouse.com:8443/?query=SELECT+'Play+ClickHouse\!';&us ``` bash clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" ``` - -## Детали реализации {#implementation-details} - -Веб-интерфейс ClickHouse Playground выполняет запросы через ClickHouse [HTTP API](../interfaces/http.md). -Бэкэнд Playground - это кластер ClickHouse без дополнительных серверных приложений. Как упоминалось выше, способы подключения по HTTPS и TCP/TLS общедоступны как часть Playground. Они проксируются через [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) для добавления дополнительного уровня защиты и улучшенного глобального подключения. - -!!! warning "Предупреждение" - Открывать сервер ClickHouse для публичного доступа в любой другой ситуации **настоятельно не рекомендуется**. Убедитесь, что он настроен только на частную сеть и защищен брандмауэром. 
diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 62557edff53..e948e77cb24 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -6,7 +6,7 @@ toc_title: "Библиотеки для интеграции от сторонн # Библиотеки для интеграции от сторонних разработчиков {#biblioteki-dlia-integratsii-ot-storonnikh-razrabotchikov} !!! warning "Disclaimer" - Яндекс не занимается поддержкой перечисленных ниже инструментов и библиотек и не проводит тщательного тестирования для проверки их качества. + ClickHouse, Inc. не занимается поддержкой перечисленных ниже инструментов и библиотек и не проводит тщательного тестирования для проверки их качества. ## Инфраструктурные продукты {#infrastrukturnye-produkty} diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md new file mode 100644 index 00000000000..d39177daa12 --- /dev/null +++ b/docs/ru/operations/named-collections.md @@ -0,0 +1,228 @@ +--- +toc_priority: 69 +toc_title: "Именованные соединения" +--- + +# Хранение реквизитов для подключения к внешним источникам в конфигурационных файлах {#named-collections} + +Реквизиты для подключения к внешним источникам (словарям, таблицам, табличным функциям) можно сохранить +в конфигурационных файлах и таким образом упростить создание объектов и скрыть реквизиты (пароли) +от пользователей, имеющих только SQL доступ. + +Параметры можно задать в XML `CSV` и переопределить в SQL `, format = 'TSV'`. +При использовании именованных соединений, параметры в SQL задаются в формате `ключ` = `значение`: `compression_method = 'gzip'`. + +Именованные соединения хранятся в файле `config.xml` сервера ClickHouse в секции `` и применяются при старте ClickHouse. + +Пример конфигурации: +```xml +$ cat /etc/clickhouse-server/config.d/named_collections.xml + + + ... + + +``` + +## Именованные соединения для доступа к S3 + +Описание параметров смотри [Табличная Функция S3](../sql-reference/table-functions/s3.md). + +Пример конфигурации: +```xml + + + + AKIAIOSFODNN7EXAMPLE + wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + CSV + + + +``` + +### Пример использования именованных соединений с функцией s3 + +```sql +INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', + format = 'TSV', structure = 'number UInt64', compression_method = 'gzip') +SELECT * FROM numbers(10000); + +SELECT count() +FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz') + +┌─count()─┐ +│ 10000 │ +└─────────┘ +1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.) +``` + +### Пример использования именованных соединений с таблицей S3 + +```sql +CREATE TABLE s3_engine_table (number Int64) +ENGINE=S3(s3_mydata, url='https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', format = 'TSV') +SETTINGS input_format_with_names_use_header = 0; + +SELECT * FROM s3_engine_table LIMIT 3; +┌─number─┐ +│ 0 │ +│ 1 │ +│ 2 │ +└────────┘ +``` + +## Пример использования именованных соединений с базой данных MySQL + +Описание параметров смотри [mysql](../sql-reference/table-functions/mysql.md). 
+ +Пример конфигурации: +```xml + + + + myuser + mypass + 127.0.0.1 + 3306 + test + 8 + 1 + 1 + + + +``` + +### Пример использования именованных соединений с табличной функцией mysql + +```sql +SELECT count() FROM mysql(mymysql, table = 'test'); + +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +### Пример использования именованных соединений таблицей с движком mysql + +```sql +CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0); +SELECT count() FROM mytable; + +┌─count()─┐ +│ 3 │ +└─────────┘ +``` + +### Пример использования именованных соединений базой данных с движком MySQL + +```sql +CREATE DATABASE mydatabase ENGINE = MySQL(mymysql); + +SHOW TABLES FROM mydatabase; + +┌─name───┐ +│ source │ +│ test │ +└────────┘ +``` + +### Пример использования именованных соединений с внешним словарем с источником mysql + +```sql +CREATE DICTIONARY dict (A Int64, B String) +PRIMARY KEY A +SOURCE(MYSQL(NAME mymysql TABLE 'source')) +LIFETIME(MIN 1 MAX 2) +LAYOUT(HASHED()); + +SELECT dictGet('dict', 'B', 2); + +┌─dictGet('dict', 'B', 2)─┐ +│ two │ +└─────────────────────────┘ +``` + +## Пример использования именованных соединений с базой данных PostgreSQL + +Описание параметров смотри [postgresql](../sql-reference/table-functions/postgresql.md). + +Пример конфигурации: +```xml + + + + pguser + jw8s0F4 + 127.0.0.1 + 5432 + test + test_schema + 8 + + + +``` + +### Пример использования именованных соединений с табличной функцией postgresql + +```sql +SELECT * FROM postgresql(mypg, table = 'test'); + +┌─a─┬─b───┐ +│ 2 │ two │ +│ 1 │ one │ +└───┴─────┘ + + +SELECT * FROM postgresql(mypg, table = 'test', schema = 'public'); + +┌─a─┐ +│ 1 │ +│ 2 │ +│ 3 │ +└───┘ +``` + +### Пример использования именованных соединений таблицей с движком PostgreSQL + +```sql +CREATE TABLE mypgtable (a Int64) ENGINE = PostgreSQL(mypg, table = 'test', schema = 'public'); + +SELECT * FROM mypgtable; + +┌─a─┐ +│ 1 │ +│ 2 │ +│ 3 │ +└───┘ +``` + +### Пример использования именованных соединений базой данных с движком PostgreSQL + +```sql +CREATE DATABASE mydatabase ENGINE = PostgreSQL(mypg); + +SHOW TABLES FROM mydatabase + +┌─name─┐ +│ test │ +└──────┘ +``` + +### Пример использования именованных соединений с внешним словарем с источником POSTGRESQL + +```sql +CREATE DICTIONARY dict (a Int64, b String) +PRIMARY KEY a +SOURCE(POSTGRESQL(NAME mypg TABLE test)) +LIFETIME(MIN 1 MAX 2) +LAYOUT(HASHED()); + +SELECT dictGet('dict', 'b', 2); + +┌─dictGet('dict', 'b', 2)─┐ +│ two │ +└─────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/statements/select/limit-by.md b/docs/ru/sql-reference/statements/select/limit-by.md index 861d88dcafb..5da001addf4 100644 --- a/docs/ru/sql-reference/statements/select/limit-by.md +++ b/docs/ru/sql-reference/statements/select/limit-by.md @@ -11,7 +11,7 @@ ClickHouse поддерживает следующий синтаксис: - `LIMIT [offset_value, ]n BY expressions` - `LIMIT n OFFSET offset_value BY expressions` -Во время обработки запроса, ClickHouse выбирает данные, упорядоченные по ключу сортировки. Ключ сортировки задаётся явно в секции [ORDER BY](order-by.md#select-order-by) или неявно в свойствах движка таблицы. Затем ClickHouse применяет `LIMIT n BY expressions` и возвращает первые `n` для каждой отличной комбинации `expressions`. Если указан `OFFSET`, то для каждого блока данных, который принадлежит отдельной комбинации `expressions`, ClickHouse отступает `offset_value` строк от начала блока и возвращает не более `n`. 
Если `offset_value` больше, чем количество строк в блоке данных, ClickHouse не возвращает ни одной строки. +Во время обработки запроса, ClickHouse выбирает данные, упорядоченные по ключу сортировки. Ключ сортировки задаётся явно в секции [ORDER BY](order-by.md#select-order-by) или неявно в свойствах движка таблицы (порядок строк гарантирован только при использовании [ORDER BY](order-by.md#select-order-by), в ином случае блоки строк не будут упорядочены из-за многопоточной обработки). Затем ClickHouse применяет `LIMIT n BY expressions` и возвращает первые `n` для каждой отличной комбинации `expressions`. Если указан `OFFSET`, то для каждого блока данных, который принадлежит отдельной комбинации `expressions`, ClickHouse отступает `offset_value` строк от начала блока и возвращает не более `n`. Если `offset_value` больше, чем количество строк в блоке данных, ClickHouse не возвращает ни одной строки. `LIMIT BY` не связана с секцией `LIMIT`. Их можно использовать в одном запросе. diff --git a/docs/tools/blog.py b/docs/tools/blog.py index e4fb6f77865..b58523504a3 100644 --- a/docs/tools/blog.py +++ b/docs/tools/blog.py @@ -34,13 +34,11 @@ def build_for_lang(lang, args): # the following list of languages is sorted according to # https://en.wikipedia.org/wiki/List_of_languages_by_total_number_of_speakers languages = { - 'en': 'English', - 'ru': 'Русский' + 'en': 'English' } site_names = { - 'en': 'ClickHouse Blog', - 'ru': 'Блог ClickHouse' + 'en': 'ClickHouse Blog' } assert len(site_names) == len(languages) diff --git a/docs/tools/build.py b/docs/tools/build.py index 75278075996..e4f6718699a 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -181,7 +181,7 @@ if __name__ == '__main__': arg_parser = argparse.ArgumentParser() arg_parser.add_argument('--lang', default='en,ru,zh,ja') - arg_parser.add_argument('--blog-lang', default='en,ru') + arg_parser.add_argument('--blog-lang', default='en') arg_parser.add_argument('--docs-dir', default='.') arg_parser.add_argument('--theme-dir', default=website_dir) arg_parser.add_argument('--website-dir', default=website_dir) diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 20e3ec7aa6f..1f0a3bb4b74 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -31,7 +31,12 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path) from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') ) target_path = to_path.replace('/index.md', '/').replace('.md', '/') - to_url = f'/{base_prefix}/{lang}/{target_path}' + + if target_path[0:7] != 'http://' and target_path[0:8] != 'https://': + to_url = f'/{base_prefix}/{lang}/{target_path}' + else: + to_url = target_path + to_url = to_url.strip() write_redirect_html(out_path, to_url) diff --git a/docs/tools/test.py b/docs/tools/test.py index 53ed9505acd..1ea07c45192 100755 --- a/docs/tools/test.py +++ b/docs/tools/test.py @@ -8,7 +8,7 @@ import subprocess def test_single_page(input_path, lang): - if not (lang == 'en' or lang == 'ru'): + if not (lang == 'en'): return with open(input_path) as f: diff --git a/docs/tools/webpack.config.js b/docs/tools/webpack.config.js index fcb3e7bf32d..e0dea964101 100644 --- a/docs/tools/webpack.config.js +++ b/docs/tools/webpack.config.js @@ -14,7 +14,6 @@ module.exports = { entry: [ path.resolve(scssPath, 'bootstrap.scss'), - path.resolve(scssPath, 'greenhouse.scss'), path.resolve(scssPath, 'main.scss'), path.resolve(jsPath, 'main.js'), ], diff --git a/docs/tools/website.py b/docs/tools/website.py index 
11772fe7a73..de4cc14670c 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -151,6 +151,11 @@ def build_website(args): ) ) + shutil.copytree( + os.path.join(args.website_dir, 'images'), + os.path.join(args.output_dir, 'docs', 'images') + ) + # This file can be requested to check for available ClickHouse releases. shutil.copy2( os.path.join(args.src_dir, 'utils', 'list-versions', 'version_date.tsv'), @@ -231,28 +236,31 @@ def minify_file(path, css_digest, js_digest): def minify_website(args): - # Output greenhouse css separately from main bundle to be included via the greenhouse iframe - command = f"cat '{args.website_dir}/css/greenhouse.css' > '{args.output_dir}/css/greenhouse.css'" - logging.info(command) - output = subprocess.check_output(command, shell=True) - logging.debug(output) - css_in = ' '.join(get_css_in(args)) - css_out = f'{args.output_dir}/css/base.css' - if args.minify: + css_out = f'{args.output_dir}/docs/css/base.css' + os.makedirs(f'{args.output_dir}/docs/css') + + if args.minify and False: # TODO: return closure command = f"purifycss -w '*algolia*' --min {css_in} '{args.output_dir}/*.html' " \ f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}" - else: - command = f'cat {css_in} > {css_out}' + logging.info(css_in) + logging.info(command) + output = subprocess.check_output(command, shell=True) + logging.debug(output) + + else: + command = f"cat {css_in}" + output = subprocess.check_output(command, shell=True) + with open(css_out, 'wb+') as f: + f.write(output) - logging.info(command) - output = subprocess.check_output(command, shell=True) - logging.debug(output) with open(css_out, 'rb') as f: css_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] - js_in = get_js_in(args) - js_out = f'{args.output_dir}/js/base.js' + js_in = ' '.join(get_js_in(args)) + js_out = f'{args.output_dir}/docs/js/base.js' + os.makedirs(f'{args.output_dir}/docs/js') + if args.minify and False: # TODO: return closure js_in = [js[1:-1] for js in js_in] closure_args = [ @@ -271,11 +279,11 @@ def minify_website(args): f.write(js_content) else: - js_in = ' '.join(js_in) - command = f'cat {js_in} > {js_out}' - logging.info(command) + command = f"cat {js_in}" output = subprocess.check_output(command, shell=True) - logging.debug(output) + with open(js_out, 'wb+') as f: + f.write(output) + with open(js_out, 'rb') as f: js_digest = hashlib.sha3_224(f.read()).hexdigest()[0:8] logging.info(js_digest) diff --git a/docs/zh/changelog/index.md b/docs/zh/changelog/index.md index d36a676134e..306c72103fb 100644 --- a/docs/zh/changelog/index.md +++ b/docs/zh/changelog/index.md @@ -247,7 +247,7 @@ toc_title: "\u53D8\u66F4\u65E5\u5FD7" - 更新了clickhouse-test脚本中挂起查询的检查 [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([亚历山大\*卡扎科夫](https://github.com/Akazz)) - 从存储库中删除了一些无用的文件。 [#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更改类型的数学perftests从 `once` 到 `loop`. 
[#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([尼古拉\*科切托夫](https://github.com/KochetovNicolai)) -- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse.com/codebrowser/html_report///ClickHouse/dbms/index.html) +- 添加码头镜像,它允许为我们的代码库构建交互式代码浏览器HTML报告。 [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([阿利沙平](https://github.com/alesapin))见 [Woboq代码浏览器](https://clickhouse.com/codebrowser/ClickHouse/dbms/index.html) - 抑制MSan下的一些测试失败。 [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) - 加速 “exception while insert” 测试 此测试通常在具有复盖率的调试版本中超时。 [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) - 更新 `libcxx` 和 `libcxxabi` 为了主人 在准备 [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([阿列克谢-米洛维多夫](https://github.com/alexey-milovidov)) diff --git a/docs/zh/development/browse-code.md b/docs/zh/development/browse-code.md index 9cee0a37444..f0ad6fd0984 100644 --- a/docs/zh/development/browse-code.md +++ b/docs/zh/development/browse-code.md @@ -5,7 +5,7 @@ toc_title: "\u6D4F\u89C8\u6E90\u4EE3\u7801" # 浏览ClickHouse源代码 {#browse-clickhouse-source-code} -您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/html_report/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 +您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). 它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse) diff --git a/docs/zh/development/continuous-integration.md b/docs/zh/development/continuous-integration.md index ae0367f276e..4f37b6f88c7 100644 --- a/docs/zh/development/continuous-integration.md +++ b/docs/zh/development/continuous-integration.md @@ -136,12 +136,3 @@ git push ## 性能测试 {#performance-tests} 测量查询性能的变化. 这是最长的检查, 只需不到 6 小时即可运行.性能测试报告在[此处](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report)有详细描述. - -## 质量保证 {#qa} -什么是状态页面上的任务(专用网络)项目? - -它是 Yandex 内部工作系统的链接. Yandex 员工可以看到检查的开始时间及其更详细的状态. - -运行测试的地方 - -Yandex 内部基础设施的某个地方. diff --git a/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md index 79ca8f0cd10..abb2af6332d 100644 --- a/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md +++ b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md @@ -38,5 +38,46 @@ CREATE TABLE test ENGINE = EmbeddedRocksDB PRIMARY KEY key ``` +## 指标 + +还有一个`system.rocksdb` 表, 公开rocksdb的统计信息: + +```sql +SELECT + name, + value +FROM system.rocksdb + +┌─name──────────────────────┬─value─┐ +│ no.file.opens │ 1 │ +│ number.block.decompressed │ 1 │ +└───────────────────────────┴───────┘ +``` + +## 配置 + +你能修改任何[rocksdb options](https://github.com/facebook/rocksdb/wiki/Option-String-and-Option-Map) 配置,使用配置文件: + +```xml + + + 8 + + + 2 + + + + TABLE + + 8 + + + 2 + +
+
+
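<!-- The element names in this example appear to have been stripped; based on the standard
     EmbeddedRocksDB settings the block above most likely reads roughly as follows:
     <rocksdb>
         <options>
             <max_background_jobs>8</max_background_jobs>
         </options>
         <column_family_options>
             <num_levels>2</num_levels>
         </column_family_options>
         <tables>
             <table>
                 <name>TABLE</name>
                 <options>
                     <max_background_jobs>8</max_background_jobs>
                 </options>
                 <column_family_options>
                     <num_levels>2</num_levels>
                 </column_family_options>
             </table>
         </tables>
     </rocksdb>
-->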
+``` [原始文章](https://clickhouse.com/docs/en/engines/table-engines/integrations/embedded-rocksdb/) diff --git a/docs/zh/engines/table-engines/mergetree-family/replication.md b/docs/zh/engines/table-engines/mergetree-family/replication.md index 2e6391c01dd..c3be3a382cb 100644 --- a/docs/zh/engines/table-engines/mergetree-family/replication.md +++ b/docs/zh/engines/table-engines/mergetree-family/replication.md @@ -7,7 +7,7 @@ - ReplicatedReplacingMergeTree - ReplicatedAggregatingMergeTree - ReplicatedCollapsingMergeTree -- ReplicatedVersionedCollapsingMergetree +- ReplicatedVersionedCollapsingMergeTree - ReplicatedGraphiteMergeTree 副本是表级别的,不是整个服务器级的。所以,服务器里可以同时有复制表和非复制表。 diff --git a/docs/zh/engines/table-engines/special/distributed.md b/docs/zh/engines/table-engines/special/distributed.md index 4fc3c61ddd3..edc4c1f4854 100644 --- a/docs/zh/engines/table-engines/special/distributed.md +++ b/docs/zh/engines/table-engines/special/distributed.md @@ -205,7 +205,7 @@ SELECT 查询会被发送到所有分片,并且无论数据在分片中如何 下面的情况,你需要关注分片方案: - 使用需要特定键连接数据( IN 或 JOIN )的查询。如果数据是用该键进行分片,则应使用本地 IN 或 JOIN 而不是 GLOBAL IN 或 GLOBAL JOIN,这样效率更高。 -- 使用大量服务器(上百或更多),但有大量小查询(个别客户的查询 - 网站,广告商或合作伙伴)。为了使小查询不影响整个集群,让单个客户的数据处于单个分片上是有意义的。或者,正如我们在 Yandex.Metrica 中所做的那样,你可以配置两级分片:将整个集群划分为«层»,一个层可以包含多个分片。单个客户的数据位于单个层上,根据需要将分片添加到层中,层中的数据随机分布。然后给每层创建分布式表,再创建一个全局的分布式表用于全局的查询。 +- 使用大量服务器(上百或更多),但有大量小查询(个别客户的查询 - 网站,广告商或合作伙伴)。为了使小查询不影响整个集群,让单个客户的数据处于单个分片上是有意义的。或者 你可以配置两级分片:将整个集群划分为«层»,一个层可以包含多个分片。单个客户的数据位于单个层上,根据需要将分片添加到层中,层中的数据随机分布。然后给每层创建分布式表,再创建一个全局的分布式表用于全局的查询。 数据是异步写入的。对于分布式表的 INSERT,数据块只写本地文件系统。之后会尽快地在后台发送到远程服务器。发送数据的周期性是由[distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms)和[distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms)设置。分布式引擎会分别发送每个插入数据的文件,但是你可以使用[distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts)设置启用批量发送文件。该设置通过更好地利用本地服务器和网络资源来提高集群性能。你应该检查表目录`/var/lib/clickhouse/data/database/table/`中的文件列表(等待发送的数据)来检查数据是否发送成功。执行后台任务的线程数可以通过[background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size)设置。 diff --git a/docs/zh/faq/general/mapreduce.md b/docs/zh/faq/general/mapreduce.md index f70ca8a2583..99cb6c031ae 100644 --- a/docs/zh/faq/general/mapreduce.md +++ b/docs/zh/faq/general/mapreduce.md @@ -6,7 +6,7 @@ toc_priority: 110 # 为何不使用 MapReduce等技术? 
{#why-not-use-something-like-mapreduce} -我们可以将MapReduce这样的系统称为分布式计算系统,其中的reduce操作是基于分布式排序的。这个领域中最常见的开源解决方案是[Apache Hadoop](http://hadoop.apache.org)。Yandex使用其内部解决方案YT。 +我们可以将MapReduce这样的系统称为分布式计算系统,其中的reduce操作是基于分布式排序的。这个领域中最常见的开源解决方案是[Apache Hadoop](http://hadoop.apache.org)。 这些系统不适合用于在线查询,因为它们的延迟很大。换句话说,它们不能被用作网页界面的后端。这些类型的系统对于实时数据更新并不是很有用。如果操作的结果和所有中间结果(如果有的话)都位于单个服务器的内存中,那么分布式排序就不是执行reduce操作的最佳方式,这通常是在线查询的情况。在这种情况下,哈希表是执行reduce操作的最佳方式。优化map-reduce任务的一种常见方法是使用内存中的哈希表进行预聚合(部分reduce)。用户手动执行此优化。在运行简单的map-reduce任务时,分布式排序是导致性能下降的主要原因之一。 diff --git a/docs/zh/faq/integration/file-export.md b/docs/zh/faq/integration/file-export.md deleted file mode 120000 index 19a5c67148b..00000000000 --- a/docs/zh/faq/integration/file-export.md +++ /dev/null @@ -1 +0,0 @@ -../../../en/faq/integration/file-export.md \ No newline at end of file diff --git a/docs/zh/faq/integration/file-export.md b/docs/zh/faq/integration/file-export.md new file mode 100644 index 00000000000..3582bfb1008 --- /dev/null +++ b/docs/zh/faq/integration/file-export.md @@ -0,0 +1,37 @@ +--- +title: 如何从 ClickHouse 导出数据到一个文件? +toc_hidden: true +toc_priority: 10 +--- + +# 如何从 ClickHouse 导出数据到一个文件? {#how-to-export-to-file} + +## 使用 INTO OUTFILE 语法 {#using-into-outfile-clause} + +加一个 [INTO OUTFILE](../../sql-reference/statements/select/into-outfile.md#into-outfile-clause) 语法到你的查询语句中. + +例如: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' +``` + +ClickHouse 默认使用[TabSeparated](../../interfaces/formats.md#tabseparated) 格式写入数据. 修改[数据格式](../../interfaces/formats.md), 请用 [FORMAT 语法](../../sql-reference/statements/select/format.md#format-clause). + +例如: + +``` sql +SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV +``` + +## 使用一个文件引擎表 {#using-a-file-engine-table} + +查看 [File](../../engines/table-engines/special/file.md) 表引擎. + +## 使用命令行重定向 {#using-command-line-redirection} + +``` bash +$ clickhouse-client --query "SELECT * from table" --format FormatName > result.txt +``` + +查看 [clickhouse-client](../../interfaces/cli.md). diff --git a/docs/zh/getting-started/example-datasets/nyc-taxi.md b/docs/zh/getting-started/example-datasets/nyc-taxi.md index b10fe931c20..bf948528d89 100644 --- a/docs/zh/getting-started/example-datasets/nyc-taxi.md +++ b/docs/zh/getting-started/example-datasets/nyc-taxi.md @@ -375,7 +375,6 @@ Q3:0.051秒。 Q4:0.072秒。 在这种情况下,查询处理时间首先由网络延迟确定。 -我们使用位于芬兰Yandex数据中心的客户机在俄罗斯的一个集群上运行查询,这增加了大约20毫秒的延迟。 ## 总结 {#zong-jie} diff --git a/docs/zh/getting-started/install.md b/docs/zh/getting-started/install.md index eec3aabe2a1..e74a05a9913 100644 --- a/docs/zh/getting-started/install.md +++ b/docs/zh/getting-started/install.md @@ -27,9 +27,17 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not {% include 'install/deb.sh' %} ``` +
+
+Deprecated Method for installing deb-packages
+``` bash
+{% include 'install/deb_repo.sh' %}
+```
+
+ 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。 -你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/deb/stable/main/)。 +你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/deb/pool/stable)。 安装包列表: @@ -45,11 +53,17 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not 首先,您需要添加官方存储库: ``` bash -sudo yum install yum-utils -sudo rpm --import https://repo.clickhouse.com/CLICKHOUSE-KEY.GPG -sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 +{% include 'install/rpm.sh' %} ``` +
+
+Deprecated Method for installing rpm-packages
+``` bash
+{% include 'install/rpm_repo.sh' %}
+```
+
+ 如果您想使用最新的版本,请用`testing`替代`stable`(我们只推荐您用于测试环境)。`prestable`有时也可用。 然后运行命令安装: @@ -58,37 +72,28 @@ sudo yum-config-manager --add-repo https://repo.clickhouse.com/rpm/stable/x86_64 sudo yum install clickhouse-server clickhouse-client ``` -你也可以从这里手动下载安装包:[下载](https://repo.clickhouse.com/rpm/stable/x86_64)。 +你也可以从这里手动下载安装包:[下载](https://packages.clickhouse.com/rpm/stable)。 ### `Tgz`安装包 {#from-tgz-archives} 如果您的操作系统不支持安装`deb`或`rpm`包,建议使用官方预编译的`tgz`软件包。 -所需的版本可以通过`curl`或`wget`从存储库`https://repo.clickhouse.com/tgz/`下载。 +所需的版本可以通过`curl`或`wget`从存储库`https://packages.clickhouse.com/tgz/`下载。 下载后解压缩下载资源文件并使用安装脚本进行安装。以下是一个最新稳定版本的安装示例: ``` bash -export LATEST_VERSION=`curl https://api.github.com/repos/ClickHouse/ClickHouse/tags 2>/dev/null | grep stable | grep -Eo '[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+' | head -n 1` -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-common-static-dbg-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-server-$LATEST_VERSION.tgz -curl -O https://repo.clickhouse.com/tgz/stable/clickhouse-client-$LATEST_VERSION.tgz - -tar -xzvf clickhouse-common-static-$LATEST_VERSION.tgz -sudo clickhouse-common-static-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-common-static-dbg-$LATEST_VERSION.tgz -sudo clickhouse-common-static-dbg-$LATEST_VERSION/install/doinst.sh - -tar -xzvf clickhouse-server-$LATEST_VERSION.tgz -sudo clickhouse-server-$LATEST_VERSION/install/doinst.sh -sudo /etc/init.d/clickhouse-server start - -tar -xzvf clickhouse-client-$LATEST_VERSION.tgz -sudo clickhouse-client-$LATEST_VERSION/install/doinst.sh +{% include 'install/tgz.sh' %} ``` +
+
+Deprecated Method for installing tgz archives
+``` bash
+{% include 'install/tgz_repo.sh' %}
+```
+
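For reference, the shell pipeline removed in the hunk above derived `LATEST_VERSION` by querying the GitHub tags API and keeping the newest tag that mentions `stable`; the paragraph that follows points at the same tags page. A rough Python sketch of that lookup (assumes network access to `api.github.com`; the exact filtering of the original script may differ):

``` python
import json
import re
import urllib.request

# Mirror the removed `export LATEST_VERSION=...` pipeline: take the newest tag on the
# first page of /tags that mentions "stable" and extract its version number.
with urllib.request.urlopen("https://api.github.com/repos/ClickHouse/ClickHouse/tags") as resp:
    tags = json.load(resp)

stable = [t["name"] for t in tags if "stable" in t["name"]]
match = re.search(r"\d+\.\d+\.\d+\.\d+", stable[0]) if stable else None
print(match.group(0) if match else "no stable tag on the first page")
```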
+ 对于生产环境,建议使用最新的`stable`版本。你可以在GitHub页面https://github.com/ClickHouse/ClickHouse/tags找到它,它以后缀`-stable`标志。 ### `Docker`安装包 {#from-docker-image} @@ -183,6 +188,6 @@ SELECT 1 **恭喜,系统已经工作了!** -为了继续进行实验,你可以尝试下载测试数据集或查看[教程](https://clickhouse.com/tutorial.html)。 +为了继续进行实验,你可以尝试下载测试数据集或查看[教程](./tutorial.md)。 [原始文章](https://clickhouse.com/docs/en/getting_started/install/) diff --git a/docs/zh/getting-started/playground.md b/docs/zh/getting-started/playground.md index 8fee8966a4f..33636c92829 100644 --- a/docs/zh/getting-started/playground.md +++ b/docs/zh/getting-started/playground.md @@ -5,12 +5,13 @@ toc_title: 体验平台 # ClickHouse体验平台 {#clickhouse-playground} +!!! warning "Warning" + This service is deprecated and will be replaced in foreseeable future. + [ClickHouse体验平台](https://play.clickhouse.com?file=welcome) 允许人们通过即时运行查询来尝试ClickHouse,而无需设置他们的服务器或集群。 体验平台中提供几个示例数据集以及显示ClickHouse特性的示例查询。还有一些ClickHouse LTS版本可供尝试。 -ClickHouse体验平台提供了小型集群[Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse)实例配置(4 vCPU, 32 GB RAM)它们托管在[Yandex.Cloud](https://cloud.yandex.com/). 更多信息查询[cloud providers](../commercial/cloud.md). - 您可以使用任何HTTP客户端对ClickHouse体验平台进行查询,例如[curl](https://curl.haxx.se)或者[wget](https://www.gnu.org/software/wget/),或使用[JDBC](../interfaces/jdbc.md)或者[ODBC](../interfaces/odbc.md)驱动连接。关于支持ClickHouse的软件产品的更多信息详见[here](../interfaces/index.md). ## Credentials {#credentials} @@ -61,11 +62,3 @@ TCP连接示例[CLI](../interfaces/cli.md): ``` bash clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" ``` - -## Implementation Details {#implementation-details} - -ClickHouse体验平台界面实际上是通过ClickHouse [HTTP API](../interfaces/http.md)接口实现的。 -ClickHouse体验平台是一个ClickHouse集群,没有任何附加的服务器端应用程序。如上所述,ClickHouse的HTTPS和TCP/TLS端点也可以作为体验平台的一部分公开使用, 代理通过[Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/)增加一层额外的保护和改善连接。 - -!!! warning "注意" - **强烈不推荐**在任何其他情况下将ClickHouse服务器暴露给公共互联网。确保它只在私有网络上侦听,并由正确配置的防火墙监控。 diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 075e1ca0870..291e6d907fb 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -6,7 +6,7 @@ toc_title: 第三方集成库 # 第三方集成库 {#integration-libraries-from-third-party-developers} !!! 
warning "声明" -Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试来确保它们的质量。 + ClickHouse, Inc.**没有**维护下面列出的库,也没有做过任何广泛的测试来确保它们的质量。 ## 基础设施 {#infrastructure-products} diff --git a/docs/zh/operations/performance-test.md b/docs/zh/operations/performance-test.md index d3643969c2e..9761d516ddd 100644 --- a/docs/zh/operations/performance-test.md +++ b/docs/zh/operations/performance-test.md @@ -36,6 +36,18 @@ chmod a+x ./hardware.sh wget https://builds.clickhouse.com/master/amd64/clickhouse # For aarch64: wget https://builds.clickhouse.com/master/aarch64/clickhouse +# For powerpc64le: +wget https://builds.clickhouse.com/master/powerpc64le/clickhouse +# For freebsd: +wget https://builds.clickhouse.com/master/freebsd/clickhouse +# For freebsd-aarch64: +wget https://builds.clickhouse.com/master/freebsd-aarch64/clickhouse +# For freebsd-powerpc64le: +wget https://builds.clickhouse.com/master/freebsd-powerpc64le/clickhouse +# For macos: +wget https://builds.clickhouse.com/master/macos/clickhouse +# For macos-aarch64: +wget https://builds.clickhouse.com/master/macos-aarch64/clickhouse # Then do: chmod a+x clickhouse ``` diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index d31dd5da805..8c1be5429d1 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -668,7 +668,7 @@ SSL客户端/服务器配置。 **示例** ``` xml -Europe/Moscow +Asia/Istanbul ``` ## tcp_port {#server_configuration_parameters-tcp_port} diff --git a/docs/zh/sql-reference/data-types/datetime64.md b/docs/zh/sql-reference/data-types/datetime64.md index 4a112275259..6361b77d245 100644 --- a/docs/zh/sql-reference/data-types/datetime64.md +++ b/docs/zh/sql-reference/data-types/datetime64.md @@ -28,7 +28,7 @@ DateTime64(precision, [timezone]) ``` sql CREATE TABLE dt ( - `timestamp` DateTime64(3, 'Europe/Moscow'), + `timestamp` DateTime64(3, 'Asia/Istanbul'), `event_id` UInt8 ) ENGINE = TinyLog @@ -49,13 +49,13 @@ SELECT * FROM dt └─────────────────────────┴──────────┘ ``` -- 将日期时间作为integer类型插入时,它会被视为适当缩放的Unix时间戳(UTC)。`1546300800000` (精度为3)表示 `'2019-01-01 00:00:00'` UTC. 不过,因为 `timestamp` 列指定了 `Europe/Moscow` (UTC+3)的时区,当作为字符串输出时,它将显示为 `'2019-01-01 03:00:00'` -- 当把字符串作为日期时间插入时,它会被赋予时区信息。 `'2019-01-01 00:00:00'` 将被认为处于 `Europe/Moscow` 时区并被存储为 `1546290000000`. +- 将日期时间作为integer类型插入时,它会被视为适当缩放的Unix时间戳(UTC)。`1546300800000` (精度为3)表示 `'2019-01-01 00:00:00'` UTC. 不过,因为 `timestamp` 列指定了 `Asia/Istanbul` (UTC+3)的时区,当作为字符串输出时,它将显示为 `'2019-01-01 03:00:00'` +- 当把字符串作为日期时间插入时,它会被赋予时区信息。 `'2019-01-01 00:00:00'` 将被认为处于 `Asia/Istanbul` 时区并被存储为 `1546290000000`. 
**2.** 过滤 `DateTime64` 类型的值 ``` sql -SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow') +SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul') ``` ``` text @@ -69,12 +69,12 @@ SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europ **3.** 获取 `DateTime64` 类型值的时区信息: ``` sql -SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x +SELECT toDateTime64(now(), 3, 'Asia/Istanbul') AS column, toTypeName(column) AS x ``` ``` text ┌──────────────────column─┬─x──────────────────────────────┐ -│ 2019-10-16 04:12:04.000 │ DateTime64(3, 'Europe/Moscow') │ +│ 2019-10-16 04:12:04.000 │ DateTime64(3, 'Asia/Istanbul') │ └─────────────────────────┴────────────────────────────────┘ ``` @@ -83,7 +83,7 @@ SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS ``` sql SELECT toDateTime64(timestamp, 3, 'Europe/London') as lon_time, -toDateTime64(timestamp, 3, 'Europe/Moscow') as mos_time +toDateTime64(timestamp, 3, 'Asia/Istanbul') as mos_time FROM dt ``` diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 1225cf33699..969f71011fd 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -212,13 +212,13 @@ SELECT toStartOfSecond(dt64); ``` sql WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 -SELECT toStartOfSecond(dt64, 'Europe/Moscow'); +SELECT toStartOfSecond(dt64, 'Asia/Istanbul'); ``` 结果: ``` text -┌─toStartOfSecond(dt64, 'Europe/Moscow')─┐ +┌─toStartOfSecond(dt64, 'Asia/Istanbul')─┐ │ 2020-01-01 13:20:30.000 │ └────────────────────────────────────────┘ ``` @@ -414,13 +414,13 @@ SELECT now(), date_trunc('hour', now()); 指定时区查询: ```sql -SELECT now(), date_trunc('hour', now(), 'Europe/Moscow'); +SELECT now(), date_trunc('hour', now(), 'Asia/Istanbul'); ``` 结果: ```text -┌───────────────now()─┬─date_trunc('hour', now(), 'Europe/Moscow')─┐ +┌───────────────now()─┬─date_trunc('hour', now(), 'Asia/Istanbul')─┐ │ 2020-09-28 10:46:26 │ 2020-09-28 13:00:00 │ └─────────────────────┴────────────────────────────────────────────┘ ``` @@ -468,13 +468,13 @@ SELECT now(); 指定时区查询: ``` sql -SELECT now('Europe/Moscow'); +SELECT now('Asia/Istanbul'); ``` 结果: ``` text -┌─now('Europe/Moscow')─┐ +┌─now('Asia/Istanbul')─┐ │ 2020-10-17 10:42:23 │ └──────────────────────┘ ``` diff --git a/docs/zh/sql-reference/functions/type-conversion-functions.md b/docs/zh/sql-reference/functions/type-conversion-functions.md index c1d1e66664e..09fe30a4400 100644 --- a/docs/zh/sql-reference/functions/type-conversion-functions.md +++ b/docs/zh/sql-reference/functions/type-conversion-functions.md @@ -439,7 +439,7 @@ AS parseDateTimeBestEffort; 查询: ``` sql -SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') +SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Asia/Istanbul') AS parseDateTimeBestEffort ``` diff --git a/docs/zh/sql-reference/statements/alter/role.md b/docs/zh/sql-reference/statements/alter/role.md deleted file mode 120000 index ce1f0a94eb3..00000000000 --- a/docs/zh/sql-reference/statements/alter/role.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/alter/role.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/alter/role.md b/docs/zh/sql-reference/statements/alter/role.md new file mode 100644 index 00000000000..3f5c5daf7b8 --- /dev/null +++ 
b/docs/zh/sql-reference/statements/alter/role.md @@ -0,0 +1,16 @@ +--- +toc_priority: 46 +toc_title: 角色 +--- + +## 操作角色 {#alter-role-statement} + +修改角色. + +语法示例: + +``` sql +ALTER ROLE [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] + [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] + [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | PROFILE 'profile_name'] [,...] +``` diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md b/docs/zh/sql-reference/statements/alter/row-policy.md deleted file mode 120000 index 09ad2d301f3..00000000000 --- a/docs/zh/sql-reference/statements/alter/row-policy.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/alter/row-policy.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/alter/row-policy.md b/docs/zh/sql-reference/statements/alter/row-policy.md new file mode 100644 index 00000000000..0cdba239b84 --- /dev/null +++ b/docs/zh/sql-reference/statements/alter/row-policy.md @@ -0,0 +1,19 @@ +--- +toc_priority: 47 +toc_title: 行策略 +--- + +# 操作行策略 {#alter-row-policy-statement} + +修改行策略. + +语法: + +``` sql +ALTER [ROW] POLICY [IF EXISTS] name1 [ON CLUSTER cluster_name1] ON [database1.]table1 [RENAME TO new_name1] + [, name2 [ON CLUSTER cluster_name2] ON [database2.]table2 [RENAME TO new_name2] ...] + [AS {PERMISSIVE | RESTRICTIVE}] + [FOR SELECT] + [USING {condition | NONE}][,...] + [TO {role [,...] | ALL | ALL EXCEPT role [,...]}] +``` diff --git a/docs/zh/sql-reference/statements/alter/settings-profile.md b/docs/zh/sql-reference/statements/alter/settings-profile.md deleted file mode 120000 index 0e71ac4e831..00000000000 --- a/docs/zh/sql-reference/statements/alter/settings-profile.md +++ /dev/null @@ -1 +0,0 @@ -../../../../en/sql-reference/statements/alter/settings-profile.md \ No newline at end of file diff --git a/docs/zh/sql-reference/statements/alter/settings-profile.md b/docs/zh/sql-reference/statements/alter/settings-profile.md new file mode 100644 index 00000000000..045b2461e8c --- /dev/null +++ b/docs/zh/sql-reference/statements/alter/settings-profile.md @@ -0,0 +1,16 @@ +--- +toc_priority: 48 +toc_title: 配置文件设置 +--- + +## 更改配置文件设置 {#alter-settings-profile-statement} + +更改配置文件设置。 + +语法: + +``` sql +ALTER SETTINGS PROFILE [IF EXISTS] TO name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] + [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] + [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY|WRITABLE] | INHERIT 'profile_name'] [,...] +``` diff --git a/docs/zh/sql-reference/statements/select/limit-by.md b/docs/zh/sql-reference/statements/select/limit-by.md index f5ed5b1bf98..9b93bb9cf21 100644 --- a/docs/zh/sql-reference/statements/select/limit-by.md +++ b/docs/zh/sql-reference/statements/select/limit-by.md @@ -11,7 +11,7 @@ ClickHouse支持以下语法变体: - `LIMIT [offset_value, ]n BY expressions` - `LIMIT n OFFSET offset_value BY expressions` -在查询处理过程中,ClickHouse会选择按排序键排序的数据。 排序键使用以下命令显式设置 [ORDER BY](../../../sql-reference/statements/select/order-by.md) 子句或隐式作为表引擎的属性。 然后ClickHouse应用 `LIMIT n BY expressions` 并返回第一 `n` 每个不同组合的行 `expressions`. 
如果 `OFFSET` 被指定,则对于每个数据块属于一个不同的组合 `expressions`,ClickHouse跳过 `offset_value` 从块开始的行数,并返回最大值 `n` 行的结果。 如果 `offset_value` 如果数据块中的行数大于数据块中的行数,ClickHouse将从该块返回零行。 +在进行查询处理时,ClickHouse选择按排序键排序的数据。排序键设置显式地使用一个[ORDER BY](order-by.md#select-order-by)条款或隐式属性表的引擎(行顺序只是保证在使用[ORDER BY](order-by.md#select-order-by),否则不会命令行块由于多线程)。然后ClickHouse应用`LIMIT n BY 表达式`,并为每个不同的`表达式`组合返回前n行。如果指定了`OFFSET`,那么对于每个属于不同`表达式`组合的数据块,ClickHouse将跳过`offset_value`从块开始的行数,并最终返回最多`n`行的结果。如果`offset_value`大于数据块中的行数,则ClickHouse从数据块中返回零行。 !!! note "注" `LIMIT BY` 是不相关的 [LIMIT](../../../sql-reference/statements/select/limit.md). 它们都可以在同一个查询中使用。 diff --git a/docs/zh/whats-new/changelog/2018.md b/docs/zh/whats-new/changelog/2018.md index c461a3e7d0a..c87df7966f4 100644 --- a/docs/zh/whats-new/changelog/2018.md +++ b/docs/zh/whats-new/changelog/2018.md @@ -948,7 +948,7 @@ - 添加了对表中多维数组和元组 (`Tuple` 数据类型) 的存储的支持. - 支持用于 `DESCRIBE` 和 `INSERT` 查询的表函数. 在 `DESCRIBE` 中添加了对子查询的支持. 示例:`DESC TABLE remote('host', default.hits)`; `DESC 表(选择 1)`; `插入表功能远程('host',default.hits)`. 除了 `INSERT INTO` , 还支持 `INSERT INTO TABLE`. -- 改进了对时区的支持. `DateTime` 数据类型可以使用用于解析和格式化文本格式的时区进行注释. 示例: `DateTime('Europe/Moscow')`. 当在函数中为 DateTime 参数指定时区时, 返回类型将跟踪时区, 并且值将按预期显示. +- 改进了对时区的支持. `DateTime` 数据类型可以使用用于解析和格式化文本格式的时区进行注释. 示例: `DateTime('Asia/Istanbul')`. 当在函数中为 DateTime 参数指定时区时, 返回类型将跟踪时区, 并且值将按预期显示. - 添加了函数`toTimeZone`、`timeDiff`、`toQuarter`、`toRelativeQuarterNum`. `toRelativeHour`/`Minute`/`Second` 函数可以将 `Date` 类型的值作为参数. `now` 函数名区分大小写. - 添加了 `toStartOfFifteenMinutes` 函数 (Kirill Shvakov). - 添加了用于格式化查询的 `clickhouse format` 工具. diff --git a/docs/zh/whats-new/changelog/2019.md b/docs/zh/whats-new/changelog/2019.md index aa7dc777f9c..5eeaf9226c4 100644 --- a/docs/zh/whats-new/changelog/2019.md +++ b/docs/zh/whats-new/changelog/2019.md @@ -858,7 +858,7 @@ - 使用 MySQL 样式标识符引用修复对 MySQL 引擎的插入和选择查询. [#5704](https://github.com/ClickHouse/ClickHouse/pull/5704) ([Winter Zhang](https://github.com/zhang2014)) - 现在 `CHECK TABLE` 查询可以与 MergeTree 引擎系列一起使用. 如果每个部分 (或在更简单的引擎情况下的文件) 有任何检查状态和消息, 它会返回检查状态和消息. 此外, 修复了获取损坏部分的错误. [#5865](https://github.com/ClickHouse/ClickHouse/pull/5865) ([alesapin](https://github.com/alesapin)) - 修复 SPLIT_SHARED_LIBRARIES 运行时. [#5793](https://github.com/ClickHouse/ClickHouse/pull/5793) ([Danila Kutenin](https://github.com/danlark1)) -- 当 `/etc/localtime` 是一个像 `../usr/share/zoneinfo/Europe/Moscow` 这样的相对符号链接时, 修复了时区初始化 [#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov)) +- 当 `/etc/localtime` 是一个像 `../usr/share/zoneinfo/Asia/Istanbul` 这样的相对符号链接时, 修复了时区初始化 [#5922](https://github.com/ClickHouse/ClickHouse/pull/5922) ([alexey-milovidov](https://github.com/alexey-milovidov)) - clickhouse-copier: 修复关机后免费使用. [#5752](https://github.com/ClickHouse/ClickHouse/pull/5752) ([proller](https://github.com/proller)) - 更新了 `simdjson` . 修复部分无效的零字节JSON解析成功的问题. [#5938](https://github.com/ClickHouse/ClickHouse/pull/5938) ([alexey-milovidov](https://github.com/alexey-milovidov)) - 修复关闭系统日志 [#5802](https://github.com/ClickHouse/ClickHouse/pull/5802) ([Anton Popov](https://github.com/CurtizJ)) diff --git a/docs/zh/whats-new/changelog/2020.md b/docs/zh/whats-new/changelog/2020.md index 19e9125224c..6890f0f551e 100644 --- a/docs/zh/whats-new/changelog/2020.md +++ b/docs/zh/whats-new/changelog/2020.md @@ -2962,7 +2962,7 @@ * 更新了对 clickhouse-test 脚本中挂起查询的检查. [#8858](https://github.com/ClickHouse/ClickHouse/pull/8858) ([Alexander Kazakov](https://github.com/Akazz)) * 从存储库中删除了一些无用的文件. 
[#8843](https://github.com/ClickHouse/ClickHouse/pull/8843) ([alexey-milovidov](https://github.com/alexey-milovidov)) * 将数学性能测试的类型从 `once` 更改为 `loop` . [#8783](https://github.com/ClickHouse/ClickHouse/pull/8783) ([Nikolai Kochetov](https://github.com/KochetovNicolai)) -* 添加 docker 图像,它允许为我们的代码库构建交互式代码浏览器 HTML 报告. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/html_report///ClickHouse/dbms/index.html) +* 添加 docker 图像,它允许为我们的代码库构建交互式代码浏览器 HTML 报告. [#8781](https://github.com/ClickHouse/ClickHouse/pull/8781) ([alesapin](https://github.com/alesapin)) See [Woboq Code Browser](https://clickhouse-test-reports.s3.yandex.net/codebrowser/ClickHouse/dbms/index.html) * 抑制 MSan 下的一些测试失败. [#8780](https://github.com/ClickHouse/ClickHouse/pull/8780) ([Alexander Kuzmenkov](https://github.com/akuzm)) * 加速 `exception while insert` 测试. 此测试经常在 debug-with-coverage 构建中超时. [#8711](https://github.com/ClickHouse/ClickHouse/pull/8711) ([alexey-milovidov](https://github.com/alexey-milovidov)) * 将 `libcxx` 和 `libcxxabi` 更新为 master. 准备 [#9304](https://github.com/ClickHouse/ClickHouse/issues/9304) [#9308](https://github.com/ClickHouse/ClickHouse/pull/9308) ([alexey-milovidov](https://github.com/alexey-milovidov)) diff --git a/packages/clickhouse-rpm.repo b/packages/clickhouse-rpm.repo new file mode 100644 index 00000000000..27321123dc1 --- /dev/null +++ b/packages/clickhouse-rpm.repo @@ -0,0 +1,31 @@ +[clickhouse-stable] +name=ClickHouse - Stable Repository +baseurl=https://packages.clickhouse.com/rpm/stable/ +gpgkey=https://packages.clickhouse.com/rpm/stable/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-lts] +name=ClickHouse - LTS Repository +baseurl=https://packages.clickhouse.com/rpm/lts/ +gpgkey=https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-prestable] +name=ClickHouse - Pre-stable Repository +baseurl=https://packages.clickhouse.com/rpm/prestable/ +gpgkey=https://packages.clickhouse.com/rpm/prestable/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=0 + +[clickhouse-testing] +name=ClickHouse - Testing Repository +baseurl=https://packages.clickhouse.com/rpm/testing/ +gpgkey=https://packages.clickhouse.com/rpm/testing/repodata/repomd.xml.key +gpgcheck=0 +repo_gpgcheck=1 +enabled=1 diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index 8906d186bfc..0890b9c95d3 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -2,6 +2,8 @@ if (USE_CLANG_TIDY) set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}") endif () +include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake) + # The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), # each of them may be built and linked as a separate library. # If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. 
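The comment above describes `clickhouse` as a single multi-call binary, and the hunk below keeps installing `clickhouse-keeper` as a symlink to it unless a standalone Keeper is built; the mode is chosen from the name the binary is invoked under or from the first argument, ending up in entry points such as `mainEntryClickHouseKeeper` seen later in this patch. A generic Python sketch of that dispatch pattern (illustrative only; the real dispatcher is the C++ `main` and is not part of this diff):

``` python
import os
import sys

# Toy stand-ins for entry points such as mainEntryClickHouseKeeper in the real binary.
MODES = {
    "server": lambda args: print("would run server with", args),
    "client": lambda args: print("would run client with", args),
    "keeper": lambda args: print("would run keeper with", args),
}

def main(argv):
    invoked_as = os.path.basename(argv[0])        # "clickhouse-keeper" when run via the symlink
    if invoked_as.startswith("clickhouse-"):
        mode, args = invoked_as[len("clickhouse-"):], argv[1:]
    elif len(argv) > 1:
        mode, args = argv[1], argv[2:]            # "clickhouse keeper ..." form
    else:
        mode, args = "", []
    handler = MODES.get(mode)
    if handler is None:
        print("usage: clickhouse {server|client|keeper} [options]")
        return 1
    handler(args)
    return 0

if __name__ == "__main__":
    sys.exit(main(sys.argv))
```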
@@ -445,8 +447,11 @@ else () list(APPEND CLICKHOUSE_BUNDLE clickhouse-static-files-disk-uploader) endif () if (ENABLE_CLICKHOUSE_KEEPER) - add_custom_target (clickhouse-keeper ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper DEPENDS clickhouse) - install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + if (NOT BUILD_STANDALONE_KEEPER) + add_custom_target (clickhouse-keeper ALL COMMAND ${CMAKE_COMMAND} -E create_symlink clickhouse clickhouse-keeper DEPENDS clickhouse) + install (FILES "${CMAKE_CURRENT_BINARY_DIR}/clickhouse-keeper" DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + endif() + list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper) endif () if (ENABLE_CLICKHOUSE_KEEPER_CONVERTER) @@ -455,7 +460,9 @@ else () list(APPEND CLICKHOUSE_BUNDLE clickhouse-keeper-converter) endif () - install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + if (NOT BUILD_STRIPPED_BINARIES_PREFIX) + install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + endif() add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE}) @@ -466,7 +473,13 @@ else () if (USE_BINARY_HASH) add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .note.ClickHouse.hash=hash clickhouse COMMENT "Adding .note.ClickHouse.hash to clickhouse" VERBATIM) endif() -endif () + + if (BUILD_STRIPPED_BINARIES_PREFIX) + clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH clickhouse) + endif() +endif() + + if (ENABLE_TESTS) set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 35ffb97b8e2..60e5ca92f77 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -435,6 +435,8 @@ private: Progress progress; executor.setProgressCallback([&progress](const Progress & value) { progress.incrementPiecewiseAtomically(value); }); + executor.sendQuery(ClientInfo::QueryKind::INITIAL_QUERY); + ProfileInfo info; while (Block block = executor.read()) info.update(block); diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index c40f41cd8d1..a3f5c0ab1c7 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -371,6 +371,13 @@ void Client::initialize(Poco::Util::Application & self) configReadClient(config(), home_path); + const char * env_user = getenv("CLICKHOUSE_USER"); + const char * env_password = getenv("CLICKHOUSE_PASSWORD"); + if (env_user) + config().setString("user", env_user); + if (env_password) + config().setString("password", env_password); + // global_context->setApplicationType(Context::ApplicationType::CLIENT); global_context->setQueryParameters(query_parameters); @@ -1119,7 +1126,12 @@ void Client::processOptions(const OptionsDescription & options_description, { const auto & name = setting.getName(); if (options.count(name)) - config().setString(name, options[name].as()); + { + if (allow_repeated_settings) + config().setString(name, options[name].as().back()); + else + config().setString(name, options[name].as()); + } } if (options.count("config-file") && options.count("config")) diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index 5a50a7074d3..92bb5dc45a3 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ 
-1,13 +1,21 @@ include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) -set(CLICKHOUSE_KEEPER_SOURCES - Keeper.cpp -) - if (OS_LINUX) set (LINK_RESOURCE_LIB INTERFACE "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") + # for some reason INTERFACE linkage doesn't work for standalone binary + set (LINK_RESOURCE_LIB_STANDALONE_KEEPER "-Wl,${WHOLE_ARCHIVE} $ -Wl,${NO_WHOLE_ARCHIVE}") endif () +clickhouse_embed_binaries( + TARGET clickhouse_keeper_configs + RESOURCES keeper_config.xml keeper_embedded.xml +) + +set(CLICKHOUSE_KEEPER_SOURCES + Keeper.cpp + TinyContext.cpp +) + set (CLICKHOUSE_KEEPER_LINK PRIVATE clickhouse_common_config @@ -21,10 +29,113 @@ set (CLICKHOUSE_KEEPER_LINK clickhouse_program_add(keeper) -install (FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) - -clickhouse_embed_binaries( - TARGET clickhouse_keeper_configs - RESOURCES keeper_config.xml keeper_embedded.xml -) +install(FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper) add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs) + +if (BUILD_STANDALONE_KEEPER) + # Sraight list of all required sources + set(CLICKHOUSE_KEEPER_STANDALONE_SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ACLMap.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/Changelog.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/CoordinationSettings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/FourLetterCommand.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/InMemoryLogStore.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperConnectionStats.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperDispatcher.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperLogStore.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperServer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperSnapshotManager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateMachine.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStateManager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/KeeperStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/pathUtils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SessionExpiryQueue.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/SummingStateMachine.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/WriteBufferFromNuraftBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Coordination/ZooKeeperDataReader.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsFields.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/BaseSettings.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/Field.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/SettingsEnums.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/ServerUUID.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Core/UUID.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/KeeperTCPHandler.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/TCPServer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Server/ProtocolServerAdapter.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CachedCompressedReadBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CheckingCompressedReadBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferBase.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDelta.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecDoubleDelta.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecGorilla.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecT64.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecZSTD.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionFactory.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/getCompressionCodecForFile.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/ICompressionCodec.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/LZ4_decompress_faster.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperCommon.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperConstants.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperImpl.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperIO.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperLock.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/ZooKeeperNodeCache.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/../../base/daemon/BaseDaemon.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../base/daemon/SentryWriter.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/../../base/daemon/GraphiteWriter.cpp + + Keeper.cpp + TinyContext.cpp + clickhouse-keeper.cpp + + ) + + add_executable(clickhouse-keeper ${CLICKHOUSE_KEEPER_STANDALONE_SOURCES}) + + # Remove some redundant dependencies + target_compile_definitions (clickhouse-keeper PRIVATE -DKEEPER_STANDALONE_BUILD) + + target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/../../src") # uses includes from src directory + target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src/Core/include") # uses some includes from core + target_include_directories(clickhouse-keeper PUBLIC "${CMAKE_CURRENT_BINARY_DIR}/../../src") # uses some includes from common + + target_link_libraries(clickhouse-keeper + PRIVATE + ch_contrib::abseil_swiss_tables + ch_contrib::nuraft + ch_contrib::lz4 + ch_contrib::zstd + ch_contrib::cityhash + common ch_contrib::double_conversion + ch_contrib::dragonbox_to_chars + pcg_random + ch_contrib::pdqsort + ch_contrib::miniselect + clickhouse_common_config_no_zookeeper_log + loggers_no_text_log + clickhouse_common_io + clickhouse_parsers # Otherwise compression will not built. FIXME. 
+ + ${LINK_RESOURCE_LIB_STANDALONE_KEEPER} + ) + + add_dependencies(clickhouse-keeper clickhouse_keeper_configs) + set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) + + install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) +endif() diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 88df4d5b3e7..1d9bbef58a5 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -34,11 +35,6 @@ #include #include -#if defined(OS_LINUX) -# include -# include -#endif - int mainEntryClickHouseKeeper(int argc, char ** argv) { @@ -127,18 +123,6 @@ Poco::Net::SocketAddress makeSocketAddress(const std::string & host, UInt16 port return socket_address; } -[[noreturn]] void forceShutdown() -{ -#if defined(THREAD_SANITIZER) && defined(OS_LINUX) - /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, - /// while connection handling threads are still run. - (void)syscall(SYS_exit_group, 0); - __builtin_unreachable(); -#else - _exit(0); -#endif -} - std::string getUserName(uid_t user_id) { /// Try to convert user id into user name. @@ -286,16 +270,9 @@ int Keeper::main(const std::vector & /*args*/) LOG_WARNING(log, "Keeper was built with sanitizer. It will work slowly."); #endif - auto shared_context = Context::createShared(); - global_context = Context::createGlobal(shared_context.get()); - - global_context->makeGlobalContext(); - global_context->setApplicationType(Context::ApplicationType::KEEPER); - if (!config().has("keeper_server")) throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Keeper configuration ( section) not found in config"); - std::string path; if (config().has("keeper_server.storage_path")) @@ -364,8 +341,13 @@ int Keeper::main(const std::vector & /*args*/) auto servers = std::make_shared>(); /// Initialize keeper RAFT. Do nothing if no keeper_server in config. 
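// In the standalone clickhouse-keeper build this patch introduces, the full server Context is
// replaced by the lightweight TinyContext (see programs/keeper/TinyContext.h further down in this
// patch), which owns only the KeeperDispatcher and a configuration pointer; the config_getter
// lambda added below is how the TCP handler factory obtains configuration without a Context.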
- global_context->initializeKeeperDispatcher(/* start_async = */false); - FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); + tiny_context.initializeKeeperDispatcher(/* start_async = */false); + FourLetterCommandFactory::registerCommands(*tiny_context.getKeeperDispatcher()); + + auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & + { + return tiny_context.getConfigRef(); + }; for (const auto & listen_host : listen_hosts) { @@ -382,7 +364,10 @@ int Keeper::main(const std::vector & /*args*/) port_name, "Keeper (tcp): " + address.toString(), std::make_unique( - new KeeperTCPHandlerFactory(*this, false), server_pool, socket)); + new KeeperTCPHandlerFactory( + config_getter, tiny_context.getKeeperDispatcher(), + config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), + config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), false), server_pool, socket)); }); const char * secure_port_name = "keeper_server.tcp_port_secure"; @@ -398,7 +383,10 @@ int Keeper::main(const std::vector & /*args*/) secure_port_name, "Keeper with secure protocol (tcp_secure): " + address.toString(), std::make_unique( - new KeeperTCPHandlerFactory(*this, true), server_pool, socket)); + new KeeperTCPHandlerFactory( + config_getter, tiny_context.getKeeperDispatcher(), + config().getUInt64("keeper_server.socket_receive_timeout_sec", DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC), + config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), true), server_pool, socket)); #else UNUSED(port); throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", @@ -425,18 +413,14 @@ int Keeper::main(const std::vector & /*args*/) [&](ConfigurationPtr config, bool /* initial_loading */) { if (config->has("keeper_server")) - global_context->updateKeeperConfiguration(*config); + tiny_context.updateKeeperConfiguration(*config); }, /* already_loaded = */ false); /// Reload it right now (initial loading) SCOPE_EXIT({ LOG_INFO(log, "Shutting down."); - /// Stop reloading of the main config. This must be done before `global_context->shutdown()` because - /// otherwise the reloading may pass a changed config to some destroyed parts of ContextSharedPart. main_config_reloader.reset(); - global_context->shutdown(); - LOG_DEBUG(log, "Waiting for current connections to Keeper to finish."); int current_connections = 0; for (auto & server : *servers) @@ -458,23 +442,17 @@ int Keeper::main(const std::vector & /*args*/) else LOG_INFO(log, "Closed connections to Keeper."); - global_context->shutdownKeeperDispatcher(); + tiny_context.shutdownKeeperDispatcher(); /// Wait server pool to avoid use-after-free of destroyed context in the handlers server_pool.joinAll(); - /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. - * At this moment, no one could own shared part of Context. 
- */ - global_context.reset(); - shared_context.reset(); - LOG_DEBUG(log, "Destroyed global context."); if (current_connections) { LOG_INFO(log, "Will shutdown forcefully."); - forceShutdown(); + safeExit(0); } }); diff --git a/programs/keeper/Keeper.h b/programs/keeper/Keeper.h index f5b97dacf7d..5b8fbadd0a2 100644 --- a/programs/keeper/Keeper.h +++ b/programs/keeper/Keeper.h @@ -2,6 +2,7 @@ #include #include +#include "TinyContext.h" namespace Poco { @@ -17,27 +18,22 @@ namespace DB /// standalone clickhouse-keeper server (replacement for ZooKeeper). Uses the same /// config as clickhouse-server. Serves requests on TCP ports with or without /// SSL using ZooKeeper protocol. -class Keeper : public BaseDaemon, public IServer +class Keeper : public BaseDaemon { public: using ServerApplication::run; - Poco::Util::LayeredConfiguration & config() const override + Poco::Util::LayeredConfiguration & config() const { return BaseDaemon::config(); } - Poco::Logger & logger() const override + Poco::Logger & logger() const { return BaseDaemon::logger(); } - ContextMutablePtr context() const override - { - return global_context; - } - - bool isCancelled() const override + bool isCancelled() const { return BaseDaemon::isCancelled(); } @@ -58,7 +54,7 @@ protected: std::string getDefaultConfigFileName() const override; private: - ContextMutablePtr global_context; + TinyContext tiny_context; Poco::Net::SocketAddress socketBindListen(Poco::Net::ServerSocket & socket, const std::string & host, UInt16 port, [[maybe_unused]] bool secure = false) const; diff --git a/programs/keeper/TinyContext.cpp b/programs/keeper/TinyContext.cpp new file mode 100644 index 00000000000..386fb1e0c1d --- /dev/null +++ b/programs/keeper/TinyContext.cpp @@ -0,0 +1,71 @@ +#include "TinyContext.h" + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +void TinyContext::setConfig(const ConfigurationPtr & config_) +{ + std::lock_guard lock(keeper_dispatcher_mutex); + config = config_; +} + +const Poco::Util::AbstractConfiguration & TinyContext::getConfigRef() const +{ + std::lock_guard lock(keeper_dispatcher_mutex); + return config ? 
*config : Poco::Util::Application::instance().config(); +} + + +void TinyContext::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const +{ + const auto & config_ref = getConfigRef(); + + std::lock_guard lock(keeper_dispatcher_mutex); + + if (keeper_dispatcher) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to initialize Keeper multiple times"); + + if (config_ref.has("keeper_server")) + { + keeper_dispatcher = std::make_shared(); + keeper_dispatcher->initialize(config_ref, true, start_async); + } +} + +std::shared_ptr TinyContext::getKeeperDispatcher() const +{ + std::lock_guard lock(keeper_dispatcher_mutex); + if (!keeper_dispatcher) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Keeper must be initialized before requests"); + + return keeper_dispatcher; +} + +void TinyContext::shutdownKeeperDispatcher() const +{ + std::lock_guard lock(keeper_dispatcher_mutex); + if (keeper_dispatcher) + { + keeper_dispatcher->shutdown(); + keeper_dispatcher.reset(); + } +} + +void TinyContext::updateKeeperConfiguration([[maybe_unused]] const Poco::Util::AbstractConfiguration & config_) +{ + std::lock_guard lock(keeper_dispatcher_mutex); + if (!keeper_dispatcher) + return; + + keeper_dispatcher->updateConfiguration(config_); +} + +} diff --git a/programs/keeper/TinyContext.h b/programs/keeper/TinyContext.h new file mode 100644 index 00000000000..a53a6d0377d --- /dev/null +++ b/programs/keeper/TinyContext.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include + +#include + +namespace DB +{ + +class KeeperDispatcher; + +class TinyContext: public std::enable_shared_from_this +{ +public: + std::shared_ptr getKeeperDispatcher() const; + void initializeKeeperDispatcher(bool start_async) const; + void shutdownKeeperDispatcher() const; + void updateKeeperConfiguration(const Poco::Util::AbstractConfiguration & config); + + using ConfigurationPtr = Poco::AutoPtr; + + void setConfig(const ConfigurationPtr & config); + const Poco::Util::AbstractConfiguration & getConfigRef() const; + +private: + mutable std::mutex keeper_dispatcher_mutex; + mutable std::shared_ptr keeper_dispatcher; + + ConfigurationPtr config; +}; + +} diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 0913c6e4a9a..d7e104685c5 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -1,3 +1,5 @@ +include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake) + set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp LibraryInterface.cpp @@ -22,4 +24,10 @@ target_link_libraries(clickhouse-library-bridge PRIVATE set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
-install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) +if (BUILD_STRIPPED_BINARIES_PREFIX) + clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-library-bridge) +endif() + +if (NOT BUILD_STRIPPED_BINARIES_PREFIX) + install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) +endif() diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index eb3a03d0564..a8546649016 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -388,7 +389,9 @@ void LocalServer::setupUsers() ""; ConfigurationPtr users_config; - + auto & access_control = global_context->getAccessControl(); + access_control.setPlaintextPasswordSetting(config().getBool("allow_plaintext_password", true)); + access_control.setNoPasswordSetting(config().getBool("allow_no_password", true)); if (config().has("users_config") || config().has("config-file") || fs::exists("config.xml")) { const auto users_config_path = config().getString("users_config", config().getString("config-file", "config.xml")); @@ -397,10 +400,7 @@ void LocalServer::setupUsers() users_config = loaded_config.configuration; } else - { users_config = getConfigurationFromXMLString(minimal_default_user_xml); - } - if (users_config) global_context->setUsersConfig(users_config); else @@ -411,7 +411,8 @@ void LocalServer::setupUsers() void LocalServer::connect() { connection_parameters = ConnectionParameters(config()); - connection = LocalConnection::createConnection(connection_parameters, global_context, need_render_progress); + connection = LocalConnection::createConnection( + connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name); } @@ -801,7 +802,6 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp } - #pragma GCC diagnostic ignored "-Wunused-function" #pragma GCC diagnostic ignored "-Wmissing-declarations" diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index 947e7ab1768..1ffb0b437a6 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -909,7 +909,7 @@ public: ColumnPtr new_nested_column = nested_model->generate(nested_column); - return ColumnArray::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(std::move(column_array.getOffsetsPtr()))); + return ColumnArray::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(column_array.getOffsetsPtr())); } void updateSeed() override @@ -947,7 +947,7 @@ public: ColumnPtr new_nested_column = nested_model->generate(nested_column); - return ColumnNullable::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(std::move(column_nullable.getNullMapColumnPtr()))); + return ColumnNullable::create(IColumn::mutate(std::move(new_nested_column)), IColumn::mutate(column_nullable.getNullMapColumnPtr())); } void updateSeed() override diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index 54f47204259..44493d7ab8a 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -1,3 +1,5 @@ +include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake) + set (CLICKHOUSE_ODBC_BRIDGE_SOURCES ColumnInfoHandler.cpp 
getIdentifierQuote.cpp @@ -37,7 +39,13 @@ if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM) endif() -install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) +if (BUILD_STRIPPED_BINARIES_PREFIX) + clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${BUILD_STRIPPED_BINARIES_PREFIX} BINARY_PATH ../clickhouse-odbc-bridge) +endif() + +if (NOT BUILD_STRIPPED_BINARIES_PREFIX) + install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) +endif() if(ENABLE_TESTS) add_subdirectory(tests) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 79837310ec4..c800b7a124b 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -31,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -95,8 +96,6 @@ # include # include # include -# include -# include #endif #if USE_SSL @@ -505,19 +504,6 @@ void checkForUsersNotInMainConfig( } } -[[noreturn]] void forceShutdown() -{ -#if defined(THREAD_SANITIZER) && defined(OS_LINUX) - /// Thread sanitizer tries to do something on exit that we don't need if we want to exit immediately, - /// while connection handling threads are still run. - (void)syscall(SYS_exit_group, 0); - __builtin_unreachable(); -#else - _exit(0); -#endif -} - - int Server::main(const std::vector & /*args*/) { Poco::Logger * log = &logger(); @@ -1009,6 +995,11 @@ if (ThreadFuzzer::instance().isEffective()) global_context->initializeKeeperDispatcher(can_initialize_keeper_async); FourLetterCommandFactory::registerCommands(*global_context->getKeeperDispatcher()); + auto config_getter = [this] () -> const Poco::Util::AbstractConfiguration & + { + return global_context->getConfigRef(); + }; + for (const auto & listen_host : listen_hosts) { /// TCP Keeper @@ -1027,7 +1018,11 @@ if (ThreadFuzzer::instance().isEffective()) port_name, "Keeper (tcp): " + address.toString(), std::make_unique( - new KeeperTCPHandlerFactory(*this, false), server_pool, socket)); + new KeeperTCPHandlerFactory( + config_getter, global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout, + global_context->getSettingsRef().send_timeout, + false), server_pool, socket)); }); const char * secure_port_name = "keeper_server.tcp_port_secure"; @@ -1046,7 +1041,10 @@ if (ThreadFuzzer::instance().isEffective()) secure_port_name, "Keeper with secure protocol (tcp_secure): " + address.toString(), std::make_unique( - new KeeperTCPHandlerFactory(*this, true), server_pool, socket)); + new KeeperTCPHandlerFactory( + config_getter, global_context->getKeeperDispatcher(), + global_context->getSettingsRef().receive_timeout, + global_context->getSettingsRef().send_timeout, true), server_pool, socket)); #else UNUSED(port); throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", @@ -1069,7 +1067,9 @@ if (ThreadFuzzer::instance().isEffective()) auto & access_control = global_context->getAccessControl(); if (config().has("custom_settings_prefixes")) access_control.setCustomSettingsPrefixes(config().getString("custom_settings_prefixes")); - + ///set the allow_plaintext_and_no_password setting in context. 
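// The same two calls are added to LocalServer::setupUsers() earlier in this patch, so both
// clickhouse-server and clickhouse-local seed AccessControl from the `allow_plaintext_password`
// and `allow_no_password` keys; both default to true when absent, which presumably keeps users
// with these authentication types working unless an administrator opts out explicitly.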
+ access_control.setPlaintextPasswordSetting(config().getBool("allow_plaintext_password", true)); + access_control.setNoPasswordSetting(config().getBool("allow_no_password", true)); /// Initialize access storages. try { @@ -1317,7 +1317,7 @@ if (ThreadFuzzer::instance().isEffective()) #endif #if !defined(__x86_64__) - LOG_INFO(log, "Query Profiler is only tested on x86_64. It also known to not work under qemu-user."); + LOG_INFO(log, "Query Profiler and TraceCollector is only tested on x86_64. It also known to not work under qemu-user."); #endif if (!hasPHDRCache()) @@ -1527,7 +1527,7 @@ if (ThreadFuzzer::instance().isEffective()) /// Dump coverage here, because std::atexit callback would not be called. dumpCoverageReportIfPossible(); LOG_INFO(log, "Will shutdown forcefully."); - forceShutdown(); + safeExit(0); } }); diff --git a/programs/server/config.xml b/programs/server/config.xml index def64607caf..d34340ac995 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -243,7 +243,7 @@ openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 Only file format with BEGIN DH PARAMETERS is supported. --> - + none true true @@ -367,6 +367,10 @@ /var/lib/clickhouse/tmp/ + + + + ` - + int needs explicit cast /// 2. customized types needs explicit cast template - enable_if_not_field_or_bool_or_stringlike_t & + enable_if_not_field_or_bool_or_stringlike_t & /// NOLINT operator=(T && rhs); Field & operator= (bool rhs) @@ -409,7 +409,7 @@ public: template const auto & get() const { - auto mutable_this = const_cast *>(this); + auto * mutable_this = const_cast *>(this); return mutable_this->get(); } @@ -422,7 +422,7 @@ public: template const T & reinterpret() const { - auto mutable_this = const_cast *>(this); + auto * mutable_this = const_cast *>(this); return mutable_this->reinterpret(); } @@ -887,7 +887,7 @@ Field::Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t) //-V730 } template -Field::enable_if_not_field_or_bool_or_stringlike_t & +Field::enable_if_not_field_or_bool_or_stringlike_t & /// NOLINT Field::operator=(T && rhs) { auto && val = castToNearestFieldType(std::forward(rhs)); @@ -986,10 +986,10 @@ String toString(const Field & x); template <> struct fmt::formatter { - constexpr auto parse(format_parse_context & ctx) + static constexpr auto parse(format_parse_context & ctx) { - auto it = ctx.begin(); - auto end = ctx.end(); + const auto * it = ctx.begin(); + const auto * end = ctx.end(); /// Only support {}. if (it != end && *it != '}') diff --git a/src/Core/MySQL/MySQLClient.cpp b/src/Core/MySQL/MySQLClient.cpp index 26535f05be7..98797b3d284 100644 --- a/src/Core/MySQL/MySQLClient.cpp +++ b/src/Core/MySQL/MySQLClient.cpp @@ -24,12 +24,12 @@ namespace ErrorCodes } MySQLClient::MySQLClient(const String & host_, UInt16 port_, const String & user_, const String & password_) - : host(host_), port(port_), user(user_), password(std::move(password_)), + : host(host_), port(port_), user(user_), password(password_), client_capabilities(CLIENT_PROTOCOL_41 | CLIENT_PLUGIN_AUTH | CLIENT_SECURE_CONNECTION) { } -MySQLClient::MySQLClient(MySQLClient && other) +MySQLClient::MySQLClient(MySQLClient && other) noexcept : host(std::move(other.host)), port(other.port), user(std::move(other.user)), password(std::move(other.password)) , client_capabilities(other.client_capabilities) { @@ -142,7 +142,7 @@ void MySQLClient::setBinlogChecksum(const String & binlog_checksum) replication.setChecksumSignatureLength(Poco::toUpper(binlog_checksum) == "NONE" ? 
0 : 4); } -void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid_str, const String & binlog_checksum) +void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, std::unordered_set replicate_tables, String gtid_str, const String & binlog_checksum) { /// Maybe CRC32 or NONE. mysqlbinlog.cc use NONE, see its below comments: /// Make a notice to the server that this client is checksum-aware. @@ -165,6 +165,7 @@ void MySQLClient::startBinlogDumpGTID(UInt32 slave_id, String replicate_db, Stri /// Set Filter rule to replication. replication.setReplicateDatabase(replicate_db); + replication.setReplicateTables(replicate_tables); BinlogDumpGTID binlog_dump(slave_id, gtid_sets.toPayload()); packet_endpoint->sendPacket(binlog_dump, true); diff --git a/src/Core/MySQL/MySQLClient.h b/src/Core/MySQL/MySQLClient.h index 5b33a8f852b..9fa3ace6baa 100644 --- a/src/Core/MySQL/MySQLClient.h +++ b/src/Core/MySQL/MySQLClient.h @@ -22,7 +22,7 @@ class MySQLClient { public: MySQLClient(const String & host_, UInt16 port_, const String & user_, const String & password_); - MySQLClient(MySQLClient && other); + MySQLClient(MySQLClient && other) noexcept; void connect(); void disconnect(); @@ -33,7 +33,7 @@ public: /// Start replication stream by GTID. /// replicate_db: replication database schema, events from other databases will be ignored. /// gtid: executed gtid sets format like 'hhhhhhhh-hhhh-hhhh-hhhh-hhhhhhhhhhhh:x-y'. - void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, String gtid, const String & binlog_checksum); + void startBinlogDumpGTID(UInt32 slave_id, String replicate_db, std::unordered_set replicate_tables, String gtid, const String & binlog_checksum); BinlogEventPtr readOneBinlogEvent(UInt64 milliseconds = 0); Position getPosition() const { return replication.getPosition(); } diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index bfd0bd02b45..43fa90b6160 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -21,7 +21,7 @@ void GTIDSet::tryMerge(size_t i) intervals.erase(intervals.begin() + i + 1, intervals.begin() + i + 1 + 1); } -void GTIDSets::parse(const String gtid_format) +void GTIDSets::parse(String gtid_format) { if (gtid_format.empty()) { diff --git a/src/Core/MySQL/MySQLGtid.h b/src/Core/MySQL/MySQLGtid.h index c8a571d2569..45eeaf02fa2 100644 --- a/src/Core/MySQL/MySQLGtid.h +++ b/src/Core/MySQL/MySQLGtid.h @@ -35,7 +35,7 @@ class GTIDSets public: std::vector sets; - void parse(const String gtid_format_); + void parse(String gtid_format_); void update(const GTID & other); String toString() const; diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 50f6be23f83..1c1f6535550 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -142,8 +142,7 @@ namespace MySQLReplication out << "XID: " << this->xid << '\n'; } - /// https://dev.mysql.com/doc/internals/en/table-map-event.html - void TableMapEvent::parseImpl(ReadBuffer & payload) + void TableMapEventHeader::parse(ReadBuffer & payload) { payload.readStrict(reinterpret_cast(&table_id), 6); payload.readStrict(reinterpret_cast(&flags), 2); @@ -157,7 +156,11 @@ namespace MySQLReplication table.resize(table_len); payload.readStrict(reinterpret_cast(table.data()), table_len); payload.ignore(1); + } + /// https://dev.mysql.com/doc/internals/en/table-map-event.html + void TableMapEvent::parseImpl(ReadBuffer & payload) + { column_count = 
readLengthEncodedNumber(payload); for (auto i = 0U; i < column_count; ++i) { @@ -165,7 +168,6 @@ namespace MySQLReplication payload.readStrict(reinterpret_cast(&v), 1); column_type.emplace_back(v); } - String meta; readLengthEncodedString(meta, payload); parseMeta(meta); @@ -429,7 +431,7 @@ namespace MySQLReplication UInt32 i24 = 0; payload.readStrict(reinterpret_cast(&i24), 3); - const DayNum date_day_number(DateLUT::instance().makeDayNum( + const ExtendedDayNum date_day_number(DateLUT::instance().makeDayNum( static_cast((i24 >> 9) & 0x7fff), static_cast((i24 >> 5) & 0xf), static_cast(i24 & 0x1f)).toUnderType()); row.push_back(Field(date_day_number.toUnderType())); @@ -957,10 +959,20 @@ namespace MySQLReplication } case TABLE_MAP_EVENT: { - event = std::make_shared(std::move(event_header)); - event->parseEvent(event_payload); - auto table_map = std::static_pointer_cast(event); - table_maps[table_map->table_id] = table_map; + TableMapEventHeader map_event_header; + map_event_header.parse(event_payload); + if (doReplicate(map_event_header.schema, map_event_header.table)) + { + event = std::make_shared(std::move(event_header), map_event_header); + event->parseEvent(event_payload); + auto table_map = std::static_pointer_cast(event); + table_maps[table_map->table_id] = table_map; + } + else + { + event = std::make_shared(std::move(event_header)); + event->parseEvent(event_payload); + } break; } case WRITE_ROWS_EVENT_V1: @@ -1030,8 +1042,21 @@ namespace MySQLReplication // Special "dummy event" return false; } - auto table_map = table_maps.at(table_id); - return table_map->schema == replicate_do_db; + if (table_maps.contains(table_id)) + { + auto table_map = table_maps.at(table_id); + return (table_map->schema == replicate_do_db) && (replicate_tables.empty() || replicate_tables.contains(table_map->table)); + } + return false; + } + + bool MySQLFlavor::doReplicate(const String & db, const String & table_name) + { + if (replicate_do_db.empty()) + return false; + if (replicate_do_db != db) + return false; + return replicate_tables.empty() || table_name.empty() || replicate_tables.contains(table_name); } } diff --git a/src/Core/MySQL/MySQLReplication.h b/src/Core/MySQL/MySQLReplication.h index cb67ce73de9..8900eee0102 100644 --- a/src/Core/MySQL/MySQLReplication.h +++ b/src/Core/MySQL/MySQLReplication.h @@ -409,6 +409,20 @@ namespace MySQLReplication void parseImpl(ReadBuffer & payload) override; }; + class TableMapEventHeader + { + public: + UInt64 table_id; + UInt16 flags; + UInt8 schema_len; + String schema; + UInt8 table_len; + String table; + + TableMapEventHeader(): table_id(0), flags(0), schema_len(0), table_len(0) {} + void parse(ReadBuffer & payload); + }; + class TableMapEvent : public EventBase { public: @@ -423,7 +437,15 @@ namespace MySQLReplication std::vector column_meta; Bitmap null_bitmap; - TableMapEvent(EventHeader && header_) : EventBase(std::move(header_)), table_id(0), flags(0), schema_len(0), table_len(0), column_count(0) {} + TableMapEvent(EventHeader && header_, const TableMapEventHeader & map_event_header) : EventBase(std::move(header_)), column_count(0) + { + table_id = map_event_header.table_id; + flags = map_event_header.flags; + schema_len = map_event_header.schema_len; + schema = map_event_header.schema; + table_len = map_event_header.table_len; + table = map_event_header.table; + } void dump(WriteBuffer & out) const override; protected: @@ -563,6 +585,7 @@ namespace MySQLReplication Position getPosition() const override { return position; } BinlogEventPtr 
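Note on the MySQLReplication.cpp changes above: the table-map header is now parsed separately, and the new database/table filter is consulted before the full TableMapEvent is built, so row events for tables outside replicate_tables are skipped cheaply (a DryRunEvent is produced instead). A minimal sketch of the filtering rule mirrored from doReplicate(db, table_name); the function name is invented, and an empty table set means "every table of the replicated database":

#include <string>
#include <unordered_set>

bool shouldReplicate(
    const std::string & replicate_do_db,
    const std::unordered_set<std::string> & replicate_tables,
    const std::string & db,
    const std::string & table)
{
    if (replicate_do_db.empty() || replicate_do_db != db)
        return false;
    // An empty filter, or an event without a table name (e.g. DDL), is let through.
    return replicate_tables.empty() || table.empty() || replicate_tables.contains(table);
}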
readOneEvent() override { return event; } void setReplicateDatabase(String db) override { replicate_do_db = std::move(db); } + void setReplicateTables(std::unordered_set tables) { replicate_tables = std::move(tables); } void setGTIDSets(GTIDSets sets) override { position.gtid_sets = std::move(sets); } void setChecksumSignatureLength(size_t checksum_signature_length_) override { checksum_signature_length = checksum_signature_length_; } @@ -570,10 +593,13 @@ namespace MySQLReplication Position position; BinlogEventPtr event; String replicate_do_db; + // only for filter data(Row Event), not include DDL Event + std::unordered_set replicate_tables; std::map > table_maps; size_t checksum_signature_length = 4; bool doReplicate(UInt64 table_id); + bool doReplicate(const String & db, const String & table_name); }; } diff --git a/src/Core/MySQL/PacketsConnection.cpp b/src/Core/MySQL/PacketsConnection.cpp index 32a8a9cf8ab..a2eaa0ba7ba 100644 --- a/src/Core/MySQL/PacketsConnection.cpp +++ b/src/Core/MySQL/PacketsConnection.cpp @@ -99,8 +99,8 @@ HandshakeResponse::HandshakeResponse() : capability_flags(0x00), max_packet_size HandshakeResponse::HandshakeResponse( UInt32 capability_flags_, UInt32 max_packet_size_, UInt8 character_set_, const String & username_, const String & database_, const String & auth_response_, const String & auth_plugin_name_) - : capability_flags(capability_flags_), max_packet_size(max_packet_size_), character_set(character_set_), username(std::move(username_)), - database(std::move(database_)), auth_response(std::move(auth_response_)), auth_plugin_name(std::move(auth_plugin_name_)) + : capability_flags(capability_flags_), max_packet_size(max_packet_size_), character_set(character_set_), username(username_), + database(database_), auth_response(auth_response_), auth_plugin_name(auth_plugin_name_) { } diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index dd26bf41b4a..6ccdcb4d524 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -152,7 +152,7 @@ private: WriteBuffer * out; public: - MessageTransport(WriteBuffer * out_) : in(nullptr), out(out_) {} + explicit MessageTransport(WriteBuffer * out_) : in(nullptr), out(out_) {} MessageTransport(ReadBuffer * in_, WriteBuffer * out_): in(in_), out(out_) {} @@ -257,7 +257,7 @@ public: Int32 payload_size; FirstMessage() = delete; - FirstMessage(int payload_size_) : payload_size(payload_size_) {} + explicit FirstMessage(int payload_size_) : payload_size(payload_size_) {} }; class CancelRequest : public FirstMessage @@ -266,7 +266,7 @@ public: Int32 process_id = 0; Int32 secret_key = 0; - CancelRequest(int payload_size_) : FirstMessage(payload_size_) {} + explicit CancelRequest(int payload_size_) : FirstMessage(payload_size_) {} void deserialize(ReadBuffer & in) override { @@ -391,7 +391,7 @@ public: // includes username, may also include database and other runtime parameters std::unordered_map parameters; - StartupMessage(Int32 payload_size_) : FirstMessage(payload_size_) {} + explicit StartupMessage(Int32 payload_size_) : FirstMessage(payload_size_) {} void deserialize(ReadBuffer & in) override { @@ -643,7 +643,7 @@ private: const std::vector & fields_descr; public: - RowDescription(const std::vector & fields_descr_) : fields_descr(fields_descr_) {} + explicit RowDescription(const std::vector & fields_descr_) : fields_descr(fields_descr_) {} void serialize(WriteBuffer & out) const override { @@ -673,7 +673,7 @@ class StringField : public ISerializable private: String str; public: - 
StringField(String str_) : str(str_) {} + explicit StringField(String str_) : str(str_) {} void serialize(WriteBuffer & out) const override { @@ -703,7 +703,7 @@ private: const std::vector> & row; public: - DataRow(const std::vector> & row_) : row(row_) {} + explicit DataRow(const std::vector> & row_) : row(row_) {} void serialize(WriteBuffer & out) const override { @@ -886,7 +886,7 @@ private: std::unordered_map> type_to_method = {}; public: - AuthenticationManager(const std::vector> & auth_methods) + explicit AuthenticationManager(const std::vector> & auth_methods) { for (const std::shared_ptr & method : auth_methods) { diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h index 93f44b02ce3..6ee491f3ab5 100644 --- a/src/Core/ProtocolDefines.h +++ b/src/Core/ProtocolDefines.h @@ -8,7 +8,6 @@ #define DBMS_MIN_REVISION_WITH_SERVER_DISPLAY_NAME 54372 #define DBMS_MIN_REVISION_WITH_VERSION_PATCH 54401 #define DBMS_MIN_REVISION_WITH_SERVER_LOGS 54406 -#define DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA 54415 /// Minimum revision with exactly the same set of aggregation methods and rules to select them. /// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules /// (keys will be placed in different buckets and result will not be fully aggregated). diff --git a/src/Core/QualifiedTableName.h b/src/Core/QualifiedTableName.h index 4642465f461..3310130629d 100644 --- a/src/Core/QualifiedTableName.h +++ b/src/Core/QualifiedTableName.h @@ -72,7 +72,7 @@ struct QualifiedTableName QualifiedTableName name; if (pos == std::string::npos) { - name.table = std::move(maybe_qualified_name); + name.table = maybe_qualified_name; } else if (maybe_qualified_name.find('.', pos + 1) != std::string::npos) { @@ -119,7 +119,7 @@ namespace fmt template <> struct formatter { - constexpr auto parse(format_parse_context & ctx) + static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 87d7eee0daa..411e73bdf1a 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -89,6 +89,14 @@ void Settings::addProgramOptions(boost::program_options::options_description & o } } +void Settings::addProgramOptionsAsMultitokens(boost::program_options::options_description & options) +{ + for (const auto & field : all()) + { + addProgramOptionAsMultitoken(options, field); + } +} + void Settings::addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field) { const std::string_view name = field.getName(); @@ -97,6 +105,14 @@ void Settings::addProgramOption(boost::program_options::options_description & op name.data(), boost::program_options::value()->composing()->notifier(on_program_option), field.getDescription()))); } +void Settings::addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field) +{ + const std::string_view name = field.getName(); + auto on_program_option = boost::function1([this, name](const Strings & values) { set(name, values.back()); }); + options.add(boost::shared_ptr(new boost::program_options::option_description( + name.data(), boost::program_options::value()->multitoken()->composing()->notifier(on_program_option), field.getDescription()))); +} + void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path) { if (config.getBool("skip_check_for_incorrect_settings", false)) diff --git a/src/Core/Settings.h 
b/src/Core/Settings.h index 81613093a72..072d79b1d90 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -475,7 +475,7 @@ class IColumn; M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \ - M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ + M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... FROM TABLES' query", 0) \ M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ @@ -537,7 +537,7 @@ class IColumn; M(Int64, read_priority, 0, "Priority to read data from local filesystem. Only supported for 'pread_threadpool' method.", 0) \ M(UInt64, merge_tree_min_rows_for_concurrent_read_for_remote_filesystem, (20 * 8192), "If at least as many lines are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ M(UInt64, merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem, (24 * 10 * 1024 * 1024), "If at least as many bytes are read from one file, the reading can be parallelized, when reading from remote filesystem.", 0) \ - M(UInt64, remote_read_min_bytes_for_seek, DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead for read with ignore.", 0) \ + M(UInt64, remote_read_min_bytes_for_seek, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes required for remote read (url, s3) to do seek, instead for read with ignore.", 0) \ \ M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \ M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. Makes sense only for inserts via HTTP protocol. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \ @@ -547,8 +547,10 @@ class IColumn; M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \ M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. 
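Note on the remote_read_min_bytes_for_seek hunk above, which raises the default to 4 * DBMS_DEFAULT_BUFFER_SIZE: for small gaps it is usually cheaper to keep reading and discard bytes over the existing connection than to issue a new ranged request. A trivial sketch of the decision the setting controls; the function is illustrative, not the actual remote reader code:

#include <cstddef>

bool shouldSeekInsteadOfIgnore(size_t bytes_to_skip, size_t remote_read_min_bytes_for_seek)
{
    // Below the threshold: read and throw the bytes away; at or above it: seek (new ranged request).
    return bytes_to_skip >= remote_read_min_bytes_for_seek;
}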
Zero means no timeout at all", 0) \ \ - M(Int64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ - M(Int64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ + M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ + M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ + M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must me done via disk config), but allows to bypass cache for some queries if intended", 0) \ + M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait a most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ \ M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \ M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \ @@ -558,6 +560,8 @@ class IColumn; M(Bool, check_table_dependencies, true, "Check that DDL query (such as DROP TABLE or RENAME) will not break dependencies", 0) \ M(Bool, use_local_cache_for_remote_storage, true, "Use local cache for remote storage like HDFS or S3, it's used for remote table engine only", 0) \ \ + M(Bool, allow_unrestricted_reads_from_keeper, false, "Allow unrestricted (w/o condition on path) reads from system.zookeeper table, can be handy, but is not safe for zookeeper", 0) \ + \ /** Experimental functions */ \ M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \ M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \ @@ -605,6 +609,7 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ + M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ @@ -620,7 +625,7 @@ class IColumn; M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \ \ - M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \ + M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. 
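Note on the remote filesystem settings above: the backoff pair is now UInt64 (a negative wait or retry count would be meaningless) and bounds retries when a remote read stalls. A hedged sketch of how such a max-backoff/max-tries pair is typically applied; this is not the actual ClickHouse reader loop:

#include <algorithm>
#include <chrono>
#include <cstdint>
#include <thread>

template <typename TryRead>
bool readWithBackoff(TryRead && try_read, uint64_t max_backoff_ms, uint64_t max_tries)
{
    uint64_t backoff_ms = 100;   // arbitrary initial wait for the sketch
    for (uint64_t attempt = 0; attempt < max_tries; ++attempt)
    {
        if (try_read())
            return true;
        std::this_thread::sleep_for(std::chrono::milliseconds(std::min(backoff_ms, max_backoff_ms)));
        backoff_ms *= 2;   // exponential growth, capped by max_backoff_ms
    }
    return false;
}

int main()
{
    int calls = 0;
    return readWithBackoff([&] { return ++calls == 3; }, /*max_backoff_ms=*/10, /*max_tries=*/5) ? 0 : 1;
}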
Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ \ M(String, bool_true_representation, "true", "Text to represent bool value in TSV/CSV formats.", 0) \ @@ -720,6 +725,11 @@ struct Settings : public BaseSettings, public IHints<2, Settings /// (Don't forget to call notify() on the `variables_map` after parsing it!) void addProgramOptions(boost::program_options::options_description & options); + /// Adds program options as to set the settings from a command line. + /// Allows to set one setting multiple times, the last value will be used. + /// (Don't forget to call notify() on the `variables_map` after parsing it!) + void addProgramOptionsAsMultitokens(boost::program_options::options_description & options); + /// Check that there is no user-level settings at the top level in config. /// This is a common source of mistake (user don't know where to write user-level setting). static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); @@ -727,6 +737,8 @@ struct Settings : public BaseSettings, public IHints<2, Settings std::vector getAllRegisteredNames() const override; void addProgramOption(boost::program_options::options_description & options, const SettingFieldRef & field); + + void addProgramOptionAsMultitoken(boost::program_options::options_description & options, const SettingFieldRef & field); }; /* diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 17d24946cd8..ddd1c29785c 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -64,7 +64,8 @@ IMPLEMENT_SETTING_ENUM(DistributedProductMode, ErrorCodes::UNKNOWN_DISTRIBUTED_P IMPLEMENT_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS, {{"basic", FormatSettings::DateTimeInputFormat::Basic}, - {"best_effort", FormatSettings::DateTimeInputFormat::BestEffort}}) + {"best_effort", FormatSettings::DateTimeInputFormat::BestEffort}, + {"best_effort_us", FormatSettings::DateTimeInputFormat::BestEffortUS}}) IMPLEMENT_SETTING_ENUM_WITH_RENAME(DateTimeOutputFormat, ErrorCodes::BAD_ARGUMENTS, @@ -105,7 +106,9 @@ IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL, {{"decimal", MySQLDataTypesSupport::DECIMAL}, - {"datetime64", MySQLDataTypesSupport::DATETIME64}}) + {"datetime64", MySQLDataTypesSupport::DATETIME64}, + {"date2Date32", MySQLDataTypesSupport::DATE2DATE32}, + {"date2String", MySQLDataTypesSupport::DATE2STRING}}) IMPLEMENT_SETTING_ENUM(UnionMode, ErrorCodes::UNKNOWN_UNION, {{"", UnionMode::Unspecified}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 27994529a0b..47bd4b9a928 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -138,7 +138,8 @@ enum class MySQLDataTypesSupport { DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable DATETIME64, // convert MySQL's DATETIME and TIMESTAMP and ClickHouse DateTime64 if precision is > 0 or range is greater that for DateTime. 
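Note on addProgramOptionsAsMultitokens (declared above, implemented in Settings.cpp earlier in this diff): each setting is registered as a multitoken option whose notifier applies only values.back(), so repeating a setting on the command line is no longer an error and the last occurrence wins. A small self-contained boost::program_options sketch of that behaviour; the option name is chosen arbitrarily:

#include <boost/program_options.hpp>
#include <iostream>
#include <string>
#include <vector>

namespace po = boost::program_options;

int main(int argc, char ** argv)
{
    po::options_description desc("settings");
    desc.add_options()
        ("max_threads",
         po::value<std::vector<std::string>>()->multitoken()->composing()
             ->notifier([](const std::vector<std::string> & values)
             {
                 // Mirrors the diff: only the last supplied value is applied.
                 std::cout << "effective max_threads = " << values.back() << '\n';
             }),
         "maximum number of threads");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, desc), vm);
    po::notify(vm);   // notifiers run here, e.g. for: --max_threads 4 --max_threads 8  ->  prints 8
    return 0;
}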
- // ENUM + DATE2DATE32, // convert MySQL's date type to ClickHouse Date32 + DATE2STRING // convert MySQL's date type to ClickHouse String(This is usually used when your mysql date is less than 1925) }; DECLARE_SETTING_MULTI_ENUM(MySQLDataTypesSupport) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index b27763ad0d6..474786eb963 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -43,7 +43,7 @@ struct SettingFieldNumber SettingFieldNumber & operator=(Type x) { value = x; changed = true; return *this; } SettingFieldNumber & operator=(const Field & f); - operator Type() const { return value; } + operator Type() const { return value; } /// NOLINT explicit operator Field() const { return value; } String toString() const; @@ -75,7 +75,7 @@ struct SettingFieldMaxThreads SettingFieldMaxThreads & operator=(UInt64 x) { is_auto = !x; value = is_auto ? getAuto() : x; changed = true; return *this; } SettingFieldMaxThreads & operator=(const Field & f); - operator UInt64() const { return value; } + operator UInt64() const { return value; } /// NOLINT explicit operator Field() const { return value; } /// Writes "auto()" instead of simple "" if `is_auto==true`. @@ -118,10 +118,10 @@ struct SettingFieldTimespan SettingFieldTimespan & operator =(UInt64 x) { *this = Poco::Timespan{static_cast(x * microseconds_per_unit)}; return *this; } SettingFieldTimespan & operator =(const Field & f); - operator Poco::Timespan() const { return value; } + operator Poco::Timespan() const { return value; } /// NOLINT template > - operator std::chrono::duration() const { return std::chrono::duration_cast>(std::chrono::microseconds(value.totalMicroseconds())); } + operator std::chrono::duration() const { return std::chrono::duration_cast>(std::chrono::microseconds(value.totalMicroseconds())); } /// NOLINT explicit operator UInt64() const { return value.totalMicroseconds() / microseconds_per_unit; } explicit operator Field() const { return operator UInt64(); } @@ -158,7 +158,7 @@ struct SettingFieldString SettingFieldString & operator =(const char * str) { *this = std::string_view{str}; return *this; } SettingFieldString & operator =(const Field & f) { *this = f.safeGet(); return *this; } - operator const String &() const { return value; } + operator const String &() const { return value; } /// NOLINT explicit operator Field() const { return value; } const String & toString() const { return value; } @@ -181,7 +181,7 @@ public: SettingFieldChar & operator =(char c) { value = c; changed = true; return *this; } SettingFieldChar & operator =(const Field & f); - operator char() const { return value; } + operator char() const { return value; } /// NOLINT explicit operator Field() const { return toString(); } String toString() const { return String(&value, 1); } @@ -207,7 +207,7 @@ struct SettingFieldURI SettingFieldURI & operator =(const char * str) { *this = Poco::URI{str}; return *this; } SettingFieldURI & operator =(const Field & f) { *this = f.safeGet(); return *this; } - operator const Poco::URI &() const { return value; } + operator const Poco::URI &() const { return value; } /// NOLINT explicit operator String() const { return toString(); } explicit operator Field() const { return toString(); } @@ -244,7 +244,7 @@ struct SettingFieldEnum SettingFieldEnum & operator =(EnumType x) { value = x; changed = true; return *this; } SettingFieldEnum & operator =(const Field & f) { *this = Traits::fromString(f.safeGet()); return *this; } - operator EnumType() const { return value; } + operator 
EnumType() const { return value; } /// NOLINT explicit operator Field() const { return toString(); } String toString() const { return Traits::toString(value); } @@ -272,12 +272,15 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) *this = Traits::fromString(SettingFieldEnumHelpers::readBinary(in)); } +/// NOLINTNEXTLINE #define DECLARE_SETTING_ENUM(ENUM_TYPE) \ DECLARE_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE) +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ IMPLEMENT_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__) +/// NOLINTNEXTLINE #define DECLARE_SETTING_ENUM_WITH_RENAME(NEW_NAME, ENUM_TYPE) \ struct SettingField##NEW_NAME##Traits \ { \ @@ -288,6 +291,7 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) \ using SettingField##NEW_NAME = SettingFieldEnum; +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ const String & SettingField##NEW_NAME##Traits::toString(typename SettingField##NEW_NAME::EnumType value) \ { \ @@ -346,7 +350,7 @@ struct SettingFieldMultiEnum explicit SettingFieldMultiEnum(StorageType s) : value(s) {} explicit SettingFieldMultiEnum(const Field & f) : value(parseValueFromString(f.safeGet())) {} - operator ValueType() const { return value; } + operator ValueType() const { return value; } /// NOLINT explicit operator StorageType() const { return value.getValue(); } explicit operator Field() const { return toString(); } @@ -368,7 +372,7 @@ struct SettingFieldMultiEnum } } - if (result.size() > 0) + if (!result.empty()) result.erase(result.size() - separator.size()); return result; @@ -415,9 +419,11 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) parseFromString(SettingFieldEnumHelpers::readBinary(in)); } +/// NOLINTNEXTLINE #define DECLARE_SETTING_MULTI_ENUM(ENUM_TYPE) \ DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE) +/// NOLINTNEXTLINE #define DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, NEW_NAME) \ struct SettingField##NEW_NAME##Traits \ { \ @@ -429,9 +435,11 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) \ using SettingField##NEW_NAME = SettingFieldMultiEnum; +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_MULTI_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__) +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) 
\ IMPLEMENT_SETTING_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)\ size_t SettingField##NEW_NAME##Traits::getEnumSize() {\ diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index dd804bd4675..a5daba9fbee 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -53,7 +53,7 @@ struct SortCursorImpl */ IColumn::Permutation * permutation = nullptr; - SortCursorImpl() {} + SortCursorImpl() = default; SortCursorImpl(const Block & block, const SortDescription & desc_, size_t order_ = 0, IColumn::Permutation * perm = nullptr) : desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size()) @@ -140,7 +140,7 @@ struct SortCursorHelper const Derived & derived() const { return static_cast(*this); } - SortCursorHelper(SortCursorImpl * impl_) : impl(impl_) {} + explicit SortCursorHelper(SortCursorImpl * impl_) : impl(impl_) {} SortCursorImpl * operator-> () { return impl; } const SortCursorImpl * operator-> () const { return impl; } @@ -245,7 +245,7 @@ public: SortingHeap() = default; template - SortingHeap(Cursors & cursors) + explicit SortingHeap(Cursors & cursors) { size_t size = cursors.size(); queue.reserve(size); diff --git a/src/Core/examples/coro.cpp b/src/Core/examples/coro.cpp index 0f152d8090a..ecff0e23d11 100644 --- a/src/Core/examples/coro.cpp +++ b/src/Core/examples/coro.cpp @@ -84,7 +84,7 @@ struct Task std::cout << " Task " << tag << std::endl; } Task(Task &) = delete; - Task(Task &&rhs) : my(rhs.my), tag(rhs.tag) + Task(Task &&rhs) noexcept : my(rhs.my), tag(rhs.tag) { rhs.my = {}; std::cout << " Task&& " << tag << std::endl; diff --git a/src/Core/examples/mysql_protocol.cpp b/src/Core/examples/mysql_protocol.cpp index 1b81d856c9a..396bc6f7e9b 100644 --- a/src/Core/examples/mysql_protocol.cpp +++ b/src/Core/examples/mysql_protocol.cpp @@ -330,7 +330,7 @@ int main(int argc, char ** argv) /// Connect to the master. 
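Note on the move constructors in this diff that gain noexcept (MySQLClient, the coroutine Task example above): beyond documenting intent, the keyword matters for containers, because std::vector only moves elements during reallocation when the move constructor is noexcept and otherwise copies to keep the strong exception guarantee. A minimal demonstration:

#include <iostream>
#include <vector>

struct Widget
{
    Widget() = default;
    Widget(const Widget &) { std::cout << "copy\n"; }
    Widget(Widget &&) noexcept { std::cout << "move\n"; }   // drop noexcept and the reallocation below prints "copy"
};

int main()
{
    std::vector<Widget> v;
    v.reserve(1);
    v.emplace_back();
    v.emplace_back();   // forces reallocation: the existing element is moved only because the move ctor is noexcept
    return 0;
}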
slave.connect(); - slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets, binlog_checksum); + slave.startBinlogDumpGTID(slave_id, replicate_db, {}, gtid_sets, binlog_checksum); WriteBufferFromOStream cerr(std::cerr); diff --git a/src/Core/tests/gtest_settings.cpp b/src/Core/tests/gtest_settings.cpp index 8833d86c397..46d8f9665dc 100644 --- a/src/Core/tests/gtest_settings.cpp +++ b/src/Core/tests/gtest_settings.cpp @@ -53,6 +53,29 @@ GTEST_TEST(SettingMySQLDataTypesSupport, WithDECIMAL) ASSERT_EQ(Field("decimal"), setting); } +GTEST_TEST(SettingMySQLDataTypesSupport, WithDATE) +{ + SettingMySQLDataTypesSupport setting; + setting = String("date2Date32"); + ASSERT_EQ(4, setting.value.getValue()); + + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATE2DATE32)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + + ASSERT_EQ("date2Date32", setting.toString()); + ASSERT_EQ(Field("date2Date32"), setting); + + setting = String("date2String"); + ASSERT_EQ(8, setting.value.getValue()); + + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATE2STRING)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATE2DATE32)); + + ASSERT_EQ("date2String", setting.toString()); + ASSERT_EQ(Field("date2String"), setting); +} + GTEST_TEST(SettingMySQLDataTypesSupport, With1) { // Setting can be initialized with int value corresponding to DECIMAL diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 564dbba8503..122ac8e03a3 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -17,7 +17,7 @@ private: public: static constexpr bool is_parametric = true; - DataTypeArray(const DataTypePtr & nested_); + explicit DataTypeArray(const DataTypePtr & nested_); TypeIndex getTypeId() const override { return TypeIndex::Array; } diff --git a/src/DataTypes/DataTypeCustom.h b/src/DataTypes/DataTypeCustom.h index 55796e3cc7a..e8e4160af07 100644 --- a/src/DataTypes/DataTypeCustom.h +++ b/src/DataTypes/DataTypeCustom.h @@ -19,7 +19,7 @@ class IColumn; class IDataTypeCustomName { public: - virtual ~IDataTypeCustomName() {} + virtual ~IDataTypeCustomName() = default; virtual String getName() const = 0; }; @@ -33,7 +33,7 @@ struct DataTypeCustomDesc DataTypeCustomNamePtr name; SerializationPtr serialization; - DataTypeCustomDesc( + explicit DataTypeCustomDesc( DataTypeCustomNamePtr name_, SerializationPtr serialization_ = nullptr) : name(std::move(name_)) @@ -49,7 +49,7 @@ class DataTypeCustomFixedName : public IDataTypeCustomName private: String name; public: - DataTypeCustomFixedName(String name_) : name(name_) {} + explicit DataTypeCustomFixedName(String name_) : name(name_) {} String getName() const override { return name; } }; diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index dc054144e14..926dfd9cc82 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -34,7 +34,7 @@ public: DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_) : function(function_), argument_types(argument_types_), parameters(parameters_) {} - const AggregateFunctionPtr getFunction() const { return function; } + AggregateFunctionPtr getFunction() const { return function; } String getName() const override; static void checkSupportedFunctions(const 
AggregateFunctionPtr & function); }; diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 57052144216..91a09ff7cb9 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -16,7 +16,7 @@ namespace DB * * To cast from/to text format, time zone may be specified explicitly or implicit time zone may be used. * - * Time zone may be specified explicitly as type parameter, example: DateTime('Europe/Moscow'). + * Time zone may be specified explicitly as type parameter, example: DateTime('Pacific/Pitcairn'). * As it does not affect the internal representation of values, * all types with different time zones are equivalent and may be used interchangingly. * Time zone only affects parsing and displaying in text formats. @@ -48,4 +48,3 @@ public: }; } - diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index bdb39978825..9e37de8a35b 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -172,14 +172,14 @@ inline auto decimalResultType(const DecimalType & tx, const DecimalType & } template typename DecimalType> -inline const DecimalType decimalResultType(const DecimalType & tx, const DataTypeNumber & ty) +inline DecimalType decimalResultType(const DecimalType & tx, const DataTypeNumber & ty) { const auto result_trait = DecimalUtils::binaryOpResult(tx, ty); return DecimalType(result_trait.precision, result_trait.scale); } template typename DecimalType> -inline const DecimalType decimalResultType(const DataTypeNumber & tx, const DecimalType & ty) +inline DecimalType decimalResultType(const DataTypeNumber & tx, const DecimalType & ty) { const auto result_trait = DecimalUtils::binaryOpResult(tx, ty); return DecimalType(result_trait.precision, result_trait.scale); diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 81d7d991bdc..e7b638b6d7b 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -51,7 +51,6 @@ public: private: const Value & findCreatorByName(const String & family_name) const; -private: DataTypesDictionary data_types; /// Case insensitive data types will be additionally added here with lowercased name. diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index a53fde42b29..7c089866b23 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -29,7 +29,7 @@ public: static constexpr bool is_parametric = true; static constexpr auto type_id = TypeIndex::FixedString; - DataTypeFixedString(size_t n_) : n(n_) + explicit DataTypeFixedString(size_t n_) : n(n_) { if (n == 0) throw Exception("FixedString size must be positive", ErrorCodes::ARGUMENT_OUT_OF_BOUND); diff --git a/src/DataTypes/DataTypeFunction.h b/src/DataTypes/DataTypeFunction.h index 489ed4545f4..888bcb6a775 100644 --- a/src/DataTypes/DataTypeFunction.h +++ b/src/DataTypes/DataTypeFunction.h @@ -19,7 +19,7 @@ public: bool isParametric() const override { return true; } /// Some types could be still unknown. 
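Note on the constructor changes in this part of the diff: they add explicit to single-argument constructors (DataTypeArray, DataTypeFixedString, DataTypeInterval, and others), which blocks accidental implicit conversions at call sites. A toy example of what the keyword prevents; FixedStringSketch is invented for illustration:

#include <cstddef>

struct FixedStringSketch
{
    explicit FixedStringSketch(size_t n_) : n(n_) {}
    size_t n;
};

size_t sizeOf(const FixedStringSketch & s) { return s.n; }

int main()
{
    size_t ok = sizeOf(FixedStringSketch(16));   // fine: the conversion is spelled out
    // size_t bad = sizeOf(16);                  // no longer compiles: no implicit size_t -> FixedStringSketch
    return ok == 16 ? 0 : 1;
}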
- DataTypeFunction(const DataTypes & argument_types_ = DataTypes(), const DataTypePtr & return_type_ = nullptr) + explicit DataTypeFunction(const DataTypes & argument_types_ = DataTypes(), const DataTypePtr & return_type_ = nullptr) : argument_types(argument_types_), return_type(return_type_) {} std::string doGetName() const override; diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index 9ef6237ec41..83d89a73460 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -25,7 +25,7 @@ public: IntervalKind getKind() const { return kind; } - DataTypeInterval(IntervalKind kind_) : kind(kind_) {} + explicit DataTypeInterval(IntervalKind kind_) : kind(kind_) {} std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } const char * getFamilyName() const override { return "Interval"; } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 04377f85cfb..65bdd93ca4d 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -23,7 +23,7 @@ private: public: static constexpr bool is_parametric = true; - DataTypeMap(const DataTypes & elems); + explicit DataTypeMap(const DataTypes & elems); DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_); TypeIndex getTypeId() const override { return TypeIndex::Map; } diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index c56e87ca22d..db122aae5df 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -26,7 +26,7 @@ private: public: static constexpr bool is_parametric = true; - DataTypeTuple(const DataTypes & elems); + explicit DataTypeTuple(const DataTypes & elems); DataTypeTuple(const DataTypes & elems, const Strings & names, bool serialize_names_ = true); static bool canBeCreatedWithNames(const Strings & names); diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index fb590dd1d4b..0ec29e3c5f4 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -60,26 +60,26 @@ inline const DataTypeDecimal * checkDecimal(const IDataType & data_type) inline UInt32 getDecimalScale(const IDataType & data_type, UInt32 default_value = std::numeric_limits::max()) { - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getScale(); - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getScale(); - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getScale(); - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getScale(); return default_value; } inline UInt32 getDecimalPrecision(const IDataType & data_type) { - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal(data_type)) + if (const auto * decimal_type = checkDecimal(data_type)) return decimal_type->getPrecision(); 
return 0; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 5bc089e085f..36e1ce8ddd5 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -318,12 +318,12 @@ struct WhichDataType { TypeIndex idx; - constexpr WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) : idx(idx_) {} - constexpr WhichDataType(const IDataType & data_type) : idx(data_type.getTypeId()) {} - constexpr WhichDataType(const IDataType * data_type) : idx(data_type->getTypeId()) {} + constexpr WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) : idx(idx_) {} /// NOLINT + constexpr WhichDataType(const IDataType & data_type) : idx(data_type.getTypeId()) {} /// NOLINT + constexpr WhichDataType(const IDataType * data_type) : idx(data_type->getTypeId()) {} /// NOLINT // shared ptr -> is non-constexpr in gcc - WhichDataType(const DataTypePtr & data_type) : idx(data_type->getTypeId()) {} + WhichDataType(const DataTypePtr & data_type) : idx(data_type->getTypeId()) {} /// NOLINT constexpr bool isUInt8() const { return idx == TypeIndex::UInt8; } constexpr bool isUInt16() const { return idx == TypeIndex::UInt16; } diff --git a/src/DataTypes/Native.h b/src/DataTypes/Native.h index b72e479cb1d..3a635d2e240 100644 --- a/src/DataTypes/Native.h +++ b/src/DataTypes/Native.h @@ -201,7 +201,7 @@ static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr return nativeCast(b, from, value, n_to); } -static inline std::pair nativeCastToCommon(llvm::IRBuilder<> & b, const DataTypePtr & lhs_type, llvm::Value * lhs, const DataTypePtr & rhs_type, llvm::Value * rhs) +static inline std::pair nativeCastToCommon(llvm::IRBuilder<> & b, const DataTypePtr & lhs_type, llvm::Value * lhs, const DataTypePtr & rhs_type, llvm::Value * rhs) /// NOLINT { llvm::Type * common; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index b1fd4d0a9da..86d4eab289a 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -145,7 +145,7 @@ public: /// Flag, that may help to traverse substream paths. 
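Note, in contrast to the constructors made explicit elsewhere in this diff: WhichDataType keeps its converting constructors implicit and only marks them /// NOLINT, because the class exists purely to make type checks terse at call sites. An illustrative analog of why the implicit conversion is kept; the types here are invented:

#include <iostream>

enum class TypeIndexSketch { Nothing, UInt8, String };

struct WhichSketch
{
    TypeIndexSketch idx;
    WhichSketch(TypeIndexSketch idx_) : idx(idx_) {}   // deliberately implicit (would carry /// NOLINT)
    bool isUInt8() const { return idx == TypeIndexSketch::UInt8; }
};

bool columnIsUInt8(WhichSketch which) { return which.isUInt8(); }

int main()
{
    // The implicit conversion keeps call sites short: pass the type index (or a data type) directly.
    std::cout << columnIsUInt8(TypeIndexSketch::UInt8) << '\n';
    return 0;
}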
mutable bool visited = false; - Substream(Type type_) : type(type_) {} + Substream(Type type_) : type(type_) {} /// NOLINT String toString() const; }; diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index cd8cac54881..3769f8a4513 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -11,7 +11,7 @@ private: SerializationPtr nested; public: - SerializationArray(const SerializationPtr & nested_) : nested(nested_) {} + explicit SerializationArray(const SerializationPtr & nested_) : nested(nested_) {} void serializeBinary(const Field & field, WriteBuffer & ostr) const override; void deserializeBinary(Field & field, ReadBuffer & istr) const override; @@ -71,7 +71,7 @@ private: { const ColumnPtr offsets; - SubcolumnCreator(const ColumnPtr & offsets_) : offsets(offsets_) {} + explicit SubcolumnCreator(const ColumnPtr & offsets_) : offsets(offsets_) {} DataTypePtr create(const DataTypePtr & prev) const override; SerializationPtr create(const SerializationPtr & prev) const override; diff --git a/src/DataTypes/Serializations/SerializationBool.h b/src/DataTypes/Serializations/SerializationBool.h index a9f4c6404b3..a5aa0ca80a2 100644 --- a/src/DataTypes/Serializations/SerializationBool.h +++ b/src/DataTypes/Serializations/SerializationBool.h @@ -10,7 +10,7 @@ namespace DB class SerializationBool final : public SerializationWrapper { public: - SerializationBool(const SerializationPtr & nested_); + explicit SerializationBool(const SerializationPtr & nested_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/src/DataTypes/Serializations/SerializationCustomSimpleText.h index ba7c712f86c..21d6f8af650 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.h +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.h @@ -15,7 +15,7 @@ class IColumn; class SerializationCustomSimpleText : public SerializationWrapper { public: - SerializationCustomSimpleText(const SerializationPtr & nested_); + explicit SerializationCustomSimpleText(const SerializationPtr & nested_); // Methods that subclasses must override in order to get full serialization/deserialization support. 
virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0; diff --git a/src/DataTypes/Serializations/SerializationDateTime.cpp b/src/DataTypes/Serializations/SerializationDateTime.cpp index b4269fb0f8c..fd56c1baebd 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime.cpp @@ -27,6 +27,9 @@ inline void readText(time_t & x, ReadBuffer & istr, const FormatSettings & setti case FormatSettings::DateTimeInputFormat::BestEffort: parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone); return; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + parseDateTimeBestEffortUS(x, istr, time_zone, utc_time_zone); + return; } } diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index 75334592422..f4a142483e5 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ b/src/DataTypes/Serializations/SerializationDateTime.h @@ -11,7 +11,7 @@ namespace DB class SerializationDateTime final : public SerializationNumber, public TimezoneMixin { public: - SerializationDateTime(const TimezoneMixin & time_zone_); + explicit SerializationDateTime(const TimezoneMixin & time_zone_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationDateTime64.cpp b/src/DataTypes/Serializations/SerializationDateTime64.cpp index b9ed5bd4a02..78c7ea56529 100644 --- a/src/DataTypes/Serializations/SerializationDateTime64.cpp +++ b/src/DataTypes/Serializations/SerializationDateTime64.cpp @@ -69,6 +69,9 @@ static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, con case FormatSettings::DateTimeInputFormat::BestEffort: parseDateTime64BestEffort(x, scale, istr, time_zone, utc_time_zone); return; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + parseDateTime64BestEffortUS(x, scale, istr, time_zone, utc_time_zone); + return; } } diff --git a/src/DataTypes/Serializations/SerializationEnum.h b/src/DataTypes/Serializations/SerializationEnum.h index dfa9e74c7a1..bdd769b59c5 100644 --- a/src/DataTypes/Serializations/SerializationEnum.h +++ b/src/DataTypes/Serializations/SerializationEnum.h @@ -14,7 +14,7 @@ public: using typename SerializationNumber::ColumnType; using typename EnumValues::Values; - SerializationEnum(const Values & values_) : EnumValues(values_) {} + explicit SerializationEnum(const Values & values_) : EnumValues(values_) {} void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationFixedString.h b/src/DataTypes/Serializations/SerializationFixedString.h index 82559d10800..c3c08b20419 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.h +++ b/src/DataTypes/Serializations/SerializationFixedString.h @@ -12,7 +12,7 @@ private: size_t n; public: - SerializationFixedString(size_t n_) : n(n_) {} + explicit SerializationFixedString(size_t n_) : n(n_) {} size_t getN() const { return n; } void serializeBinary(const Field & field, WriteBuffer & ostr) const override; diff --git 
a/src/DataTypes/Serializations/SerializationIP.h b/src/DataTypes/Serializations/SerializationIP.h index a7bf1aeb2c6..282105b6b1e 100644 --- a/src/DataTypes/Serializations/SerializationIP.h +++ b/src/DataTypes/Serializations/SerializationIP.h @@ -8,7 +8,7 @@ namespace DB class SerializationIPv4 final : public SerializationCustomSimpleText { public: - SerializationIPv4(const SerializationPtr & nested_); + explicit SerializationIPv4(const SerializationPtr & nested_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override; @@ -17,7 +17,7 @@ public: class SerializationIPv6 : public SerializationCustomSimpleText { public: - SerializationIPv6(const SerializationPtr & nested_); + explicit SerializationIPv6(const SerializationPtr & nested_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override; diff --git a/src/DataTypes/Serializations/SerializationInfo.cpp b/src/DataTypes/Serializations/SerializationInfo.cpp index 22df95fc8f7..a0dc20b6479 100644 --- a/src/DataTypes/Serializations/SerializationInfo.cpp +++ b/src/DataTypes/Serializations/SerializationInfo.cpp @@ -181,10 +181,10 @@ void SerializationInfoByName::writeJSON(WriteBuffer & out) const { auto info_json = info->toJSON(); info_json.set(KEY_NAME, name); - column_infos.add(std::move(info_json)); + column_infos.add(std::move(info_json)); /// NOLINT } - object.set(KEY_COLUMNS, std::move(column_infos)); + object.set(KEY_COLUMNS, std::move(column_infos)); /// NOLINT std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM oss.exceptions(std::ios::failbit); diff --git a/src/DataTypes/Serializations/SerializationInfoTuple.cpp b/src/DataTypes/Serializations/SerializationInfoTuple.cpp index 378bed2af53..803302f9642 100644 --- a/src/DataTypes/Serializations/SerializationInfoTuple.cpp +++ b/src/DataTypes/Serializations/SerializationInfoTuple.cpp @@ -89,7 +89,7 @@ Poco::JSON::Object SerializationInfoTuple::toJSON() const for (const auto & elem : elems) subcolumns.add(elem->toJSON()); - object.set("subcolumns", std::move(subcolumns)); + object.set("subcolumns", subcolumns); return object; } diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index 5f8a2a95a25..0a3597e86c7 100644 --- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -15,7 +15,7 @@ private: SerializationPtr dict_inner_serialization; public: - SerializationLowCardinality(const DataTypePtr & dictionary_type); + explicit SerializationLowCardinality(const DataTypePtr & dictionary_type); void enumerateStreams( SubstreamPath & path, diff --git a/src/DataTypes/Serializations/SerializationNothing.h b/src/DataTypes/Serializations/SerializationNothing.h index 4a062931ac2..2de93a29763 100644 --- a/src/DataTypes/Serializations/SerializationNothing.h +++ b/src/DataTypes/Serializations/SerializationNothing.h @@ -14,7 +14,7 @@ namespace ErrorCodes class SerializationNothing : public SimpleTextSerialization { private: - [[noreturn]] void throwNoSerialization() const + [[noreturn]] static void throwNoSerialization() { throw Exception("Serialization is not implemented", 
ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index eb3e9bfb430..c22f2f57786 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -11,7 +11,7 @@ private: SerializationPtr nested; public: - SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {} + explicit SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {} void enumerateStreams( SubstreamPath & path, @@ -96,7 +96,7 @@ private: { const ColumnPtr null_map; - SubcolumnCreator(const ColumnPtr & null_map_) : null_map(null_map_) {} + explicit SubcolumnCreator(const ColumnPtr & null_map_) : null_map(null_map_) {} DataTypePtr create(const DataTypePtr & prev) const override; SerializationPtr create(const SerializationPtr & prev) const override; diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index 51d9df2cb5d..54ab4853360 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -23,7 +23,7 @@ namespace DB class SerializationSparse final : public ISerialization { public: - SerializationSparse(const SerializationPtr & nested_); + explicit SerializationSparse(const SerializationPtr & nested_); Kind getKind() const override { return Kind::SPARSE; } diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 4cdcffc21a8..43fc7e9914a 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -14,7 +14,7 @@ protected: SerializationPtr nested_serialization; public: - SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {} + explicit SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {} const SerializationPtr & getNested() const { return nested_serialization; } diff --git a/src/DataTypes/convertMySQLDataType.cpp b/src/DataTypes/convertMySQLDataType.cpp index ee897de9597..7e2f2e7c6b9 100644 --- a/src/DataTypes/convertMySQLDataType.cpp +++ b/src/DataTypes/convertMySQLDataType.cpp @@ -7,6 +7,7 @@ #include #include #include "DataTypeDate.h" +#include "DataTypeDate32.h" #include "DataTypeDateTime.h" #include "DataTypeDateTime64.h" #include "DataTypeEnum.h" @@ -73,7 +74,14 @@ DataTypePtr convertMySQLDataType(MultiEnum type_support, else if (type_name == "double") res = std::make_shared(); else if (type_name == "date") - res = std::make_shared(); + { + if (type_support.isSet(MySQLDataTypesSupport::DATE2DATE32)) + res = std::make_shared(); + else if (type_support.isSet(MySQLDataTypesSupport::DATE2STRING)) + res = std::make_shared(); + else + res = std::make_shared(); + } else if (type_name == "binary") res = std::make_shared(length); else if (type_name == "datetime" || type_name == "timestamp") diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 324f4808b1f..adfcd83f5a7 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -37,7 +37,7 @@ public: }; DatabaseAtomic::DatabaseAtomic(String name_, String metadata_path_, UUID uuid, const String & logger_name, ContextPtr context_) - : DatabaseOrdinary(name_, std::move(metadata_path_), "store/", logger_name, context_) + : DatabaseOrdinary(name_, 
metadata_path_, "store/", logger_name, context_) , path_to_table_symlinks(fs::path(getContext()->getPath()) / "data" / escapeForFileName(name_) / "") , path_to_metadata_symlink(fs::path(getContext()->getPath()) / "metadata" / escapeForFileName(name_)) , db_uuid(uuid) diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 773612e403c..6b957e567ff 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -30,7 +30,7 @@ public: void shutdown() override; static String enqueueQueryImpl(const ZooKeeperPtr & zookeeper, DDLLogEntry & entry, - DatabaseReplicated * const database, bool committed = false); + DatabaseReplicated * const database, bool committed = false); /// NOLINT private: bool initializeMainThread() override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 9ad33bd228f..f95653feb20 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -51,8 +51,8 @@ public: /// - it maintains a list of tables but tables are loaded lazily). virtual const StoragePtr & table() const = 0; - IDatabaseTablesIterator(const String & database_name_) : database_name(database_name_) { } - IDatabaseTablesIterator(String && database_name_) : database_name(std::move(database_name_)) { } + explicit IDatabaseTablesIterator(const String & database_name_) : database_name(database_name_) { } + explicit IDatabaseTablesIterator(String && database_name_) : database_name(std::move(database_name_)) { } virtual ~IDatabaseTablesIterator() = default; @@ -61,7 +61,7 @@ public: const String & databaseName() const { assert(!database_name.empty()); return database_name; } protected: - const String database_name; + String database_name; }; /// Copies list of tables and iterates through such snapshot. @@ -72,7 +72,7 @@ private: Tables::iterator it; protected: - DatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && other) + DatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && other) noexcept : IDatabaseTablesIterator(std::move(other.database_name)) { size_t idx = std::distance(other.tables.begin(), other.it); @@ -118,7 +118,7 @@ class IDatabase : public std::enable_shared_from_this { public: IDatabase() = delete; - IDatabase(String database_name_) : database_name(std::move(database_name_)) {} + explicit IDatabase(String database_name_) : database_name(std::move(database_name_)) {} /// Get name of database engine. virtual String getEngineName() const = 0; @@ -129,7 +129,7 @@ public: /// Load a set of existing tables. /// You can call only once, right after the object is created. - virtual void loadStoredObjects( + virtual void loadStoredObjects( /// NOLINT ContextMutablePtr /*context*/, bool /*force_restore*/, bool /*force_attach*/ = false, @@ -175,7 +175,7 @@ public: /// Get an iterator that allows you to pass through all the tables. /// It is possible to have "hidden" tables that are not visible when passing through, but are visible if you get them by name using the functions above. - virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) const = 0; + virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) const = 0; /// NOLINT /// Is the database empty. virtual bool empty() const = 0; @@ -191,7 +191,7 @@ public: } /// Delete the table from the database, drop table and delete the metadata. 
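The DatabaseTablesSnapshotIterator move constructor above is now marked noexcept. A minimal sketch, deliberately unrelated to the actual ClickHouse types, of why that matters for standard containers: std::vector only moves elements during reallocation when the move constructor cannot throw, otherwise it falls back to copying.

```cpp
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

// Standalone sketch of the noexcept-move pattern applied above.
struct Snapshot
{
    std::vector<std::string> tables;

    Snapshot() = default;

    // noexcept lets containers move elements during reallocation instead of
    // copying them (std::move_if_noexcept prefers copies for throwing moves).
    Snapshot(Snapshot && other) noexcept : tables(std::move(other.tables)) {}
    Snapshot & operator=(Snapshot && other) noexcept
    {
        tables = std::move(other.tables);
        return *this;
    }
};

static_assert(std::is_nothrow_move_constructible_v<Snapshot>);

int main()
{
    std::vector<Snapshot> snapshots;
    snapshots.reserve(1);
    snapshots.push_back(Snapshot{});
    snapshots.push_back(Snapshot{}); // reallocation moves, it does not copy
}
```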
- virtual void dropTable( + virtual void dropTable( /// NOLINT ContextPtr /*context*/, const String & /*name*/, [[maybe_unused]] bool no_delay = false) @@ -202,7 +202,7 @@ public: /// Add a table to the database, but do not add it to the metadata. The database may not support this method. /// /// Note: ATTACH TABLE statement actually uses createTable method. - virtual void attachTable(ContextPtr /* context */, const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {}) + virtual void attachTable(ContextPtr /* context */, const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {}) /// NOLINT { throw Exception("There is no ATTACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index cc6d808a564..5f4027a26b3 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -61,7 +61,7 @@ DatabaseMySQL::DatabaseMySQL( , database_engine_define(database_engine_define_->clone()) , database_name_in_mysql(database_name_in_mysql_) , database_settings(std::move(settings_)) - , mysql_pool(std::move(pool)) + , mysql_pool(std::move(pool)) /// NOLINT { try { diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index 0facdfc20be..580eb41b449 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -30,11 +30,15 @@ namespace ErrorCodes static std::unordered_map fetchTablesCreateQuery( const mysqlxx::PoolWithFailover::Entry & connection, const String & database_name, - const std::vector & fetch_tables, const Settings & global_settings) + const std::vector & fetch_tables, std::unordered_set & materialized_tables_list, + const Settings & global_settings) { std::unordered_map tables_create_query; for (const auto & fetch_table_name : fetch_tables) { + if (!materialized_tables_list.empty() && !materialized_tables_list.contains(fetch_table_name)) + continue; + Block show_create_table_header{ {std::make_shared(), "Table"}, {std::make_shared(), "Create Table"}, @@ -253,7 +257,7 @@ void MaterializeMetadata::transaction(const MySQLReplication::Position & positio out.close(); } - commitMetadata(std::move(fun), persistent_tmp_path, persistent_path); + commitMetadata(fun, persistent_tmp_path, persistent_path); } MaterializeMetadata::MaterializeMetadata(const String & path_, const Settings & settings_) : persistent_path(path_), settings(settings_) @@ -276,7 +280,8 @@ MaterializeMetadata::MaterializeMetadata(const String & path_, const Settings & void MaterializeMetadata::startReplication( mysqlxx::PoolWithFailover::Entry & connection, const String & database, - bool & opened_transaction, std::unordered_map & need_dumping_tables) + bool & opened_transaction, std::unordered_map & need_dumping_tables, + std::unordered_set & materialized_tables_list) { checkSyncUserPriv(connection, settings); @@ -297,7 +302,7 @@ void MaterializeMetadata::startReplication( connection->query("START TRANSACTION /*!40100 WITH CONSISTENT SNAPSHOT */;").execute(); opened_transaction = true; - need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database, settings), settings); + need_dumping_tables = fetchTablesCreateQuery(connection, database, fetchTablesInDB(connection, database, settings), materialized_tables_list, settings); 
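fetchTablesCreateQuery above now skips any table that is absent from a non-empty materialized_tables_list. A small self-contained sketch of that allow-list pattern, with a hypothetical helper and table names (C++20 for std::unordered_set::contains, as used in the patch):

```cpp
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// An empty allow-list means "replicate everything";
// a non-empty one keeps only the listed tables.
std::vector<std::string> filterTables(
    const std::vector<std::string> & all_tables,
    const std::unordered_set<std::string> & allow_list)
{
    std::vector<std::string> result;
    for (const auto & table : all_tables)
        if (allow_list.empty() || allow_list.contains(table))
            result.push_back(table);
    return result;
}

int main()
{
    const std::vector<std::string> tables{"orders", "users", "audit_log"};
    for (const auto & t : filterTables(tables, {"orders", "users"}))
        std::cout << t << '\n';  // prints orders and users, skips audit_log
}
```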
connection->query("UNLOCK TABLES;").execute(); } catch (...) diff --git a/src/Databases/MySQL/MaterializeMetadata.h b/src/Databases/MySQL/MaterializeMetadata.h index bcb0465b61e..b828c901fbb 100644 --- a/src/Databases/MySQL/MaterializeMetadata.h +++ b/src/Databases/MySQL/MaterializeMetadata.h @@ -48,7 +48,8 @@ struct MaterializeMetadata mysqlxx::PoolWithFailover::Entry & connection, const String & database, bool & opened_transaction, - std::unordered_map & need_dumping_tables); + std::unordered_map & need_dumping_tables, + std::unordered_set & materialized_tables_list); MaterializeMetadata(const String & path_, const Settings & settings_); }; diff --git a/src/Databases/MySQL/MaterializedMySQLSettings.h b/src/Databases/MySQL/MaterializedMySQLSettings.h index d5acdc81602..43235d502c3 100644 --- a/src/Databases/MySQL/MaterializedMySQLSettings.h +++ b/src/Databases/MySQL/MaterializedMySQLSettings.h @@ -16,6 +16,7 @@ class ASTStorage; M(UInt64, max_flush_data_time, 1000, "Max milliseconds that data is allowed to cache in memory(for database and the cache data unable to query). when this time is exceeded, the data will be materialized", 0) \ M(Int64, max_wait_time_when_mysql_unavailable, 1000, "Retry interval when MySQL is not available (milliseconds). Negative value disable retry.", 0) \ M(Bool, allows_query_when_mysql_lost, false, "Allow query materialized table when mysql is lost.", 0) \ + M(String, materialized_mysql_tables_list, "", "a comma-separated list of mysql database tables, which will be replicated by MaterializedMySQL database engine. Default value: empty list — means whole tables will be replicated.", 0) \ DECLARE_SETTINGS_TRAITS(MaterializedMySQLSettingsTraits, LIST_OF_MATERIALIZE_MODE_SETTINGS) diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp index 8033d65c549..230b158b231 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp @@ -25,6 +25,10 @@ #include #include #include +#include +#include +#include +#include namespace DB { @@ -148,6 +152,61 @@ static void checkMySQLVariables(const mysqlxx::Pool::Entry & connection, const S } } +static std::tuple tryExtractTableNameFromDDL(const String & ddl) +{ + String table_name; + String database_name; + if (ddl.empty()) return std::make_tuple(database_name, table_name); + + bool parse_failed = false; + Tokens tokens(ddl.data(), ddl.data() + ddl.size()); + IParser::Pos pos(tokens, 0); + Expected expected; + ASTPtr res; + ASTPtr table; + if (ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos, expected) || ParserKeyword("CREATE TABLE").ignore(pos, expected)) + { + ParserKeyword("IF NOT EXISTS").ignore(pos, expected); + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("ALTER TABLE").ignore(pos, expected)) + { + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("DROP TABLE").ignore(pos, expected) || ParserKeyword("DROP TEMPORARY TABLE").ignore(pos, expected)) + { + ParserKeyword("IF EXISTS").ignore(pos, expected); + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("TRUNCATE").ignore(pos, expected)) + { + ParserKeyword("TABLE").ignore(pos, expected); + if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else if (ParserKeyword("RENAME TABLE").ignore(pos, expected)) + { 
+ if (!ParserCompoundIdentifier(true).parse(pos, table, expected)) + parse_failed = true; + } + else + { + parse_failed = true; + } + if (!parse_failed) + { + if (auto table_id = table->as()->getTableId()) + { + database_name = table_id.database_name; + table_name = table_id.table_name; + } + } + return std::make_tuple(database_name, table_name); +} + MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( ContextPtr context_, const String & database_name_, @@ -159,11 +218,22 @@ MaterializedMySQLSyncThread::MaterializedMySQLSyncThread( , log(&Poco::Logger::get("MaterializedMySQLSyncThread")) , database_name(database_name_) , mysql_database_name(mysql_database_name_) - , pool(std::move(pool_)) + , pool(std::move(pool_)) /// NOLINT , client(std::move(client_)) , settings(settings_) { query_prefix = "EXTERNAL DDL FROM MySQL(" + backQuoteIfNeed(database_name) + ", " + backQuoteIfNeed(mysql_database_name) + ") "; + + if (!settings->materialized_mysql_tables_list.value.empty()) + { + Names tables_list; + boost::split(tables_list, settings->materialized_mysql_tables_list.value, [](char c){ return c == ','; }); + for (String & table_name: tables_list) + { + boost::trim(table_name); + materialized_tables_list.insert(table_name); + } + } } void MaterializedMySQLSyncThread::synchronization() @@ -434,7 +504,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta checkMySQLVariables(connection, getContext()->getSettingsRef()); std::unordered_map need_dumping_tables; - metadata.startReplication(connection, mysql_database_name, opened_transaction, need_dumping_tables); + metadata.startReplication(connection, mysql_database_name, opened_transaction, need_dumping_tables, materialized_tables_list); if (!need_dumping_tables.empty()) { @@ -464,7 +534,7 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta connection->query("COMMIT").execute(); client.connect(); - client.startBinlogDumpGTID(randomNumber(), mysql_database_name, metadata.executed_gtid_set, metadata.binlog_checksum); + client.startBinlogDumpGTID(randomNumber(), mysql_database_name, materialized_tables_list, metadata.executed_gtid_set, metadata.binlog_checksum); setSynchronizationThreadException(nullptr); return true; @@ -792,9 +862,24 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even auto query_context = createQueryContext(getContext()); CurrentThread::QueryScope query_scope(query_context); + String query = query_event.query; + if (!materialized_tables_list.empty()) + { + auto [ddl_database_name, ddl_table_name] = tryExtractTableNameFromDDL(query_event.query); + + if (!ddl_table_name.empty()) + { + ddl_database_name = ddl_database_name.empty() ? query_event.schema: ddl_database_name; + if (ddl_database_name != mysql_database_name || !materialized_tables_list.contains(ddl_table_name)) + { + LOG_DEBUG(log, "Skip MySQL DDL: \n {}", query_event.query); + return; + } + } + } String comment = "Materialize MySQL step 2: execute MySQL DDL for sync data"; String event_database = query_event.schema == mysql_database_name ? 
database_name : ""; - tryToExecuteQuery(query_prefix + query_event.query, query_context, event_database, comment); + tryToExecuteQuery(query_prefix + query, query_context, event_database, comment); } catch (Exception & exception) { diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index ba5022137bf..163a3732fb9 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -63,15 +63,16 @@ private: mutable MySQLClient client; MaterializedMySQLSettings * settings; String query_prefix; + NameSet materialized_tables_list; // USE MySQL ERROR CODE: // https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html - const int ER_ACCESS_DENIED_ERROR = 1045; - const int ER_DBACCESS_DENIED_ERROR = 1044; - const int ER_BAD_DB_ERROR = 1049; + const int ER_ACCESS_DENIED_ERROR = 1045; /// NOLINT + const int ER_DBACCESS_DENIED_ERROR = 1044; /// NOLINT + const int ER_BAD_DB_ERROR = 1049; /// NOLINT // https://dev.mysql.com/doc/mysql-errors/8.0/en/client-error-reference.html - const int CR_SERVER_LOST = 2013; + const int CR_SERVER_LOST = 2013; /// NOLINT struct Buffers { @@ -87,7 +88,7 @@ private: using BufferAndSortingColumnsPtr = std::shared_ptr; std::unordered_map data; - Buffers(const String & database_) : database(database_) {} + explicit Buffers(const String & database_) : database(database_) {} void commit(ContextPtr context); diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index dba8bf64798..dd125294615 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes extern const int QUERY_NOT_ALLOWED; extern const int UNKNOWN_TABLE; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( @@ -309,8 +310,12 @@ void DatabaseMaterializedPostgreSQL::attachTable(ContextPtr context_, const Stri } } +StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr, const String &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH TABLE not allowed, use DETACH PERMANENTLY"); +} -StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, const String & table_name) +void DatabaseMaterializedPostgreSQL::detachTablePermanently(ContextPtr, const String & table_name) { /// If there is query context then we need to detach materialized storage. /// If there is no query context then we need to detach internal storage from atomic database. 
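The sync-thread constructor above builds its NameSet by splitting the comma-separated materialized_mysql_tables_list setting with boost::split and trimming each entry. A standard-library-only sketch of that same parsing step, with an invented helper name:

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_set>

// Turn "t1, t2 ,t3" into a set of trimmed table names.
std::unordered_set<std::string> parseTablesList(const std::string & value)
{
    std::unordered_set<std::string> tables;
    std::istringstream stream(value);
    std::string item;
    while (std::getline(stream, item, ','))
    {
        const auto begin = item.find_first_not_of(" \t");
        const auto end = item.find_last_not_of(" \t");
        if (begin != std::string::npos)
            tables.insert(item.substr(begin, end - begin + 1));
    }
    return tables;
}

int main()
{
    for (const auto & name : parseTablesList(" orders, users ,audit_log"))
        std::cout << '[' << name << "]\n";
}
```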
@@ -360,11 +365,6 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, cons } materialized_tables.erase(table_name); - return nullptr; - } - else - { - return DatabaseAtomic::detachTable(context_, table_name); } } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 40ff0d9262d..08420f4ba5e 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -51,6 +51,8 @@ public: void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override; + void detachTablePermanently(ContextPtr context, const String & table_name) override; + StoragePtr detachTable(ContextPtr context, const String & table_name) override; void dropTable(ContextPtr local_context, const String & name, bool no_delay) override; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index d43bde0b886..ce1ed98b977 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -174,7 +174,7 @@ StoragePtr DatabasePostgreSQL::tryGetTable(const String & table_name, ContextPtr } -StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, const bool table_checked) const +StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, bool table_checked) const { if (!cache_tables || !cached_tables.count(table_name)) { @@ -194,7 +194,7 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, if (cache_tables) cached_tables[table_name] = storage; - return std::move(storage); + return storage; } if (table_checked || checkPostgresTable(table_name)) @@ -414,7 +414,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co assert(storage_engine_arguments->children.size() >= 2); storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, std::make_shared(table_id.table_name)); - return std::move(create_table_query); + return create_table_query; } diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index d41dbff1f54..3397dcc8076 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -81,7 +81,7 @@ private: bool checkPostgresTable(const String & table_name) const; - StoragePtr fetchTable(const String & table_name, ContextPtr context, const bool table_checked) const; + StoragePtr fetchTable(const String & table_name, ContextPtr context, bool table_checked) const; void removeOutdatedTables(); diff --git a/src/Dictionaries/CacheDictionary.cpp b/src/Dictionaries/CacheDictionary.cpp index cad3e3b8799..8b8d0a57cc7 100644 --- a/src/Dictionaries/CacheDictionary.cpp +++ b/src/Dictionaries/CacheDictionary.cpp @@ -494,7 +494,7 @@ Pipe CacheDictionary::read(const Names & column_names, size { auto keys = cache_storage_ptr->getCachedSimpleKeys(); auto keys_column = getColumnFromPODArray(std::move(keys)); - key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; + key_columns = {ColumnWithTypeAndName(keys_column, std::make_shared(), dict_struct.id->name)}; } else { diff --git a/src/Dictionaries/CacheDictionaryUpdateQueue.h b/src/Dictionaries/CacheDictionaryUpdateQueue.h index 7725ce7588f..d6a195ca7b8 100644 --- 
a/src/Dictionaries/CacheDictionaryUpdateQueue.h +++ b/src/Dictionaries/CacheDictionaryUpdateQueue.h @@ -75,7 +75,7 @@ private: friend class CacheDictionaryUpdateQueue; std::atomic is_done{false}; - std::exception_ptr current_exception{nullptr}; + std::exception_ptr current_exception{nullptr}; /// NOLINT /// While UpdateUnit is alive, it is accounted in update_queue size. CurrentMetrics::Increment alive_batch{CurrentMetrics::CacheDictionaryUpdateQueueBatches}; diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 30111e11686..3b90d46acdf 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -23,8 +23,8 @@ class ObjectHolder CassT * ptr = nullptr; public: template - ObjectHolder(Args &&... args) : ptr(Ctor(std::forward(args)...)) {} - ObjectHolder(CassT * ptr_) : ptr(ptr_) {} + ObjectHolder(Args &&... args) : ptr(Ctor(std::forward(args)...)) {} /// NOLINT + ObjectHolder(CassT * ptr_) : ptr(ptr_) {} /// NOLINT ObjectHolder(const ObjectHolder &) = delete; ObjectHolder & operator = (const ObjectHolder &) = delete; @@ -46,8 +46,8 @@ public: } /// For implicit conversion when passing object to driver library functions - operator CassT * () { return ptr; } - operator const CassT * () const { return ptr; } + operator CassT * () { return ptr; } /// NOLINT + operator const CassT * () const { return ptr; } /// NOLINT }; } diff --git a/src/Dictionaries/ClickHouseDictionarySource.cpp b/src/Dictionaries/ClickHouseDictionarySource.cpp index deecc3c983e..5a18dcffb22 100644 --- a/src/Dictionaries/ClickHouseDictionarySource.cpp +++ b/src/Dictionaries/ClickHouseDictionarySource.cpp @@ -30,7 +30,7 @@ namespace ErrorCodes static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", - "update_field", "update_tag", "invalidate_query", "query", "where", "name", "secure"}; + "update_field", "update_lag", "invalidate_query", "query", "where", "name", "secure"}; namespace { diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index f2d7febfa8e..80b15eb2569 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -187,7 +187,7 @@ private: DataTypes dictionary_attributes_types; }; -static inline void insertDefaultValuesIntoColumns( +static inline void insertDefaultValuesIntoColumns( /// NOLINT MutableColumns & columns, const DictionaryStorageFetchRequest & fetch_request, size_t row_index) @@ -206,7 +206,7 @@ static inline void insertDefaultValuesIntoColumns( /// Deserialize column value and insert it in columns. /// Skip unnecessary columns that were not requested from deserialization. 
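The NOLINT markers on ObjectHolder above keep its implicit constructor and conversion operators, which exist so the RAII wrapper can be passed straight to the C driver functions. A sketch of that idiom against a made-up C-style handle API (nothing here is the real Cassandra driver interface):

```cpp
#include <cstdio>

// Hypothetical C-style handle API, standing in for a driver library.
struct Handle { int id; };
Handle * handle_create(int id) { return new Handle{id}; }
void handle_use(const Handle * h) { std::printf("using handle %d\n", h->id); }
void handle_free(Handle * h) { delete h; }

// RAII wrapper that deliberately keeps implicit conversions to the raw
// pointer so it can be handed directly to the C API; this is the kind of
// intentional conversion the NOLINT comments above preserve.
class HandleHolder
{
    Handle * ptr = nullptr;
public:
    HandleHolder(Handle * p) : ptr(p) {}            // intentionally implicit
    ~HandleHolder() { if (ptr) handle_free(ptr); }
    HandleHolder(const HandleHolder &) = delete;
    HandleHolder & operator=(const HandleHolder &) = delete;

    operator Handle * () { return ptr; }            // intentionally implicit
    operator const Handle * () const { return ptr; }
};

int main()
{
    HandleHolder holder = handle_create(42);
    handle_use(holder); // converts implicitly, like the driver calls would
}
```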
-static inline void deserializeAndInsertIntoColumns( +static inline void deserializeAndInsertIntoColumns( /// NOLINT MutableColumns & columns, const DictionaryStorageFetchRequest & fetch_request, const char * place_for_serialized_columns) diff --git a/src/Dictionaries/DictionarySourceHelpers.cpp b/src/Dictionaries/DictionarySourceHelpers.cpp index cd87cf831a2..fcad8398c0b 100644 --- a/src/Dictionaries/DictionarySourceHelpers.cpp +++ b/src/Dictionaries/DictionarySourceHelpers.cpp @@ -52,7 +52,7 @@ Block blockForKeys( auto filtered_column = source_column->filter(filter, requested_rows.size()); - block.insert({std::move(filtered_column), (*dict_struct.key)[i].type, (*dict_struct.key)[i].name}); + block.insert({filtered_column, (*dict_struct.key)[i].type, (*dict_struct.key)[i].name}); } return block; diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index 3e29f3efe76..012750bde60 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -33,8 +33,8 @@ namespace DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute( const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type) { - const auto name = config.getString(config_prefix + ".name", ""); - const auto expression = config.getString(config_prefix + ".expression", ""); + auto name = config.getString(config_prefix + ".name", ""); + auto expression = config.getString(config_prefix + ".expression", ""); if (name.empty() && !expression.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Element {}.name is empty"); diff --git a/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h index 198f13e0f32..c2e36f59e1e 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h @@ -14,7 +14,7 @@ private: FileUpdatesTracker updates_tracker; public: - RegionsHierarchyDataSource(const std::string & path_) : path(path_), updates_tracker(path_) {} + explicit RegionsHierarchyDataSource(const std::string & path_) : path(path_), updates_tracker(path_) {} bool isModified() const override; @@ -40,7 +40,7 @@ public: * For example, if /opt/geo/regions_hierarchy.txt is specified, * then the /opt/geo/regions_hierarchy_ua.txt file will also be loaded, if any, it will be accessible by the `ua` key. 
*/ - RegionsHierarchiesDataProvider(const std::string & path_); + explicit RegionsHierarchiesDataProvider(const std::string & path_); std::vector listCustomHierarchies() const override; diff --git a/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h b/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h index 85dd8ce58b7..64f393ada62 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h +++ b/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h @@ -11,7 +11,7 @@ private: DB::ReadBufferPtr input; public: - RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} + explicit RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} bool readNext(RegionEntry & entry) override; }; diff --git a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h index 0606896c951..f7d51135440 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h @@ -27,7 +27,7 @@ public: virtual IRegionsHierarchyReaderPtr createReader() = 0; - virtual ~IRegionsHierarchyDataSource() {} + virtual ~IRegionsHierarchyDataSource() = default; }; using IRegionsHierarchyDataSourcePtr = std::shared_ptr; @@ -42,7 +42,7 @@ public: virtual IRegionsHierarchyDataSourcePtr getDefaultHierarchySource() const = 0; virtual IRegionsHierarchyDataSourcePtr getHierarchySource(const std::string & name) const = 0; - virtual ~IRegionsHierarchiesDataProvider() {} + virtual ~IRegionsHierarchiesDataProvider() = default; }; using IRegionsHierarchiesDataProviderPtr = std::shared_ptr; diff --git a/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h index 26de5d9116b..679c14d546b 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h @@ -10,7 +10,7 @@ class ILanguageRegionsNamesReader public: virtual bool readNext(RegionNameEntry & entry) = 0; - virtual ~ILanguageRegionsNamesReader() {} + virtual ~ILanguageRegionsNamesReader() = default; }; using ILanguageRegionsNamesReaderPtr = std::unique_ptr; @@ -32,7 +32,7 @@ public: virtual std::string getSourceName() const = 0; - virtual ~ILanguageRegionsNamesDataSource() {} + virtual ~ILanguageRegionsNamesDataSource() = default; }; using ILanguageRegionsNamesDataSourcePtr = std::unique_ptr; @@ -45,7 +45,7 @@ public: /// Returns nullptr if the language data does not exist. 
virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const = 0; - virtual ~IRegionsNamesDataProvider() {} + virtual ~IRegionsNamesDataProvider() = default; }; using IRegionsNamesDataProviderPtr = std::unique_ptr; diff --git a/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h b/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h index 573569ab115..49d324d434e 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h +++ b/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h @@ -11,7 +11,7 @@ private: DB::ReadBufferPtr input; public: - LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} + explicit LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} bool readNext(RegionNameEntry & entry) override; }; diff --git a/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h index c380fcb7d1d..2d49cceab86 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h @@ -39,7 +39,7 @@ private: std::string directory; public: - RegionsNamesDataProvider(const std::string & directory_); + explicit RegionsNamesDataProvider(const std::string & directory_); ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const override; diff --git a/src/Dictionaries/Embedded/RegionsHierarchies.h b/src/Dictionaries/Embedded/RegionsHierarchies.h index 67cd7c2a658..925b7b490ff 100644 --- a/src/Dictionaries/Embedded/RegionsHierarchies.h +++ b/src/Dictionaries/Embedded/RegionsHierarchies.h @@ -8,7 +8,7 @@ /** Contains several hierarchies of regions. * Used to support several different perspectives on the ownership of regions by countries. - * First of all, for the Crimea (Russian and Ukrainian points of view). + * First of all, for the Falklands/Malvinas (UK and Argentina points of view). */ class RegionsHierarchies { @@ -17,7 +17,7 @@ private: Container data; public: - RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider); + explicit RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider); /** Reloads, if necessary, all hierarchies of regions. */ @@ -27,7 +27,6 @@ public: elem.second.reload(); } - const RegionsHierarchy & get(const std::string & key) const { auto it = data.find(key); diff --git a/src/Dictionaries/Embedded/RegionsHierarchy.h b/src/Dictionaries/Embedded/RegionsHierarchy.h index 45d6c5246ca..508bca0d1e1 100644 --- a/src/Dictionaries/Embedded/RegionsHierarchy.h +++ b/src/Dictionaries/Embedded/RegionsHierarchy.h @@ -49,7 +49,7 @@ private: IRegionsHierarchyDataSourcePtr data_source; public: - RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_); + explicit RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_); /// Reloads, if necessary, the hierarchy of regions. Not threadsafe. 
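Most of the changes in this stretch mark single-argument constructors explicit. A minimal sketch, with hypothetical types, of the silent conversion that explicit forbids:

```cpp
#include <string>

// Hypothetical types; not ClickHouse classes.
struct ImplicitReader
{
    ImplicitReader(const std::string & path) : path_(path) {}  // implicit
    std::string path_;
};

struct ExplicitReader
{
    explicit ExplicitReader(const std::string & path) : path_(path) {}
    std::string path_;
};

void open(const ImplicitReader &) {}
void openExplicit(const ExplicitReader &) {}

int main()
{
    open(std::string("regions.txt"));            // compiles: silent conversion
    // openExplicit(std::string("regions.txt")); // would not compile
    openExplicit(ExplicitReader{std::string("regions.txt")}); // intent spelled out
}
```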
void reload(); diff --git a/src/Dictionaries/Embedded/RegionsNames.h b/src/Dictionaries/Embedded/RegionsNames.h index ff60c274401..ec06a0b1a33 100644 --- a/src/Dictionaries/Embedded/RegionsNames.h +++ b/src/Dictionaries/Embedded/RegionsNames.h @@ -40,7 +40,7 @@ class RegionsNames public: enum class Language : size_t { - #define M(NAME, FALLBACK, NUM) NAME = NUM, + #define M(NAME, FALLBACK, NUM) NAME = (NUM), FOR_EACH_LANGUAGE(M) #undef M }; @@ -78,7 +78,7 @@ private: static std::string dumpSupportedLanguagesNames(); public: - RegionsNames(IRegionsNamesDataProviderPtr data_provider); + explicit RegionsNames(IRegionsNamesDataProviderPtr data_provider); StringRef getRegionName(RegionID region_id, Language language) const { @@ -104,7 +104,7 @@ public: #define M(NAME, FALLBACK, NUM) \ if (0 == language.compare(#NAME)) \ return Language::NAME; - FOR_EACH_LANGUAGE(M) + FOR_EACH_LANGUAGE(M) /// NOLINT #undef M throw Poco::Exception("Unsupported language for region name. Supported languages are: " + dumpSupportedLanguagesNames() + "."); } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 0c82da7b73b..cb2419633bf 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -32,13 +32,11 @@ FlatDictionary::FlatDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, Configuration configuration_, BlockPtr update_field_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} - , dict_lifetime(dict_lifetime_) , configuration(configuration_) , loaded_keys(configuration.initial_array_size, false) , update_field_loaded_block(std::move(update_field_loaded_block_)) @@ -147,7 +145,7 @@ ColumnPtr FlatDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (attribute.is_nullable_set) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } @@ -572,7 +570,7 @@ Pipe FlatDictionary::read(const Names & column_names, size_t max_block_size, siz keys.push_back(key_index); auto keys_column = getColumnFromPODArray(std::move(keys)); - ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(std::move(keys_column), std::make_shared(), dict_struct.id->name)}; + ColumnsWithTypeAndName key_columns = {ColumnWithTypeAndName(keys_column, std::make_shared(), dict_struct.id->name)}; std::shared_ptr dictionary = shared_from_this(); auto coordinator = DictionarySourceCoordinator::create(dictionary, column_names, std::move(key_columns), max_block_size); @@ -604,18 +602,19 @@ void registerDictionaryFlat(DictionaryFactory & factory) static constexpr size_t default_max_array_size = 500000; String dictionary_layout_prefix = config_prefix + ".layout" + ".flat"; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; FlatDictionary::Configuration configuration { .initial_array_size = config.getUInt64(dictionary_layout_prefix + ".initial_array_size", default_initial_array_size), .max_array_size = config.getUInt64(dictionary_layout_prefix + ".max_array_size", default_max_array_size), - .require_nonempty = config.getBool(config_prefix + ".require_nonempty", false) + .require_nonempty = config.getBool(config_prefix + ".require_nonempty", false), + .dict_lifetime = dict_lifetime }; const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); - 
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - return std::make_unique(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, std::move(configuration)); + return std::make_unique(dict_id, dict_struct, std::move(source_ptr), std::move(configuration)); }; factory.registerLayout("flat", create_layout, false); diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 2578fef3ecb..f342c38802d 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -26,13 +26,13 @@ public: size_t initial_array_size; size_t max_array_size; bool require_nonempty; + DictionaryLifetime dict_lifetime; }; FlatDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, Configuration configuration_, BlockPtr update_field_loaded_block_ = nullptr); @@ -58,12 +58,12 @@ public: std::shared_ptr clone() const override { - return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, configuration, update_field_loaded_block); + return std::make_shared(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } DictionarySourcePtr getSource() const override { return source_ptr; } - const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + const DictionaryLifetime & getLifetime() const override { return configuration.dict_lifetime; } const DictionaryStructure & getStructure() const override { return dict_struct; } @@ -159,7 +159,6 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; - const DictionaryLifetime dict_lifetime; const Configuration configuration; std::vector attributes; diff --git a/src/Dictionaries/HashedArrayDictionary.cpp b/src/Dictionaries/HashedArrayDictionary.cpp index ea041c63d73..65d9b3e7d42 100644 --- a/src/Dictionaries/HashedArrayDictionary.cpp +++ b/src/Dictionaries/HashedArrayDictionary.cpp @@ -578,7 +578,7 @@ ColumnPtr HashedArrayDictionary::getAttributeColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } diff --git a/src/Dictionaries/HashedDictionary.cpp b/src/Dictionaries/HashedDictionary.cpp index b70f018df6b..178631d9c53 100644 --- a/src/Dictionaries/HashedDictionary.cpp +++ b/src/Dictionaries/HashedDictionary.cpp @@ -159,7 +159,7 @@ ColumnPtr HashedDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } diff --git a/src/Dictionaries/ICacheDictionaryStorage.h b/src/Dictionaries/ICacheDictionaryStorage.h index b094d76a9a7..a4990528a4e 100644 --- a/src/Dictionaries/ICacheDictionaryStorage.h +++ b/src/Dictionaries/ICacheDictionaryStorage.h @@ -22,7 +22,7 @@ struct KeyState , fetched_column_index(fetched_column_index_) {} - KeyState(State state_) + KeyState(State state_) /// NOLINT : state(state_) {} diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index 042153f0971..c18dbcfbea7 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -150,7 +150,7 @@ public: auto & key_column_to_cast = 
key_columns[key_attribute_type_index]; ColumnWithTypeAndName column_to_cast = {key_column_to_cast, key_type, ""}; - auto casted_column = castColumnAccurate(std::move(column_to_cast), key_attribute_type); + auto casted_column = castColumnAccurate(column_to_cast, key_attribute_type); key_column_to_cast = std::move(casted_column); key_type = key_attribute_type; } diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index 8dddc988caa..894af5ceb71 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -26,7 +26,7 @@ public: const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, + const DictionaryLifetime dict_lifetime_, /// NOLINT bool require_nonempty_); std::string getKeyDescription() const { return key_description; } @@ -160,7 +160,7 @@ private: template static void createAttributeImpl(Attribute & attribute, const Field & null_value); - static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); + static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); /// NOLINT template void getItemsByTwoKeyColumnsImpl( @@ -177,7 +177,7 @@ private: DefaultValueExtractor & default_value_extractor) const; template - void setAttributeValueImpl(Attribute & attribute, const T value); + void setAttributeValueImpl(Attribute & attribute, const T value); /// NOLINT void setAttributeValue(Attribute & attribute, const Field & value); diff --git a/src/Dictionaries/MySQLDictionarySource.cpp b/src/Dictionaries/MySQLDictionarySource.cpp index 29d70f3a7c4..6578f91aa73 100644 --- a/src/Dictionaries/MySQLDictionarySource.cpp +++ b/src/Dictionaries/MySQLDictionarySource.cpp @@ -34,7 +34,7 @@ static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "update_field", "invalidate_query", "priority", - "update_tag", "dont_check_update_time", + "update_lag", "dont_check_update_time", "query", "where", "name" /* name_collection */, "socket", "share_connection", "fail_on_connection_loss", "close_connection", "ssl_ca", "ssl_cert", "ssl_key", diff --git a/src/Dictionaries/PolygonDictionary.cpp b/src/Dictionaries/PolygonDictionary.cpp index deec1e6a588..1a4e01d4aa3 100644 --- a/src/Dictionaries/PolygonDictionary.cpp +++ b/src/Dictionaries/PolygonDictionary.cpp @@ -61,7 +61,7 @@ void IPolygonDictionary::convertKeyColumns(Columns & key_columns, DataTypes & ke auto & key_column_to_cast = key_columns[key_type_index]; ColumnWithTypeAndName column_to_cast = {key_column_to_cast, key_type, ""}; - auto casted_column = castColumnAccurate(std::move(column_to_cast), float_64_type); + auto casted_column = castColumnAccurate(column_to_cast, float_64_type); key_column_to_cast = std::move(casted_column); key_type = float_64_type; } diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 0aca7cd8af0..9d6d6ae0501 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -38,7 +38,7 @@ public: SlabsPolygonIndex() = default; /** Builds an index by splitting all edges with all points x coordinates. */ - SlabsPolygonIndex(const std::vector & polygons); + explicit SlabsPolygonIndex(const std::vector & polygons); /** Finds polygon id the same way as IPolygonIndex. 
*/ bool find(const Point & point, size_t & id) const; @@ -179,7 +179,7 @@ class GridRoot : public ICell { public: GridRoot(size_t min_intersections_, size_t max_depth_, const std::vector & polygons_): - kMinIntersections(min_intersections_), kMaxDepth(max_depth_), polygons(polygons_) + k_min_intersections(min_intersections_), k_max_depth(max_depth_), polygons(polygons_) { setBoundingBox(); std::vector order(polygons.size()); @@ -209,8 +209,8 @@ private: std::unique_ptr> root = nullptr; Coord min_x = 0, min_y = 0; Coord max_x = 0, max_y = 0; - const size_t kMinIntersections; - const size_t kMaxDepth; + const size_t k_min_intersections; + const size_t k_max_depth; const std::vector & polygons; @@ -236,7 +236,7 @@ private: } #endif size_t intersections = possible_ids.size() - covered; - if (intersections <= kMinIntersections || depth++ == kMaxDepth) + if (intersections <= k_min_intersections || depth++ == k_max_depth) return std::make_unique(possible_ids, polygons, current_box, covered); auto x_shift = (current_max_x - current_min_x) / DividedCell::kSplit; auto y_shift = (current_max_y - current_min_y) / DividedCell::kSplit; diff --git a/src/Dictionaries/PostgreSQLDictionarySource.cpp b/src/Dictionaries/PostgreSQLDictionarySource.cpp index 6fdf486fdbf..511d6a7288e 100644 --- a/src/Dictionaries/PostgreSQLDictionarySource.cpp +++ b/src/Dictionaries/PostgreSQLDictionarySource.cpp @@ -30,7 +30,7 @@ static const UInt64 max_block_size = 8192; static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", - "update_field", "update_tag", "invalidate_query", "query", "where", "name", "priority"}; + "update_field", "update_lag", "invalidate_query", "query", "where", "name", "priority"}; namespace { diff --git a/src/Dictionaries/RangeHashedDictionary.cpp b/src/Dictionaries/RangeHashedDictionary.cpp index 5330bc684c3..e82fcd580e2 100644 --- a/src/Dictionaries/RangeHashedDictionary.cpp +++ b/src/Dictionaries/RangeHashedDictionary.cpp @@ -198,7 +198,7 @@ ColumnPtr RangeHashedDictionary::getColumn( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } @@ -298,7 +298,7 @@ ColumnPtr RangeHashedDictionary::getColumnInternal( callOnDictionaryAttributeType(attribute.type, type_call); if (is_attribute_nullable) - result = ColumnNullable::create(std::move(result), std::move(col_null_map_to)); + result = ColumnNullable::create(result, std::move(col_null_map_to)); return result; } diff --git a/src/Dictionaries/RedisDictionarySource.cpp b/src/Dictionaries/RedisDictionarySource.cpp index a1b406b3424..fd381ab2921 100644 --- a/src/Dictionaries/RedisDictionarySource.cpp +++ b/src/Dictionaries/RedisDictionarySource.cpp @@ -136,9 +136,9 @@ namespace DB RedisArray keys; auto key_type = storageTypeToKeyType(configuration.storage_type); - for (const auto & key : all_keys) + for (auto && key : all_keys) if (key_type == connection->client->execute(RedisCommand("TYPE").addRedisType(key))) - keys.addRedisType(std::move(key)); + keys.addRedisType(key); if (configuration.storage_type == RedisStorageType::HASH_MAP) { @@ -165,10 +165,10 @@ namespace DB } if (primary_with_secondary.size() > 1) - hkeys.add(std::move(primary_with_secondary)); + hkeys.add(primary_with_secondary); } - keys = std::move(hkeys); + keys = hkeys; } return Pipe(std::make_shared( diff --git 
a/src/Dictionaries/SSDCacheDictionaryStorage.h b/src/Dictionaries/SSDCacheDictionaryStorage.h index adbe4084d81..9b1a4ed1e6d 100644 --- a/src/Dictionaries/SSDCacheDictionaryStorage.h +++ b/src/Dictionaries/SSDCacheDictionaryStorage.h @@ -761,9 +761,9 @@ private: FileDescriptor() = default; - FileDescriptor(FileDescriptor && rhs) : fd(rhs.fd) { rhs.fd = -1; } + FileDescriptor(FileDescriptor && rhs) noexcept : fd(rhs.fd) { rhs.fd = -1; } - FileDescriptor & operator=(FileDescriptor && rhs) + FileDescriptor & operator=(FileDescriptor && rhs) noexcept { if (this == &rhs) return *this; diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp index d3777f8ca00..fb07d8c356b 100644 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp @@ -53,7 +53,7 @@ DiskAzureBlobStorage::DiskAzureBlobStorage( std::shared_ptr blob_container_client_, SettingsPtr settings_, GetDiskSettings settings_getter_) : - IDiskRemote(name_, "", metadata_disk_, "DiskAzureBlobStorage", settings_->thread_pool_size), + IDiskRemote(name_, "", metadata_disk_, nullptr, "DiskAzureBlobStorage", settings_->thread_pool_size), blob_container_client(blob_container_client_), current_settings(std::move(settings_)), settings_getter(settings_getter_) {} @@ -70,13 +70,11 @@ std::unique_ptr DiskAzureBlobStorage::readFile( LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); - bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; - auto reader_impl = std::make_unique( path, blob_container_client, metadata, settings->max_single_read_retries, - settings->max_single_download_retries, read_settings, threadpool_read); + settings->max_single_download_retries, read_settings); - if (threadpool_read) + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto reader = getThreadPoolReader(); return std::make_unique(reader, read_settings, std::move(reader_impl)); diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 5d5eb89691e..3519b1212a4 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -144,6 +144,14 @@ DiskCacheWrapper::readFile( } } + auto current_read_settings = settings; + /// Do not use RemoteFSReadMethod::threadpool for index and mark files. + /// Here it does not make sense since the files are small. + /// Note: enabling `threadpool` read requires to call setReadUntilEnd(). + current_read_settings.remote_fs_method = RemoteFSReadMethod::read; + /// Disable data cache. 
+ current_read_settings.remote_fs_enable_cache = false; + if (metadata->status == DOWNLOADING) { FileDownloadStatus result_status = DOWNLOADED; @@ -158,7 +166,7 @@ DiskCacheWrapper::readFile( auto tmp_path = path + ".tmp"; { - auto src_buffer = DiskDecorator::readFile(path, settings, read_hint, file_size); + auto src_buffer = DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); auto dst_buffer = cache_disk->writeFile(tmp_path, settings.local_fs_buffer_size, WriteMode::Rewrite); copyData(*src_buffer, *dst_buffer); } @@ -184,7 +192,7 @@ DiskCacheWrapper::readFile( if (metadata->status == DOWNLOADED) return cache_disk->readFile(path, settings, read_hint, file_size); - return DiskDecorator::readFile(path, settings, read_hint, file_size); + return DiskDecorator::readFile(path, current_read_settings, read_hint, file_size); } std::unique_ptr diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 57bfaf405e0..44fdbb77323 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -6,7 +6,8 @@ #include #include #include -#include +#include +#include #include #include @@ -325,7 +326,7 @@ DiskDirectoryIteratorPtr DiskLocal::iterateDirectory(const String & path) void DiskLocal::moveFile(const String & from_path, const String & to_path) { - fs::rename(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path); + renameNoReplace(fs::path(disk_path) / from_path, fs::path(disk_path) / to_path); } void DiskLocal::replaceFile(const String & from_path, const String & to_path) diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index eef7b78502d..fe108f53c68 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -22,7 +22,7 @@ class WriteBufferFromFileBase; class DiskMemory : public IDisk { public: - DiskMemory(const String & name_) : name(name_), disk_path("memory://" + name_ + '/') {} + explicit DiskMemory(const String & name_) : name(name_), disk_path("memory://" + name_ + '/') {} const String & getName() const override { return name; } @@ -97,7 +97,6 @@ private: void createDirectoriesImpl(const String & path); void replaceFileImpl(const String & from_path, const String & to_path); -private: friend class WriteIndirectBuffer; enum class FileType @@ -112,7 +111,7 @@ private: String data; FileData(FileType type_, String data_) : type(type_), data(std::move(data_)) {} - explicit FileData(FileType type_) : type(type_), data("") {} + explicit FileData(FileType type_) : type(type_) {} }; using Files = std::unordered_map; /// file path -> file data diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index beeb76bd91b..43011a4cf72 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -41,6 +41,8 @@ public: swap(*impl); } + String getInfoForLog() override { return impl->getInfoForLog(); } + private: ReadLock lock; }; diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 0cd1267c6ef..a2fce4b14d1 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -19,7 +19,7 @@ class DiskSelector { public: DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - DiskSelector(const DiskSelector & from) : disks(from.disks) { } + DiskSelector(const DiskSelector & from) = default; DiskSelectorPtr updateFromConfig( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 7c94a5b98b1..f3039d9af2e 100644 --- a/src/Disks/DiskWebServer.cpp +++ 
b/src/Disks/DiskWebServer.cpp @@ -168,11 +168,9 @@ std::unique_ptr DiskWebServer::readFile(const String & p RemoteMetadata meta(path, remote_path); meta.remote_fs_objects.emplace_back(std::make_pair(remote_path, iter->second.size)); - bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + auto web_impl = std::make_unique(path, url, meta, getContext(), read_settings); - auto web_impl = std::make_unique(path, url, meta, getContext(), threadpool_read, read_settings); - - if (threadpool_read) + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto reader = IDiskRemote::getThreadPoolReader(); return std::make_unique(reader, read_settings, std::move(web_impl), min_bytes_for_seek); diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index bda8c8adaad..e2da0b2a1e1 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -38,7 +38,7 @@ namespace ErrorCodes * * To get files for upload run: * clickhouse static-files-disk-uploader --metadata-path --output-dir - * (--metadata-path can be found in query: `select data_paths from system.tables where name='';`) + * (--metadata-path can be found in query: `select data_paths from system.tables where name='';`) /// NOLINT * * When loading files by they must be loaded into /store/ path, but config must conrain only . * diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 51691806089..7f60b219a4b 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -65,7 +65,7 @@ DiskHDFS::DiskHDFS( SettingsPtr settings_, DiskPtr metadata_disk_, const Poco::Util::AbstractConfiguration & config_) - : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, "DiskHDFS", settings_->thread_pool_size) + : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, nullptr, "DiskHDFS", settings_->thread_pool_size) , config(config_) , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) @@ -82,7 +82,7 @@ std::unique_ptr DiskHDFS::readFile(const String & path, "Read from file by path: {}. Existing HDFS objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - auto hdfs_impl = std::make_unique(path, config, remote_fs_root_path, metadata, read_settings.remote_fs_buffer_size); + auto hdfs_impl = std::make_unique(path, config, remote_fs_root_path, metadata, read_settings); auto buf = std::make_unique(std::move(hdfs_impl)); return std::make_unique(std::move(buf), settings->min_bytes_for_seek); } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 5068ac5dde9..d7d94cd03d7 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -158,14 +158,14 @@ public: virtual void listFiles(const String & path, std::vector & file_names) = 0; /// Open the file for read and return ReadBufferFromFileBase object. - virtual std::unique_ptr readFile( + virtual std::unique_ptr readFile( /// NOLINT const String & path, const ReadSettings & settings = ReadSettings{}, std::optional read_hint = {}, std::optional file_size = {}) const = 0; /// Open the file for write and return WriteBufferFromFileBase object. - virtual std::unique_ptr writeFile( + virtual std::unique_ptr writeFile( /// NOLINT const String & path, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, WriteMode mode = WriteMode::Rewrite) = 0; @@ -354,7 +354,7 @@ public: virtual UInt64 getSize() const = 0; /// Get i-th disk where reservation take place. 
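IDiskRemote::removeMetadata above now also drops cache entries (cache->hash, cache->remove) once the last metadata reference to a remote object is gone. A sketch of that bookkeeping with an invented FileCache stub; only the hash/remove shape mirrors the diff, the rest is illustrative:

```cpp
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

// Invented stand-in for the cache: entries are keyed by a hash of the
// remote object path.
struct FileCacheStub
{
    std::unordered_map<size_t, std::string> entries;

    size_t hash(const std::string & remote_path) const
    {
        return std::hash<std::string>{}(remote_path);
    }
    void remove(size_t key) { entries.erase(key); }
};

void removeRemoteObject(const std::string & remote_path, int & ref_count, FileCacheStub * cache)
{
    if (--ref_count > 0)
        return;                 // other metadata files still reference the object
    // ... enqueue remote_path for deletion from remote storage ...
    if (cache)
        cache->remove(cache->hash(remote_path)); // keep the local cache consistent
}

int main()
{
    FileCacheStub cache;
    cache.entries[cache.hash("abc/def")] = "cached bytes";
    int refs = 1;
    removeRemoteObject("abc/def", refs, &cache);
    std::cout << "cache entries left: " << cache.entries.size() << '\n'; // 0
}
```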
- virtual DiskPtr getDisk(size_t i = 0) const = 0; + virtual DiskPtr getDisk(size_t i = 0) const = 0; /// NOLINT /// Get all disks, used in reservation virtual Disks getDisks() const = 0; diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index 2a9aded039b..c7d9eb93a60 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -26,12 +27,12 @@ namespace ErrorCodes extern const int PATH_ACCESS_DENIED;; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; + extern const int MEMORY_LIMIT_EXCEEDED; } IDiskRemote::Metadata IDiskRemote::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) { - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); result.load(); return result; @@ -139,6 +140,9 @@ void IDiskRemote::Metadata::load() if (e.code() == ErrorCodes::UNKNOWN_FORMAT) throw; + if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) + throw; + throw Exception("Failed to read metadata file", e, ErrorCodes::UNKNOWN_FORMAT); } } @@ -281,7 +285,16 @@ void IDiskRemote::removeMetadata(const String & path, RemoteFSPathKeeperPtr fs_p if (metadata.ref_count == 0) { for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) + { fs_paths_keeper->addPath(remote_fs_root_path + remote_fs_object_path); + + if (cache) + { + auto key = cache->hash(remote_fs_object_path); + cache->remove(key); + } + } + return false; } else /// In other case decrement number of references, save metadata and delete hardlink. @@ -377,6 +390,7 @@ IDiskRemote::IDiskRemote( const String & name_, const String & remote_fs_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, const String & log_name_, size_t thread_pool_size) : IDisk(std::make_unique(log_name_, thread_pool_size)) @@ -384,6 +398,7 @@ IDiskRemote::IDiskRemote( , name(name_) , remote_fs_root_path(remote_fs_root_path_) , metadata_disk(metadata_disk_) + , cache(cache_) { } @@ -439,6 +454,7 @@ void IDiskRemote::removeSharedFile(const String & path, bool delete_metadata_onl { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); removeMetadata(path, fs_paths_keeper); + if (!delete_metadata_only) removeFromRemoteFS(fs_paths_keeper); } @@ -447,6 +463,7 @@ void IDiskRemote::removeSharedFile(const String & path, bool delete_metadata_onl void IDiskRemote::removeSharedFileIfExists(const String & path, bool delete_metadata_only) { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); + if (metadata_disk->exists(path)) { removeMetadata(path, fs_paths_keeper); @@ -473,6 +490,7 @@ void IDiskRemote::removeSharedRecursive(const String & path, bool delete_metadat { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); removeMetadataRecursive(path, fs_paths_keeper); + if (!delete_metadata_only) removeFromRemoteFS(fs_paths_keeper); } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index bdb09804a6c..82e76b8f68d 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -12,7 +13,6 @@ #include #include -namespace fs = std::filesystem; namespace CurrentMetrics { @@ -27,7 +27,7 @@ namespace DB class RemoteFSPathKeeper { public: - RemoteFSPathKeeper(size_t chunk_limit_) : chunk_limit(chunk_limit_) {} + explicit RemoteFSPathKeeper(size_t chunk_limit_) : chunk_limit(chunk_limit_) {} virtual ~RemoteFSPathKeeper() = default; @@ -55,6 +55,7 @@ public: const String 
& name_, const String & remote_fs_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, const String & log_name_, size_t thread_pool_size); @@ -162,6 +163,7 @@ protected: const String remote_fs_root_path; DiskPtr metadata_disk; + FileCachePtr cache; private: void removeMetadata(const String & path, RemoteFSPathKeeperPtr fs_paths_keeper); diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index 9e3425e8986..e693a8e9ea8 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -48,6 +48,11 @@ AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRe , prefetch_buffer(settings_.remote_fs_buffer_size) , min_bytes_for_seek(min_bytes_for_seek_) , must_read_until_position(settings_.must_read_until_position) +#ifndef NDEBUG + , log(&Poco::Logger::get("AsynchronousBufferFromRemoteFS")) +#else + , log(&Poco::Logger::get("AsyncBuffer(" + impl->getFileName() + ")")) +#endif { ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); } @@ -59,6 +64,12 @@ String AsynchronousReadIndirectBufferFromRemoteFS::getFileName() const } +String AsynchronousReadIndirectBufferFromRemoteFS::getInfoForLog() +{ + return impl->getInfoForLog(); +} + + bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() { /** @@ -76,8 +87,8 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() return false; if (file_offset_of_buffer_end > *read_until_position) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {})", - file_offset_of_buffer_end, *read_until_position); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {}, info: {})", + file_offset_of_buffer_end, *read_until_position, impl->getInfoForLog()); } else if (must_read_until_position) throw Exception(ErrorCodes::LOGICAL_ERROR, @@ -125,8 +136,11 @@ void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t pos if (prefetch_future.valid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilPosition"); - read_until_position = position; - impl->setReadUntilPosition(*read_until_position); + if (position > read_until_position) + { + read_until_position = position; + impl->setReadUntilPosition(*read_until_position); + } } @@ -157,8 +171,10 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() auto result = prefetch_future.get(); size = result.size; offset = result.offset; + LOG_TEST(log, "Current size: {}, offset: {}", size, offset); + /// If prefetch_future is valid, size should always be greater than zero. - assert(offset < size && size > 0); + assert(offset < size); ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); } @@ -173,7 +189,10 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() auto result = readInto(memory.data(), memory.size()).get(); size = result.size; auto offset = result.offset; - assert(offset < size || size == 0); + + LOG_TEST(log, "Current size: {}, offset: {}", size, offset); + assert(offset < size); + if (size) { /// Adjust the working buffer so that it ignores `offset` bytes. 
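For context on the comment above: the asynchronous reader returns a (size, offset) pair, and the buffer hides the first `offset` bytes from callers by setting ReadBuffer's protected `nextimpl_working_buffer_offset` before returning from nextImpl(). A minimal sketch of that mechanism, assuming ClickHouse's ReadBuffer/BufferBase API (the class below is hypothetical, for illustration only):

#include <IO/ReadBuffer.h>
#include <string>

namespace DB
{

/// Serves a single chunk of data, but asks ReadBuffer::next() to start the
/// caller's position `skip_bytes` into the working buffer -- the same trick
/// the asynchronous remote-FS buffers use to ignore the `offset` they get back.
class SkipPrefixReadBuffer : public ReadBuffer
{
public:
    SkipPrefixReadBuffer(std::string data_, size_t skip_bytes_)
        : ReadBuffer(nullptr, 0), data(std::move(data_)), skip_bytes(skip_bytes_) {}

private:
    bool nextImpl() override
    {
        if (served)
            return false;
        served = true;

        /// Expose the whole chunk as the working buffer...
        BufferBase::set(data.data(), data.size(), 0);
        /// ...but make next() move `pos` past the first skip_bytes,
        /// so the caller never sees them.
        nextimpl_working_buffer_offset = skip_bytes;
        return true;
    }

    std::string data;
    size_t skip_bytes;
    bool served = false;
};

}

After next(), the caller's read position starts skip_bytes into the chunk, which is exactly what the "adjust the working buffer so that it ignores `offset` bytes" comment above amounts to.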
@@ -181,7 +200,9 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() } } - file_offset_of_buffer_end = impl->offset(); + file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); + assert(file_offset_of_buffer_end == impl->getImplementationBufferOffset()); + prefetch_future = {}; return size; } diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index c9b81c98e61..48c4ff3b4f0 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -5,6 +5,7 @@ #include #include +namespace Poco { class Logger; } namespace DB { @@ -44,10 +45,12 @@ public: void prefetch() override; - void setReadUntilPosition(size_t position) override; + void setReadUntilPosition(size_t position) override; /// [..., position). void setReadUntilEnd() override; + String getInfoForLog() override; + private: bool nextImpl() override; @@ -76,6 +79,8 @@ private: std::optional read_until_position; bool must_read_until_position; + + Poco::Logger * log; }; } diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp new file mode 100644 index 00000000000..5cab2cb2995 --- /dev/null +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -0,0 +1,763 @@ +#include "CachedReadBufferFromRemoteFS.h" + +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event RemoteFSReadBytes; + extern const Event RemoteFSCacheReadBytes; + extern const Event RemoteFSCacheDownloadBytes; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int LOGICAL_ERROR; +} + +CachedReadBufferFromRemoteFS::CachedReadBufferFromRemoteFS( + const String & remote_fs_object_path_, + FileCachePtr cache_, + RemoteFSFileReaderCreator remote_file_reader_creator_, + const ReadSettings & settings_, + size_t read_until_position_) + : SeekableReadBuffer(nullptr, 0) +#ifndef NDEBUG + , log(&Poco::Logger::get("CachedReadBufferFromRemoteFS(" + remote_fs_object_path_ + ")")) +#else + , log(&Poco::Logger::get("CachedReadBufferFromRemoteFS")) +#endif + , cache_key(cache_->hash(remote_fs_object_path_)) + , remote_fs_object_path(remote_fs_object_path_) + , cache(cache_) + , settings(settings_) + , read_until_position(read_until_position_) + , remote_file_reader_creator(remote_file_reader_creator_) +{ +} + +void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size) +{ + file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size)); + + /** + * Segments in returned list are ordered in ascending order and represent a full contiguous + * interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY. 
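+     * (EMPTY segments have no downloader yet: the first reader that calls getOrSetDownloader() becomes responsible for downloading them.)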
+ */ + if (file_segments_holder->file_segments.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "List of file segments cannot be empty"); + + LOG_TEST(log, "Having {} file segments to read", file_segments_holder->file_segments.size()); + current_file_segment_it = file_segments_holder->file_segments.begin(); + + initialized = true; +} + +SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t offset) const +{ + return std::make_shared(cache->getPathInLocalCache(cache_key, offset), settings.local_fs_buffer_size); +} + +SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_) +{ + switch (read_type_) + { + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + { + /** + * Each downloader is elected to download at most buffer_size bytes and then any other can + * continue. The one who continues download should reuse download buffer. + * + * TODO: Also implementation (s3, hdfs, web) buffer might be passed through file segments. + * E.g. consider for query1 and query2 we need intersecting ranges like this: + * + * [___________] -- read_range_1 for query1 + * [_______________] -- read_range_2 for query2 + * ^___________^______^ + * | segment1 | segment2 + * + * So query2 can reuse implementation buffer, which downloaded segment1. + * Implementation buffer from segment1 is passed to segment2 once segment1 is loaded. + */ + + auto remote_fs_segment_reader = file_segment->getRemoteFileReader(); + + if (remote_fs_segment_reader) + return remote_fs_segment_reader; + + remote_fs_segment_reader = remote_file_reader_creator(); + file_segment->setRemoteFileReader(remote_fs_segment_reader); + + ///TODO: add check for pending data + return remote_fs_segment_reader; + } + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + { + /// Result buffer is owned only by current buffer -- not shareable like in the case above. + + if (remote_file_reader && remote_file_reader->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end) + return remote_file_reader; + + remote_file_reader = remote_file_reader_creator(); + return remote_file_reader; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot use remote filesystem reader with read type: {}", toString(read_type)); + } +} + +SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(FileSegmentPtr & file_segment) +{ + auto range = file_segment->range(); + + /// Each wait() call has a timeout of 1 second. 
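+    /// So, in the worst case, a reader waits roughly `remote_fs_cache_max_wait_sec` seconds for another query's download before giving up on the cache for this segment (SKIP_CACHE).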
+ size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec; + size_t wait_download_tries = 0; + + auto download_state = file_segment->state(); + while (true) + { + switch (download_state) + { + case FileSegment::State::SKIP_CACHE: + { + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + case FileSegment::State::EMPTY: + { + auto downloader_id = file_segment->getOrSetDownloader(); + if (downloader_id == file_segment->getCallerId()) + { + if (file_offset_of_buffer_end == file_segment->getDownloadOffset()) + { + read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + else + { + /// segment{k} + /// cache: [______|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); + bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); + + read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + } + else + { + download_state = file_segment->state(); + continue; + } + } + case FileSegment::State::DOWNLOADING: + { + size_t download_offset = file_segment->getDownloadOffset(); + bool can_start_from_cache = download_offset > file_offset_of_buffer_end; + + /// If file segment is being downloaded but we can already read from already downloaded part, do that. + if (can_start_from_cache) + { + /// segment{k} state: DOWNLOADING + /// cache: [______|___________ + /// ^ + /// download_offset (in progress) + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + + if (wait_download_tries++ < wait_download_max_tries) + { + download_state = file_segment->wait(); + } + else + { + download_state = FileSegment::State::SKIP_CACHE; + } + + continue; + } + case FileSegment::State::DOWNLOADED: + { + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + case FileSegment::State::PARTIALLY_DOWNLOADED: + { + auto downloader_id = file_segment->getOrSetDownloader(); + if (downloader_id == file_segment->getCallerId()) + { + size_t download_offset = file_segment->getDownloadOffset(); + bool can_start_from_cache = download_offset > file_offset_of_buffer_end; + + LOG_TEST(log, "Current download offset: {}, file offset of buffer end: {}", download_offset, file_offset_of_buffer_end); + + if (can_start_from_cache) + { + /// segment{k} + /// cache: [______|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + read_type = ReadType::CACHED; + file_segment->resetDownloader(); + return getCacheReadBuffer(range.left); + } + + if (download_offset < file_offset_of_buffer_end) + { + /// segment{1} + /// cache: [_____|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); + bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); + } + + download_offset = file_segment->getDownloadOffset(); + can_start_from_cache = download_offset > file_offset_of_buffer_end; + assert(!can_start_from_cache); + + read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + + download_state = 
file_segment->state(); + continue; + } + case FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION: + { + size_t download_offset = file_segment->getDownloadOffset(); + bool can_start_from_cache = download_offset > file_offset_of_buffer_end; + + if (can_start_from_cache) + { + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + else + { + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + } + } + } +} + +SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(FileSegmentPtr & file_segment) +{ + assert(!file_segment->isDownloader()); + assert(file_offset_of_buffer_end >= file_segment->range().left); + + auto range = file_segment->range(); + bytes_to_predownload = 0; + + auto read_buffer_for_file_segment = getReadBufferForFileSegment(file_segment); + + [[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + assert(download_current_segment == file_segment->isDownloader()); + + assert(file_segment->range() == range); + assert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right); + + LOG_TEST(log, "Current file segment: {}, read type: {}, current file offset: {}", + range.toString(), toString(read_type), file_offset_of_buffer_end); + + read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right] + + switch (read_type) + { + case ReadType::CACHED: + { + size_t seek_offset = file_offset_of_buffer_end - range.left; + read_buffer_for_file_segment->seek(seek_offset, SEEK_SET); + + auto * file_reader = assert_cast(read_buffer_for_file_segment.get()); + size_t file_size = file_reader->size(); + auto state = file_segment->state(); + + LOG_TEST(log, "Cache file: {}. 
Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}", + file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset()); + + assert(file_size > 0); + break; + } + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + { + read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET); + break; + } + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + { + assert(file_segment->isDownloader()); + + if (bytes_to_predownload) + { + size_t download_offset = file_segment->getDownloadOffset(); + read_buffer_for_file_segment->seek(download_offset, SEEK_SET); + } + else + { + read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET); + } + + auto impl_range = read_buffer_for_file_segment->getRemainingReadRange(); + auto download_offset = file_segment->getDownloadOffset(); + if (download_offset != static_cast(read_buffer_for_file_segment->getPosition())) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, " + "implementation buffer reading until: {}, file segment info: {}", + file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(), + impl_range.left, *impl_range.right, file_segment->getInfoForLog()); + + break; + } + } + + return read_buffer_for_file_segment; +} + +bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() +{ + LOG_TEST(log, "Completed segment: {}", (*current_file_segment_it)->range().toString()); + + auto file_segment_it = current_file_segment_it++; + auto & file_segment = *file_segment_it; + + [[maybe_unused]] const auto & range = file_segment->range(); + assert(file_offset_of_buffer_end > range.right); + + LOG_TEST(log, "Removing file segment: {}, downloader: {}, state: {}", + file_segment->range().toString(), file_segment->getDownloader(), file_segment->state()); + + /// Do not hold pointer to file segment if it is not needed anymore + /// so can become releasable and can be evicted from cache. + file_segments_holder->file_segments.erase(file_segment_it); + + if (current_file_segment_it == file_segments_holder->file_segments.end()) + return false; + + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + + LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString()); + return true; +} + +void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) +{ + if (bytes_to_predownload) + { + /// Consider this case. Some user needed segment [a, b] and downloaded it partially. + /// But before he called complete(state) or his holder called complete(), + /// some other user, who needed segment [a', b'], a < a' < b', started waiting on [a, b] to be + /// downloaded because it intersects with the range he needs. + /// But then first downloader fails and second must continue. In this case we need to + /// download from offset a'' < a', but return buffer from offset a'. + LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId()); + + assert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment->getDownloadOffset()); + + while (true) + { + if (!bytes_to_predownload || implementation_buffer->eof()) + { + if (bytes_to_predownload) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Failed to predownload remaining {} bytes. 
Current file segment: {}, current download offset: {}, expected: {}, eof: {}", + file_segment->range().toString(), file_segment->getDownloadOffset(), file_offset_of_buffer_end, implementation_buffer->eof()); + + auto result = implementation_buffer->hasPendingData(); + + if (result) + { + nextimpl_working_buffer_offset = implementation_buffer->offset(); + + auto download_offset = file_segment->getDownloadOffset(); + if (download_offset != static_cast(implementation_buffer->getPosition()) || download_offset != file_offset_of_buffer_end) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Buffer's offsets mismatch after predownloading; download offset: {}, cached buffer offset: {}, implementation buffer offset: {}, " + "file segment info: {}", download_offset, file_offset_of_buffer_end, implementation_buffer->getPosition(), file_segment->getInfoForLog()); + } + + break; + } + + size_t current_predownload_size = std::min(implementation_buffer->buffer().size(), bytes_to_predownload); + + if (file_segment->reserve(current_predownload_size)) + { + LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size()); + + file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size); + + bytes_to_predownload -= current_predownload_size; + implementation_buffer->position() += current_predownload_size; + } + else + { + /// We were predownloading: + /// segment{1} + /// cache: [_____|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + /// But space reservation failed. + /// So get working and internal buffer from predownload buffer, get new download buffer, + /// return buffer back, seek to actual position. + /// We could reuse predownload buffer and just seek to needed position, but for now + /// seek is only allowed once for ReadBufferForS3 - before call to nextImpl. + /// TODO: allow seek more than once with seek avoiding. + + bytes_to_predownload = 0; + file_segment->complete(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); + + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + + swap(*implementation_buffer); + working_buffer.resize(0); + position() = working_buffer.end(); + + implementation_buffer = getRemoteFSReadBuffer(file_segment, read_type); + + swap(*implementation_buffer); + + implementation_buffer->seek(file_offset_of_buffer_end, SEEK_SET); + + LOG_TEST( + log, "Predownload failed because of space limit. Will read from remote filesystem starting from offset: {}", + file_offset_of_buffer_end); + + break; + } + } + } +} + +bool CachedReadBufferFromRemoteFS::updateImplementationBufferIfNeeded() +{ + auto & file_segment = *current_file_segment_it; + auto current_read_range = file_segment->range(); + auto current_state = file_segment->state(); + + assert(current_read_range.left <= file_offset_of_buffer_end); + assert(!file_segment->isDownloader()); + + if (file_offset_of_buffer_end > current_read_range.right) + { + return completeFileSegmentAndGetNext(); + } + + if (read_type == ReadType::CACHED && current_state != FileSegment::State::DOWNLOADED) + { + /// If current read_type is ReadType::CACHED and file segment is not DOWNLOADED, + /// it means the following case, e.g. we started from CacheReadBuffer and continue with RemoteFSReadBuffer. 
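+        /// (the cached data has been read up to the current download_offset, and the rest of the segment is not downloaded yet):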
+ /// segment{k} + /// cache: [______|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + auto download_offset = file_segment->getDownloadOffset(); + if (download_offset == file_offset_of_buffer_end) + { + /// TODO: makes sense to reuse local file reader if we return here with CACHED read type again? + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + + return true; + } + else if (download_offset < file_offset_of_buffer_end) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} >= {} ({})", download_offset, file_offset_of_buffer_end, getInfoForLog()); + } + + if (read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE) + { + /** + * ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE means that on previous getImplementationBuffer() call + * current buffer successfully called file_segment->getOrSetDownloader() and became a downloader + * for this file segment. However, the downloader's term has a lifespan of 1 nextImpl() call, + * e.g. downloader reads buffer_size byte and calls completeBatchAndResetDownloader() and some other + * thread can become a downloader if it calls getOrSetDownloader() faster. + * + * So downloader is committed to download only buffer_size bytes and then is not a downloader anymore, + * because there is no guarantee on a higher level, that current buffer will not disappear without + * being destructed till the end of query or without finishing the read range, which he was supposed + * to read by marks range given to him. Therefore, each nextImpl() call, in case of + * READ_AND_PUT_IN_CACHE, starts with getOrSetDownloader(). + */ + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + } + + return true; +} + +bool CachedReadBufferFromRemoteFS::nextImpl() +{ + try + { + return nextImplStep(); + } + catch (Exception & e) + { + e.addMessage("Cache info: {}", getInfoForLog()); + throw; + } +} + +bool CachedReadBufferFromRemoteFS::nextImplStep() +{ + if (IFileCache::shouldBypassCache()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed"); + + if (!initialized) + initialize(file_offset_of_buffer_end, getTotalSizeToRead()); + + if (current_file_segment_it == file_segments_holder->file_segments.end()) + return false; + + SCOPE_EXIT({ + if (current_file_segment_it == file_segments_holder->file_segments.end()) + return; + + auto & file_segment = *current_file_segment_it; + + bool download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + if (download_current_segment) + { + try + { + bool file_segment_already_completed = !file_segment->isDownloader(); + if (!file_segment_already_completed) + file_segment->completeBatchAndResetDownloader(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + assert(!file_segment->isDownloader()); + }); + + bytes_to_predownload = 0; + + if (implementation_buffer) + { + bool can_read_further = updateImplementationBufferIfNeeded(); + if (!can_read_further) + return false; + } + else + { + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + } + + assert(!internal_buffer.empty()); + swap(*implementation_buffer); + + auto & file_segment = *current_file_segment_it; + auto current_read_range = file_segment->range(); + + LOG_TEST(log, "Current segment: {}, downloader: {}, current count: {}, position: {}", + current_read_range.toString(), file_segment->getDownloader(), implementation_buffer->count(), implementation_buffer->getPosition()); + + assert(current_read_range.left <= file_offset_of_buffer_end); + assert(current_read_range.right >= file_offset_of_buffer_end); + + bool result = false; + size_t size = 0; + + size_t needed_to_predownload = bytes_to_predownload; + if (needed_to_predownload) + { + predownload(file_segment); + + result = implementation_buffer->hasPendingData(); + size = implementation_buffer->available(); + } + + auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + if (download_current_segment != file_segment->isDownloader()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect segment state. Having read type: {}, Caller id: {}, downloader id: {}, file segment state: {}", + toString(read_type), file_segment->getCallerId(), file_segment->getDownloader(), file_segment->state()); + + if (!result) + { + result = implementation_buffer->next(); + size = implementation_buffer->buffer().size(); + } + + if (result) + { + if (download_current_segment) + { + assert(file_offset_of_buffer_end + size - 1 <= file_segment->range().right); + + if (file_segment->reserve(size)) + { + file_segment->write(needed_to_predownload ? implementation_buffer->position() : implementation_buffer->buffer().begin(), size); + } + else + { + download_current_segment = false; + file_segment->complete(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); + LOG_DEBUG(log, "No space left in cache, will continue without cache download"); + } + } + + switch (read_type) + { + case ReadType::CACHED: + { + ProfileEvents::increment(ProfileEvents::RemoteFSCacheReadBytes, size); + break; + } + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + { + ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size); + break; + } + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + { + ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size); + ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size); + break; + } + } + + if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) + { + size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; + size = std::min(size, remaining_size_to_read); + implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); + } + + file_offset_of_buffer_end += size; + } + + swap(*implementation_buffer); + + if (download_current_segment) + file_segment->completeBatchAndResetDownloader(); + + assert(!file_segment->isDownloader()); + + LOG_TEST(log, + "Key: {}. 
Returning with {} bytes, buffer position: {} (offset: {}, predownloaded: {}), " + "buffer available: {}, current range: {}, current offset: {}, file segment state: {}, download offset: {}, read_type: {}, " + "reading until position: {}, started with offset: {}, remaining ranges: {}", + getHexUIntLowercase(cache_key), working_buffer.size(), getPosition(), offset(), needed_to_predownload, + available(), current_read_range.toString(), + file_offset_of_buffer_end, FileSegment::stateToString(file_segment->state()), file_segment->getDownloadOffset(), toString(read_type), + read_until_position, first_offset, file_segments_holder->toString()); + + if (size == 0 && file_offset_of_buffer_end < read_until_position) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Having zero bytes, but range is not finished: file offset: {}, reading until: {}", + file_offset_of_buffer_end, read_until_position); + return result; +} + +off_t CachedReadBufferFromRemoteFS::seek(off_t offset, int whence) +{ + if (initialized) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, + "Seek is allowed only before first read attempt from the buffer"); + + if (whence != SEEK_SET) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET allowed"); + + first_offset = offset; + file_offset_of_buffer_end = offset; + size_t size = getTotalSizeToRead(); + initialize(offset, size); + + return offset; +} + +size_t CachedReadBufferFromRemoteFS::getTotalSizeToRead() +{ + /// Last position should be guaranteed to be set, as at least we always know file size. + if (!read_until_position) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Last position was not set"); + + /// On this level should be guaranteed that read size is non-zero. + if (file_offset_of_buffer_end >= read_until_position) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Read boundaries mismatch. Expected {} < {}", + file_offset_of_buffer_end, read_until_position); + + return read_until_position - file_offset_of_buffer_end; +} + +void CachedReadBufferFromRemoteFS::setReadUntilPosition(size_t) +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Method `setReadUntilPosition()` not allowed"); +} + +off_t CachedReadBufferFromRemoteFS::getPosition() +{ + return file_offset_of_buffer_end - available(); +} + +std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() const +{ + if (!file_segments_holder) + throw Exception(ErrorCodes::LOGICAL_ERROR, "File segments holder not initialized"); + + const auto & file_segments = file_segments_holder->file_segments; + for (auto it = file_segments.rbegin(); it != file_segments.rend(); ++it) + { + const auto & file_segment = *it; + if (file_segment->state() != FileSegment::State::DOWNLOADED) + return file_segment->range().right; + } + + return std::nullopt; +} + +String CachedReadBufferFromRemoteFS::getInfoForLog() +{ + return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, file segment info: {}", + remote_fs_object_path, getHexUIntLowercase(cache_key), file_offset_of_buffer_end, + (implementation_buffer ? + std::to_string(implementation_buffer->getRemainingReadRange().left) + '-' + (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None") + : "None"), + (current_file_segment_it == file_segments_holder->file_segments.end() ? 
"None" : (*current_file_segment_it)->getInfoForLog())); +} + +} diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h new file mode 100644 index 00000000000..3d03debcd01 --- /dev/null +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class CachedReadBufferFromRemoteFS : public SeekableReadBuffer +{ +public: + using RemoteFSFileReaderCreator = std::function; + + CachedReadBufferFromRemoteFS( + const String & remote_fs_object_path_, + FileCachePtr cache_, + RemoteFSFileReaderCreator remote_file_reader_creator_, + const ReadSettings & settings_, + size_t read_until_position_); + + bool nextImpl() override; + + off_t seek(off_t off, int whence) override; + + off_t getPosition() override; + + size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } + + String getInfoForLog() override; + + void setReadUntilPosition(size_t position) override; + +private: + void initialize(size_t offset, size_t size); + + SeekableReadBufferPtr getImplementationBuffer(FileSegmentPtr & file_segment); + + SeekableReadBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment); + + SeekableReadBufferPtr getCacheReadBuffer(size_t offset) const; + + std::optional getLastNonDownloadedOffset() const; + + bool updateImplementationBufferIfNeeded(); + + void predownload(FileSegmentPtr & file_segment); + + bool nextImplStep(); + + enum class ReadType + { + CACHED, + REMOTE_FS_READ_BYPASS_CACHE, + REMOTE_FS_READ_AND_PUT_IN_CACHE, + }; + + SeekableReadBufferPtr getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_); + + size_t getTotalSizeToRead(); + bool completeFileSegmentAndGetNext(); + + Poco::Logger * log; + IFileCache::Key cache_key; + String remote_fs_object_path; + FileCachePtr cache; + ReadSettings settings; + + size_t read_until_position; + size_t file_offset_of_buffer_end = 0; + size_t bytes_to_predownload = 0; + + RemoteFSFileReaderCreator remote_file_reader_creator; + + /// Remote read buffer, which can only be owned by current buffer. 
+ FileSegment::RemoteFileReaderPtr remote_file_reader; + + std::optional file_segments_holder; + FileSegments::iterator current_file_segment_it; + + SeekableReadBufferPtr implementation_buffer; + bool initialized = false; + + ReadType read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + + static String toString(ReadType type) + { + switch (type) + { + case ReadType::CACHED: + return "CACHED"; + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + return "REMOTE_FS_READ_BYPASS_CACHE"; + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + return "REMOTE_FS_READ_AND_PUT_IN_CACHE"; + } + } + size_t first_offset = 0; +}; + +} diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 574845642bf..8f91804bbbe 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -16,51 +16,79 @@ #include #endif +#include #include #include #include +#include namespace fs = std::filesystem; namespace DB { -#if USE_AWS_S3 -SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const String & path, size_t read_until_position_) const +namespace ErrorCodes { - return std::make_unique(client_ptr, bucket, - fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, settings, threadpool_read, read_until_position_); + extern const int LOGICAL_ERROR; +} + +#if USE_AWS_S3 +SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const String & path, size_t file_size) +{ + current_path = path; + + auto cache = settings.remote_fs_cache; + bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache(); + + auto remote_file_reader_creator = [=, this]() + { + return std::make_unique( + client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, + settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true); + }; + + if (with_cache) + { + return std::make_shared( + path, cache, remote_file_reader_creator, settings, read_until_position ? 
read_until_position : file_size); + } + + return remote_file_reader_creator(); } #endif #if USE_AZURE_BLOB_STORAGE -SeekableReadBufferPtr ReadBufferFromAzureBlobStorageGather::createImplementationBuffer(const String & path, size_t read_until_position_) const +SeekableReadBufferPtr ReadBufferFromAzureBlobStorageGather::createImplementationBuffer(const String & path, size_t /* file_size */) { + current_path = path; return std::make_unique(blob_container_client, path, max_single_read_retries, - max_single_download_retries, settings.remote_fs_buffer_size, threadpool_read, read_until_position_); + max_single_download_retries, settings.remote_fs_buffer_size, /* use_external_buffer */true, read_until_position); } #endif -SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBuffer(const String & path, size_t read_until_position_) const +SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBuffer(const String & path, size_t /* file_size */) { - return std::make_unique(fs::path(uri) / path, context, settings, threadpool_read, read_until_position_); + current_path = path; + return std::make_unique(fs::path(uri) / path, context, settings, /* use_external_buffer */true, read_until_position); } #if USE_HDFS -SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBuffer(const String & path, size_t read_until_position_) const +SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBuffer(const String & path, size_t /* file_size */) { - return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, buf_size, read_until_position_); + return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, settings.remote_fs_buffer_size); } #endif -ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const String & path_) +ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const ReadSettings & settings_, const String & path_) : ReadBuffer(nullptr, 0) , metadata(metadata_) + , settings(settings_) , canonical_path(path_) + , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather")) { } @@ -75,8 +103,8 @@ ReadBufferFromRemoteFSGather::ReadResult ReadBufferFromRemoteFSGather::readInto( file_offset_of_buffer_end = offset; bytes_to_ignore = ignore; - if (bytes_to_ignore) - assert(initialized()); + + assert(!bytes_to_ignore || initialized()); auto result = nextImpl(); @@ -100,11 +128,8 @@ void ReadBufferFromRemoteFSGather::initialize() /// Do not create a new buffer if we already have what we need. if (!current_buf || current_buf_idx != i) { - current_buf = createImplementationBuffer(file_path, read_until_position); current_buf_idx = i; - - if (auto * in = dynamic_cast(current_buf.get())) - in->setReadType(SeekableReadBufferWithSize::ReadType::DISK_READ); + current_buf = createImplementationBuffer(file_path, size); } current_buf->seek(current_buf_offset, SEEK_SET); @@ -133,22 +158,34 @@ bool ReadBufferFromRemoteFSGather::nextImpl() else return false; + if (!moveToNextBuffer()) + return false; + + return readImpl(); +} + + +bool ReadBufferFromRemoteFSGather::moveToNextBuffer() +{ /// If there is no available buffers - nothing to read. 
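+    /// I.e. every remote object listed in the metadata file has already been read.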
if (current_buf_idx + 1 >= metadata.remote_fs_objects.size()) return false; ++current_buf_idx; - const auto & current_path = metadata.remote_fs_objects[current_buf_idx].first; - current_buf = createImplementationBuffer(current_path, read_until_position); + const auto & [path, size] = metadata.remote_fs_objects[current_buf_idx]; + current_buf = createImplementationBuffer(path, size); - return readImpl(); + return true; } + bool ReadBufferFromRemoteFSGather::readImpl() { swap(*current_buf); + bool result = false; + /** * Lazy seek is performed here. * In asynchronous buffer when seeking to offset in range [pos, pos + min_bytes_for_seek] @@ -157,33 +194,50 @@ bool ReadBufferFromRemoteFSGather::readImpl() if (bytes_to_ignore) { current_buf->ignore(bytes_to_ignore); + result = current_buf->hasPendingData(); file_offset_of_buffer_end += bytes_to_ignore; bytes_to_ignore = 0; } - bool result = current_buf->hasPendingData(); - if (result) + if (!result) + result = current_buf->next(); + + if (metadata.remote_fs_objects.size() == 1) { - /// bytes_to_ignore already added. - file_offset_of_buffer_end += current_buf->available(); + file_offset_of_buffer_end = current_buf->getFileOffsetOfBufferEnd(); } else { - result = current_buf->next(); - if (result) - file_offset_of_buffer_end += current_buf->buffer().size(); + /// For log family engines there are multiple s3 files for the same clickhouse file + file_offset_of_buffer_end += current_buf->available(); } swap(*current_buf); + /// Required for non-async reads. + if (result) + { + assert(available()); + nextimpl_working_buffer_offset = offset(); + } + return result; } +size_t ReadBufferFromRemoteFSGather::getFileOffsetOfBufferEnd() const +{ + return file_offset_of_buffer_end; +} + + void ReadBufferFromRemoteFSGather::setReadUntilPosition(size_t position) { - read_until_position = position; - reset(); + if (position != read_until_position) + { + read_until_position = position; + reset(); + } } @@ -194,7 +248,7 @@ void ReadBufferFromRemoteFSGather::reset() String ReadBufferFromRemoteFSGather::getFileName() const { - return canonical_path; + return current_path; } @@ -206,4 +260,21 @@ size_t ReadBufferFromRemoteFSGather::getFileSize() const return size; } +String ReadBufferFromRemoteFSGather::getInfoForLog() +{ + if (!current_buf) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get info: buffer not initialized"); + + return current_buf->getInfoForLog(); +} + +size_t ReadBufferFromRemoteFSGather::getImplementationBufferOffset() const +{ + if (!current_buf) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer not initialized"); + + return current_buf->getFileOffsetOfBufferEnd(); +} + + } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index ddd651f47a1..25bfe0b7e16 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -9,13 +9,9 @@ #include #endif -namespace Aws -{ -namespace S3 -{ -class S3Client; -} -} +namespace Aws { namespace S3 { class S3Client; } } + +namespace Poco { class Logger; } namespace DB { @@ -29,7 +25,10 @@ class ReadBufferFromRemoteFSGather : public ReadBuffer friend class ReadIndirectBufferFromRemoteFS; public: - explicit ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const String & path_); + ReadBufferFromRemoteFSGather( + const RemoteMetadata & metadata_, + const ReadSettings & settings_, + const String & path_); String getFileName() const; @@ -47,15 +46,27 @@ public: size_t getFileSize() const; - size_t 
offset() const { return file_offset_of_buffer_end; } + size_t getFileOffsetOfBufferEnd() const; bool initialized() const { return current_buf != nullptr; } + String getInfoForLog(); + + size_t getImplementationBufferOffset() const; + protected: - virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const = 0; + virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) = 0; RemoteMetadata metadata; + ReadSettings settings; + + bool use_external_buffer; + + size_t read_until_position = 0; + + String current_path; + private: bool nextImpl() override; @@ -63,6 +74,8 @@ private: bool readImpl(); + bool moveToNextBuffer(); + SeekableReadBufferPtr current_buf; size_t current_buf_idx = 0; @@ -76,9 +89,9 @@ private: */ size_t bytes_to_ignore = 0; - size_t read_until_position = 0; - String canonical_path; + + Poco::Logger * log; }; @@ -93,25 +106,20 @@ public: const String & bucket_, IDiskRemote::Metadata metadata_, size_t max_single_read_retries_, - const ReadSettings & settings_, - bool threadpool_read_ = false) - : ReadBufferFromRemoteFSGather(metadata_, path_) + const ReadSettings & settings_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , client_ptr(std::move(client_ptr_)) , bucket(bucket_) , max_single_read_retries(max_single_read_retries_) - , settings(settings_) - , threadpool_read(threadpool_read_) { } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: std::shared_ptr client_ptr; String bucket; UInt64 max_single_read_retries; - ReadSettings settings; - bool threadpool_read; }; #endif @@ -127,25 +135,20 @@ public: IDiskRemote::Metadata metadata_, size_t max_single_read_retries_, size_t max_single_download_retries_, - const ReadSettings & settings_, - bool threadpool_read_ = false) - : ReadBufferFromRemoteFSGather(metadata_, path_) + const ReadSettings & settings_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , blob_container_client(blob_container_client_) , max_single_read_retries(max_single_read_retries_) , max_single_download_retries(max_single_download_retries_) - , settings(settings_) - , threadpool_read(threadpool_read_) { } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: std::shared_ptr blob_container_client; size_t max_single_read_retries; size_t max_single_download_retries; - ReadSettings settings; - bool threadpool_read; }; #endif @@ -158,23 +161,18 @@ public: const String & uri_, RemoteMetadata metadata_, ContextPtr context_, - size_t threadpool_read_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(metadata_, path_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , uri(uri_) , context(context_) - , threadpool_read(threadpool_read_) - , settings(settings_) { } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: String uri; ContextPtr context; - bool threadpool_read; - ReadSettings settings; }; @@ -188,23 +186,21 @@ public: const Poco::Util::AbstractConfiguration & config_, const String & hdfs_uri_, 
IDiskRemote::Metadata metadata_, - size_t buf_size_) - : ReadBufferFromRemoteFSGather(metadata_, path_) + const ReadSettings & settings_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , config(config_) - , buf_size(buf_size_) { const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2); hdfs_directory = hdfs_uri_.substr(begin_of_path); hdfs_uri = hdfs_uri_.substr(0, begin_of_path); } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: const Poco::Util::AbstractConfiguration & config; String hdfs_uri; String hdfs_directory; - size_t buf_size; }; #endif diff --git a/src/Disks/IO/ReadBufferFromWebServer.h b/src/Disks/IO/ReadBufferFromWebServer.h index 7285a94b0d8..ea746fb75a1 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.h +++ b/src/Disks/IO/ReadBufferFromWebServer.h @@ -30,6 +30,8 @@ public: off_t getPosition() override; + size_t getFileOffsetOfBufferEnd() const override { return offset; } + private: std::unique_ptr initialize(); diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index cbf265ce741..699f8380cb8 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -13,7 +13,9 @@ namespace ErrorCodes ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS( - std::shared_ptr impl_) : impl(std::move(impl_)) + std::shared_ptr impl_) + : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) + , impl(impl_) { } @@ -30,6 +32,18 @@ String ReadIndirectBufferFromRemoteFS::getFileName() const } +void ReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t position) +{ + impl->setReadUntilPosition(position); +} + + +void ReadIndirectBufferFromRemoteFS::setReadUntilEnd() +{ + impl->setReadUntilPosition(impl->getFileSize()); +} + + off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) { if (whence == SEEK_CUR) @@ -66,6 +80,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) impl->reset(); resetWorkingBuffer(); + file_offset_of_buffer_end = impl->file_offset_of_buffer_end; return impl->file_offset_of_buffer_end; } @@ -74,11 +89,21 @@ bool ReadIndirectBufferFromRemoteFS::nextImpl() { /// Transfer current position and working_buffer to actual ReadBuffer swap(*impl); + + assert(!impl->hasPendingData()); /// Position and working_buffer will be updated in next() call auto result = impl->next(); /// and assigned to current buffer. 
swap(*impl); + if (result) + { + file_offset_of_buffer_end += available(); + BufferBase::set(working_buffer.begin() + offset(), available(), 0); + } + + assert(file_offset_of_buffer_end == impl->file_offset_of_buffer_end); + return result; } diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index 0c8b1b4dd21..a0669be411f 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -27,10 +27,16 @@ public: String getFileName() const override; + void setReadUntilPosition(size_t position) override; + + void setReadUntilEnd() override; + private: bool nextImpl() override; std::shared_ptr impl; + + size_t file_offset_of_buffer_end = 0; }; } diff --git a/src/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp similarity index 91% rename from src/IO/ThreadPoolReader.cpp rename to src/Disks/IO/ThreadPoolReader.cpp index 0c2791c6f68..e39f6057445 100644 --- a/src/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -1,4 +1,4 @@ -#include +#include "ThreadPoolReader.h" #include #include #include @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -184,9 +185,26 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - auto task = std::make_shared>([request, fd] + ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() + ? CurrentThread::get().getThreadGroup() + : MainThreadStatus::getInstance().getThreadGroup(); + + ContextPtr query_context; + if (CurrentThread::isInitialized()) + query_context = CurrentThread::get().getQueryContext(); + + auto task = std::make_shared>([request, fd, running_group, query_context] { + ThreadStatus thread_status; + + if (query_context) + thread_status.attachQueryContext(query_context); + + if (running_group) + thread_status.attachQuery(running_group); + setThreadName("ThreadPoolRead"); + Stopwatch watch(CLOCK_MONOTONIC); size_t bytes_read = 0; @@ -219,6 +237,9 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissElapsedMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); + if (running_group) + thread_status.detachQuery(); + return Result{ .size = bytes_read, .offset = request.ignore }; }); diff --git a/src/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h similarity index 100% rename from src/IO/ThreadPoolReader.h rename to src/Disks/IO/ThreadPoolReader.h diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 4be55ff3ecf..bdb012a6376 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -41,9 +42,28 @@ ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queu std::future ThreadPoolRemoteFSReader::submit(Request request) { - auto task = std::make_shared>([request] + ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() + ? 
CurrentThread::get().getThreadGroup() + : MainThreadStatus::getInstance().getThreadGroup(); + + ContextPtr query_context; + if (CurrentThread::isInitialized()) + query_context = CurrentThread::get().getQueryContext(); + + auto task = std::make_shared>([request, running_group, query_context] { + ThreadStatus thread_status; + + /// Save query context if any, because cache implementation needs it. + if (query_context) + thread_status.attachQueryContext(query_context); + + /// To be able to pass ProfileEvents. + if (running_group) + thread_status.attachQuery(running_group); + setThreadName("VFSRead"); + CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; auto * remote_fs_fd = assert_cast(request.descriptor.get()); @@ -54,6 +74,9 @@ std::future ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read); + if (running_group) + thread_status.detachQuery(); + return Result{ .size = bytes_read, .offset = offset }; }); diff --git a/src/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp similarity index 98% rename from src/IO/createReadBufferFromFileBase.cpp rename to src/Disks/IO/createReadBufferFromFileBase.cpp index b83bfdbf3a8..4ff492e4013 100644 --- a/src/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -1,9 +1,9 @@ -#include +#include #include #include #include #include -#include +#include #include #include diff --git a/src/IO/createReadBufferFromFileBase.h b/src/Disks/IO/createReadBufferFromFileBase.h similarity index 100% rename from src/IO/createReadBufferFromFileBase.h rename to src/Disks/IO/createReadBufferFromFileBase.h diff --git a/src/Disks/LocalDirectorySyncGuard.h b/src/Disks/LocalDirectorySyncGuard.h index 34e4cb9e657..cb891461e85 100644 --- a/src/Disks/LocalDirectorySyncGuard.h +++ b/src/Disks/LocalDirectorySyncGuard.h @@ -17,8 +17,8 @@ class LocalDirectorySyncGuard final : public ISyncGuard public: /// NOTE: If you have already opened descriptor, it's preferred to use /// this constructor instead of constructor with path. 
- LocalDirectorySyncGuard(int fd_) : fd(fd_) {} - LocalDirectorySyncGuard(const String & full_path); + explicit LocalDirectorySyncGuard(int fd_) : fd(fd_) {} + explicit LocalDirectorySyncGuard(const String & full_path); ~LocalDirectorySyncGuard() override; private: diff --git a/src/Disks/RemoteDisksCommon.cpp b/src/Disks/RemoteDisksCommon.cpp index 1402e3f62c8..36f2aed3e7c 100644 --- a/src/Disks/RemoteDisksCommon.cpp +++ b/src/Disks/RemoteDisksCommon.cpp @@ -1,12 +1,13 @@ #include #include +#include +#include namespace DB { namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; +{extern const int BAD_ARGUMENTS; } std::shared_ptr wrapWithCache( @@ -26,6 +27,14 @@ std::shared_ptr wrapWithCache( return std::make_shared(disk, cache_disk, cache_file_predicate); } +static String getDiskMetadataPath( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context) +{ + return config.getString(config_prefix + ".metadata_path", context->getPath() + "disks/" + name + "/"); +} std::pair prepareForLocalMetadata( const String & name, @@ -34,10 +43,40 @@ std::pair prepareForLocalMetadata( ContextPtr context) { /// where the metadata files are stored locally - auto metadata_path = config.getString(config_prefix + ".metadata_path", context->getPath() + "disks/" + name + "/"); + auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); fs::create_directories(metadata_path); auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0); return std::make_pair(metadata_path, metadata_disk); } + +FileCachePtr getCachePtrForDisk( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context) +{ + bool data_cache_enabled = config.getBool(config_prefix + ".data_cache_enabled", false); + if (!data_cache_enabled) + return nullptr; + + auto cache_base_path = config.getString(config_prefix + ".data_cache_path", fs::path(context->getPath()) / "disks" / name / "data_cache/"); + if (!fs::exists(cache_base_path)) + fs::create_directories(cache_base_path); + + LOG_INFO(&Poco::Logger::get("Disk(" + name + ")"), "Disk registered with cache path: {}", cache_base_path); + + auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); + if (metadata_path == cache_base_path) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata path and cache base path must be different: {}", metadata_path); + + size_t max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", 1024*1024*1024); + size_t max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); + size_t max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + + auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, max_cache_size, max_cache_elements, max_file_segment_size); + cache->initialize(); + return cache; +} + } diff --git a/src/Disks/RemoteDisksCommon.h b/src/Disks/RemoteDisksCommon.h index 0d057b44d18..661d4e293df 100644 --- a/src/Disks/RemoteDisksCommon.h +++ b/src/Disks/RemoteDisksCommon.h @@ -21,4 +21,10 @@ std::pair prepareForLocalMetadata( const String & config_prefix, ContextPtr context); +FileCachePtr getCachePtrForDisk( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context); + } diff --git a/src/Disks/S3/DiskS3.cpp 
b/src/Disks/S3/DiskS3.cpp index aff4985a4f1..de63f3ed82f 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -153,10 +153,11 @@ DiskS3::DiskS3( String bucket_, String s3_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, ContextPtr context_, SettingsPtr settings_, GetDiskSettings settings_getter_) - : IDiskRemote(name_, s3_root_path_, metadata_disk_, "DiskS3", settings_->thread_pool_size) + : IDiskRemote(name_, s3_root_path_, metadata_disk_, std::move(cache_), "DiskS3", settings_->thread_pool_size) , bucket(std::move(bucket_)) , current_settings(std::move(settings_)) , settings_getter(settings_getter_) @@ -223,17 +224,18 @@ std::unique_ptr DiskS3::readFile(const String & path, co LOG_TEST(log, "Read from file by path: {}. Existing S3 objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + ReadSettings disk_read_settings{read_settings}; + if (cache) + disk_read_settings.remote_fs_cache = cache; auto s3_impl = std::make_unique( - path, - settings->client, bucket, metadata, - settings->s3_max_single_read_retries, read_settings, threadpool_read); + path, settings->client, bucket, metadata, + settings->s3_max_single_read_retries, disk_read_settings); - if (threadpool_read) + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto reader = getThreadPoolReader(); - return std::make_unique(reader, read_settings, std::move(s3_impl)); + return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); } else { @@ -286,7 +288,7 @@ std::unique_ptr DiskS3::writeFile(const String & path, settings->s3_upload_part_size_multiply_parts_count_threshold, settings->s3_max_single_part_upload_size, std::move(object_metadata), - buf_size /*, std::move(schedule) */); + buf_size, std::move(schedule)); auto create_metadata_callback = [this, path, blob_name, mode] (size_t count) { diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 698fa6173c2..2de1600d906 100644 @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -73,6 +74,7 @@ public: String bucket_, String s3_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, ContextPtr context_, SettingsPtr settings_, GetDiskSettings settings_getter_); diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 9b2e7137d53..2b5fe3c5a81 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -19,6 +19,7 @@ #include "Disks/DiskRestartProxy.h" #include "Disks/DiskLocal.h" #include "Disks/RemoteDisksCommon.h" +#include namespace DB { @@ -178,18 +179,21 @@ void registerDiskS3(DiskFactory & factory) S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.empty()) - throw Exception("Empty S3 path specified in disk configuration", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No key in S3 uri: {}", uri.uri.toString()); if (uri.key.back() != '/') - throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must end with '/', but '{}' doesn't.", uri.key); auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); + FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); + std::shared_ptr s3disk = std::make_shared( name, uri.bucket, uri.key,
metadata_disk, + std::move(cache), context, getSettings(config, config_prefix, context), getSettings); diff --git a/src/Disks/TemporaryFileOnDisk.h b/src/Disks/TemporaryFileOnDisk.h index c854a600146..b82cb7d2254 100644 --- a/src/Disks/TemporaryFileOnDisk.h +++ b/src/Disks/TemporaryFileOnDisk.h @@ -15,7 +15,7 @@ using DiskPtr = std::shared_ptr; class TemporaryFileOnDisk { public: - TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_ = "tmp"); + explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_ = "tmp"); ~TemporaryFileOnDisk(); DiskPtr getDisk() const { return disk; } diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index d03128a6b33..fd3cc1acbe5 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Formats/CMakeLists.txt b/src/Formats/CMakeLists.txt index 6e6aa6d4553..44883c271f4 100644 --- a/src/Formats/CMakeLists.txt +++ b/src/Formats/CMakeLists.txt @@ -1,21 +1,2 @@ -if (TARGET ch_contrib::avrocpp) - set(USE_AVRO 1) -endif() -if (TARGET ch_contrib::parquet) - set(USE_PARQUET 1) - set(USE_ARROW 1) - set(USE_ORC 1) -endif() -if (TARGET ch_contrib::snappy) - set(USE_SNAPPY 1) -endif() -if (TARGET ch_contrib::protobuf) - set(USE_PROTOBUF 1) -endif() -if (TARGET ch_contrib::msgpack) - set(USE_MSGPACK 1) -endif() -if (TARGET ch_contrib::capnp) - set(USE_CAPNP 1) -endif() +include(configure_config.cmake) configure_file(config_formats.h.in ${ConfigIncludePath}/config_formats.h) diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h index 51c152de17f..47fe3ada7cd 100644 --- a/src/Formats/CapnProtoUtils.h +++ b/src/Formats/CapnProtoUtils.h @@ -18,14 +18,14 @@ struct DestructorCatcher { T impl; template - DestructorCatcher(Arg && ... args) : impl(kj::fwd(args)...) {} + explicit DestructorCatcher(Arg && ... args) : impl(kj::fwd(args)...) {} ~DestructorCatcher() noexcept try { } catch (...) { return; } }; class CapnProtoSchemaParser : public DestructorCatcher { public: - CapnProtoSchemaParser() {} + CapnProtoSchemaParser() = default; capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); }; diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index 0a7747fc864..b0ea10abdb6 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -262,7 +262,7 @@ static bool evaluateConstantExpressionFromString(const StringRef & field, DataTy /// FIXME: Our parser cannot parse maps in the form of '{key : value}' that is used in text formats. 
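The getCachePtrForDisk() and registerDiskS3() hunks above wire a per-disk data cache that is driven entirely by keys under the disk's configuration prefix. The sketch below is a minimal standalone illustration of that lookup, not part of the patch: the key names and the 1 GiB default for data_cache_max_size come from the hunks, while the struct, the function name readDataCacheSettings and the zero placeholders for the two REMOTE_FS_OBJECTS_CACHE_* defaults (whose values are not visible in this diff) are assumptions made for illustration.

#include <Poco/Util/AbstractConfiguration.h>
#include <string>

struct DataCacheSettings
{
    bool enabled = false;                           /// <data_cache_enabled>
    std::string path;                               /// <data_cache_path>
    unsigned long long max_size = 0;                /// <data_cache_max_size>, in bytes
    unsigned long long max_elements = 0;            /// <data_cache_max_elements>
    unsigned long long max_file_segment_size = 0;   /// <max_file_segment_size>
};

/// Hypothetical helper mirroring the configuration reads done by getCachePtrForDisk().
DataCacheSettings readDataCacheSettings(
    const Poco::Util::AbstractConfiguration & config,
    const std::string & config_prefix,       /// e.g. "storage_configuration.disks.s3"
    const std::string & default_cache_path)  /// "<server path>/disks/<name>/data_cache/" in the patch
{
    DataCacheSettings res;
    res.enabled = config.getBool(config_prefix + ".data_cache_enabled", false);
    if (!res.enabled)
        return res;  /// getCachePtrForDisk() returns nullptr in this case

    res.path = config.getString(config_prefix + ".data_cache_path", default_cache_path);
    res.max_size = config.getUInt64(config_prefix + ".data_cache_max_size", 1024 * 1024 * 1024);
    /// Placeholder defaults: the real ones are REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS
    /// and REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE.
    res.max_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", 0);
    res.max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", 0);
    return res;
}

As the hunk above enforces, the cache path must differ from the disk's metadata path, and the cache directory is created when the disk is registered.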
bool parsed = parser.parse(token_iterator, ast, expected); - if (!parsed) + if (!parsed || !token_iterator->isEnd()) return false; try diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index be565a532bb..08554cf7e07 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -89,6 +89,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.null_as_default = settings.input_format_null_as_default; + format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; @@ -278,9 +279,10 @@ OutputFormatPtr FormatFactory::getOutputFormatParallelIfPossible( if (settings.output_format_parallel_formatting && getCreators(name).supports_parallel_formatting && !settings.output_format_json_array_of_rows) { - auto formatter_creator = [output_getter, sample, callback, format_settings] - (WriteBuffer & output) -> OutputFormatPtr - { return output_getter(output, sample, {std::move(callback)}, format_settings);}; + auto formatter_creator = [output_getter, sample, callback, format_settings] (WriteBuffer & output) -> OutputFormatPtr + { + return output_getter(output, sample, {callback}, format_settings); + }; ParallelFormattingOutputFormat::Params builder{buf, sample, formatter_creator, settings.max_threads}; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 265c879e768..4881c1a43c8 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -32,14 +32,16 @@ struct FormatSettings bool null_as_default = true; bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; + bool use_lowercase_column_name = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 100; enum class DateTimeInputFormat { - Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. - BestEffort /// Use sophisticated rules to parse whatever possible. + Basic, /// Default format for fast parsing: YYYY-MM-DD hh:mm:ss (ISO-8601 without fractional part and timezone) or NNNNNNNNNN unix timestamp. + BestEffort, /// Use sophisticated rules to parse whatever possible. 
+ BestEffortUS /// Use sophisticated rules to parse American style: mm/dd/yyyy }; DateTimeInputFormat date_time_input_format = DateTimeInputFormat::Basic; diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h index 6f71baa8b40..8d304e2ffd8 100644 --- a/src/Formats/JSONEachRowUtils.h +++ b/src/Formats/JSONEachRowUtils.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include diff --git a/src/Formats/MarkInCompressedFile.h b/src/Formats/MarkInCompressedFile.h index ceefde43615..1cd545e1a03 100644 --- a/src/Formats/MarkInCompressedFile.h +++ b/src/Formats/MarkInCompressedFile.h @@ -33,7 +33,7 @@ struct MarkInCompressedFile return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")"; } - String toStringWithRows(size_t rows_num) + String toStringWithRows(size_t rows_num) const { return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + "," + DB::toString(rows_num) + ")"; } @@ -43,7 +43,7 @@ struct MarkInCompressedFile class MarksInCompressedFile : public PODArray { public: - MarksInCompressedFile(size_t n) : PODArray(n) {} + explicit MarksInCompressedFile(size_t n) : PODArray(n) {} void read(ReadBuffer & buffer, size_t from, size_t count) { diff --git a/src/Formats/MsgPackExtensionTypes.h b/src/Formats/MsgPackExtensionTypes.h index 139d2f9047b..2f7d28eb5bf 100644 --- a/src/Formats/MsgPackExtensionTypes.h +++ b/src/Formats/MsgPackExtensionTypes.h @@ -5,7 +5,7 @@ namespace DB enum class MsgPackExtensionTypes { - UUID = 0x02, + UUIDType = 0x02, }; } diff --git a/src/Formats/ParsedTemplateFormatString.h b/src/Formats/ParsedTemplateFormatString.h index c5617d0f0ef..5d7ee820f2f 100644 --- a/src/Formats/ParsedTemplateFormatString.h +++ b/src/Formats/ParsedTemplateFormatString.h @@ -28,7 +28,7 @@ struct ParsedTemplateFormatString /// For diagnostic info Strings column_names; - typedef std::function(const String &)> ColumnIdxGetter; + using ColumnIdxGetter = std::function(const String &)>; ParsedTemplateFormatString() = default; ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name, bool allow_indexes = true); diff --git a/src/Formats/ProtobufReader.h b/src/Formats/ProtobufReader.h index 0df139eeacd..2e2a71a7d11 100644 --- a/src/Formats/ProtobufReader.h +++ b/src/Formats/ProtobufReader.h @@ -16,7 +16,7 @@ class ReadBuffer; class ProtobufReader { public: - ProtobufReader(ReadBuffer & in_); + explicit ProtobufReader(ReadBuffer & in_); void startMessage(bool with_length_delimiter_); void endMessage(bool ignore_errors); diff --git a/src/Formats/ProtobufWriter.h b/src/Formats/ProtobufWriter.h index c564db110cc..1dcc8f4ef7c 100644 --- a/src/Formats/ProtobufWriter.h +++ b/src/Formats/ProtobufWriter.h @@ -16,7 +16,7 @@ class WriteBuffer; class ProtobufWriter { public: - ProtobufWriter(WriteBuffer & out_); + explicit ProtobufWriter(WriteBuffer & out_); ~ProtobufWriter(); void startMessage(); diff --git a/src/Formats/RowInputMissingColumnsFiller.h b/src/Formats/RowInputMissingColumnsFiller.h index 0eaefd4e814..9785d8bed62 100644 --- a/src/Formats/RowInputMissingColumnsFiller.h +++ b/src/Formats/RowInputMissingColumnsFiller.h @@ -14,7 +14,7 @@ class RowInputMissingColumnsFiller { public: /// Makes a column filler which checks nested structures while adding default values to columns. 
- RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types); + explicit RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types); RowInputMissingColumnsFiller(const Names & names, const DataTypes & types); RowInputMissingColumnsFiller(size_t count, const std::string_view * names, const DataTypePtr * types); diff --git a/src/Formats/configure_config.cmake b/src/Formats/configure_config.cmake new file mode 100644 index 00000000000..3a11f3c6448 --- /dev/null +++ b/src/Formats/configure_config.cmake @@ -0,0 +1,20 @@ +if (TARGET ch_contrib::avrocpp) + set(USE_AVRO 1) +endif() +if (TARGET ch_contrib::parquet) + set(USE_PARQUET 1) + set(USE_ARROW 1) + set(USE_ORC 1) +endif() +if (TARGET ch_contrib::snappy) + set(USE_SNAPPY 1) +endif() +if (TARGET ch_contrib::protobuf) + set(USE_PROTOBUF 1) +endif() +if (TARGET ch_contrib::msgpack) + set(USE_MSGPACK 1) +endif() +if (TARGET ch_contrib::capnp) + set(USE_CAPNP 1) +endif() diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index 6668ca0a392..fc6e4a0e671 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -83,7 +83,7 @@ struct CountSubstringsImpl { res = 0; - if (needle.size() == 0) + if (needle.empty()) return; auto start = std::max(start_pos, UInt64(1)); diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index 2e601888ecc..c246f7fd31a 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -6,6 +6,7 @@ #include #include +#include "config_core.h" #include diff --git a/src/Functions/DummyJSONParser.h b/src/Functions/DummyJSONParser.h index c14aacece86..77b958d1429 100644 --- a/src/Functions/DummyJSONParser.h +++ b/src/Functions/DummyJSONParser.h @@ -2,6 +2,8 @@ #include #include +#include + namespace DB { @@ -22,25 +24,25 @@ struct DummyJSONParser class Element { public: - Element() {} - bool isInt64() const { return false; } - bool isUInt64() const { return false; } - bool isDouble() const { return false; } - bool isString() const { return false; } - bool isArray() const { return false; } - bool isObject() const { return false; } - bool isBool() const { return false; } - bool isNull() const { return false; } + Element() = default; + static bool isInt64() { return false; } + static bool isUInt64() { return false; } + static bool isDouble() { return false; } + static bool isString() { return false; } + static bool isArray() { return false; } + static bool isObject() { return false; } + static bool isBool() { return false; } + static bool isNull() { return false; } - Int64 getInt64() const { return 0; } - UInt64 getUInt64() const { return 0; } - double getDouble() const { return 0; } - bool getBool() const { return false; } - std::string_view getString() const { return {}; } - Array getArray() const { return {}; } - Object getObject() const { return {}; } + static Int64 getInt64() { return 0; } + static UInt64 getUInt64() { return 0; } + static double getDouble() { return 0; } + static bool getBool() { return false; } + static std::string_view getString() { return {}; } + static Array getArray() { return {}; } + static Object getObject() { return {}; } - Element getElement() { return {}; } + static Element getElement() { return {}; } }; /// References an array in a JSON document. 
@@ -52,14 +54,14 @@ struct DummyJSONParser public: Element operator*() const { return {}; } Iterator & operator++() { return *this; } - Iterator operator++(int) { return *this; } + Iterator operator++(int) { return *this; } /// NOLINT friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator!=(const Iterator &, const Iterator &) { return false; } }; - Iterator begin() const { return {}; } - Iterator end() const { return {}; } - size_t size() const { return 0; } + static Iterator begin() { return {}; } + static Iterator end() { return {}; } + static size_t size() { return 0; } Element operator[](size_t) const { return {}; } }; @@ -74,15 +76,15 @@ struct DummyJSONParser public: KeyValuePair operator*() const { return {}; } Iterator & operator++() { return *this; } - Iterator operator++(int) { return *this; } + Iterator operator++(int) { return *this; } /// NOLINT friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator!=(const Iterator &, const Iterator &) { return false; } }; - Iterator begin() const { return {}; } - Iterator end() const { return {}; } - size_t size() const { return 0; } - bool find(const std::string_view &, Element &) const { return false; } + static Iterator begin() { return {}; } + static Iterator end() { return {}; } + static size_t size() { return 0; } + bool find(const std::string_view &, Element &) const { return false; } /// NOLINT #if 0 /// Optional: Provides access to an object's element by index. @@ -91,7 +93,7 @@ struct DummyJSONParser }; /// Parses a JSON document, returns the reference to its root element if succeeded. - bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; } + bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; } /// NOLINT #if 0 /// Optional: Allocates memory to parse JSON documents faster. 
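DummyJSONParser, touched just above, is the stub parser used when no real JSON library is compiled in: every predicate answers false, every getter returns a default, and parse() throws. Because none of these members read any instance state, clang-tidy's request to make them static is behaviour-preserving. A rough sketch of the same "stateless stub" pattern, with purely hypothetical names rather than the real ClickHouse types, is shown below.

#include <stdexcept>
#include <string_view>

struct StubElement
{
    static bool isInt64() { return false; }   /// no member data is touched,
    static long getInt64() { return 0; }      /// so making these static changes nothing for callers
};

struct StubParser
{
    using Element = StubElement;
    static bool parse(std::string_view, Element &)
    {
        throw std::runtime_error("JSON functions are not supported in this build");
    }
};

/// Generic code keeps calling through an object; that still compiles
/// (and behaves identically) when the members become static.
template <typename Parser>
long tryGetInt64(Parser & parser, std::string_view json)
{
    typename Parser::Element root;
    if (!parser.parse(json, root) || !root.isInt64())
        return 0;
    return root.getInt64();
}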
diff --git a/src/Functions/EmptyImpl.h b/src/Functions/EmptyImpl.h index 60daa66ea03..6f5c4f7a7dc 100644 --- a/src/Functions/EmptyImpl.h +++ b/src/Functions/EmptyImpl.h @@ -2,6 +2,7 @@ #include #include +#include #include diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 808c3711631..e49af4c166f 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -5,6 +5,7 @@ #include #include #include +#include #include diff --git a/src/Functions/FunctionCustomWeekToSomething.h b/src/Functions/FunctionCustomWeekToSomething.h index 542062151ce..6ed751fd889 100644 --- a/src/Functions/FunctionCustomWeekToSomething.h +++ b/src/Functions/FunctionCustomWeekToSomething.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index d860da62b9d..56d29e0c776 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -242,7 +242,7 @@ public: GeneratorJSONPath generator_json_path(query_ptr); Element current_element = root; VisitorStatus status; - Element res; + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) { if (status == VisitorStatus::Ok) diff --git a/src/Functions/FunctionSnowflake.h b/src/Functions/FunctionSnowflake.h index 1ba15433e94..f4a62e509ed 100644 --- a/src/Functions/FunctionSnowflake.h +++ b/src/Functions/FunctionSnowflake.h @@ -24,7 +24,7 @@ namespace ErrorCodes * https://blog.twitter.com/engineering/en_us/a/2010/announcing-snowflake * https://ws-dl.blogspot.com/2019/08/2019-08-03-tweetedat-finding-tweet.html */ -static constexpr long snowflake_epoch = 1288834974657L; +static constexpr size_t snowflake_epoch = 1288834974657L; static constexpr int time_shift = 22; class FunctionDateTimeToSnowflake : public IFunction @@ -33,7 +33,7 @@ private: const char * name; public: - FunctionDateTimeToSnowflake(const char * name_) : name(name_) { } + explicit FunctionDateTimeToSnowflake(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -74,7 +74,7 @@ private: const char * name; public: - FunctionSnowflakeToDateTime(const char * name_) : name(name_) { } + explicit FunctionSnowflakeToDateTime(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -84,7 +84,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() < 1 || arguments.size() > 2) + if (arguments.empty() || arguments.size() > 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name); if (!typeid_cast(arguments[0].type.get())) @@ -122,7 +122,7 @@ private: const char * name; public: - FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { } + explicit FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -163,7 +163,7 @@ private: const char * name; public: - FunctionSnowflakeToDateTime64(const char * name_) : name(name_) { } + explicit FunctionSnowflakeToDateTime64(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -173,7 +173,7 @@ public: DataTypePtr getReturnTypeImpl(const 
ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() < 1 || arguments.size() > 2) + if (arguments.empty() || arguments.size() > 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name); if (!typeid_cast(arguments[0].type.get())) diff --git a/src/Functions/FunctionStringOrArrayToT.h b/src/Functions/FunctionStringOrArrayToT.h index 3bf1f0a5d34..cda5da5c177 100644 --- a/src/Functions/FunctionStringOrArrayToT.h +++ b/src/Functions/FunctionStringOrArrayToT.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 775a39f4d08..1e48588892a 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -421,7 +421,7 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast*>(data_ptr_0); const UInt64 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i]; @@ -615,7 +615,7 @@ private: size_t to_end; for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast *>(data_ptr_0); if (is_column_const[1]) @@ -923,7 +923,7 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const UInt64 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast *>(data_ptr_0); @@ -1030,8 +1030,8 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; - const AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; + AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i]; const AggregateFunctionGroupBitmapData & bitmap_data_1 = *reinterpret_cast *>(data_ptr_0); const AggregateFunctionGroupBitmapData & bitmap_data_2 @@ -1178,8 +1178,8 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; - const AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; + AggregateDataPtr data_ptr_1 = is_column_const[1] ? 
container1[0] : container1[i]; // bitmapAnd(RoaringBitMap, SmallSet) is slower than bitmapAnd(SmallSet, RoaringBitMap), so we can exchange the position of two arguments for the speed auto * bm_1 = reinterpret_cast *>(data_ptr_0); diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index a0c7fc643d2..0d0195eb2d7 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -137,7 +137,7 @@ struct NumComparisonImpl template struct StringComparisonImpl { - static void NO_INLINE string_vector_string_vector( + static void NO_INLINE string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -157,7 +157,7 @@ struct StringComparisonImpl } } - static void NO_INLINE string_vector_fixed_string_vector( + static void NO_INLINE string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -175,7 +175,7 @@ struct StringComparisonImpl } } - static void NO_INLINE string_vector_constant( + static void NO_INLINE string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -193,7 +193,7 @@ struct StringComparisonImpl } } - static void fixed_string_vector_string_vector( + static void fixed_string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -201,7 +201,7 @@ struct StringComparisonImpl StringComparisonImpl::string_vector_fixed_string_vector(b_data, b_offsets, a_data, a_n, c); } - static void NO_INLINE fixed_string_vector_fixed_string_vector_16( + static void NO_INLINE fixed_string_vector_fixed_string_vector_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -212,7 +212,7 @@ struct StringComparisonImpl c[j] = Op::apply(memcmp16(&a_data[i], &b_data[i]), 0); } - static void NO_INLINE fixed_string_vector_constant_16( + static void NO_INLINE fixed_string_vector_constant_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -223,7 +223,7 @@ struct StringComparisonImpl c[j] = Op::apply(memcmp16(&a_data[i], &b_data[0]), 0); } - static void NO_INLINE fixed_string_vector_fixed_string_vector( + static void NO_INLINE fixed_string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -250,7 +250,7 @@ struct StringComparisonImpl } } - static void NO_INLINE fixed_string_vector_constant( + static void NO_INLINE fixed_string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -273,7 +273,7 @@ struct StringComparisonImpl } } - static void constant_string_vector( + static void constant_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -281,7 +281,7 @@ struct StringComparisonImpl 
StringComparisonImpl::string_vector_constant(b_data, b_offsets, a_data, a_size, c); } - static void constant_fixed_string_vector( + static void constant_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -295,7 +295,7 @@ struct StringComparisonImpl template struct StringEqualsImpl { - static void NO_INLINE string_vector_string_vector( + static void NO_INLINE string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -318,7 +318,7 @@ struct StringEqualsImpl } } - static void NO_INLINE string_vector_fixed_string_vector( + static void NO_INLINE string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -338,7 +338,7 @@ struct StringEqualsImpl } } - static void NO_INLINE string_vector_constant( + static void NO_INLINE string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -358,7 +358,7 @@ struct StringEqualsImpl } } - static void NO_INLINE fixed_string_vector_fixed_string_vector_16( + static void NO_INLINE fixed_string_vector_fixed_string_vector_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -371,7 +371,7 @@ struct StringEqualsImpl b_data.data() + i * 16); } - static void NO_INLINE fixed_string_vector_constant_16( + static void NO_INLINE fixed_string_vector_constant_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -384,7 +384,7 @@ struct StringEqualsImpl b_data.data()); } - static void NO_INLINE fixed_string_vector_fixed_string_vector( + static void NO_INLINE fixed_string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -410,7 +410,7 @@ struct StringEqualsImpl } } - static void NO_INLINE fixed_string_vector_constant( + static void NO_INLINE fixed_string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -427,7 +427,7 @@ struct StringEqualsImpl } } - static void fixed_string_vector_string_vector( + static void fixed_string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -435,7 +435,7 @@ struct StringEqualsImpl string_vector_fixed_string_vector(b_data, b_offsets, a_data, a_n, c); } - static void constant_string_vector( + static void constant_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -443,7 +443,7 @@ struct StringEqualsImpl string_vector_constant(b_data, b_offsets, a_data, a_size, c); } - static void constant_fixed_string_vector( + static void constant_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const 
ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 909803d7cd7..5e11cab7e79 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -542,7 +542,7 @@ struct ToDateTime64TransformUnsigned const DateTime64::NativeType scale_multiplier = 1; - ToDateTime64TransformUnsigned(UInt32 scale = 0) + ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) {} @@ -559,7 +559,7 @@ struct ToDateTime64TransformSigned const DateTime64::NativeType scale_multiplier = 1; - ToDateTime64TransformSigned(UInt32 scale = 0) + ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) {} @@ -577,7 +577,7 @@ struct ToDateTime64TransformFloat const UInt32 scale = 1; - ToDateTime64TransformFloat(UInt32 scale_ = 0) + ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT : scale(scale_) {} @@ -615,7 +615,7 @@ struct FromDateTime64Transform const DateTime64::NativeType scale_multiplier = 1; - FromDateTime64Transform(UInt32 scale) + FromDateTime64Transform(UInt32 scale) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) {} @@ -639,7 +639,7 @@ struct ToDateTime64Transform const DateTime64::NativeType scale_multiplier = 1; - ToDateTime64Transform(UInt32 scale = 0) + ToDateTime64Transform(UInt32 scale = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale)) {} @@ -906,6 +906,41 @@ struct ConvertImplGenericToString } }; +/** Conversion of time_t to UInt16, Int32, UInt32 + */ +template +void convertFromTime(typename DataType::FieldType & x, time_t & time) +{ + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > 0xFFFF)) + x = 0xFFFF; + else + x = time; +} + +template <> +inline void convertFromTime(DataTypeDate32::FieldType & x, time_t & time) +{ + x = time; +} + +template <> +inline void convertFromTime(DataTypeDateTime::FieldType & x, time_t & time) +{ + if (unlikely(time < 0)) + x = 0; + else if (unlikely(time > 0xFFFFFFFF)) + x = 0xFFFFFFFF; + else + x = time; +} /** Conversion of strings to numbers, dates, datetimes: through parsing. */ @@ -931,18 +966,16 @@ inline void parseImpl(DataTypeDate32::FieldType & x, ReadBuffer x = tmp; } + // NOTE: no need of extra overload of DateTime64, since readDateTimeText64 has different signature and that case is explicitly handled in the calling code. 
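The convertFromTime<> specializations added above replace the old "clamp negatives to zero" logic with full saturation: a time_t that does not fit the target date type is pinned to the type's minimum or maximum instead of being truncated modulo 2^16 or 2^32. The snippet below is a self-contained illustration of that behaviour (not the ClickHouse code itself, and assuming a 64-bit time_t).

#include <cstdint>
#include <cstdio>
#include <ctime>

/// DataTypeDate-like target: UInt16 day number.
static uint16_t saturateToDayNum(time_t t)
{
    if (t < 0)
        return 0;
    if (t > 0xFFFF)
        return 0xFFFF;
    return static_cast<uint16_t>(t);
}

/// DataTypeDateTime-like target: UInt32 seconds.
static uint32_t saturateToDateTime(time_t t)
{
    if (t < 0)
        return 0;
    if (t > 0xFFFFFFFFLL)
        return 0xFFFFFFFFU;
    return static_cast<uint32_t>(t);
}

int main()
{
    std::printf("%u\n", static_cast<unsigned>(saturateToDayNum(static_cast<time_t>(-1))));         /// 0, not 65535
    std::printf("%u\n", static_cast<unsigned>(saturateToDateTime(static_cast<time_t>(-1))));       /// 0, not 4294967295
    std::printf("%u\n", static_cast<unsigned>(saturateToDateTime(static_cast<time_t>(1) << 36)));  /// pinned to 4294967295
    return 0;
}

The same clamping is applied in the BestEffort parsing paths below, which previously stored the raw time_t into the target column.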
template <> inline void parseImpl(DataTypeDateTime::FieldType & x, ReadBuffer & rb, const DateLUTImpl * time_zone) { time_t time = 0; readDateTimeText(time, rb, *time_zone); - if (time < 0) - time = 0; - x = time; + convertFromTime(x, time); } - template <> inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { @@ -951,7 +984,6 @@ inline void parseImpl(DataTypeUUID::FieldType & x, ReadBuffer & rb x = tmp.toUnderType(); } - template bool tryParseImpl(typename DataType::FieldType & x, ReadBuffer & rb, const DateLUTImpl *) { @@ -1178,7 +1210,7 @@ struct ConvertThroughParsing { time_t res; parseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i], res); } } else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) @@ -1193,7 +1225,7 @@ struct ConvertThroughParsing { time_t res; parseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i], res); } } else @@ -1232,14 +1264,14 @@ struct ConvertThroughParsing { time_t res; parsed = tryParseDateTimeBestEffort(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i],res); } } else if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffortUS) { time_t res; parsed = tryParseDateTimeBestEffortUS(res, read_buffer, *local_time_zone, *utc_time_zone); - vec_to[i] = res; + convertFromTime(vec_to[i],res); } else { diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index 0f75750354a..c6ea886b4a8 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -593,7 +593,7 @@ public: size_t getNumberOfArguments() const override { return 0; } /// For the purpose of query optimization, we assume this function to be injective - /// even in face of fact that there are many different cities named Moscow. + /// even in face of fact that there are many different cities named Paris. 
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index fb0dbdfff5c..6a701d7b864 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -90,6 +90,22 @@ public: return getDictionary(dict_name_col->getValue()); } + static const DictionaryAttribute & getDictionaryHierarchicalAttribute(const std::shared_ptr & dictionary) + { + const auto & dictionary_structure = dictionary->getStructure(); + auto hierarchical_attribute_index_optional = dictionary_structure.hierarchical_attribute_index; + + if (!dictionary->hasHierarchy() || !hierarchical_attribute_index_optional.has_value()) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, + "Dictionary {} does not support hierarchy", + dictionary->getFullName()); + + size_t hierarchical_attribute_index = *hierarchical_attribute_index_optional; + const auto & hierarchical_attribute = dictionary_structure.attributes[hierarchical_attribute_index]; + + return hierarchical_attribute; + } + bool isDictGetFunctionInjective(const Block & sample_columns) { /// Assume non-injective by default @@ -881,7 +897,9 @@ private: result = std::move(dictionary_get_result_column); } else - result = ColumnNullable::create(std::move(dictionary_get_result_column), std::move(is_key_in_dictionary_column_mutable)); + { + result = ColumnNullable::create(dictionary_get_result_column, std::move(is_key_in_dictionary_column_mutable)); + } } return result; @@ -939,39 +957,38 @@ private: bool useDefaultImplementationForConstants() const final { return true; } ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override - { - if (!isString(arguments[0])) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of first argument of function {}. Expected String. Actual type {}", - getName(), - arguments[0]->getName()); - - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. 
Actual type {}", - getName(), - arguments[1]->getName()); - - return std::make_shared(std::make_shared()); - } - bool isDeterministic() const override { return false; } + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + String dictionary_name; + if (const auto * name_col = checkAndGetColumnConst(arguments[0].column.get())) + dictionary_name = name_col->getValue(); + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of first argument of function {}, expected a const string.", + arguments[0].type->getName(), + getName()); + + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + + return std::make_shared(hierarchical_attribute.type); + } + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (input_rows_count == 0) return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - if (!dictionary->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dictionary->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name}; + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); + + ColumnPtr result = dictionary->getHierarchy(key_column_casted, hierarchical_attribute.type); - ColumnPtr result = dictionary->getHierarchy(arguments[1].column, std::make_shared()); return result; } @@ -1009,18 +1026,6 @@ private: getName(), arguments[0]->getName()); - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. Actual type {}", - getName(), - arguments[1]->getName()); - - if (!WhichDataType(arguments[2]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of third argument of function {}. Expected UInt64. 
Actual type {}", - getName(), - arguments[2]->getName()); - return std::make_shared(); } @@ -1031,16 +1036,18 @@ private: if (input_rows_count == 0) return result_type->createColumn(); - auto dict = helper.getDictionary(arguments[0].column); + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - if (!dict->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dict->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name}; + auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name}; - ColumnPtr res = dict->isInHierarchy(arguments[1].column, arguments[2].column, std::make_shared()); + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); + auto in_key_column_casted = castColumnAccurate(in_key_column, hierarchical_attribute.type); - return res; + ColumnPtr result = dictionary->isInHierarchy(key_column_casted, in_key_column_casted, hierarchical_attribute.type); + + return result; } mutable FunctionDictHelper helper; @@ -1069,21 +1076,18 @@ private: bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (!isString(arguments[0])) + if (!isString(arguments[0].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of first argument of function {}. Expected String. Actual type {}", getName(), - arguments[0]->getName()); + arguments[0].type->getName()); - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. 
Actual type {}", - getName(), - arguments[1]->getName()); + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - return std::make_shared(std::make_shared()); + return std::make_shared(hierarchical_attribute.type); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -1092,13 +1096,12 @@ private: return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); - if (!dictionary->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dictionary->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - ColumnPtr result = dictionary->getDescendants(arguments[1].column, std::make_shared(), 1); + ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, 1); return result; } @@ -1126,12 +1129,11 @@ private: bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const final { return true; } - ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 2}; } bool isDeterministic() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } - - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { size_t arguments_size = arguments.size(); if (arguments_size < 2 || arguments_size > 3) @@ -1142,27 +1144,24 @@ private: arguments_size); } - if (!isString(arguments[0])) + if (!isString(arguments[0].type)) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of first argument of function {}. Expected const String. Actual type {}", getName(), - arguments[0]->getName()); + arguments[0].type->getName()); - if (!WhichDataType(arguments[1]).isUInt64()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type of second argument of function {}. Expected UInt64. Actual type {}", - getName(), - arguments[1]->getName()); - - if (arguments.size() == 3 && !isUnsignedInteger(arguments[2])) + if (arguments.size() == 3 && !isInteger(arguments[2].type)) { throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type of third argument of function {}. Expected const unsigned integer. 
Actual type {}", getName(), - arguments[2]->getName()); + arguments[2].type->getName()); } - return std::make_shared(std::make_shared()); + auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + + return std::make_shared(hierarchical_attribute.type); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override @@ -1171,6 +1170,7 @@ private: return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); + const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); size_t level = 0; @@ -1181,17 +1181,21 @@ private: "Illegal type of third argument of function {}. Expected const unsigned integer.", getName()); - level = static_cast(arguments[2].column->get64(0)); + auto value = static_cast(arguments[2].column->getInt(0)); + if (value < 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of third argument of function {}. Expected const unsigned integer.", + getName()); + + level = static_cast(value); } - if (!dictionary->hasHierarchy()) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Dictionary {} does not support hierarchy", - dictionary->getFullName()); + auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name}; + auto key_column_casted = castColumnAccurate(key_column, hierarchical_attribute.type); - ColumnPtr res = dictionary->getDescendants(arguments[1].column, std::make_shared(), level); + ColumnPtr result = dictionary->getDescendants(key_column_casted, hierarchical_attribute.type, level); - return res; + return result; } mutable FunctionDictHelper helper; diff --git a/src/Functions/FunctionsLogical.cpp b/src/Functions/FunctionsLogical.cpp index 0dee048dae3..c709cd22880 100644 --- a/src/Functions/FunctionsLogical.cpp +++ b/src/Functions/FunctionsLogical.cpp @@ -611,7 +611,7 @@ template ColumnPtr FunctionAnyArityLogical::executeImpl( const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const { - ColumnsWithTypeAndName arguments = std::move(args); + ColumnsWithTypeAndName arguments = args; /// Special implementation for short-circuit arguments. if (checkShortCircuitArguments(arguments) != -1) diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 7d4f5489e86..140981faf9f 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -7,6 +7,7 @@ #include #include #include +#include #if USE_EMBEDDED_COMPILER @@ -147,7 +148,6 @@ public: static constexpr auto name = Name::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } -public: String getName() const override { return name; @@ -189,7 +189,7 @@ public: result = Impl::apply(b, result, nativeBoolCast(b, types[i], values[i])); return b.CreateSelect(result, b.getInt8(1), b.getInt8(0)); } - constexpr bool breakOnTrue = Impl::isSaturatedValue(true); + constexpr bool break_on_true = Impl::isSaturatedValue(true); auto * next = b.GetInsertBlock(); auto * stop = llvm::BasicBlock::Create(next->getContext(), "", next->getParent()); b.SetInsertPoint(stop); @@ -205,7 +205,7 @@ public: if (i + 1 < types.size()) { next = llvm::BasicBlock::Create(next->getContext(), "", next->getParent()); - b.CreateCondBr(truth, breakOnTrue ? stop : next, breakOnTrue ? 
next : stop); + b.CreateCondBr(truth, break_on_true ? stop : next, break_on_true ? next : stop); } } b.CreateBr(stop); @@ -223,7 +223,6 @@ public: static constexpr auto name = Name::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } -public: String getName() const override { return name; diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h index b1de017120c..a1256598f1b 100644 --- a/src/Functions/FunctionsStringArray.h +++ b/src/Functions/FunctionsStringArray.h @@ -93,7 +93,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } @@ -152,7 +152,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } @@ -211,7 +211,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } @@ -328,7 +328,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 1; } @@ -399,7 +399,7 @@ public: } /// Returns the position of the argument that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 1; } @@ -482,7 +482,7 @@ public: } /// Returns the position of the argument that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 1; } @@ -567,7 +567,7 @@ public: } /// Returns the position of the argument that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index 5dbd6d0356d..2d8e47b9bcb 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -24,7 +24,7 @@ struct FunctionDetectTonalityImpl UInt64 count_words = 0; String word; - /// Select all Russian words from the string + /// Select all words from the string for (size_t ind = 0; ind < str_len; ++ind) { /// Split words by whitespaces and punctuation signs @@ -36,7 +36,7 @@ struct FunctionDetectTonalityImpl word.push_back(str[ind]); ++ind; } - /// Try to find a russian word in the tonality dictionary + /// Try to find a word in the tonality dictionary const auto * it = emotional_dict.find(word); if (it != emotional_dict.end()) { diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h index 2d4544b2167..d08248e71fc 100644 --- a/src/Functions/GatherUtils/Algorithms.h +++ b/src/Functions/GatherUtils/Algorithms.h @@ -203,7 +203,7 @@ void concat(const std::vector> & array_sources, Si size_t sources_num = array_sources.size(); std::vector is_const(sources_num); - auto checkAndGetSizeToReserve = [] (auto source, IArraySource * array_source) + auto check_and_get_size_to_reserve = [] (auto source, IArraySource * array_source) { if (source == nullptr) throw Exception("Concat function expected " + demangle(typeid(Source).name()) + " or " @@ -215,17 +215,17 @@ void concat(const std::vector> & array_sources, Si size_t size_to_reserve = 0; for (auto i : collections::range(0, sources_num)) { - auto & source = 
array_sources[i]; + const auto & source = array_sources[i]; is_const[i] = source->isConst(); if (is_const[i]) - size_to_reserve += checkAndGetSizeToReserve(typeid_cast *>(source.get()), source.get()); + size_to_reserve += check_and_get_size_to_reserve(typeid_cast *>(source.get()), source.get()); else - size_to_reserve += checkAndGetSizeToReserve(typeid_cast(source.get()), source.get()); + size_to_reserve += check_and_get_size_to_reserve(typeid_cast(source.get()), source.get()); } sink.reserve(size_to_reserve); - auto writeNext = [& sink] (auto source) + auto write_next = [& sink] (auto source) { writeSlice(source->getWhole(), sink); source->next(); @@ -235,11 +235,11 @@ void concat(const std::vector> & array_sources, Si { for (auto i : collections::range(0, sources_num)) { - auto & source = array_sources[i]; + const auto & source = array_sources[i]; if (is_const[i]) - writeNext(static_cast *>(source.get())); + write_next(static_cast *>(source.get())); else - writeNext(static_cast(source.get())); + write_next(static_cast(source.get())); } sink.next(); } @@ -576,31 +576,31 @@ bool sliceHasImplSubstr(const FirstSliceType & first, const SecondSliceType & se [](const SecondSliceType & pattern, size_t i, size_t j) { return isEqualUnary(pattern, i, j); }); } - size_t firstCur = 0; - size_t secondCur = 0; - while (firstCur < first.size && secondCur < second.size) + size_t first_cur = 0; + size_t second_cur = 0; + while (first_cur < first.size && second_cur < second.size) { - const bool is_first_null = has_first_null_map && first_null_map[firstCur]; - const bool is_second_null = has_second_null_map && second_null_map[secondCur]; + const bool is_first_null = has_first_null_map && first_null_map[first_cur]; + const bool is_second_null = has_second_null_map && second_null_map[second_cur]; const bool cond_both_null_match = is_first_null && is_second_null; const bool cond_both_not_null = !is_first_null && !is_second_null; - if (cond_both_null_match || (cond_both_not_null && isEqual(first, second, firstCur, secondCur))) + if (cond_both_null_match || (cond_both_not_null && isEqual(first, second, first_cur, second_cur))) { - ++firstCur; - ++secondCur; + ++first_cur; + ++second_cur; } - else if (secondCur > 0) + else if (second_cur > 0) { - secondCur = prefix_function[secondCur - 1]; + second_cur = prefix_function[second_cur - 1]; } else { - ++firstCur; + ++first_cur; } } - return secondCur == second.size; + return second_cur == second.size; } diff --git a/src/Functions/GatherUtils/Selectors.h b/src/Functions/GatherUtils/Selectors.h index bbe631a6a3a..5793701e93a 100644 --- a/src/Functions/GatherUtils/Selectors.h +++ b/src/Functions/GatherUtils/Selectors.h @@ -131,7 +131,7 @@ struct ArrayAndValueSourceSelectorBySink : public ArraySinkSelector).name()) + " but got " + demangle(typeid(*source_ptr).name()), ErrorCodes::LOGICAL_ERROR); }; - auto checkTypeAndCallConcat = [& sink, & checkType, & args ...] (auto array_source_ptr, auto value_source_ptr) + auto check_type_and_call_concat = [& sink, & check_type, & args ...] 
(auto array_source_ptr, auto value_source_ptr) { - checkType(array_source_ptr); - checkType(value_source_ptr); + check_type(array_source_ptr); + check_type(value_source_ptr); Base::selectArrayAndValueSourceBySink(*array_source_ptr, *value_source_ptr, sink, args ...); }; if (array_source.isConst() && value_source.isConst()) - checkTypeAndCallConcat(typeid_cast *>(&array_source), + check_type_and_call_concat(typeid_cast *>(&array_source), typeid_cast *>(&value_source)); else if (array_source.isConst()) - checkTypeAndCallConcat(typeid_cast *>(&array_source), + check_type_and_call_concat(typeid_cast *>(&array_source), typeid_cast(&value_source)); else if (value_source.isConst()) - checkTypeAndCallConcat(typeid_cast(&array_source), + check_type_and_call_concat(typeid_cast(&array_source), typeid_cast *>(&value_source)); else - checkTypeAndCallConcat(typeid_cast(&array_source), + check_type_and_call_concat(typeid_cast(&array_source), typeid_cast(&value_source)); } }; diff --git a/src/Functions/GatherUtils/Slices.h b/src/Functions/GatherUtils/Slices.h index 7951178497a..22f475adf59 100644 --- a/src/Functions/GatherUtils/Slices.h +++ b/src/Functions/GatherUtils/Slices.h @@ -26,7 +26,7 @@ struct NullableSlice : public Slice const UInt8 * null_map = nullptr; NullableSlice() = default; - NullableSlice(const Slice & base) : Slice(base) {} + NullableSlice(const Slice & base) : Slice(base) {} /// NOLINT }; template diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 7d1241be7d1..13e3de99552 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -184,7 +184,7 @@ struct ConstSource : public Base virtual void accept(ArraySourceVisitor & visitor) // override { - if constexpr (std::is_base_of::value) + if constexpr (std::is_base_of_v) visitor.visit(*this); else throw Exception( @@ -194,7 +194,7 @@ struct ConstSource : public Base virtual void accept(ValueSourceVisitor & visitor) // override { - if constexpr (std::is_base_of::value) + if constexpr (std::is_base_of_v) visitor.visit(*this); else throw Exception( diff --git a/src/Functions/GeoHash.h b/src/Functions/GeoHash.h index d97eda31cef..071bc5072a4 100644 --- a/src/Functions/GeoHash.h +++ b/src/Functions/GeoHash.h @@ -37,8 +37,8 @@ struct GeohashesInBoxPreparedArgs }; GeohashesInBoxPreparedArgs geohashesInBoxPrepare( - const Float64 longitude_min, - const Float64 latitude_min, + Float64 longitude_min, + Float64 latitude_min, Float64 longitude_max, Float64 latitude_max, uint8_t precision); diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index b44b6c0dd13..ef2b9e6eede 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -32,13 +32,13 @@ namespace DB /** Construct from date in text form 'YYYY-MM-DD' by reading from * ReadBuffer. */ - GregorianDate(ReadBuffer & in); + explicit GregorianDate(ReadBuffer & in); /** Construct from Modified Julian Day. The type T is an * integral type which should be at least 32 bits wide, and * should preferably signed. */ - GregorianDate(is_integer auto mjd); + explicit GregorianDate(is_integer auto mjd); /** Convert to Modified Julian Day. 
The type T is an integral type * which should be at least 32 bits wide, and should preferably @@ -65,15 +65,15 @@ namespace DB return month_; } - uint8_t day_of_month() const noexcept + uint8_t day_of_month() const noexcept /// NOLINT { return day_of_month_; } private: - YearT year_; - uint8_t month_; - uint8_t day_of_month_; + YearT year_; /// NOLINT + uint8_t month_; /// NOLINT + uint8_t day_of_month_; /// NOLINT }; /** ISO 8601 Ordinal Date. YearT is an integral type which should @@ -89,7 +89,7 @@ namespace DB * integral type which should be at least 32 bits wide, and * should preferably signed. */ - OrdinalDate(is_integer auto mjd); + explicit OrdinalDate(is_integer auto mjd); /** Convert to Modified Julian Day. The type T is an integral * type which should be at least 32 bits wide, and should @@ -109,8 +109,8 @@ namespace DB } private: - YearT year_; - uint16_t day_of_year_; + YearT year_; /// NOLINT + uint16_t day_of_year_; /// NOLINT }; class MonthDay @@ -134,14 +134,14 @@ namespace DB return month_; } - uint8_t day_of_month() const noexcept + uint8_t day_of_month() const noexcept /// NOLINT { return day_of_month_; } private: - uint8_t month_; - uint8_t day_of_month_; + uint8_t month_; /// NOLINT + uint8_t day_of_month_; /// NOLINT }; } @@ -183,13 +183,13 @@ namespace gd template static inline constexpr I div(I x, J y) { - const auto y_ = static_cast(y); - if (x > 0 && y_ < 0) - return ((x - 1) / y_) - 1; - else if (x < 0 && y_ > 0) - return ((x + 1) / y_) - 1; + const auto y_cast = static_cast(y); + if (x > 0 && y_cast < 0) + return ((x - 1) / y_cast) - 1; + else if (x < 0 && y_cast > 0) + return ((x + 1) / y_cast) - 1; else - return x / y_; + return x / y_cast; } /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. @@ -197,10 +197,10 @@ namespace gd template static inline constexpr I mod(I x, J y) { - const auto y_ = static_cast(y); - const auto r = x % y_; - if ((x > 0 && y_ < 0) || (x < 0 && y_ > 0)) - return r == 0 ? static_cast(0) : r + y_; + const auto y_cast = static_cast(y); + const auto r = x % y_cast; + if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0)) + return r == 0 ? static_cast(0) : r + y_cast; else return r; } @@ -210,8 +210,8 @@ namespace gd template static inline constexpr I min(I x, J y) { - const auto y_ = static_cast(y); - return x < y_ ? x : y_; + const auto y_cast = static_cast(y); + return x < y_cast ? 
x : y_cast; } static inline char readDigit(ReadBuffer & in) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 8063ad77ad0..7b272fef53d 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -120,7 +120,7 @@ public: virtual ~IFunctionBase() = default; - virtual ColumnPtr execute( + virtual ColumnPtr execute( /// NOLINT const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const { return prepare(arguments)->execute(arguments, result_type, input_rows_count, dry_run); @@ -267,7 +267,7 @@ public: */ virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const { - throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Function " + getName() + " has no information about its monotonicity", ErrorCodes::NOT_IMPLEMENTED); } }; @@ -452,7 +452,7 @@ public: using Monotonicity = IFunctionBase::Monotonicity; virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const { - throw Exception("Function " + getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("Function " + getName() + " has no information about its monotonicity", ErrorCodes::NOT_IMPLEMENTED); } /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). diff --git a/src/Functions/ITupleFunction.h b/src/Functions/ITupleFunction.h index 836e5d273fc..0dbbb81aab9 100644 --- a/src/Functions/ITupleFunction.h +++ b/src/Functions/ITupleFunction.h @@ -1,6 +1,11 @@ #pragma once +#include +#include #include +#include +#include + namespace DB { diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h b/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h index 2c9482b665e..3a5e121b989 100644 --- a/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h @@ -11,7 +11,6 @@ public: ASTPtr clone() const override { return std::make_shared(*this); } -public: /// Member name to lookup in json document (in path: $.some_key.another_key. ...) 
String member_name; }; diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h b/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h index 746c6211f29..083d4b8e3ab 100644 --- a/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h @@ -12,7 +12,6 @@ public: ASTPtr clone() const override { return std::make_shared(*this); } -public: /// Ranges to lookup in json array ($[0, 1, 2, 4 to 9]) /// Range is represented as /// Single index is represented as diff --git a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h index 291150f6df4..fe00f06bbbf 100644 --- a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h +++ b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h @@ -25,7 +25,7 @@ public: * Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors * @param query_ptr_ pointer to ASTJSONPathQuery */ - GeneratorJSONPath(ASTPtr query_ptr_) + explicit GeneratorJSONPath(ASTPtr query_ptr_) { query_ptr = query_ptr_; const auto * path = query_ptr->as(); diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h b/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h index 5fe35e75a84..8446e1ff3be 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h @@ -10,7 +10,7 @@ template class VisitorJSONPathMemberAccess : public IVisitor { public: - VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_) + explicit VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_) : member_access_ptr(member_access_ptr_->as()) { } const char * getName() const override { return "VisitorJSONPathMemberAccess"; } diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h b/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h index 40d4f6ad95e..708a71f7cf4 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h @@ -10,7 +10,7 @@ template class VisitorJSONPathRange : public IVisitor { public: - VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as()) + explicit VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as()) { current_range = 0; current_index = range_ptr->ranges[current_range].first; @@ -20,7 +20,6 @@ public: VisitorStatus apply(typename JSONParser::Element & element) const override { - typename JSONParser::Element result; typename JSONParser::Array array = element.getArray(); element = array[current_index]; return VisitorStatus::Ok; diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h b/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h index 5c48c12782f..71569d3c0a0 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h @@ -10,7 +10,7 @@ template class VisitorJSONPathRoot : public IVisitor { public: - VisitorJSONPathRoot(ASTPtr) { } + explicit VisitorJSONPathRoot(ASTPtr) { } const char * getName() const override { return "VisitorJSONPathRoot"; } diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h b/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h index 4a54a76c199..0c297f64316 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h @@ -10,7 +10,7 @@ template class VisitorJSONPathStar : public IVisitor { public: - VisitorJSONPathStar(ASTPtr) + explicit 
VisitorJSONPathStar(ASTPtr) { current_index = 0; } @@ -19,7 +19,6 @@ public: VisitorStatus apply(typename JSONParser::Element & element) const override { - typename JSONParser::Element result; typename JSONParser::Array array = element.getArray(); element = array[current_index]; return VisitorStatus::Ok; diff --git a/src/Functions/LeftRight.h b/src/Functions/LeftRight.h index 054e76b7792..a82182a52e7 100644 --- a/src/Functions/LeftRight.h +++ b/src/Functions/LeftRight.h @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index cf614850e66..a7c38a7f904 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -31,7 +31,7 @@ private: #ifdef __SSE2__ const auto bytes_sse = sizeof(__m128i); - const auto src_end_sse = src_end - (src_end - src) % bytes_sse; + const auto * src_end_sse = src_end - (src_end - src) % bytes_sse; const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 4c155034b3d..a7475870dab 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -16,61 +16,58 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace +/// xor or do nothing +template +UInt8 xor_or_identity(const UInt8 c, const int mask) { - /// xor or do nothing - template - UInt8 xor_or_identity(const UInt8 c, const int mask) - { - return c ^ mask; - } + return c ^ mask; +} - template <> - inline UInt8 xor_or_identity(const UInt8 c, const int) - { - return c; - } +template <> +inline UInt8 xor_or_identity(const UInt8 c, const int) +{ + return c; +} - /// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array - template - inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) +/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array +template +inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) +{ + if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) { - if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// ѐёђѓєѕіїјљњћќѝўџ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// А-П - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) - { - /// а-п - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) - { - /// Р-Я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// р-я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } + /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) + { + /// ѐёђѓєѕіїјљњћќѝўџ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] 
<= 0x9Fu)) + { + /// А-П + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) + { + /// а-п + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) + { + /// Р-Я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) + { + /// р-я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); } } @@ -171,7 +168,7 @@ private: { #ifdef __SSE2__ static constexpr auto bytes_sse = sizeof(__m128i); - auto src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse; + const auto * src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse; /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) const auto v_zero = _mm_setzero_si128(); @@ -216,7 +213,7 @@ private: else { /// UTF-8 - const auto expected_end = src + bytes_sse; + const auto * expected_end = src + bytes_sse; while (src < expected_end) toCase(src, src_end, dst); diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index c2e64671d1f..f3e67008707 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "Regexps.h" #include "config_functions.h" diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 9ef6454d085..bcc195e988e 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -72,7 +72,7 @@ namespace detail return size() == 0; } - void emplace_back() + void emplace_back() /// NOLINT { data.emplace_back(); } @@ -198,7 +198,7 @@ class ImplementationSelector : WithContext public: using ImplementationPtr = std::shared_ptr; - ImplementationSelector(ContextPtr context_) : WithContext(context_) {} + explicit ImplementationSelector(ContextPtr context_) : WithContext(context_) {} /* Select the best implementation based on previous runs. 
* If FunctionInterface is IFunction, then "executeImpl" method of the implementation will be called diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 1a340c517dc..de4bb2d48de 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -53,14 +53,14 @@ UInt64 getPolygonAllocatedBytes(const Polygon & polygon) using RingType = typename Polygon::ring_type; using ValueType = typename RingType::value_type; - auto sizeOfRing = [](const RingType & ring) { return sizeof(ring) + ring.capacity() * sizeof(ValueType); }; + auto size_of_ring = [](const RingType & ring) { return sizeof(ring) + ring.capacity() * sizeof(ValueType); }; - size += sizeOfRing(polygon.outer()); + size += size_of_ring(polygon.outer()); const auto & inners = polygon.inners(); size += sizeof(inners) + inners.capacity() * sizeof(RingType); for (auto & inner : inners) - size += sizeOfRing(inner); + size += size_of_ring(inner); return size; } diff --git a/src/Functions/RapidJSONParser.h b/src/Functions/RapidJSONParser.h index 0e791fe744f..2d8514868e5 100644 --- a/src/Functions/RapidJSONParser.h +++ b/src/Functions/RapidJSONParser.h @@ -23,8 +23,8 @@ struct RapidJSONParser class Element { public: - ALWAYS_INLINE Element() {} - ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {} + ALWAYS_INLINE Element() = default; + ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); } ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); } @@ -54,17 +54,17 @@ struct RapidJSONParser class Iterator { public: - ALWAYS_INLINE Iterator(const rapidjson::Value::ConstValueIterator & it_) : it(it_) {} - ALWAYS_INLINE Element operator*() const { return *it; } + ALWAYS_INLINE Iterator(const rapidjson::Value::ConstValueIterator & it_) : it(it_) {} /// NOLINT + ALWAYS_INLINE Element operator*() const { return *it; } /// NOLINT ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; } ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); } private: rapidjson::Value::ConstValueIterator it; }; - ALWAYS_INLINE Array(const rapidjson::Value & value_) : ptr(&value_) {} + ALWAYS_INLINE Array(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return ptr->Begin(); } ALWAYS_INLINE Iterator end() const { return ptr->End(); } ALWAYS_INLINE size_t size() const { return ptr->Size(); } @@ -83,17 +83,17 @@ struct RapidJSONParser class Iterator { public: - ALWAYS_INLINE Iterator(const rapidjson::Value::ConstMemberIterator & it_) : it(it_) {} + ALWAYS_INLINE Iterator(const rapidjson::Value::ConstMemberIterator & it_) : it(it_) {} /// NOLINT ALWAYS_INLINE KeyValuePair operator *() const { std::string_view key{it->name.GetString(), it->name.GetStringLength()}; return {key, it->value}; } ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it 
== right.it; } ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); } private: rapidjson::Value::ConstMemberIterator it; }; - ALWAYS_INLINE Object(const rapidjson::Value & value_) : ptr(&value_) {} + ALWAYS_INLINE Object(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return ptr->MemberBegin(); } ALWAYS_INLINE Iterator end() const { return ptr->MemberEnd(); } ALWAYS_INLINE size_t size() const { return ptr->MemberCount(); } diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 5d2549239c8..549edf70dff 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -33,8 +33,8 @@ struct ReplaceRegexpImpl /// Otherwise - paste this string verbatim. std::string literal; - Instruction(int substitution_num_) : substitution_num(substitution_num_) {} - Instruction(std::string literal_) : literal(std::move(literal_)) {} + Instruction(int substitution_num_) : substitution_num(substitution_num_) {} /// NOLINT + Instruction(std::string literal_) : literal(std::move(literal_)) {} /// NOLINT }; using Instructions = std::vector; @@ -137,8 +137,14 @@ struct ReplaceRegexpImpl if (replace_one) can_finish_current_string = true; - else if (match.length() == 0) - ++match_pos; /// Step one character to avoid infinite loop. + + if (match.length() == 0) + { + /// Step one character to avoid infinite loop + ++match_pos; + if (match_pos >= static_cast(input.length())) + can_finish_current_string = true; + } } else can_finish_current_string = true; diff --git a/src/Functions/SimdJSONParser.h b/src/Functions/SimdJSONParser.h index be85d74619b..3abeb85fb56 100644 --- a/src/Functions/SimdJSONParser.h +++ b/src/Functions/SimdJSONParser.h @@ -28,8 +28,8 @@ struct SimdJSONParser class Element { public: - ALWAYS_INLINE Element() {} - ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} + ALWAYS_INLINE Element() {} /// NOLINT + ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} /// NOLINT ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; } ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; } @@ -61,17 +61,17 @@ struct SimdJSONParser class Iterator { public: - ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {} + ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {} /// NOLINT ALWAYS_INLINE Element operator*() const { return *it; } ALWAYS_INLINE Iterator & operator++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; } ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); } private: simdjson::dom::array::iterator it; }; - ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {} + ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return array.begin(); } ALWAYS_INLINE Iterator end() const { return array.end(); } ALWAYS_INLINE size_t size() const { return array.size(); } @@ -90,17 +90,17 @@ struct SimdJSONParser class Iterator { public: 
- ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {} + ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {} /// NOLINT ALWAYS_INLINE KeyValuePair operator*() const { const auto & res = *it; return {res.key, res.value}; } ALWAYS_INLINE Iterator & operator++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; } ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); } private: simdjson::dom::object::iterator it; }; - ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {} + ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return object.begin(); } ALWAYS_INLINE Iterator end() const { return object.end(); } ALWAYS_INLINE size_t size() const { return object.size(); } diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index fa230a56fb7..d7fa55fbb08 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -89,6 +89,7 @@ String toString(TargetArch arch); #if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__) +/// NOLINTNEXTLINE #define USE_MULTITARGET_CODE 1 #if defined(__clang__) @@ -183,6 +184,7 @@ namespace TargetSpecific::Default { \ __VA_ARGS__ \ } +/// NOLINTNEXTLINE #define DECLARE_MULTITARGET_CODE(...) \ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \ @@ -191,23 +193,23 @@ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( - constexpr auto BuildArch = TargetArch::Default; + constexpr auto BuildArch = TargetArch::Default; /// NOLINT ) // DECLARE_DEFAULT_CODE DECLARE_SSE42_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::SSE42; + constexpr auto BuildArch = TargetArch::SSE42; /// NOLINT ) // DECLARE_SSE42_SPECIFIC_CODE DECLARE_AVX_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX; + constexpr auto BuildArch = TargetArch::AVX; /// NOLINT ) // DECLARE_AVX_SPECIFIC_CODE DECLARE_AVX2_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX2; + constexpr auto BuildArch = TargetArch::AVX2; /// NOLINT ) // DECLARE_AVX2_SPECIFIC_CODE DECLARE_AVX512F_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX512F; + constexpr auto BuildArch = TargetArch::AVX512F; /// NOLINT ) // DECLARE_AVX512F_SPECIFIC_CODE } diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 4eab2a491c7..b05bdab65ad 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -44,7 +44,7 @@ public: static constexpr auto name = Transform::name; // non-explicit constructor to allow creating from scale value (or with no scale at all), indispensable in some contexts. 
- TransformDateTime64(UInt32 scale_ = 0) + TransformDateTime64(UInt32 scale_ = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale_)) {} diff --git a/src/Functions/URL/ExtractFirstSignificantSubdomain.h b/src/Functions/URL/ExtractFirstSignificantSubdomain.h index 4f9b1ec3c6c..70c9c25e4f3 100644 --- a/src/Functions/URL/ExtractFirstSignificantSubdomain.h +++ b/src/Functions/URL/ExtractFirstSignificantSubdomain.h @@ -49,11 +49,11 @@ struct ExtractFirstSignificantSubdomain res_data = tmp; res_size = domain_length; - auto begin = tmp; - auto end = begin + domain_length; + const auto * begin = tmp; + const auto * end = begin + domain_length; const char * last_3_periods[3]{}; - auto pos = find_first_symbols<'.'>(begin, end); + const auto * pos = find_first_symbols<'.'>(begin, end); while (pos < end) { last_3_periods[2] = last_3_periods[1]; @@ -74,7 +74,7 @@ struct ExtractFirstSignificantSubdomain if (!last_3_periods[2]) last_3_periods[2] = begin - 1; - auto end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end); + const auto * end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end); if (!end_of_level_domain) { end_of_level_domain = end; @@ -117,12 +117,12 @@ struct ExtractFirstSignificantSubdomain res_data = tmp; res_size = domain_length; - auto begin = tmp; - auto end = begin + domain_length; + const auto * begin = tmp; + const auto * end = begin + domain_length; const char * last_2_periods[2]{}; const char * prev = begin - 1; - auto pos = find_first_symbols<'.'>(begin, end); + const auto * pos = find_first_symbols<'.'>(begin, end); while (pos < end) { if (lookup(pos + 1, end - pos - 1)) diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 8a76d52741b..5d78500c252 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -20,7 +20,7 @@ namespace ErrorCodes struct FirstSignificantSubdomainCustomLookup { const TLDList & tld_list; - FirstSignificantSubdomainCustomLookup(const std::string & tld_list_name) + explicit FirstSignificantSubdomainCustomLookup(const std::string & tld_list_name) : tld_list(TLDListsHolder::getInstance().getTldList(tld_list_name)) { } diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index d43be198043..18efe969216 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -8,9 +8,6 @@ namespace DB { -namespace -{ - inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) @@ -23,8 +20,6 @@ inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const return StringRef(start_of_host, pos - start_of_host); } -} - /// Extracts host from given url. /// /// @return empty StringRef if the host is not valid (i.e. it does not have dot, or there no symbol after dot). 
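checkAndReturnHost, shown in the domain.h hunk above, drops a candidate host when no dot was seen, the host is empty, or the dot is its last character, which matches the comment about returning an empty StringRef for invalid hosts. A toy restatement of just that visible rule, with a hypothetical helper name and std::string_view standing in for StringRef:

#include <iostream>
#include <string_view>

// Toy restatement of the validity rule visible in checkAndReturnHost:
// keep the host only if it is non-empty, contains a dot, and has at least
// one character after the last dot. (Hypothetical sketch, not the actual
// ClickHouse implementation, which works on raw Pos pointers.)
std::string_view checkHostSketch(std::string_view host)
{
    const auto dot = host.rfind('.');
    if (host.empty() || dot == std::string_view::npos || dot + 1 == host.size())
        return {};
    return host;
}

int main()
{
    std::cout << '[' << checkHostSketch("clickhouse.com") << "]\n"; // [clickhouse.com]
    std::cout << '[' << checkHostSketch("localhost") << "]\n";      // []
    std::cout << '[' << checkHostSketch("example.") << "]\n";       // []
    return 0;
}
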
@@ -79,7 +74,7 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos } Pos dot_pos = nullptr; - auto start_of_host = pos; + const auto * start_of_host = pos; for (; pos < end; ++pos) { switch (*pos) diff --git a/src/Functions/VectorExtension.h b/src/Functions/VectorExtension.h index cb4347e3031..fbcbae6b0b6 100644 --- a/src/Functions/VectorExtension.h +++ b/src/Functions/VectorExtension.h @@ -6,27 +6,27 @@ namespace DB::VectorExtension { -typedef UInt64 UInt64x2 __attribute__ ((vector_size (sizeof(UInt64) * 2))); -typedef UInt64 UInt64x4 __attribute__ ((vector_size (sizeof(UInt64) * 4))); -typedef UInt64 UInt64x8 __attribute__ ((vector_size (sizeof(UInt64) * 8))); +using UInt64x2 = UInt64 __attribute__ ((vector_size (sizeof(UInt64) * 2))); +using UInt64x4 = UInt64 __attribute__ ((vector_size (sizeof(UInt64) * 4))); +using UInt64x8 = UInt64 __attribute__ ((vector_size (sizeof(UInt64) * 8))); -typedef UInt32 UInt32x2 __attribute__ ((vector_size (sizeof(UInt32) * 2))); -typedef UInt32 UInt32x4 __attribute__ ((vector_size (sizeof(UInt32) * 4))); -typedef UInt32 UInt32x8 __attribute__ ((vector_size (sizeof(UInt32) * 8))); -typedef UInt32 UInt32x16 __attribute__ ((vector_size (sizeof(UInt32) * 16))); +using UInt32x2 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 2))); +using UInt32x4 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 4))); +using UInt32x8 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 8))); +using UInt32x16 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 16))); -typedef UInt16 UInt16x2 __attribute__ ((vector_size (sizeof(UInt16) * 2))); -typedef UInt16 UInt16x4 __attribute__ ((vector_size (sizeof(UInt16) * 4))); -typedef UInt16 UInt16x8 __attribute__ ((vector_size (sizeof(UInt16) * 8))); -typedef UInt16 UInt16x16 __attribute__ ((vector_size (sizeof(UInt16) * 16))); -typedef UInt16 UInt16x32 __attribute__ ((vector_size (sizeof(UInt16) * 32))); +using UInt16x2 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 2))); +using UInt16x4 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 4))); +using UInt16x8 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 8))); +using UInt16x16 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 16))); +using UInt16x32 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 32))); -typedef UInt8 UInt8x2 __attribute__ ((vector_size (sizeof(UInt8) * 2))); -typedef UInt8 UInt8x4 __attribute__ ((vector_size (sizeof(UInt8) * 4))); -typedef UInt8 UInt8x8 __attribute__ ((vector_size (sizeof(UInt8) * 8))); -typedef UInt8 UInt8x16 __attribute__ ((vector_size (sizeof(UInt8) * 16))); -typedef UInt8 UInt8x32 __attribute__ ((vector_size (sizeof(UInt8) * 32))); -typedef UInt8 UInt8x64 __attribute__ ((vector_size (sizeof(UInt8) * 64))); +using UInt8x2 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 2))); +using UInt8x4 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 4))); +using UInt8x8 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 8))); +using UInt8x16 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 16))); +using UInt8x32 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 32))); +using UInt8x64 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 64))); namespace detail { diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 029e33db0cf..58e6db86f75 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -1,18 +1,29 @@ #pragma once +#include + +#include +#include 
+#include +#include +#include + +#include +#include +#include + #include #include #include -#include -#include -#include -#include -#include -#include +#include + #include -#include +#include + #include +#include + namespace DB { @@ -21,11 +32,38 @@ namespace ErrorCodes { extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; extern const int SIZES_OF_ARRAYS_DOESNT_MATCH; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +template +ColumnPtr getOffsetsPtr(const T & column) +{ + if constexpr (std::is_same_v) + { + return column.getOffsetsPtr(); + } + else // ColumnMap + { + return column.getNestedColumn().getOffsetsPtr(); + } +} + +template +const IColumn::Offsets & getOffsets(const T & column) +{ + if constexpr (std::is_same_v) + { + return column.getOffsets(); + } + else // ColumnMap + { + return column.getNestedColumn().getOffsets(); + } +} + /** Higher-order functions for arrays. * These functions optionally apply a map (transform) to array (or multiple arrays of identical size) by lambda function, * and return some result based on that transformation. @@ -60,29 +98,42 @@ public: void getLambdaArgumentTypes(DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " needs at least one argument; passed " - + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least one argument, passed {}", getName(), arguments.size()); if (arguments.size() == 1) - throw Exception("Function " + getName() + " needs at least one array argument.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least one argument with data", getName()); - DataTypes nested_types(arguments.size() - 1); - for (size_t i = 0; i < nested_types.size(); ++i) + if (arguments.size() > 2 && Impl::needOneArray()) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs one argument with data", getName()); + + size_t nested_types_count = std::is_same_v ? (arguments.size() - 1) * 2 : (arguments.size() - 1); + DataTypes nested_types(nested_types_count); + for (size_t i = 0; i < arguments.size() - 1; ++i) { - const DataTypeArray * array_type = checkAndGetDataType(&*arguments[i + 1]); + const auto * array_type = checkAndGetDataType(&*arguments[i + 1]); if (!array_type) throw Exception("Argument " + toString(i + 2) + " of function " + getName() + " must be array. Found " + arguments[i + 1]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + if constexpr (std::is_same_v) + { + nested_types[2 * i] = recursiveRemoveLowCardinality(array_type->getKeyType()); + nested_types[2 * i + 1] = recursiveRemoveLowCardinality(array_type->getValueType()); + } + else if constexpr (std::is_same_v) + { + nested_types[i] = recursiveRemoveLowCardinality(array_type->getNestedType()); + } } const DataTypeFunction * function_type = checkAndGetDataType(arguments[0].get()); if (!function_type || function_type->getArgumentTypes().size() != nested_types.size()) - throw Exception("First argument for this overload of " + getName() + " must be a function with " - + toString(nested_types.size()) + " arguments. 
Found " - + arguments[0]->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for this overload of {} must be a function with {} arguments, found {} instead", + getName(), nested_types.size(), arguments[0]->getName()); arguments[0] = std::make_shared(nested_types); } @@ -91,37 +142,39 @@ public: { size_t min_args = Impl::needExpression() ? 2 : 1; if (arguments.size() < min_args) - throw Exception("Function " + getName() + " needs at least " - + toString(min_args) + " argument; passed " - + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least {} argument, passed {}", + getName(), min_args, arguments.size()); - if (arguments.size() == 1) + if ((arguments.size() == 1) && std::is_same_v) { - const auto * array_type = checkAndGetDataType(arguments[0].type.get()); + const auto * data_type = checkAndGetDataType(arguments[0].type.get()); - if (!array_type) + if (!data_type) throw Exception("The only argument for function " + getName() + " must be array. Found " - + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - DataTypePtr nested_type = array_type->getNestedType(); + DataTypePtr nested_type = data_type->getNestedType(); if (Impl::needBoolean() && !WhichDataType(nested_type).isUInt8()) throw Exception("The only argument for function " + getName() + " must be array of UInt8. Found " - + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - return Impl::getReturnType(nested_type, nested_type); + if constexpr (std::is_same_v) + return Impl::getReturnType(nested_type, nested_type); + else + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); } else { if (arguments.size() > 2 && Impl::needOneArray()) - throw Exception("Function " + getName() + " needs one array argument.", + throw Exception("Function " + getName() + " needs one argument with data", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); if (!data_type_function) - throw Exception("First argument for function " + getName() + " must be a function.", + throw Exception("First argument for function " + getName() + " must be a function", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); /// The types of the remaining arguments are already checked in getLambdaArgumentTypes. 
@@ -131,9 +184,28 @@ public: throw Exception("Expression for function " + getName() + " must return UInt8, found " + return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const auto * first_array_type = checkAndGetDataType(arguments[1].type.get()); + static_assert( + std::is_same_v || + std::is_same_v, + "unsupported type"); - return Impl::getReturnType(return_type, first_array_type->getNestedType()); + if (arguments.size() < 2) + { + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "{}", arguments.size()); + } + + const auto * first_array_type = checkAndGetDataType(arguments[1].type.get()); + + if (!first_array_type) + throw DB::Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported type {}", arguments[1].type->getName()); + + if constexpr (std::is_same_v) + return Impl::getReturnType(return_type, first_array_type->getNestedType()); + + if constexpr (std::is_same_v) + return Impl::getReturnType(return_type, first_array_type->getKeyValueTypes()); + + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Unreachable code reached"); } } @@ -142,18 +214,25 @@ public: if (arguments.size() == 1) { ColumnPtr column_array_ptr = arguments[0].column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); if (!column_array) { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); column_array_ptr = column_const_array->convertToFullColumn(); - column_array = assert_cast(column_array_ptr.get()); + column_array = assert_cast(column_array_ptr.get()); } - return Impl::execute(*column_array, column_array->getDataPtr()); + if constexpr (std::is_same_v) + { + return Impl::execute(*column_array, column_array->getNestedColumn().getDataPtr()); + } + else + { + return Impl::execute(*column_array, column_array->getDataPtr()); + } } else { @@ -172,7 +251,7 @@ public: ColumnPtr offsets_column; ColumnPtr column_first_array_ptr; - const ColumnArray * column_first_array = nullptr; + const typename Impl::column_type * column_first_array = nullptr; ColumnsWithTypeAndName arrays; arrays.reserve(arguments.size() - 1); @@ -182,18 +261,18 @@ public: const auto & array_with_type_and_name = arguments[i]; ColumnPtr column_array_ptr = array_with_type_and_name.column; - const auto * column_array = checkAndGetColumn(column_array_ptr.get()); + const auto * column_array = checkAndGetColumn(column_array_ptr.get()); const DataTypePtr & array_type_ptr = array_with_type_and_name.type; - const auto * array_type = checkAndGetDataType(array_type_ptr.get()); + const auto * array_type = checkAndGetDataType(array_type_ptr.get()); if (!column_array) { - const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); + const ColumnConst * column_const_array = checkAndGetColumnConst(column_array_ptr.get()); if (!column_const_array) throw Exception("Expected array column, found " + column_array_ptr->getName(), ErrorCodes::ILLEGAL_COLUMN); column_array_ptr = recursiveRemoveLowCardinality(column_const_array->convertToFullColumn()); - column_array = checkAndGetColumn(column_array_ptr.get()); + column_array = checkAndGetColumn(column_array_ptr.get()); } if (!array_type) @@ -201,13 +280,13 @@ public: if (!offsets_column) { - offsets_column = column_array->getOffsetsPtr(); 
+ offsets_column = getOffsetsPtr(*column_array); } else { /// The first condition is optimization: do not compare data if the pointers are equal. - if (column_array->getOffsetsPtr() != offsets_column - && column_array->getOffsets() != typeid_cast(*offsets_column).getData()) + if (getOffsetsPtr(*column_array) != offsets_column + && getOffsets(*column_array) != typeid_cast(*offsets_column).getData()) throw Exception("Arrays passed to " + getName() + " must have equal size", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); } @@ -217,13 +296,23 @@ public: column_first_array = column_array; } - arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), - recursiveRemoveLowCardinality(array_type->getNestedType()), - array_with_type_and_name.name)); + if constexpr (std::is_same_v) + { + arrays.emplace_back(ColumnWithTypeAndName( + column_array->getNestedData().getColumnPtr(0), recursiveRemoveLowCardinality(array_type->getKeyType()), array_with_type_and_name.name+".key")); + arrays.emplace_back(ColumnWithTypeAndName( + column_array->getNestedData().getColumnPtr(1), recursiveRemoveLowCardinality(array_type->getValueType()), array_with_type_and_name.name+".value")); + } + else + { + arrays.emplace_back(ColumnWithTypeAndName(column_array->getDataPtr(), + recursiveRemoveLowCardinality(array_type->getNestedType()), + array_with_type_and_name.name)); + } } /// Put all the necessary columns multiplied by the sizes of arrays into the columns. - auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(column_first_array->getOffsets())); + auto replicated_column_function_ptr = IColumn::mutate(column_function->replicate(getOffsets(*column_first_array))); auto * replicated_column_function = typeid_cast(replicated_column_function_ptr.get()); replicated_column_function->appendArguments(arrays); diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp index ee08c4f7f37..97a2f9c4c17 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -1,12 +1,18 @@ -#include -#include -#include -#include -#include -#include "FunctionArrayMapped.h" -#include #include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "FunctionArrayMapped.h" + namespace DB { @@ -83,6 +89,9 @@ using ArrayAggregateResult = typename ArrayAggregateResultImpl struct ArrayAggregateImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayAll.cpp b/src/Functions/array/arrayAll.cpp index 34deafdffdf..0f7ae797dc9 100644 --- a/src/Functions/array/arrayAll.cpp +++ b/src/Functions/array/arrayAll.cpp @@ -1,8 +1,8 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" namespace DB { @@ -16,6 +16,9 @@ namespace ErrorCodes */ struct ArrayAllImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayCompact.cpp b/src/Functions/array/arrayCompact.cpp index c2908e37e12..8abce7288d2 100644 --- a/src/Functions/array/arrayCompact.cpp +++ b/src/Functions/array/arrayCompact.cpp @@ -1,10 +1,13 @@ -#include -#include -#include #include +#include + #include -#include + +#include 
+#include + #include +#include namespace DB @@ -16,13 +19,16 @@ namespace ErrorCodes struct ArrayCompactImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } - static DataTypePtr getReturnType(const DataTypePtr & nested_type, const DataTypePtr &) + static DataTypePtr getReturnType(const DataTypePtr & , const DataTypePtr & array_element) { - return std::make_shared(nested_type); + return std::make_shared(array_element); } template @@ -30,14 +36,16 @@ struct ArrayCompactImpl { using ColVecType = ColumnVectorOrDecimal; - const ColVecType * src_values_column = checkAndGetColumn(mapped.get()); + const ColVecType * check_values_column = checkAndGetColumn(mapped.get()); + const ColVecType * src_values_column = checkAndGetColumn(array.getData()); - if (!src_values_column) + if (!src_values_column || !check_values_column) return false; const IColumn::Offsets & src_offsets = array.getOffsets(); - const typename ColVecType::Container & src_values = src_values_column->getData(); + const auto & src_values = src_values_column->getData(); + const auto & check_values = check_values_column->getData(); typename ColVecType::MutablePtr res_values_column; if constexpr (is_decimal) res_values_column = ColVecType::create(src_values.size(), src_values_column->getScale()); @@ -45,6 +53,7 @@ struct ArrayCompactImpl res_values_column = ColVecType::create(src_values.size()); typename ColVecType::Container & res_values = res_values_column->getData(); + size_t src_offsets_size = src_offsets.size(); auto res_offsets_column = ColumnArray::ColumnOffsets::create(src_offsets_size); IColumn::Offsets & res_offsets = res_offsets_column->getData(); @@ -67,7 +76,7 @@ struct ArrayCompactImpl ++res_pos; for (; src_pos < src_offset; ++src_pos) { - if (!bitEquals(src_values[src_pos], src_values[src_pos - 1])) + if (!bitEquals(check_values[src_pos], check_values[src_pos - 1])) { res_values[res_pos] = src_values[src_pos]; ++res_pos; @@ -86,8 +95,9 @@ struct ArrayCompactImpl { const IColumn::Offsets & src_offsets = array.getOffsets(); - auto res_values_column = mapped->cloneEmpty(); - res_values_column->reserve(mapped->size()); + const auto & src_values = array.getData(); + auto res_values_column = src_values.cloneEmpty(); + res_values_column->reserve(src_values.size()); size_t src_offsets_size = src_offsets.size(); auto res_offsets_column = ColumnArray::ColumnOffsets::create(src_offsets_size); @@ -104,7 +114,7 @@ struct ArrayCompactImpl if (src_pos < src_offset) { /// Insert first element unconditionally. - res_values_column->insertFrom(*mapped, src_pos); + res_values_column->insertFrom(src_values, src_pos); /// For the rest of elements, insert if the element is different from the previous. 
++src_pos; @@ -113,7 +123,7 @@ struct ArrayCompactImpl { if (mapped->compareAt(src_pos - 1, src_pos, *mapped, 1)) { - res_values_column->insertFrom(*mapped, src_pos); + res_values_column->insertFrom(src_values, src_pos); ++res_pos; } } diff --git a/src/Functions/array/arrayCount.cpp b/src/Functions/array/arrayCount.cpp index 377a6eb8fb1..df45783323b 100644 --- a/src/Functions/array/arrayCount.cpp +++ b/src/Functions/array/arrayCount.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -16,6 +17,9 @@ namespace ErrorCodes */ struct ArrayCountImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayCumSum.cpp b/src/Functions/array/arrayCumSum.cpp index 467d9ad3951..98ffa09820b 100644 --- a/src/Functions/array/arrayCumSum.cpp +++ b/src/Functions/array/arrayCumSum.cpp @@ -1,10 +1,11 @@ -#include -#include -#include #include -#include "FunctionArrayMapped.h" +#include +#include +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -17,6 +18,9 @@ namespace ErrorCodes struct ArrayCumSumImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayCumSumNonNegative.cpp b/src/Functions/array/arrayCumSumNonNegative.cpp index 476bbd08163..cd8393b7a5f 100644 --- a/src/Functions/array/arrayCumSumNonNegative.cpp +++ b/src/Functions/array/arrayCumSumNonNegative.cpp @@ -1,10 +1,10 @@ -#include -#include -#include #include -#include "FunctionArrayMapped.h" +#include +#include +#include #include +#include "FunctionArrayMapped.h" namespace DB { @@ -19,6 +19,9 @@ namespace ErrorCodes */ struct ArrayCumSumNonNegativeImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayDifference.cpp b/src/Functions/array/arrayDifference.cpp index c5fdf27100b..8af0e8b04f9 100644 --- a/src/Functions/array/arrayDifference.cpp +++ b/src/Functions/array/arrayDifference.cpp @@ -1,10 +1,11 @@ -#include -#include -#include #include -#include "FunctionArrayMapped.h" +#include +#include +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -20,6 +21,9 @@ namespace ErrorCodes */ struct ArrayDifferenceImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 4d03c52460f..d6a62a966ae 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -252,7 +252,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( ColumnPtr result_nested_array = std::move(res_nested); for (ssize_t depth = arrays_depths.max_array_depth - 1; depth >= 0; --depth) - result_nested_array = ColumnArray::create(std::move(result_nested_array), offsetsptr_by_depth[depth]); + result_nested_array = ColumnArray::create(result_nested_array, 
offsetsptr_by_depth[depth]); return result_nested_array; } diff --git a/src/Functions/array/arrayExists.cpp b/src/Functions/array/arrayExists.cpp index 34ea71af259..ea39cc0dc0b 100644 --- a/src/Functions/array/arrayExists.cpp +++ b/src/Functions/array/arrayExists.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -16,6 +17,9 @@ namespace ErrorCodes */ struct ArrayExistsImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayFill.cpp b/src/Functions/array/arrayFill.cpp index d4b36a89ba5..22b9e9a657b 100644 --- a/src/Functions/array/arrayFill.cpp +++ b/src/Functions/array/arrayFill.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -19,6 +20,9 @@ namespace ErrorCodes template struct ArrayFillImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayFilter.cpp b/src/Functions/array/arrayFilter.cpp index 1291989f9a2..89a9de44532 100644 --- a/src/Functions/array/arrayFilter.cpp +++ b/src/Functions/array/arrayFilter.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -15,6 +16,9 @@ namespace ErrorCodes */ struct ArrayFilterImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayFirst.cpp b/src/Functions/array/arrayFirst.cpp index edbf7ef6269..693aea746f5 100644 --- a/src/Functions/array/arrayFirst.cpp +++ b/src/Functions/array/arrayFirst.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -20,6 +21,9 @@ enum class ArrayFirstLastStrategy template struct ArrayFirstLastImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayFirstLastIndex.cpp b/src/Functions/array/arrayFirstLastIndex.cpp index 467678f3faa..9392cbdc840 100644 --- a/src/Functions/array/arrayFirstLastIndex.cpp +++ b/src/Functions/array/arrayFirstLastIndex.cpp @@ -1,8 +1,9 @@ #include #include -#include "FunctionArrayMapped.h" #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -20,6 +21,9 @@ enum class ArrayFirstLastIndexStrategy template struct ArrayFirstLastIndexImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index c231ddbb373..35c731dfc78 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -432,7 +432,7 @@ public: const auto & map_array_column = map_column.getNestedColumn(); auto offsets = 
map_array_column.getOffsetsPtr(); auto keys = map_column.getNestedData().getColumnPtr(0); - auto array_column = ColumnArray::create(std::move(keys), std::move(offsets)); + auto array_column = ColumnArray::create(keys, offsets); const auto & type_map = assert_cast(*arguments[0].type); auto array_type = std::make_shared(type_map.getKeyType()); diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index f2779a2fe58..f1b849b64f0 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -477,7 +477,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable columns.reserve(args); for (const auto & arg : arrays.args) { - if constexpr (std::is_same::value) + if constexpr (std::is_same_v) columns.push_back(arg.nested_column); else columns.push_back(checkAndGetColumn(arg.nested_column)); @@ -530,7 +530,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable { value = &map[columns[arg_num]->getElement(i)]; } - else if constexpr (std::is_same::value || std::is_same::value) + else if constexpr (std::is_same_v || std::is_same_v) value = &map[columns[arg_num]->getDataAt(i)]; else { @@ -566,7 +566,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable ++result_offset; if constexpr (is_numeric_column) result_data.insertValue(pair.getKey()); - else if constexpr (std::is_same::value || std::is_same::value) + else if constexpr (std::is_same_v || std::is_same_v) result_data.insertData(pair.getKey().data, pair.getKey().size); else result_data.deserializeAndInsertFromArena(pair.getKey().data); diff --git a/src/Functions/array/arrayMap.cpp b/src/Functions/array/arrayMap.cpp index e3afaf7fb66..ec1973d573b 100644 --- a/src/Functions/array/arrayMap.cpp +++ b/src/Functions/array/arrayMap.cpp @@ -1,14 +1,18 @@ -#include "FunctionArrayMapped.h" #include +#include "FunctionArrayMapped.h" + namespace DB { -/** arrayMap(x1,...,xn -> expression, array1,...,arrayn) - apply the expression to each element of the array (or set of parallel arrays). +/** arrayMap(x1, ..., xn -> expression, array1, ..., arrayn) - apply the expression to each element of the array (or set of parallel arrays). */ struct ArrayMapImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + /// true if the expression (for an overload of f(expression, arrays)) or an array (for f(array)) should be boolean. static bool needBoolean() { return false; } /// true if the f(array) overload is unavailable. 
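Each Impl in the hunks above now also advertises the column and data types it operates on through the added "using column_type = ...;" / "using data_type = ...;" aliases, so the shared FunctionArrayMapped template can query them instead of hard-coding ColumnArray. A rough sketch of that trait-alias pattern with hypothetical toy names, not the actual ClickHouse classes:

#include <iostream>
#include <type_traits>

// Hypothetical toy column types standing in for ColumnArray / ColumnMap.
struct ToyArrayColumn {};
struct ToyMapColumn {};

// Each "Impl" names the column type it works on, mirroring the
// `using column_type = ...;` lines added throughout the hunks above.
struct ToyArrayImpl
{
    using column_type = ToyArrayColumn;
};

struct ToyMapImpl
{
    using column_type = ToyMapColumn;
};

// The generic wrapper never names a concrete column type; it asks the Impl.
template <typename Impl>
struct ToyFunctionMapped
{
    static const char * describe()
    {
        if constexpr (std::is_same_v<typename Impl::column_type, ToyArrayColumn>)
            return "operates on array columns";
        else
            return "operates on map columns";
    }
};

int main()
{
    std::cout << ToyFunctionMapped<ToyArrayImpl>::describe() << '\n';
    std::cout << ToyFunctionMapped<ToyMapImpl>::describe() << '\n';
    return 0;
}

In the real code the alias surfaces in declarations such as "const typename Impl::column_type * column_first_array" rather than a string, but the lookup mechanism is the same.
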
diff --git a/src/Functions/array/arrayScalarProduct.h b/src/Functions/array/arrayScalarProduct.h index 87161038d4c..4e3eab2faf8 100644 --- a/src/Functions/array/arrayScalarProduct.h +++ b/src/Functions/array/arrayScalarProduct.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/array/arraySort.cpp b/src/Functions/array/arraySort.cpp index 476dfb46f07..5421185211e 100644 --- a/src/Functions/array/arraySort.cpp +++ b/src/Functions/array/arraySort.cpp @@ -1,8 +1,8 @@ #include "FunctionArrayMapped.h" + #include #include - namespace DB { @@ -11,6 +11,9 @@ namespace DB template struct ArraySortImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return false; } static bool needExpression() { return false; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/arraySplit.cpp b/src/Functions/array/arraySplit.cpp index 2e5f2d8432e..c818be97f60 100644 --- a/src/Functions/array/arraySplit.cpp +++ b/src/Functions/array/arraySplit.cpp @@ -1,8 +1,9 @@ -#include #include -#include "FunctionArrayMapped.h" +#include #include +#include "FunctionArrayMapped.h" + namespace DB { @@ -14,6 +15,9 @@ namespace ErrorCodes template struct ArraySplitImpl { + using column_type = ColumnArray; + using data_type = DataTypeArray; + static bool needBoolean() { return true; } static bool needExpression() { return true; } static bool needOneArray() { return false; } diff --git a/src/Functions/array/hasAllAny.h b/src/Functions/array/hasAllAny.h index cd55fea3521..3ba8bb6156f 100644 --- a/src/Functions/array/hasAllAny.h +++ b/src/Functions/array/hasAllAny.h @@ -44,7 +44,7 @@ public: { for (auto i : collections::range(0, arguments.size())) { - auto array_type = typeid_cast(arguments[i].get()); + const auto * array_type = typeid_cast(arguments[i].get()); if (!array_type) throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type " + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index b928254e454..f743cfb5b5d 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -225,7 +225,7 @@ private: for (size_t j = 0; j < len; ++j) { KeyType key; - if constexpr (std::is_same::value) + if constexpr (std::is_same_v) { if (const auto * col_fixed = checkAndGetColumn(arg.key_column.get())) key = col_fixed->getDataAt(offset + j).toString(); diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index 17269f8dfe1..8b4a1dda197 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -379,8 +379,7 @@ private: if (!max_key_column_type->equals(*input.key_series_type)) { ColumnWithTypeAndName column_to_cast = {max_key_column, max_key_column_type, ""}; - auto casted_column = castColumnAccurate(std::move(column_to_cast), input.key_series_type); - max_key_column = std::move(casted_column); + max_key_column = castColumnAccurate(column_to_cast, input.key_series_type); } } diff --git a/src/Functions/castOrDefault.cpp b/src/Functions/castOrDefault.cpp index 95046d95176..628ac57f34d 100644 --- a/src/Functions/castOrDefault.cpp +++ b/src/Functions/castOrDefault.cpp @@ -99,7 +99,7 @@ public: { const ColumnWithTypeAndName & column_to_cast = arguments[0]; auto non_const_column_to_cast = column_to_cast.column->convertToFullColumnIfConst(); - ColumnWithTypeAndName 
column_to_cast_non_const { std::move(non_const_column_to_cast), column_to_cast.type, column_to_cast.name }; + ColumnWithTypeAndName column_to_cast_non_const { non_const_column_to_cast, column_to_cast.type, column_to_cast.name }; auto cast_result = castColumnAccurateOrNull(column_to_cast_non_const, return_type); diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index fa75e305af4..057dedab6e4 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -55,7 +55,7 @@ public: static constexpr auto Kind = Impl::Kind; static constexpr auto name = Impl::Name; - FunctionExtractAllGroups(ContextPtr context_) + explicit FunctionExtractAllGroups(ContextPtr context_) : context(context_) {} diff --git a/src/Functions/formatReadable.h b/src/Functions/formatReadable.h index 7c0d6c5c817..0378e1f82f2 100644 --- a/src/Functions/formatReadable.h +++ b/src/Functions/formatReadable.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 1f7be1a6374..f0743486584 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -179,7 +179,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) /// Why comparing only difference in longitude? /// If longitudes are different enough, there is a big difference between great circle line and a line with constant latitude. - /// (Remember how a plane flies from Moscow to New York) + /// (Remember how a plane flies from Amsterdam to New York) /// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line. float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes @@ -326,4 +326,3 @@ void registerFunctionGeoDistance(FunctionFactory & factory) } } - diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 6841098ebcf..0b30f404f8e 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1027,7 +1027,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { - ColumnsWithTypeAndName arguments = std::move(args); + ColumnsWithTypeAndName arguments = args; executeShortCircuitArguments(arguments); ColumnPtr res; if ( (res = executeForConstAndNullableCondition(arguments, result_type, input_rows_count)) diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 4e242c4348b..471d6fc575c 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -518,6 +518,115 @@ public: } }; +class FunctionMapUpdate : public IFunction +{ +public: + static constexpr auto name = "mapUpdate"; + static FunctionPtr create(ContextPtr) { return std::make_shared(); } + + String getName() const override + { + return name; + } + + size_t getNumberOfArguments() const override { return 2; } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + if (arguments.size() != 2) + throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + + toString(arguments.size()) + ", should be 2", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const DataTypeMap * left = checkAndGetDataType(arguments[0].type.get()); + const DataTypeMap * right = 
checkAndGetDataType(arguments[1].type.get()); + + if (!left || !right) + throw Exception{"The two arguments for function " + getName() + " must be both Map type", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + if (!left->getKeyType()->equals(*right->getKeyType()) || !left->getValueType()->equals(*right->getValueType())) + throw Exception{"The Key And Value type of Map for function " + getName() + " must be the same", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + + return std::make_shared(left->getKeyType(), left->getValueType()); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + const ColumnMap * col_map_left = typeid_cast(arguments[0].column.get()); + const auto * col_const_map_left = checkAndGetColumnConst(arguments[0].column.get()); + if (col_const_map_left) + col_map_left = typeid_cast(&col_const_map_left->getDataColumn()); + if (!col_map_left) + return nullptr; + + const ColumnMap * col_map_right = typeid_cast(arguments[1].column.get()); + const auto * col_const_map_right = checkAndGetColumnConst(arguments[1].column.get()); + if (col_const_map_right) + col_map_right = typeid_cast(&col_const_map_right->getDataColumn()); + if (!col_map_right) + return nullptr; + + const auto & nested_column_left = col_map_left->getNestedColumn(); + const auto & keys_data_left = col_map_left->getNestedData().getColumn(0); + const auto & values_data_left = col_map_left->getNestedData().getColumn(1); + const auto & offsets_left = nested_column_left.getOffsets(); + + const auto & nested_column_right = col_map_right->getNestedColumn(); + const auto & keys_data_right = col_map_right->getNestedData().getColumn(0); + const auto & values_data_right = col_map_right->getNestedData().getColumn(1); + const auto & offsets_right = nested_column_right.getOffsets(); + + const auto & result_type_map = static_cast(*result_type); + const DataTypePtr & key_type = result_type_map.getKeyType(); + const DataTypePtr & value_type = result_type_map.getValueType(); + MutableColumnPtr keys_data = key_type->createColumn(); + MutableColumnPtr values_data = value_type->createColumn(); + MutableColumnPtr offsets = DataTypeNumber().createColumn(); + + IColumn::Offset current_offset = 0; + for (size_t idx = 0; idx < input_rows_count; ++idx) + { + for (size_t i = offsets_left[idx - 1]; i < offsets_left[idx]; ++i) + { + bool matched = false; + auto key = keys_data_left.getDataAt(i); + for (size_t j = offsets_right[idx - 1]; j < offsets_right[idx]; ++j) + { + if (keys_data_right.getDataAt(j).toString() == key.toString()) + { + matched = true; + break; + } + } + if (!matched) + { + keys_data->insertFrom(keys_data_left, i); + values_data->insertFrom(values_data_left, i); + ++current_offset; + } + } + for (size_t j = offsets_right[idx - 1]; j < offsets_right[idx]; ++j) + { + keys_data->insertFrom(keys_data_right, j); + values_data->insertFrom(values_data_right, j); + ++current_offset; + } + offsets->insert(current_offset); + } + + auto nested_column = ColumnArray::create( + ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}), + std::move(offsets)); + + return ColumnMap::create(nested_column); + } +}; + } void registerFunctionsMap(FunctionFactory & factory) @@ -528,6 +637,7 @@ void registerFunctionsMap(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } } diff 
--git a/src/Functions/mapFilter.cpp b/src/Functions/mapFilter.cpp new file mode 100644 index 00000000000..f38f8f8b4d1 --- /dev/null +++ b/src/Functions/mapFilter.cpp @@ -0,0 +1,144 @@ +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** Higher-order functions for map. + * These functions optionally apply a map by lambda function, + * and return some result based on that transformation. + */ + + +/** mapFilter((k, v) -> predicate, map) - leave in the map only the kv elements for which the expression is true. + */ +struct MapFilterImpl +{ + using data_type = DataTypeMap; + using column_type = ColumnMap; + + static constexpr auto name = "mapFilter"; + + static bool needBoolean() { return true; } + static bool needExpression() { return true; } + static bool needOneArray() { return true; } + + static DataTypePtr getReturnType(const DataTypePtr & /*expression_return*/, const DataTypes & elems) + { + return std::make_shared(elems); + } + + /// If there are several arrays, the first one is passed here. + static ColumnPtr execute(const ColumnMap & map_column, ColumnPtr mapped) + { + const ColumnUInt8 * column_filter = typeid_cast(&*mapped); + + if (!column_filter) + { + const auto * column_filter_const = checkAndGetColumnConst(&*mapped); + + if (!column_filter_const) + throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + + if (column_filter_const->getValue()) + return map_column.clone(); + else + { + const auto * column_array = typeid_cast(map_column.getNestedColumnPtr().get()); + const auto * column_tuple = typeid_cast(column_array->getDataPtr().get()); + ColumnPtr keys = column_tuple->getColumnPtr(0)->cloneEmpty(); + ColumnPtr values = column_tuple->getColumnPtr(1)->cloneEmpty(); + return ColumnMap::create(keys, values, ColumnArray::ColumnOffsets::create(map_column.size(), 0)); + } + } + + const IColumn::Filter & filter = column_filter->getData(); + ColumnPtr filtered = map_column.getNestedColumn().getData().filter(filter, -1); + + const IColumn::Offsets & in_offsets = map_column.getNestedColumn().getOffsets(); + auto column_offsets = ColumnArray::ColumnOffsets::create(in_offsets.size()); + IColumn::Offsets & out_offsets = column_offsets->getData(); + + size_t in_pos = 0; + size_t out_pos = 0; + for (size_t i = 0; i < in_offsets.size(); ++i) + { + for (; in_pos < in_offsets[i]; ++in_pos) + { + if (filter[in_pos]) + ++out_pos; + } + out_offsets[i] = out_pos; + } + + return ColumnMap::create(ColumnArray::create(filtered, std::move(column_offsets))); + } +}; + + +/** mapApply((k,v) -> expression, map) - apply the expression to the map. + */ +struct MapApplyImpl +{ + using data_type = DataTypeMap; + using column_type = ColumnMap; + + static constexpr auto name = "mapApply"; + + /// true if the expression (for an overload of f(expression, maps)) or a map (for f(map)) should be boolean. 
+ static bool needBoolean() { return false; } + static bool needExpression() { return true; } + static bool needOneArray() { return true; } + + static DataTypePtr getReturnType(const DataTypePtr & expression_return, const DataTypes & /*elems*/) + { + const auto * tuple_types = typeid_cast(expression_return.get()); + if (!tuple_types) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Expected return type is tuple, got {}", expression_return->getName()); + if (tuple_types->getElements().size() != 2) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Expected 2 columns as map's key and value, but found {}", tuple_types->getElements().size()); + + return std::make_shared(tuple_types->getElements()); + } + + static ColumnPtr execute(const ColumnMap & map, ColumnPtr mapped) + { + const auto * column_tuple = checkAndGetColumn(mapped.get()); + if (!column_tuple) + { + const ColumnConst * column_const_tuple = checkAndGetColumnConst(mapped.get()); + if (!column_const_tuple) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Expected tuple column, found {}", mapped->getName()); + auto cols = convertConstTupleToConstantElements(*column_const_tuple); + return ColumnMap::create(cols[0]->convertToFullColumnIfConst(), cols[1]->convertToFullColumnIfConst(), map.getNestedColumn().getOffsetsPtr()); + } + + return ColumnMap::create(column_tuple->getColumnPtr(0), column_tuple->getColumnPtr(1), + map.getNestedColumn().getOffsetsPtr()); + } +}; + +void registerFunctionMapApply(FunctionFactory & factory) +{ + factory.registerFunction>(); + factory.registerFunction>(); +} + +} + + diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 070a7c2f05e..7ed0ee00954 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -117,7 +117,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { - ColumnsWithTypeAndName arguments = std::move(args); + ColumnsWithTypeAndName arguments = args; executeShortCircuitArguments(arguments); /** We will gather values from columns in branches to result column, * depending on values of conditions. 
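(Editor's note, not part of the patch: the map.cpp and mapFilter.cpp additions above introduce mapUpdate, mapFilter and mapApply over ClickHouse's columnar ColumnMap layout; mapApply additionally transforms each (key, value) pair through a lambda. The following is a minimal sketch of the intended semantics only, written against std::map with hypothetical helper names, assuming mapUpdate lets entries from the second map override the first and mapFilter keeps only the pairs matching a predicate.)

#include <cassert>
#include <map>
#include <string>

// Hypothetical helpers sketching the semantics only; the real implementation above
// works on ColumnMap offsets/keys/values, not on std::map.
template <typename K, typename V, typename Pred>
std::map<K, V> mapFilterSketch(const std::map<K, V> & m, Pred pred)
{
    std::map<K, V> result;
    for (const auto & [k, v] : m)
        if (pred(k, v))                 // keep only the (k, v) pairs for which the predicate is true
            result.emplace(k, v);
    return result;
}

template <typename K, typename V>
std::map<K, V> mapUpdateSketch(std::map<K, V> left, const std::map<K, V> & right)
{
    for (const auto & [k, v] : right)   // entries from the right map override entries from the left map
        left[k] = v;
    return left;
}

int main()
{
    std::map<std::string, int> m{{"a", 1}, {"b", 2}, {"c", 3}};

    auto filtered = mapFilterSketch(m, [](const std::string &, int v) { return v % 2 == 1; });
    assert(filtered.size() == 2 && filtered.count("b") == 0);

    auto updated = mapUpdateSketch(m, {{"b", 20}, {"d", 4}});
    assert(updated.at("b") == 20 && updated.at("d") == 4);
}

(One difference worth noting: the columnar implementation above preserves the per-row entry order, appending the right-hand entries after the unmatched left-hand ones, whereas std::map in this sketch keeps keys sorted.)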
diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index bd1038b1fc6..e7d9011db53 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -152,7 +152,7 @@ public: for (const auto & arg : arguments) arg_types.push_back(arg.type); - return std::make_unique(nowSubsecond(scale), std::move(arg_types), std::move(result_type)); + return std::make_unique(nowSubsecond(scale), std::move(arg_types), result_type); } }; diff --git a/src/Functions/nullIf.cpp b/src/Functions/nullIf.cpp index c54bbc08bcd..0b4d024c91c 100644 --- a/src/Functions/nullIf.cpp +++ b/src/Functions/nullIf.cpp @@ -61,7 +61,7 @@ public: auto func_if = FunctionFactory::instance().get("if", context)->build(if_columns); auto if_res = func_if->execute(if_columns, result_type, input_rows_count); - return makeNullable(std::move(if_res)); + return makeNullable(if_res); } }; diff --git a/src/Functions/registerFunctionsHigherOrder.cpp b/src/Functions/registerFunctionsHigherOrder.cpp index d3621a03ecd..00bea58b918 100644 --- a/src/Functions/registerFunctionsHigherOrder.cpp +++ b/src/Functions/registerFunctionsHigherOrder.cpp @@ -18,6 +18,7 @@ void registerFunctionsArraySort(FunctionFactory & factory); void registerFunctionArrayCumSum(FunctionFactory & factory); void registerFunctionArrayCumSumNonNegative(FunctionFactory & factory); void registerFunctionArrayDifference(FunctionFactory & factory); +void registerFunctionMapApply(FunctionFactory & factory); void registerFunctionsHigherOrder(FunctionFactory & factory) { @@ -36,6 +37,7 @@ void registerFunctionsHigherOrder(FunctionFactory & factory) registerFunctionArrayCumSum(factory); registerFunctionArrayCumSumNonNegative(factory); registerFunctionArrayDifference(factory); + registerFunctionMapApply(factory); } } diff --git a/src/Functions/timezoneOf.cpp b/src/Functions/timezoneOf.cpp index 03c9e27a3a8..97e025bc0e0 100644 --- a/src/Functions/timezoneOf.cpp +++ b/src/Functions/timezoneOf.cpp @@ -21,7 +21,7 @@ namespace /** timezoneOf(x) - get the name of the timezone of DateTime data type. - * Example: Europe/Moscow. + * Example: Pacific/Pitcairn. 
*/ class FunctionTimezoneOf : public IFunction { @@ -74,4 +74,3 @@ void registerFunctionTimezoneOf(FunctionFactory & factory) } } - diff --git a/src/Functions/toFixedString.h b/src/Functions/toFixedString.h index 129e3e0e8b2..cbd29784271 100644 --- a/src/Functions/toFixedString.h +++ b/src/Functions/toFixedString.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB diff --git a/src/IO/AIO.cpp b/src/IO/AIO.cpp index 97e5a470463..fb762271e4d 100644 --- a/src/IO/AIO.cpp +++ b/src/IO/AIO.cpp @@ -55,12 +55,12 @@ AIOContext::~AIOContext() io_destroy(ctx); } -AIOContext::AIOContext(AIOContext && rhs) +AIOContext::AIOContext(AIOContext && rhs) noexcept { *this = std::move(rhs); } -AIOContext & AIOContext::operator=(AIOContext && rhs) +AIOContext & AIOContext::operator=(AIOContext && rhs) noexcept { std::swap(ctx, rhs.ctx); return *this; diff --git a/src/IO/AIO.h b/src/IO/AIO.h index 5149aa2eb71..202939638b7 100644 --- a/src/IO/AIO.h +++ b/src/IO/AIO.h @@ -26,20 +26,20 @@ int io_setup(unsigned nr, aio_context_t * ctxp); int io_destroy(aio_context_t ctx); /// last argument is an array of pointers technically speaking -int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]); +int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]); /// NOLINT -int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout); +int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout); /// NOLINT struct AIOContext : private boost::noncopyable { aio_context_t ctx = 0; - AIOContext() {} - AIOContext(unsigned int nr_events); + AIOContext() = default; + explicit AIOContext(unsigned int nr_events); ~AIOContext(); - AIOContext(AIOContext && rhs); - AIOContext & operator=(AIOContext && rhs); + AIOContext(AIOContext && rhs) noexcept; + AIOContext & operator=(AIOContext && rhs) noexcept; }; #elif defined(OS_FREEBSD) diff --git a/src/IO/Archives/IArchiveWriter.h b/src/IO/Archives/IArchiveWriter.h index 6879d470b62..3856d16fb89 100644 --- a/src/IO/Archives/IArchiveWriter.h +++ b/src/IO/Archives/IArchiveWriter.h @@ -29,7 +29,7 @@ public: /// Sets compression method and level. /// Changing them will affect next file in the archive. - virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} + virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} /// NOLINT /// Sets password. If the password is not empty it will enable encryption in the archive. 
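(Editor's note, not part of the patch: several hunks in this region, AIOContext here and the archive HandleHolder and Progress types further down, mark move constructors and move assignment operators noexcept. A plausible motivation, shown in the standalone sketch below with illustrative type names, is that standard containers only move elements during reallocation when the move constructor cannot throw; otherwise they fall back to copying via std::move_if_noexcept.)

#include <iostream>
#include <type_traits>

// Illustrative types only. Containers such as std::vector consult
// std::is_nothrow_move_constructible (via std::move_if_noexcept) when reallocating:
// a potentially throwing move constructor makes them copy instead of move.
struct PlainMove
{
    PlainMove() = default;
    PlainMove(const PlainMove &) = default;
    PlainMove(PlainMove &&) {}                 // move not marked noexcept
};

struct NoexceptMove
{
    NoexceptMove() = default;
    NoexceptMove(const NoexceptMove &) = default;
    NoexceptMove(NoexceptMove &&) noexcept {}  // move declared noexcept, as in the hunks above
};

int main()
{
    std::cout << std::boolalpha
              << std::is_nothrow_move_constructible_v<PlainMove> << '\n'      // false
              << std::is_nothrow_move_constructible_v<NoexceptMove> << '\n';  // true
}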
virtual void setPassword(const String & /* password */) {} diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index 16604da62dc..4e83234615c 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -42,12 +42,12 @@ public: } } - HandleHolder(HandleHolder && src) + HandleHolder(HandleHolder && src) noexcept { *this = std::move(src); } - HandleHolder & operator =(HandleHolder && src) + HandleHolder & operator=(HandleHolder && src) noexcept { reader = std::exchange(src.reader, nullptr); raw_handle = std::exchange(src.raw_handle, nullptr); diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index f5ecea5e5aa..79192223657 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -46,12 +46,12 @@ public: } } - HandleHolder(HandleHolder && src) + HandleHolder(HandleHolder && src) noexcept { *this = std::move(src); } - HandleHolder & operator =(HandleHolder && src) + HandleHolder & operator=(HandleHolder && src) noexcept { writer = std::exchange(src.writer, nullptr); raw_handle = std::exchange(src.raw_handle, nullptr); diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index e79e72f3bec..4583f594c37 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -32,7 +32,7 @@ public: struct LocalFileDescriptor : public IFileDescriptor { - LocalFileDescriptor(int fd_) : fd(fd_) {} + explicit LocalFileDescriptor(int fd_) : fd(fd_) {} int fd; }; diff --git a/src/IO/BitHelpers.h b/src/IO/BitHelpers.h index d15297637a3..b96f43bdeff 100644 --- a/src/IO/BitHelpers.h +++ b/src/IO/BitHelpers.h @@ -52,8 +52,7 @@ public: bits_count(0) {} - ~BitReader() - {} + ~BitReader() = default; // reads bits_to_read high-bits from bits_buffer inline UInt64 readBits(UInt8 bits_to_read) diff --git a/src/IO/BrotliReadBuffer.cpp b/src/IO/BrotliReadBuffer.cpp index 77069746153..d2e954173a4 100644 --- a/src/IO/BrotliReadBuffer.cpp +++ b/src/IO/BrotliReadBuffer.cpp @@ -50,24 +50,29 @@ bool BrotliReadBuffer::nextImpl() if (eof_flag) return false; - if (!in_available) + do { - in->nextIfAtEnd(); - in_available = in->buffer().end() - in->position(); - in_data = reinterpret_cast(in->position()); + if (!in_available) + { + in->nextIfAtEnd(); + in_available = in->buffer().end() - in->position(); + in_data = reinterpret_cast(in->position()); + } + + if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in->eof())) + { + throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED); + } + + out_capacity = internal_buffer.size(); + out_data = reinterpret_cast(internal_buffer.begin()); + + brotli->result = BrotliDecoderDecompressStream(brotli->state, &in_available, &in_data, &out_capacity, &out_data, nullptr); + + in->position() = in->buffer().end() - in_available; } + while (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && out_capacity == internal_buffer.size()); - if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in->eof())) - { - throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED); - } - - out_capacity = internal_buffer.size(); - out_data = reinterpret_cast(internal_buffer.begin()); - - brotli->result = BrotliDecoderDecompressStream(brotli->state, &in_available, &in_data, &out_capacity, &out_data, nullptr); - - in->position() = in->buffer().end() - in_available; working_buffer.resize(internal_buffer.size() - out_capacity); if (brotli->result == 
BROTLI_DECODER_RESULT_SUCCESS) diff --git a/src/IO/BrotliReadBuffer.h b/src/IO/BrotliReadBuffer.h index 44a7dc7ddbd..cbb919e15ae 100644 --- a/src/IO/BrotliReadBuffer.h +++ b/src/IO/BrotliReadBuffer.h @@ -10,7 +10,7 @@ namespace DB class BrotliReadBuffer : public BufferWithOwnMemory { public: - BrotliReadBuffer( + explicit BrotliReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/Bzip2ReadBuffer.h b/src/IO/Bzip2ReadBuffer.h index de1e61ee388..cd5fadf9c82 100644 --- a/src/IO/Bzip2ReadBuffer.h +++ b/src/IO/Bzip2ReadBuffer.h @@ -10,7 +10,7 @@ namespace DB class Bzip2ReadBuffer : public BufferWithOwnMemory { public: - Bzip2ReadBuffer( + explicit Bzip2ReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/CascadeWriteBuffer.h b/src/IO/CascadeWriteBuffer.h index db0d1e7a5a8..ebd4f262aa2 100644 --- a/src/IO/CascadeWriteBuffer.h +++ b/src/IO/CascadeWriteBuffer.h @@ -31,7 +31,7 @@ public: using WriteBufferConstructor = std::function; using WriteBufferConstructors = std::vector; - CascadeWriteBuffer(WriteBufferPtrs && prepared_sources_, WriteBufferConstructors && lazy_sources_ = {}); + explicit CascadeWriteBuffer(WriteBufferPtrs && prepared_sources_, WriteBufferConstructors && lazy_sources_ = {}); void nextImpl() override; diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index eaab7560e6a..f6daec78170 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -65,7 +65,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s file_extension = path.substr(pos + 1, std::string::npos); } - std::string method_str = file_extension.empty() ? hint : std::move(file_extension); + std::string method_str; + + if (file_extension.empty()) + method_str = hint; + else + method_str = std::move(file_extension); + boost::algorithm::to_lower(method_str); if (method_str == "gzip" || method_str == "gz") diff --git a/src/IO/DoubleConverter.h b/src/IO/DoubleConverter.h index 0896aca717e..75429967390 100644 --- a/src/IO/DoubleConverter.h +++ b/src/IO/DoubleConverter.h @@ -29,9 +29,6 @@ template <> struct DoubleToStringConverterFlags template class DoubleConverter : private boost::noncopyable { - DoubleConverter(const DoubleConverter &) = delete; - DoubleConverter & operator=(const DoubleConverter &) = delete; - DoubleConverter() = default; public: diff --git a/src/IO/FileEncryptionCommon.h b/src/IO/FileEncryptionCommon.h index 28d924e6d81..bb6c8d14893 100644 --- a/src/IO/FileEncryptionCommon.h +++ b/src/IO/FileEncryptionCommon.h @@ -56,7 +56,7 @@ public: /// Adds a specified offset to the counter. 
InitVector & operator++() { ++counter; return *this; } - InitVector operator++(int) { InitVector res = *this; ++counter; return res; } + InitVector operator++(int) { InitVector res = *this; ++counter; return res; } /// NOLINT InitVector & operator+=(size_t offset) { counter += offset; return *this; } InitVector operator+(size_t offset) const { InitVector res = *this; return res += offset; } diff --git a/src/IO/HadoopSnappyReadBuffer.cpp b/src/IO/HadoopSnappyReadBuffer.cpp index 324df67e900..cac05b4827b 100644 --- a/src/IO/HadoopSnappyReadBuffer.cpp +++ b/src/IO/HadoopSnappyReadBuffer.cpp @@ -11,7 +11,6 @@ #include "HadoopSnappyReadBuffer.h" - namespace DB { namespace ErrorCodes @@ -32,11 +31,11 @@ inline bool HadoopSnappyDecoder::checkAvailIn(size_t avail_in, int min) inline void HadoopSnappyDecoder::copyToBuffer(size_t * avail_in, const char ** next_in) { - assert(*avail_in <= sizeof(buffer)); + assert(*avail_in + buffer_length <= sizeof(buffer)); - memcpy(buffer, *next_in, *avail_in); + memcpy(buffer + buffer_length, *next_in, *avail_in); - buffer_length = *avail_in; + buffer_length += *avail_in; *next_in += *avail_in; *avail_in = 0; } @@ -78,14 +77,21 @@ inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readLength(size_t * avai inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlockLength(size_t * avail_in, const char ** next_in) { if (block_length < 0) + { return readLength(avail_in, next_in, &block_length); + } return Status::OK; } inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readCompressedLength(size_t * avail_in, const char ** next_in) { if (compressed_length < 0) - return readLength(avail_in, next_in, &compressed_length); + { + auto status = readLength(avail_in, next_in, &compressed_length); + if (unlikely(compressed_length > 0 && static_cast(compressed_length) > sizeof(buffer))) + throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "Too large snappy compressed block. 
buffer size: {}, compressed block size: {}", sizeof(buffer), compressed_length); + return status; + } return Status::OK; } @@ -111,7 +117,6 @@ HadoopSnappyDecoder::readCompressedData(size_t * avail_in, const char ** next_in { compressed = const_cast(*next_in); } - size_t uncompressed_length = *avail_out; auto status = snappy_uncompress(compressed, compressed_length, *next_out, &uncompressed_length); if (status != SNAPPY_OK) @@ -154,7 +159,9 @@ HadoopSnappyDecoder::Status HadoopSnappyDecoder::readBlock(size_t * avail_in, co return status; } if (total_uncompressed_length != block_length) + { return Status::INVALID_INPUT; + } return Status::OK; } diff --git a/src/IO/HashingWriteBuffer.h b/src/IO/HashingWriteBuffer.h index bd00a2b12da..bf636deeb07 100644 --- a/src/IO/HashingWriteBuffer.h +++ b/src/IO/HashingWriteBuffer.h @@ -17,7 +17,7 @@ class IHashingBuffer : public BufferWithOwnMemory public: using uint128 = CityHash_v1_0_2::uint128; - IHashingBuffer(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) + explicit IHashingBuffer(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) : BufferWithOwnMemory(block_size_), block_pos(0), block_size(block_size_), state(0, 0) { } @@ -66,7 +66,7 @@ private: } public: - HashingWriteBuffer( + explicit HashingWriteBuffer( WriteBuffer & out_, size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) : IHashingBuffer(block_size_), out(out_) diff --git a/src/IO/LZMAInflatingReadBuffer.h b/src/IO/LZMAInflatingReadBuffer.h index 2d676eeeeb3..920345ee09c 100644 --- a/src/IO/LZMAInflatingReadBuffer.h +++ b/src/IO/LZMAInflatingReadBuffer.h @@ -11,7 +11,7 @@ namespace DB class LZMAInflatingReadBuffer : public BufferWithOwnMemory { public: - LZMAInflatingReadBuffer( + explicit LZMAInflatingReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/Lz4DeflatingWriteBuffer.h b/src/IO/Lz4DeflatingWriteBuffer.h index a27cb42a6e7..68873b5f8ee 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.h +++ b/src/IO/Lz4DeflatingWriteBuffer.h @@ -29,7 +29,7 @@ private: void finalizeBefore() override; void finalizeAfter() override; - LZ4F_preferences_t kPrefs; + LZ4F_preferences_t kPrefs; /// NOLINT LZ4F_compressionContext_t ctx; void * in_data; diff --git a/src/IO/Lz4InflatingReadBuffer.h b/src/IO/Lz4InflatingReadBuffer.h index d4d81f8765c..9921939d453 100644 --- a/src/IO/Lz4InflatingReadBuffer.h +++ b/src/IO/Lz4InflatingReadBuffer.h @@ -14,7 +14,7 @@ namespace DB class Lz4InflatingReadBuffer : public BufferWithOwnMemory { public: - Lz4InflatingReadBuffer( + explicit Lz4InflatingReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/MMapReadBufferFromFileDescriptor.h b/src/IO/MMapReadBufferFromFileDescriptor.h index 03718a61a6c..1715c2200fb 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.h +++ b/src/IO/MMapReadBufferFromFileDescriptor.h @@ -18,7 +18,7 @@ public: off_t seek(off_t off, int whence) override; protected: - MMapReadBufferFromFileDescriptor() {} + MMapReadBufferFromFileDescriptor() = default; void init(); MMappedFileDescriptor mapped; diff --git a/src/IO/MMappedFileCache.h b/src/IO/MMappedFileCache.h index adbb85a18cf..fe5e7e8e1f7 100644 --- a/src/IO/MMappedFileCache.h +++ b/src/IO/MMappedFileCache.h @@ -27,7 +27,7 @@ private: using Base = LRUCache; public: - MMappedFileCache(size_t max_size_in_bytes) + explicit MMappedFileCache(size_t max_size_in_bytes) : Base(max_size_in_bytes) {} /// Calculate key from path to file and 
offset. diff --git a/src/IO/MMappedFileDescriptor.h b/src/IO/MMappedFileDescriptor.h index 01dc7e1866c..2611093643f 100644 --- a/src/IO/MMappedFileDescriptor.h +++ b/src/IO/MMappedFileDescriptor.h @@ -22,7 +22,7 @@ public: MMappedFileDescriptor(int fd_, size_t offset_); /// Makes empty object that can be initialized with `set`. - MMappedFileDescriptor() {} + MMappedFileDescriptor() = default; virtual ~MMappedFileDescriptor(); @@ -40,10 +40,11 @@ public: void set(int fd_, size_t offset_, size_t length_); void set(int fd_, size_t offset_); -protected: MMappedFileDescriptor(const MMappedFileDescriptor &) = delete; MMappedFileDescriptor(MMappedFileDescriptor &&) = delete; +protected: + void init(); int fd = -1; diff --git a/src/IO/MemoryReadWriteBuffer.h b/src/IO/MemoryReadWriteBuffer.h index f9c11084f62..bcaf9a9a965 100644 --- a/src/IO/MemoryReadWriteBuffer.h +++ b/src/IO/MemoryReadWriteBuffer.h @@ -18,7 +18,7 @@ class MemoryWriteBuffer : public WriteBuffer, public IReadableWriteBuffer, boost public: /// Use max_total_size_ = 0 for unlimited storage - MemoryWriteBuffer( + explicit MemoryWriteBuffer( size_t max_total_size_ = 0, size_t initial_chunk_size_ = DBMS_DEFAULT_BUFFER_SIZE, double growth_rate_ = 2.0, diff --git a/src/IO/MySQLPacketPayloadWriteBuffer.h b/src/IO/MySQLPacketPayloadWriteBuffer.h index f54bec06dfb..d4ce8a8955e 100644 --- a/src/IO/MySQLPacketPayloadWriteBuffer.h +++ b/src/IO/MySQLPacketPayloadWriteBuffer.h @@ -13,7 +13,7 @@ class MySQLPacketPayloadWriteBuffer : public WriteBuffer public: MySQLPacketPayloadWriteBuffer(WriteBuffer & out_, size_t payload_length_, uint8_t & sequence_id_); - bool remainingPayloadSize() { return total_left; } + bool remainingPayloadSize() const { return total_left; } protected: void nextImpl() override; diff --git a/src/IO/NullWriteBuffer.h b/src/IO/NullWriteBuffer.h index 233268474d3..615a9bf5cef 100644 --- a/src/IO/NullWriteBuffer.h +++ b/src/IO/NullWriteBuffer.h @@ -11,7 +11,7 @@ namespace DB class NullWriteBuffer : public BufferWithOwnMemory, boost::noncopyable { public: - NullWriteBuffer(size_t buf_size = 16<<10, char * existing_memory = nullptr, size_t alignment = false); + explicit NullWriteBuffer(size_t buf_size = 16<<10, char * existing_memory = nullptr, size_t alignment = false); void nextImpl() override; }; diff --git a/src/IO/Progress.cpp b/src/IO/Progress.cpp index ebc7d04e86e..1d16f54de7b 100644 --- a/src/IO/Progress.cpp +++ b/src/IO/Progress.cpp @@ -126,7 +126,7 @@ ProgressValues Progress::fetchAndResetPiecewiseAtomically() return res; } -Progress & Progress::operator=(Progress && other) +Progress & Progress::operator=(Progress && other) noexcept { read_rows = other.read_rows.load(std::memory_order_relaxed); read_bytes = other.read_bytes.load(std::memory_order_relaxed); diff --git a/src/IO/Progress.h b/src/IO/Progress.h index c00eea98ff4..4f1a3df0ffd 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -56,7 +56,7 @@ struct FileProgress size_t read_bytes; size_t total_bytes_to_read; - FileProgress(size_t read_bytes_, size_t total_bytes_to_read_ = 0) : read_bytes(read_bytes_), total_bytes_to_read(total_bytes_to_read_) {} + explicit FileProgress(size_t read_bytes_, size_t total_bytes_to_read_ = 0) : read_bytes(read_bytes_), total_bytes_to_read(total_bytes_to_read_) {} }; @@ -111,9 +111,9 @@ struct Progress ProgressValues fetchAndResetPiecewiseAtomically(); - Progress & operator=(Progress && other); + Progress & operator=(Progress && other) noexcept; - Progress(Progress && other) + Progress(Progress && other) noexcept { *this = 
std::move(other); } diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 4a2e208c7b3..b620f0c49c6 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -229,9 +229,11 @@ public: virtual void prefetch() {} /** - * For reading from remote filesystem, when it matters how much we read. + * Set upper bound for read range [..., position). + * Required for reading from remote filesystem, when it matters how much we read. */ virtual void setReadUntilPosition(size_t /* position */) {} + virtual void setReadUntilEnd() {} protected: diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index 53749ad3199..78d973747ba 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -33,6 +33,8 @@ public: bool nextImpl() override; + size_t getFileOffsetOfBufferEnd() const override { return offset; } + private: void initialize(); diff --git a/src/IO/ReadBufferFromEmptyFile.h b/src/IO/ReadBufferFromEmptyFile.h index 311aee1559b..0a14c07dd5c 100644 --- a/src/IO/ReadBufferFromEmptyFile.h +++ b/src/IO/ReadBufferFromEmptyFile.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h index ff19fa40fdf..52b18b94616 100644 --- a/src/IO/ReadBufferFromFile.h +++ b/src/IO/ReadBufferFromFile.h @@ -49,6 +49,10 @@ public: { return file_name; } + + Range getRemainingReadRange() const override { return Range{ .left = file_offset_of_buffer_end, .right = std::nullopt }; } + + size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } }; @@ -57,7 +61,7 @@ public: class ReadBufferFromFilePRead : public ReadBufferFromFile { public: - ReadBufferFromFilePRead( + explicit ReadBufferFromFilePRead( const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, @@ -80,7 +84,7 @@ private: OpenedFileCache::OpenedFilePtr file; public: - ReadBufferFromFilePReadWithDescriptorsCache( + explicit ReadBufferFromFilePReadWithDescriptorsCache( const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 188cdd709b5..ba1502fb9aa 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -27,7 +27,7 @@ protected: std::string getFileName() const override; public: - ReadBufferFromFileDescriptor( + explicit ReadBufferFromFileDescriptor( int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, @@ -70,7 +70,7 @@ private: class ReadBufferFromFileDescriptorPRead : public ReadBufferFromFileDescriptor { public: - ReadBufferFromFileDescriptorPRead( + explicit ReadBufferFromFileDescriptorPRead( int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 869432b9484..93bbe02c9cd 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -42,7 +42,8 @@ ReadBufferFromS3::ReadBufferFromS3( UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer_, - size_t read_until_position_) + size_t read_until_position_, + bool restricted_seek_) : SeekableReadBufferWithSize(nullptr, 0) , client_ptr(std::move(client_ptr_)) , bucket(bucket_) @@ -51,6 +52,7 @@ ReadBufferFromS3::ReadBufferFromS3( , read_settings(settings_) , use_external_buffer(use_external_buffer_) , 
read_until_position(read_until_position_) + , restricted_seek(restricted_seek_) { } @@ -152,10 +154,14 @@ bool ReadBufferFromS3::nextImpl() off_t ReadBufferFromS3::seek(off_t offset_, int whence) { - bool restricted_seek = read_type == SeekableReadBufferWithSize::ReadType::DISK_READ; + if (offset_ == offset && whence == SEEK_SET) + return offset; if (impl && restricted_seek) - throw Exception("Seek is allowed only before first read attempt from the buffer.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception( + ErrorCodes::CANNOT_SEEK_THROUGH_FILE, + "Seek is allowed only before first read attempt from the buffer (current offset: {}, new offset: {}, reading until position: {}, available: {})", + offset, offset_, read_until_position, available()); if (whence != SEEK_SET) throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); @@ -219,6 +225,15 @@ off_t ReadBufferFromS3::getPosition() return offset - available(); } +void ReadBufferFromS3::setReadUntilPosition(size_t position) +{ + if (position != static_cast(read_until_position)) + { + read_until_position = position; + impl.reset(); + } +} + std::unique_ptr ReadBufferFromS3::initialize() { Aws::S3::Model::GetObjectRequest req; @@ -249,7 +264,9 @@ std::unique_ptr ReadBufferFromS3::initialize() if (outcome.IsSuccess()) { read_result = outcome.GetResultWithOwnership(); - return std::make_unique(read_result.GetBody(), read_settings.remote_fs_buffer_size); + + size_t buffer_size = use_external_buffer ? 0 : read_settings.remote_fs_buffer_size; + return std::make_unique(read_result.GetBody(), buffer_size); } else throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index e903ba11118..157b6d46b6d 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -31,6 +31,7 @@ private: String key; UInt64 max_single_read_retries; off_t offset = 0; + Aws::S3::Model::GetObjectResult read_result; std::unique_ptr impl; @@ -44,7 +45,8 @@ public: UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer = false, - size_t read_until_position_ = 0); + size_t read_until_position_ = 0, + bool restricted_seek_ = false); bool nextImpl() override; @@ -54,6 +56,12 @@ public: std::optional getTotalSize() override; + void setReadUntilPosition(size_t position) override; + + Range getRemainingReadRange() const override { return Range{ .left = static_cast(offset), .right = read_until_position }; } + + size_t getFileOffsetOfBufferEnd() const override { return offset; } + private: std::unique_ptr initialize(); @@ -62,6 +70,10 @@ private: bool use_external_buffer; off_t read_until_position = 0; + + /// There is different seek policy for disk seek and for non-disk seek + /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet). 
+ bool restricted_seek; }; } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 5d580f6b130..fd2c4218aef 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -106,7 +106,7 @@ inline void readChar(char & x, ReadBuffer & buf) template inline void readPODBinary(T & x, ReadBuffer & buf) { - buf.readStrict(reinterpret_cast(&x), sizeof(x)); + buf.readStrict(reinterpret_cast(&x), sizeof(x)); /// NOLINT } template @@ -611,7 +611,7 @@ void readStringUntilNewlineInto(Vector & s, ReadBuffer & buf); struct NullOutput { void append(const char *, size_t) {} - void push_back(char) {} + void push_back(char) {} /// NOLINT }; void parseUUID(const UInt8 * src36, UInt8 * dst16); @@ -686,6 +686,16 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) return readDateTextFallback(date, buf); } +inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) +{ + if (unlikely(from < 0)) + date = 0; + else if (unlikely(from > 0xFFFF)) + date = 0xFFFF; + else + date = from; +} + template inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) { @@ -698,7 +708,8 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf) else if (!readDateTextImpl(local_date, buf)) return false; - date = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); + ExtendedDayNum ret = DateLUT::instance().makeDayNum(local_date.year(), local_date.month(), local_date.day()); + convertToDayNum(date,ret); return ReturnType(true); } @@ -1267,7 +1278,6 @@ inline void readTextWithSizeSuffix(T & x, ReadBuffer & buf) default: return; } - return; } /// Read something from text format and trying to parse the suffix. diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index e290cbab36b..e321eecf104 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -76,9 +77,13 @@ struct ReadSettings size_t remote_fs_read_max_backoff_ms = 10000; size_t remote_fs_read_backoff_max_tries = 4; + bool remote_fs_enable_cache = true; + size_t remote_fs_cache_max_wait_sec = 1; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; + FileCachePtr remote_fs_cache; + size_t http_max_tries = 1; size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 1600; diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 4e08a595484..fe4def7fc49 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -320,11 +320,22 @@ namespace detail } catch (...) 
{ - if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND + auto http_status = response.getStatus(); + + if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url) { initialization_error = InitializeError::SKIP_NOT_FOUND_URL; } + else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED) + { + initialization_error = InitializeError::NON_RETRIABLE_ERROR; + exception = std::current_exception(); + } else { throw; diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 2647e254626..defd029f05a 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -49,13 +49,13 @@ class PocoHTTPResponse : public Aws::Http::Standard::StandardHttpResponse public: using SessionPtr = HTTPSessionPtr; - PocoHTTPResponse(const std::shared_ptr request) + explicit PocoHTTPResponse(const std::shared_ptr request) : Aws::Http::Standard::StandardHttpResponse(request) , body_stream(request->GetResponseStreamFactory()) { } - void SetResponseBody(Aws::IStream & incoming_stream, SessionPtr & session_) + void SetResponseBody(Aws::IStream & incoming_stream, SessionPtr & session_) /// NOLINT { body_stream = Aws::Utils::Stream::ResponseStream( Aws::New>("http result streambuf", session_, incoming_stream.rdbuf()) diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 72774499445..97cb4f74f90 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -49,7 +49,6 @@ public: private: ClientFactory(); -private: Aws::SDKOptions aws_options; }; diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index 2dc901ccfd9..3a46630350a 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -6,6 +6,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + class SeekableReadBuffer : public ReadBuffer { public: @@ -32,6 +38,26 @@ public: * @return Offset from the begin of the underlying buffer / file corresponds to the buffer current position. */ virtual off_t getPosition() = 0; + + struct Range + { + size_t left; + std::optional right; + }; + + /** + * Returns a struct, where `left` is current read position in file and `right` is the + * last included offset for reading according to setReadUntilPosition() or setReadUntilEnd(). + * E.g. next nextImpl() call will read within range [left, right]. + */ + virtual Range getRemainingReadRange() const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRemainingReadRange() not implemented"); + } + + virtual String getInfoForLog() { return ""; } + + virtual size_t getFileOffsetOfBufferEnd() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFileOffsetOfBufferEnd() not implemented"); } }; using SeekableReadBufferPtr = std::shared_ptr; @@ -48,22 +74,7 @@ public: /// set std::nullopt in case it is impossible to find out total size. virtual std::optional getTotalSize() = 0; - /** - * Some buffers might have different seek restrictions according to where it is used. - * For example, ReadBufferFromS3 and ReadBufferFromWebServer, when used for reading - * from remote disks, require some additional invariants and restrictions, which - * are not needed in other cases. 
- */ - enum class ReadType - { - DEFAULT, - DISK_READ - }; - - void setReadType(ReadType type) { read_type = type; } - protected: - ReadType read_type = ReadType::DEFAULT; std::optional file_size; }; diff --git a/src/IO/UncompressedCache.h b/src/IO/UncompressedCache.h index 5826b7f020a..93ca1235a42 100644 --- a/src/IO/UncompressedCache.h +++ b/src/IO/UncompressedCache.h @@ -42,7 +42,7 @@ private: using Base = LRUCache; public: - UncompressedCache(size_t max_size_in_bytes) + explicit UncompressedCache(size_t max_size_in_bytes) : Base(max_size_in_bytes) {} /// Calculate key from path to file and offset. diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 50fc158ba76..29c8a60c935 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -132,7 +132,7 @@ inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr) if (istr.eof()) throwReadAfterEOF(); - UInt64 byte = *istr.position(); + UInt64 byte = *istr.position(); /// NOLINT ++istr.position(); x |= (byte & 0x7F) << (7 * i); @@ -172,7 +172,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) if (istr == end) throwReadAfterEOF(); - UInt64 byte = *istr; + UInt64 byte = *istr; /// NOLINT ++istr; x |= (byte & 0x7F) << (7 * i); diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index 988b0be7d00..3363a568bac 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -28,7 +28,7 @@ protected: CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; public: - WriteBufferFromFile( + explicit WriteBufferFromFile( const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, @@ -37,7 +37,7 @@ public: size_t alignment = 0); /// Use pre-opened file descriptor. - WriteBufferFromFile( + explicit WriteBufferFromFile( int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. 
const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/IO/WriteBufferFromFileDescriptor.h b/src/IO/WriteBufferFromFileDescriptor.h index b065e22cf95..cc69567932f 100644 --- a/src/IO/WriteBufferFromFileDescriptor.h +++ b/src/IO/WriteBufferFromFileDescriptor.h @@ -11,7 +11,7 @@ namespace DB class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase { public: - WriteBufferFromFileDescriptor( + explicit WriteBufferFromFileDescriptor( int fd_ = -1, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h b/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h index 53e01c3cb26..2803dd4e8bf 100644 --- a/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h +++ b/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h @@ -17,7 +17,7 @@ protected: public: using WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor; - ~WriteBufferFromFileDescriptorDiscardOnFailure() override {} + ~WriteBufferFromFileDescriptorDiscardOnFailure() override = default; }; } diff --git a/src/IO/WriteBufferFromOStream.h b/src/IO/WriteBufferFromOStream.h index ea3301fef18..f8b45c2fa59 100644 --- a/src/IO/WriteBufferFromOStream.h +++ b/src/IO/WriteBufferFromOStream.h @@ -12,7 +12,7 @@ namespace DB class WriteBufferFromOStream : public BufferWithOwnMemory { public: - WriteBufferFromOStream( + explicit WriteBufferFromOStream( std::ostream & ostr_, size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, @@ -21,7 +21,7 @@ public: ~WriteBufferFromOStream() override; protected: - WriteBufferFromOStream(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0); + explicit WriteBufferFromOStream(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0); void nextImpl() override; diff --git a/src/IO/WriteBufferFromPocoSocket.h b/src/IO/WriteBufferFromPocoSocket.h index 2fb203189f3..295ca16ecaf 100644 --- a/src/IO/WriteBufferFromPocoSocket.h +++ b/src/IO/WriteBufferFromPocoSocket.h @@ -14,7 +14,7 @@ namespace DB class WriteBufferFromPocoSocket : public BufferWithOwnMemory { public: - WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); ~WriteBufferFromPocoSocket() override; diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 60c75b3c90c..eda7bb6f8ae 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -116,7 +116,14 @@ void WriteBufferFromS3::allocateBuffer() WriteBufferFromS3::~WriteBufferFromS3() { - finalize(); + try + { + finalize(); + } + catch (...) 
+ { + tryLogCurrentException(__PRETTY_FUNCTION__); + } } void WriteBufferFromS3::preFinalize() @@ -386,7 +393,7 @@ void WriteBufferFromS3::waitForReadyBackGroundTasks() while (!upload_object_tasks.empty() && upload_object_tasks.front().is_finised) { auto & task = upload_object_tasks.front(); - auto exception = std::move(task.exception); + auto exception = task.exception; auto tag = std::move(task.tag); upload_object_tasks.pop_front(); @@ -413,7 +420,7 @@ void WriteBufferFromS3::waitForAllBackGroundTasks() { auto & task = upload_object_tasks.front(); if (task.exception) - std::rethrow_exception(std::move(task.exception)); + std::rethrow_exception(task.exception); part_tags.push_back(task.tag); @@ -424,7 +431,7 @@ void WriteBufferFromS3::waitForAllBackGroundTasks() { bg_tasks_condvar.wait(lock, [this]() { return put_object_task->is_finised; }); if (put_object_task->exception) - std::rethrow_exception(std::move(put_object_task->exception)); + std::rethrow_exception(put_object_task->exception); } } } diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 8b89626ee18..a4fbcbcdeeb 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -6,6 +6,7 @@ # include # include +# include # include # include @@ -14,8 +15,6 @@ # include -# include - namespace Aws::S3 { class S3Client; diff --git a/src/IO/WriteBufferFromTemporaryFile.h b/src/IO/WriteBufferFromTemporaryFile.h index 642c36b9be6..06e2911db26 100644 --- a/src/IO/WriteBufferFromTemporaryFile.h +++ b/src/IO/WriteBufferFromTemporaryFile.h @@ -20,7 +20,7 @@ public: ~WriteBufferFromTemporaryFile() override; private: - WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); + explicit WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); std::shared_ptr getReadBufferImpl() override; diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 23ae3a70ef3..d74b366b8e2 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -67,7 +67,7 @@ private: void finalizeImpl() override final { vector.resize( - ((position() - reinterpret_cast(vector.data())) + ((position() - reinterpret_cast(vector.data())) /// NOLINT + sizeof(typename VectorType::value_type) - 1) /// Align up. 
/ sizeof(typename VectorType::value_type)); diff --git a/src/IO/WriteBufferValidUTF8.h b/src/IO/WriteBufferValidUTF8.h index 8b33593c930..daaf0427f88 100644 --- a/src/IO/WriteBufferValidUTF8.h +++ b/src/IO/WriteBufferValidUTF8.h @@ -16,7 +16,7 @@ class WriteBufferValidUTF8 final : public BufferWithOwnMemory public: static const size_t DEFAULT_SIZE; - WriteBufferValidUTF8( + explicit WriteBufferValidUTF8( WriteBuffer & output_buffer_, bool group_replacements_ = true, const char * replacement_ = "\xEF\xBF\xBD", diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index b41f621e0b9..9433d31027c 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -7,7 +7,7 @@ namespace DB { template -void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes) +void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) { size_t src_pos = 0; size_t dst_pos = 0; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index ca2c202014c..447a3ed0480 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -80,7 +80,7 @@ inline void writeChar(char c, size_t n, WriteBuffer & buf) template inline void writePODBinary(const T & x, WriteBuffer & buf) { - buf.write(reinterpret_cast(&x), sizeof(x)); + buf.write(reinterpret_cast(&x), sizeof(x)); /// NOLINT } template @@ -663,7 +663,7 @@ inline void writeXMLStringForTextElement(const StringRef & s, WriteBuffer & buf) } template -void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes); +void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes); void formatUUID(const UInt8 * src16, UInt8 * dst36); void formatUUID(std::reverse_iterator src16, UInt8 * dst36); diff --git a/src/IO/WriteIntText.h b/src/IO/WriteIntText.h index b8d2acc7d5d..c9a4cb0241a 100644 --- a/src/IO/WriteIntText.h +++ b/src/IO/WriteIntText.h @@ -5,22 +5,19 @@ #include -namespace -{ - template constexpr size_t max_int_width = 20; - template <> inline constexpr size_t max_int_width = 3; /// 255 - template <> inline constexpr size_t max_int_width = 4; /// -128 - template <> inline constexpr size_t max_int_width = 5; /// 65535 - template <> inline constexpr size_t max_int_width = 6; /// -32768 - template <> inline constexpr size_t max_int_width = 10; /// 4294967295 - template <> inline constexpr size_t max_int_width = 11; /// -2147483648 - template <> inline constexpr size_t max_int_width = 20; /// 18446744073709551615 - template <> inline constexpr size_t max_int_width = 20; /// -9223372036854775808 - template <> inline constexpr size_t max_int_width = 39; /// 340282366920938463463374607431768211455 - template <> inline constexpr size_t max_int_width = 40; /// -170141183460469231731687303715884105728 - template <> inline constexpr size_t max_int_width = 78; /// 115792089237316195423570985008687907853269984665640564039457584007913129639935 - template <> inline constexpr size_t max_int_width = 78; /// -57896044618658097711785492504343953926634992332820282019728792003956564819968 -} +template constexpr size_t max_int_width = 20; +template <> inline constexpr size_t max_int_width = 3; /// 255 +template <> inline constexpr size_t max_int_width = 4; /// -128 +template <> inline constexpr size_t max_int_width = 5; /// 65535 +template <> inline constexpr size_t max_int_width = 6; /// -32768 +template <> inline constexpr size_t max_int_width = 10; /// 4294967295 +template <> inline constexpr size_t max_int_width = 11; /// -2147483648 +template <> inline constexpr size_t max_int_width = 20; /// 18446744073709551615 +template <> inline 
constexpr size_t max_int_width = 20; /// -9223372036854775808 +template <> inline constexpr size_t max_int_width = 39; /// 340282366920938463463374607431768211455 +template <> inline constexpr size_t max_int_width = 40; /// -170141183460469231731687303715884105728 +template <> inline constexpr size_t max_int_width = 78; /// 115792089237316195423570985008687907853269984665640564039457584007913129639935 +template <> inline constexpr size_t max_int_width = 78; /// -57896044618658097711785492504343953926634992332820282019728792003956564819968 namespace DB diff --git a/src/IO/ZstdInflatingReadBuffer.h b/src/IO/ZstdInflatingReadBuffer.h index ec80b860e0e..7f246b02127 100644 --- a/src/IO/ZstdInflatingReadBuffer.h +++ b/src/IO/ZstdInflatingReadBuffer.h @@ -16,7 +16,7 @@ namespace ErrorCodes class ZstdInflatingReadBuffer : public BufferWithOwnMemory { public: - ZstdInflatingReadBuffer( + explicit ZstdInflatingReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/examples/hadoop_snappy_read_buffer.cpp b/src/IO/examples/hadoop_snappy_read_buffer.cpp index 9cb01e6d697..eeac3db40a7 100644 --- a/src/IO/examples/hadoop_snappy_read_buffer.cpp +++ b/src/IO/examples/hadoop_snappy_read_buffer.cpp @@ -38,6 +38,11 @@ int main() return 1; } } + if (uncompress(256) != output) + { + std::cout << "test hadoop snappy read buffer failed, buf_size:" << 256 << std::endl; + return 1; + } std::cout << "test hadoop snappy read buffer success" << std::endl; return 0; } diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 1c5b1cdb0c9..b6be7adbbee 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -154,7 +154,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) if (likely(!buf.eof() && buf.position() + MAX_LENGTH <= buf.buffer().end())) { - auto initial_position = buf.position(); + auto * initial_position = buf.position(); auto res = fast_float::from_chars(initial_position, buf.buffer().end(), x); if (unlikely(res.ec != std::errc())) diff --git a/src/IO/tests/gtest_hadoop_snappy_decoder.cpp b/src/IO/tests/gtest_hadoop_snappy_decoder.cpp new file mode 100644 index 00000000000..f681e8e61e1 --- /dev/null +++ b/src/IO/tests/gtest_hadoop_snappy_decoder.cpp @@ -0,0 +1,66 @@ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace DB; +TEST(HadoopSnappyDecoder, repeatNeedMoreInput) +{ + String snappy_base64_content = "AAAl6gAAB67qSzQyMDIxLTA2LTAxAXh4eGIEACQzMTkuNzQyNDMKnjEAHDQyLjgyMTcynjEAIDI5Ni4yODQwNqIxA" + "BgyNy43MjYzqpMAGDMuNTIyMzSiYgAcNjUuNDk1OTeiMQAYOTQuNTg1NaYxABg4OC40NzgyojEAHDMyMS4zOTE1os" + "QAHDM0Ni4xNTI3qjEAGDEuMjA3MTWm9QAQMi41MjamYQAcMjIuNTEyNDieYQAcMzMwLjI5MTKiGgIcMzIzLjAzNDi" + "iwwAcMzE1LjA1MDmiYgAcNDM1Ljc2ODaqxAAUMS45NDA5nvQACDAuMP4rAEorABwzMDMuMjAyNaYZARgwOC4xOTEy" + "pugAGDQ2LjQ0MjKilQMcMjc4Ljk3MTiiMQAcMzUwLjc3NTeirAGqSwEcMzI5LjkyMzGiXAAcMzMxLjc2NzamwAMUM" + "TMuNjM4pjEAGDI3NC4yMzK2MQAINDg0qrMBFDExLjgzNqbbBRgyNDkuNTI5qtsFGDUwLjE4ODmi5AGlSAgwNjWmiA" + "EUMjIuNjU4pqcCBDUzYcCqdgIYMDEuMzcxNbbPBgQ5Na5TBBA0Ljc1OaIiBMGdDDM0OTGeJwQcMjg3LjIyNTmm/AM" + "hVAAyopAAGDMxOC4wMjGmMAAB8AQ0OKpGAhgyMC42MTM4poMBFDg3LjEzOKoxABA5My4xNaZSARQ5NS41ODemTgVh" + "OQwwODg2osIAGDMyNi45NTSmMQAcMjc3LjgxNDmqjQcMNS42MqpqA0F3DDg2MDamzAPhKwQ4OKJWARgzMDYuMTc1q" + "i0EGDgwLjIwNTSihAUYMjk3LjY5NaYiBRAyOTAuM6aNBBgyMzkuMzI5pkIJwdOi7wcYMzcxLjIyNqpiBxQ0NS44Nz" + 
"Gq9woEODAOZAoANqJ+BRgyNzYuMjExpnYCIYIMMjIyOKKnAmVrBDc0psQAEDMwOS4xqtEJGDMwNC45MzSq8wAMNC4" + "0OKomCyG3DDE4MTGi/AMhJAQxMKqjBhgyNjEuNDQ4rqMGFDIuOTEwN6I5AwQzN7JMCQw2LjcwqqoMGDI2MC44NzOm" + "dwIOTAkMNDgzMqLSBhQyNTkuMjGmYweBiwg3MzOmyQMYNDM3Ljg1N6ZyBq5QARQzMy43MjSqKw4UMTIuNzkxpkkFD" + "mgNDDc4MzCmUAEUOTUuOTQypnoFDiQIDDI2ODmmBQMUNTEuMjc2qikEDtkJBDA1qgUDFDA3LjE1N6ZiAOGUCDMwOa" + "oxABA3NC42NqqmAhA5Ni45N6rIAxwzMDcuMjkzMaL+ChQyNzUuODau/QoANOExpugBGDI0Ny4xODSm5wEYOTEuNDE" + "3MZ7MChQzMzUuNjWquQQUNTMuODg1psMHDu8SCDIyOaYJDoFbCDk4M6aWDhwzNDEuNTcyMKK1AUF4ADSqCwoQMzg1" + "LjSujBIB9Aw0MDUwotoJDi4PCDc0N6aHARgyMjMuODMxpgYRwmcRGDIxMi4xNjWqSgIQMDkuODmuzgMYMTkuNTg0M" + "aK7CMFFADmuZQcQMDYuMzKqXwAIOS4zrl8ADu4PBDQ0qtQUGDQ3LjAzODGmFwIYMTAuOTIwMKLDAKG0DDM3MDOiYg" + "CqNgcORgkEMzeuGwWqXQAhqwg2MDWmSQUYMjY0LjE2N6aZFBIgFgQyM6aiCRQwNi41NTSm9AcYMjczLjczNqqSABg" + "0NS45OTIzpugPFDIxLjc3MqZ4EBwyODYuMDkyNKZAAhg0OS4yMjQzom8GDu0LCDEwNKaTBwAzDiUIADimGQkUMzM4" + "Ljc2qlITADcOmBUAOaYNBhwyNzAuODA4N6qrCQw3LjAwppkYwT4IMjYzrg0GDDMuOTmq/xEQMjIuODOqRgkEMjQOX" + "xKmQA0IMzAwDggVqjwREDY1LjYxsh8aCDQuOKrCBxgyNTQuNjQ2phMUISQENzmqsAwOLgsENTWqeAIQOTEuNTiuzR" + "EANw55CQAwpp8GEDI2My44rgsRFDI0LjMxNqZuBhIrFgAxqswDGDI4OS4zMzCqXwQANoHyADCmbAMUMzI4LjM2pps" + "DDDY1LjKBj57+Cg5PFwQ1NaoVBmFrADaqwgccMjk5LjgxMTCqdwYQMy4wODKmZwcEMzIOqBQAMaaCBRgyMjUuMTE2" + "qtkJADEOLw8AMKYwBBgyMzAuMTQyprwPGDMwMi4wMjemiAEOzQ4MODA0M6YaAhA1NC4yNKYkBWEMDsELqmEAFDIuN" + "jE4N6LNBxgyODMuNTM1qqUfFDk5Ljc4NKaaGQ5UEAgyNjSuqw2usgkYNDMuMDY0MZ5rAyHkCDMzOa6sHg6+CwAwpn" + "YGDnseCDk1MqaoAsHYDDgzNjeiLgsYMjg1LjkzMqZ1EQ67IQgyNTmmMQBB2Qg0OTamuhMUMjcxLjkzqpMWBDMyDoo" + "hADSmYgChhAg2NjimeAIQMzkxLjiqyw4IOTkuDt8bpoYBDDk0LjamaQMO4hAIOTI3qqQYFDQyLjk1M6oxAAQ4NA7G" + "HaZKIg6YCwwxNzYzpiQXFDkwLjk0OKqqAhQ5Ny4yNzSmvwQANg54GKq/CA4AIQg1MzOm/wMUNTYuNzQ2phcCHDM0N" + "S4wOTEyoswHDoAQCDA5M6rOGRA5MS42N6ZPGyQyNzUuNzExMTIK"; + String snappy_content; + Poco::MemoryInputStream istr(snappy_base64_content.data(), snappy_base64_content.size()); + Poco::Base64Decoder decoder(istr); + Poco::StreamCopier::copyToString(decoder, snappy_content); + auto file_writer = std::make_unique("./test.snappy"); + file_writer->write(snappy_content.c_str(), snappy_content.size()); + file_writer->close(); + std::unique_ptr in = std::make_unique("./test.snappy", 128); + HadoopSnappyReadBuffer read_buffer(std::move(in)); + String output; + WriteBufferFromString out(output); + copyData(read_buffer, out); + UInt128 hashcode = sipHash128(output.c_str(), output.size()); + String hashcode_str = getHexUIntLowercase(hashcode); + ASSERT_EQ(hashcode_str, "593afe14f61866915cc00b8c7bd86046"); +} diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 33d85afb7c3..cad451f8ef5 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -14,6 +14,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + +} namespace { void updateUserFromQueryImpl( @@ -22,7 +27,7 @@ namespace const std::shared_ptr & override_name, const std::optional & override_default_roles, const std::optional & override_settings, - const std::optional & override_grantees) + const std::optional & override_grantees, bool allow_no_password, bool allow_plaintext_password) { if (override_name) user.setName(override_name->toString()); @@ -30,10 +35,15 @@ namespace user.setName(query.new_name); else if (query.names->size() == 1) user.setName(query.names->front()->toString()); - if (query.auth_data) + { 
user.auth_data = *query.auth_data; - + // Creating or altering a user IDENTIFIED WITH AUTHTYPE PLAINTEXT or NO_PASSWORD should not be allowed if allow_plaintext_and_no_password is unset. + if ((query.auth_data->getType() == AuthenticationType::PLAINTEXT_PASSWORD && !allow_plaintext_password) || (query.auth_data->getType() == AuthenticationType::NO_PASSWORD && !allow_no_password)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "User is not allowed to ALTER/CREATE USERS with type " + toString(query.auth_data->getType()) + ". Please configure the user with auth type SHA256_PASSWORD or DOUBLE_SHA1_PASSWORD, or enable the setting allow_plaintext_and_no_password in the server configuration to allow " + toString(query.auth_data->getType()) + ". Using " + toString(query.auth_data->getType()) + " is not recommended."); + } if (override_name && !override_name->host_pattern.empty()) { user.allowed_client_hosts = AllowedClientHosts{}; @@ -75,13 +85,14 @@ } } - BlockIO InterpreterCreateUserQuery::execute() { const auto & query = query_ptr->as(); auto & access_control = getContext()->getAccessControl(); auto access = getContext()->getAccess(); access->checkAccess(query.alter ? AccessType::ALTER_USER : AccessType::CREATE_USER); + bool allow_plaintext_password = access_control.isPlaintextPasswordAllowed(); + bool allow_no_password = access_control.isNoPasswordAllowed(); std::optional default_roles_from_query; if (query.default_roles) @@ -93,10 +104,8 @@ BlockIO InterpreterCreateUserQuery::execute() access->checkAdminOption(role); } } - if (!query.cluster.empty()) return executeDDLQueryOnCluster(query_ptr, getContext()); - std::optional settings_from_query; if (query.settings) settings_from_query = SettingsProfileElements{*query.settings, access_control}; @@ -110,7 +119,7 @@ BlockIO InterpreterCreateUserQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_user = typeid_cast>(entity->clone()); - updateUserFromQueryImpl(*updated_user, query, {}, default_roles_from_query, settings_from_query, grantees_from_query); + updateUserFromQueryImpl(*updated_user, query, {}, default_roles_from_query, settings_from_query, grantees_from_query, allow_no_password, allow_plaintext_password); return updated_user; }; @@ -129,7 +138,7 @@ BlockIO InterpreterCreateUserQuery::execute() for (const auto & name : *query.names) { auto new_user = std::make_shared(); - updateUserFromQueryImpl(*new_user, query, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}); + updateUserFromQueryImpl(*new_user, query, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, allow_no_password, allow_plaintext_password); new_users.emplace_back(std::move(new_user)); } @@ -157,9 +166,9 @@ BlockIO InterpreterCreateUserQuery::execute() } -void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query) +void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password, bool allow_plaintext_password) { - updateUserFromQueryImpl(user, query, {}, {}, {}, {}); + updateUserFromQueryImpl(user, query, {}, {}, {}, {}, allow_no_password, allow_plaintext_password); } } diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.h b/src/Interpreters/Access/InterpreterCreateUserQuery.h index 7d357924d35..42d911c712b 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.h +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.h @@
-17,7 +17,7 @@ public: BlockIO execute() override; - static void updateUserFromQuery(User & user, const ASTCreateUserQuery & query); + static void updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password=true, bool allow_plaintext_password=true); private: ASTPtr query_ptr; diff --git a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp index e16ee03c711..d1d8ee63b8e 100644 --- a/src/Interpreters/Access/InterpreterShowAccessQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessQuery.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB @@ -76,11 +76,11 @@ ASTs InterpreterShowAccessQuery::getCreateAndGrantQueries() const { create_queries.push_back(InterpreterShowCreateAccessEntityQuery::getCreateQuery(*entity, access_control)); if (entity->isTypeOf(AccessEntityType::USER) || entity->isTypeOf(AccessEntityType::ROLE)) - boost::range::push_back(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control)); + insertAtEnd(grant_queries, InterpreterShowGrantsQuery::getGrantQueries(*entity, access_control)); } ASTs result = std::move(create_queries); - boost::range::push_back(result, std::move(grant_queries)); + insertAtEnd(result, std::move(grant_queries)); return result; } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index a5122cd54c7..25116f5145a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -517,7 +517,7 @@ Block ActionsDAG::updateHeader(Block header) const { auto & list = it->second; pos_to_remove.insert(pos); - node_to_column[inputs[list.front()]] = std::move(col); + node_to_column[inputs[list.front()]] = col; list.pop_front(); } } @@ -590,7 +590,7 @@ Block ActionsDAG::updateHeader(Block header) const for (auto & col : result_columns) res.insert(std::move(col)); - for (const auto & item : header) + for (auto && item : header) res.insert(std::move(item)); return res; @@ -651,8 +651,8 @@ NameSet ActionsDAG::foldActionsByProjection( { /// Projection folding. 
node->type = ActionsDAG::ActionType::INPUT; - node->result_type = std::move(column_with_type_name->type); - node->result_name = std::move(column_with_type_name->name); + node->result_type = column_with_type_name->type; + node->result_name = column_with_type_name->name; node->children.clear(); inputs.push_back(node); } @@ -724,7 +724,7 @@ void ActionsDAG::addAliases(const NamesWithAliases & aliases) Node node; node.type = ActionType::ALIAS; node.result_type = child->result_type; - node.result_name = std::move(item.second); + node.result_name = item.second; node.column = child->column; node.children.emplace_back(child); @@ -771,7 +771,7 @@ void ActionsDAG::project(const NamesWithAliases & projection) Node node; node.type = ActionType::ALIAS; node.result_type = child->result_type; - node.result_name = std::move(item.second); + node.result_name = item.second; node.column = child->column; node.children.emplace_back(child); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index bc937755618..99583c41b64 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -120,7 +120,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (i == tuple_size) for (i = 0; i < tuple_size; ++i) - columns[i]->insert(std::move(tuple_values[i])); + columns[i]->insert(tuple_values[i]); } } @@ -391,7 +391,7 @@ SetPtr makeExplicitSet( ScopeStack::Level::~Level() = default; ScopeStack::Level::Level() = default; -ScopeStack::Level::Level(Level &&) = default; +ScopeStack::Level::Level(Level &&) noexcept = default; class ScopeStack::Index { diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index 1d7d64f739a..b6b67bac81c 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -72,7 +72,7 @@ struct ScopeStack : WithContext NameSet inputs; Level(); - Level(Level &&); + Level(Level &&) noexcept; ~Level(); }; diff --git a/src/Interpreters/AddIndexConstraintsOptimizer.h b/src/Interpreters/AddIndexConstraintsOptimizer.h index 228d8d8ad1a..9ed4a8978c8 100644 --- a/src/Interpreters/AddIndexConstraintsOptimizer.h +++ b/src/Interpreters/AddIndexConstraintsOptimizer.h @@ -23,8 +23,7 @@ using StorageMetadataPtr = std::shared_ptr; class AddIndexConstraintsOptimizer final { public: - AddIndexConstraintsOptimizer( - const StorageMetadataPtr & metadata_snapshot); + explicit AddIndexConstraintsOptimizer(const StorageMetadataPtr & metadata_snapshot); void perform(CNFQuery & cnf_query); diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index 00d2f76f043..5904cc48084 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -42,9 +42,6 @@ using Sizes = std::vector; /// 2,1,1 /// -namespace -{ - template constexpr auto getBitmapSize() { @@ -62,8 +59,6 @@ constexpr auto getBitmapSize() 0))); } -} - template void fillFixedBatch(size_t num_rows, const T * source, T * dest) { @@ -255,7 +250,7 @@ static inline T ALWAYS_INLINE packFixed( /// Hash a set of keys into a UInt128 value. -static inline UInt128 ALWAYS_INLINE hash128( +static inline UInt128 ALWAYS_INLINE hash128( /// NOLINT size_t i, size_t keys_size, const ColumnRawPtrs & key_columns) { UInt128 key; @@ -269,29 +264,9 @@ static inline UInt128 ALWAYS_INLINE hash128( return key; } - -/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. 
-static inline StringRef * ALWAYS_INLINE placeKeysInPool( - size_t keys_size, StringRefs & keys, Arena & pool) -{ - for (size_t j = 0; j < keys_size; ++j) - { - char * place = pool.alloc(keys[j].size); - memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size); - keys[j].data = place; - } - - /// Place the StringRefs on the newly copied keys in the pool. - char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef)); - memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef)); - - return reinterpret_cast(res); -} - - /** Serialize keys into a continuous chunk of memory. */ -static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( +static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( /// NOLINT size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool) { const char * begin = nullptr; diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 3457e01f98f..f03bf45fbc6 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -660,7 +660,7 @@ struct AggregatedDataVariants : private boost::noncopyable case Type::without_key: break; #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: NAME = std::make_unique(); break; + case Type::NAME: (NAME) = std::make_unique(); break; APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } @@ -677,7 +677,7 @@ struct AggregatedDataVariants : private boost::noncopyable case Type::without_key: return 1; #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return NAME->data.size() + (without_key != nullptr); + case Type::NAME: return (NAME)->data.size() + (without_key != nullptr); APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } @@ -694,7 +694,7 @@ struct AggregatedDataVariants : private boost::noncopyable case Type::without_key: return 1; #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return NAME->data.size(); + case Type::NAME: return (NAME)->data.size(); APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } @@ -753,6 +753,7 @@ struct AggregatedDataVariants : private boost::noncopyable M(low_cardinality_key_string) \ M(low_cardinality_key_fixed_string) \ + /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ M(key8) \ M(key16) \ @@ -766,6 +767,7 @@ struct AggregatedDataVariants : private boost::noncopyable M(low_cardinality_key8) \ M(low_cardinality_key16) \ + /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \ APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ @@ -787,6 +789,7 @@ struct AggregatedDataVariants : private boost::noncopyable void convertToTwoLevel(); + /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \ M(key32_two_level) \ M(key64_two_level) \ @@ -1341,7 +1344,7 @@ private: template Method & getDataVariant(AggregatedDataVariants & variants); #define M(NAME, IS_TWO_LEVEL) \ - template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant(AggregatedDataVariants & variants) { return *variants.NAME; } + template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant(AggregatedDataVariants & variants) { return *variants.NAME; } /// NOLINT APPLY_FOR_AGGREGATED_VARIANTS(M) diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index 5321d5f6fd3..c60ab0f6510 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -165,9 +165,9 @@ void 
AsynchronousInsertQueue::scheduleDataProcessingJob(const InsertQuery & key, { /// Wrap 'unique_ptr' with 'shared_ptr' to make this /// lambda copyable and allow to save it to the thread pool. - pool.scheduleOrThrowOnError([=, data = std::make_shared(std::move(data))] + pool.scheduleOrThrowOnError([key, global_context, data = std::make_shared(std::move(data))]() mutable { - processData(std::move(key), std::move(*data), std::move(global_context)); + processData(key, std::move(*data), std::move(global_context)); }); } @@ -184,7 +184,10 @@ void AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) if (!FormatFactory::instance().isInputFormat(insert_query.format)) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); - query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames()); + /// For table functions we check access while executing + /// InterpreterInsertQuery::getTable() -> ITableFunction::execute(). + if (insert_query.table_id) + query_context->checkAccess(AccessType::INSERT, insert_query.table_id, sample_block.getNames()); String bytes; { @@ -411,7 +414,7 @@ try }; std::shared_ptr adding_defaults_transform; - if (insert_context->getSettingsRef().input_format_defaults_for_omitted_fields) + if (insert_context->getSettingsRef().input_format_defaults_for_omitted_fields && insert_query.table_id) { StoragePtr storage = DatabaseCatalog::instance().getTable(insert_query.table_id, insert_context); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); diff --git a/src/Interpreters/AsynchronousInsertQueue.h b/src/Interpreters/AsynchronousInsertQueue.h index 1dd2ad216aa..db3cb3049fd 100644 --- a/src/Interpreters/AsynchronousInsertQueue.h +++ b/src/Interpreters/AsynchronousInsertQueue.h @@ -138,10 +138,10 @@ private: static void finishWithException(const ASTPtr & query, const std::list & entries, const E & exception); public: - Queue getQueue() const + auto getQueueLocked() const { std::shared_lock lock(rwlock); - return queue; + return std::make_pair(std::ref(queue), std::move(lock)); } }; diff --git a/src/Interpreters/BloomFilter.h b/src/Interpreters/BloomFilter.h index 279ab6947ec..1fb9895cc27 100644 --- a/src/Interpreters/BloomFilter.h +++ b/src/Interpreters/BloomFilter.h @@ -31,7 +31,7 @@ public: using UnderType = UInt64; using Container = std::vector; - BloomFilter(const BloomFilterParameters & params); + explicit BloomFilter(const BloomFilterParameters & params); /// size -- size of filter in bytes. /// hashes -- number of used hash functions. /// seed -- random seed for hash functions generation. 
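The getQueueLocked() change above replaces AsynchronousInsertQueue's copying getter with an accessor that hands back the container by reference together with the shared lock that guards it, so the caller inspects the live queue under the lock instead of paying for a copy. Below is a minimal, self-contained sketch of that pattern; the Registry type and its members are hypothetical illustrations, not ClickHouse API. Note that std::make_pair unwraps std::ref into a plain reference, and the shared lock is released only when the returned pair goes out of scope in the caller.

#include <functional>
#include <iostream>
#include <map>
#include <mutex>
#include <shared_mutex>
#include <string>
#include <utility>

// Hypothetical example: return a reference to guarded data together with the lock,
// so the caller keeps the data locked for as long as it holds the returned pair.
class Registry
{
public:
    void add(const std::string & key, int value)
    {
        std::unique_lock lock(rwlock);
        entries[key] = value;
    }

    auto getEntriesLocked() const
    {
        std::shared_lock lock(rwlock);
        // std::make_pair unwraps std::ref, so the pair holds a const reference to the
        // map plus the moved shared_lock; no copy of the container is made.
        return std::make_pair(std::ref(entries), std::move(lock));
    }

private:
    mutable std::shared_mutex rwlock;
    std::map<std::string, int> entries;
};

int main()
{
    Registry registry;
    registry.add("a", 1);
    registry.add("b", 2);

    auto [entries, lock] = registry.getEntriesLocked();
    for (const auto & [key, value] : entries)
        std::cout << key << " = " << value << '\n';
    // The shared lock is released here, when 'lock' is destroyed with the bindings.
}
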
diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 827e7d27409..75af25e842e 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes } -void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) const +void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", ErrorCodes::LOGICAL_ERROR); @@ -99,7 +99,7 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) } -void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) +void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 3ce740c6436..0b40c78becc 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -119,8 +119,8 @@ public: * Only values that are not calculated automatically or passed separately are serialized. * Revisions are passed to use format that server will understand or client was used. */ - void write(WriteBuffer & out, const UInt64 server_protocol_revision) const; - void read(ReadBuffer & in, const UInt64 client_protocol_revision); + void write(WriteBuffer & out, UInt64 server_protocol_revision) const; + void read(ReadBuffer & in, UInt64 client_protocol_revision); /// Initialize parameters on client initiating query. void setInitialQuery(); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 3773dadaf13..e9f26c21089 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -63,7 +63,6 @@ public: /// is used to set a limit on the size of the timeout static Poco::Timespan saturate(Poco::Timespan v, Poco::Timespan limit); -public: using SlotToShard = std::vector; struct Address @@ -192,7 +191,6 @@ public: /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const; - public: ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication; /// Number of the shard, the indexation begins with 1 UInt32 shard_num = 0; @@ -207,7 +205,6 @@ public: using ShardsInfo = std::vector; - String getHashOfAddresses() const { return hash_of_addresses; } const ShardsInfo & getShardsInfo() const { return shards_info; } const AddressesWithFailover & getShardsAddresses() const { return addresses_with_failover; } @@ -263,7 +260,6 @@ private: /// Inter-server secret String secret; - String hash_of_addresses; /// Description of the cluster shards. ShardsInfo shards_info; /// Any remote shard. 
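Several constructors in this patch (WriteBufferValidUTF8, ZstdInflatingReadBuffer, AddIndexConstraintsOptimizer, BloomFilter above, ComparisonGraph below, and others) gain the explicit keyword. The effect is purely about how the type may be constructed: direct initialization still works, but implicit conversions from the single argument type are rejected. A short illustrative sketch with hypothetical types, not taken from the ClickHouse sources:

#include <cstddef>
#include <vector>

struct FilterParameters
{
    std::size_t filter_size = 0;
    std::size_t filter_hashes = 0;
};

class Filter
{
public:
    // Marked explicit so a FilterParameters value is never silently converted
    // into a Filter at call sites or via copy-initialization.
    explicit Filter(const FilterParameters & params)
        : bits(params.filter_size * 8)
    {
    }

    std::size_t sizeInBits() const { return bits.size(); }

private:
    std::vector<bool> bits;
};

void checkFilter(const Filter &) {}

int main()
{
    FilterParameters params;
    params.filter_size = 64;
    params.filter_hashes = 3;

    Filter filter(params);      // OK: direct initialization is still allowed
    checkFilter(filter);        // OK: passing an actual Filter

    // checkFilter(params);     // would not compile: explicit blocks the implicit conversion
    // Filter copy = params;    // would not compile: copy-initialization needs the conversion

    return filter.sizeInBits() == 512 ? 0 : 1;
}

Only the implicit conversion paths are removed; construction that names the type, emplace-style construction, and make_shared/make_unique calls are unaffected, which is all these one-argument constructor changes are meant to rule out.
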
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 0db07267231..884b8445732 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -116,7 +116,7 @@ void executeQuery( const Settings & settings = context->getSettingsRef(); - if (settings.max_distributed_depth && context->getClientInfo().distributed_depth > settings.max_distributed_depth) + if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); std::vector plans; diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index e236de67fdc..37d603b4923 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -159,7 +159,7 @@ ComparisonGraph::CompareResult ComparisonGraph::pathToCompareResult(Path path, b __builtin_unreachable(); } -std::optional ComparisonGraph::findPath(const size_t start, const size_t finish) const +std::optional ComparisonGraph::findPath(size_t start, size_t finish) const { const auto it = dists.find(std::make_pair(start, finish)); if (it == std::end(dists)) @@ -232,7 +232,7 @@ ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, con return CompareResult::UNKNOWN; } -bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const +bool ComparisonGraph::isPossibleCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const { const auto result = compare(left, right); @@ -267,7 +267,7 @@ bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTP return possible_pairs.contains({expected, result}); } -bool ComparisonGraph::isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const +bool ComparisonGraph::isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const { const auto result = compare(left, right); @@ -324,12 +324,12 @@ std::optional ComparisonGraph::getComponentId(const ASTPtr & ast) const } } -bool ComparisonGraph::hasPath(const size_t left, const size_t right) const +bool ComparisonGraph::hasPath(size_t left, size_t right) const { return findPath(left, right) || findPath(right, left); } -ASTs ComparisonGraph::getComponent(const size_t id) const +ASTs ComparisonGraph::getComponent(size_t id) const { return graph.vertices[id].asts; } @@ -387,7 +387,7 @@ ComparisonGraph::CompareResult ComparisonGraph::functionNameToCompareResult(cons return it == std::end(relation_to_compare) ? 
CompareResult::UNKNOWN : it->second; } -ComparisonGraph::CompareResult ComparisonGraph::inverseCompareResult(const CompareResult result) +ComparisonGraph::CompareResult ComparisonGraph::inverseCompareResult(CompareResult result) { static const std::unordered_map inverse_relations = { @@ -486,7 +486,7 @@ std::vector ComparisonGraph::getVertices() const void ComparisonGraph::dfsComponents( const Graph & reversed_graph, size_t v, - OptionalIndices & components, const size_t component) + OptionalIndices & components, size_t component) { components[v] = component; for (const auto & edge : reversed_graph.edges[v]) diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 20d6f135a0d..3891fbf51cf 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -17,7 +17,7 @@ class ComparisonGraph { public: /// atomic_formulas are extracted from constraints. - ComparisonGraph(const std::vector & atomic_formulas); + explicit ComparisonGraph(const std::vector & atomic_formulas); enum class CompareResult { @@ -32,15 +32,15 @@ public: static CompareResult atomToCompareResult(const CNFQuery::AtomicFormula & atom); static CompareResult functionNameToCompareResult(const std::string & name); - static CompareResult inverseCompareResult(const CompareResult result); + static CompareResult inverseCompareResult(CompareResult result); CompareResult compare(const ASTPtr & left, const ASTPtr & right) const; /// It's possible that left right - bool isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; + bool isPossibleCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; /// It's always true that left right - bool isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; + bool isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; /// Returns all expressions from component to which @ast belongs if any. std::vector getEqual(const ASTPtr & ast) const; @@ -52,11 +52,11 @@ public: std::optional getComponentId(const ASTPtr & ast) const; /// Returns all expressions from component. - std::vector getComponent(const std::size_t id) const; + std::vector getComponent(size_t id) const; size_t getNumOfComponents() const { return graph.vertices.size(); } - bool hasPath(const size_t left, const size_t right) const; + bool hasPath(size_t left, size_t right) const; /// Find constants lessOrEqual and greaterOrEqual. /// For int and double linear programming can be applied here. @@ -131,7 +131,7 @@ private: /// Assigns index of component for each vertex. static void dfsComponents( const Graph & reversed_graph, size_t v, - OptionalIndices & components, const size_t component); + OptionalIndices & components, size_t component); enum class Path { @@ -140,7 +140,7 @@ private: }; static CompareResult pathToCompareResult(Path path, bool inverse); - std::optional findPath(const size_t start, const size_t finish) const; + std::optional findPath(size_t start, size_t finish) const; /// Calculate @dists. static std::map, Path> buildDistsFromGraph(const Graph & g); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index bfb0129aa04..ac1bfc620b0 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -179,10 +180,10 @@ struct ContextSharedPart mutable VolumePtr backups_volume; /// Volume for all the backups. 
- mutable std::optional embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. - mutable std::optional external_dictionaries_loader; - mutable std::optional external_user_defined_executable_functions_loader; - mutable std::optional external_models_loader; + mutable std::unique_ptr embedded_dictionaries; /// Metrica's dictionaries. Have lazy initialization. + mutable std::unique_ptr external_dictionaries_loader; + mutable std::unique_ptr external_user_defined_executable_functions_loader; + mutable std::unique_ptr external_models_loader; ExternalLoaderXMLConfigRepository * external_models_config_repository = nullptr; scope_guard models_repository_guard; @@ -214,10 +215,10 @@ struct ContextSharedPart ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. - mutable std::optional buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. - mutable std::optional schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) - mutable std::optional distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) - mutable std::optional message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) + mutable std::unique_ptr buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables. + mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables) + mutable std::unique_ptr distributed_schedule_pool; /// A thread pool that can run different jobs in background (used for distributed sends) + mutable std::unique_ptr message_broker_schedule_pool; /// A thread pool that can run different jobs in background (used for message brokers, like RabbitMQ and Kafka) mutable ThrottlerPtr replicated_fetches_throttler; /// A server-wide throttler for replicated fetches mutable ThrottlerPtr replicated_sends_throttler; /// A server-wide throttler for replicated sends @@ -293,6 +294,17 @@ struct ContextSharedPart ~ContextSharedPart() { + /// Wait for thread pool for background writes, + /// since it may use per-user MemoryTracker which will be destroyed here. + try + { + IDiskRemote::getThreadPoolWriter().wait(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + try { shutdown(); @@ -344,12 +356,23 @@ struct ContextSharedPart common_executor->wait(); std::unique_ptr delete_system_logs; + std::unique_ptr delete_embedded_dictionaries; + std::unique_ptr delete_external_dictionaries_loader; + std::unique_ptr delete_external_user_defined_executable_functions_loader; + std::unique_ptr delete_external_models_loader; + std::unique_ptr delete_buffer_flush_schedule_pool; + std::unique_ptr delete_schedule_pool; + std::unique_ptr delete_distributed_schedule_pool; + std::unique_ptr delete_message_broker_schedule_pool; + std::unique_ptr delete_ddl_worker; + std::unique_ptr delete_access_control; + { auto lock = std::lock_guard(mutex); - /** Compiled expressions stored in cache need to be destroyed before destruction of static objects. - * Because CHJIT instance can be static object. - */ + /** Compiled expressions stored in cache need to be destroyed before destruction of static objects. + * Because CHJIT instance can be static object. 
+ */ #if USE_EMBEDDED_COMPILER if (auto * cache = CompiledExpressionCacheFactory::instance().tryGetCache()) cache->reset(); @@ -369,19 +392,19 @@ struct ContextSharedPart /// but at least they can be preserved for storage termination. dictionaries_xmls.reset(); user_defined_executable_functions_xmls.reset(); + models_repository_guard.reset(); delete_system_logs = std::move(system_logs); - embedded_dictionaries.reset(); - external_dictionaries_loader.reset(); - external_user_defined_executable_functions_loader.reset(); - models_repository_guard.reset(); - external_models_loader.reset(); - buffer_flush_schedule_pool.reset(); - schedule_pool.reset(); - distributed_schedule_pool.reset(); - message_broker_schedule_pool.reset(); - ddl_worker.reset(); - access_control.reset(); + delete_embedded_dictionaries = std::move(embedded_dictionaries); + delete_external_dictionaries_loader = std::move(external_dictionaries_loader); + delete_external_user_defined_executable_functions_loader = std::move(external_user_defined_executable_functions_loader); + delete_external_models_loader = std::move(external_models_loader); + delete_buffer_flush_schedule_pool = std::move(buffer_flush_schedule_pool); + delete_schedule_pool = std::move(schedule_pool); + delete_distributed_schedule_pool = std::move(distributed_schedule_pool); + delete_message_broker_schedule_pool = std::move(message_broker_schedule_pool); + delete_ddl_worker = std::move(ddl_worker); + delete_access_control = std::move(access_control); /// Stop trace collector if any trace_collector.reset(); @@ -391,6 +414,17 @@ struct ContextSharedPart /// Can be removed w/o context lock delete_system_logs.reset(); + delete_embedded_dictionaries.reset(); + delete_external_dictionaries_loader.reset(); + delete_external_user_defined_executable_functions_loader.reset(); + delete_external_models_loader.reset(); + delete_ddl_worker.reset(); + delete_buffer_flush_schedule_pool.reset(); + delete_schedule_pool.reset(); + delete_distributed_schedule_pool.reset(); + delete_message_broker_schedule_pool.reset(); + delete_ddl_worker.reset(); + delete_access_control.reset(); } bool hasTraceCollector() const @@ -1365,7 +1399,8 @@ ExternalDictionariesLoader & Context::getExternalDictionariesLoader() ExternalDictionariesLoader & Context::getExternalDictionariesLoaderUnlocked() { if (!shared->external_dictionaries_loader) - shared->external_dictionaries_loader.emplace(getGlobalContext()); + shared->external_dictionaries_loader = + std::make_unique(getGlobalContext()); return *shared->external_dictionaries_loader; } @@ -1383,7 +1418,8 @@ ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedEx ExternalUserDefinedExecutableFunctionsLoader & Context::getExternalUserDefinedExecutableFunctionsLoaderUnlocked() { if (!shared->external_user_defined_executable_functions_loader) - shared->external_user_defined_executable_functions_loader.emplace(getGlobalContext()); + shared->external_user_defined_executable_functions_loader = + std::make_unique(getGlobalContext()); return *shared->external_user_defined_executable_functions_loader; } @@ -1401,7 +1437,8 @@ ExternalModelsLoader & Context::getExternalModelsLoader() ExternalModelsLoader & Context::getExternalModelsLoaderUnlocked() { if (!shared->external_models_loader) - shared->external_models_loader.emplace(getGlobalContext()); + shared->external_models_loader = + std::make_unique(getGlobalContext()); return *shared->external_models_loader; } @@ -1436,7 +1473,7 @@ EmbeddedDictionaries & 
Context::getEmbeddedDictionariesImpl(const bool throw_on_ { auto geo_dictionaries_loader = std::make_unique(); - shared->embedded_dictionaries.emplace( + shared->embedded_dictionaries = std::make_unique( std::move(geo_dictionaries_loader), getGlobalContext(), throw_on_error); @@ -1695,7 +1732,7 @@ BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const { auto lock = getLock(); if (!shared->buffer_flush_schedule_pool) - shared->buffer_flush_schedule_pool.emplace( + shared->buffer_flush_schedule_pool = std::make_unique( settings.background_buffer_flush_schedule_pool_size, CurrentMetrics::BackgroundBufferFlushSchedulePoolTask, "BgBufSchPool"); @@ -1737,7 +1774,7 @@ BackgroundSchedulePool & Context::getSchedulePool() const { auto lock = getLock(); if (!shared->schedule_pool) - shared->schedule_pool.emplace( + shared->schedule_pool = std::make_unique( settings.background_schedule_pool_size, CurrentMetrics::BackgroundSchedulePoolTask, "BgSchPool"); @@ -1748,7 +1785,7 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const { auto lock = getLock(); if (!shared->distributed_schedule_pool) - shared->distributed_schedule_pool.emplace( + shared->distributed_schedule_pool = std::make_unique( settings.background_distributed_schedule_pool_size, CurrentMetrics::BackgroundDistributedSchedulePoolTask, "BgDistSchPool"); @@ -1759,7 +1796,7 @@ BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const { auto lock = getLock(); if (!shared->message_broker_schedule_pool) - shared->message_broker_schedule_pool.emplace( + shared->message_broker_schedule_pool = std::make_unique( settings.background_message_broker_schedule_pool_size, CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask, "BgMBSchPool"); @@ -3141,6 +3178,8 @@ ReadSettings Context::getReadSettings() const res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms; res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries; + res.remote_fs_enable_cache = settings.remote_fs_enable_cache; + res.remote_fs_cache_max_wait_sec = settings.remote_fs_cache_max_wait_sec; res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; diff --git a/src/Interpreters/CrashLog.h b/src/Interpreters/CrashLog.h index 61503d95cee..930515c43ea 100644 --- a/src/Interpreters/CrashLog.h +++ b/src/Interpreters/CrashLog.h @@ -41,7 +41,7 @@ class CrashLog : public SystemLog public: static void initialize(std::shared_ptr crash_log_) { - crash_log = std::move(crash_log_); + crash_log = crash_log_; } }; diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 64b9bf88ae9..a490d7bed43 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -259,13 +259,17 @@ bool DDLTask::tryFindHostInCluster() * */ is_circular_replicated = true; auto * query_with_table = dynamic_cast(query.get()); - if (!query_with_table || !query_with_table->database) + + /// For other DDLs like CREATE USER, there is no database name and should be executed successfully. 
+ if (query_with_table) { - throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, - "For a distributed DDL on circular replicated cluster its table name must be qualified by database name."); + if (!query_with_table->database) + throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, + "For a distributed DDL on circular replicated cluster its table name must be qualified by database name."); + + if (default_database == query_with_table->getDatabase()) + return true; } - if (default_database == query_with_table->getDatabase()) - return true; } } found_exact_match = true; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 117119a3ee8..360a5d430e0 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -103,13 +103,13 @@ TemporaryTableHolder::TemporaryTableHolder( { } -TemporaryTableHolder::TemporaryTableHolder(TemporaryTableHolder && rhs) +TemporaryTableHolder::TemporaryTableHolder(TemporaryTableHolder && rhs) noexcept : WithContext(rhs.context), temporary_tables(rhs.temporary_tables), id(rhs.id) { rhs.id = UUIDHelpers::Nil; } -TemporaryTableHolder & TemporaryTableHolder::operator = (TemporaryTableHolder && rhs) +TemporaryTableHolder & TemporaryTableHolder::operator=(TemporaryTableHolder && rhs) noexcept { id = rhs.id; rhs.id = UUIDHelpers::Nil; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index a32995658f1..34b42a3397c 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -98,8 +98,8 @@ struct TemporaryTableHolder : boost::noncopyable, WithContext const ASTPtr & query = {}, bool create_for_global_subquery = false); - TemporaryTableHolder(TemporaryTableHolder && rhs); - TemporaryTableHolder & operator = (TemporaryTableHolder && rhs); + TemporaryTableHolder(TemporaryTableHolder && rhs) noexcept; + TemporaryTableHolder & operator=(TemporaryTableHolder && rhs) noexcept; ~TemporaryTableHolder(); @@ -107,7 +107,7 @@ struct TemporaryTableHolder : boost::noncopyable, WithContext StoragePtr getTable() const; - operator bool () const { return id != UUIDHelpers::Nil; } + operator bool () const { return id != UUIDHelpers::Nil; } /// NOLINT IDatabase * temporary_tables = nullptr; UUID id = UUIDHelpers::Nil; diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 30c832e4917..83f8de78fa6 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -748,7 +748,7 @@ void ExpressionActions::execute(Block & block, size_t & num_rows, bool dry_run) if (execution_context.columns[pos].column) res.insert(execution_context.columns[pos]); - for (const auto & item : block) + for (auto && item : block) res.insert(std::move(item)); block.swap(res); diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 2e464053131..7e150f59694 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1367,7 +1367,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain auto child_name = child->getColumnName(); if (!aggregated_names.count(child_name)) - step.addRequiredOutput(std::move(child_name)); + step.addRequiredOutput(child_name); } return true; diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index aab3a9e7437..a75cdce820c 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -56,7 
+56,7 @@ namespace static_assert(std::is_same_v); ExternalLoader::Loadables objects; objects.reserve(results.size()); - for (const auto & result : results) + for (auto && result : results) { if (auto object = std::move(result.object)) objects.push_back(std::move(object)); diff --git a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp b/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp index 31b4d4a621c..e3d40033cff 100644 --- a/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp +++ b/src/Interpreters/ExternalUserDefinedExecutableFunctionsLoader.cpp @@ -139,7 +139,7 @@ ExternalLoader::LoadablePtr ExternalUserDefinedExecutableFunctionsLoader::create UserDefinedExecutableFunctionConfiguration function_configuration { - .name = std::move(name), + .name = name, .command = std::move(command_value), .command_arguments = std::move(command_arguments), .arguments = std::move(arguments), diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h index 0eeab8348fd..3966653235a 100644 --- a/src/Interpreters/GetAggregatesVisitor.h +++ b/src/Interpreters/GetAggregatesVisitor.h @@ -2,6 +2,9 @@ #include #include +#include +#include +#include namespace DB { diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index 1f59dd36354..7906ab189fc 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -47,7 +47,7 @@ public: /// then we can cache the scalars forever (for any query that doesn't use the virtual storage either), but if it does use the virtual /// storage then we can only keep the scalar result around while we are working with that source block /// You can find more details about this under ExecuteScalarSubqueriesMatcher::visit - bool usesViewSource() { return uses_view_source; } + bool usesViewSource() const { return uses_view_source; } protected: ASTPtr query_ptr; diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index ba8367b57e3..2a3171adccd 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -25,7 +25,7 @@ public: /// Add block of data from right hand of JOIN. /// @returns false, if some limit was exceeded and you should not insert more data. - virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; + virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; /// NOLINT virtual void checkTypesOfKeys(const Block & block) const = 0; diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.h b/src/Interpreters/InJoinSubqueriesPreprocessor.h index d2166185d2b..e4ec3c81ed2 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.h +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.h @@ -43,7 +43,7 @@ public: /// These methods could be overridden for the need of the unit test. 
virtual bool hasAtLeastTwoShards(const IStorage & table) const; virtual std::pair getRemoteDatabaseAndTableName(const IStorage & table) const; - virtual ~CheckShardsAndTables() {} + virtual ~CheckShardsAndTables() = default; }; InJoinSubqueriesPreprocessor( diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 16d6e2d0652..ed996430996 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1062,7 +1062,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) QualifiedTableName qualified_name{database_name, create.getTable()}; TableNamesSet loading_dependencies = getDependenciesSetFromCreateQuery(getContext()->getGlobalContext(), qualified_name, query_ptr); if (!loading_dependencies.empty()) - DatabaseCatalog::instance().addLoadingDependencies(std::move(qualified_name), std::move(loading_dependencies)); + DatabaseCatalog::instance().addLoadingDependencies(qualified_name, std::move(loading_dependencies)); return fillTableIfNeeded(create); } @@ -1190,6 +1190,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, properties.columns, properties.constraints, false); + + /// If the schema was inferred during storage creation, add the columns description to the create query. + addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as(), res); } if (from_path && !res->storesDataOnDisk()) @@ -1489,4 +1492,26 @@ void InterpreterCreateQuery::extendQueryLogElemImpl(QueryLogElement & elem, cons } } +void InterpreterCreateQuery::addColumnsDescriptionToCreateQueryIfNecessary(ASTCreateQuery & create, const StoragePtr & storage) +{ + if (create.is_dictionary || (create.columns_list && create.columns_list->columns && !create.columns_list->columns->children.empty())) + return; + + auto ast_storage = std::make_shared(); + auto query_from_storage = DB::getCreateQueryFromStorage(storage, ast_storage, false, + getContext()->getSettingsRef().max_parser_depth, true); + auto & create_query_from_storage = query_from_storage->as(); + + if (!create.columns_list) + { + ASTPtr columns_list = std::make_shared(*create_query_from_storage.columns_list); + create.set(create.columns_list, columns_list); + } + else + { + ASTPtr columns = std::make_shared(*create_query_from_storage.columns_list->columns); + create.columns_list->set(create.columns_list->columns, columns); + } +} + } diff --git a/src/Interpreters/InterpreterCreateQuery.h b/src/Interpreters/InterpreterCreateQuery.h index 5804d817fe2..b6c8e10668a 100644 --- a/src/Interpreters/InterpreterCreateQuery.h +++ b/src/Interpreters/InterpreterCreateQuery.h @@ -96,6 +96,10 @@ private: void assertOrSetUUID(ASTCreateQuery & create, const DatabasePtr & database) const; + /// Update create query with columns description from storage if query doesn't have it. + /// It's used to prevent automatic schema inference during table creation on each server startup. + void addColumnsDescriptionToCreateQueryIfNecessary(ASTCreateQuery & create, const StoragePtr & storage); + ASTPtr query_ptr; /// Skip safety threshold when loading tables. diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 37b944d72d6..edca48d3600 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -141,6 +141,18 @@ namespace /// Settings. Different for each explain type.
+struct QueryASTSettings +{ + bool graph = false; + + constexpr static char name[] = "AST"; + + std::unordered_map> boolean_settings = + { + {"graph", graph}, + }; +}; + struct QueryPlanSettings { QueryPlan::ExplainPlanOptions query_plan_options; @@ -260,10 +272,11 @@ QueryPipeline InterpreterExplainQuery::executeImpl() { case ASTExplainQuery::ParsedAST: { - if (ast.getSettings()) - throw Exception("Settings are not supported for EXPLAIN AST query.", ErrorCodes::UNKNOWN_SETTING); - - dumpAST(*ast.getExplainedQuery(), buf); + auto settings = checkAndGetSettings(ast.getSettings()); + if (settings.graph) + dumpASTInDotFormat(*ast.getExplainedQuery(), buf); + else + dumpAST(*ast.getExplainedQuery(), buf); break; } case ASTExplainQuery::AnalyzedSyntax: diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f43d108de9e..df44814a96e 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -60,6 +60,18 @@ StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query) { const auto & factory = TableFunctionFactory::instance(); TableFunctionPtr table_function_ptr = factory.get(query.table_function, getContext()); + + /// If table function needs structure hint from select query + /// we can create a temporary pipeline and get the header. + if (query.select && table_function_ptr->needStructureHint()) + { + InterpreterSelectWithUnionQuery interpreter_select{ + query.select, getContext(), SelectQueryOptions(QueryProcessingStage::Complete, 1)}; + QueryPipelineBuilder tmp_pipeline = interpreter_select.buildQueryPipeline(); + ColumnsDescription structure_hint{tmp_pipeline.getHeader().getNamesAndTypesList()}; + table_function_ptr->setStructureHint(structure_hint); + } + return table_function_ptr->execute(query.table_function, getContext(), table_function_ptr->getName()); } @@ -185,7 +197,7 @@ Chain InterpreterInsertQuery::buildChain( std::atomic_uint64_t * elapsed_counter_ms) { auto sample = getSampleBlock(columns, table, metadata_snapshot); - return buildChainImpl(table, metadata_snapshot, std::move(sample) , thread_status, elapsed_counter_ms); + return buildChainImpl(table, metadata_snapshot, sample, thread_status, elapsed_counter_ms); } Chain InterpreterInsertQuery::buildChainImpl( @@ -283,6 +295,9 @@ BlockIO InterpreterInsertQuery::execute() auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot); + + /// For table functions we check access while executing + /// getTable() -> ITableFunction::execute(). 
if (!query.table_function) getContext()->checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 2b949266c17..5ec6abb08a7 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -133,7 +133,7 @@ public: , process_list(process_list_) , processes_to_stop(std::move(processes_to_stop_)) , processes_block(std::move(processes_block_)) - , res_sample_block(std::move(res_sample_block_)) + , res_sample_block(res_sample_block_) { addTotalRowsApprox(processes_to_stop.size()); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 5c096a7dbac..f2fc17fbf9a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -549,7 +549,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Reuse already built sets for multiple passes of analysis subquery_for_sets = std::move(query_analyzer->getSubqueriesForSets()); - prepared_sets = query_info.sets.empty() ? std::move(query_analyzer->getPreparedSets()) : std::move(query_info.sets); + prepared_sets = query_info.sets.empty() ? query_analyzer->getPreparedSets() : query_info.sets; /// Do not try move conditions to PREWHERE for the second time. /// Otherwise, we won't be able to fallback from inefficient PREWHERE to WHERE later. @@ -623,8 +623,6 @@ BlockIO InterpreterSelectQuery::execute() Block InterpreterSelectQuery::getSampleBlockImpl() { - OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); - query_info.query = query_ptr; query_info.has_window = query_analyzer->hasWindow(); if (storage && !options.only_analyze) @@ -1904,20 +1902,16 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc else if (interpreter_subquery) { /// Subquery. - /// If we need less number of columns that subquery have - update the interpreter. 
- if (required_columns.size() < source_header.columns()) - { - ASTPtr subquery = extractTableExpression(query, 0); - if (!subquery) - throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); + ASTPtr subquery = extractTableExpression(query, 0); + if (!subquery) + throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); - interpreter_subquery = std::make_unique( - subquery, getSubqueryContext(context), - options.copy().subquery().noModify(), required_columns); + interpreter_subquery = std::make_unique( + subquery, getSubqueryContext(context), + options.copy().subquery().noModify(), required_columns); - if (query_analyzer->hasAggregation()) - interpreter_subquery->ignoreWithTotals(); - } + if (query_analyzer->hasAggregation()) + interpreter_subquery->ignoreWithTotals(); interpreter_subquery->buildQueryPlan(query_plan); query_plan.addInterpreterContext(context); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index 723db59f04b..130b3aae58d 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -208,8 +208,10 @@ Block InterpreterSelectWithUnionQuery::getCurrentChildResultHeader(const ASTPtr if (ast_ptr_->as()) return InterpreterSelectWithUnionQuery(ast_ptr_, context, options.copy().analyze().noModify(), required_result_column_names) .getSampleBlock(); - else + else if (ast_ptr_->as()) return InterpreterSelectQuery(ast_ptr_, context, options.copy().analyze().noModify()).getSampleBlock(); + else + return InterpreterSelectIntersectExceptQuery(ast_ptr_, context, options.copy().analyze().noModify()).getSampleBlock(); } std::unique_ptr diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index ed20b1b2048..d6a00ba89b4 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -576,7 +576,7 @@ std::shared_ptr subqueryExpressionList( needed_columns[table_pos].fillExpressionList(*expression_list); for (const auto & expr : alias_pushdown[table_pos]) - expression_list->children.emplace_back(std::move(expr)); + expression_list->children.emplace_back(expr); return expression_list; } diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 3aae3982758..482a813bfef 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -183,7 +183,9 @@ std::unique_ptr JoinedTables::makeLeftTableSubq { if (!isLeftTableSubquery()) return {}; - return std::make_unique(left_table_expression, context, select_options); + + /// Only build dry_run interpreter during analysis. We will reconstruct the subquery interpreter during plan building. 
+ return std::make_unique(left_table_expression, context, select_options.copy().analyze()); } StoragePtr JoinedTables::getLeftTableStorage() diff --git a/src/Interpreters/LogicalExpressionsOptimizer.h b/src/Interpreters/LogicalExpressionsOptimizer.h index 1a04b199a13..4991d31f8b1 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/src/Interpreters/LogicalExpressionsOptimizer.h @@ -29,7 +29,7 @@ class LogicalExpressionsOptimizer final { const UInt64 optimize_min_equality_disjunction_chain_length; - ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_) + explicit ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_) : optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_) {} }; @@ -68,7 +68,6 @@ private: using DisjunctiveEqualityChainsMap = std::map; using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type; -private: /** Collect information about all the equations in the OR chains (not necessarily homogeneous). * This information is grouped by the expression that is on the left side of the equation. */ @@ -92,12 +91,10 @@ private: /// Restore the original column order after optimization. void reorderColumns(); -private: using ParentNodes = std::vector; using FunctionParentMap = std::unordered_map; using ColumnToPosition = std::unordered_map; -private: ASTSelectQuery * select_query; const ExtractedSettings settings; /// Information about the OR-chains inside the query. diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index df74a94ee57..515ef6c3058 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -130,6 +130,9 @@ static NamesAndTypesList getColumnsList(const ASTExpressionList * columns_defini child = new_child; } } + + if (type_name_upper == "DATE") + data_type_function->name = "Date32"; } if (is_nullable) data_type = makeASTFunction("Nullable", data_type); @@ -335,7 +338,7 @@ static ASTPtr getPartitionPolicy(const NamesAndTypesList & primary_keys) if (which.isNullable()) throw Exception("LOGICAL ERROR: MySQL primary key must be not null, it is a bug.", ErrorCodes::LOGICAL_ERROR); - if (which.isDate() || which.isDateTime() || which.isDateTime64()) + if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64()) { /// In any case, date or datetime is always the best partitioning key return makeASTFunction("toYYYYMM", std::make_shared(primary_key.name)); diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 680b9bd5606..71578bd5db7 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -39,7 +39,7 @@ TEST(MySQLCreateRewritten, ColumnsDataType) { {"TINYINT", "Int8"}, {"SMALLINT", "Int16"}, {"MEDIUMINT", "Int32"}, {"INT", "Int32"}, {"INTEGER", "Int32"}, {"BIGINT", "Int64"}, {"FLOAT", "Float32"}, {"DOUBLE", "Float64"}, - {"VARCHAR(10)", "String"}, {"CHAR(10)", "String"}, {"Date", "Date"}, {"DateTime", "DateTime"}, + {"VARCHAR(10)", "String"}, {"CHAR(10)", "String"}, {"Date", "Date32"}, {"DateTime", "DateTime"}, {"TIMESTAMP", "DateTime"}, {"BOOLEAN", "Bool"}, {"BIT", "UInt64"}, {"SET", "UInt64"}, {"YEAR", "UInt16"}, {"TIME", "Int64"}, {"GEOMETRY", "String"} }; @@ -104,7 +104,7 @@ TEST(MySQLCreateRewritten, PartitionPolicy) {"MEDIUMINT", "Int32", " PARTITION BY 
intDiv(key, 4294967)"}, {"INT", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"INTEGER", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"BIGINT", "Int64", " PARTITION BY intDiv(key, 18446744073709551)"}, {"FLOAT", "Float32", ""}, {"DOUBLE", "Float64", ""}, {"VARCHAR(10)", "String", ""}, {"CHAR(10)", "String", ""}, - {"Date", "Date", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, + {"Date", "Date32", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"TIMESTAMP", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"BOOLEAN", "Bool", " PARTITION BY key"} }; @@ -135,7 +135,7 @@ TEST(MySQLCreateRewritten, OrderbyPolicy) {"MEDIUMINT", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"INT", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"INTEGER", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"BIGINT", "Int64", " PARTITION BY intDiv(key, 18446744073709551)"}, {"FLOAT", "Float32", ""}, {"DOUBLE", "Float64", ""}, {"VARCHAR(10)", "String", ""}, {"CHAR(10)", "String", ""}, - {"Date", "Date", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, + {"Date", "Date32", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"TIMESTAMP", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"BOOLEAN", "Bool", " PARTITION BY key"} }; diff --git a/src/Interpreters/OpenTelemetrySpanLog.cpp b/src/Interpreters/OpenTelemetrySpanLog.cpp index 40f31e4976c..36ffd617cd6 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.cpp +++ b/src/Interpreters/OpenTelemetrySpanLog.cpp @@ -150,6 +150,42 @@ OpenTelemetrySpanHolder::~OpenTelemetrySpanHolder() } } +void OpenTelemetrySpanHolder::addAttribute(const std::string& name, UInt64 value) +{ + if (trace_id == UUID()) + return; + + this->attribute_names.push_back(name); + this->attribute_values.push_back(std::to_string(value)); +} + +void OpenTelemetrySpanHolder::addAttribute(const std::string& name, const std::string& value) +{ + if (trace_id == UUID()) + return; + + this->attribute_names.push_back(name); + this->attribute_values.push_back(value); +} + +void OpenTelemetrySpanHolder::addAttribute(const Exception & e) +{ + if (trace_id == UUID()) + return; + + this->attribute_names.push_back("clickhouse.exception"); + this->attribute_values.push_back(getExceptionMessage(e, false)); +} + +void OpenTelemetrySpanHolder::addAttribute(std::exception_ptr e) +{ + if (trace_id == UUID() || e == nullptr) + return; + + this->attribute_names.push_back("clickhouse.exception"); + this->attribute_values.push_back(getExceptionMessage(e, false)); +} + bool OpenTelemetryTraceContext::parseTraceparentHeader(const std::string & traceparent, std::string & error) { diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h index 8dfc2eccc00..aa99a9f8e4b 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.h +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -25,7 +25,7 @@ struct OpenTelemetrySpan struct OpenTelemetrySpanLogElement : public OpenTelemetrySpan { OpenTelemetrySpanLogElement() = default; - OpenTelemetrySpanLogElement(const OpenTelemetrySpan & span) + explicit OpenTelemetrySpanLogElement(const OpenTelemetrySpan & span) : OpenTelemetrySpan(span) {} static std::string name() { return "OpenTelemetrySpanLog"; } @@ -44,7 +44,12 @@ public: struct OpenTelemetrySpanHolder : public OpenTelemetrySpan { - OpenTelemetrySpanHolder(const std::string & _operation_name); + explicit OpenTelemetrySpanHolder(const std::string & 
_operation_name); + void addAttribute(const std::string& name, UInt64 value); + void addAttribute(const std::string& name, const std::string& value); + void addAttribute(const Exception & e); + void addAttribute(std::exception_ptr e); + ~OpenTelemetrySpanHolder(); }; diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h index 05d0330196b..ad98f92bafd 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -10,7 +10,7 @@ namespace DB class OptimizeIfWithConstantConditionVisitor { public: - OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + explicit OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) : aliases(aliases_) {} diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index ecfda4cd0c1..991b449196d 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -55,7 +55,12 @@ bool shardContains( data.sharding_key_column_name); /// The value from IN can be non-numeric, /// but in this case it should be convertible to numeric type, let's try. - sharding_value = convertFieldToType(sharding_value, DataTypeUInt64()); + /// + /// NOTE: that conversion should not be done for signed types, + /// since it uses accurate cast, that will return Null, + /// but we need static_cast<> (as createBlockSelector()). + if (!isInt64OrUInt64FieldType(sharding_value.getType())) + sharding_value = convertFieldToType(sharding_value, DataTypeUInt64()); /// In case of conversion is not possible (NULL), shard cannot contain the value anyway. if (sharding_value.isNull()) return false; diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index f89f836871a..4947b50513c 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -46,6 +46,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"table", std::make_shared()}, {"part_name", std::make_shared()}, {"partition_id", std::make_shared()}, + {"disk_name", std::make_shared()}, {"path_on_disk", std::make_shared()}, {"rows", std::make_shared()}, @@ -79,6 +80,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(table_name); columns[i++]->insert(part_name); columns[i++]->insert(partition_id); + columns[i++]->insert(disk_name); columns[i++]->insert(path_on_disk); columns[i++]->insert(rows); @@ -155,6 +157,7 @@ bool PartLog::addNewParts( elem.table_name = table_id.table_name; elem.partition_id = part->info.partition_id; elem.part_name = part->name; + elem.disk_name = part->volume->getDisk()->getName(); elem.path_on_disk = part->getFullPath(); elem.bytes_compressed_on_disk = part->getBytesOnDisk(); diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index bdd1db4334a..5f502edb339 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -32,6 +32,7 @@ struct PartLogElement String table_name; String part_name; String partition_id; + String disk_name; String path_on_disk; /// Size of the part diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 493b2ba81a9..c90c271679c 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -216,7 +216,7 @@ struct ProcessListForUserInfo /// Data about queries for one user. 
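The NOTE in the OptimizeShardingKeyRewriteInVisitor hunk above explains why the accurate conversion to UInt64 is now skipped for Int64/UInt64 fields: an accurate cast of a negative value reports failure (Null), while createBlockSelector effectively relies on static_cast wraparound. A standalone sketch of that difference, using plain integer types rather than ClickHouse's Field API:

#include <cstdint>
#include <iostream>
#include <optional>

// Range-checked conversion: fails for negative input, analogous to the Null
// Field returned by convertFieldToType in the hunk above.
std::optional<uint64_t> accurate_to_uint64(int64_t x)
{
    if (x < 0)
        return std::nullopt;
    return static_cast<uint64_t>(x);
}

int main()
{
    int64_t sharding_value = -1;
    auto checked = accurate_to_uint64(sharding_value);     // nullopt ("Null")
    auto wrapped = static_cast<uint64_t>(sharding_value);  // wraps to 18446744073709551615

    std::cout << (checked ? "has value" : "null") << ' ' << wrapped << '\n';
}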
struct ProcessListForUser { - ProcessListForUser(ProcessList * global_process_list); + explicit ProcessListForUser(ProcessList * global_process_list); /// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled. using QueryToElement = std::unordered_map; diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 472efc109fb..ea87d565854 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -1,5 +1,7 @@ #include "ProfileEventsExt.h" #include +#include +#include #include #include #include @@ -36,7 +38,7 @@ void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, if (nonzero_only && 0 == value) continue; - const char * desc = ProfileEvents::getName(event); + const char * desc = getName(event); key_column.insertData(desc, strlen(desc)); value_column.insert(value); size++; @@ -45,4 +47,133 @@ void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, offsets.push_back(offsets.back() + size); } +/// Add records about provided non-zero ProfileEvents::Counters. +static void dumpProfileEvents(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +{ + size_t rows = 0; + auto & name_column = columns[NAME_COLUMN_INDEX]; + auto & value_column = columns[VALUE_COLUMN_INDEX]; + for (Event event = 0; event < Counters::num_counters; ++event) + { + Int64 value = snapshot.counters[event]; + + if (value == 0) + continue; + + const char * desc = getName(event); + name_column->insertData(desc, strlen(desc)); + value_column->insert(value); + rows++; + } + + // Fill the rest of the columns with data + for (size_t row = 0; row < rows; ++row) + { + size_t i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(UInt64(snapshot.current_time)); + columns[i++]->insert(UInt64{snapshot.thread_id}); + columns[i++]->insert(Type::INCREMENT); + } +} + +static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::MutableColumns & columns, String const & host_name) +{ + size_t i = 0; + columns[i++]->insertData(host_name.data(), host_name.size()); + columns[i++]->insert(UInt64(snapshot.current_time)); + columns[i++]->insert(UInt64{snapshot.thread_id}); + columns[i++]->insert(Type::GAUGE); + + columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); + columns[i++]->insert(snapshot.memory_usage); +} + +void getProfileEvents( + const String & server_display_name, + DB::InternalProfileEventsQueuePtr profile_queue, + DB::Block & block, + ThreadIdToCountersSnapshot & last_sent_snapshots) +{ + using namespace DB; + static const NamesAndTypesList column_names_and_types = { + {"host_name", std::make_shared()}, + {"current_time", std::make_shared()}, + {"thread_id", std::make_shared()}, + {"type", TypeEnum}, + {"name", std::make_shared()}, + {"value", std::make_shared()}, + }; + + ColumnsWithTypeAndName temp_columns; + for (auto const & name_and_type : column_names_and_types) + temp_columns.emplace_back(name_and_type.type, name_and_type.name); + + block = std::move(temp_columns); + MutableColumns columns = block.mutateColumns(); + auto thread_group = CurrentThread::getGroup(); + auto const current_thread_id = CurrentThread::get().thread_id; + std::vector snapshots; + ThreadIdToCountersSnapshot new_snapshots; + ProfileEventsSnapshot group_snapshot; + { + auto stats = 
thread_group->getProfileEventsCountersAndMemoryForThreads(); + snapshots.reserve(stats.size()); + + for (auto & stat : stats) + { + auto const thread_id = stat.thread_id; + if (thread_id == current_thread_id) + continue; + auto current_time = time(nullptr); + auto previous_snapshot = last_sent_snapshots.find(thread_id); + auto increment = + previous_snapshot != last_sent_snapshots.end() + ? CountersIncrement(stat.counters, previous_snapshot->second) + : CountersIncrement(stat.counters); + snapshots.push_back(ProfileEventsSnapshot{ + thread_id, + std::move(increment), + stat.memory_usage, + current_time + }); + new_snapshots[thread_id] = std::move(stat.counters); + } + + group_snapshot.thread_id = 0; + group_snapshot.current_time = time(nullptr); + group_snapshot.memory_usage = thread_group->memory_tracker.get(); + auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); + auto prev_group_snapshot = last_sent_snapshots.find(0); + group_snapshot.counters = + prev_group_snapshot != last_sent_snapshots.end() + ? CountersIncrement(group_counters, prev_group_snapshot->second) + : CountersIncrement(group_counters); + new_snapshots[0] = std::move(group_counters); + } + last_sent_snapshots = std::move(new_snapshots); + + for (auto & snapshot : snapshots) + { + dumpProfileEvents(snapshot, columns, server_display_name); + dumpMemoryTracker(snapshot, columns, server_display_name); + } + dumpProfileEvents(group_snapshot, columns, server_display_name); + dumpMemoryTracker(group_snapshot, columns, server_display_name); + + Block curr_block; + size_t rows = 0; + + for (; profile_queue->tryPop(curr_block); ++rows) + { + auto curr_columns = curr_block.getColumns(); + for (size_t j = 0; j < curr_columns.size(); ++j) + columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); + } + + bool empty = columns[0]->empty(); + if (!empty) + block.setColumns(std::move(columns)); +} + } diff --git a/src/Interpreters/ProfileEventsExt.h b/src/Interpreters/ProfileEventsExt.h index 8a92eadec79..7d9fc512d15 100644 --- a/src/Interpreters/ProfileEventsExt.h +++ b/src/Interpreters/ProfileEventsExt.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include @@ -7,9 +8,28 @@ namespace ProfileEvents { +constexpr size_t NAME_COLUMN_INDEX = 4; +constexpr size_t VALUE_COLUMN_INDEX = 5; + +struct ProfileEventsSnapshot +{ + UInt64 thread_id; + CountersIncrement counters; + Int64 memory_usage; + time_t current_time; +}; + +using ThreadIdToCountersSnapshot = std::unordered_map; + /// Dumps profile events to columns Map(String, UInt64) void dumpToMapColumn(const Counters::Snapshot & counters, DB::IColumn * column, bool nonzero_only = true); +void getProfileEvents( + const String & server_display_name, + DB::InternalProfileEventsQueuePtr profile_queue, + DB::Block & block, + ThreadIdToCountersSnapshot & last_sent_snapshots); + /// This is for ProfileEvents packets. 
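The getProfileEvents() implementation above sends only counter increments: for every thread it compares the current counters against the last snapshot kept in last_sent_snapshots and emits the difference. A minimal standalone sketch of that delta-snapshot bookkeeping (names here are illustrative, not the real CountersIncrement types):

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

using Counters = std::vector<uint64_t>;

// Counters are monotone, so the per-event difference is the amount of work
// done since the previous send.
Counters increment_since(const Counters & current, const Counters & previous)
{
    Counters delta(current.size());
    for (size_t i = 0; i < current.size(); ++i)
        delta[i] = current[i] - previous[i];
    return delta;
}

int main()
{
    std::unordered_map<uint64_t, Counters> last_sent;  // thread_id -> last sent snapshot
    uint64_t thread_id = 42;

    Counters now = {10, 3, 7};
    auto it = last_sent.find(thread_id);
    Counters delta = (it != last_sent.end()) ? increment_since(now, it->second) : now;
    last_sent[thread_id] = now;  // remember what was sent

    for (auto v : delta)
        std::cout << v << ' ';   // first call: 10 3 7; later calls: only the growth
    std::cout << '\n';
}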
enum Type : int8_t { diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp index 2cbb9634446..b464d9c1ca5 100644 --- a/src/Interpreters/QueryLog.cpp +++ b/src/Interpreters/QueryLog.cpp @@ -98,6 +98,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes() {"http_referer", std::make_shared()}, {"forwarded_for", std::make_shared()}, {"quota_key", std::make_shared()}, + {"distributed_depth", std::make_shared()}, {"revision", std::make_shared()}, @@ -289,5 +290,6 @@ void QueryLogElement::appendClientInfo(const ClientInfo & client_info, MutableCo columns[i++]->insert(client_info.forwarded_for); columns[i++]->insert(client_info.quota_key); + columns[i++]->insert(client_info.distributed_depth); } } diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index eebcff62cde..f532d869789 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -25,7 +25,7 @@ class QueryNormalizer bool prefer_column_name_to_alias; template - ExtractedSettings(const T & settings) + ExtractedSettings(const T & settings) /// NOLINT : max_ast_depth(settings.max_ast_depth) , max_expanded_ast_elements(settings.max_expanded_ast_elements) , prefer_column_name_to_alias(settings.prefer_column_name_to_alias) diff --git a/src/Interpreters/QueryThreadLog.cpp b/src/Interpreters/QueryThreadLog.cpp index 7ca3c10045e..d9feaf0a0c3 100644 --- a/src/Interpreters/QueryThreadLog.cpp +++ b/src/Interpreters/QueryThreadLog.cpp @@ -68,6 +68,7 @@ NamesAndTypesList QueryThreadLogElement::getNamesAndTypes() {"http_referer", std::make_shared()}, {"forwarded_for", std::make_shared()}, {"quota_key", std::make_shared()}, + {"distributed_depth", std::make_shared()}, {"revision", std::make_shared()}, diff --git a/src/Interpreters/RedundantFunctionsInOrderByVisitor.h b/src/Interpreters/RedundantFunctionsInOrderByVisitor.h index 09362ea6be2..60c9fcf2a24 100644 --- a/src/Interpreters/RedundantFunctionsInOrderByVisitor.h +++ b/src/Interpreters/RedundantFunctionsInOrderByVisitor.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace DB { diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.h b/src/Interpreters/ReplaceQueryParameterVisitor.h index cb3d0f668d8..dd785cd768e 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace DB diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 842f61cbdd2..a34c81d52e2 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -83,6 +83,7 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const auto & column_type = columns.get(name_in_storage).type; TypeIndex column_type_id = column_type->getTypeId(); + const auto & alias = function.tryGetAlias(); if (arguments.size() == 1) { @@ -91,7 +92,10 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) { const auto & [type_id, subcolumn_name, transformer] = it->second; if (column_type_id == type_id) + { ast = transformer(name_in_storage, subcolumn_name); + ast->setAlias(alias); + } } } else @@ -116,6 +120,7 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) return; ast = transformToSubcolumn(name_in_storage, subcolumn_name); + ast->setAlias(alias); } else { @@ -124,7 +129,10 @@ void 
RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) { const auto & [type_id, subcolumn_name, transformer] = it->second; if (column_type_id == type_id) + { ast = transformer(name_in_storage, subcolumn_name, arguments[1]); + ast->setAlias(alias); + } } } } diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index cae20b98caf..baff903e121 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -26,7 +26,7 @@ struct RowRef const Block * block = nullptr; SizeT row_num = 0; - RowRef() {} + RowRef() {} /// NOLINT RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {} }; @@ -42,7 +42,7 @@ struct RowRefList : RowRef Batch * next; RowRef row_refs[MAX_SIZE]; - Batch(Batch * parent) + explicit Batch(Batch * parent) : next(parent) {} @@ -52,7 +52,7 @@ struct RowRefList : RowRef { if (full()) { - auto batch = pool.alloc(); + auto * batch = pool.alloc(); *batch = Batch(this); batch->insert(std::move(row_ref), pool); return batch; @@ -66,7 +66,7 @@ struct RowRefList : RowRef class ForwardIterator { public: - ForwardIterator(const RowRefList * begin) + explicit ForwardIterator(const RowRefList * begin) : root(begin) , first(true) , batch(root->next) @@ -115,7 +115,7 @@ struct RowRefList : RowRef size_t position; }; - RowRefList() {} + RowRefList() {} /// NOLINT RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {} ForwardIterator begin() const { return ForwardIterator(this); } @@ -221,7 +221,7 @@ public: T asof_value; RowRef row_ref; - Entry(T v) : asof_value(v) {} + explicit Entry(T v) : asof_value(v) {} Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} }; @@ -241,8 +241,8 @@ public: Entry::LookupPtr, Entry::LookupPtr>; - AsofRowRefs() {} - AsofRowRefs(TypeIndex t); + AsofRowRefs() = default; + explicit AsofRowRefs(TypeIndex t); static std::optional getTypeSize(const IColumn & asof_column, size_t & type_size); diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index d9698be1a9b..a0c29c07d38 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -77,7 +77,7 @@ SessionLogElement::SessionLogElement(const UUID & auth_id_, Type type_) NamesAndTypesList SessionLogElement::getNamesAndTypes() { - const auto event_type = std::make_shared( + auto event_type = std::make_shared( DataTypeEnum8::Values { {"LoginFailure", static_cast(SESSION_LOGIN_FAILURE)}, @@ -86,7 +86,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() }); #define AUTH_TYPE_NAME_AND_VALUE(v) std::make_pair(AuthenticationTypeInfo::get(v).raw_name, static_cast(v)) - const auto identified_with_column = std::make_shared( + auto identified_with_column = std::make_shared( DataTypeEnum8::Values { AUTH_TYPE_NAME_AND_VALUE(AuthType::NO_PASSWORD), @@ -98,7 +98,7 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() }); #undef AUTH_TYPE_NAME_AND_VALUE - const auto interface_type_column = std::make_shared( + auto interface_type_column = std::make_shared( DataTypeEnum8::Values { {"TCP", static_cast(Interface::TCP)}, @@ -108,9 +108,9 @@ NamesAndTypesList SessionLogElement::getNamesAndTypes() {"PostgreSQL", static_cast(Interface::POSTGRESQL)} }); - const auto lc_string_datatype = std::make_shared(std::make_shared()); + auto lc_string_datatype = std::make_shared(std::make_shared()); - const auto settings_type_column = std::make_shared( + auto settings_type_column = std::make_shared( std::make_shared( DataTypes({ // setting name diff --git a/src/Interpreters/Set.cpp 
b/src/Interpreters/Set.cpp index 32dac7f9e9b..7af3e23d0d4 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -445,7 +445,7 @@ MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector & key_ranges, const DataTypes & data_types) const +BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, const DataTypes & data_types, bool single_point) const { size_t tuple_size = indexes_mapping.size(); @@ -468,7 +468,8 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector & key_ranges, std::optional new_range = KeyCondition::applyMonotonicFunctionsChainToRange( key_ranges[indexes_mapping[i].key_index], indexes_mapping[i].functions, - data_types[indexes_mapping[i].key_index]); + data_types[indexes_mapping[i].key_index], + single_point); if (!new_range) return {true, true}; diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index 3146b6af03f..2eecb0211a4 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -214,7 +214,7 @@ public: bool hasMonotonicFunctionsChain() const; - BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types) const; + BoolMask checkInRange(const std::vector & key_ranges, const DataTypes & data_types, bool single_point = false) const; private: // If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element. diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index f1fcfde25c0..29ba24c2e4c 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -41,9 +41,9 @@ struct StorageID assertNotEmpty(); } - StorageID(const ASTQueryWithTableAndOutput & query); - StorageID(const ASTTableIdentifier & table_identifier_node); - StorageID(const ASTPtr & node); + StorageID(const ASTQueryWithTableAndOutput & query); /// NOLINT + StorageID(const ASTTableIdentifier & table_identifier_node); /// NOLINT + StorageID(const ASTPtr & node); /// NOLINT String getDatabaseName() const; diff --git a/src/Interpreters/SubqueryForSet.cpp b/src/Interpreters/SubqueryForSet.cpp index 08fc07c71e1..d669e091131 100644 --- a/src/Interpreters/SubqueryForSet.cpp +++ b/src/Interpreters/SubqueryForSet.cpp @@ -7,7 +7,7 @@ namespace DB SubqueryForSet::SubqueryForSet() = default; SubqueryForSet::~SubqueryForSet() = default; -SubqueryForSet::SubqueryForSet(SubqueryForSet &&) = default; -SubqueryForSet & SubqueryForSet::operator= (SubqueryForSet &&) = default; +SubqueryForSet::SubqueryForSet(SubqueryForSet &&) noexcept = default; +SubqueryForSet & SubqueryForSet::operator= (SubqueryForSet &&) noexcept = default; } diff --git a/src/Interpreters/SubqueryForSet.h b/src/Interpreters/SubqueryForSet.h index 974f5bd3e58..f737ec4582b 100644 --- a/src/Interpreters/SubqueryForSet.h +++ b/src/Interpreters/SubqueryForSet.h @@ -17,8 +17,8 @@ struct SubqueryForSet { SubqueryForSet(); ~SubqueryForSet(); - SubqueryForSet(SubqueryForSet &&); - SubqueryForSet & operator= (SubqueryForSet &&); + SubqueryForSet(SubqueryForSet &&) noexcept; + SubqueryForSet & operator=(SubqueryForSet &&) noexcept; /// The source is obtained using the InterpreterSelectQuery subquery. 
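The SubqueryForSet hunk above marks the defaulted move operations noexcept. The reason this matters is standard C++ behaviour: containers such as std::vector only move elements during reallocation when the move constructor cannot throw, otherwise they copy (or lose the strong exception guarantee). A standalone sketch with illustrative stand-in types:

#include <iostream>
#include <type_traits>

struct MovableThrowing
{
    MovableThrowing() = default;
    MovableThrowing(MovableThrowing &&) {}  // user-provided, not noexcept
};

struct MovableNoexcept
{
    MovableNoexcept() = default;
    MovableNoexcept(MovableNoexcept &&) noexcept = default;
};

int main()
{
    std::cout << std::boolalpha
              << std::is_nothrow_move_constructible_v<MovableThrowing> << '\n'   // false: vector growth would copy
              << std::is_nothrow_move_constructible_v<MovableNoexcept> << '\n';  // true: vector growth moves
}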
std::unique_ptr source; diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index ec6fd98010d..59545d4314d 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -22,7 +22,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -112,9 +114,7 @@ std::shared_ptr createSystemLog( } -/// returns CREATE TABLE query, but with removed: -/// - UUID -/// - SETTINGS (for MergeTree) +/// returns CREATE TABLE query, but with removed UUID /// That way it can be used to compare with the SystemLog::getCreateTableQuery() ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) { @@ -123,11 +123,6 @@ ASTPtr getCreateTableQueryClean(const StorageID & table_id, ContextPtr context) auto & old_create_query_ast = old_ast->as(); /// Reset UUID old_create_query_ast.uuid = UUIDHelpers::Nil; - /// Existing table has default settings (i.e. `index_granularity = 8192`), reset them. - if (ASTStorage * storage = old_create_query_ast.storage) - { - storage->reset(storage->settings); - } return old_ast; } @@ -476,6 +471,16 @@ ASTPtr SystemLog::getCreateTableQuery() "Storage to create table for " + LogElement::name(), 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); create->set(create->storage, storage_ast); + /// Write additional (default) settings for MergeTree engine to make it make it possible to compare ASTs + /// and recreate tables on settings changes. + const auto & engine = create->storage->engine->as(); + if (endsWith(engine.name, "MergeTree")) + { + auto storage_settings = std::make_unique(getContext()->getMergeTreeSettings()); + storage_settings->loadFromQuery(*create->storage); + } + + return create; } diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index a3f6f7bf36b..f7c03ac6e1a 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -103,7 +103,7 @@ private: friend class TreeRewriter; - const SizeLimits size_limits; + SizeLimits size_limits; const size_t default_max_bytes = 0; const bool join_use_nulls = false; const size_t max_joined_block_rows = 0; @@ -114,7 +114,7 @@ private: const String temporary_files_codec = "LZ4"; /// the limit has no technical reasons, it supposed to improve safety - const size_t MAX_DISJUNCTS = 16; + const size_t MAX_DISJUNCTS = 16; /// NOLINT ASTs key_asts_left; ASTs key_asts_right; diff --git a/src/Interpreters/TraceCollector.h b/src/Interpreters/TraceCollector.h index 3a9edf676be..b3f11ca5756 100644 --- a/src/Interpreters/TraceCollector.h +++ b/src/Interpreters/TraceCollector.h @@ -18,7 +18,7 @@ class TraceLog; class TraceCollector { public: - TraceCollector(std::shared_ptr trace_log_); + explicit TraceCollector(std::shared_ptr trace_log_); ~TraceCollector(); static inline void collect(TraceType trace_type, const StackTrace & stack_trace, Int64 size) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 0d7d56058b9..6016d54c7dc 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -303,7 +303,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt } /// 'select * from a join b using id' should result one 'id' column -void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(const ASTPtr ast, Data & data) +void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(ASTPtr ast, Data & data) { const auto & table_join = ast->as(); diff --git 
a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 0f35d052ed2..9c46d926eca 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -52,7 +52,7 @@ private: static void visit(ASTExpressionList &, const ASTPtr &, Data &); static void visit(ASTFunction &, const ASTPtr &, Data &); - static void extractJoinUsingColumns(const ASTPtr ast, Data & data); + static void extractJoinUsingColumns(ASTPtr ast, Data & data); }; /// Visits AST for names qualification. diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index 0c090c8d56b..a5d42e6b989 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -36,7 +36,7 @@ public: using OrGroup = std::set; using AndGroup = std::set; - CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { } + CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { } /// NOLINT template CNFQuery & filterAlwaysTrueGroups(P predicate_is_unknown) /// delete always true groups @@ -91,7 +91,7 @@ public: CNFQuery & appendGroup(AndGroup&& and_group) { for (auto && or_group : and_group) - statements.emplace(std::move(or_group)); + statements.emplace(or_group); return *this; } diff --git a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp index 8d40dc6dfc8..6d7dee7a4c7 100644 --- a/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp +++ b/src/Interpreters/UserDefinedExecutableFunctionFactory.cpp @@ -47,6 +47,7 @@ public: bool useDefaultImplementationForConstants() const override { return true; } bool useDefaultImplementationForNulls() const override { return true; } bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes &) const override { diff --git a/src/Interpreters/addMissingDefaults.cpp b/src/Interpreters/addMissingDefaults.cpp index 04e1e6856cc..d043fd16bb5 100644 --- a/src/Interpreters/addMissingDefaults.cpp +++ b/src/Interpreters/addMissingDefaults.cpp @@ -63,7 +63,7 @@ ActionsDAGPtr addMissingDefaults( { const auto & nested_type = array_type->getNestedType(); ColumnPtr nested_column = nested_type->createColumnConstWithDefaultValue(0); - const auto & constant = actions->addColumn({std::move(nested_column), nested_type, column.name}); + const auto & constant = actions->addColumn({nested_column, nested_type, column.name}); auto & group = nested_groups[offsets_name]; group[0] = &constant; @@ -76,7 +76,7 @@ ActionsDAGPtr addMissingDefaults( * it can be full (or the interpreter may decide that it is constant everywhere). */ auto new_column = column.type->createColumnConstWithDefaultValue(0); - const auto * col = &actions->addColumn({std::move(new_column), column.type, column.name}); + const auto * col = &actions->addColumn({new_column, column.type, column.name}); index.push_back(&actions->materializeNode(*col)); } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index c17e1173b20..c1606700540 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -781,8 +781,8 @@ static std::tuple executeQueryImpl( element.memory_usage = info.peak_memory_usage > 0 ? 
info.peak_memory_usage : 0; - element.thread_ids = std::move(info.thread_ids); - element.profile_counters = std::move(info.profile_counters); + element.thread_ids = info.thread_ids; + element.profile_counters = info.profile_counters; /// We need to refresh the access info since dependent views might have added extra information, either during /// creation of the view (PushingToViewsBlockOutputStream) or while executing its internal SELECT diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 69b7b7d833f..31913777902 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -57,7 +57,7 @@ bool removeJoin(ASTSelectQuery & select, TreeRewriterResult & rewriter_result, C const size_t left_table_pos = 0; /// Test each argument of `and` function and select ones related to only left table std::shared_ptr new_conj = makeASTFunction("and"); - for (const auto & node : collectConjunctions(where)) + for (auto && node : collectConjunctions(where)) { if (membership_collector.getIdentsMembership(node) == left_table_pos) new_conj->arguments->children.push_back(std::move(node)); diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index c8a2d0903f2..3281445022e 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -41,15 +41,14 @@ struct PartialSortingLessImpl explicit PartialSortingLessImpl(const ColumnsWithSortDescriptions & columns_) : columns(columns_) { } - inline bool operator()(size_t a, size_t b) const + ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const { + int res = 0; + for (const auto & elem : columns) { - int res; - if (elem.column_const) { - res = 0; continue; } @@ -57,52 +56,37 @@ struct PartialSortingLessImpl { if (isCollationRequired(elem.description)) { - res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator); + res = elem.column->compareAtWithCollation(lhs, rhs, *elem.column, elem.description.nulls_direction, *elem.description.collator); } else { - res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); + res = elem.column->compareAt(lhs, rhs, *elem.column, elem.description.nulls_direction); } } else { - res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); + res = elem.column->compareAt(lhs, rhs, *elem.column, elem.description.nulls_direction); } res *= elem.description.direction; - if (res < 0) - return true; - else if (res > 0) - return false; + + if (res != 0) + break; } - return false; + + return res; + } + + ALWAYS_INLINE bool operator()(size_t lhs, size_t rhs) const + { + int res = compare(lhs, rhs); + return res < 0; } }; using PartialSortingLess = PartialSortingLessImpl; using PartialSortingLessWithCollation = PartialSortingLessImpl; -} - -void convertTupleColumnIntoSortDescriptions( - const ColumnTuple * tuple, const SortColumnDescription & description, ColumnsWithSortDescriptions & result) -{ - for (const auto & column : tuple->getColumns()) - { - if (const auto * subtuple = typeid_cast(column.get())) - { - convertTupleColumnIntoSortDescriptions(subtuple, description, result); - } - else - { - result.emplace_back(ColumnWithSortDescription{column.get(), description, isColumnConst(*column)}); - - if (isCollationRequired(description) && !result.back().column->isCollationSupported()) - result.back().description.collator = nullptr; - } - } -} - ColumnsWithSortDescriptions 
getColumnsWithSortDescription(const Block & block, const SortDescription & description) { size_t size = description.size(); @@ -127,16 +111,13 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c ErrorCodes::BAD_COLLATION); } - if (const auto * tuple = typeid_cast(column)) - convertTupleColumnIntoSortDescriptions(tuple, sort_column_description, result); - else - result.emplace_back(ColumnWithSortDescription{column, sort_column_description, isColumnConst(*column)}); + result.emplace_back(ColumnWithSortDescription{column, sort_column_description, isColumnConst(*column)}); } return result; } -void sortBlock(Block & block, const SortDescription & description, UInt64 limit) +void getBlockSortPermutationImpl(const Block & block, const SortDescription & description, IColumn::PermutationSortStability stability, UInt64 limit, IColumn::Permutation & permutation) { if (!block) return; @@ -152,25 +133,24 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) break; } } - if (all_const) - return; - IColumn::Permutation permutation; + if (unlikely(all_const)) + return; /// If only one column to sort by if (columns_with_sort_descriptions.size() == 1) { auto & column_with_sort_description = columns_with_sort_descriptions[0]; - bool reverse = column_with_sort_description.description.direction == -1; + IColumn::PermutationSortDirection direction = column_with_sort_description.description.direction == -1 ? IColumn::PermutationSortDirection::Descending : IColumn::PermutationSortDirection::Ascending; int nan_direction_hint = column_with_sort_description.description.nulls_direction; const auto & column = column_with_sort_description.column; if (isCollationRequired(column_with_sort_description.description)) column->getPermutationWithCollation( - *column_with_sort_description.description.collator, reverse, limit, nan_direction_hint, permutation); + *column_with_sort_description.description.collator, direction, stability, limit, nan_direction_hint, permutation); else - column->getPermutation(reverse, limit, nan_direction_hint, permutation); + column->getPermutation(direction, stability, limit, nan_direction_hint, permutation); } else { @@ -197,21 +177,32 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) continue; bool is_collation_required = isCollationRequired(column_with_sort_description.description); - bool reverse = column_with_sort_description.description.direction < 0; + IColumn::PermutationSortDirection direction = column_with_sort_description.description.direction == -1 ? 
IColumn::PermutationSortDirection::Descending : IColumn::PermutationSortDirection::Ascending; int nan_direction_hint = column_with_sort_description.description.nulls_direction; const auto & column = column_with_sort_description.column; if (is_collation_required) { column->updatePermutationWithCollation( - *column_with_sort_description.description.collator, reverse, limit, nan_direction_hint, permutation, ranges); + *column_with_sort_description.description.collator, direction, stability, limit, nan_direction_hint, permutation, ranges); } else { - column->updatePermutation(reverse, limit, nan_direction_hint, permutation, ranges); + column->updatePermutation(direction, stability, limit, nan_direction_hint, permutation, ranges); } } } +} + +} + +void sortBlock(Block & block, const SortDescription & description, UInt64 limit) +{ + IColumn::Permutation permutation; + getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Unstable, limit, permutation); + + if (permutation.empty()) + return; size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) @@ -221,19 +212,31 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) } } +void stableSortBlock(Block & block, const SortDescription & description) +{ + if (!block) + return; + + IColumn::Permutation permutation; + getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Stable, 0, permutation); + + if (permutation.empty()) + return; + + size_t columns = block.columns(); + for (size_t i = 0; i < columns; ++i) + { + auto & column_to_sort = block.getByPosition(i).column; + column_to_sort = column_to_sort->permute(permutation, 0); + } +} + void stableGetPermutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation) { if (!block) return; - size_t size = block.rows(); - out_permutation.resize(size); - for (size_t i = 0; i < size; ++i) - out_permutation[i] = i; - - ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description); - - std::stable_sort(out_permutation.begin(), out_permutation.end(), PartialSortingLess(columns_with_sort_desc)); + getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Stable, 0, out_permutation); } bool isAlreadySorted(const Block & block, const SortDescription & description) @@ -270,21 +273,4 @@ bool isAlreadySorted(const Block & block, const SortDescription & description) return true; } - -void stableSortBlock(Block & block, const SortDescription & description) -{ - if (!block) - return; - - IColumn::Permutation permutation; - stableGetPermutation(block, description, permutation); - - size_t columns = block.columns(); - for (size_t i = 0; i < columns; ++i) - { - auto & column_to_sort = block.safeGetByPosition(i).column; - column_to_sort = column_to_sort->permute(permutation, 0); - } -} - } diff --git a/src/Parsers/ASTAssignment.h b/src/Parsers/ASTAssignment.h index 88d4bb96c15..a37a31ae38e 100644 --- a/src/Parsers/ASTAssignment.h +++ b/src/Parsers/ASTAssignment.h @@ -28,6 +28,7 @@ public: protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { + settings.ostr << (settings.hilite ? hilite_identifier : ""); settings.writeIdentifier(column_name); settings.ostr << (settings.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index a0070892b79..04755a02399 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -51,7 +51,7 @@ public: void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; - bool empty() + bool empty() const { return (!columns || columns->children.empty()) && (!indices || indices->children.empty()) && (!constraints || constraints->children.empty()) && (!projections || projections->children.empty()); diff --git a/src/Parsers/ASTFunctionWithKeyValueArguments.h b/src/Parsers/ASTFunctionWithKeyValueArguments.h index 5820e8564ac..4b745e2c1a2 100644 --- a/src/Parsers/ASTFunctionWithKeyValueArguments.h +++ b/src/Parsers/ASTFunctionWithKeyValueArguments.h @@ -19,7 +19,6 @@ public: /// Value is closed in brackets (HOST '127.0.0.1') bool second_with_brackets; -public: explicit ASTPair(bool second_with_brackets_) : second_with_brackets(second_with_brackets_) { @@ -54,7 +53,6 @@ public: { } -public: String getID(char delim) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ASTHelpers.h b/src/Parsers/ASTHelpers.h index 086b361bf85..0b3db8e02d5 100644 --- a/src/Parsers/ASTHelpers.h +++ b/src/Parsers/ASTHelpers.h @@ -6,7 +6,7 @@ namespace DB { -static inline bool isFunctionCast(const ASTFunction * function) +static inline bool isFunctionCast(const ASTFunction * function) /// NOLINT { if (function) return function->name == "CAST" || function->name == "_CAST"; diff --git a/src/Parsers/ASTProjectionSelectQuery.h b/src/Parsers/ASTProjectionSelectQuery.h index 71334c50868..d93c10b6e39 100644 --- a/src/Parsers/ASTProjectionSelectQuery.h +++ b/src/Parsers/ASTProjectionSelectQuery.h @@ -26,10 +26,10 @@ public: ASTPtr & refSelect() { return getExpression(Expression::SELECT); } - const ASTPtr with() const { return getExpression(Expression::WITH); } - const ASTPtr select() const { return getExpression(Expression::SELECT); } - const ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } - const ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } + ASTPtr with() const { return getExpression(Expression::WITH); } + ASTPtr select() const { return getExpression(Expression::SELECT); } + ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } + ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } /// Set/Reset/Remove expression. 
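The sortBlock()/stableSortBlock() changes above share one helper, getBlockSortPermutationImpl(), which computes a single row permutation from the sort key and then applies it to every column of the block. A standalone sketch of that sort-by-permutation approach, using plain vectors in place of block columns:

#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

int main()
{
    std::vector<int> key = {3, 1, 2};
    std::vector<std::string> payload = {"c", "a", "b"};

    // Compute one permutation from the key column.
    std::vector<size_t> perm(key.size());
    std::iota(perm.begin(), perm.end(), 0);
    std::stable_sort(perm.begin(), perm.end(), [&](size_t lhs, size_t rhs) { return key[lhs] < key[rhs]; });

    // Apply the same permutation to every "column".
    std::vector<int> sorted_key;
    std::vector<std::string> sorted_payload;
    for (size_t row : perm)
    {
        sorted_key.push_back(key[row]);
        sorted_payload.push_back(payload[row]);
    }

    for (size_t i = 0; i < sorted_key.size(); ++i)
        std::cout << sorted_key[i] << ' ' << sorted_payload[i] << '\n';  // 1 a / 2 b / 3 c
}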
void setExpression(Expression expr, ASTPtr && ast); diff --git a/src/Parsers/ASTQueryWithOnCluster.h b/src/Parsers/ASTQueryWithOnCluster.h index b309ae5e847..c5daaa6ce37 100644 --- a/src/Parsers/ASTQueryWithOnCluster.h +++ b/src/Parsers/ASTQueryWithOnCluster.h @@ -17,7 +17,7 @@ public: /// new_database should be used by queries that refer to default db /// and default_database is specified for remote server - virtual ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const = 0; + virtual ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const = 0; /// NOLINT /// Returns a query prepared for execution on remote server std::string getRewrittenQueryWithoutOnCluster(const std::string & new_database = {}) const; diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 1c631783fdb..9a8f1dbd2e7 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -91,21 +91,21 @@ public: ASTPtr & refWhere() { return getExpression(Expression::WHERE); } ASTPtr & refHaving() { return getExpression(Expression::HAVING); } - const ASTPtr with() const { return getExpression(Expression::WITH); } - const ASTPtr select() const { return getExpression(Expression::SELECT); } - const ASTPtr tables() const { return getExpression(Expression::TABLES); } - const ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); } - const ASTPtr where() const { return getExpression(Expression::WHERE); } - const ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } - const ASTPtr having() const { return getExpression(Expression::HAVING); } - const ASTPtr window() const { return getExpression(Expression::WINDOW); } - const ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } - const ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); } - const ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); } - const ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); } - const ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); } - const ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); } - const ASTPtr settings() const { return getExpression(Expression::SETTINGS); } + ASTPtr with() const { return getExpression(Expression::WITH); } + ASTPtr select() const { return getExpression(Expression::SELECT); } + ASTPtr tables() const { return getExpression(Expression::TABLES); } + ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); } + ASTPtr where() const { return getExpression(Expression::WHERE); } + ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } + ASTPtr having() const { return getExpression(Expression::HAVING); } + ASTPtr window() const { return getExpression(Expression::WINDOW); } + ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } + ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); } + ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); } + ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); } + ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); } + ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); } + ASTPtr settings() const { return getExpression(Expression::SETTINGS); } bool hasFiltration() const { return where() || prewhere() || having(); } diff --git a/src/Parsers/ASTTTLElement.h 
b/src/Parsers/ASTTTLElement.h index a396a4c54e0..9705cafbce3 100644 --- a/src/Parsers/ASTTTLElement.h +++ b/src/Parsers/ASTTTLElement.h @@ -37,8 +37,8 @@ public: ASTPtr clone() const override; - const ASTPtr ttl() const { return getExpression(ttl_expr_pos); } - const ASTPtr where() const { return getExpression(where_expr_pos); } + ASTPtr ttl() const { return getExpression(ttl_expr_pos); } + ASTPtr where() const { return getExpression(where_expr_pos); } void setTTL(ASTPtr && ast) { setExpression(ttl_expr_pos, std::forward(ast)); } void setWhere(ASTPtr && ast) { setExpression(where_expr_pos, std::forward(ast)); } @@ -50,7 +50,6 @@ private: int ttl_expr_pos; int where_expr_pos; -private: void setExpression(int & pos, ASTPtr && ast); ASTPtr getExpression(int pos, bool clone = false) const; }; diff --git a/src/Parsers/ASTTableOverrides.cpp b/src/Parsers/ASTTableOverrides.cpp index 8fc21db218f..0f34a9fb247 100644 --- a/src/Parsers/ASTTableOverrides.cpp +++ b/src/Parsers/ASTTableOverrides.cpp @@ -93,7 +93,7 @@ ASTPtr ASTTableOverrideList::tryGetTableOverride(const String & name) const return children[it->second]; } -void ASTTableOverrideList::setTableOverride(const String & name, const ASTPtr ast) +void ASTTableOverrideList::setTableOverride(const String & name, ASTPtr ast) { auto it = positions.find(name); if (it == positions.end()) diff --git a/src/Parsers/ASTTableOverrides.h b/src/Parsers/ASTTableOverrides.h index c0603f7a8e0..c47260789d8 100644 --- a/src/Parsers/ASTTableOverrides.h +++ b/src/Parsers/ASTTableOverrides.h @@ -40,7 +40,7 @@ public: String getID(char) const override { return "TableOverrideList"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - void setTableOverride(const String & name, const ASTPtr ast); + void setTableOverride(const String & name, ASTPtr ast); void removeTableOverride(const String & name); ASTPtr tryGetTableOverride(const String & name) const; bool hasOverride(const String & name) const; diff --git a/src/Parsers/ASTUseQuery.h b/src/Parsers/ASTUseQuery.h index 4e4a13c2a7f..16d449f905f 100644 --- a/src/Parsers/ASTUseQuery.h +++ b/src/Parsers/ASTUseQuery.h @@ -25,7 +25,6 @@ protected: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override { settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? 
hilite_none : "") << backQuoteIfNeed(database); - return; } }; diff --git a/src/Parsers/Access/ASTUserNameWithHost.h b/src/Parsers/Access/ASTUserNameWithHost.h index ada9bfb0673..bd28b42b48a 100644 --- a/src/Parsers/Access/ASTUserNameWithHost.h +++ b/src/Parsers/Access/ASTUserNameWithHost.h @@ -23,7 +23,7 @@ public: void concatParts(); ASTUserNameWithHost() = default; - ASTUserNameWithHost(const String & name_) : base_name(name_) {} + explicit ASTUserNameWithHost(const String & name_) : base_name(name_) {} String getID(char) const override { return "UserNameWithHost"; } ASTPtr clone() const override { return std::make_shared(*this); } void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; @@ -39,7 +39,7 @@ public: auto begin() const { return names.begin(); } auto end() const { return names.end(); } auto front() const { return *begin(); } - void push_back(const String & name_) { names.push_back(std::make_shared(name_)); } + void push_back(const String & name_) { names.push_back(std::make_shared(name_)); } /// NOLINT Strings toStrings() const; void concatParts(); diff --git a/src/Parsers/Access/ParserCreateRoleQuery.cpp b/src/Parsers/Access/ParserCreateRoleQuery.cpp index 314075cb7c0..da9749958ee 100644 --- a/src/Parsers/Access/ParserCreateRoleQuery.cpp +++ b/src/Parsers/Access/ParserCreateRoleQuery.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include namespace DB @@ -37,7 +37,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - settings = std::move(new_settings_ast->as().elements); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -102,7 +102,8 @@ bool ParserCreateRoleQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { if (!settings) settings = std::make_shared(); - boost::range::push_back(settings->elements, std::move(new_settings)); + + insertAtEnd(settings->elements, std::move(new_settings)); continue; } diff --git a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp index 731564a14c7..83156c6a8e1 100644 --- a/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp +++ b/src/Parsers/Access/ParserCreateRowPolicyQuery.cpp @@ -11,7 +11,7 @@ #include #include #include -#include +#include namespace DB @@ -264,7 +264,7 @@ bool ParserCreateRowPolicyQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & std::vector> new_filters; if (parseForClauses(pos, expected, alter, new_filters)) { - boost::range::push_back(filters, std::move(new_filters)); + insertAtEnd(filters, std::move(new_filters)); continue; } diff --git a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp index 8b5f2df2dd2..c58a3035dc6 100644 --- a/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp +++ b/src/Parsers/Access/ParserCreateSettingsProfileQuery.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB @@ -39,7 +39,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - settings = std::move(new_settings_ast->as().elements); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -122,7 +122,8 @@ bool ParserCreateSettingsProfileQuery::parseImpl(Pos & pos, ASTPtr & node, Expec { if (!settings) settings = std::make_shared(); - boost::range::push_back(settings->elements, std::move(new_settings)); + + insertAtEnd(settings->elements, std::move(new_settings)); continue; } diff --git 
a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index cde14e632dd..da8e212fe2f 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include namespace DB @@ -250,7 +250,7 @@ namespace if (!parseHostsWithoutPrefix(pos, expected, res_hosts)) return false; - hosts.add(std::move(res_hosts)); + hosts.add(res_hosts); return true; }); } @@ -289,7 +289,7 @@ namespace if (!elements_p.parse(pos, new_settings_ast, expected)) return false; - settings = std::move(new_settings_ast->as().elements); + settings = std::move(new_settings_ast->as().elements); return true; }); } @@ -414,7 +414,8 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec { if (!settings) settings = std::make_shared(); - boost::range::push_back(settings->elements, std::move(new_settings)); + + insertAtEnd(settings->elements, std::move(new_settings)); continue; } diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 9f7e8535a14..43e1cedd34d 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -156,7 +156,7 @@ namespace } - void eraseNonGrantable(AccessRightsElements & elements) + void throwIfNotGrantable(AccessRightsElements & elements) { boost::range::remove_erase_if(elements, [](AccessRightsElement & element) { @@ -303,7 +303,12 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } if (!is_revoke) - eraseNonGrantable(elements); + { + if (attach_mode) + elements.eraseNonGrantable(); + else + throwIfNotGrantable(elements); + } auto query = std::make_shared(); node = query; diff --git a/src/Parsers/Access/ParserRowPolicyName.cpp b/src/Parsers/Access/ParserRowPolicyName.cpp index 7df4e5a36dc..cf5d2ab21b6 100644 --- a/src/Parsers/Access/ParserRowPolicyName.cpp +++ b/src/Parsers/Access/ParserRowPolicyName.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include namespace DB @@ -179,7 +179,7 @@ bool ParserRowPolicyNames::parseImpl(Pos & pos, ASTPtr & node, Expected & expect return false; num_added_names_last_time = new_full_names.size(); - boost::range::push_back(full_names, std::move(new_full_names)); + insertAtEnd(full_names, std::move(new_full_names)); return true; }; diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 58fac2341cf..d2911754b24 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -33,7 +33,8 @@ class ParserToken : public IParserBase private: TokenType token_type; public: - ParserToken(TokenType token_type_) : token_type(token_type_) {} + ParserToken(TokenType token_type_) : token_type(token_type_) {} /// NOLINT + protected: const char * getName() const override { return "token"; } diff --git a/src/Parsers/DumpASTNode.h b/src/Parsers/DumpASTNode.h index e8efeb4b59c..5b6d8798fc1 100644 --- a/src/Parsers/DumpASTNode.h +++ b/src/Parsers/DumpASTNode.h @@ -86,6 +86,75 @@ inline void dumpAST(const IAST & ast, WriteBuffer & ostr, DumpASTNode * parent = dumpAST(*child, ostr, &dump); } +class DumpASTNodeInDotFormat +{ +public: + DumpASTNodeInDotFormat(const IAST & ast_, WriteBuffer * ostr_, bool root_ = true, const char * label_ = nullptr) + : ast(ast_), ostr(ostr_), root(root_), label(label_) + { + if (!ostr) + return; + + if (root) + (*ostr) << "digraph " << (label ? 
String(label) : "") << "{\n rankdir=\"UD\";\n"; + + printNode(); + } + + ~DumpASTNodeInDotFormat() + { + if (!ostr) + return; + + for (const auto & child : ast.children) + printEdge(ast, *child); + + if (root) + (*ostr) << "}\n"; + } + +private: + const IAST & ast; + WriteBuffer * ostr; + bool root; + const char * label; + + String getASTId() const { return ast.getID(' '); } + static String getNodeId(const IAST & a) { return "n" + std::to_string(reinterpret_cast(&a)); } + + void printNode() const + { + (*ostr) << " " << getNodeId(ast) << "[label=\""; + (*ostr) << getASTId(); + + String alias = ast.tryGetAlias(); + if (!alias.empty()) + (*ostr) << " (" + << "alias" + << " " << alias << ")"; + + if (!ast.children.empty()) + (*ostr) << " (children" + << " " << ast.children.size() << ")"; + (*ostr) << "\"];\n"; + } + + void printEdge(const IAST & parent, const IAST & child) const + { + (*ostr) << " " << getNodeId(parent) << " -> " << getNodeId(child) << ";\n"; + } +}; + + +/// Print AST in "dot" format for GraphViz +/// You can render it with: dot -Tpng ast.dot ast.png +inline void dumpASTInDotFormat(const IAST & ast, WriteBuffer & ostr, bool root = true) +{ + DumpASTNodeInDotFormat dump(ast, &ostr, root); + for (const auto & child : ast.children) + dumpASTInDotFormat(*child, ostr, false); +} + /// String stream dumped in dtor template diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index e00e0aba7b3..c51201750c5 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -442,9 +442,9 @@ namespace pattern_list_args->children = { std::make_shared("^["), to_remove, - std::make_shared("]*|["), + std::make_shared("]+|["), to_remove, - std::make_shared("]*$") + std::make_shared("]+$") }; func_name = "replaceRegexpAll"; } @@ -455,7 +455,7 @@ namespace pattern_list_args->children = { std::make_shared("^["), to_remove, - std::make_shared("]*") + std::make_shared("]+") }; } else @@ -464,7 +464,7 @@ namespace pattern_list_args->children = { std::make_shared("["), to_remove, - std::make_shared("]*$") + std::make_shared("]+$") }; } func_name = "replaceRegexpOne"; diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 358fe778f91..86d0fd0f861 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -207,7 +207,7 @@ private: ParserPtr elem_parser; public: - ParserCastExpression(ParserPtr && elem_parser_) + explicit ParserCastExpression(ParserPtr && elem_parser_) : elem_parser(std::move(elem_parser_)) { } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index fdf821c4a0b..bd8167c64fe 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -69,7 +69,7 @@ public: } /** Get the text that identifies this element. 
*/ - virtual String getID(char delimiter = '_') const = 0; + virtual String getID(char delimiter = '_') const = 0; /// NOLINT ASTPtr ptr() { return shared_from_this(); } diff --git a/src/Parsers/ParserExplainQuery.h b/src/Parsers/ParserExplainQuery.h index a1865e30239..ba30e97a58f 100644 --- a/src/Parsers/ParserExplainQuery.h +++ b/src/Parsers/ParserExplainQuery.h @@ -14,7 +14,7 @@ protected: const char * getName() const override { return "EXPLAIN"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - ParserExplainQuery(const char* end_) : end(end_) {} + explicit ParserExplainQuery(const char* end_) : end(end_) {} }; } diff --git a/src/Parsers/ParserQueryWithOutput.h b/src/Parsers/ParserQueryWithOutput.h index 854d5a74ffd..1fd7bec1eea 100644 --- a/src/Parsers/ParserQueryWithOutput.h +++ b/src/Parsers/ParserQueryWithOutput.h @@ -15,7 +15,7 @@ protected: const char * getName() const override { return "Query with output"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - ParserQueryWithOutput(const char * end_) : end(end_) {} + explicit ParserQueryWithOutput(const char * end_) : end(end_) {} }; } diff --git a/src/Parsers/ParserTablesInSelectQuery.h b/src/Parsers/ParserTablesInSelectQuery.h index 9e5b591ccbe..772f1992f4d 100644 --- a/src/Parsers/ParserTablesInSelectQuery.h +++ b/src/Parsers/ParserTablesInSelectQuery.h @@ -21,7 +21,7 @@ protected: class ParserTablesInSelectQueryElement : public IParserBase { public: - ParserTablesInSelectQueryElement(bool is_first_) : is_first(is_first_) {} + explicit ParserTablesInSelectQueryElement(bool is_first_) : is_first(is_first_) {} protected: const char * getName() const override { return "table, table function, subquery or list of joined tables"; } diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index e70ba57a267..1c9240ba114 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -90,7 +90,7 @@ public: bool hasRows() const { return num_rows > 0; } bool hasColumns() const { return !columns.empty(); } bool empty() const { return !hasRows() && !hasColumns(); } - operator bool() const { return !empty(); } + operator bool() const { return !empty(); } /// NOLINT void addColumn(ColumnPtr column); void addColumn(size_t position, ColumnPtr column); diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 198d5ce5d8d..0f091e73743 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -35,7 +35,7 @@ struct PullingAsyncPipelineExecutor::Data if (has_exception) { has_exception = false; - std::rethrow_exception(std::move(exception)); + std::rethrow_exception(exception); } } }; diff --git a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp index 6c2e62b77dc..07cdb554aba 100644 --- a/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PushingAsyncPipelineExecutor.cpp @@ -90,7 +90,7 @@ struct PushingAsyncPipelineExecutor::Data if (has_exception) { has_exception = false; - std::rethrow_exception(std::move(exception)); + std::rethrow_exception(exception); } } }; diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 67a8eb88d61..2d35809e26a 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -14,7 +14,7 @@ namespace DB class 
ISchemaReader { public: - ISchemaReader(ReadBuffer & in_) : in(in_) {} + explicit ISchemaReader(ReadBuffer & in_) : in(in_) {} virtual NamesAndTypesList readSchema() = 0; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 558ba9bdd65..cf5cfa681a1 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -114,7 +114,7 @@ static std::shared_ptr createFileReader(ReadB if (is_stopped) return nullptr; - auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(std::move(arrow_file)); + auto file_reader_status = arrow::ipc::RecordBatchFileReader::Open(arrow_file); if (!file_reader_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", file_reader_status.status().ToString()); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index d649c52557f..e06eab04f1b 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -44,7 +44,7 @@ class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFil public: RandomAccessFileFromSeekableReadBuffer(SeekableReadBuffer & in_, off_t file_size_); - RandomAccessFileFromSeekableReadBuffer(SeekableReadBufferWithSize & in_); + explicit RandomAccessFileFromSeekableReadBuffer(SeekableReadBufferWithSize & in_); arrow::Result GetSize() override; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 102c30088c9..5c367bb69f0 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -32,7 +32,6 @@ #include #include - /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. 
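// A minimal sketch, with hypothetical names not taken from the files above, of why the
// std::move casts removed in these hunks are no-ops: when the callee takes its argument
// by const reference (as the shared_ptr parameters here typically are), the rvalue
// produced by std::move simply binds to that reference and nothing is actually moved,
// so dropping the cast changes no behaviour and removes a misleading hint.
#include <memory>
#include <utility>

struct Buffer {};

void open(const std::shared_ptr<Buffer> & /*file*/) {}   // callee takes a const reference

void example()
{
    auto file = std::make_shared<Buffer>();
    open(std::move(file));   // binds to the const reference; `file` is not moved from
    open(file);              // identical behaviour, clearer intent
}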
#define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ @@ -66,9 +65,9 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; + extern const int INCORRECT_NUMBER_OF_COLUMNS; } - /// Inserts numeric data right into internal column data to reduce an overhead template > static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr & arrow_column, const String & column_name) @@ -241,7 +240,7 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr(chunk.Value(value_i))); // TODO: copy column } } - return {std::move(internal_column), std::move(internal_type), column_name}; + return {std::move(internal_column), internal_type, column_name}; } template @@ -337,7 +336,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_values, read_ints_as_dates); auto nullmap_column = readByteMapFromArrowColumn(arrow_column); auto nullable_type = std::make_shared(std::move(nested_column.type)); - auto nullable_column = ColumnNullable::create(std::move(nested_column.column), std::move(nullmap_column)); + auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); return {std::move(nullable_column), std::move(nullable_type), column_name}; } @@ -384,7 +383,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( const auto * tuple_column = assert_cast(nested_column.column.get()); const auto * tuple_type = assert_cast(nested_column.type.get()); - auto map_column = ColumnMap::create(std::move(tuple_column->getColumnPtr(0)), std::move(tuple_column->getColumnPtr(1)), std::move(offsets_column)); + auto map_column = ColumnMap::create(tuple_column->getColumnPtr(0), tuple_column->getColumnPtr(1), offsets_column); auto map_type = std::make_shared(tuple_type->getElements()[0], tuple_type->getElements()[1]); return {std::move(map_column), std::move(map_type), column_name}; } @@ -393,7 +392,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_nested_column = getNestedArrowColumn(arrow_column); auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_values, read_ints_as_dates); auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); - auto array_column = ColumnArray::create(std::move(nested_column.column), std::move(offsets_column)); + auto array_column = ColumnArray::create(nested_column.column, offsets_column); auto array_type = std::make_shared(nested_column.type); return {std::move(array_column), std::move(array_type), column_name}; } @@ -458,7 +457,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( auto arrow_indexes_column = std::make_shared(indexes_array); auto indexes_column = readColumnWithIndexesData(arrow_indexes_column); - auto lc_column = ColumnLowCardinality::create(dict_values->column, std::move(indexes_column)); + auto lc_column = ColumnLowCardinality::create(dict_values->column, indexes_column); auto lc_type = std::make_shared(dict_values->type); return {std::move(lc_column), std::move(lc_type), column_name}; } @@ -532,6 +531,9 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.rows()); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 1e8ee4aebb9..7a598de1f6a 100644 --- 
a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -61,7 +61,7 @@ private: , target_column_idx(target_column_idx_) , deserialize_fn(deserialize_fn_) {} - Action(SkipFn skip_fn_) + explicit Action(SkipFn skip_fn_) : type(Skip) , skip_fn(skip_fn_) {} diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index bb202a3e177..6918220feb4 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -15,9 +15,9 @@ namespace ErrorCodes BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : RowInputFormatWithNamesAndTypes( - std::move(header), + header, in_, - std::move(params_), + params_, with_names_, with_types_, format_settings_, diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index b56a9c2729f..043e4f1e724 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -658,7 +658,7 @@ namespace DB auto nested_arrow_type = getArrowType(nested_types[i], tuple_column->getColumnPtr(i), name, format_name, out_is_column_nullable); nested_fields.push_back(std::make_shared(name, nested_arrow_type, *out_is_column_nullable)); } - return arrow::struct_(std::move(nested_fields)); + return arrow::struct_(nested_fields); } if (column_type->lowCardinality()) diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp index 58f88c5c7cf..fd33abfb587 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.cpp @@ -169,7 +169,7 @@ static std::optional convertToDynamicValue( auto value_builder = initStructFieldBuilder(nested_column, row_num, struct_builder, value_field); auto value = convertToDynamicValue(nested_column, nullable_type->getNestedType(), row_num, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) - struct_builder.set(value_field, std::move(*value)); + struct_builder.set(value_field, *value); } } else @@ -184,7 +184,7 @@ static std::optional convertToDynamicValue( = initStructFieldBuilder(nested_columns[pos], row_num, struct_builder, nested_struct_schema.getFieldByName(name)); auto value = convertToDynamicValue(nested_columns[pos], nested_types[pos], row_num, field_builder, enum_comparing_mode, temporary_text_data_storage); if (value) - struct_builder.set(name, std::move(*value)); + struct_builder.set(name, *value); } } return std::nullopt; @@ -215,7 +215,7 @@ static std::optional convertToDynamicValue( auto value = convertToDynamicValue(nested_column, nested_type, offset + i, value_builder, enum_comparing_mode, temporary_text_data_storage); if (value) - list_builder.set(i, std::move(*value)); + list_builder.set(i, *value); } return std::nullopt; } diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index 288b36508ce..12dc5eda2b3 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -15,7 +15,7 @@ namespace DB class CapnProtoOutputStream : public kj::OutputStream { public: - CapnProtoOutputStream(WriteBuffer & out_); + explicit 
CapnProtoOutputStream(WriteBuffer & out_); void write(const void * buffer, size_t size) override; diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index 6659243df63..c5d4f033258 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace DB { diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index dcab55743cb..c087749d8d8 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -27,7 +27,7 @@ JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat( : RowInputFormatWithNamesAndTypes( header_, in_, - std::move(params_), + params_, with_names_, with_types_, format_settings_, diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index c4645e0d63d..4a2c4209acf 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -55,9 +55,9 @@ void JSONCompactEachRowRowOutputFormat::writeRowEndDelimiter() void JSONCompactEachRowRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { writeChar('\n', out); - size_t num_columns = columns.size(); + size_t columns_size = columns.size(); writeRowStartDelimiter(); - for (size_t i = 0; i < num_columns; ++i) + for (size_t i = 0; i < columns_size; ++i) { if (i != 0) writeFieldDelimiter(); diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 8130b2b4cb1..61ac25ca441 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -154,9 +154,9 @@ void JSONRowOutputFormat::writeBeforeTotals() void JSONRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - size_t num_columns = columns.size(); + size_t columns_size = columns.size(); - for (size_t i = 0; i < num_columns; ++i) + for (size_t i = 0; i < columns_size; ++i) { if (i != 0) writeTotalsFieldDelimiter(); diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 56fc5d7857b..607e6f36767 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -353,7 +353,7 @@ bool MsgPackVisitor::visit_nil() bool MsgPackVisitor::visit_ext(const char * value, uint32_t size) { int8_t type = *value; - if (*value == int8_t(MsgPackExtensionTypes::UUID)) + if (*value == int8_t(MsgPackExtensionTypes::UUIDType)) { insertUUID(info_stack.top().column, info_stack.top().type, value + 1, size - 1); return true; @@ -496,11 +496,12 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) case msgpack::type::object_type::EXT: { msgpack::object_ext object_ext = object.via.ext; - if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUID)) + if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUIDType)) return std::make_shared(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type()); } } + __builtin_unreachable(); } DataTypes 
MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index edec9774b5f..e53aafb4e56 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -199,7 +199,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr writeBinaryBigEndian(value.toUnderType().items[0], buf); writeBinaryBigEndian(value.toUnderType().items[1], buf); StringRef uuid_ext = buf.stringRef(); - packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUID)); + packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUIDType)); packer.pack_ext_body(uuid_ext.data, uuid_ext.size); return; } @@ -213,8 +213,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr void MsgPackRowOutputFormat::write(const Columns & columns, size_t row_num) { - size_t num_columns = columns.size(); - for (size_t i = 0; i < num_columns; ++i) + size_t columns_size = columns.size(); + for (size_t i = 0; i < columns_size; ++i) { serializeField(*columns[i], types[i], row_num); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 61511d634d3..aa9f7874ae8 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -1,4 +1,5 @@ #include "ORCBlockInputFormat.h" +#include #if USE_ORC #include @@ -52,6 +53,9 @@ Chunk ORCBlockInputFormat::generate() if (!table || !table->num_rows()) return res; + if (format_settings.use_lowercase_column_name) + table = *table->RenameColumns(include_column_names); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. 
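// A minimal sketch, simplified and with a hypothetical helper name, of the renaming step
// that use_lowercase_column_name enables in this reader: the schema names are lowercased
// up front and the table read from the file is renamed to the collected names, so lookups
// against the ClickHouse header succeed regardless of the case used inside the ORC file.
// arrow::Table::RenameColumns returns an arrow::Result, hence the status check.
#include <arrow/table.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

static std::shared_ptr<arrow::Table> renameColumnsToLowercase(
    const std::shared_ptr<arrow::Table> & table,
    std::vector<std::string> names)              // one entry per column actually read
{
    for (auto & name : names)
        boost::to_lower(name);

    auto renamed = table->RenameColumns(names);  // arrow::Result<std::shared_ptr<arrow::Table>>
    if (!renamed.ok())
        throw std::runtime_error(renamed.status().ToString());
    return *renamed;
}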
@@ -69,6 +73,7 @@ void ORCBlockInputFormat::resetParser() file_reader.reset(); include_indices.clear(); + include_column_names.clear(); block_missing_values.clear(); } @@ -111,7 +116,7 @@ static void getFileReaderAndSchema( if (is_stopped) return; - auto result = arrow::adapters::orc::ORCFileReader::Open(std::move(arrow_file), arrow::default_memory_pool()); + auto result = arrow::adapters::orc::ORCFileReader::Open(arrow_file, arrow::default_memory_pool()); if (!result.ok()) throw Exception(result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); file_reader = std::move(result).ValueOrDie(); @@ -120,6 +125,20 @@ static void getFileReaderAndSchema( if (!read_schema_result.ok()) throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); schema = std::move(read_schema_result).ValueOrDie(); + + if (format_settings.use_lowercase_column_name) + { + std::vector> fields; + fields.reserve(schema->num_fields()); + for (int i = 0; i < schema->num_fields(); ++i) + { + const auto& field = schema->field(i); + auto name = field->name(); + boost::to_lower(name); + fields.push_back(field->WithName(name)); + } + schema = arrow::schema(fields, schema->metadata()); + } } void ORCBlockInputFormat::prepareReader() @@ -148,9 +167,11 @@ void ORCBlockInputFormat::prepareReader() const auto & name = schema->field(i)->name(); if (getPort().getHeader().has(name) || nested_table_names.contains(name)) { - column_names.push_back(name); for (int j = 0; j != indexes_count; ++j) + { include_indices.push_back(index + j); + include_column_names.push_back(name); + } } index += indexes_count; } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index bb136d02d6e..bd2151d78ff 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -45,10 +45,9 @@ private: std::unique_ptr arrow_column_to_ch_column; - std::vector column_names; - // indices of columns to read from ORC file std::vector include_indices; + std::vector include_column_names; std::vector missing_columns; BlockMissingValues block_missing_values; diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 2ffee597e8f..f69fd1c0aab 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -17,7 +17,7 @@ class WriteBuffer; class ORCOutputStream : public orc::OutputStream { public: - ORCOutputStream(WriteBuffer & out_); + explicit ORCOutputStream(WriteBuffer & out_); uint64_t getLength() const override; uint64_t getNaturalWriteSize() const override; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3f0d9980573..548bf0138f5 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -1,4 +1,6 @@ #include "ParquetBlockInputFormat.h" +#include + #if USE_PARQUET #include @@ -13,9 +15,6 @@ #include "ArrowColumnToCHColumn.h" #include -#include - - namespace DB { @@ -57,6 +56,9 @@ Chunk ParquetBlockInputFormat::generate() throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; + if (format_settings.use_lowercase_column_name) + table = *table->RenameColumns(column_names); + ++row_group_current; arrow_column_to_ch_column->arrowTableToCHChunk(res, table); @@ -76,6 +78,7 @@ void 
ParquetBlockInputFormat::resetParser() file_reader.reset(); column_indices.clear(); + column_names.clear(); row_group_current = 0; block_missing_values.clear(); } @@ -120,6 +123,20 @@ static void getFileReaderAndSchema( return; THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader)); THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema)); + + if (format_settings.use_lowercase_column_name) + { + std::vector> fields; + fields.reserve(schema->num_fields()); + for (int i = 0; i < schema->num_fields(); ++i) + { + const auto& field = schema->field(i); + auto name = field->name(); + boost::to_lower(name); + fields.push_back(field->WithName(name)); + } + schema = arrow::schema(fields, schema->metadata()); + } } void ParquetBlockInputFormat::prepareReader() @@ -150,7 +167,10 @@ void ParquetBlockInputFormat::prepareReader() if (getPort().getHeader().has(name) || nested_table_names.contains(name)) { for (int j = 0; j != indexes_count; ++j) + { column_indices.push_back(index + j); + column_names.push_back(name); + } } index += indexes_count; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 1faadaa3d21..eba9aac29f2 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -40,6 +40,7 @@ private: int row_group_total = 0; // indices of columns to read from Parquet file std::vector column_indices; + std::vector column_names; std::unique_ptr arrow_column_to_ch_column; int row_group_current = 0; std::vector missing_columns; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 75c630d0607..04f24bbb3e4 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -22,7 +22,7 @@ class ReadBuffer; class RegexpFieldExtractor { public: - RegexpFieldExtractor(const FormatSettings & format_settings); + explicit RegexpFieldExtractor(const FormatSettings & format_settings); /// Return true if row was successfully parsed and row fields were extracted. bool parseRow(PeekableReadBuffer & buf); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index ed67a8256bc..abab5b02c96 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -53,7 +53,7 @@ public: bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; - FormatSettings::EscapingRule getEscapingRule() + FormatSettings::EscapingRule getEscapingRule() const { return is_raw ? 
FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped; } diff --git a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp index 0905e4243cd..468770e2515 100644 --- a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp @@ -141,7 +141,7 @@ void VerticalRowOutputFormat::writeSpecialRow(const Columns & columns, size_t ro row_number = 0; field_number = 0; - size_t num_columns = columns.size(); + size_t columns_size = columns.size(); writeCString(title, out); writeCString(":\n", out); @@ -151,7 +151,7 @@ void VerticalRowOutputFormat::writeSpecialRow(const Columns & columns, size_t ro writeCString("─", out); writeChar('\n', out); - for (size_t i = 0; i < num_columns; ++i) + for (size_t i = 0; i < columns_size; ++i) writeField(*columns[i], *serializations[i], row_num); } diff --git a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h index 35cfded4214..ff8f113d9a6 100644 --- a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h +++ b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h @@ -1,5 +1,7 @@ #pragma once +#include + namespace DB { diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 2c6d08ed287..c0f595fa539 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -89,7 +89,7 @@ inline static const Patterns & selectPatternsForMetricType(const Graphite::Param Graphite::RollupRule selectPatternForPath( const Graphite::Params & params, - const StringRef path) + StringRef path) { const Graphite::Pattern * first_match = &undef_pattern; diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index dc39cb46386..05306ebe30f 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -147,7 +147,7 @@ struct Params using RollupRule = std::pair; -Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, const StringRef path); +Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, StringRef path); void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params); diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 9bf33d72f31..89da346980d 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -1,5 +1,11 @@ #pragma once +#include +#include +#include +#include + + namespace DB { diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 72ad4616174..0247b8677af 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -457,7 +457,7 @@ static void postprocessChunk( { const auto & from_type = desc.nested_type; const auto & to_type = desc.real_type; - res_columns[desc.column_numbers[0]] = recursiveTypeConversion(std::move(column), from_type, to_type); + res_columns[desc.column_numbers[0]] = recursiveTypeConversion(column, from_type, to_type); } else res_columns[desc.column_numbers[0]] = std::move(column); diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 9f27b440be5..7cb25f3930e 100644 --- 
a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -214,7 +214,7 @@ protected: public: using Data = State::Data; - Port(Block header_) : header(std::move(header_)) {} + Port(Block header_) : header(std::move(header_)) {} /// NOLINT Port(Block header_, IProcessor * processor_) : header(std::move(header_)), processor(processor_) {} void setUpdateInfo(UpdateInfo * info) { update_info = info; } @@ -303,12 +303,12 @@ public: Chunk ALWAYS_INLINE pull(bool set_not_needed = false) { - auto data_ = pullData(set_not_needed); + auto pull_data = pullData(set_not_needed); - if (data_.exception) - std::rethrow_exception(data_.exception); + if (pull_data.exception) + std::rethrow_exception(pull_data.exception); - return std::move(data_.chunk); + return std::move(pull_data.chunk); } bool ALWAYS_INLINE isFinished() const @@ -396,7 +396,7 @@ public: void ALWAYS_INLINE pushException(std::exception_ptr exception) { - pushData({.chunk = {}, .exception = std::move(exception)}); + pushData({.chunk = {}, .exception = exception}); } void ALWAYS_INLINE pushData(Data data_) diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index 45c3719ebca..6b6f9d361ef 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -138,7 +138,7 @@ void addCreatingSetsStep( auto creating_set = std::make_unique( plan->getCurrentDataStream(), - std::move(description), + description, std::move(set), limits, context); diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index a271ef78dfa..d948c16a78d 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -22,8 +22,8 @@ namespace ErrorCodes QueryPlan::QueryPlan() = default; QueryPlan::~QueryPlan() = default; -QueryPlan::QueryPlan(QueryPlan &&) = default; -QueryPlan & QueryPlan::operator=(QueryPlan &&) = default; +QueryPlan::QueryPlan(QueryPlan &&) noexcept = default; +QueryPlan & QueryPlan::operator=(QueryPlan &&) noexcept = default; void QueryPlan::checkInitialized() const { diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 4e342d746d1..5e064713abd 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -44,8 +44,8 @@ class QueryPlan public: QueryPlan(); ~QueryPlan(); - QueryPlan(QueryPlan &&); - QueryPlan & operator=(QueryPlan &&); + QueryPlan(QueryPlan &&) noexcept; + QueryPlan & operator=(QueryPlan &&) noexcept; void unitePlans(QueryPlanStepPtr step, std::vector plans); void addStep(QueryPlanStepPtr step); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 9a9a71f9688..ad4d1ea86d6 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -982,7 +982,7 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const { auto result_ptr = analyzed_result_ptr ? 
analyzed_result_ptr : selectRangesToRead(prepared_parts); if (std::holds_alternative(result_ptr->result)) - std::rethrow_exception(std::move(std::get(result_ptr->result))); + std::rethrow_exception(std::get(result_ptr->result)); return std::get(result_ptr->result); } @@ -1326,7 +1326,7 @@ bool MergeTreeDataSelectAnalysisResult::error() const size_t MergeTreeDataSelectAnalysisResult::marks() const { if (std::holds_alternative(result)) - std::rethrow_exception(std::move(std::get(result))); + std::rethrow_exception(std::get(result)); const auto & index_stats = std::get(result).index_stats; if (index_stats.empty()) diff --git a/src/Processors/QueueBuffer.h b/src/Processors/QueueBuffer.h index 826f4a22b8b..6856e214823 100644 --- a/src/Processors/QueueBuffer.h +++ b/src/Processors/QueueBuffer.h @@ -17,7 +17,7 @@ private: public: String getName() const override { return "QueueBuffer"; } - QueueBuffer(Block header) + explicit QueueBuffer(Block header) : IAccumulatingTransform(header, header) { } diff --git a/src/Processors/Sources/DelayedSource.cpp b/src/Processors/Sources/DelayedSource.cpp index 205ea6e2253..6cfdeeeeec5 100644 --- a/src/Processors/Sources/DelayedSource.cpp +++ b/src/Processors/Sources/DelayedSource.cpp @@ -64,7 +64,7 @@ IProcessor::Status DelayedSource::prepare() continue; } - if (!output->isNeeded()) + if (!output->canPush()) return Status::PortFull; if (input->isFinished()) diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 538aba9d1f3..a9b408064d9 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -225,6 +225,10 @@ namespace assert_cast(column).insertValue(UInt16(value.getDate().getDayNum())); read_bytes_size += 2; break; + case ValueType::vtDate32: + assert_cast(column).insertValue(Int32(value.getDate().getExtenedDayNum())); + read_bytes_size += 4; + break; case ValueType::vtDateTime: { ReadBufferFromString in(value); diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 929ab98d6e6..e4c217a8f81 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -11,7 +11,7 @@ namespace DB struct ChunkInfoWithAllocatedBytes : public ChunkInfo { - ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) + explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 01df264005b..d7917fc95a7 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -12,7 +12,7 @@ class AggregatedArenasChunkInfo : public ChunkInfo { public: Arenas arenas; - AggregatedArenasChunkInfo(Arenas arenas_) + explicit AggregatedArenasChunkInfo(Arenas arenas_) : arenas(std::move(arenas_)) {} }; diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index 2d013e596ce..da6dc877abf 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -20,7 +20,7 @@ struct RowSourcePart RowSourcePart() = default; - RowSourcePart(size_t source_num, bool skip_flag = false) + explicit RowSourcePart(size_t source_num, bool skip_flag = false) { static_assert(sizeof(*this) == 1, "Size of RowSourcePart is too big due to 
compiler settings"); setSourceNum(source_num); diff --git a/src/Processors/Transforms/DistinctSortedTransform.cpp b/src/Processors/Transforms/DistinctSortedTransform.cpp index 01cef654388..5600476fd77 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedTransform.cpp @@ -24,7 +24,7 @@ void DistinctSortedTransform::transform(Chunk & chunk) if (column_ptrs.empty()) return; - const ColumnRawPtrs clearing_hint_columns(getClearingColumns(chunk, column_ptrs)); + ColumnRawPtrs clearing_hint_columns(getClearingColumns(chunk, column_ptrs)); if (data.type == ClearableSetVariants::Type::EMPTY) data.init(ClearableSetVariants::chooseMethod(column_ptrs, key_sizes)); diff --git a/src/Processors/Transforms/ExceptionKeepingTransform.cpp b/src/Processors/Transforms/ExceptionKeepingTransform.cpp index f2b29a45f84..266407f21a5 100644 --- a/src/Processors/Transforms/ExceptionKeepingTransform.cpp +++ b/src/Processors/Transforms/ExceptionKeepingTransform.cpp @@ -138,7 +138,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } } @@ -152,7 +152,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } else @@ -166,7 +166,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } else @@ -188,7 +188,7 @@ void ExceptionKeepingTransform::work() { stage = Stage::Exception; ready_output = true; - data.exception = std::move(exception); + data.exception = exception; onException(); } } diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index d01a809e666..34e5231c626 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -266,7 +266,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (const auto * in_order_info = typeid_cast(info.get())) + else if (typeid_cast(info.get())) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -334,7 +334,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (const auto * in_order_info = typeid_cast(cur_info.get())) + else if (typeid_cast(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index dd2b315d53c..11d32278caf 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -49,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (const auto * in_order_info = typeid_cast(info.get())) + else if (typeid_cast(info.get())) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; diff --git a/src/Processors/Transforms/PostgreSQLSource.cpp b/src/Processors/Transforms/PostgreSQLSource.cpp index 
88f092a2533..a31cd879257 100644 --- a/src/Processors/Transforms/PostgreSQLSource.cpp +++ b/src/Processors/Transforms/PostgreSQLSource.cpp @@ -28,7 +28,7 @@ PostgreSQLSource::PostgreSQLSource( postgres::ConnectionHolderPtr connection_holder_, const std::string & query_str_, const Block & sample_block, - const UInt64 max_block_size_) + UInt64 max_block_size_) : SourceWithProgress(sample_block.cloneEmpty()) , query_str(query_str_) , max_block_size(max_block_size_) @@ -43,7 +43,7 @@ PostgreSQLSource::PostgreSQLSource( std::shared_ptr tx_, const std::string & query_str_, const Block & sample_block, - const UInt64 max_block_size_, + UInt64 max_block_size_, bool auto_commit_) : SourceWithProgress(sample_block.cloneEmpty()) , query_str(query_str_) diff --git a/src/Processors/Transforms/PostgreSQLSource.h b/src/Processors/Transforms/PostgreSQLSource.h index c7e55c09c32..bd6203042bb 100644 --- a/src/Processors/Transforms/PostgreSQLSource.h +++ b/src/Processors/Transforms/PostgreSQLSource.h @@ -24,7 +24,7 @@ public: postgres::ConnectionHolderPtr connection_holder_, const String & query_str_, const Block & sample_block, - const UInt64 max_block_size_); + UInt64 max_block_size_); String getName() const override { return "PostgreSQL"; } @@ -33,7 +33,7 @@ protected: std::shared_ptr tx_, const std::string & query_str_, const Block & sample_block, - const UInt64 max_block_size_, + UInt64 max_block_size_, bool auto_commit_); String query_str; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 0b7797da24f..45e972afa3f 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -138,7 +138,7 @@ IProcessor::Status TotalsHavingTransform::prepare() if (!totals_output.canPush()) return Status::PortFull; - if (!totals) + if (!total_prepared) return Status::Ready; totals_output.push(std::move(totals)); @@ -312,6 +312,8 @@ void TotalsHavingTransform::prepareTotals() /// Note: after expression totals may have several rows if `arrayJoin` was used in expression. totals = Chunk(block.getColumns(), num_rows); } + + total_prepared = true; } } diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index 03635054c65..6b4afb2fa8b 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -46,6 +46,7 @@ protected: void transform(Chunk & chunk) override; bool finished_transform = false; + bool total_prepared = false; Chunk totals; private: diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 0da7541556b..3c96e12e869 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -22,6 +23,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; + extern const int ILLEGAL_COLUMN; } // Interface for true window functions. It's not much of an interface, they just @@ -206,7 +208,7 @@ WindowTransform::WindowTransform(const Block & input_header_, { column = std::move(column)->convertToFullColumnIfConst(); } - input_header.setColumns(std::move(input_columns)); + input_header.setColumns(input_columns); // Initialize window function workspaces. 
workspaces.reserve(functions.size()); @@ -986,7 +988,23 @@ void WindowTransform::writeOutCurrentRow() auto * buf = ws.aggregate_function_state.data(); // FIXME does it also allocate the result on the arena? // We'll have to pass it out with blocks then... - a->insertResultInto(buf, *result_column, arena.get()); + + if (a->isState()) + { + /// AggregateFunction's states should be inserted into column using specific way + auto * res_col_aggregate_function = typeid_cast(result_column); + if (!res_col_aggregate_function) + { + throw Exception("State function " + a->getName() + " inserts results into non-state column ", + ErrorCodes::ILLEGAL_COLUMN); + } + res_col_aggregate_function->insertFrom(buf); + } + else + { + a->insertResultInto(buf, *result_column, arena.get()); + } + } } diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 077979e83b9..d536c8780d2 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -245,7 +245,6 @@ public: return RowNumber{first_block_number, 0}; } -public: /* * Data (formerly) inherited from ISimpleTransform, needed for the * implementation of the IProcessor interface. @@ -349,10 +348,10 @@ public: template <> struct fmt::formatter { - constexpr auto parse(format_parse_context & ctx) + static constexpr auto parse(format_parse_context & ctx) { - auto it = ctx.begin(); - auto end = ctx.end(); + const auto * it = ctx.begin(); + const auto * end = ctx.end(); /// Only support {}. if (it != end && *it != '}') diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 19302afb5c9..a993b8acd7d 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -695,7 +695,7 @@ IProcessor::Status FinalizingViewsTransform::prepare() return Status::Ready; if (any_exception) - output.pushException(std::move(any_exception)); + output.pushException(any_exception); output.finish(); return Status::Finished; @@ -708,7 +708,7 @@ static std::exception_ptr addStorageToException(std::exception_ptr ptr, const St { try { - std::rethrow_exception(std::move(ptr)); + std::rethrow_exception(ptr); } catch (DB::Exception & exception) { @@ -736,7 +736,7 @@ void FinalizingViewsTransform::work() if (!any_exception) any_exception = status.exception; - view.setException(addStorageToException(std::move(status.exception), view.table_id)); + view.setException(addStorageToException(status.exception, view.table_id)); } else { diff --git a/src/QueryPipeline/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp index 671ba6e4c39..84cf3829a13 100644 --- a/src/QueryPipeline/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -23,7 +23,7 @@ void BlockIO::reset() /// TODO Do we need also reset callbacks? In which order? 
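// A minimal illustration, with a hypothetical type not taken from this change, of what the
// noexcept added to the move constructors and move assignments in the surrounding hunks
// buys: standard containers consult std::move_if_noexcept when they relocate elements, so
// only types whose moves are noexcept get moved during reallocation; throwing moves fall
// back to copies.
#include <type_traits>

struct Holder
{
    Holder() = default;
    Holder(Holder &&) noexcept = default;              // eligible for moving on reallocation
    Holder & operator=(Holder &&) noexcept = default;
};

static_assert(std::is_nothrow_move_constructible_v<Holder>,
              "a std::vector<Holder> moves, rather than copies, its elements when it grows");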
} -BlockIO & BlockIO::operator= (BlockIO && rhs) +BlockIO & BlockIO::operator= (BlockIO && rhs) noexcept { if (this == &rhs) return *this; diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h index 748e46c3a1e..94c6fbc83cb 100644 --- a/src/QueryPipeline/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -14,7 +14,7 @@ struct BlockIO BlockIO() = default; BlockIO(BlockIO &&) = default; - BlockIO & operator= (BlockIO && rhs); + BlockIO & operator= (BlockIO && rhs) noexcept; ~BlockIO(); BlockIO(const BlockIO &) = delete; diff --git a/src/QueryPipeline/PipelineResourcesHolder.cpp b/src/QueryPipeline/PipelineResourcesHolder.cpp index a4b85ed662b..2f6b6a9de32 100644 --- a/src/QueryPipeline/PipelineResourcesHolder.cpp +++ b/src/QueryPipeline/PipelineResourcesHolder.cpp @@ -5,10 +5,10 @@ namespace DB { PipelineResourcesHolder::PipelineResourcesHolder() = default; -PipelineResourcesHolder::PipelineResourcesHolder(PipelineResourcesHolder &&) = default; +PipelineResourcesHolder::PipelineResourcesHolder(PipelineResourcesHolder &&) noexcept = default; PipelineResourcesHolder::~PipelineResourcesHolder() = default; -PipelineResourcesHolder & PipelineResourcesHolder::operator=(PipelineResourcesHolder && rhs) +PipelineResourcesHolder & PipelineResourcesHolder::operator=(PipelineResourcesHolder && rhs) noexcept { table_locks.insert(table_locks.end(), rhs.table_locks.begin(), rhs.table_locks.end()); storage_holders.insert(storage_holders.end(), rhs.storage_holders.begin(), rhs.storage_holders.end()); diff --git a/src/QueryPipeline/PipelineResourcesHolder.h b/src/QueryPipeline/PipelineResourcesHolder.h index 9fb1438424a..7853fa3ae4c 100644 --- a/src/QueryPipeline/PipelineResourcesHolder.h +++ b/src/QueryPipeline/PipelineResourcesHolder.h @@ -16,10 +16,10 @@ class Context; struct PipelineResourcesHolder { PipelineResourcesHolder(); - PipelineResourcesHolder(PipelineResourcesHolder &&); + PipelineResourcesHolder(PipelineResourcesHolder &&) noexcept; ~PipelineResourcesHolder(); /// Custom mode assignment does not destroy data from lhs. It appends data from rhs to lhs. - PipelineResourcesHolder& operator=(PipelineResourcesHolder &&); + PipelineResourcesHolder& operator=(PipelineResourcesHolder &&) noexcept; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. 
/// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index ce1c9473f60..0412049bd58 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -24,8 +24,8 @@ namespace ErrorCodes } QueryPipeline::QueryPipeline() = default; -QueryPipeline::QueryPipeline(QueryPipeline &&) = default; -QueryPipeline & QueryPipeline::operator=(QueryPipeline &&) = default; +QueryPipeline::QueryPipeline(QueryPipeline &&) noexcept = default; +QueryPipeline & QueryPipeline::operator=(QueryPipeline &&) noexcept = default; QueryPipeline::~QueryPipeline() = default; static void checkInput(const InputPort & input, const ProcessorPtr & processor) diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index beb46361f95..29b5dd76017 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -32,10 +32,10 @@ class QueryPipeline { public: QueryPipeline(); - QueryPipeline(QueryPipeline &&); + QueryPipeline(QueryPipeline &&) noexcept; QueryPipeline(const QueryPipeline &) = delete; - QueryPipeline & operator=(QueryPipeline &&); + QueryPipeline & operator=(QueryPipeline &&) noexcept; QueryPipeline & operator=(const QueryPipeline &) = delete; ~QueryPipeline(); diff --git a/src/QueryPipeline/RemoteInserter.cpp b/src/QueryPipeline/RemoteInserter.cpp index 13d087f0db9..6acdf19090d 100644 --- a/src/QueryPipeline/RemoteInserter.cpp +++ b/src/QueryPipeline/RemoteInserter.cpp @@ -32,8 +32,19 @@ RemoteInserter::RemoteInserter( modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; if (CurrentThread::isInitialized()) { - modified_client_info.client_trace_context - = CurrentThread::get().thread_trace_context; + auto& thread_trace_context = CurrentThread::get().thread_trace_context; + + if (thread_trace_context.trace_id != UUID()) + { + // overwrite the trace context only if current thread trace context is available + modified_client_info.client_trace_context = thread_trace_context; + } + else + { + // if the trace on the thread local is not enabled(for example running in a background thread) + // we should not clear the trace context on the client info because the client info may hold trace context + // and this trace context should be propagated to the remote server so that the tracing of distributed table insert is complete. + } } /** Send query and receive "header", that describes table structure. diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 142e56ceb25..d1275444b84 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -210,7 +210,7 @@ static Block adaptBlockStructure(const Block & block, const Block & header) return res; } -void RemoteQueryExecutor::sendQuery() +void RemoteQueryExecutor::sendQuery(ClientInfo::QueryKind query_kind) { if (sent_query) return; @@ -237,13 +237,7 @@ void RemoteQueryExecutor::sendQuery() auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); ClientInfo modified_client_info = context->getClientInfo(); - modified_client_info.query_kind = ClientInfo::QueryKind::SECONDARY_QUERY; - /// Set initial_query_id to query_id for the clickhouse-benchmark. 
- /// - /// (since first query of clickhouse-benchmark will be issued as SECONDARY_QUERY, - /// due to it executes queries via RemoteBlockInputStream) - if (modified_client_info.initial_query_id.empty()) - modified_client_info.initial_query_id = query_id; + modified_client_info.query_kind = query_kind; if (CurrentThread::isInitialized()) { modified_client_info.client_trace_context = CurrentThread::get().thread_trace_context; diff --git a/src/QueryPipeline/RemoteQueryExecutor.h b/src/QueryPipeline/RemoteQueryExecutor.h index 655bd5603de..78bc9f611ab 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.h +++ b/src/QueryPipeline/RemoteQueryExecutor.h @@ -83,7 +83,13 @@ public: ~RemoteQueryExecutor(); /// Create connection and send query, external tables and scalars. - void sendQuery(); + /// + /// @param query_kind - kind of query, usually SECONDARY_QUERY, + /// since these are queries between servers + /// (which is what this code was primarily written for). + /// But clickhouse-benchmark uses the same code + /// and should pass INITIAL_QUERY. + void sendQuery(ClientInfo::QueryKind query_kind = ClientInfo::QueryKind::SECONDARY_QUERY); /// Query is resent to a replica, the query itself can be modified. std::atomic resent_query { false }; diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 4064643f1f8..575cdb95431 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -174,7 +174,7 @@ bool RemoteQueryExecutorReadContext::resumeRoutine() fiber = std::move(fiber).resume(); if (exception) - std::rethrow_exception(std::move(exception)); + std::rethrow_exception(exception); } return true; diff --git a/src/QueryPipeline/SizeLimits.h b/src/QueryPipeline/SizeLimits.h index ce7e1795475..fc052714b0c 100644 --- a/src/QueryPipeline/SizeLimits.h +++ b/src/QueryPipeline/SizeLimits.h @@ -26,7 +26,7 @@ struct SizeLimits UInt64 max_bytes = 0; OverflowMode overflow_mode = OverflowMode::THROW; - SizeLimits() {} + SizeLimits() = default; SizeLimits(UInt64 max_rows_, UInt64 max_bytes_, OverflowMode overflow_mode_) : max_rows(max_rows_), max_bytes(max_bytes_), overflow_mode(overflow_mode_) {} diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index d0f92535844..9218c75c390 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1122,7 +1122,7 @@ std::string PredefinedQueryHandler::getQuery(HTTPServerRequest & request, HTMLFo HTTPRequestHandlerFactoryPtr createDynamicHandlerFactory(IServer & server, const std::string & config_prefix) { - const auto & query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query"); + auto query_param_name = server.config().getString(config_prefix + ".handler.query_param_name", "query"); auto factory = std::make_shared>(server, std::move(query_param_name)); factory->addFiltersFromConfig(server.config(), config_prefix); diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 078dcb04595..3236b35d5ae 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -40,7 +40,7 @@ static inline bool checkExpression(const StringRef & match_str, const std::pair< return match_str == expression.first; } -static inline auto methodsFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) +static inline auto methodsFilter(Poco::Util::AbstractConfiguration & config, const
std::string & config_path) /// NOLINT { std::vector methods; Poco::StringTokenizer tokenizer(config.getString(config_path), ","); @@ -64,7 +64,7 @@ static inline auto getExpression(const std::string & expression) return std::make_pair(expression, compiled_regex); } -static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) +static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) /// NOLINT { return [expression = getExpression(config.getString(config_path))](const HTTPServerRequest & request) { @@ -75,7 +75,7 @@ static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const s }; } -static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, const std::string & prefix) +static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, const std::string & prefix) /// NOLINT { std::unordered_map> headers_expression; Poco::Util::AbstractConfiguration::Keys headers_name; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 07964c29577..655d17e61fa 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -202,25 +202,30 @@ struct SocketInterruptablePollWrapper #endif }; -KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_) +KeeperTCPHandler::KeeperTCPHandler( + const Poco::Util::AbstractConfiguration & config_ref, + std::shared_ptr keeper_dispatcher_, + Poco::Timespan receive_timeout_, + Poco::Timespan send_timeout_, + const Poco::Net::StreamSocket & socket_) : Poco::Net::TCPServerConnection(socket_) - , server(server_) , log(&Poco::Logger::get("KeeperTCPHandler")) - , global_context(Context::createCopy(server.context())) - , keeper_dispatcher(global_context->getKeeperDispatcher()) + , keeper_dispatcher(keeper_dispatcher_) , operation_timeout( 0, - global_context->getConfigRef().getUInt( + config_ref.getUInt( "keeper_server.coordination_settings.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) , min_session_timeout( 0, - global_context->getConfigRef().getUInt( + config_ref.getUInt( "keeper_server.coordination_settings.min_session_timeout_ms", Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS) * 1000) , max_session_timeout( 0, - global_context->getConfigRef().getUInt( + config_ref.getUInt( "keeper_server.coordination_settings.session_timeout_ms", Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS) * 1000) , poll_wrapper(std::make_unique(socket_)) + , send_timeout(send_timeout_) + , receive_timeout(receive_timeout_) , responses(std::make_unique(std::numeric_limits::max())) , last_op(std::make_unique(EMPTY_LAST_OP)) { @@ -289,11 +294,9 @@ void KeeperTCPHandler::runImpl() { setThreadName("KeeperHandler"); ThreadStatus thread_status; - auto global_receive_timeout = global_context->getSettingsRef().receive_timeout; - auto global_send_timeout = global_context->getSettingsRef().send_timeout; - socket().setReceiveTimeout(global_receive_timeout); - socket().setSendTimeout(global_send_timeout); + socket().setReceiveTimeout(receive_timeout); + socket().setSendTimeout(send_timeout); socket().setNoDelay(true); in = std::make_shared(socket()); @@ -544,19 +547,13 @@ std::pair KeeperTCPHandler::receiveReque void KeeperTCPHandler::packageSent() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.incrementPacketsSent(); - } + conn_stats.incrementPacketsSent(); keeper_dispatcher->incrementPacketsSent(); } void KeeperTCPHandler::packageReceived() { - { 
- std::lock_guard lock(conn_stats_mutex); - conn_stats.incrementPacketsReceived(); - } + conn_stats.incrementPacketsReceived(); keeper_dispatcher->incrementPacketsReceived(); } @@ -566,10 +563,7 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response if (response->xid != Coordination::WATCH_XID && response->getOpNum() != Coordination::OpNum::Heartbeat) { Int64 elapsed = (Poco::Timestamp() - operations[response->xid]) / 1000; - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.updateLatency(elapsed); - } + conn_stats.updateLatency(elapsed); operations.erase(response->xid); keeper_dispatcher->updateKeeperStatLatency(elapsed); @@ -584,15 +578,14 @@ void KeeperTCPHandler::updateStats(Coordination::ZooKeeperResponsePtr & response } -KeeperConnectionStats KeeperTCPHandler::getConnectionStats() const +KeeperConnectionStats & KeeperTCPHandler::getConnectionStats() { - std::lock_guard lock(conn_stats_mutex); return conn_stats; } void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) { - KeeperConnectionStats stats = getConnectionStats(); + auto & stats = getConnectionStats(); writeText(' ', buf); writeText(socket().peerAddress().toString(), buf); @@ -641,10 +634,7 @@ void KeeperTCPHandler::dumpStats(WriteBufferFromOwnString & buf, bool brief) void KeeperTCPHandler::resetStats() { - { - std::lock_guard lock(conn_stats_mutex); - conn_stats.reset(); - } + conn_stats.reset(); last_op.set(std::make_unique(EMPTY_LAST_OP)); } diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index 7953dfd2cbe..9895c335c96 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -48,19 +48,22 @@ private: static std::unordered_set connections; public: - KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_); + KeeperTCPHandler( + const Poco::Util::AbstractConfiguration & config_ref, + std::shared_ptr keeper_dispatcher_, + Poco::Timespan receive_timeout_, + Poco::Timespan send_timeout_, + const Poco::Net::StreamSocket & socket_); void run() override; - KeeperConnectionStats getConnectionStats() const; + KeeperConnectionStats & getConnectionStats(); void dumpStats(WriteBufferFromOwnString & buf, bool brief); void resetStats(); ~KeeperTCPHandler() override; private: - IServer & server; Poco::Logger * log; - ContextPtr global_context; std::shared_ptr keeper_dispatcher; Poco::Timespan operation_timeout; Poco::Timespan min_session_timeout; @@ -69,6 +72,8 @@ private: int64_t session_id{-1}; Stopwatch session_stopwatch; SocketInterruptablePollWrapperPtr poll_wrapper; + Poco::Timespan send_timeout; + Poco::Timespan receive_timeout; ThreadSafeResponseQueuePtr responses; @@ -100,7 +105,6 @@ private: LastOpMultiVersion last_op; - mutable std::mutex conn_stats_mutex; KeeperConnectionStats conn_stats; }; diff --git a/src/Server/KeeperTCPHandlerFactory.h b/src/Server/KeeperTCPHandlerFactory.h index 58dc73d7c27..76309ffc119 100644 --- a/src/Server/KeeperTCPHandlerFactory.h +++ b/src/Server/KeeperTCPHandlerFactory.h @@ -10,11 +10,17 @@ namespace DB { +using ConfigGetter = std::function; + class KeeperTCPHandlerFactory : public TCPServerConnectionFactory { private: - IServer & server; + ConfigGetter config_getter; + std::shared_ptr keeper_dispatcher; Poco::Logger * log; + Poco::Timespan receive_timeout; + Poco::Timespan send_timeout; + class DummyTCPHandler : public Poco::Net::TCPServerConnection { public: @@ -23,9 +29,17 @@ private: }; public: - KeeperTCPHandlerFactory(IServer & server_, bool secure) - : 
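The refactored KeeperTCPHandler above no longer reaches into a server-wide Context: the configuration, the Keeper dispatcher and the socket timeouts are injected through the constructor, and (as the factory change just below shows) the factory carries a ConfigGetter callback plus the timeouts, so each new connection sees the configuration that is current at accept time. A minimal sketch of that dependency-injection shape using only the standard library; Config, Dispatcher, Timespan, Handler and HandlerFactory are hypothetical stand-ins, not the real Poco or ClickHouse classes.

#include <chrono>
#include <functional>
#include <map>
#include <memory>
#include <string>

/// Toy stand-ins for Poco::Util::AbstractConfiguration, the Keeper dispatcher and Poco::Timespan.
struct Config
{
    std::map<std::string, unsigned> values;
    unsigned getUInt(const std::string & key, unsigned def) const
    {
        auto it = values.find(key);
        return it == values.end() ? def : it->second;
    }
};
struct Dispatcher {};
using Timespan = std::chrono::microseconds;

/// The handler receives everything it needs through its constructor,
/// so it can also be built in a standalone Keeper binary that has no server Context.
class Handler
{
public:
    Handler(const Config & config, std::shared_ptr<Dispatcher> dispatcher_, Timespan receive_timeout_, Timespan send_timeout_)
        : dispatcher(std::move(dispatcher_))
        , operation_timeout(std::chrono::milliseconds(config.getUInt("keeper_server.coordination_settings.operation_timeout_ms", 10000)))
        , receive_timeout(receive_timeout_)
        , send_timeout(send_timeout_)
    {
    }

private:
    std::shared_ptr<Dispatcher> dispatcher;
    Timespan operation_timeout;
    Timespan receive_timeout;
    Timespan send_timeout;
};

/// The factory stores a getter rather than a configuration reference.
using ConfigGetterFn = std::function<const Config &()>;

class HandlerFactory
{
public:
    HandlerFactory(ConfigGetterFn getter_, std::shared_ptr<Dispatcher> dispatcher_, Timespan receive_timeout_, Timespan send_timeout_)
        : getter(std::move(getter_)), dispatcher(std::move(dispatcher_)), receive_timeout(receive_timeout_), send_timeout(send_timeout_)
    {
    }

    std::unique_ptr<Handler> createConnection() const
    {
        return std::make_unique<Handler>(getter(), dispatcher, receive_timeout, send_timeout);
    }

private:
    ConfigGetterFn getter;
    std::shared_ptr<Dispatcher> dispatcher;
    Timespan receive_timeout;
    Timespan send_timeout;
};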
server(server_) + KeeperTCPHandlerFactory( + ConfigGetter config_getter_, + std::shared_ptr keeper_dispatcher_, + Poco::Timespan receive_timeout_, + Poco::Timespan send_timeout_, + bool secure) + : config_getter(config_getter_) + , keeper_dispatcher(keeper_dispatcher_) , log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory")) + , receive_timeout(receive_timeout_) + , send_timeout(send_timeout_) { } @@ -34,7 +48,7 @@ public: try { LOG_TRACE(log, "Keeper request. Address: {}", socket.peerAddress().toString()); - return new KeeperTCPHandler(server, socket); + return new KeeperTCPHandler(config_getter(), keeper_dispatcher, receive_timeout, send_timeout, socket); } catch (const Poco::Net::NetException &) { diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index b41ad2376f1..dbc676432f5 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC +#if USE_GRPC && !defined(KEEPER_STANDALONE_BUILD) #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC +#if USE_GRPC && !defined(KEEPER_STANDALONE_BUILD) class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index 9b3b1af0301..90aec7471ee 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -21,7 +21,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC +#if USE_GRPC && !defined(KEEPER_STANDALONE_BUILD) ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif @@ -52,7 +52,7 @@ private: class Impl { public: - virtual ~Impl() {} + virtual ~Impl() = default; virtual void start() = 0; virtual void stop() = 0; virtual bool isStopping() const = 0; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 99523ff09e3..f4592a8b2c9 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -853,163 +852,15 @@ void TCPHandler::sendExtremes(const Block & extremes) } } - -namespace -{ - using namespace ProfileEvents; - - constexpr size_t NAME_COLUMN_INDEX = 4; - constexpr size_t VALUE_COLUMN_INDEX = 5; - - struct ProfileEventsSnapshot - { - UInt64 thread_id; - ProfileEvents::CountersIncrement counters; - Int64 memory_usage; - time_t current_time; - }; - - /* - * Add records about provided non-zero ProfileEvents::Counters. 
- */ - void dumpProfileEvents( - ProfileEventsSnapshot const & snapshot, - MutableColumns & columns, - String const & host_name) - { - size_t rows = 0; - auto & name_column = columns[NAME_COLUMN_INDEX]; - auto & value_column = columns[VALUE_COLUMN_INDEX]; - for (ProfileEvents::Event event = 0; event < ProfileEvents::Counters::num_counters; ++event) - { - Int64 value = snapshot.counters[event]; - - if (value == 0) - continue; - - const char * desc = ProfileEvents::getName(event); - name_column->insertData(desc, strlen(desc)); - value_column->insert(value); - rows++; - } - - // Fill the rest of the columns with data - for (size_t row = 0; row < rows; ++row) - { - size_t i = 0; - columns[i++]->insertData(host_name.data(), host_name.size()); - columns[i++]->insert(UInt64(snapshot.current_time)); - columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::INCREMENT); - } - } - - void dumpMemoryTracker( - ProfileEventsSnapshot const & snapshot, - MutableColumns & columns, - String const & host_name) - { - { - size_t i = 0; - columns[i++]->insertData(host_name.data(), host_name.size()); - columns[i++]->insert(UInt64(snapshot.current_time)); - columns[i++]->insert(UInt64{snapshot.thread_id}); - columns[i++]->insert(ProfileEvents::Type::GAUGE); - - columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME)); - columns[i++]->insert(snapshot.memory_usage); - } - } -} - - void TCPHandler::sendProfileEvents() { if (client_tcp_protocol_version < DBMS_MIN_PROTOCOL_VERSION_WITH_INCREMENTAL_PROFILE_EVENTS) return; - NamesAndTypesList column_names_and_types = { - { "host_name", std::make_shared() }, - { "current_time", std::make_shared() }, - { "thread_id", std::make_shared() }, - { "type", ProfileEvents::TypeEnum }, - { "name", std::make_shared() }, - { "value", std::make_shared() }, - }; - - ColumnsWithTypeAndName temp_columns; - for (auto const & name_and_type : column_names_and_types) - temp_columns.emplace_back(name_and_type.type, name_and_type.name); - - Block block(std::move(temp_columns)); - - MutableColumns columns = block.mutateColumns(); - auto thread_group = CurrentThread::getGroup(); - auto const current_thread_id = CurrentThread::get().thread_id; - std::vector snapshots; - ThreadIdToCountersSnapshot new_snapshots; - ProfileEventsSnapshot group_snapshot; + Block block; + ProfileEvents::getProfileEvents(server_display_name, state.profile_queue, block, last_sent_snapshots); + if (block.rows() != 0) { - auto stats = thread_group->getProfileEventsCountersAndMemoryForThreads(); - snapshots.reserve(stats.size()); - - for (auto & stat : stats) - { - auto const thread_id = stat.thread_id; - if (thread_id == current_thread_id) - continue; - auto current_time = time(nullptr); - auto previous_snapshot = last_sent_snapshots.find(thread_id); - auto increment = - previous_snapshot != last_sent_snapshots.end() - ? 
CountersIncrement(stat.counters, previous_snapshot->second) - : CountersIncrement(stat.counters); - snapshots.push_back(ProfileEventsSnapshot{ - thread_id, - std::move(increment), - stat.memory_usage, - current_time - }); - new_snapshots[thread_id] = std::move(stat.counters); - } - - group_snapshot.thread_id = 0; - group_snapshot.current_time = time(nullptr); - group_snapshot.memory_usage = thread_group->memory_tracker.get(); - auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot(); - auto prev_group_snapshot = last_sent_snapshots.find(0); - group_snapshot.counters = - prev_group_snapshot != last_sent_snapshots.end() - ? CountersIncrement(group_counters, prev_group_snapshot->second) - : CountersIncrement(group_counters); - new_snapshots[0] = std::move(group_counters); - } - last_sent_snapshots = std::move(new_snapshots); - - for (auto & snapshot : snapshots) - { - dumpProfileEvents(snapshot, columns, server_display_name); - dumpMemoryTracker(snapshot, columns, server_display_name); - } - dumpProfileEvents(group_snapshot, columns, server_display_name); - dumpMemoryTracker(group_snapshot, columns, server_display_name); - - MutableColumns logs_columns; - Block curr_block; - size_t rows = 0; - - for (; state.profile_queue->tryPop(curr_block); ++rows) - { - auto curr_columns = curr_block.getColumns(); - for (size_t j = 0; j < curr_columns.size(); ++j) - columns[j]->insertRangeFrom(*curr_columns[j], 0, curr_columns[j]->size()); - } - - bool empty = columns[0]->empty(); - if (!empty) - { - block.setColumns(std::move(columns)); - initProfileEventsBlockOutput(block); writeVarUInt(Protocol::Server::ProfileEvents, *out); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 6afda654e6a..153b8c35ea4 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -3,9 +3,10 @@ #include #include -#include "Common/ProfileEvents.h" +#include #include #include +#include #include #include #include @@ -13,7 +14,9 @@ #include #include #include +#include #include +#include #include @@ -36,6 +39,8 @@ struct Settings; class ColumnsDescription; struct ProfileInfo; class TCPServer; +class NativeWriter; +class NativeReader; /// State of query processing. struct QueryState @@ -189,9 +194,7 @@ private: CurrentMetrics::Increment metric_increment{CurrentMetrics::TCPConnection}; - using ThreadIdToCountersSnapshot = std::unordered_map; - - ThreadIdToCountersSnapshot last_sent_snapshots; + ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; /// It is the name of the server that will be sent to the client. 
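The rewritten sendProfileEvents above delegates the heavy lifting to ProfileEvents::getProfileEvents and only keeps last_sent_snapshots between calls, so the client is sent counter increments rather than absolute values. A self-contained sketch of that bookkeeping; ThreadId, CounterValues, Snapshots and takeIncrement are illustrative names, not the real ClickHouse aliases.

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

using ThreadId = uint64_t;
using CounterValues = std::vector<int64_t>;            /// absolute counter values, one slot per event
using Snapshots = std::unordered_map<ThreadId, CounterValues>;

/// Return only the deltas accumulated since the previous call for this thread,
/// and update `last_sent` so the next call starts from the new baseline.
CounterValues takeIncrement(ThreadId thread_id, const CounterValues & current, Snapshots & last_sent)
{
    CounterValues increment(current.size(), 0);
    auto prev = last_sent.find(thread_id);
    for (std::size_t i = 0; i < current.size(); ++i)
    {
        int64_t previous_value = (prev != last_sent.end() && i < prev->second.size()) ? prev->second[i] : 0;
        increment[i] = current[i] - previous_value;
    }
    last_sent[thread_id] = current;
    return increment;
}

Only the non-zero increments would then be turned into rows of the ProfileEvents block that is written to the socket.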
String server_display_name; diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index 03b2592198d..6e27dfc93bd 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Storages/Cache/ExternalDataSourceCache.cpp b/src/Storages/Cache/ExternalDataSourceCache.cpp index 0f78c8d3511..18607c16ffa 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.cpp +++ b/src/Storages/Cache/ExternalDataSourceCache.cpp @@ -27,7 +27,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -LocalFileHolder::LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_controller) : file_cache_controller(std::move(cache_controller)) +LocalFileHolder::LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_controller) + : file_cache_controller(std::move(cache_controller)), original_readbuffer(nullptr), thread_pool(nullptr) { file_buffer = file_cache_controller->value().allocFile(); if (!file_buffer) @@ -35,18 +36,43 @@ LocalFileHolder::LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_cont ErrorCodes::LOGICAL_ERROR, "Create file readbuffer failed. {}", file_cache_controller->value().getLocalPath().string()); } +LocalFileHolder::LocalFileHolder( + RemoteFileCacheType::MappedHolderPtr cache_controller, + std::unique_ptr original_readbuffer_, + BackgroundSchedulePool * thread_pool_) + : file_cache_controller(std::move(cache_controller)) + , file_buffer(nullptr) + , original_readbuffer(std::move(original_readbuffer_)) + , thread_pool(thread_pool_) +{ +} + +LocalFileHolder::~LocalFileHolder() +{ + if (original_readbuffer) + { + dynamic_cast(original_readbuffer.get())->seek(0, SEEK_SET); + file_cache_controller->value().startBackgroundDownload(std::move(original_readbuffer), *thread_pool); + } +} + RemoteReadBuffer::RemoteReadBuffer(size_t buff_size) : BufferWithOwnMemory(buff_size) { } std::unique_ptr RemoteReadBuffer::create( - ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr read_buffer, size_t buff_size) + ContextPtr context, + IRemoteFileMetadataPtr remote_file_metadata, + std::unique_ptr read_buffer, + size_t buff_size, + bool is_random_accessed) + { auto remote_path = remote_file_metadata->remote_path; auto remote_read_buffer = std::make_unique(buff_size); std::tie(remote_read_buffer->local_file_holder, read_buffer) - = ExternalDataSourceCache::instance().createReader(context, remote_file_metadata, read_buffer); + = ExternalDataSourceCache::instance().createReader(context, remote_file_metadata, read_buffer, is_random_accessed); if (remote_read_buffer->local_file_holder == nullptr) return read_buffer; remote_read_buffer->remote_file_size = remote_file_metadata->file_size; @@ -55,6 +81,19 @@ std::unique_ptr RemoteReadBuffer::create( bool RemoteReadBuffer::nextImpl() { + if (local_file_holder->original_readbuffer) + { + auto status = local_file_holder->original_readbuffer->next(); + if (status) + { + BufferBase::set( + local_file_holder->original_readbuffer->buffer().begin(), + local_file_holder->original_readbuffer->buffer().size(), + local_file_holder->original_readbuffer->offset()); + } + return status; + } + auto start_offset = local_file_holder->file_buffer->getPosition(); auto end_offset = start_offset + local_file_holder->file_buffer->internalBuffer().size(); local_file_holder->file_cache_controller->value().waitMoreData(start_offset, end_offset); @@ -73,6 +112,16 @@ bool RemoteReadBuffer::nextImpl() off_t 
RemoteReadBuffer::seek(off_t offset, int whence) { + if (local_file_holder->original_readbuffer) + { + auto ret = dynamic_cast(local_file_holder->original_readbuffer.get())->seek(offset, whence); + BufferBase::set( + local_file_holder->original_readbuffer->buffer().begin(), + local_file_holder->original_readbuffer->buffer().size(), + local_file_holder->original_readbuffer->offset()); + return ret; + } + if (!local_file_holder->file_buffer) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot call seek() in this buffer. It's a bug!"); /* @@ -88,6 +137,10 @@ off_t RemoteReadBuffer::seek(off_t offset, int whence) off_t RemoteReadBuffer::getPosition() { + if (local_file_holder->original_readbuffer) + { + return dynamic_cast(local_file_holder->original_readbuffer.get())->getPosition(); + } return local_file_holder->file_buffer->getPosition(); } @@ -164,7 +217,7 @@ String ExternalDataSourceCache::calculateLocalPath(IRemoteFileMetadataPtr metada } std::pair, std::unique_ptr> ExternalDataSourceCache::createReader( - ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer) + ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer, bool is_random_accessed) { // If something went wrong on startup, fall back to reading from the original ReadBuffer. if (!isInitialized()) @@ -180,6 +233,11 @@ std::pair, std::unique_ptr> Externa auto cache = lru_caches->get(local_path); if (cache) { + if (!cache->value().isEnable()) + { + return {nullptr, std::move(read_buffer)}; + } + // The remote file has been updated, need to redownload. if (!cache->value().isValid() || cache->value().isModified(remote_file_metadata)) { @@ -216,6 +274,17 @@ std::pair, std::unique_ptr> Externa lru_caches->weight()); return {nullptr, std::move(read_buffer)}; } + /* + If read_buffer is seekable, use it directly inside LocalFileHolder. Once LocalFileHolder is released, + start the download process in the background. + The cache entry is marked disabled until the download process finishes. + For reading Parquet files from HDFS, this optimization can give a speedup of up to 3x.
+ */ + if (dynamic_cast(read_buffer.get()) && is_random_accessed) + { + new_cache->value().disable(); + return {std::make_unique(std::move(new_cache), std::move(read_buffer), &context->getSchedulePool()), nullptr}; + } new_cache->value().startBackgroundDownload(std::move(read_buffer), context->getSchedulePool()); return {std::make_unique(std::move(new_cache)), nullptr}; } diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index c555198e4c4..5ffb2b20fc7 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -34,10 +34,13 @@ class LocalFileHolder { public: explicit LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_controller); - ~LocalFileHolder() = default; + explicit LocalFileHolder(RemoteFileCacheType::MappedHolderPtr cache_controller, std::unique_ptr original_readbuffer_, BackgroundSchedulePool * thread_pool_); + ~LocalFileHolder(); RemoteFileCacheType::MappedHolderPtr file_cache_controller; std::unique_ptr file_buffer; + std::unique_ptr original_readbuffer; + BackgroundSchedulePool * thread_pool; }; class RemoteReadBuffer : public BufferWithOwnMemory @@ -45,7 +48,7 @@ class RemoteReadBuffer : public BufferWithOwnMemory public: explicit RemoteReadBuffer(size_t buff_size); ~RemoteReadBuffer() override = default; - static std::unique_ptr create(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr read_buffer, size_t buff_size); + static std::unique_ptr create(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr read_buffer, size_t buff_size, bool is_random_accessed = false); bool nextImpl() override; off_t seek(off_t off, int whence) override; @@ -70,7 +73,8 @@ public: inline bool isInitialized() const { return initialized; } std::pair, std::unique_ptr> - createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer); + createReader(ContextPtr context, IRemoteFileMetadataPtr remote_file_metadata, std::unique_ptr & read_buffer, bool is_random_accessed); + void updateTotalSize(size_t size) { total_size += size; } diff --git a/src/Storages/Cache/RemoteCacheController.cpp b/src/Storages/Cache/RemoteCacheController.cpp index b5fc38fffcd..b72f5336ea4 100644 --- a/src/Storages/Cache/RemoteCacheController.cpp +++ b/src/Storages/Cache/RemoteCacheController.cpp @@ -169,6 +169,7 @@ void RemoteCacheController::backgroundDownload(ReadBufferPtr remote_read_buffer) file_status = DOWNLOADED; flush(true); data_file_writer.reset(); + is_enable = true; lock.unlock(); more_data_signal.notify_all(); ExternalDataSourceCache::instance().updateTotalSize(file_metadata_ptr->file_size); diff --git a/src/Storages/Cache/RemoteCacheController.h b/src/Storages/Cache/RemoteCacheController.h index ca2cb837e34..5f9d92c1349 100644 --- a/src/Storages/Cache/RemoteCacheController.h +++ b/src/Storages/Cache/RemoteCacheController.h @@ -63,6 +63,22 @@ public: std::lock_guard lock(mutex); return valid; } + inline bool isEnable() + { + std::lock_guard lock(mutex); + return is_enable; + + } + inline void disable() + { + std::lock_guard lock(mutex); + is_enable = false; + } + inline void enable() + { + std::lock_guard lock(mutex); + is_enable = true; + } IRemoteFileMetadataPtr getFileMetadata() { return file_metadata_ptr; } inline size_t getFileSize() const { return file_metadata_ptr->file_size; } @@ -83,6 +99,17 @@ private: 
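The new LocalFileHolder destructor above rewinds the original read buffer and hands it to startBackgroundDownload, while the cache entry stays disabled until the download completes: the first reader streams straight from the remote file and later readers get the local copy. A stripped-down sketch of that pattern; CacheEntry, RemoteBuffer and the detached std::thread are stand-ins for RemoteCacheController, the seekable remote ReadBuffer and the BackgroundSchedulePool task used by the real code.

#include <atomic>
#include <memory>
#include <thread>
#include <utility>

struct RemoteBuffer                    /// toy seekable remote buffer
{
    void seekToStart() {}
    void readAll() {}                  /// stands in for streaming the file into the local cache
};

struct CacheEntry
{
    std::atomic<bool> enabled{true};   /// mirrors RemoteCacheController::is_enable

    void download(std::unique_ptr<RemoteBuffer> buf)
    {
        buf->seekToStart();
        buf->readAll();
        enabled = true;                /// other readers may now use the cached local file
    }
};

/// First reader: wraps the remote buffer directly; on destruction it kicks off
/// the background download that fills the cache for subsequent readers.
class Holder
{
public:
    Holder(std::shared_ptr<CacheEntry> entry_, std::unique_ptr<RemoteBuffer> original_)
        : entry(std::move(entry_)), original(std::move(original_))
    {
        entry->enabled = false;        /// cache not usable until the download has finished
    }

    ~Holder()
    {
        if (original)
        {
            /// Detached thread only for brevity; the real patch schedules the work on a BackgroundSchedulePool.
            std::thread([e = entry, buf = std::move(original)]() mutable { e->download(std::move(buf)); }).detach();
        }
    }

    RemoteBuffer & buffer() { return *original; }

private:
    std::shared_ptr<CacheEntry> entry;
    std::unique_ptr<RemoteBuffer> original;
};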
IRemoteFileMetadataPtr file_metadata_ptr; std::filesystem::path local_path; + /** + * is_enable is true only when the remote read buffer has been fully cached on local disk. + * + * The first time a remote buffer that is not yet cached on local disk is accessed, we use the original remote buffer directly and set RemoteCacheController::is_enable = false. + * When that first access is finished, LocalFileHolder starts a background download process that reuses the same remote buffer object. After the download process + * finishes, is_enable is set back to true. + * + * So while is_enable is false, any other thread trying to access the same remote file cannot use the local file buffer and uses the original remote buffer + * instead. This avoids multiple threads trying to save the same file to disk at the same time. + */ + bool is_enable = true; bool valid = true; size_t local_cache_bytes_read_before_flush; size_t current_offset; diff --git a/src/Storages/Cache/RemoteFileCachePolicy.h b/src/Storages/Cache/RemoteFileCachePolicy.h index 7d742d6ea14..5c212264bd2 100644 --- a/src/Storages/Cache/RemoteFileCachePolicy.h +++ b/src/Storages/Cache/RemoteFileCachePolicy.h @@ -1,6 +1,10 @@ #pragma once + +#include + namespace DB { + struct RemoteFileCacheWeightFunction { size_t operator()(const RemoteCacheController & cache) const { return cache.getFileSize(); } @@ -14,4 +18,5 @@ struct RemoteFileCacheReleaseFunction controller->close(); } }; + } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index 8b08f5f28dd..8ca3c44bac2 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -117,7 +117,7 @@ void ColumnDescription::readText(ReadBuffer & buf) ParserColumnDeclaration column_parser(/* require type */ true); ASTPtr ast = parseQuery(column_parser, "x T " + modifiers, "column parser", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - if (const auto * col_ast = ast->as()) + if (auto * col_ast = ast->as()) { if (col_ast->default_expression) { @@ -309,7 +309,7 @@ void ColumnsDescription::flattenNested() continue; } - ColumnDescription column = std::move(*it); + ColumnDescription column = *it; removeSubcolumns(column.name); it = columns.get<0>().erase(it); diff --git a/src/Storages/CompressionCodecSelector.h b/src/Storages/CompressionCodecSelector.h index 746b3ce37ee..4c088924cdb 100644 --- a/src/Storages/CompressionCodecSelector.h +++ b/src/Storages/CompressionCodecSelector.h @@ -69,7 +69,7 @@ private: std::vector elements; public: - CompressionCodecSelector() {} /// Always returns the default method. + CompressionCodecSelector() = default; /// Always returns the default method. CompressionCodecSelector(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) { @@ -78,7 +78,7 @@ public: for (const auto & name : keys) { - if (!startsWith(name.data(), "case")) + if (!startsWith(name, "case")) throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); elements.emplace_back(config, config_prefix + "."
+ name); diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index 60202e2055e..7085c6e14c8 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -198,6 +198,20 @@ ConstraintsDescription & ConstraintsDescription::operator=(const ConstraintsDesc return *this; } +ConstraintsDescription::ConstraintsDescription(ConstraintsDescription && other) noexcept + : constraints(std::move(other.constraints)) +{ + update(); +} + +ConstraintsDescription & ConstraintsDescription::operator=(ConstraintsDescription && other) noexcept +{ + constraints = std::move(other.constraints); + update(); + + return *this; +} + void ConstraintsDescription::update() { if (constraints.empty()) diff --git a/src/Storages/ConstraintsDescription.h b/src/Storages/ConstraintsDescription.h index a5095a79ccb..eb1eb95d33d 100644 --- a/src/Storages/ConstraintsDescription.h +++ b/src/Storages/ConstraintsDescription.h @@ -18,6 +18,9 @@ public: ConstraintsDescription(const ConstraintsDescription & other); ConstraintsDescription & operator=(const ConstraintsDescription & other); + ConstraintsDescription(ConstraintsDescription && other) noexcept; + ConstraintsDescription & operator=(ConstraintsDescription && other) noexcept; + bool empty() const { return constraints.empty(); } String toString() const; diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index be0d2ea90db..aa703bcbb89 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -125,7 +126,7 @@ DistributedSink::DistributedSink( , log(&Poco::Logger::get("DistributedBlockOutputStream")) { const auto & settings = context->getSettingsRef(); - if (settings.max_distributed_depth && context->getClientInfo().distributed_depth > settings.max_distributed_depth) + if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); context->getClientInfo().distributed_depth += 1; random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; @@ -331,9 +332,14 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si const Settings & settings = context->getSettingsRef(); /// Do not initiate INSERT for empty block. 
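The ConstraintsDescription move constructor and move assignment added above are declared noexcept and call update() to rebuild derived state after the constraint ASTs have been moved. A small illustration of that shape with made-up members; Description, items and total_length are not the real fields.

#include <cstddef>
#include <string>
#include <vector>

class Description
{
public:
    Description() = default;

    Description(Description && other) noexcept
        : items(std::move(other.items))
    {
        update();                      /// derived caches are cheap to rebuild, so they are not moved
    }

    Description & operator=(Description && other) noexcept
    {
        items = std::move(other.items);
        update();
        return *this;
    }

private:
    void update()
    {
        total_length = 0;
        for (const auto & s : items)
            total_length += s.size();
    }

    std::vector<std::string> items;    /// the "constraints" payload in this toy version
    std::size_t total_length = 0;      /// stands in for the derived structures rebuilt by update()
};

Declaring these members noexcept is what lets containers such as std::vector move elements instead of copying them during reallocation.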
- if (shard_block.rows() == 0) + size_t rows = shard_block.rows(); + if (rows == 0) return; + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + span.addAttribute("clickhouse.shard_num", shard_info.shard_num); + span.addAttribute("clickhouse.written_rows", rows); + if (!job.is_local_job || !settings.prefer_localhost_replica) { if (!job.executor) @@ -406,13 +412,15 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si } job.blocks_written += 1; - job.rows_written += shard_block.rows(); + job.rows_written += rows; }; } void DistributedSink::writeSync(const Block & block) { + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + const Settings & settings = context->getSettingsRef(); const auto & shards_info = cluster->getShardsInfo(); Block block_to_send = removeSuperfluousColumns(block); @@ -456,6 +464,10 @@ void DistributedSink::writeSync(const Block & block) size_t num_shards = end - start; + span.addAttribute("clickhouse.start_shard", start); + span.addAttribute("clickhouse.end_shard", end); + span.addAttribute("db.statement", this->query_string); + if (num_shards > 1) { auto current_selector = createSelector(block); @@ -489,6 +501,7 @@ void DistributedSink::writeSync(const Block & block) catch (Exception & exception) { exception.addMessage(getCurrentStateDescription()); + span.addAttribute(exception); throw; } @@ -597,10 +610,15 @@ void DistributedSink::writeSplitAsync(const Block & block) void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) { + OpenTelemetrySpanHolder span("DistributedBlockOutputStream::writeAsyncImpl()"); + const auto & shard_info = cluster->getShardsInfo()[shard_id]; const auto & settings = context->getSettingsRef(); Block block_to_send = removeSuperfluousColumns(block); + span.addAttribute("clickhouse.shard_num", shard_info.shard_num); + span.addAttribute("clickhouse.written_rows", block.rows()); + if (shard_info.hasInternalReplication()) { if (shard_info.isLocal() && settings.prefer_localhost_replica) @@ -634,6 +652,9 @@ void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) void DistributedSink::writeToLocal(const Block & block, size_t repeats) { + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + span.addAttribute("db.statement", this->query_string); + InterpreterInsertQuery interp(query_ast, context, allow_materialized); auto block_io = interp.execute(); @@ -647,6 +668,8 @@ void DistributedSink::writeToLocal(const Block & block, size_t repeats) void DistributedSink::writeToShard(const Block & block, const std::vector & dir_names) { + OpenTelemetrySpanHolder span(__PRETTY_FUNCTION__); + const auto & settings = context->getSettingsRef(); const auto & distributed_settings = storage.getDistributedSettingsRef(); @@ -713,7 +736,19 @@ void DistributedSink::writeToShard(const Block & block, const std::vectorgetSettingsRef().write(header_buf); - context->getClientInfo().write(header_buf, DBMS_TCP_PROTOCOL_VERSION); + + if (context->getClientInfo().client_trace_context.trace_id != UUID() && CurrentThread::isInitialized()) + { + // if the distributed tracing is enabled, use the trace context in current thread as parent of next span + auto client_info = context->getClientInfo(); + client_info.client_trace_context = CurrentThread::get().thread_trace_context; + client_info.write(header_buf, DBMS_TCP_PROTOCOL_VERSION); + } + else + { + context->getClientInfo().write(header_buf, DBMS_TCP_PROTOCOL_VERSION); + } + writeVarUInt(block.rows(), header_buf); writeVarUInt(block.bytes(), header_buf); 
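The DistributedSink changes above wrap each write path in an OpenTelemetrySpanHolder, attach attributes such as clickhouse.shard_num, clickhouse.written_rows and db.statement, and, when tracing is active, copy the current thread's trace context into the ClientInfo that is serialized into the batch header. A toy RAII span to show the general shape of that instrumentation; SpanHolder below is a mock, not the real OpenTelemetrySpanHolder API.

#include <chrono>
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>

/// Minimal RAII "span": collects attributes and reports its duration when it goes out of scope.
class SpanHolder
{
public:
    explicit SpanHolder(std::string name_) : name(std::move(name_)), start(std::chrono::steady_clock::now()) {}

    void addAttribute(const std::string & key, const std::string & value) { attributes[key] = value; }

    ~SpanHolder()
    {
        auto elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::steady_clock::now() - start).count();
        std::printf("span %s finished in %lld us with %zu attributes\n", name.c_str(), static_cast<long long>(elapsed_us), attributes.size());
    }

private:
    std::string name;
    std::chrono::steady_clock::time_point start;
    std::map<std::string, std::string> attributes;
};

void writeToShardExample(std::size_t shard_num, const std::string & query)
{
    SpanHolder span(__func__);                         /// the patch uses __PRETTY_FUNCTION__
    span.addAttribute("clickhouse.shard_num", std::to_string(shard_num));
    span.addAttribute("db.statement", query);
    /// ... the actual write happens here; the span closes automatically on return or on exception.
}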
writeStringBinary(block.cloneEmpty().dumpStructure(), header_buf); /// obsolete diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index 2d4b05c51b5..5549a816a06 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -34,7 +34,7 @@ IMPLEMENT_SETTINGS_TRAITS(EmptySettingsTraits, EMPTY_SETTINGS) static const std::unordered_set dictionary_allowed_keys = { "host", "port", "user", "password", "db", "database", "table", "schema", "replica", - "update_field", "update_tag", "invalidate_query", "query", + "update_field", "update_lag", "invalidate_query", "query", "where", "name", "secure", "uri", "collection"}; diff --git a/src/Storages/ExternalDataSourceConfiguration.h b/src/Storages/ExternalDataSourceConfiguration.h index 1e08b088b1d..cc3e136ba50 100644 --- a/src/Storages/ExternalDataSourceConfiguration.h +++ b/src/Storages/ExternalDataSourceConfiguration.h @@ -16,7 +16,7 @@ struct ExternalDataSourceConfiguration { String host; UInt16 port = 0; - String username; + String username = "default"; String password; String database; String table; diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index dac70e362ed..700b35a5a48 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -53,6 +53,7 @@ StorageFileLog::StorageFileLog( ContextPtr context_, const ColumnsDescription & columns_, const String & path_, + const String & metadata_base_path_, const String & format_name_, std::unique_ptr settings, const String & comment, @@ -61,6 +62,7 @@ StorageFileLog::StorageFileLog( , WithContext(context_->getGlobalContext()) , filelog_settings(std::move(settings)) , path(path_) + , metadata_base_path(std::filesystem::path(metadata_base_path_) / "metadata") , format_name(format_name_) , log(&Poco::Logger::get("StorageFileLog (" + table_id_.table_name + ")")) , milliseconds_to_wait(filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds()) @@ -94,18 +96,24 @@ StorageFileLog::StorageFileLog( void StorageFileLog::loadMetaFiles(bool attach) { - const auto & storage = getStorageID(); - /// FIXME Why do we need separate directory? Why not to use data directory? - root_meta_path - = std::filesystem::path(getContext()->getPath()) / "stream_engines/filelog/" / DatabaseCatalog::getPathForUUID(storage.uuid); - /// Attach table if (attach) { - /// Meta file may lost, log and create directory - if (!std::filesystem::exists(root_meta_path)) + const auto & storage = getStorageID(); + + auto metadata_path_exist = std::filesystem::exists(metadata_base_path); + auto previous_path = std::filesystem::path(getContext()->getPath()) / ".filelog_storage_metadata" / storage.getDatabaseName() / storage.getTableName(); + + /// For compatibility with the previous path version. 
+ if (std::filesystem::exists(previous_path) && !metadata_path_exist) { - /// Create root_meta_path directory when store meta data + std::filesystem::copy(previous_path, metadata_base_path, std::filesystem::copy_options::recursive); + std::filesystem::remove_all(previous_path); + } + /// Meta file may lost, log and create directory + else if (!metadata_path_exist) + { + /// Create metadata_base_path directory when store meta data LOG_ERROR(log, "Metadata files of table {} are lost.", getStorageID().getTableName()); } /// Load all meta info to file_infos; @@ -114,14 +122,14 @@ void StorageFileLog::loadMetaFiles(bool attach) /// Create table, just create meta data directory else { - if (std::filesystem::exists(root_meta_path)) + if (std::filesystem::exists(metadata_base_path)) { throw Exception( ErrorCodes::TABLE_METADATA_ALREADY_EXISTS, "Metadata files already exist by path: {}, remove them manually if it is intended", - root_meta_path); + metadata_base_path); } - /// We do not create the root_meta_path directory at creation time, create it at the moment of serializing + /// We do not create the metadata_base_path directory at creation time, create it at the moment of serializing /// meta files, such that can avoid unnecessarily create this directory if create table failed. } } @@ -212,9 +220,9 @@ void StorageFileLog::loadFiles() void StorageFileLog::serialize() const { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) { - std::filesystem::create_directories(root_meta_path); + std::filesystem::create_directories(metadata_base_path); } for (const auto & [inode, meta] : file_infos.meta_by_inode) { @@ -236,9 +244,9 @@ void StorageFileLog::serialize() const void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) { - std::filesystem::create_directories(root_meta_path); + std::filesystem::create_directories(metadata_base_path); } auto full_name = getFullMetaPath(file_meta.file_name); if (!std::filesystem::exists(full_name)) @@ -257,11 +265,11 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const void StorageFileLog::deserialize() { - if (!std::filesystem::exists(root_meta_path)) + if (!std::filesystem::exists(metadata_base_path)) return; /// In case of single file (not a watched directory), /// iterated directory always has one file inside. - for (const auto & dir_entry : std::filesystem::directory_iterator{root_meta_path}) + for (const auto & dir_entry : std::filesystem::directory_iterator{metadata_base_path}) { if (!dir_entry.is_regular_file()) { @@ -269,7 +277,7 @@ void StorageFileLog::deserialize() ErrorCodes::BAD_FILE_TYPE, "The file {} under {} is not a regular file when deserializing meta files", dir_entry.path().c_str(), - root_meta_path); + metadata_base_path); } ReadBufferFromFile in(dir_entry.path().c_str()); @@ -373,8 +381,8 @@ void StorageFileLog::drop() { try { - if (std::filesystem::exists(root_meta_path)) - std::filesystem::remove_all(root_meta_path); + if (std::filesystem::exists(metadata_base_path)) + std::filesystem::remove_all(metadata_base_path); } catch (...) 
{ @@ -802,6 +810,7 @@ void registerStorageFileLog(StorageFactory & factory) args.getContext(), args.columns, path, + args.relative_data_path, format, std::move(filelog_settings), args.comment, @@ -818,6 +827,9 @@ void registerStorageFileLog(StorageFactory & factory) bool StorageFileLog::updateFileInfos() { + if (file_infos.file_names.empty()) + return false; + if (!directory_watch) { /// For table just watch one file, we can not use directory monitor to watch it diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 1f5078ab68e..98915f10a05 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -89,7 +89,7 @@ public: auto & getFileInfos() { return file_infos; } - String getFullMetaPath(const String & file_name) const { return std::filesystem::path(root_meta_path) / file_name; } + String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; } String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; } NamesAndTypesList getVirtuals() const override; @@ -131,6 +131,7 @@ protected: ContextPtr context_, const ColumnsDescription & columns_, const String & path_, + const String & metadata_base_path_, const String & format_name_, std::unique_ptr settings, const String & comment, @@ -145,7 +146,7 @@ private: /// If path argument of the table is a regular file, it equals to user_files_path /// otherwise, it equals to user_files_path/ + path_argument/, e.g. path String root_data_path; - String root_meta_path; + String metadata_base_path; FileInfos file_infos; diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 0ad55162fb2..902307fc828 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -184,6 +184,11 @@ off_t ReadBufferFromHDFS::getPosition() return impl->getPosition() - available(); } +size_t ReadBufferFromHDFS::getFileOffsetOfBufferEnd() const +{ + return impl->getPosition(); +} + } #endif diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index aa20e20fa48..e8cdcb27360 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -39,6 +39,8 @@ public: std::optional getTotalSize() override; + size_t getFileOffsetOfBufferEnd() const override; + private: std::unique_ptr impl; }; diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index c8ad6ffdeaf..99b5ba95d25 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -47,7 +47,7 @@ public: /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
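loadMetaFiles above migrates FileLog metadata from the old global .filelog_storage_metadata location into a directory derived from the table's relative data path, copying recursively and then removing the old directory. A small std::filesystem sketch of the same compatibility move; the function name and both paths are made up for the example, and the parent of the new location is assumed to already exist (in the patch it is the table's data directory).

#include <filesystem>
#include <iostream>

namespace fs = std::filesystem;

/// Move metadata from the legacy location to the new one when only the legacy layout exists.
void migrateMetadataDirectory(const fs::path & previous_path, const fs::path & metadata_base_path)
{
    const bool new_exists = fs::exists(metadata_base_path);

    if (fs::exists(previous_path) && !new_exists)
    {
        fs::copy(previous_path, metadata_base_path, fs::copy_options::recursive);
        fs::remove_all(previous_path);
    }
    else if (!new_exists)
    {
        /// Nothing to migrate and nothing stored yet; the directory is created lazily on first serialize().
        std::cerr << "metadata directory " << metadata_base_path << " does not exist yet\n";
    }
}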
- bool isColumnOriented() const; + bool isColumnOriented() const override; static ColumnsDescription getTableStructureFromData( const String & format, diff --git a/src/Storages/Hive/HiveCommon.cpp b/src/Storages/Hive/HiveCommon.cpp index aa19ff042e2..a9d0c22d6a5 100644 --- a/src/Storages/Hive/HiveCommon.cpp +++ b/src/Storages/Hive/HiveCommon.cpp @@ -1,3 +1,4 @@ +#include #include #if USE_HIVE @@ -5,6 +6,7 @@ #include #include #include +#include namespace DB @@ -15,6 +17,18 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +static const unsigned max_hive_metastore_client_connections = 16; +static const int max_hive_metastore_client_retry = 3; +static const UInt64 get_hive_metastore_client_timeout = 1000000; +static const int hive_metastore_client_conn_timeout_ms = 10000; +static const int hive_metastore_client_recv_timeout_ms = 10000; +static const int hive_metastore_client_send_timeout_ms = 10000; + +ThriftHiveMetastoreClientPool::ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_) + : PoolBase(max_hive_metastore_client_connections, &Poco::Logger::get("ThriftHiveMetastoreClientPool")), builder(builder_) +{ +} + bool HiveMetastoreClient::shouldUpdateTableMetadata( const String & db_name, const String & table_name, const std::vector & partitions) { @@ -40,25 +54,42 @@ bool HiveMetastoreClient::shouldUpdateTableMetadata( return false; } +void HiveMetastoreClient::tryCallHiveClient(std::function func) +{ + int i = 0; + String err_msg; + for (; i < max_hive_metastore_client_retry; ++i) + { + auto client = client_pool.get(get_hive_metastore_client_timeout); + try + { + func(client); + } + catch (apache::thrift::transport::TTransportException & e) + { + client.expire(); + err_msg = e.what(); + continue; + } + break; + } + if (i >= max_hive_metastore_client_retry) + throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", err_msg); +} + HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata(const String & db_name, const String & table_name) { LOG_TRACE(log, "Get table metadata for {}.{}", db_name, table_name); - std::lock_guard lock{mutex}; auto table = std::make_shared(); std::vector partitions; - try + auto client_call = [&](ThriftHiveMetastoreClientPool::Entry & client) { client->get_table(*table, db_name, table_name); - /// Query the latest partition info to check new change. 
client->get_partitions(partitions, db_name, table_name, -1); - } - catch (apache::thrift::transport::TTransportException & e) - { - setExpired(); - throw Exception(ErrorCodes::NO_HIVEMETASTORE, "Hive Metastore expired because {}", String(e.what())); - } + }; + tryCallHiveClient(client_call); bool update_cache = shouldUpdateTableMetadata(db_name, table_name, partitions); String cache_key = getCacheKey(db_name, table_name); @@ -103,23 +134,26 @@ HiveMetastoreClient::HiveTableMetadataPtr HiveMetastoreClient::getTableMetadata( return metadata; } +std::shared_ptr HiveMetastoreClient::getHiveTable(const String & db_name, const String & table_name) +{ + auto table = std::make_shared(); + auto client_call = [&](ThriftHiveMetastoreClientPool::Entry & client) + { + client->get_table(*table, db_name, table_name); + }; + tryCallHiveClient(client_call); + return table; +} + void HiveMetastoreClient::clearTableMetadata(const String & db_name, const String & table_name) { String cache_key = getCacheKey(db_name, table_name); - std::lock_guard lock{mutex}; HiveTableMetadataPtr metadata = table_metadata_cache.get(cache_key); if (metadata) table_metadata_cache.remove(cache_key); } -void HiveMetastoreClient::setClient(std::shared_ptr client_) -{ - std::lock_guard lock{mutex}; - client = client_; - clearExpired(); -} - bool HiveMetastoreClient::PartitionInfo::haveSameParameters(const Apache::Hadoop::Hive::Partition & other) const { /// Parameters include keys:numRows,numFiles,rawDataSize,totalSize,transient_lastDdlTime @@ -192,53 +226,52 @@ HiveMetastoreClientFactory & HiveMetastoreClientFactory::instance() return factory; } +using namespace apache::thrift; +using namespace apache::thrift::protocol; +using namespace apache::thrift::transport; +using namespace Apache::Hadoop::Hive; + HiveMetastoreClientPtr HiveMetastoreClientFactory::getOrCreate(const String & name, ContextPtr context) { - using namespace apache::thrift; - using namespace apache::thrift::protocol; - using namespace apache::thrift::transport; - using namespace Apache::Hadoop::Hive; std::lock_guard lock(mutex); auto it = clients.find(name); - if (it == clients.end() || it->second->isExpired()) + if (it == clients.end()) { - /// Connect to hive metastore - Poco::URI hive_metastore_url(name); - const auto & host = hive_metastore_url.getHost(); - auto port = hive_metastore_url.getPort(); - - std::shared_ptr socket = std::make_shared(host, port); - socket->setKeepAlive(true); - socket->setConnTimeout(conn_timeout_ms); - socket->setRecvTimeout(recv_timeout_ms); - socket->setSendTimeout(send_timeout_ms); - std::shared_ptr transport(new TBufferedTransport(socket)); - std::shared_ptr protocol(new TBinaryProtocol(transport)); - std::shared_ptr thrift_client = std::make_shared(protocol); - try + auto builder = [name]() { - transport->open(); - } - catch (TException & tx) - { - throw Exception("connect to hive metastore:" + name + " failed." 
+ tx.what(), ErrorCodes::BAD_ARGUMENTS); - } - - if (it == clients.end()) - { - HiveMetastoreClientPtr client = std::make_shared(std::move(thrift_client), context); - clients[name] = client; - return client; - } - else - { - it->second->setClient(std::move(thrift_client)); - return it->second; - } + return createThriftHiveMetastoreClient(name); + }; + auto client = std::make_shared(builder, context->getGlobalContext()); + clients[name] = client; + return client; } return it->second; } +std::shared_ptr HiveMetastoreClientFactory::createThriftHiveMetastoreClient(const String &name) +{ + Poco::URI hive_metastore_url(name); + const auto & host = hive_metastore_url.getHost(); + auto port = hive_metastore_url.getPort(); + + std::shared_ptr socket = std::make_shared(host, port); + socket->setKeepAlive(true); + socket->setConnTimeout(hive_metastore_client_conn_timeout_ms); + socket->setRecvTimeout(hive_metastore_client_recv_timeout_ms); + socket->setSendTimeout(hive_metastore_client_send_timeout_ms); + std::shared_ptr transport = std::make_shared(socket); + std::shared_ptr protocol = std::make_shared(transport); + std::shared_ptr thrift_client = std::make_shared(protocol); + try + { + transport->open(); + } + catch (TException & tx) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "connect to hive metastore: {} failed. {}", name, tx.what()); + } + return thrift_client; +} } #endif diff --git a/src/Storages/Hive/HiveCommon.h b/src/Storages/Hive/HiveCommon.h index e88e67b0257..b8075457a02 100644 --- a/src/Storages/Hive/HiveCommon.h +++ b/src/Storages/Hive/HiveCommon.h @@ -1,5 +1,6 @@ #pragma once +#include #include #if USE_HIVE @@ -10,12 +11,32 @@ #include #include +#include #include namespace DB { +using ThriftHiveMetastoreClientBuilder = std::function()>; + +class ThriftHiveMetastoreClientPool : public PoolBase +{ +public: + using Object = Apache::Hadoop::Hive::ThriftHiveMetastoreClient; + using ObjectPtr = std::shared_ptr; + using Entry = PoolBase::Entry; + explicit ThriftHiveMetastoreClientPool(ThriftHiveMetastoreClientBuilder builder_); + +protected: + ObjectPtr allocObject() override + { + return builder(); + } + +private: + ThriftHiveMetastoreClientBuilder builder; +}; class HiveMetastoreClient : public WithContext { public: @@ -26,7 +47,9 @@ public: UInt64 last_modify_time; /// In ms size_t size; - FileInfo() = default; + explicit FileInfo() = default; + FileInfo & operator = (const FileInfo &) = default; + FileInfo(const FileInfo &) = default; FileInfo(const String & path_, UInt64 last_modify_time_, size_t size_) : path(path_), last_modify_time(last_modify_time_), size(size_) { @@ -94,17 +117,18 @@ public: using HiveTableMetadataPtr = std::shared_ptr; - explicit HiveMetastoreClient(std::shared_ptr client_, ContextPtr context_) - : WithContext(context_), client(client_), table_metadata_cache(1000) + explicit HiveMetastoreClient(ThriftHiveMetastoreClientBuilder builder_, ContextPtr context_) + : WithContext(context_) + , table_metadata_cache(1000) + , client_pool(builder_) { } + HiveTableMetadataPtr getTableMetadata(const String & db_name, const String & table_name); + // Access hive table information by hive client + std::shared_ptr getHiveTable(const String & db_name, const String & table_name); void clearTableMetadata(const String & db_name, const String & table_name); - void setClient(std::shared_ptr client_); - bool isExpired() const { return expired; } - void setExpired() { expired = true; } - void clearExpired() { expired = false; } private: static String getCacheKey(const String & 
db_name, const String & table_name) { return db_name + "." + table_name; } @@ -112,10 +136,10 @@ private: bool shouldUpdateTableMetadata( const String & db_name, const String & table_name, const std::vector & partitions); - std::shared_ptr client; + void tryCallHiveClient(std::function func); + LRUCache table_metadata_cache; - mutable std::mutex mutex; - std::atomic expired{false}; + ThriftHiveMetastoreClientPool client_pool; Poco::Logger * log = &Poco::Logger::get("HiveMetastoreClient"); }; @@ -128,13 +152,11 @@ public: HiveMetastoreClientPtr getOrCreate(const String & name, ContextPtr context); + static std::shared_ptr createThriftHiveMetastoreClient(const String & name); + private: std::mutex mutex; std::map clients; - - const int conn_timeout_ms = 10000; - const int recv_timeout_ms = 10000; - const int send_timeout_ms = 10000; }; } diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index b0cfa9809e1..dffcca61a9c 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -8,8 +8,10 @@ #include #include #include +#include #include #include +#include #include #include #include diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 63cca2562eb..6d2ba29ba0f 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -7,8 +7,6 @@ #include #include -#include -#include #include #include @@ -18,6 +16,8 @@ namespace orc { class Reader; +class Statistics; +class ColumnStatistics; } namespace parquet @@ -36,6 +36,11 @@ namespace io class RandomAccessFile; } +namespace fs +{ + class FileSystem; +} + class Buffer; } diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 3040ad23283..2ae7c30fd5b 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -116,13 +116,12 @@ public: , compression_method(compression_method_) , max_block_size(max_block_size_) , sample_block(std::move(sample_block_)) - , to_read_block(sample_block) , columns_description(getColumnsDescription(sample_block, source_info)) , text_input_field_names(text_input_field_names_) , format_settings(getFormatSettings(getContext())) { - /// Initialize to_read_block, which is used to read data from HDFS. to_read_block = sample_block; + /// Initialize to_read_block, which is used to read data from HDFS. for (const auto & name_type : source_info->partition_name_types) { to_read_block.erase(name_type.name); @@ -171,9 +170,13 @@ public: size_t buff_size = raw_read_buf->internalBuffer().size(); if (buff_size == 0) buff_size = DBMS_DEFAULT_BUFFER_SIZE; - remote_read_buf = RemoteReadBuffer::create(getContext(), - std::make_shared("Hive", getNameNodeCluster(hdfs_namenode_url), uri_with_path, curr_file->getSize(), curr_file->getLastModTs()), - std::move(raw_read_buf), buff_size); + remote_read_buf = RemoteReadBuffer::create( + getContext(), + std::make_shared( + "Hive", getNameNodeCluster(hdfs_namenode_url), uri_with_path, curr_file->getSize(), curr_file->getLastModTs()), + std::move(raw_read_buf), + buff_size, + format == "Parquet" || format == "ORC"); } else remote_read_buf = std::move(raw_read_buf); @@ -207,11 +210,17 @@ public: /// Enrich with partition columns. 
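tryCallHiveClient above replaces the single cached Thrift client (and its global mutex) with a connection pool: borrow a client, run the callback, and on a transport exception expire that connection and retry, up to max_hive_metastore_client_retry attempts. A generic sketch of that borrow/retry/expire loop using only the standard library; Pool, Client and TransportError are simplified stand-ins for PoolBase, ThriftHiveMetastoreClient and apache::thrift::transport::TTransportException.

#include <functional>
#include <memory>
#include <mutex>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

struct TransportError : std::runtime_error { using std::runtime_error::runtime_error; };
struct Client { void call() { /* may throw TransportError if the connection broke */ } };

/// Very small object pool: hands out clients and lets the caller simply drop broken ones.
class Pool
{
public:
    explicit Pool(std::function<std::shared_ptr<Client>()> builder_) : builder(std::move(builder_)) {}

    std::shared_ptr<Client> get()
    {
        std::lock_guard lock(mutex);
        if (free_clients.empty())
            return builder();
        auto client = free_clients.back();
        free_clients.pop_back();
        return client;
    }

    void put(std::shared_ptr<Client> client)        /// return a healthy client for reuse
    {
        std::lock_guard lock(mutex);
        free_clients.push_back(std::move(client));
    }

private:
    std::function<std::shared_ptr<Client>()> builder;
    std::vector<std::shared_ptr<Client>> free_clients;
    std::mutex mutex;
};

void tryCallWithRetry(Pool & pool, const std::function<void(Client &)> & func, int max_retry = 3)
{
    std::string last_error;
    for (int attempt = 0; attempt < max_retry; ++attempt)
    {
        auto client = pool.get();
        try
        {
            func(*client);
            pool.put(std::move(client));            /// success: keep the connection in the pool
            return;
        }
        catch (const TransportError & e)
        {
            last_error = e.what();                  /// "expire" the broken connection by not returning it
        }
    }
    throw std::runtime_error("metastore call failed after retries: " + last_error);
}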
auto types = source_info->partition_name_types.getTypes(); + auto names = source_info->partition_name_types.getNames(); + auto fields = source_info->hive_files[current_idx]->getPartitionValues(); for (size_t i = 0; i < types.size(); ++i) { - auto column = types[i]->createColumnConst(num_rows, source_info->hive_files[current_idx]->getPartitionValues()[i]); - auto previous_idx = sample_block.getPositionByName(source_info->partition_name_types.getNames()[i]); - columns.insert(columns.begin() + previous_idx, column->convertToFullColumnIfConst()); + // Only add the required partition columns. partition columns are not read from readbuffer + // the column must be in sample_block, otherwise sample_block.getPositionByName(names[i]) will throw an exception + if (!sample_block.has(names[i])) + continue; + auto column = types[i]->createColumnConst(num_rows, fields[i]); + auto previous_idx = sample_block.getPositionByName(names[i]); + columns.insert(columns.begin() + previous_idx, column); } /// Enrich with virtual columns. @@ -286,14 +295,22 @@ StorageHive::StorageHive( storage_metadata.setConstraints(constraints_); storage_metadata.setComment(comment_); setInMemoryMetadata(storage_metadata); +} + +void StorageHive::lazyInitialize() +{ + std::lock_guard lock{init_mutex}; + if (has_initialized) + return; + auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, getContext()); - auto hive_table_metadata = hive_metastore_client->getTableMetadata(hive_database, hive_table); + auto hive_table_metadata = hive_metastore_client->getHiveTable(hive_database, hive_table); - hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->getTable()->sd.location); - table_schema = hive_table_metadata->getTable()->sd.cols; + hdfs_namenode_url = getNameNodeUrl(hive_table_metadata->sd.location); + table_schema = hive_table_metadata->sd.cols; - FileFormat hdfs_file_format = IHiveFile::toFileFormat(hive_table_metadata->getTable()->sd.inputFormat); + FileFormat hdfs_file_format = IHiveFile::toFileFormat(hive_table_metadata->sd.inputFormat); switch (hdfs_file_format) { case FileFormat::TEXT: @@ -331,6 +348,7 @@ StorageHive::StorageHive( } initMinMaxIndexExpression(); + has_initialized = true; } void StorageHive::initMinMaxIndexExpression() @@ -542,7 +560,34 @@ HiveFilePtr StorageHive::createHiveFileIfNeeded( } return hive_file; } +bool StorageHive::isColumnOriented() const +{ + return format_name == "Parquet" || format_name == "ORC"; +} +void StorageHive::getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const +{ + if (!isColumnOriented()) + sample_block = header_block; + UInt32 erased_columns = 0; + for (const auto & column : partition_columns) + { + if (sample_block.has(column)) + erased_columns++; + } + if (erased_columns == sample_block.columns()) + { + for (size_t i = 0; i < header_block.columns(); ++i) + { + const auto & col = header_block.getByPosition(i); + if (!partition_columns.count(col.name)) + { + sample_block.insert(col); + break; + } + } + } +} Pipe StorageHive::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -552,6 +597,8 @@ Pipe StorageHive::read( size_t max_block_size, unsigned num_streams) { + lazyInitialize(); + HDFSBuilderWrapper builder = createHDFSBuilder(hdfs_namenode_url, context_->getGlobalContext()->getConfigRef()); HDFSFSPtr fs = createHDFSFS(builder.get()); auto hive_metastore_client = HiveMetastoreClientFactory::instance().getOrCreate(hive_metastore_url, 
getContext()); @@ -606,14 +653,20 @@ Pipe StorageHive::read( sources_info->table_name = hive_table; sources_info->hive_metastore_client = hive_metastore_client; sources_info->partition_name_types = partition_name_types; + + const auto & header_block = metadata_snapshot->getSampleBlock(); + Block sample_block; for (const auto & column : column_names) { + sample_block.insert(header_block.getByName(column)); if (column == "_path") sources_info->need_path_column = true; if (column == "_file") sources_info->need_file_column = true; } + getActualColumnsToRead(sample_block, header_block, NameSet{partition_names.begin(), partition_names.end()}); + if (num_streams > sources_info->hive_files.size()) num_streams = sources_info->hive_files.size(); @@ -625,7 +678,7 @@ Pipe StorageHive::read( hdfs_namenode_url, format_name, compression_method, - metadata_snapshot->getSampleBlock(), + sample_block, context_, max_block_size, text_input_field_names)); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index 9629629e057..323293cbbe0 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -36,7 +36,7 @@ public: ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const override { - return false; + return true; } @@ -53,6 +53,8 @@ public: NamesAndTypesList getVirtuals() const override; + bool isColumnOriented() const override; + protected: friend class StorageHiveSource; StorageHive( @@ -88,12 +90,17 @@ private: HiveFilePtr createHiveFileIfNeeded(const FileInfo & file_info, const FieldVector & fields, SelectQueryInfo & query_info, ContextPtr context_); + void getActualColumnsToRead(Block & sample_block, const Block & header_block, const NameSet & partition_columns) const; + String hive_metastore_url; /// Hive database and table String hive_database; String hive_table; + std::mutex init_mutex; + bool has_initialized = false; + /// Hive table meta std::vector table_schema; Names text_input_field_names; /// Defines schema of hive file, only used when text input format is TEXT @@ -116,6 +123,8 @@ private: std::shared_ptr storage_settings; Poco::Logger * log = &Poco::Logger::get("StorageHive"); + + void lazyInitialize(); }; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 70dc997eac0..1010164f71e 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -562,6 +562,8 @@ public: /// Returns true if all disks of storage are read-only. virtual bool isStaticStorage() const; + virtual bool isColumnOriented() const { return false; } + /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. 
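StorageHive above defers the metastore round trip from the constructor to lazyInitialize(), which read() calls before doing any work, presumably so that creating or attaching the table does not require the metastore to be reachable. The guard is a plain bool behind a mutex, as in the patch (std::call_once would be an equally valid choice). A minimal sketch; the class and members below are illustrative only.

#include <mutex>
#include <string>

class LazyStorage
{
public:
    void read()
    {
        lazyInitialize();              /// safe to call from any number of reader threads
        /// ... use table_schema here ...
    }

private:
    void lazyInitialize()
    {
        std::lock_guard lock(init_mutex);
        if (has_initialized)
            return;

        table_schema = fetchSchemaFromMetastore();   /// expensive remote call, done at most once
        has_initialized = true;
    }

    static std::string fetchSchemaFromMetastore() { return "a Int32, b String"; }   /// placeholder

    std::mutex init_mutex;
    bool has_initialized = false;
    std::string table_schema;
};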
/// Used for: /// - Simple count() optimization diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 30acbcdf62b..ae470cdccc9 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -405,7 +405,7 @@ ProducerBufferPtr StorageKafka::createWriteBuffer(const Block & header) } -ConsumerBufferPtr StorageKafka::createReadBuffer(const size_t consumer_number) +ConsumerBufferPtr StorageKafka::createReadBuffer(size_t consumer_number) { cppkafka::Configuration conf; diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 62de3e5183d..03e90b1f6c3 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -120,7 +120,7 @@ private: HandleKafkaErrorMode handle_error_mode; SettingsChanges createSettingsAdjustments(); - ConsumerBufferPtr createReadBuffer(const size_t consumer_number); + ConsumerBufferPtr createReadBuffer(size_t consumer_number); /// If named_collection is specified. String collection_name; diff --git a/src/Storages/LiveView/LiveViewEventsSource.h b/src/Storages/LiveView/LiveViewEventsSource.h index 77ee06c702c..1f9f8bfb785 100644 --- a/src/Storages/LiveView/LiveViewEventsSource.h +++ b/src/Storages/LiveView/LiveViewEventsSource.h @@ -44,7 +44,7 @@ public: : SourceWithProgress({ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared(), "version")}), storage(std::move(storage_)), blocks_ptr(std::move(blocks_ptr_)), blocks_metadata_ptr(std::move(blocks_metadata_ptr_)), - active_ptr(std::move(active_ptr_)), has_limit(has_limit_), + active_ptr(active_ptr_), has_limit(has_limit_), limit(limit_), heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000) { diff --git a/src/Storages/LiveView/LiveViewSource.h b/src/Storages/LiveView/LiveViewSource.h index ec726359581..8d63890f603 100644 --- a/src/Storages/LiveView/LiveViewSource.h +++ b/src/Storages/LiveView/LiveViewSource.h @@ -26,7 +26,7 @@ public: : SourceWithProgress(storage_->getHeader()) , storage(std::move(storage_)), blocks_ptr(std::move(blocks_ptr_)), blocks_metadata_ptr(std::move(blocks_metadata_ptr_)), - active_ptr(std::move(active_ptr_)), + active_ptr(active_ptr_), has_limit(has_limit_), limit(limit_), heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000) { diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index b87d3f051d0..01293a1e5d7 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -18,9 +18,9 @@ public: QueryProcessingStage::Enum to_stage_) : IStorage(table_id_), pipes(std::move(pipes_)), to_stage(to_stage_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); } static StoragePtr createStorage(const StorageID & table_id, const ColumnsDescription & columns, Pipes pipes, QueryProcessingStage::Enum to_stage) diff --git a/src/Storages/LiveView/TemporaryLiveViewCleaner.h b/src/Storages/LiveView/TemporaryLiveViewCleaner.h index 3fe0079a46f..9cc5933eb89 100644 --- a/src/Storages/LiveView/TemporaryLiveViewCleaner.h +++ b/src/Storages/LiveView/TemporaryLiveViewCleaner.h @@ -31,7 +31,7 @@ public: private: friend std::unique_ptr::deleter_type; - TemporaryLiveViewCleaner(ContextMutablePtr global_context_); + explicit TemporaryLiveViewCleaner(ContextMutablePtr global_context_); ~TemporaryLiveViewCleaner(); void 
backgroundThreadFunc(); diff --git a/src/Storages/MarkCache.h b/src/Storages/MarkCache.h index 06143e954f8..a3f92650426 100644 --- a/src/Storages/MarkCache.h +++ b/src/Storages/MarkCache.h @@ -40,7 +40,7 @@ private: using Base = LRUCache; public: - MarkCache(size_t max_size_in_bytes) + explicit MarkCache(size_t max_size_in_bytes) : Base(max_size_in_bytes) {} /// Calculate key from path to file and offset. diff --git a/src/Storages/MergeTree/ActiveDataPartSet.h b/src/Storages/MergeTree/ActiveDataPartSet.h index 0b747ab83b9..8ab03625d5c 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/src/Storages/MergeTree/ActiveDataPartSet.h @@ -22,15 +22,14 @@ using Strings = std::vector; class ActiveDataPartSet { public: - ActiveDataPartSet(MergeTreeDataFormatVersion format_version_) : format_version(format_version_) {} + explicit ActiveDataPartSet(MergeTreeDataFormatVersion format_version_) : format_version(format_version_) {} ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, const Strings & names); - ActiveDataPartSet(const ActiveDataPartSet & other) - : format_version(other.format_version) - , part_info_to_name(other.part_info_to_name) - {} + ActiveDataPartSet(const ActiveDataPartSet & other) = default; - ActiveDataPartSet(ActiveDataPartSet && other) noexcept { swap(other); } + ActiveDataPartSet & operator=(const ActiveDataPartSet & other) = default; + + ActiveDataPartSet(ActiveDataPartSet && other) noexcept = default; void swap(ActiveDataPartSet & other) noexcept { @@ -38,16 +37,6 @@ public: std::swap(part_info_to_name, other.part_info_to_name); } - ActiveDataPartSet & operator=(const ActiveDataPartSet & other) - { - if (&other != this) - { - ActiveDataPartSet tmp(other); - swap(tmp); - } - return *this; - } - /// Returns true if the part was actually added. If out_replaced_parts != nullptr, it will contain /// parts that were replaced from the set by the newly added part. bool add(const String & name, Strings * out_replaced_parts = nullptr); diff --git a/src/Storages/MergeTree/AllMergeSelector.cpp b/src/Storages/MergeTree/AllMergeSelector.cpp index 79080df1570..5e406c6e4f7 100644 --- a/src/Storages/MergeTree/AllMergeSelector.cpp +++ b/src/Storages/MergeTree/AllMergeSelector.cpp @@ -8,7 +8,7 @@ namespace DB AllMergeSelector::PartsRange AllMergeSelector::select( const PartsRanges & parts_ranges, - const size_t /*max_total_size_to_merge*/) + size_t /*max_total_size_to_merge*/) { size_t min_partition_size = 0; PartsRanges::const_iterator best_partition; diff --git a/src/Storages/MergeTree/AllMergeSelector.h b/src/Storages/MergeTree/AllMergeSelector.h index d3b399b2fc5..6cd3bb6f3fa 100644 --- a/src/Storages/MergeTree/AllMergeSelector.h +++ b/src/Storages/MergeTree/AllMergeSelector.h @@ -13,7 +13,7 @@ public: /// Parameter max_total_size_to_merge is ignored. 
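The ActiveDataPartSet hunk above is typical of many edits in this patch: hand-written, swap-based copy/move members are replaced with defaulted ones, and the move operations are marked noexcept so that standard containers can move elements instead of copying them during reallocation. A generic sketch of the resulting shape (PartSet is an invented type, not from the patch):

#include <string>
#include <type_traits>
#include <vector>

struct PartSet
{
    PartSet() = default;

    // Defaulted special members stay correct when data members change,
    // unlike the hand-written swap-based versions they replace.
    PartSet(const PartSet &) = default;
    PartSet & operator=(const PartSet &) = default;
    PartSet(PartSet &&) noexcept = default;             // noexcept is what lets std::vector<PartSet>
    PartSet & operator=(PartSet &&) noexcept = default; // move rather than copy on growth

    std::vector<std::string> names;
};

static_assert(std::is_nothrow_move_constructible_v<PartSet>);

ActiveDataPartSet itself keeps its swap() member, so existing callers are unaffected.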
PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; }; } diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h index 81aded5e45c..baf3e281257 100644 --- a/src/Storages/MergeTree/BackgroundProcessList.h +++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -26,7 +26,7 @@ public: BackgroundProcessListEntry(const BackgroundProcessListEntry &) = delete; BackgroundProcessListEntry & operator=(const BackgroundProcessListEntry &) = delete; - BackgroundProcessListEntry(BackgroundProcessListEntry &&) = default; + BackgroundProcessListEntry(BackgroundProcessListEntry &&) noexcept = default; BackgroundProcessListEntry(BackgroundProcessList & list_, const typename container_t::iterator it_, const CurrentMetrics::Metric & metric) : list(list_), it{it_}, metric_increment{metric} diff --git a/src/Storages/MergeTree/BoolMask.h b/src/Storages/MergeTree/BoolMask.h index c26a0ed6c58..11f9238aa28 100644 --- a/src/Storages/MergeTree/BoolMask.h +++ b/src/Storages/MergeTree/BoolMask.h @@ -6,7 +6,7 @@ struct BoolMask bool can_be_true = false; bool can_be_false = false; - BoolMask() {} + BoolMask() = default; BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) {} BoolMask operator &(const BoolMask & m) const diff --git a/src/Storages/MergeTree/ColumnSizeEstimator.h b/src/Storages/MergeTree/ColumnSizeEstimator.h index 61c0ac64dbd..597dc80e525 100644 --- a/src/Storages/MergeTree/ColumnSizeEstimator.h +++ b/src/Storages/MergeTree/ColumnSizeEstimator.h @@ -1,6 +1,7 @@ #pragma once -#include "Storages/MergeTree/IMergeTreeDataPart.h" +#include +#include namespace DB diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 19d990d7c2d..4e7dcc60696 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 323b59e2902..c17eb5a981e 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -448,7 +448,7 @@ KeyCondition::KeyCondition( { for (size_t i = 0, size = key_column_names.size(); i < size; ++i) { - std::string name = key_column_names[i]; + const auto & name = key_column_names[i]; if (!key_columns.count(name)) key_columns[name] = i; } @@ -1999,7 +1999,7 @@ BoolMask KeyCondition::checkInHyperrectangle( if (!element.set_index) throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR); - rpn_stack.emplace_back(element.set_index->checkInRange(hyperrectangle, data_types)); + rpn_stack.emplace_back(element.set_index->checkInRange(hyperrectangle, data_types, single_point)); if (element.function == RPNElement::FUNCTION_NOT_IN_SET) rpn_stack.back() = !rpn_stack.back(); } diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index dee46ae52ce..afe4a9f3e20 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -31,7 +31,7 @@ struct FieldRef : public Field /// Create as explicit field without block. template - FieldRef(T && value) : Field(std::forward(value)) {} + FieldRef(T && value) : Field(std::forward(value)) {} /// NOLINT /// Create as reference to field in block. 
FieldRef(ColumnsWithTypeAndName * columns_, size_t row_idx_, size_t column_idx_) @@ -60,10 +60,10 @@ public: bool right_included = false; /// includes the right border /// The whole universe (not null). - Range() {} + Range() {} /// NOLINT /// One point. - Range(const FieldRef & point) + Range(const FieldRef & point) /// NOLINT : left(point), right(point), left_included(true), right_included(true) {} /// A bounded two-sided range. @@ -313,8 +313,8 @@ private: ALWAYS_TRUE, }; - RPNElement() {} - RPNElement(Function function_) : function(function_) {} + RPNElement() = default; + RPNElement(Function function_) : function(function_) {} /// NOLINT RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {} RPNElement(Function function_, size_t key_column_, const Range & range_) : function(function_), range(range_), key_column(key_column_) {} diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index b05026d52f9..6d3281c8c61 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -1,11 +1,13 @@ #pragma once +#include #include #include #include #include #include +namespace fs = std::filesystem; namespace zkutil { diff --git a/src/Storages/MergeTree/LevelMergeSelector.cpp b/src/Storages/MergeTree/LevelMergeSelector.cpp index 7bcfbf6160a..16947277463 100644 --- a/src/Storages/MergeTree/LevelMergeSelector.cpp +++ b/src/Storages/MergeTree/LevelMergeSelector.cpp @@ -105,7 +105,7 @@ void selectWithinPartition( LevelMergeSelector::PartsRange LevelMergeSelector::select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) + size_t max_total_size_to_merge) { Estimator estimator; diff --git a/src/Storages/MergeTree/LevelMergeSelector.h b/src/Storages/MergeTree/LevelMergeSelector.h index 5849b34e320..f4080c379c4 100644 --- a/src/Storages/MergeTree/LevelMergeSelector.h +++ b/src/Storages/MergeTree/LevelMergeSelector.h @@ -21,7 +21,7 @@ public: PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; private: const Settings settings; diff --git a/src/Storages/MergeTree/MergeSelector.h b/src/Storages/MergeTree/MergeSelector.h index aac805823a9..c55f738f879 100644 --- a/src/Storages/MergeTree/MergeSelector.h +++ b/src/Storages/MergeTree/MergeSelector.h @@ -63,7 +63,7 @@ public: */ virtual PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) = 0; + size_t max_total_size_to_merge) = 0; virtual ~IMergeSelector() = default; }; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 89fb27cc89c..8b5c2e0dc6e 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -126,13 +126,9 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (ctx->disk->exists(local_new_part_tmp_path)) throw Exception("Directory " + fullPath(ctx->disk, local_new_part_tmp_path) + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); - { - std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); - global_ctx->mutator->tmp_parts.emplace(local_tmp_part_basename); - } + global_ctx->data->temporary_parts.add(local_tmp_part_basename); SCOPE_EXIT( - std::lock_guard lock(global_ctx->mutator->tmp_parts_lock); - global_ctx->mutator->tmp_parts.erase(local_tmp_part_basename); + global_ctx->data->temporary_parts.remove(local_tmp_part_basename); ); global_ctx->all_column_names = 
global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index c462c34aa83..2e906ecfce0 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -86,7 +86,6 @@ protected: void initializeRangeReaders(MergeTreeReadTask & task); -protected: const MergeTreeData & storage; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 07d51d25700..dadccd2f9dc 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -118,9 +118,9 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada MergeTreeReadTask::MergeTreeReadTask( - const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, const size_t part_index_in_query_, + const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, size_t part_index_in_query_, const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, - const NamesAndTypesList & pre_columns_, const bool remove_prewhere_column_, const bool should_reorder_, + const NamesAndTypesList & pre_columns_, bool remove_prewhere_column_, bool should_reorder_, MergeTreeBlockSizePredictorPtr && size_predictor_) : data_part{data_part_}, mark_ranges{mark_ranges_}, part_index_in_query{part_index_in_query_}, ordered_names{ordered_names_}, column_name_set{column_name_set_}, columns{columns_}, pre_columns{pre_columns_}, diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index b931a13c027..1f70ca72f39 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -55,9 +55,9 @@ struct MergeTreeReadTask bool isFinished() const { return mark_ranges.empty() && range_reader.isCurrentRangeFinished(); } MergeTreeReadTask( - const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, const size_t part_index_in_query_, + const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, size_t part_index_in_query_, const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, - const NamesAndTypesList & pre_columns_, const bool remove_prewhere_column_, const bool should_reorder_, + const NamesAndTypesList & pre_columns_, bool remove_prewhere_column_, bool should_reorder_, MergeTreeBlockSizePredictorPtr && size_predictor_); }; @@ -86,7 +86,7 @@ struct MergeTreeBlockSizePredictor void startBlock(); /// Updates statistic for more accurate prediction - void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = DECAY()); + void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = calculateDecay()); /// Return current block size (after update()) inline size_t getBlockSize() const @@ -112,7 +112,7 @@ struct MergeTreeBlockSizePredictor : 0; } - inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = DECAY()) + inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) { double alpha = std::pow(1. 
- decay, rows_was_read); double current_ration = rows_was_filtered / std::max(1.0, static_cast(rows_was_read)); @@ -125,7 +125,7 @@ struct MergeTreeBlockSizePredictor /// After n=NUM_UPDATES_TO_TARGET_WEIGHT updates v_{n} = (1 - TARGET_WEIGHT) * v_{0} + TARGET_WEIGHT * v_{target} static constexpr double TARGET_WEIGHT = 0.5; static constexpr size_t NUM_UPDATES_TO_TARGET_WEIGHT = 8192; - static double DECAY() { return 1. - std::pow(TARGET_WEIGHT, 1. / NUM_UPDATES_TO_TARGET_WEIGHT); } + static double calculateDecay() { return 1. - std::pow(TARGET_WEIGHT, 1. / NUM_UPDATES_TO_TARGET_WEIGHT); } protected: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 7bfff3b5088..7f407199e81 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1386,7 +1386,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa } -size_t MergeTreeData::clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds) +size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds) { /// If the method is already called from another thread, then we don't need to do anything. std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock); @@ -1418,9 +1418,9 @@ size_t MergeTreeData::clearOldTemporaryDirectories(const MergeTreeDataMergerMuta { if (disk->isDirectory(it->path()) && isOldPartDirectory(disk, it->path(), deadline)) { - if (merger_mutator.hasTemporaryPart(basename)) + if (temporary_parts.contains(basename)) { - LOG_WARNING(log, "{} is an active destination for one of merge/mutation (consider increasing temporary_directories_lifetime setting)", full_path); + LOG_WARNING(log, "{} is in use (by merge/mutation/INSERT) (consider increasing temporary_directories_lifetime setting)", full_path); continue; } else @@ -3663,7 +3663,7 @@ RestoreDataTasks MergeTreeData::restoreDataPartsFromBackup(const BackupPtr & bac Strings part_names = backup->listFiles(data_path_in_backup); for (const String & part_name : part_names) { - const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version); + auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version); if (!part_info) continue; @@ -4745,7 +4745,7 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg query_options, /* prepared_sets_= */ query_info.sets); const auto & analysis_result = select.getAnalysisResult(); - query_info.sets = std::move(select.getQueryAnalyzer()->getPreparedSets()); + query_info.sets = select.getQueryAnalyzer()->getPreparedSets(); bool can_use_aggregate_projection = true; /// If the first stage of the query pipeline is more complex than Aggregating - Expression - Filter - ReadFromStorage, @@ -5540,6 +5540,7 @@ try if (result_part) { + part_log_elem.disk_name = result_part->volume->getDisk()->getName(); part_log_elem.path_on_disk = result_part->getFullPath(); part_log_elem.bytes_compressed_on_disk = result_part->getBytesOnDisk(); part_log_elem.rows = result_part->rows_count; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 1a04b2a389b..d1c48b19985 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -3,30 +3,31 @@ #include #include #include -#include -#include -#include -#include -#include -#include -#include #include #include #include #include #include #include +#include +#include 
+#include +#include +#include +#include #include #include -#include #include #include #include -#include -#include +#include +#include +#include +#include #include #include -#include +#include +#include #include @@ -243,7 +244,7 @@ public: class Transaction : private boost::noncopyable { public: - Transaction(MergeTreeData & data_) : data(data_) {} + explicit Transaction(MergeTreeData & data_) : data(data_) {} DataPartsVector commit(MergeTreeData::DataPartsLock * acquired_parts_lock = nullptr); @@ -566,7 +567,7 @@ public: /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. - size_t clearOldTemporaryDirectories(const MergeTreeDataMergerMutator & merger_mutator, size_t custom_directories_lifetime_seconds); + size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds); size_t clearEmptyParts(); @@ -876,7 +877,7 @@ public: /// Lock part in zookeeper for shared data in several nodes /// Overridden in StorageReplicatedMergeTree - virtual void lockSharedData(const IMergeTreeDataPart &, bool = false) const {} + virtual void lockSharedData(const IMergeTreeDataPart &, bool = false) const {} /// NOLINT /// Unlock shared data part in zookeeper /// Overridden in StorageReplicatedMergeTree @@ -906,7 +907,6 @@ public: mutable std::mutex currently_submerging_emerging_mutex; protected: - friend class IMergeTreeDataPart; friend class MergeTreeDataMergerMutator; friend struct ReplicatedMergeTreeTableMetadata; @@ -1200,6 +1200,8 @@ private: /// Create zero-copy exclusive lock for part and disk. Useful for coordination of /// distributed operations which can lead to data duplication. Implemented only in ReplicatedMergeTree. virtual std::optional tryCreateZeroCopyExclusiveLock(const String &, const DiskPtr &) { return std::nullopt; } + + TemporaryParts temporary_parts; }; /// RAII struct to record big parts that are submerging or emerging. diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 2f097b69fc4..a6cda0016a8 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -782,10 +782,4 @@ ExecuteTTLType MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadat } -bool MergeTreeDataMergerMutator::hasTemporaryPart(const std::string & basename) const -{ - std::lock_guard lock(tmp_parts_lock); - return tmp_parts.contains(basename); -} - } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 82cad873dce..e64c13ca6c3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -176,7 +176,6 @@ private: bool need_remove_expired_values, const MergeTreeData::MergingParams & merging_params) const; -private: MergeTreeData & data; const size_t max_tasks_count; @@ -192,26 +191,6 @@ private: ITTLMergeSelector::PartitionIdToTTLs next_recompress_ttl_merge_times_by_partition; /// Performing TTL merges independently for each partition guarantees that /// there is only a limited number of TTL merges and no partition stores data, that is too stale - -public: - /// Returns true if passed part name is active. - /// (is the destination for one of active mutation/merge). - /// - /// NOTE: that it accept basename (i.e. dirname), not the path, - /// since later requires canonical form. 
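The MergeTreeData and MergeTreeDataMergerMutator hunks above replace the merger-mutator's ad-hoc tmp_parts set with the TemporaryParts registry whose full implementation appears further down in this diff: MergeTask registers a temporary directory with add() and removes it again via a SCOPE_EXIT guard, while the cleanup code only calls contains(). A condensed, self-contained sketch of the idea, with an RAII holder standing in for the SCOPE_EXIT macro (only the contains/add/remove names come from the patch; everything else is illustrative):

#include <mutex>
#include <string>
#include <unordered_set>

// Minimal stand-in for the thread-safe registry of in-flight temporary part directories.
class TemporaryPartsRegistry
{
public:
    bool contains(const std::string & basename) const
    {
        std::lock_guard lock(mutex);
        return parts.contains(basename);
    }

    void add(std::string basename)
    {
        std::lock_guard lock(mutex);
        parts.emplace(std::move(basename));
    }

    void remove(const std::string & basename)
    {
        std::lock_guard lock(mutex);
        parts.erase(basename);
    }

private:
    mutable std::mutex mutex;                 // mutable so that contains() can be const
    std::unordered_set<std::string> parts;
};

// RAII wrapper around add()/remove(); the real MergeTask achieves the same
// effect with an explicit add() call followed by a SCOPE_EXIT guard.
class TemporaryPartHolder
{
public:
    TemporaryPartHolder(TemporaryPartsRegistry & registry_, std::string basename_)
        : registry(registry_), basename(std::move(basename_))
    {
        registry.add(basename);
    }

    ~TemporaryPartHolder() { registry.remove(basename); }

private:
    TemporaryPartsRegistry & registry;
    std::string basename;
};

Keeping the registry on MergeTreeData means merges, mutations, and the cleanup threads all consult the same set, which is what lets clearOldTemporaryDirectories() skip directories that are still in use.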
- bool hasTemporaryPart(const std::string & basename) const; - -private: - /// Set of active temporary paths that is used as the destination. - /// List of such paths is required to avoid trying to remove them during cleanup. - /// - /// NOTE: It is pretty short, so use STL is fine. - std::unordered_set tmp_parts; - /// Lock for "tmp_parts". - /// - /// NOTE: mutable is required to mark hasTemporaryPath() const - mutable std::mutex tmp_parts_lock; - }; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 1df97dc9241..737e89979a6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -231,8 +231,10 @@ void MergeTreeDataPartChecksums::addFile(const String & file_name, UInt64 file_s void MergeTreeDataPartChecksums::add(MergeTreeDataPartChecksums && rhs_checksums) { - for (auto & checksum : rhs_checksums.files) - files[std::move(checksum.first)] = std::move(checksum.second); + for (auto && checksum : rhs_checksums.files) + { + files[checksum.first] = std::move(checksum.second); + } rhs_checksums.files.clear(); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 06f1fb06f25..15acb88aa0f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -26,7 +26,7 @@ struct MergeTreeDataPartChecksum UInt64 uncompressed_size {}; uint128 uncompressed_hash {}; - MergeTreeDataPartChecksum() {} + MergeTreeDataPartChecksum() = default; MergeTreeDataPartChecksum(UInt64 file_size_, uint128 file_hash_) : file_size(file_size_), file_hash(file_hash_) {} MergeTreeDataPartChecksum(UInt64 file_size_, uint128 file_hash_, UInt64 uncompressed_size_, uint128 uncompressed_hash_) : file_size(file_size_), file_hash(file_hash_), is_compressed(true), diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.h b/src/Storages/MergeTree/MergeTreeDataPartType.h index fecd9d00cdc..7cf23c7a045 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.h +++ b/src/Storages/MergeTree/MergeTreeDataPartType.h @@ -25,7 +25,7 @@ public: }; MergeTreeDataPartType() : value(UNKNOWN) {} - MergeTreeDataPartType(Value value_) : value(value_) {} + MergeTreeDataPartType(Value value_) : value(value_) {} /// NOLINT bool operator==(const MergeTreeDataPartType & other) const { diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index eee072cf726..092ca717964 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -822,7 +822,7 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd { auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()}); if (inserted) - it->second.condition = index_helper->createIndexMergedCondtition(query_info, metadata_snapshot); + it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot); it->second.addIndex(index_helper); } diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 2057dec957e..7b194de8103 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -552,7 +552,7 @@ bool 
MergeTreeIndexConditionBloomFilter::traverseASTEquals( for (const auto & f : value_field.get()) { - if ((f.isNull() && !is_nullable) || f.IsDecimal(f.getType())) + if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) return false; mutable_column->insert(convertFieldToType(f, *actual_type, value_type.get())); diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 5c6559ba298..27fd701c67b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -38,7 +38,7 @@ public: ALWAYS_TRUE, }; - RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) {} + RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) {} /// NOLINT Function function = FUNCTION_UNKNOWN; std::vector> predicate; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index f87584c9cd6..5ecb7b537e2 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -710,9 +710,14 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/) const auto & array_type = assert_cast(*index_data_type); data_type = WhichDataType(array_type.getNestedType()); } + else if (data_type.isLowCarnality()) + { + const auto & low_cardinality = assert_cast(*index_data_type); + data_type = WhichDataType(low_cardinality.getDictionaryType()); + } if (!data_type.isString() && !data_type.isFixedString()) - throw Exception("Bloom filter index can be used only with `String`, `FixedString` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); + throw Exception("Bloom filter index can be used only with `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); } if (index.type == NgramTokenExtractor::getName()) diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index 1826719df0b..5f5956553dc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -102,7 +102,7 @@ private: ALWAYS_TRUE, }; - RPNElement( + RPNElement( /// NOLINT Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {} diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index 30995a162dc..088029d9e8e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -18,7 +18,7 @@ MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & { } -MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & index_name_, const bool met_) +MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & index_name_, bool met_) : index_name(index_name_), is_empty(false), met(met_) { } @@ -84,7 +84,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition( throw Exception("Not supported", ErrorCodes::LOGICAL_ERROR); } -MergeTreeIndexMergedConditionPtr MergeTreeIndexHypothesis::createIndexMergedCondtition( +MergeTreeIndexMergedConditionPtr 
MergeTreeIndexHypothesis::createIndexMergedCondition( const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const { return std::make_shared( diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index bbdf70a052c..578bb6f3f7a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -16,7 +16,7 @@ struct MergeTreeIndexGranuleHypothesis : public IMergeTreeIndexGranule MergeTreeIndexGranuleHypothesis( const String & index_name_, - const bool met_); + bool met_); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -55,7 +55,7 @@ private: class MergeTreeIndexHypothesis : public IMergeTreeIndex { public: - MergeTreeIndexHypothesis( + explicit MergeTreeIndexHypothesis( const IndexDescription & index_) : IMergeTreeIndex(index_) {} @@ -70,7 +70,7 @@ public: MergeTreeIndexConditionPtr createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const override; - MergeTreeIndexMergedConditionPtr createIndexMergedCondtition( + MergeTreeIndexMergedConditionPtr createIndexMergedCondition( const SelectQueryInfo & query_info, StorageMetadataPtr storage_metadata) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 0e05e25fb36..9f78c86a498 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -68,7 +68,7 @@ private: class MergeTreeIndexMinMax : public IMergeTreeIndex { public: - MergeTreeIndexMinMax(const IndexDescription & index_) + explicit MergeTreeIndexMinMax(const IndexDescription & index_) : IMergeTreeIndex(index_) {} @@ -83,7 +83,7 @@ public: bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; const char* getSerializedFileExtension() const override { return ".idx2"; } - MergeTreeIndexFormat getDeserializedFormat(const DiskPtr disk, const std::string & path_prefix) const override; + MergeTreeIndexFormat getDeserializedFormat(const DiskPtr disk, const std::string & path_prefix) const override; /// NOLINT }; } diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 1e001d01ada..984a2bb7762 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -29,7 +29,7 @@ struct MergeTreeIndexFormat MergeTreeIndexVersion version; const char* extension; - operator bool() const { return version != 0; } + operator bool() const { return version != 0; } /// NOLINT }; /// Stores some info about a single block of data. @@ -122,7 +122,7 @@ using MergeTreeIndexMergedConditions = std::vector &, String * reason)>; public: - MergeTreePartsMover(MergeTreeData * data_) + explicit MergeTreePartsMover(MergeTreeData * data_) : data(data_) , log(&Poco::Logger::get("MergeTreePartsMover")) { @@ -59,7 +59,6 @@ public: /// merge or mutation. 
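Several hunks here and below (MergeTreeIndexHypothesis, MergeTreeIndexMinMax, MergeTreePartsMover, the cleanup and restarting threads) add explicit to single-argument constructors so that values no longer convert to these types silently. A small illustration of what the keyword prevents, using invented types:

#include <string>

struct ImplicitId
{
    ImplicitId(const std::string & name_) : name(name_) {}            // implicit conversion allowed
    std::string name;
};

struct ExplicitId
{
    explicit ExplicitId(const std::string & name_) : name(name_) {}   // conversion must be spelled out
    std::string name;
};

void takeImplicit(ImplicitId) {}
void takeExplicit(ExplicitId) {}

int main()
{
    std::string s = "part_0_0_0";
    takeImplicit(s);               // compiles: a silent std::string -> ImplicitId conversion
    // takeExplicit(s);            // would not compile
    takeExplicit(ExplicitId{s});   // the intent is visible at the call site
}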
void swapClonedPart(const std::shared_ptr & cloned_parts) const; -public: /// Can stop background moves and moves from queries ActionBlocker moves_blocker; diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index e22f662960c..d8dba458203 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -707,7 +707,7 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar { auto old_columns = block_before_prewhere.getColumns(); filterColumns(old_columns, read_result.getFilterOriginal()->getData()); - block_before_prewhere.setColumns(std::move(old_columns)); + block_before_prewhere.setColumns(old_columns); } for (auto & column : block_before_prewhere) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index c89affb5365..3c31ffa7c97 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -18,9 +18,9 @@ namespace ErrorCodes namespace DB { MergeTreeReadPool::MergeTreeReadPool( - const size_t threads_, - const size_t sum_marks_, - const size_t min_marks_for_concurrent_read_, + size_t threads_, + size_t sum_marks_, + size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, @@ -28,7 +28,7 @@ MergeTreeReadPool::MergeTreeReadPool( const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, - const bool do_not_steal_tasks_) + bool do_not_steal_tasks_) : backoff_settings{backoff_settings_} , backoff_state{threads_} , data{data_} @@ -45,7 +45,7 @@ MergeTreeReadPool::MergeTreeReadPool( } -MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read, const size_t thread, const Names & ordered_names) +MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t min_marks_to_read, size_t thread, const Names & ordered_names) { const std::lock_guard lock{mutex}; @@ -149,7 +149,7 @@ Block MergeTreeReadPool::getHeader() const return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals(), data.getStorageID()); } -void MergeTreeReadPool::profileFeedback(const ReadBufferFromFileBase::ProfileInfo info) +void MergeTreeReadPool::profileFeedback(ReadBufferFromFileBase::ProfileInfo info) { if (backoff_settings.min_read_latency_ms == 0 || do_not_steal_tasks) return; @@ -232,8 +232,8 @@ std::vector MergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & void MergeTreeReadPool::fillPerThreadInfo( - const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, - const RangesInDataParts & parts, const size_t min_marks_for_concurrent_read) + size_t threads, size_t sum_marks, std::vector per_part_sum_marks, + const RangesInDataParts & parts, size_t min_marks_for_concurrent_read) { threads_tasks.resize(threads); if (parts.empty()) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index aac4d5016a2..4ab4393ef5a 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -40,7 +40,7 @@ public: size_t min_concurrency = 1; /// Constants above is just an example. 
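The recurring const size_t -> size_t edits in this patch (AllMergeSelector, MergeTreeReadPool, MergeTreeThreadSelectProcessor and others) reflect that top-level const on a by-value parameter is not part of the function's type, so it carries no information in a declaration. A tiny illustration; getTask is just an example name:

#include <cstddef>
#include <type_traits>

// Both declarations name exactly the same function: the top-level const on a
// by-value parameter is dropped from the function type.
void getTask(const std::size_t min_marks_to_read);
void getTask(std::size_t min_marks_to_read);   // redeclaration, not an overload

static_assert(std::is_same_v<void (const std::size_t), void (std::size_t)>);

// Inside a definition, const still keeps the local copy immutable, so whether
// to drop it there as well is a style choice; this patch drops it in both places.
void getTask(const std::size_t min_marks_to_read)
{
    // min_marks_to_read = 0;   // would not compile
    (void)min_marks_to_read;
}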
- BackoffSettings(const Settings & settings) + explicit BackoffSettings(const Settings & settings) : min_read_latency_ms(settings.read_backoff_min_latency_ms.totalMilliseconds()), max_throughput(settings.read_backoff_max_throughput), min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()), @@ -63,27 +63,27 @@ private: Stopwatch time_since_prev_event {CLOCK_MONOTONIC_COARSE}; size_t num_events = 0; - BackoffState(size_t threads) : current_threads(threads) {} + explicit BackoffState(size_t threads) : current_threads(threads) {} }; BackoffState backoff_state; public: MergeTreeReadPool( - const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_, + size_t threads_, size_t sum_marks_, size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, - const bool do_not_steal_tasks_ = false); + bool do_not_steal_tasks_ = false); - MergeTreeReadTaskPtr getTask(const size_t min_marks_to_read, const size_t thread, const Names & ordered_names); + MergeTreeReadTaskPtr getTask(size_t min_marks_to_read, size_t thread, const Names & ordered_names); /** Each worker could call this method and pass information about read performance. * If read performance is too low, pool could decide to lower number of threads: do not assign more tasks to several threads. * This allows to overcome excessive load to disk subsystem, when reads are not from page cache. */ - void profileFeedback(const ReadBufferFromFileBase::ProfileInfo info); + void profileFeedback(ReadBufferFromFileBase::ProfileInfo info); Block getHeader() const; @@ -91,8 +91,8 @@ private: std::vector fillPerPartInfo(const RangesInDataParts & parts); void fillPerThreadInfo( - const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, - const RangesInDataParts & parts, const size_t min_marks_for_concurrent_read); + size_t threads, size_t sum_marks, std::vector per_part_sum_marks, + const RangesInDataParts & parts, size_t min_marks_for_concurrent_read); const MergeTreeData & data; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 726b2141ffb..2e17611cd93 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -26,14 +26,14 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) : IMergeTreeReader( - std::move(data_part_), - std::move(columns_), + data_part_, + columns_, metadata_snapshot_, uncompressed_cache_, mark_cache_, - std::move(mark_ranges_), - std::move(settings_), - std::move(avg_value_size_hints_)) + mark_ranges_, + settings_, + avg_value_size_hints_) , marks_loader( data_part->volume->getDisk(), mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index 8a69183e858..9599e3ee82c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -20,9 +20,15 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory( const StorageMetadataPtr & metadata_snapshot_, MarkRanges mark_ranges_, MergeTreeReaderSettings settings_) - : 
IMergeTreeReader(data_part_, std::move(columns_), metadata_snapshot_, - nullptr, nullptr, std::move(mark_ranges_), - std::move(settings_), {}) + : IMergeTreeReader( + data_part_, + columns_, + metadata_snapshot_, + nullptr, + nullptr, + mark_ranges_, + settings_, + {}) , part_in_memory(std::move(data_part_)) { for (const auto & name_and_type : columns) diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 99a36a8e08a..7d7975e0bc0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -36,14 +36,14 @@ MergeTreeReaderWide::MergeTreeReaderWide( const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) : IMergeTreeReader( - std::move(data_part_), - std::move(columns_), + data_part_, + columns_, metadata_snapshot_, uncompressed_cache_, - std::move(mark_cache_), - std::move(mark_ranges_), - std::move(settings_), - std::move(avg_value_size_hints_)) + mark_cache_, + mark_ranges_, + settings_, + avg_value_size_hints_) { try { diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index 7eefdd9335b..a7405140c6d 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -58,7 +58,6 @@ private: /// current row at which we stop reading size_t current_row = 0; -private: /// Closes readers and unlock part locks void finish(); }; diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index b120c230005..c1cc3b6ed3c 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -122,7 +122,7 @@ void MergeTreeSettings::sanityCheck(const Settings & query_settings) const { throw Exception( ErrorCodes::BAD_ARGUMENTS, - "min_bytes_to_rebalance_partition_over_jbod: {} is lower than specified max_bytes_to_merge_at_max_space_in_pool / 150: {}", + "min_bytes_to_rebalance_partition_over_jbod: {} is lower than specified max_bytes_to_merge_at_max_space_in_pool / 1024: {}", min_bytes_to_rebalance_partition_over_jbod, max_bytes_to_merge_at_max_space_in_pool / 1024); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 6861599a1ac..3eb59b25562 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -150,6 +150,7 @@ struct Settings; M(UInt64, replicated_max_parallel_fetches_for_table, 0, "Obsolete setting, does nothing.", 0) \ M(Bool, write_final_mark, true, "Obsolete setting, does nothing.", 0) /// Settings that should not change after the creation of a table. 
+ /// NOLINTNEXTLINE #define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ M(index_granularity) diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 6a44da06f1f..145d292138a 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -13,15 +13,15 @@ namespace ErrorCodes } MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( - const size_t thread_, + size_t thread_, const MergeTreeReadPoolPtr & pool_, - const size_t min_marks_to_read_, - const UInt64 max_block_size_rows_, + size_t min_marks_to_read_, + UInt64 max_block_size_rows_, size_t preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, - const bool use_uncompressed_cache_, + bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h index 110c4fa34e6..ae25ca2a88a 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h @@ -15,15 +15,15 @@ class MergeTreeThreadSelectProcessor final : public MergeTreeBaseSelectProcessor { public: MergeTreeThreadSelectProcessor( - const size_t thread_, + size_t thread_, const std::shared_ptr & pool_, - const size_t min_marks_to_read_, - const UInt64 max_block_size_, + size_t min_marks_to_read_, + UInt64 max_block_size_, size_t preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, - const bool use_uncompressed_cache_, + bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 4aa7aa532a8..fa14fea94d1 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -79,8 +79,6 @@ private: /// Transform Conditions list to WHERE or PREWHERE expression. 
static ASTPtr reconstruct(const Conditions & conditions); - void optimizeConjunction(ASTSelectQuery & select, ASTFunction * const fun) const; - void optimizeArbitrary(ASTSelectQuery & select) const; UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const; diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index b525285979e..f94c89e20bd 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -105,8 +105,8 @@ MergedBlockOutputStream::Finalizer::~Finalizer() } } -MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) = default; -MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) = default; +MergedBlockOutputStream::Finalizer::Finalizer(Finalizer &&) noexcept = default; +MergedBlockOutputStream::Finalizer & MergedBlockOutputStream::Finalizer::operator=(Finalizer &&) noexcept = default; MergedBlockOutputStream::Finalizer::Finalizer(std::unique_ptr impl_) : impl(std::move(impl_)) {} void MergedBlockOutputStream::finalizePart( diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index b38395d56c2..c17cfd22cd8 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -42,8 +42,8 @@ public: explicit Finalizer(std::unique_ptr impl_); ~Finalizer(); - Finalizer(Finalizer &&); - Finalizer & operator=(Finalizer &&); + Finalizer(Finalizer &&) noexcept; + Finalizer & operator=(Finalizer &&) noexcept; void finish(); }; diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 8f254b4790e..e3fa07dd0c0 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -94,6 +94,7 @@ bool MutatePlainMergeTreeTask::executeStep() { storage.updateMutationEntriesErrors(future_part, false, getCurrentExceptionMessage(false)); write_part_log(ExecutionStatus::fromCurrentException()); + tryLogCurrentException(__PRETTY_FUNCTION__); return false; } } diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h index 14ef91c0777..07c5c55d873 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h @@ -63,7 +63,7 @@ public: }; EntryState(): value(TODO) {} - EntryState(Value value_): value(value_) {} + EntryState(Value value_): value(value_) {} /// NOLINT Value value; @@ -173,7 +173,6 @@ private: void removePins(const Entry & entry, zkutil::ZooKeeperPtr zk); void syncStateFromZK(); -private: StorageReplicatedMergeTree & storage; String zookeeper_path; diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index d63781db67d..183808c9290 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -22,8 +22,8 @@ public: using AtomFromASTFunc = std::function< bool(const ASTPtr & node, ContextPtr context, Block & block_with_constants, RPNElement & out)>; - RPNBuilder(const SelectQueryInfo & query_info, ContextPtr context_, const AtomFromASTFunc & atomFromAST_) - : WithContext(context_), atomFromAST(atomFromAST_) + RPNBuilder(const SelectQueryInfo & query_info, ContextPtr context_, const AtomFromASTFunc & atom_from_ast_) + : WithContext(context_), 
atom_from_ast(atom_from_ast_) { /** Evaluation of expressions that depend only on constants. * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`. @@ -79,7 +79,7 @@ private: } } - if (!atomFromAST(node, getContext(), block_with_constants, element)) + if (!atom_from_ast(node, getContext(), block_with_constants, element)) { element.function = RPNElement::FUNCTION_UNKNOWN; } @@ -114,7 +114,7 @@ private: return true; } - const AtomFromASTFunc & atomFromAST; + const AtomFromASTFunc & atom_from_ast; Block block_with_constants; RPN rpn; }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h b/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h index e5d3dd0a737..aa58e16a716 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h @@ -29,7 +29,6 @@ private: bool data_finished = false; }; -private: /// alter_version -> AlterState. std::map queue_state; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 26bfd951d3d..3b6c727cd02 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -64,7 +64,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() /// Both use relative_data_path which changes during rename, so we /// do it under share lock storage.clearOldWriteAheadLogs(); - storage.clearOldTemporaryDirectories(storage.merger_mutator, storage.getSettings()->temporary_directories_lifetime.totalSeconds()); + storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index 509b52ec07f..861de620926 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -24,7 +24,7 @@ class StorageReplicatedMergeTree; class ReplicatedMergeTreeCleanupThread { public: - ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_); void start() { task->activateAndSchedule(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h index 70eacbee102..91f5824f8fc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h @@ -42,7 +42,7 @@ struct ReplicatedMergeTreeLogEntryData; class ReplicatedMergeTreeMergeStrategyPicker: public boost::noncopyable { public: - ReplicatedMergeTreeMergeStrategyPicker(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreeMergeStrategyPicker(StorageReplicatedMergeTree & storage_); /// triggers refreshing the cached state (list of replicas etc.) 
/// used when we get new merge event from the zookeeper queue ( see queueUpdatingTask() etc ) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index a8ce4fedd6d..7c2c2401bf0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -30,7 +30,7 @@ class StorageReplicatedMergeTree; class ReplicatedMergeTreePartCheckThread { public: - ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_); ~ReplicatedMergeTreePartCheckThread(); /// Processing of the queue to be checked is done in the background thread, which you must first start. @@ -42,12 +42,12 @@ public: { ReplicatedMergeTreePartCheckThread * parent; - TemporarilyStop(ReplicatedMergeTreePartCheckThread * parent_) : parent(parent_) + explicit TemporarilyStop(ReplicatedMergeTreePartCheckThread * parent_) : parent(parent_) { parent->stop(); } - TemporarilyStop(TemporarilyStop && old) : parent(old.parent) + TemporarilyStop(TemporarilyStop && old) noexcept : parent(old.parent) { old.parent = nullptr; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index f13bf0c8c56..5f805c39ae2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -971,7 +971,7 @@ ReplicatedMergeTreeQueue::StringSet ReplicatedMergeTreeQueue::moveSiblingPartsFo return parts_for_merge; } -bool ReplicatedMergeTreeQueue::checkReplaceRangeCanBeRemoved(const MergeTreePartInfo & part_info, const LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const +bool ReplicatedMergeTreeQueue::checkReplaceRangeCanBeRemoved(const MergeTreePartInfo & part_info, LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const { if (entry_ptr->type != LogEntry::REPLACE_RANGE) return false; @@ -1515,7 +1515,7 @@ ReplicatedMergeTreeQueue::SelectedEntryPtr ReplicatedMergeTreeQueue::selectEntry bool ReplicatedMergeTreeQueue::processEntry( std::function get_zookeeper, LogEntryPtr & entry, - const std::function func) + std::function func) { std::exception_ptr saved_exception; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 208ce73e5f1..1d10c504b3c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -184,7 +184,7 @@ private: /// Check that entry_ptr is REPLACE_RANGE entry and can be removed from queue because current entry covers it bool checkReplaceRangeCanBeRemoved( - const MergeTreePartInfo & part_info, const LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const; + const MergeTreePartInfo & part_info, LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const; /// Ensures that only one thread is simultaneously updating mutations. std::mutex update_mutations_mutex; @@ -366,7 +366,7 @@ public: * If there was an exception during processing, it saves it in `entry`. * Returns true if there were no exceptions during the processing. 
*/ - bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, const std::function func); + bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, std::function func); /// Count the number of merges and mutations of single parts in the queue. OperationsInQueue countMergesAndPartMutations() const; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h index f560850a6c6..4cdcc936e21 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h @@ -23,8 +23,8 @@ struct ReplicatedMergeTreeQuorumEntry size_t required_number_of_replicas{}; std::set replicas; - ReplicatedMergeTreeQuorumEntry() {} - ReplicatedMergeTreeQuorumEntry(const String & str) + ReplicatedMergeTreeQuorumEntry() = default; + explicit ReplicatedMergeTreeQuorumEntry(const String & str) { fromString(str); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index e62cff4baf6..99e56ffb366 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -22,7 +22,7 @@ class StorageReplicatedMergeTree; class ReplicatedMergeTreeRestartingThread { public: - ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); void start() { task->activateAndSchedule(); } diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 0775e021c76..434d44022df 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -202,7 +202,7 @@ void selectWithinPartition( SimpleMergeSelector::PartsRange SimpleMergeSelector::select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) + size_t max_total_size_to_merge) { Estimator estimator; diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/SimpleMergeSelector.h index 3e104d1319a..11ffe8b672a 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/SimpleMergeSelector.h @@ -152,7 +152,7 @@ public: PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; private: const Settings settings; diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 729b545e9a0..1dc1bd1eca4 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -80,7 +80,7 @@ public: protected: /// Used in part mutation. 
- StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) + explicit StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) : IStorage(getIDFromPart(part_)) , parts({part_}) , storage(part_->storage) diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/TTLMergeSelector.cpp index 6a42ce039ac..d5657aa680d 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.cpp +++ b/src/Storages/MergeTree/TTLMergeSelector.cpp @@ -18,7 +18,7 @@ const String & getPartitionIdForPart(const ITTLMergeSelector::Part & part_info) IMergeSelector::PartsRange ITTLMergeSelector::select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) + size_t max_total_size_to_merge) { using Iterator = IMergeSelector::PartsRange::const_iterator; Iterator best_begin; diff --git a/src/Storages/MergeTree/TTLMergeSelector.h b/src/Storages/MergeTree/TTLMergeSelector.h index d41ba6f519d..88dc1fffee2 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.h +++ b/src/Storages/MergeTree/TTLMergeSelector.h @@ -30,7 +30,7 @@ public: PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; /// Get TTL value for part, may depend on child type and some settings in /// constructor. diff --git a/src/Storages/MergeTree/TemporaryParts.cpp b/src/Storages/MergeTree/TemporaryParts.cpp new file mode 100644 index 00000000000..4239c8232e5 --- /dev/null +++ b/src/Storages/MergeTree/TemporaryParts.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +bool TemporaryParts::contains(const std::string & basename) const +{ + std::lock_guard lock(mutex); + return parts.contains(basename); +} + +void TemporaryParts::add(std::string basename) +{ + std::lock_guard lock(mutex); + parts.emplace(std::move(basename)); +} + +void TemporaryParts::remove(const std::string & basename) +{ + std::lock_guard lock(mutex); + parts.erase(basename); +} + +} diff --git a/src/Storages/MergeTree/TemporaryParts.h b/src/Storages/MergeTree/TemporaryParts.h new file mode 100644 index 00000000000..bc9d270856f --- /dev/null +++ b/src/Storages/MergeTree/TemporaryParts.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +/// Manages set of active temporary paths that should not be cleaned by background thread. +class TemporaryParts : private boost::noncopyable +{ +private: + /// To add const qualifier for contains() + mutable std::mutex mutex; + + /// NOTE: It is pretty short, so use STL is fine. + std::unordered_set parts; + +public: + /// Returns true if passed part name is active. + /// (is the destination for one of active mutation/merge). + /// + /// NOTE: that it accept basename (i.e. dirname), not the path, + /// since later requires canonical form. 
+ bool contains(const std::string & basename) const; + + void add(std::string basename); + void remove(const std::string & basename); +}; + +} diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 075e9e9fbc8..e5c21ed8d3d 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -98,7 +98,8 @@ IMergeTreeDataPart::Checksums checkDataPart( }; }; - SerializationInfoByName serialization_infos(columns_txt, {}); + auto ratio_of_defaults = data_part->storage.getSettings()->ratio_of_defaults_for_sparse_serialization; + SerializationInfoByName serialization_infos(columns_txt, SerializationInfo::Settings{ratio_of_defaults, false}); auto serialization_path = path + IMergeTreeDataPart::SERIALIZATION_FILE_NAME; if (disk->exists(serialization_path)) diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index db040584536..5b963a544c8 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -645,6 +645,10 @@ void MaterializedPostgreSQLConsumer::addNested( assert(!storages.contains(postgres_table_name)); storages.emplace(postgres_table_name, nested_storage_info); + auto it = deleted_tables.find(postgres_table_name); + if (it != deleted_tables.end()) + deleted_tables.erase(it); + /// Replication consumer will read wall and check for currently processed table whether it is allowed to start applying /// changes to this table. waiting_list[postgres_table_name] = table_start_lsn; @@ -663,7 +667,9 @@ void MaterializedPostgreSQLConsumer::updateNested(const String & table_name, Sto void MaterializedPostgreSQLConsumer::removeNested(const String & postgres_table_name) { - storages.erase(postgres_table_name); + auto it = storages.find(postgres_table_name); + if (it != storages.end()) + storages.erase(it); deleted_tables.insert(postgres_table_name); } @@ -727,6 +733,7 @@ bool MaterializedPostgreSQLConsumer::readFromReplicationSlot() { if (e.code() == ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR) continue; + throw; } } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index f37cb3bffef..a01f9394190 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -22,6 +22,9 @@ struct StorageInfo StorageInfo(StoragePtr storage_, const PostgreSQLTableStructure::Attributes & attributes_) : storage(storage_), attributes(attributes_) {} + + StorageInfo(StoragePtr storage_, PostgreSQLTableStructure::Attributes && attributes_) + : storage(storage_), attributes(std::move(attributes_)) {} }; using StorageInfos = std::unordered_map; @@ -123,7 +126,7 @@ private: static Int64 getLSNValue(const std::string & lsn) { UInt32 upper_half, lower_half; - std::sscanf(lsn.data(), "%X/%X", &upper_half, &lower_half); + std::sscanf(lsn.data(), "%X/%X", &upper_half, &lower_half); /// NOLINT return (static_cast(upper_half) << 32) + lower_half; } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index c72dec824f0..582a568cb48 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -291,11 +291,11 @@ Pipe StorageMaterializedPostgreSQL::read( std::shared_ptr 
StorageMaterializedPostgreSQL::getMaterializedColumnsDeclaration( - const String name, const String type, UInt64 default_value) + String name, String type, UInt64 default_value) { auto column_declaration = std::make_shared(); - column_declaration->name = name; + column_declaration->name = std::move(name); column_declaration->type = makeASTFunction(type); column_declaration->default_specifier = "MATERIALIZED"; @@ -352,7 +352,7 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d ast_expression->name = "DateTime64"; ast_expression->arguments = std::make_shared(); ast_expression->arguments->children.emplace_back(std::make_shared(UInt32(6))); - return std::move(ast_expression); + return ast_expression; } return std::make_shared(data_type->getName()); @@ -382,8 +382,6 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override) { auto create_table_query = std::make_shared(); - if (table_override) - applyTableOverrideToCreateQuery(*table_override, create_table_query.get()); auto table_id = getStorageID(); create_table_query->setTable(getNestedTableName()); @@ -496,12 +494,37 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( constraints = metadata_snapshot->getConstraints(); } - columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); - columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); - create_table_query->set(create_table_query->columns_list, columns_declare_list); - create_table_query->set(create_table_query->storage, storage); + if (table_override) + { + if (auto * columns = table_override->columns) + { + if (columns->columns) + { + for (const auto & override_column_ast : columns->columns->children) + { + auto * override_column = override_column_ast->as(); + if (override_column->name == "_sign" || override_column->name == "_version") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot override _sign and _version column"); + } + } + } + + create_table_query->set(create_table_query->columns_list, columns_declare_list); + + applyTableOverrideToCreateQuery(*table_override, create_table_query.get()); + + create_table_query->columns_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); + create_table_query->columns_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); + } + else + { + columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); + columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); + create_table_query->set(create_table_query->columns_list, columns_declare_list); + } + /// Add columns _sign and _version, so that they can be accessed from nested ReplacingMergeTree table if needed. 
ordinary_columns_and_types.push_back({"_sign", std::make_shared()}); ordinary_columns_and_types.push_back({"_version", std::make_shared()}); @@ -511,7 +534,7 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( storage_metadata.setConstraints(constraints); setInMemoryMetadata(storage_metadata); - return std::move(create_table_query); + return create_table_query; } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index ff9b95cad7c..e6ce3bbdf65 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -135,7 +135,7 @@ protected: private: static std::shared_ptr getMaterializedColumnsDeclaration( - const String name, const String type, UInt64 default_value); + String name, String type, UInt64 default_value); ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 8f355c4a0dc..25b32a29f58 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 3dbb5b18de9..bd525ca9e5a 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -240,7 +240,7 @@ public: WriteBufferFromString wb(serialized_keys[rows_processed]); key_column_type->getDefaultSerialization()->serializeBinary(*it, wb); wb.finalize(); - slices_keys[rows_processed] = std::move(serialized_keys[rows_processed]); + slices_keys[rows_processed] = serialized_keys[rows_processed]; ++it; ++rows_processed; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index f97c09471c3..ead0d6b1260 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -474,6 +474,14 @@ static void appendBlock(const Block & from, Block & to) const IColumn & col_from = *from.getByPosition(column_no).column.get(); last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column)); + /// In case of ColumnAggregateFunction aggregate states will + /// be allocated from the query context but can be destroyed from the + /// server context (in case of background flush), and thus memory + /// will be leaked from the query, but only tracked memory, not + /// memory itself. + /// + /// To avoid this, prohibit sharing the aggregate states. 
+ last_col->ensureOwnership(); last_col->insertRangeFrom(col_from, 0, rows); to.getByPosition(column_no).column = std::move(last_col); @@ -1000,7 +1008,8 @@ void StorageBuffer::reschedule() size_t min = std::max(min_thresholds.time - time_passed, 1); size_t max = std::max(max_thresholds.time - time_passed, 1); - flush_handle->scheduleAfter(std::min(min, max) * 1000); + size_t flush = std::max(flush_thresholds.time - time_passed, 1); + flush_handle->scheduleAfter(std::min({min, max, flush}) * 1000); } void StorageBuffer::checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index da8c5f115b2..e6d856b80fc 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -295,7 +295,7 @@ void StorageDictionary::alter(const AlterCommands & params, ContextPtr alter_con } std::lock_guard lock(dictionary_config_mutex); - configuration->setString("dictionary.comment", std::move(new_comment)); + configuration->setString("dictionary.comment", new_comment); } void registerStorageDictionary(StorageFactory & factory) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index da648aa4e5c..5bfb3b4ce45 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -56,6 +56,8 @@ #include #include #include +#include +#include #include #include @@ -118,6 +120,7 @@ namespace ErrorCodes extern const int ALTER_OF_COLUMN_IS_FORBIDDEN; extern const int DISTRIBUTED_TOO_MANY_PENDING_BYTES; extern const int ARGUMENT_OUT_OF_BOUND; + extern const int TOO_LARGE_DISTRIBUTED_DEPTH; } namespace ActionLocks @@ -705,6 +708,9 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuery & query, ContextPtr local_context) { const Settings & settings = local_context->getSettingsRef(); + if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) + throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + std::shared_ptr storage_src; auto & select = query.select->as(); auto new_query = std::dynamic_pointer_cast(query.clone()); @@ -719,28 +725,60 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer storage_src = std::dynamic_pointer_cast(joined_tables.getLeftTableStorage()); if (storage_src) { - const auto select_with_union_query = std::make_shared(); - select_with_union_query->list_of_selects = std::make_shared(); + /// Unwrap view() function. 
+ if (storage_src->remote_table_function_ptr) + { + const TableFunctionPtr src_table_function = + TableFunctionFactory::instance().get(storage_src->remote_table_function_ptr, local_context); + const TableFunctionView * view_function = + assert_cast(src_table_function.get()); + new_query->select = view_function->getSelectQuery().clone(); + } + else + { + const auto select_with_union_query = std::make_shared(); + select_with_union_query->list_of_selects = std::make_shared(); - auto new_select_query = std::dynamic_pointer_cast(select_query->clone()); - select_with_union_query->list_of_selects->children.push_back(new_select_query); + auto new_select_query = std::dynamic_pointer_cast(select_query->clone()); + select_with_union_query->list_of_selects->children.push_back(new_select_query); - new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName()); + new_select_query->replaceDatabaseAndTable(storage_src->getRemoteDatabaseName(), storage_src->getRemoteTableName()); - new_query->select = select_with_union_query; + new_query->select = select_with_union_query; + } } } } } - if (!storage_src || storage_src->getClusterName() != getClusterName()) + const Cluster::AddressesWithFailover & src_addresses = storage_src ? storage_src->getCluster()->getShardsAddresses() : Cluster::AddressesWithFailover{}; + const Cluster::AddressesWithFailover & dst_addresses = getCluster()->getShardsAddresses(); + /// Compare addresses instead of cluster name, to handle remote()/cluster(). + /// (since for remote()/cluster() the getClusterName() is empty string) + if (src_addresses != dst_addresses) { + /// The warning should be produced only for root queries, + /// since in case of parallel_distributed_insert_select=1, + /// it will produce warning for the rewritten insert, + /// since destination table is still Distributed there. + if (local_context->getClientInfo().distributed_depth == 0) + { + LOG_WARNING(log, + "Parallel distributed INSERT SELECT is not possible " + "(source cluster={} ({} addresses), destination cluster={} ({} addresses))", + storage_src ? 
storage_src->getClusterName() : "", + src_addresses.size(), + getClusterName(), + dst_addresses.size()); + } return nullptr; } if (settings.parallel_distributed_insert_select == PARALLEL_DISTRIBUTED_INSERT_SELECT_ALL) { new_query->table_id = StorageID(getRemoteDatabaseName(), getRemoteTableName()); + /// Reset table function for INSERT INTO remote()/cluster() + new_query->table_function.reset(); } const auto & cluster = getCluster(); @@ -757,12 +795,15 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer new_query_str = buf.str(); } + ContextMutablePtr query_context = Context::createCopy(local_context); + ++query_context->getClientInfo().distributed_depth; + for (size_t shard_index : collections::range(0, shards_info.size())) { const auto & shard_info = shards_info[shard_index]; if (shard_info.isLocal()) { - InterpreterInsertQuery interpreter(new_query, local_context); + InterpreterInsertQuery interpreter(new_query, query_context); pipelines.emplace_back(std::make_unique()); pipelines.back()->init(interpreter.execute().pipeline); } @@ -776,7 +817,7 @@ QueryPipelineBuilderPtr StorageDistributed::distributedWrite(const ASTInsertQuer /// INSERT SELECT query returns empty block auto remote_query_executor - = std::make_shared(shard_info.pool, std::move(connections), new_query_str, Block{}, local_context); + = std::make_shared(shard_info.pool, std::move(connections), new_query_str, Block{}, query_context); pipelines.emplace_back(std::make_unique()); pipelines.back()->init(Pipe(std::make_shared(remote_query_executor, false, settings.async_socket_for_remote))); pipelines.back()->setSinks([](const Block & header, QueryPipelineBuilder::StreamType) -> ProcessorPtr diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index e47e0fddd6c..45b1cd640ee 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -114,8 +114,6 @@ public: /// Used by InterpreterInsertQuery std::string getRemoteDatabaseName() const { return remote_database; } std::string getRemoteTableName() const { return remote_table; } - /// Returns empty string if tables is used by TableFunctionRemote - std::string getClusterName() const { return cluster_name; } ClusterPtr getCluster() const; /// Used by InterpreterSystemQuery @@ -201,6 +199,7 @@ private: std::optional getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const; size_t getRandomShardIndex(const Cluster::ShardsInfo & shards); + std::string getClusterName() const { return cluster_name.empty() ? "" : cluster_name; } const DistributedSettings & getDistributedSettingsRef() const { return distributed_settings; } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index ed5431d5e03..bc2bd3bc933 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -12,9 +12,6 @@ namespace DB { -class StorageFileBlockInputStream; -class StorageFileBlockOutputStream; - class StorageFile final : public shared_ptr_helper, public IStorage { friend struct shared_ptr_helper; @@ -67,7 +64,7 @@ public: /// Is is useful because column oriented formats could effectively skip unknown columns /// So we can create a header of only required columns in read method and ask /// format to read only them. Note: this hack cannot be done with ordinary formats like TSV. 
- bool isColumnOriented() const; + bool isColumnOriented() const override; bool supportsPartitionBy() const override { return true; } diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 19e8f78d877..8934fd0ccbf 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -172,7 +172,7 @@ ColumnPtr fillColumnWithRandomData( auto data_column = fillColumnWithRandomData(nested_type, offset, max_array_length, max_string_length, rng, context); - return ColumnArray::create(std::move(data_column), std::move(offsets_column)); + return ColumnArray::create(data_column, std::move(offsets_column)); } case TypeIndex::Tuple: @@ -198,7 +198,7 @@ ColumnPtr fillColumnWithRandomData( for (UInt64 i = 0; i < limit; ++i) null_map[i] = rng() % 16 == 0; /// No real motivation for this. - return ColumnNullable::create(std::move(nested_column), std::move(null_map_column)); + return ColumnNullable::create(nested_column, std::move(null_map_column)); } case TypeIndex::UInt8: @@ -395,7 +395,7 @@ protected: for (const auto & elem : block_to_fill) columns.emplace_back(fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context)); - columns = Nested::flatten(block_to_fill.cloneWithColumns(std::move(columns))).getColumns(); + columns = Nested::flatten(block_to_fill.cloneWithColumns(columns)).getColumns(); return {std::move(columns), block_size}; } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 9d6935b609b..bdaed8b2624 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -55,6 +55,9 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata(const StorageInMemoryMetadata & other); StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); + StorageInMemoryMetadata(StorageInMemoryMetadata && other) = default; + StorageInMemoryMetadata & operator=(StorageInMemoryMetadata && other) = default; + /// NOTE: Thread unsafe part. You should modify same StorageInMemoryMetadata /// structure from different threads. It should be used as MultiVersion /// object. See example in IStorage. 
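The StorageInMemoryMetadata.h hunk just above explicitly defaults the move constructor and move assignment. The reason is a language rule: once a struct user-declares its copy constructor and copy assignment (as StorageInMemoryMetadata does in the surrounding context lines), the implicit move operations are suppressed, so std::move on such an object silently falls back to copying. The same hygiene shows up earlier in the patch where TemporarilyStop's move constructor gains noexcept. What follows is a minimal standalone sketch of that rule with made-up types, not the real ClickHouse structs:

#include <cstdio>
#include <utility>

// Illustrative only: a type whose user-declared copy operations suppress the
// implicit move operations (the situation the hunk above fixes).
struct CopyOnly
{
    CopyOnly() = default;
    CopyOnly(const CopyOnly &) { std::puts("CopyOnly: copied"); }
    CopyOnly & operator=(const CopyOnly &) { std::puts("CopyOnly: copy-assigned"); return *this; }
    // No move constructor/assignment declared: std::move() falls back to the
    // copy operations above.
};

// Same shape with the fix applied: move operations explicitly defaulted.
struct Movable
{
    Movable() = default;
    Movable(const Movable &) { std::puts("Movable: copied"); }
    Movable & operator=(const Movable &) { std::puts("Movable: copy-assigned"); return *this; }
    Movable(Movable &&) noexcept = default;
    Movable & operator=(Movable &&) noexcept = default;
};

int main()
{
    CopyOnly a;
    CopyOnly b = std::move(a);   // prints "CopyOnly: copied" - the "move" silently copies
    Movable c;
    Movable d = std::move(c);    // resolves to the defaulted move constructor, prints nothing
    (void)b; (void)d;
    return 0;
}

Defaulting the move operations (and marking them noexcept where the members allow it) restores genuine moves without changing the existing copy behaviour, which is also what makes containers prefer moving over copying on reallocation.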
diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 5ba1514877a..d5d1f312bec 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -171,7 +171,7 @@ Chunk LogSource::generate() } if (!column->empty()) - res.insert(ColumnWithTypeAndName(std::move(column), name_type.type, name_type.name)); + res.insert(ColumnWithTypeAndName(column, name_type.type, name_type.name)); } if (res) diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 8ba98c3000f..ae874649b40 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -38,7 +38,7 @@ public: void drop() override { nested_storage->drop(); } private: - [[noreturn]] void throwNotAllowed() const + [[noreturn]] static void throwNotAllowed() { throw Exception("This method is not allowed for MaterializedMySQL", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 433fdb5b0b5..15e499c6e6c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -730,7 +730,7 @@ void StorageMerge::convertingSourceStream( for (const auto & alias : aliases) { pipe_columns.emplace_back(NameAndTypePair(alias.name, alias.type)); - ASTPtr expr = std::move(alias.expression); + ASTPtr expr = alias.expression; auto syntax_result = TreeRewriter(local_context).analyze(expr, pipe_columns); auto expression_analyzer = ExpressionAnalyzer{alias.expression, syntax_result, local_context}; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 2e1a11b7ef1..a05ed04a66c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -108,7 +108,7 @@ void StorageMergeTree::startup() /// Temporary directories contain incomplete results of merges (after forced restart) /// and don't allow to reinitialize them, so delete each of them immediately - clearOldTemporaryDirectories(merger_mutator, 0); + clearOldTemporaryDirectories(0); /// NOTE background task will also do the above cleanups periodically. 
time_after_previous_cleanup_parts.restart(); @@ -1056,13 +1056,13 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign } bool scheduled = false; - if (time_after_previous_cleanup_temporary_directories.compareAndRestartDeferred( + if (auto lock = time_after_previous_cleanup_temporary_directories.compareAndRestartDeferred( getSettings()->merge_tree_clear_old_temporary_directories_interval_seconds)) { assignee.scheduleCommonTask(ExecutableLambdaAdapter::create( [this, share_lock] () { - return clearOldTemporaryDirectories(merger_mutator, getSettings()->temporary_directories_lifetime.totalSeconds()); + return clearOldTemporaryDirectories(getSettings()->temporary_directories_lifetime.totalSeconds()); }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); scheduled = true; } diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h index 16b85364c5c..0edfb558759 100644 --- a/src/Storages/StorageMongoDB.h +++ b/src/Storages/StorageMongoDB.h @@ -22,7 +22,7 @@ public: StorageMongoDB( const StorageID & table_id_, const std::string & host_, - short unsigned int port_, + uint16_t port_, const std::string & database_name_, const std::string & collection_name_, const std::string & username_, @@ -49,7 +49,7 @@ private: void connectIfNotConnected(); const std::string host; - const short unsigned int port; + const uint16_t port; /// NOLINT const std::string database_name; const std::string collection_name; const std::string username; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 83cf2b07b21..9dcbec0caae 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -178,7 +178,7 @@ public: { /// Avoid Excessive copy when block is small enough if (block.rows() <= max_rows) - return Blocks{std::move(block)}; + return {block}; const size_t split_block_size = ceil(block.rows() * 1.0 / max_rows); Blocks split_blocks(split_block_size); @@ -281,13 +281,13 @@ StorageMySQLConfiguration StorageMySQL::getConfiguration(ASTs engine_args, Conte configuration.table = engine_args[2]->as().value.safeGet(); configuration.username = engine_args[3]->as().value.safeGet(); configuration.password = engine_args[4]->as().value.safeGet(); - if (engine_args.size() >= 6) configuration.replace_query = engine_args[5]->as().value.safeGet(); if (engine_args.size() == 7) configuration.on_duplicate_clause = engine_args[6]->as().value.safeGet(); } - + for (const auto & address : configuration.addresses) + context_->getRemoteHostFilter().checkHostAndPort(address.first, toString(address.second)); if (configuration.replace_query && !configuration.on_duplicate_clause.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only one of 'replace_query' and 'on_duplicate_clause' can be specified, or none of them"); diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 5fef7f984e4..82baa98834d 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -61,11 +61,11 @@ protected: const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment) : IStorage(table_id_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_description_); - metadata_.setConstraints(constraints_); - metadata_.setComment(comment); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_description_); + storage_metadata.setConstraints(constraints_); + 
storage_metadata.setComment(comment); + setInMemoryMetadata(storage_metadata); } }; diff --git a/src/Storages/StoragePostgreSQL.cpp b/src/Storages/StoragePostgreSQL.cpp index 5042f911149..aa54663ca10 100644 --- a/src/Storages/StoragePostgreSQL.cpp +++ b/src/Storages/StoragePostgreSQL.cpp @@ -425,7 +425,6 @@ StoragePostgreSQLConfiguration StoragePostgreSQL::getConfiguration(ASTs engine_a configuration.host = configuration.addresses[0].first; configuration.port = configuration.addresses[0].second; } - configuration.database = engine_args[1]->as().value.safeGet(); configuration.table = engine_args[2]->as().value.safeGet(); configuration.username = engine_args[3]->as().value.safeGet(); @@ -436,6 +435,8 @@ StoragePostgreSQLConfiguration StoragePostgreSQL::getConfiguration(ASTs engine_a if (engine_args.size() >= 7) configuration.on_conflict = engine_args[6]->as().value.safeGet(); } + for (const auto & address : configuration.addresses) + context->getRemoteHostFilter().checkHostAndPort(address.first, toString(address.second)); return configuration; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ab42396f8da..9a5e1cfbabd 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -451,7 +451,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( } /// Temporary directories contain uninitialized results of Merges or Fetches (after forced restart), /// don't allow to reinitialize them, delete each of them immediately. - clearOldTemporaryDirectories(merger_mutator, 0); + clearOldTemporaryDirectories(0); clearOldWriteAheadLogs(); } @@ -7149,9 +7149,9 @@ void StorageReplicatedMergeTree::createTableSharedID() if (!zookeeper->tryGet(zookeeper_table_id_path, id)) { UUID table_id_candidate; - auto storage_id = getStorageID(); - if (storage_id.uuid != UUIDHelpers::Nil) - table_id_candidate = storage_id.uuid; + auto local_storage_id = getStorageID(); + if (local_storage_id.uuid != UUIDHelpers::Nil) + table_id_candidate = local_storage_id.uuid; else table_id_candidate = UUIDHelpers::generateV4(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 056671dc164..935bd048603 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -283,7 +283,7 @@ public: // Return table id, common for different replicas String getTableSharedID() const; - static const String getDefaultZooKeeperName() { return default_zookeeper_name; } + static String getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. void checkBrokenDisks(); @@ -837,6 +837,7 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const * PS. Perhaps it would be better to add a flag to the DataPart that a part is inserted into ZK. * But here it's too easy to get confused with the consistency of this flag. 
*/ +/// NOLINTNEXTLINE #define MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER (5 * 60) } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index ac462e5cb6e..ec506ad0cd0 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -233,7 +233,7 @@ StorageS3Source::StorageS3Source( const ColumnsDescription & columns_, UInt64 max_block_size_, UInt64 max_single_read_retries_, - const String compression_hint_, + String compression_hint_, const std::shared_ptr & client_, const String & bucket_, std::shared_ptr file_iterator_) @@ -245,7 +245,7 @@ StorageS3Source::StorageS3Source( , columns_desc(columns_) , max_block_size(max_block_size_) , max_single_read_retries(max_single_read_retries_) - , compression_hint(compression_hint_) + , compression_hint(std::move(compression_hint_)) , client(client_) , sample_block(sample_block_) , format_settings(format_settings_) @@ -615,6 +615,11 @@ std::shared_ptr StorageS3::createFileIterator( } } +bool StorageS3::isColumnOriented() const +{ + return FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); +} + Pipe StorageS3::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -639,6 +644,20 @@ Pipe StorageS3::read( std::shared_ptr iterator_wrapper = createFileIterator(client_auth, keys, is_key_with_globs, distributed_processing, local_context); + ColumnsDescription columns_description; + Block block_for_format; + if (isColumnOriented()) + { + columns_description = ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + block_for_format = metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = metadata_snapshot->getColumns(); + block_for_format = metadata_snapshot->getSampleBlock(); + } + for (size_t i = 0; i < num_streams; ++i) { pipes.emplace_back(std::make_shared( @@ -646,10 +665,10 @@ Pipe StorageS3::read( need_file_column, format_name, getName(), - metadata_snapshot->getSampleBlock(), + block_for_format, local_context, format_settings, - metadata_snapshot->getColumns(), + columns_description, max_block_size, max_single_read_retries, compression_method, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 03b54706b4a..b2283687e2b 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -71,7 +71,7 @@ public: const ColumnsDescription & columns_, UInt64 max_block_size_, UInt64 max_single_read_retries_, - const String compression_hint_, + String compression_hint_, const std::shared_ptr & client_, const String & bucket, std::shared_ptr file_iterator_); @@ -218,6 +218,8 @@ private: bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx); + + bool isColumnOriented() const override; }; } diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 0b7ab30fa24..8054762d389 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -148,7 +149,7 @@ public: if (nested) StorageProxy::renameInMemory(new_table_id); else - IStorage::renameInMemory(new_table_id); + IStorage::renameInMemory(new_table_id); /// NOLINT } bool isView() const override { return false; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index e62b14224ec..768f01efd24 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -405,7 +405,7 @@ 
std::vector> IStorageURLBase::getReadURIPara std::function IStorageURLBase::getReadPOSTDataCallback( const Names & /*column_names*/, - const StorageMetadataPtr & /*metadata_snapshot*/, + const ColumnsDescription & /* columns_description */, const SelectQueryInfo & /*query_info*/, ContextPtr /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, @@ -482,6 +482,11 @@ ColumnsDescription IStorageURLBase::getTableStructureFromData( throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "All attempts to extract table structure from urls failed. Errors:\n{}", exception_messages); } +bool IStorageURLBase::isColumnOriented() const +{ + return FormatFactory::instance().checkIfFormatIsColumnOriented(format_name); +} + Pipe IStorageURLBase::read( const Names & column_names, const StorageMetadataPtr & metadata_snapshot, @@ -493,6 +498,20 @@ Pipe IStorageURLBase::read( { auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); + ColumnsDescription columns_description; + Block block_for_format; + if (isColumnOriented()) + { + columns_description = ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + block_for_format = metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = metadata_snapshot->getColumns(); + block_for_format = metadata_snapshot->getSampleBlock(); + } + if (urlWithGlobs(uri)) { size_t max_addresses = local_context->getSettingsRef().glob_expansion_max_elements; @@ -515,14 +534,14 @@ Pipe IStorageURLBase::read( uri_info, getReadMethod(), getReadPOSTDataCallback( - column_names, metadata_snapshot, query_info, + column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - getHeaderBlock(column_names, metadata_snapshot), + block_for_format, local_context, - metadata_snapshot->getColumns(), + columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), compression_method, headers, params, /* glob_url */true)); @@ -537,14 +556,14 @@ Pipe IStorageURLBase::read( uri_info, getReadMethod(), getReadPOSTDataCallback( - column_names, metadata_snapshot, query_info, + column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - getHeaderBlock(column_names, metadata_snapshot), + block_for_format, local_context, - metadata_snapshot->getColumns(), + columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), compression_method, headers, params)); @@ -561,6 +580,20 @@ Pipe StorageURLWithFailover::read( size_t max_block_size, unsigned /*num_streams*/) { + ColumnsDescription columns_description; + Block block_for_format; + if (isColumnOriented()) + { + columns_description = ColumnsDescription{ + metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()).getNamesAndTypesList()}; + block_for_format = metadata_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); + } + else + { + columns_description = metadata_snapshot->getColumns(); + block_for_format = metadata_snapshot->getSampleBlock(); + } + auto params = getReadURIParams(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size); auto uri_info = std::make_shared(); @@ -569,14 +602,14 @@ Pipe StorageURLWithFailover::read( uri_info, 
getReadMethod(), getReadPOSTDataCallback( - column_names, metadata_snapshot, query_info, + column_names, columns_description, query_info, local_context, processed_stage, max_block_size), format_name, format_settings, getName(), - getHeaderBlock(column_names, metadata_snapshot), + block_for_format, local_context, - metadata_snapshot->getColumns(), + columns_description, max_block_size, ConnectionTimeouts::getHTTPTimeouts(local_context), compression_method, headers, params)); @@ -649,7 +682,7 @@ StorageURLWithFailover::StorageURLWithFailover( Poco::URI poco_uri(uri_option); context_->getRemoteHostFilter().checkURL(poco_uri); LOG_DEBUG(&Poco::Logger::get("StorageURLDistributed"), "Adding URL option: {}", uri_option); - uri_options.emplace_back(std::move(uri_option)); + uri_options.emplace_back(uri_option); } } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 790f01135d3..79d2489f241 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -88,12 +88,14 @@ protected: virtual std::function getReadPOSTDataCallback( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; + bool isColumnOriented() const override; + private: virtual Block getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const = 0; }; diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 5ca23434356..cd36a10aae7 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -40,7 +40,7 @@ public: size_t max_block_size, unsigned num_streams) override; - void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) const + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); } diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 90ac04ed250..3cb6c9d0359 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -68,14 +68,14 @@ std::vector> StorageXDBC::getReadURIParams( std::function StorageXDBC::getReadPOSTDataCallback( const Names & column_names, - const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, ContextPtr local_context, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { String query = transformQueryForExternalDatabase(query_info, - metadata_snapshot->getColumns().getOrdinary(), + columns_description.getOrdinary(), bridge_helper->getIdentifierQuotingStyle(), remote_database_name, remote_table_name, @@ -85,7 +85,7 @@ std::function StorageXDBC::getReadPOSTDataCallback( NamesAndTypesList cols; for (const String & name : column_names) { - auto column_data = metadata_snapshot->getColumns().getPhysical(name); + auto column_data = columns_description.getPhysical(name); cols.emplace_back(column_data.name, column_data.type); } @@ -114,7 +114,7 @@ Pipe StorageXDBC::read( return IStorageURLBase::read(column_names, metadata_snapshot, query_info, local_context, processed_stage, max_block_size, num_streams); } -SinkToStoragePtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr 
local_context) +SinkToStoragePtr StorageXDBC::write(const ASTPtr & /* query */, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) { bridge_helper->startBridgeSync(); @@ -140,6 +140,11 @@ SinkToStoragePtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetad chooseCompressionMethod(uri, compression_method)); } +bool StorageXDBC::isColumnOriented() const +{ + return true; +} + Block StorageXDBC::getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const { return metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 4438e1c4737..d8771c4ed83 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -59,13 +59,15 @@ private: std::function getReadPOSTDataCallback( const Names & column_names, - const StorageMetadataPtr & metadata_snapshot, + const ColumnsDescription & columns_description, const SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const override; Block getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const override; + + bool isColumnOriented() const override; }; } diff --git a/src/Storages/System/CMakeLists.txt b/src/Storages/System/CMakeLists.txt index 133761cbe22..efc4c0ed37b 100644 --- a/src/Storages/System/CMakeLists.txt +++ b/src/Storages/System/CMakeLists.txt @@ -39,7 +39,13 @@ if(Git_FOUND) ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) endif() -configure_file (StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) +function(generate_system_build_options) + include(${ClickHouse_SOURCE_DIR}/src/configure_config.cmake) + include(${ClickHouse_SOURCE_DIR}/src/Functions/configure_config.cmake) + include(${ClickHouse_SOURCE_DIR}/src/Formats/configure_config.cmake) + configure_file(StorageSystemBuildOptions.generated.cpp.in ${CONFIG_BUILD}) +endfunction() +generate_system_build_options() include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(storages_system .) 
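Several hunks above (StorageS3::read, IStorageURLBase::read, StorageURLWithFailover::read and StorageXDBC) follow the pattern that the comment in StorageFile.h describes: when the underlying format is column-oriented, isColumnOriented() returns true and the source is handed a header built from only the requested columns, so formats such as Parquet or ORC can skip the remaining columns entirely, while row-oriented formats like TSV still receive the full sample block. Below is a minimal sketch of that selection logic, using plain string lists as hypothetical stand-ins for ClickHouse's Block/ColumnsDescription types:

#include <string>
#include <unordered_set>
#include <vector>

// Stand-in "schema": just an ordered list of column names.
using Schema = std::vector<std::string>;

// Mirrors the branching added in the read() methods above: column-oriented
// formats get a header restricted to the requested columns, everything else
// gets the full table schema.
Schema buildHeaderForRead(const Schema & full_schema,
                          const Schema & requested_columns,
                          bool is_column_oriented)
{
    if (!is_column_oriented)
        return full_schema;

    std::unordered_set<std::string> requested(requested_columns.begin(), requested_columns.end());
    Schema header;
    for (const auto & column : full_schema)
        if (requested.count(column))
            header.push_back(column);   // keep only what the query actually asked for
    return header;
}

int main()
{
    Schema table = {"id", "payload", "created_at"};
    Schema wanted = {"id"};
    auto header = buildHeaderForRead(table, wanted, /*is_column_oriented=*/ true);
    // header == {"id"}: a column-oriented format would now read a single column.
    return header.size() == 1 ? 0 : 1;
}

In the actual diff the pruned header is produced via getSampleBlockForColumns() on the metadata snapshot; the sketch only illustrates why the two branches exist.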
diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 33086498730..d78c8179a71 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -32,11 +32,11 @@ protected: virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; public: - IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) + explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(ColumnsDescription(Self::getNamesAndTypes(), Self::getNamesAndAliases())); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription(Self::getNamesAndTypes(), Self::getNamesAndAliases())); + setInMemoryMetadata(storage_metadata); } Pipe read( diff --git a/src/Storages/System/StorageSystemAsynchronousInserts.cpp b/src/Storages/System/StorageSystemAsynchronousInserts.cpp index 0fa6c1b653c..80fc070c83a 100644 --- a/src/Storages/System/StorageSystemAsynchronousInserts.cpp +++ b/src/Storages/System/StorageSystemAsynchronousInserts.cpp @@ -41,10 +41,10 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co if (!insert_queue) return; - auto queue = insert_queue->getQueue(); + auto [queue, queue_lock] = insert_queue->getQueueLocked(); for (const auto & [key, elem] : queue) { - std::lock_guard lock(elem->mutex); + std::lock_guard elem_lock(elem->mutex); if (!elem->data) continue; @@ -62,8 +62,19 @@ void StorageSystemAsynchronousInserts::fillData(MutableColumns & res_columns, Co size_t i = 0; res_columns[i++]->insert(queryToString(insert_query)); - res_columns[i++]->insert(insert_query.table_id.getDatabaseName()); - res_columns[i++]->insert(insert_query.table_id.getTableName()); + + /// If query is "INSERT INTO FUNCTION" then table_id is empty. 
+ if (insert_query.table_id) + { + res_columns[i++]->insert(insert_query.table_id.getDatabaseName()); + res_columns[i++]->insert(insert_query.table_id.getTableName()); + } + else + { + res_columns[i++]->insertDefault(); + res_columns[i++]->insertDefault(); + } + res_columns[i++]->insert(insert_query.format); res_columns[i++]->insert(time_in_microseconds(elem->data->first_update)); res_columns[i++]->insert(time_in_microseconds(elem->data->last_update)); diff --git a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in index 5c25322b4f0..d7034cf828b 100644 --- a/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in @@ -11,7 +11,6 @@ const char * auto_config_build[] "VERSION_DATE", "@VERSION_DATE@", "BUILD_TYPE", "@CMAKE_BUILD_TYPE@", "SYSTEM_PROCESSOR", "@CMAKE_SYSTEM_PROCESSOR@", - "LIBRARY_ARCHITECTURE", "@CMAKE_LIBRARY_ARCHITECTURE@", "CMAKE_VERSION", "@CMAKE_VERSION@", "C_COMPILER", "@CMAKE_C_COMPILER@", "C_COMPILER_VERSION", "@CMAKE_C_COMPILER_VERSION@", @@ -19,7 +18,7 @@ const char * auto_config_build[] "CXX_COMPILER_VERSION", "@CMAKE_CXX_COMPILER_VERSION@", "C_FLAGS", "@FULL_C_FLAGS_NORMALIZED@", "CXX_FLAGS", "@FULL_CXX_FLAGS_NORMALIZED@", - "LINK_FLAGS", "@CMAKE_EXE_LINKER_FLAGS_NORMALIZED@", + "LINK_FLAGS", "@FULL_EXE_LINKER_FLAGS_NORMALIZED@", "BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@", "STATIC", "@USE_STATIC_LIBRARIES@", "SPLIT_BINARY", "@CLICKHOUSE_SPLIT_BINARY@", diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.h b/src/Storages/System/StorageSystemDataSkippingIndices.h index d86890f5e27..4af2398a04b 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.h +++ b/src/Storages/System/StorageSystemDataSkippingIndices.h @@ -26,7 +26,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemDataSkippingIndices(const StorageID & table_id_); + explicit StorageSystemDataSkippingIndices(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index ece9d495500..51ee93a2f15 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -29,8 +29,8 @@ protected: SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) override; + size_t /*max_block_size*/, + unsigned /*num_streams*/) override; }; } diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index 2541dedd8fc..1404d6023d4 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ b/src/Storages/System/StorageSystemDisks.h @@ -32,7 +32,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemDisks(const StorageID & table_id_); + explicit StorageSystemDisks(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 87247f96b24..bf19771c940 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -23,7 +23,7 @@ struct StoragesInfo bool need_inactive_parts = false; MergeTreeData * data = nullptr; - operator bool() const { return storage != nullptr; } + operator bool() const { return storage != nullptr; } /// NOLINT 
MergeTreeData::DataPartsVector getParts(MergeTreeData::DataPartStateVector & state, bool has_state_column, bool require_projection_parts = false) const; }; diff --git a/src/Storages/System/StorageSystemPartsColumns.h b/src/Storages/System/StorageSystemPartsColumns.h index c3e3eaefcf7..b8c52ca16ef 100644 --- a/src/Storages/System/StorageSystemPartsColumns.h +++ b/src/Storages/System/StorageSystemPartsColumns.h @@ -21,7 +21,7 @@ public: std::string getName() const override { return "SystemPartsColumns"; } protected: - StorageSystemPartsColumns(const StorageID & table_id_); + explicit StorageSystemPartsColumns(const StorageID & table_id_); void processNextStorage( MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; diff --git a/src/Storages/System/StorageSystemProcesses.cpp b/src/Storages/System/StorageSystemProcesses.cpp index 5e6ba37226c..efa01561ad4 100644 --- a/src/Storages/System/StorageSystemProcesses.cpp +++ b/src/Storages/System/StorageSystemProcesses.cpp @@ -48,6 +48,7 @@ NamesAndTypesList StorageSystemProcesses::getNamesAndTypes() {"forwarded_for", std::make_shared()}, {"quota_key", std::make_shared()}, + {"distributed_depth", std::make_shared()}, {"elapsed", std::make_shared()}, {"is_cancelled", std::make_shared()}, @@ -115,6 +116,7 @@ void StorageSystemProcesses::fillData(MutableColumns & res_columns, ContextPtr c res_columns[i++]->insert(process.client_info.forwarded_for); res_columns[i++]->insert(process.client_info.quota_key); + res_columns[i++]->insert(process.client_info.distributed_depth); res_columns[i++]->insert(process.elapsed_seconds); res_columns[i++]->insert(process.is_cancelled); diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.h b/src/Storages/System/StorageSystemProjectionPartsColumns.h index 10e80877285..5679f5e9093 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.h +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.h @@ -21,7 +21,7 @@ public: std::string getName() const override { return "SystemProjectionPartsColumns"; } protected: - StorageSystemProjectionPartsColumns(const StorageID & table_id_); + explicit StorageSystemProjectionPartsColumns(const StorageID & table_id_); void processNextStorage( MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index cf457efe250..500b4e97546 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -30,7 +30,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemReplicas(const StorageID & table_id_); + explicit StorageSystemReplicas(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 455d715d5da..cd4f3dab109 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -14,7 +14,7 @@ #include #include #include -#include +#include namespace DB @@ -43,7 +43,8 @@ NamesAndTypesList StorageSystemRowPolicies::getNamesAndTypes() {"apply_to_except", std::make_shared(std::make_shared())} }; - boost::range::push_back(names_and_types, std::move(extra_names_and_types)); + insertAtEnd(names_and_types, extra_names_and_types); + return names_and_types; } diff --git a/src/Storages/System/StorageSystemStackTrace.h 
b/src/Storages/System/StorageSystemStackTrace.h index a5827e32e6f..da4315d3ffa 100644 --- a/src/Storages/System/StorageSystemStackTrace.h +++ b/src/Storages/System/StorageSystemStackTrace.h @@ -27,7 +27,7 @@ public: String getName() const override { return "SystemStackTrace"; } static NamesAndTypesList getNamesAndTypes(); - StorageSystemStackTrace(const StorageID & table_id_); + explicit StorageSystemStackTrace(const StorageID & table_id_); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index f202299db1f..28730ce33c4 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -32,7 +32,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemStoragePolicies(const StorageID & table_id_); + explicit StorageSystemStoragePolicies(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 24e3fe4f7a9..9332bc6a004 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -509,8 +509,8 @@ protected: loading_dependencies_tables.reserve(info.dependencies.size()); for (auto && dependency : info.dependencies) { - loading_dependencies_databases.push_back(std::move(dependency.database)); - loading_dependencies_tables.push_back(std::move(dependency.table)); + loading_dependencies_databases.push_back(dependency.database); + loading_dependencies_tables.push_back(dependency.table); } Array loading_dependent_databases; @@ -519,8 +519,8 @@ protected: loading_dependent_tables.reserve(info.dependencies.size()); for (auto && dependent : info.dependent_database_objects) { - loading_dependent_databases.push_back(std::move(dependent.database)); - loading_dependent_tables.push_back(std::move(dependent.table)); + loading_dependent_databases.push_back(dependent.database); + loading_dependent_tables.push_back(dependent.table); } if (columns_mask[src_index++]) diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 808dc862e8d..23f3aedb164 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -30,7 +30,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemTables(const StorageID & table_id_); + explicit StorageSystemTables(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index f2b2102c7ff..879951df162 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -16,6 +16,8 @@ #include #include #include +#include +#include namespace DB @@ -47,14 +49,23 @@ NamesAndTypesList StorageSystemZooKeeper::getNamesAndTypes() }; } -using Paths = Strings; +/// Type of path to be fetched +enum class ZkPathType +{ + Exact, /// Fetch all nodes under this path + Prefix, /// Fetch all nodes starting with this prefix, recursively (multiple paths may match prefix) + Recurse, /// Fatch all nodes under this path, recursively +}; + +/// List of paths to be feched from zookeeper +using Paths = std::deque>; static String pathCorrected(const String & path) { String path_corrected; /// path should starts with '/', otherwise ZBADARGUMENTS will be thrown in /// ZooKeeper::sendThread and the 
session will fail. - if (path[0] != '/') + if (path.empty() || path[0] != '/') path_corrected = '/'; path_corrected += path; /// In all cases except the root, path must not end with a slash. @@ -64,7 +75,7 @@ static String pathCorrected(const String & path) } -static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) +static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context, bool allow_unrestricted) { const auto * function = elem.as(); if (!function) @@ -73,7 +84,7 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) if (function->name == "and") { for (const auto & child : function->arguments->children) - if (extractPathImpl(*child, res, context)) + if (extractPathImpl(*child, res, context, allow_unrestricted)) return true; return false; @@ -110,7 +121,7 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) set.checkColumnsNumber(1); const auto & set_column = *set.getSetElements()[0]; for (size_t row = 0; row < set_column.size(); ++row) - res.emplace_back(set_column[row].safeGet()); + res.emplace_back(set_column[row].safeGet(), ZkPathType::Exact); } else { @@ -121,12 +132,12 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) if (String str; literal->value.tryGet(str)) { - res.emplace_back(str); + res.emplace_back(str, ZkPathType::Exact); } else if (Tuple tuple; literal->value.tryGet(tuple)) { for (auto element : tuple) - res.emplace_back(element.safeGet()); + res.emplace_back(element.safeGet(), ZkPathType::Exact); } else return false; @@ -156,7 +167,61 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) if (literal->value.getType() != Field::Types::String) return false; - res.emplace_back(literal->value.safeGet()); + res.emplace_back(literal->value.safeGet(), ZkPathType::Exact); + return true; + } + else if (allow_unrestricted && function->name == "like") + { + const ASTIdentifier * ident; + ASTPtr value; + if ((ident = args.children.at(0)->as())) + value = args.children.at(1); + else if ((ident = args.children.at(1)->as())) + value = args.children.at(0); + else + return false; + + if (ident->name() != "path") + return false; + + auto evaluated = evaluateConstantExpressionAsLiteral(value, context); + const auto * literal = evaluated->as(); + if (!literal) + return false; + + if (literal->value.getType() != Field::Types::String) + return false; + + String pattern = literal->value.safeGet(); + bool has_metasymbol = false; + String prefix; // pattern prefix before the first metasymbol occurrence + for (size_t i = 0; i < pattern.size(); i++) + { + char c = pattern[i]; + // Handle escaping of metasymbols + if (c == '\\' && i + 1 < pattern.size()) + { + char c2 = pattern[i + 1]; + if (c2 == '_' || c2 == '%') + { + prefix.append(1, c2); + i++; // to skip two bytes + continue; + } + } + + // Stop prefix on the first metasymbols occurrence + if (c == '_' || c == '%') + { + has_metasymbol = true; + break; + } + + prefix.append(1, c); + } + + res.emplace_back(prefix, has_metasymbol ? ZkPathType::Prefix : ZkPathType::Exact); + return true; } @@ -166,39 +231,60 @@ static bool extractPathImpl(const IAST & elem, Paths & res, ContextPtr context) /** Retrieve from the query a condition of the form `path = 'path'`, from conjunctions in the WHERE clause. 
*/ -static Paths extractPath(const ASTPtr & query, ContextPtr context) +static Paths extractPath(const ASTPtr & query, ContextPtr context, bool allow_unrestricted) { const auto & select = query->as(); if (!select.where()) - return Paths(); + return allow_unrestricted ? Paths{{"/", ZkPathType::Recurse}} : Paths(); Paths res; - return extractPathImpl(*select.where(), res, context) ? res : Paths(); + return extractPathImpl(*select.where(), res, context, allow_unrestricted) ? res : Paths(); } void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const { - const Paths & paths = extractPath(query_info.query, context); - if (paths.empty()) - throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause.", ErrorCodes::BAD_ARGUMENTS); + Paths paths = extractPath(query_info.query, context, context->getSettingsRef().allow_unrestricted_reads_from_keeper); zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); - std::unordered_set paths_corrected; - for (const auto & path : paths) - { - const String & path_corrected = pathCorrected(path); - auto [it, inserted] = paths_corrected.emplace(path_corrected); - if (!inserted) /// Do not repeat processing. - continue; + if (paths.empty()) + throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.", ErrorCodes::BAD_ARGUMENTS); - zkutil::Strings nodes = zookeeper->getChildren(path_corrected); + std::unordered_set added; + while (!paths.empty()) + { + auto [path, path_type] = std::move(paths.front()); + paths.pop_front(); + + String prefix; + if (path_type == ZkPathType::Prefix) + { + prefix = path; + size_t last_slash = prefix.rfind('/'); + path = prefix.substr(0, last_slash == String::npos ? 0 : last_slash); + } + + String path_corrected = pathCorrected(path); + + /// Node can be deleted concurrently. It's Ok, we don't provide any + /// consistency guarantees for system.zookeeper table. + zkutil::Strings nodes; + zookeeper->tryGetChildren(path_corrected, nodes); String path_part = path_corrected; if (path_part == "/") path_part.clear(); + if (!prefix.empty()) + { + // Remove nodes that do not match specified prefix + nodes.erase(std::remove_if(nodes.begin(), nodes.end(), [&prefix, &path_part] (const String & node) + { + return (path_part + '/' + node).substr(0, prefix.size()) != prefix; + }), nodes.end()); + } + std::vector> futures; futures.reserve(nodes.size()); for (const String & node : nodes) @@ -210,6 +296,11 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr c if (res.error == Coordination::Error::ZNONODE) continue; /// Node was deleted meanwhile. + // Deduplication + String key = path_part + '/' + nodes[i]; + if (auto [it, inserted] = added.emplace(key); !inserted) + continue; + const Coordination::Stat & stat = res.stat; size_t col_num = 0; @@ -228,6 +319,11 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr c res_columns[col_num++]->insert(stat.pzxid); res_columns[col_num++]->insert( path); /// This is the original path. In order to process the request, condition in WHERE should be triggered. 
+ + if (path_type != ZkPathType::Exact && res.stat.numChildren > 0) + { + paths.emplace_back(key, ZkPathType::Recurse); + } } } } diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 37c913f58a9..a329b01e9f2 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -639,10 +639,43 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( "The first argument of time window function should not be a constant value.", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + ToIdentifierMatcher::Data query_data; + query_data.window_id_name = window_id_name; + query_data.window_id_alias = window_id_alias; + ToIdentifierMatcher::Visitor to_identifier_visitor(query_data); + + ReplaceFunctionNowData time_now_data; + ReplaceFunctionNowVisitor time_now_visitor(time_now_data); + ReplaceFunctionWindowMatcher::Data func_hop_data; + ReplaceFunctionWindowMatcher::Visitor func_window_visitor(func_hop_data); + + DropTableIdentifierMatcher::Data drop_table_identifier_data; + DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data); + + auto visit = [&](const IAST * ast) + { + auto node = ast->clone(); + QueryNormalizer(normalizer_data).visit(node); + /// now() -> ____timestamp + if (is_time_column_func_now) + { + time_now_visitor.visit(node); + function_now_timezone = time_now_data.now_timezone; + } + drop_table_identifier_visitor.visit(node); + /// tumble/hop -> windowID + func_window_visitor.visit(node); + to_identifier_visitor.visit(node); + node->setAlias(""); + return node; + }; + auto new_storage = std::make_shared(); /// storage != nullptr in case create window view with ENGINE syntax if (storage) { + new_storage->set(new_storage->engine, storage->engine->clone()); + if (storage->ttl_table) throw Exception( ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, @@ -654,46 +687,14 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( "The ENGINE of WindowView must be MergeTree family of table engines " "including the engines with replication support"); - ToIdentifierMatcher::Data query_data; - query_data.window_id_name = window_id_name; - query_data.window_id_alias = window_id_alias; - ToIdentifierMatcher::Visitor to_identifier_visitor(query_data); - - ReplaceFunctionNowData time_now_data; - ReplaceFunctionNowVisitor time_now_visitor(time_now_data); - ReplaceFunctionWindowMatcher::Data func_hop_data; - ReplaceFunctionWindowMatcher::Visitor func_window_visitor(func_hop_data); - - DropTableIdentifierMatcher::Data drop_table_identifier_data; - DropTableIdentifierMatcher::Visitor drop_table_identifier_visitor(drop_table_identifier_data); - - new_storage->set(new_storage->engine, storage->engine->clone()); - - auto visit = [&](const IAST * ast, IAST *& field) - { - if (ast) - { - auto node = ast->clone(); - QueryNormalizer(normalizer_data).visit(node); - /// now() -> ____timestamp - if (is_time_column_func_now) - { - time_now_visitor.visit(node); - function_now_timezone = time_now_data.now_timezone; - } - drop_table_identifier_visitor.visit(node); - /// tumble/hop -> windowID - func_window_visitor.visit(node); - to_identifier_visitor.visit(node); - node->setAlias(""); - new_storage->set(field, node); - } - }; - - visit(storage->partition_by, new_storage->partition_by); - visit(storage->primary_key, new_storage->primary_key); - visit(storage->order_by, new_storage->order_by); - visit(storage->sample_by, new_storage->sample_by); + if 
(storage->partition_by) + new_storage->set(new_storage->partition_by, visit(storage->partition_by)); + if (storage->primary_key) + new_storage->set(new_storage->primary_key, visit(storage->primary_key)); + if (storage->order_by) + new_storage->set(new_storage->order_by, visit(storage->order_by)); + if (storage->sample_by) + new_storage->set(new_storage->sample_by, visit(storage->sample_by)); if (storage->settings) new_storage->set(new_storage->settings, storage->settings->clone()); @@ -702,8 +703,21 @@ std::shared_ptr StorageWindowView::getInnerTableCreateQuery( { new_storage->set(new_storage->engine, makeASTFunction("AggregatingMergeTree")); - new_storage->set(new_storage->order_by, std::make_shared(window_id_column_name)); - new_storage->set(new_storage->primary_key, std::make_shared(window_id_column_name)); + if (inner_select_query->groupBy()->children.size() == 1) //GROUP BY windowID + { + auto node = visit(inner_select_query->groupBy()->children[0].get()); + new_storage->set(new_storage->order_by, std::make_shared(node->getColumnName())); + } + else + { + auto group_by_function = makeASTFunction("tuple"); + for (auto & child : inner_select_query->groupBy()->children) + { + auto node = visit(child.get()); + group_by_function->arguments->children.push_back(std::make_shared(node->getColumnName())); + } + new_storage->set(new_storage->order_by, group_by_function); + } } auto new_columns = std::make_shared(); diff --git a/src/TableFunctions/CMakeLists.txt b/src/TableFunctions/CMakeLists.txt index 576d1ea23ff..c58f93e310a 100644 --- a/src/TableFunctions/CMakeLists.txt +++ b/src/TableFunctions/CMakeLists.txt @@ -1,9 +1,21 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(clickhouse_table_functions .) +if (TARGET ch_contrib::hivemetastore) + add_headers_and_sources(clickhouse_table_functions Hive) +endif () -list(REMOVE_ITEM clickhouse_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp) -list(REMOVE_ITEM clickhouse_table_functions_headers ITableFunction.h TableFunctionFactory.h) +list(REMOVE_ITEM clickhouse_table_functions_sources + ITableFunction.cpp + TableFunctionView.cpp + TableFunctionFactory.cpp) +list(REMOVE_ITEM clickhouse_table_functions_headers + ITableFunction.h + TableFunctionView.h + TableFunctionFactory.h) add_library(clickhouse_table_functions ${clickhouse_table_functions_sources}) -target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) +target_link_libraries(clickhouse_table_functions PRIVATE clickhouse_parsers clickhouse_storages_system dbms) +if (TARGET ch_contrib::hivemetastore) + target_link_libraries(clickhouse_table_functions PRIVATE ch_contrib::hivemetastore ch_contrib::hdfs) +endif () diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp new file mode 100644 index 00000000000..e7de55181c3 --- /dev/null +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -0,0 +1,91 @@ +#include +#if USE_HIVE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + namespace ErrorCodes + { + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + } + + void TableFunctionHive::parseArguments(const ASTPtr & ast_function_, ContextPtr context_) + { + ASTs & args_func = ast_function_->children; + if (args_func.size() != 1) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' 
must have arguments.", getName()); + + ASTs & args = args_func.at(0)->children; + + const auto message = fmt::format( + "The signature of function {} is:\n" + " - hive_url, hive_database, hive_table, structure, partition_by_keys", + getName()); + + if (args.size() != 5) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, message); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context_); + + hive_metastore_url = args[0]->as().value.safeGet(); + hive_database = args[1]->as().value.safeGet(); + hive_table = args[2]->as().value.safeGet(); + table_structure = args[3]->as().value.safeGet(); + partition_by_def = args[4]->as().value.safeGet(); + + actual_columns = parseColumnsListFromString(table_structure, context_); + } + + ColumnsDescription TableFunctionHive::getActualTableStructure(ContextPtr /*context_*/) const { return actual_columns; } + + StoragePtr TableFunctionHive::executeImpl( + const ASTPtr & /*ast_function_*/, + ContextPtr context_, + const std::string & table_name_, + ColumnsDescription /*cached_columns_*/) const + { + const Settings & settings = context_->getSettings(); + ParserLambdaExpression partition_by_parser; + ASTPtr partition_by_ast = parseQuery( + partition_by_parser, + "(" + partition_by_def + ")", + "partition by declaration list", + settings.max_query_size, + settings.max_parser_depth); + StoragePtr storage; + storage = StorageHive::create( + hive_metastore_url, + hive_database, + hive_table, + StorageID(getDatabaseName(), table_name_), + actual_columns, + ConstraintsDescription{}, + "", + partition_by_ast, + std::make_unique(), + context_); + + return storage; + } + + + void registerTableFunctionHive(TableFunctionFactory & factory_) { factory_.registerFunction(); } + +} +#endif diff --git a/src/TableFunctions/Hive/TableFunctionHive.h b/src/TableFunctions/Hive/TableFunctionHive.h new file mode 100644 index 00000000000..0973bdda329 --- /dev/null +++ b/src/TableFunctions/Hive/TableFunctionHive.h @@ -0,0 +1,38 @@ +#pragma once +#include +#if USE_HIVE +#include +#include +namespace DB +{ +class Context; +class TableFunctionHive : public ITableFunction +{ +public: + static constexpr auto name = "hive"; + static constexpr auto storage_type_name = "hive"; + std::string getName() const override { return name; } + + bool hasStaticStructure() const override { return true; } + + StoragePtr executeImpl( + const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; + + const char * getStorageTypeName() const override { return storage_type_name; } + ColumnsDescription getActualTableStructure(ContextPtr) const override; + void parseArguments(const ASTPtr & ast_function_, ContextPtr context_) override; + +private: + Poco::Logger * logger = &Poco::Logger::get("TableFunctionHive"); + + String cluster_name; + String hive_metastore_url; + String hive_database; + String hive_table; + String table_structure; + String partition_by_def; + + ColumnsDescription actual_columns; +}; +} +#endif diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 93cf5057e88..9c8d694865b 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -52,6 +52,16 @@ public: /// Returns actual table structure probably requested from remote server, may fail virtual ColumnsDescription getActualTableStructure(ContextPtr /*context*/) const = 0; + /// Check if table function needs a structure hint from SELECT query in case of 
+ /// INSERT INTO FUNCTION ... SELECT ... + /// It's used for schema inference. + virtual bool needStructureHint() const { return false; } + + /// Set a structure hint from SELECT query in case of + /// INSERT INTO FUNCTION ... SELECT ... + /// This hint could be used not to repeat schema in function arguments. + virtual void setStructureHint(const ColumnsDescription &) {} + /// Create storage according to the query. StoragePtr execute(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns_ = {}, bool use_global_context = false) const; diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 5328abd1654..3388a7ec9f6 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -95,6 +95,9 @@ StoragePtr ITableFunctionFileLike::executeImpl(const ASTPtr & /*ast_function*/, ColumnsDescription columns; if (structure != "auto") columns = parseColumnsListFromString(structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; + StoragePtr storage = getStorage(filename, format, columns, context, table_name, compression_method); storage->startup(); return storage; diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 6e00aac9c37..cd85f20fdc0 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -12,6 +12,10 @@ class Context; */ class ITableFunctionFileLike : public ITableFunction { +public: + bool needStructureHint() const override { return structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } protected: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; @@ -20,6 +24,7 @@ protected: String format = "auto"; String structure = "auto"; String compression_method = "auto"; + ColumnsDescription structure_hint; private: StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const std::string & table_name, ColumnsDescription cached_columns) const override; diff --git a/src/TableFunctions/TableFunctionExecutable.cpp b/src/TableFunctions/TableFunctionExecutable.cpp index 41ba2db5c33..18d7d8867e8 100644 --- a/src/TableFunctions/TableFunctionExecutable.cpp +++ b/src/TableFunctions/TableFunctionExecutable.cpp @@ -78,7 +78,7 @@ StoragePtr TableFunctionExecutable::executeImpl(const ASTPtr & /*ast_function*/, auto global_context = context->getGlobalContext(); ExecutableSettings settings; settings.script_name = script_name; - settings.script_arguments = std::move(arguments); + settings.script_arguments = arguments; auto storage = StorageExecutable::create(storage_id, format, settings, input_queries, getActualTableStructure(context), ConstraintsDescription{}); storage->startup(); diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 192846f7f11..b09bb8b6ae1 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -41,6 +41,7 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); } + return parseColumnsListFromString(structure, context); } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 1660cadab04..f844772983a 
100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -168,6 +168,8 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context ColumnsDescription columns; if (s3_configuration->structure != "auto") columns = parseColumnsListFromString(s3_configuration->structure, context); + else if (!structure_hint.empty()) + columns = structure_hint; StoragePtr storage = StorageS3::create( s3_uri, @@ -181,7 +183,7 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, - getActualTableStructure(context), + columns, ConstraintsDescription{}, String{}, context, diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index bd0226e348a..06a327593b0 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -25,6 +25,10 @@ public: } bool hasStaticStructure() const override { return s3_configuration->structure != "auto"; } + bool needStructureHint() const override { return s3_configuration->structure == "auto"; } + + void setStructureHint(const ColumnsDescription & structure_hint_) override { structure_hint = structure_hint_; } + protected: StoragePtr executeImpl( const ASTPtr & ast_function, @@ -38,6 +42,7 @@ protected: void parseArguments(const ASTPtr & ast_function, ContextPtr context) override; std::optional s3_configuration; + ColumnsDescription structure_hint; }; class TableFunctionCOS : public TableFunctionS3 diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index 2cab8aeca25..e9fcbb219a3 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -15,6 +15,12 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } + +const ASTSelectWithUnionQuery & TableFunctionView::getSelectQuery() const +{ + return *create.select; +} + void TableFunctionView::parseArguments(const ASTPtr & ast_function, ContextPtr /*context*/) { const auto * function = ast_function->as(); diff --git a/src/TableFunctions/TableFunctionView.h b/src/TableFunctions/TableFunctionView.h index c20b45e7546..4afb049e738 100644 --- a/src/TableFunctions/TableFunctionView.h +++ b/src/TableFunctions/TableFunctionView.h @@ -16,6 +16,9 @@ class TableFunctionView : public ITableFunction public: static constexpr auto name = "view"; std::string getName() const override { return name; } + + const ASTSelectWithUnionQuery & getSelectQuery() const; + private: StoragePtr executeImpl(const ASTPtr & ast_function, ContextPtr context, const String & table_name, ColumnsDescription cached_columns) const override; const char * getStorageTypeName() const override { return "View"; } diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index ed08972e74d..9e09fac665a 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -31,6 +31,10 @@ void registerTableFunctions() registerTableFunctionHDFSCluster(factory); #endif +#if USE_HIVE + registerTableFunctionHive(factory); +#endif + registerTableFunctionODBC(factory); registerTableFunctionJDBC(factory); diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 72ca185f656..e39d21cb580 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -29,6 +29,10 @@ void 
registerTableFunctionHDFS(TableFunctionFactory & factory); void registerTableFunctionHDFSCluster(TableFunctionFactory & factory); #endif +#if USE_HIVE +void registerTableFunctionHive(TableFunctionFactory & factory); +#endif + void registerTableFunctionODBC(TableFunctionFactory & factory); void registerTableFunctionJDBC(TableFunctionFactory & factory); diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 4ea97d68ded..c318e163689 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -240,6 +240,7 @@ def main(): "https://s3.amazonaws.com/clickhouse-builds/" + url.replace("+", "%2B").replace(" ", "%20") ) + success = len(build_urls) > 0 create_json_artifact( TEMP_PATH, build_name, @@ -247,9 +248,13 @@ def main(): build_urls, build_config, 0, - len(build_urls) > 0, + success, ) - return + # Fail build job if not successeded + if not success: + sys.exit(1) + else: + sys.exit(0) image_name = get_image_name(build_config) docker_image = get_image_with_version(IMAGES_PATH, image_name) diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json deleted file mode 100644 index 19afdd172d5..00000000000 --- a/tests/ci/ci_config.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "build_config": [ - { - "compiler": "clang-13", - "build-type": "", - "sanitizer": "", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "alien_pkgs": true, - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-13", - "build-type": "", - "sanitizer": "", - "package-type": "performance", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "gcc-11", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-13", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - } - ], - "tests_config": { - "Testflows check": { - "required_build_properties": { - "compiler": "clang-13", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Release": { - "required_build_properties": { - "compiler": "clang-13", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "ClickHouse Keeper Jepsen": { - "required_build_properties": { - "compiler": "clang-13", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - } - } -} diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 000d3d9a000..b45a4ce90c6 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -231,7 +231,6 @@ CI_CONFIG = { }, "Stateful tests (aarch64, actions)": { "required_build": "package_aarch64", - "force_tests": True, }, "Stateful tests (release, DatabaseOrdinary, actions)": { "required_build": "package_release", @@ -259,7 +258,6 @@ CI_CONFIG = { }, "Stateless tests (aarch64, actions)": { "required_build": "package_aarch64", - "force_tests": True, }, "Stateless tests (release, wide parts enabled, actions)": { "required_build": "package_release", diff --git 
a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py index 2ba47b89535..44f8ca43339 100755 --- a/tests/ci/push_to_artifactory.py +++ b/tests/ci/push_to_artifactory.py @@ -8,6 +8,7 @@ from typing import Tuple from artifactory import ArtifactorySaaSPath # type: ignore from build_download_helper import dowload_build_with_progress +from env_helper import RUNNER_TEMP from git_helper import TAG_REGEXP, commit, removeprefix, removesuffix @@ -19,7 +20,7 @@ def getenv(name: str, default: str = None): raise KeyError(f"Necessary {name} environment is not set") -TEMP_PATH = getenv("TEMP_PATH", ".") +TEMP_PATH = os.path.join(RUNNER_TEMP, "push_to_artifactory") # One of the following ENVs is necessary JFROG_API_KEY = getenv("JFROG_API_KEY", "") JFROG_TOKEN = getenv("JFROG_TOKEN", "") @@ -45,11 +46,11 @@ class Packages: for name, arch in self.packages ) - self.tgz = tuple("{}-{}.tgz".format(name, version) for name, _ in self.packages) + self.tgz = tuple(f"{name}-{version}.tgz" for name, _ in self.packages) def arch(self, deb_pkg: str) -> str: if deb_pkg not in self.deb: - raise ValueError("{} not in {}".format(deb_pkg, self.deb)) + raise ValueError(f"{deb_pkg} not in {self.deb}") return removesuffix(deb_pkg, ".deb").split("_")[-1] @staticmethod diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 111222e90af..121a283d0e4 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -337,6 +337,26 @@ class FailureReason(enum.Enum): INTERNAL_ERROR = "Test internal error: " +class SettingsRandomizer: + settings = { + "max_insert_threads": lambda: 0 if random.random() < 0.5 else random.randint(1, 16), + "group_by_two_level_threshold": lambda: 1 if random.random() < 0.1 else 2 ** 60 if random.random() < 0.11 else 100000, + "group_by_two_level_threshold_bytes": lambda: 1 if random.random() < 0.1 else 2 ** 60 if random.random() < 0.11 else 50000000, + "distributed_aggregation_memory_efficient": lambda: random.randint(0, 1), + "fsync_metadata": lambda: random.randint(0, 1), + "priority": lambda: int(abs(random.gauss(0, 2))), + "output_format_parallel_formatting": lambda: random.randint(0, 1), + "input_format_parallel_parsing": lambda: random.randint(0, 1), + } + + @staticmethod + def get_random_settings(): + random_settings = [] + for setting, generator in SettingsRandomizer.settings.items(): + random_settings.append(setting + "=" + str(generator()) + "") + return random_settings + + class TestResult: def __init__(self, case_name: str, status: TestStatus, reason: Optional[FailureReason], total_time: float, description: str): self.case_name: str = case_name @@ -417,6 +437,29 @@ class TestCase: return testcase_args + def add_random_settings(self, client_options): + if self.tags and 'no-random-settings' in self.tags: + return client_options + + if len(self.base_url_params) == 0: + os.environ['CLICKHOUSE_URL_PARAMS'] = '&'.join(self.random_settings) + else: + os.environ['CLICKHOUSE_URL_PARAMS'] = self.base_url_params + '&' + '&'.join(self.random_settings) + + new_options = " --allow_repeated_settings --" + " --".join(self.random_settings) + os.environ['CLICKHOUSE_CLIENT_OPT'] = self.base_client_options + new_options + ' ' + return client_options + new_options + + def remove_random_settings_from_env(self): + os.environ['CLICKHOUSE_URL_PARAMS'] = self.base_url_params + os.environ['CLICKHOUSE_CLIENT_OPT'] = self.base_client_options + + def add_info_about_settings(self, description): + if self.tags and 'no-random-settings' in self.tags: + return description + + return description + "\n" + 
"Settings used in the test: " + "--" + " --".join(self.random_settings) + "\n" + def __init__(self, suite, case: str, args, is_concurrent: bool): self.case: str = case # case file name self.tags: Set[str] = suite.all_tags[case] if case in suite.all_tags else set() @@ -432,6 +475,10 @@ class TestCase: self.testcase_args = None self.runs_count = 0 + self.random_settings = SettingsRandomizer.get_random_settings() + self.base_url_params = os.environ['CLICKHOUSE_URL_PARAMS'] if 'CLICKHOUSE_URL_PARAMS' in os.environ else '' + self.base_client_options = os.environ['CLICKHOUSE_CLIENT_OPT'] if 'CLICKHOUSE_CLIENT_OPT' in os.environ else '' + # should skip test, should increment skipped_total, skip reason def should_skip_test(self, suite) -> Optional[FailureReason]: tags = self.tags @@ -673,10 +720,13 @@ class TestCase: self.runs_count += 1 self.testcase_args = self.configure_testcase_args(args, self.case_file, suite.suite_tmp_path) + client_options = self.add_random_settings(client_options) proc, stdout, stderr, total_time = self.run_single_test(server_logs_level, client_options) result = self.process_result_impl(proc, stdout, stderr, total_time) result.check_if_need_retry(args, stdout, stderr, self.runs_count) + if result.status == TestStatus.FAIL: + result.description = self.add_info_about_settings(result.description) return result except KeyboardInterrupt as e: raise e @@ -684,17 +734,20 @@ class TestCase: return TestResult(self.name, TestStatus.FAIL, FailureReason.INTERNAL_QUERY_FAIL, 0., - self.get_description_from_exception_info(sys.exc_info())) + self.add_info_about_settings(self.get_description_from_exception_info(sys.exc_info()))) except (ConnectionRefusedError, ConnectionResetError): return TestResult(self.name, TestStatus.FAIL, FailureReason.SERVER_DIED, 0., - self.get_description_from_exception_info(sys.exc_info())) + self.add_info_about_settings(self.get_description_from_exception_info(sys.exc_info()))) except: return TestResult(self.name, TestStatus.UNKNOWN, FailureReason.INTERNAL_ERROR, 0., self.get_description_from_exception_info(sys.exc_info())) + finally: + self.remove_random_settings_from_env() + class TestSuite: @staticmethod @@ -1078,11 +1131,15 @@ def collect_build_flags(args): if value == 0: result.append(BuildFlags.POLYMORPHIC_PARTS) - use_flags = clickhouse_execute(args, "SELECT name FROM system.build_options WHERE name like 'USE_%' AND value in ('ON', '1');") + use_flags = clickhouse_execute(args, "SELECT name FROM system.build_options WHERE name like 'USE_%' AND value in ('ON', '1')") for use_flag in use_flags.strip().splitlines(): use_flag = use_flag.decode().lower() result.append(use_flag) + system_processor = clickhouse_execute(args, "SELECT value FROM system.build_options WHERE name = 'SYSTEM_PROCESSOR' LIMIT 1").strip() + if system_processor: + result.append(f'cpu-{system_processor.decode().lower()}') + return result diff --git a/tests/config/config.d/s3_storage_policy_by_default.xml b/tests/config/config.d/s3_storage_policy_by_default.xml index 6ce997a2c16..b4a2d697c78 100644 --- a/tests/config/config.d/s3_storage_policy_by_default.xml +++ b/tests/config/config.d/s3_storage_policy_by_default.xml @@ -6,6 +6,8 @@ http://localhost:11111/test/test/ clickhouse clickhouse + 1 + 22548578304 diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml new file mode 100644 index 00000000000..2e43f735605 --- /dev/null +++ b/tests/config/config.d/storage_conf.xml @@ -0,0 +1,23 @@ + + + + + s3 + http://localhost:11111/test/00170_test/ + clickhouse 
+ clickhouse + 1 + 22548578304 + + + + + +
+ s3_cache +
+
+
+
+
+
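The clickhouse-test changes above wire the new SettingsRandomizer into both the HTTP URL parameters and the client command line. Below is a minimal, self-contained Python sketch of that wiring; the generator table is trimmed to two entries copied from the diff, and the helper names (random_settings, client_options, url_params) are illustrative rather than part of the test harness.

import random

# Two of the generators from the new SettingsRandomizer table, for illustration.
SETTINGS = {
    "fsync_metadata": lambda: random.randint(0, 1),
    "max_insert_threads": lambda: 0 if random.random() < 0.5 else random.randint(1, 16),
}

def random_settings():
    # Same shape as SettingsRandomizer.get_random_settings(): a list of "name=value" strings.
    return ["{}={}".format(name, gen()) for name, gen in SETTINGS.items()]

def client_options(base_options, settings):
    # Mirrors TestCase.add_random_settings(): each setting becomes a --flag,
    # prefixed with --allow_repeated_settings so repeated settings are accepted.
    return base_options + " --allow_repeated_settings --" + " --".join(settings)

def url_params(base_params, settings):
    # Mirrors how CLICKHOUSE_URL_PARAMS is extended for HTTP-based tests.
    joined = "&".join(settings)
    return joined if not base_params else base_params + "&" + joined

if __name__ == "__main__":
    generated = random_settings()
    print(client_options("", generated))
    print(url_params("database=default", generated))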
diff --git a/tests/config/install.sh b/tests/config/install.sh index 320a3d0f799..c499ffa88f7 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -78,6 +78,15 @@ fi if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ln -sf $SRC_PATH/users.d/database_ordinary.xml $DEST_SERVER_PATH/users.d/ fi + +if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ +fi + +if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then + ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ +fi + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then ln -sf $SRC_PATH/users.d/database_replicated.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/config.d/database_replicated.xml $DEST_SERVER_PATH/config.d/ @@ -107,8 +116,4 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo chgrp clickhouse /var/lib/clickhouse2 fi -if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then - ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ -fi - ln -sf $SRC_PATH/client_config.xml $DEST_CLIENT_PATH/config.xml diff --git a/tests/integration/test_access_for_functions/test.py b/tests/integration/test_access_for_functions/test.py index ebd0f6bd907..0abe74e31a3 100644 --- a/tests/integration/test_access_for_functions/test.py +++ b/tests/integration/test_access_for_functions/test.py @@ -1,8 +1,9 @@ import pytest +import uuid from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance') +instance = cluster.add_instance('instance', stay_alive=True) @pytest.fixture(scope="module", autouse=True) @@ -14,7 +15,8 @@ def started_cluster(): finally: cluster.shutdown() -def test_access_rights_for_funtion(): + +def test_access_rights_for_function(): create_function_query = "CREATE FUNCTION MySum AS (a, b) -> a + b" instance.query("CREATE USER A") @@ -37,3 +39,19 @@ def test_access_rights_for_funtion(): instance.query("DROP USER IF EXISTS A") instance.query("DROP USER IF EXISTS B") + + +def test_ignore_obsolete_grant_on_database(): + instance.stop_clickhouse() + + user_id = uuid.uuid4() + instance.exec_in_container(["bash", "-c" , f""" + cat > /var/lib/clickhouse/access/{user_id}.sql << EOF +ATTACH USER X; +ATTACH GRANT CREATE FUNCTION, SELECT ON mydb.* TO X; +EOF"""]) + + instance.exec_in_container(["bash", "-c" , "touch /var/lib/clickhouse/access/need_rebuild_lists.mark"]) + instance.start_clickhouse() + + assert instance.query("SHOW GRANTS FOR X") == "GRANT SELECT ON mydb.* TO X\n" diff --git a/tests/integration/test_distributed_ddl_on_cross_replication/test.py b/tests/integration/test_distributed_ddl_on_cross_replication/test.py index 833a3fb1f04..b61bfc5d83f 100644 --- a/tests/integration/test_distributed_ddl_on_cross_replication/test.py +++ b/tests/integration/test_distributed_ddl_on_cross_replication/test.py @@ -104,3 +104,11 @@ def test_atomic_database(started_cluster): node1.query("INSERT INTO replica_1.rmt VALUES (1, 'test')") node2.query("SYSTEM SYNC REPLICA replica_2.rmt", timeout=5) assert_eq_with_retry(node2, "SELECT * FROM replica_2.rmt", '1\ttest') + +def test_non_query_with_table_ddl(started_cluster): + node1.query("CREATE USER A ON CLUSTER cross_3shards_2replicas") + + assert node1.query("SELECT 1", user='A') == "1\n" + assert 
node2.query("SELECT 1", user='A') == "1\n" + + node2.query("DROP USER A ON CLUSTER cross_3shards_2replicas") diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index a68ae0b066d..20b6a6cb8f2 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -27,25 +27,52 @@ def started_cluster(): def test_create_parquet_table(started_cluster): logging.info('Start testing creating hive table ...') node = started_cluster.instances['h0_0_0'] - node.query("set input_format_parquet_allow_missing_columns = true") - result = node.query(""" - DROP TABLE IF EXISTS default.demo_parquet; - CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) + test_passed = False + for i in range(10): + node.query("set input_format_parquet_allow_missing_columns = true") + result = node.query(""" +DROP TABLE IF EXISTS default.demo_parquet; +CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) """) - logging.info("create result {}".format(result)) - time.sleep(120) - assert result.strip() == '' + logging.info("create result {}".format(result)) + if result.strip() == '': + test_passed = True + break + time.sleep(60) + assert test_passed + +def test_create_parquet_table_1(started_cluster): + logging.info('Start testing creating hive table ...') + node = started_cluster.instances['h0_0_0'] + for i in range(10): + node.query("set input_format_parquet_allow_missing_columns = true") + result = node.query(""" +DROP TABLE IF EXISTS default.demo_parquet_parts; +CREATE TABLE default.demo_parquet_parts (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String), `hour` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'parquet_demo') PARTITION BY(day, hour); + """) + logging.info("create result {}".format(result)) + if result.strip() == '': + test_passed = True + break + time.sleep(60) + assert test_passed def test_create_orc_table(started_cluster): logging.info('Start testing creating hive table ...') node = started_cluster.instances['h0_0_0'] - result = node.query(""" + test_passed = False + for i in range(10): + result = node.query(""" DROP TABLE IF EXISTS default.demo_orc; CREATE TABLE default.demo_orc (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo_orc') PARTITION BY(day) """) - logging.info("create result {}".format(result)) + logging.info("create result {}".format(result)) + if result.strip() == '': + test_passed = True + break + time.sleep(60) - assert result.strip() == '' + assert test_passed def test_create_text_table(started_cluster): logging.info('Start testing creating hive table ...') @@ -70,6 +97,17 @@ def test_parquet_groupby(started_cluster): 2021-11-16 2 """ assert result == expected_result + +def test_parquet_in_filter(started_cluster): + logging.info('Start testing groupby ...') + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + SELECT count(*) FROM default.demo_parquet_parts where day = '2021-11-05' and hour in ('00') + """) + expected_result = """2 +""" + logging.info("query result:{}".format(result)) + assert result == expected_result def test_orc_groupby(started_cluster): logging.info('Start testing groupby ...') node = 
started_cluster.instances['h0_0_0'] @@ -107,26 +145,36 @@ def test_parquet_groupby_with_cache(started_cluster): 2021-11-16 2 """ assert result == expected_result -def test_cache_read_bytes(started_cluster): + +def test_parquet_groupby_by_hive_function(started_cluster): + logging.info('Start testing groupby ...') node = started_cluster.instances['h0_0_0'] - node.query("set input_format_parquet_allow_missing_columns = true") result = node.query(""" - DROP TABLE IF EXISTS default.demo_parquet; - CREATE TABLE default.demo_parquet (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) - """) - result = node.query(""" - SELECT day, count(*) FROM default.demo_parquet group by day order by day - """) - result = node.query(""" - SELECT day, count(*) FROM default.demo_parquet group by day order by day + SELECT day, count(*) FROM hive('thrift://hivetest:9083', 'test', 'demo', '`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)', 'day') group by day order by day """) expected_result = """2021-11-01 1 2021-11-05 2 2021-11-11 1 2021-11-16 2 """ - time.sleep(120) assert result == expected_result - result = node.query("select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0") - logging.info("Read bytes from cache:{}".format(result)) - assert result.strip() != '0' + +def test_cache_read_bytes(started_cluster): + node = started_cluster.instances['h0_0_0'] + result = node.query(""" + CREATE TABLE IF NOT EXISTS default.demo_parquet_1 (`id` Nullable(String), `score` Nullable(Int32), `day` Nullable(String)) ENGINE = Hive('thrift://hivetest:9083', 'test', 'demo') PARTITION BY(day) + """) + test_passed = False + for i in range(10): + result = node.query(""" + SELECT day, count(*) FROM default.demo_parquet_1 group by day order by day settings input_format_parquet_allow_missing_columns = true + """) + node.query("system flush logs") + result = node.query("select sum(ProfileEvent_ExternalDataSourceLocalCacheReadBytes) from system.metric_log where ProfileEvent_ExternalDataSourceLocalCacheReadBytes > 0") + if result.strip() == '0': + logging.info("ProfileEvent_ExternalDataSourceLocalCacheReadBytes == 0") + time.sleep(10) + continue + test_passed = True + break + assert test_passed diff --git a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py index 377a48be7ed..fef2b8a6ffb 100644 --- a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py @@ -1183,3 +1183,63 @@ def materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, m "\t2021\t3020399000000\t3020399000000\t00000000010100000000000000000000000000000000000000\t10\t1\t11\tvarbinary\tRED\n" + "2\t2\t22\t9223372036854775807\t-2\t2\t22\t18446744073709551615\t-2.2\t2.2\t-2.22\t2.222\t2.2222\t2021-10-07\ttext\tvarchar\tBLOB\t2021-10-07 18:32:57\t2021-10-07 18:32:57.482786\t2021-10-07 18:32:57\t2021-10-07 18:32:57.482786" + "\t2021\t-3020399000000\t-46798000001\t000000000101000000D55C6E30D4095E40DCF0BBE996493E40\t11\t3\t22\tvarbinary\tGREEN\n") + + +def materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query("DROP DATABASE 
IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database") + mysql_node.query("CREATE TABLE test_database.a (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))") + mysql_node.query("INSERT INTO test_database.a VALUES(1, 'foo')") + mysql_node.query("INSERT INTO test_database.a VALUES(2, 'bar')") + # table b(include json type, not in materialized_mysql_tables_list) can be skip + mysql_node.query("CREATE TABLE test_database.b (id INT(11) NOT NULL PRIMARY KEY, value JSON)") + + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MaterializedMySQL('{}:3306', 'test_database', 'root', 'clickhouse') SETTINGS materialized_mysql_tables_list = ' a,c,d'".format(service_name)) + + check_query(clickhouse_node, "SELECT name from system.tables where database = 'test_database' FORMAT TSV", "a\n") + check_query(clickhouse_node, "SELECT COUNT() FROM test_database.a FORMAT TSV", "2\n") + + # mysql data(binlog) can be skip + mysql_node.query("INSERT INTO test_database.b VALUES(1, '{\"name\":\"testjson\"}')") + mysql_node.query("INSERT INTO test_database.b VALUES(2, '{\"name\":\"testjson\"}')") + + # irrelevant database can be skip + mysql_node.query("DROP DATABASE IF EXISTS other_database") + mysql_node.query("CREATE DATABASE other_database") + mysql_node.query("CREATE TABLE other_database.d (id INT(11) NOT NULL PRIMARY KEY, value json)") + mysql_node.query("INSERT INTO other_database.d VALUES(1, '{\"name\":\"testjson\"}')") + + mysql_node.query("CREATE TABLE test_database.c (id INT(11) NOT NULL PRIMARY KEY, value VARCHAR(255))") + mysql_node.query("INSERT INTO test_database.c VALUES(1, 'foo')") + mysql_node.query("INSERT INTO test_database.c VALUES(2, 'bar')") + + check_query(clickhouse_node, "SELECT name from system.tables where database = 'test_database' FORMAT TSV", "a\nc\n") + check_query(clickhouse_node, "SELECT COUNT() FROM test_database.c FORMAT TSV", "2\n") + + clickhouse_node.query("DROP DATABASE test_database") + mysql_node.query("DROP DATABASE test_database") + + +def materialized_database_mysql_date_type_to_date32(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database") + mysql_node.query("CREATE TABLE test_database.a (a INT(11) NOT NULL PRIMARY KEY, b date DEFAULT NULL)") + # can't support date that less than 1925 year for now + mysql_node.query("INSERT INTO test_database.a VALUES(1, '1900-04-16')") + # test date that is older than 1925 + mysql_node.query("INSERT INTO test_database.a VALUES(2, '1925-03-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(3, '1971-02-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(4, '2101-05-16')") + + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MaterializedMySQL('{}:3306', 'test_database', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT b from test_database.a order by a FORMAT TSV", "1970-01-01\n1925-03-16\n1971-02-16\n2101-05-16\n") + + mysql_node.query("INSERT INTO test_database.a VALUES(5, '1925-04-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(6, '2022-02-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(7, '2283-11-11')") + + check_query(clickhouse_node, "SELECT b from test_database.a order by a FORMAT TSV", "1970-01-01\n1925-03-16\n1971-02-16\n2101-05-16\n1925-04-16\n2022-02-16\n" + + "2283-11-11\n") + diff --git 
a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 501c0cd78fa..027f874596d 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -257,3 +257,11 @@ def test_table_overrides(started_cluster, started_mysql_8_0, started_mysql_5_7, def test_materialized_database_support_all_kinds_of_mysql_datatype(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): materialize_with_ddl.materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, started_mysql_8_0, "mysql80") materialize_with_ddl.materialized_database_support_all_kinds_of_mysql_datatype(clickhouse_node, started_mysql_5_7, "mysql57") + +def test_materialized_database_settings_materialized_mysql_tables_list(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, started_mysql_5_7, "mysql57") + +def test_materialized_database_mysql_date_type_to_date32(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.materialized_database_mysql_date_type_to_date32(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.materialized_database_mysql_date_type_to_date32(clickhouse_node, started_mysql_5_7, "mysql57") \ No newline at end of file diff --git a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml index a0fe0a6f609..2f1b8275a0b 100644 --- a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml @@ -19,6 +19,14 @@ local / + + s3 + http://minio1:9001/root/data/ + minio + minio123 + 33554432 + 1 +
@@ -38,6 +46,13 @@ + + +
+ s3_with_cache +
+
+
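Judging only by the expectations in the new MaterializedMySQL test above (materialized_database_settings_materialized_mysql_tables_list), the materialized_mysql_tables_list setting restricts replication to the named tables, entries appear to be trimmed (the test passes ' a,c,d' with a leading space and table a is still replicated), and an empty list keeps the previous replicate-everything behaviour. A rough Python sketch of that observable rule follows; it is an approximation of the test's expectations, not the server-side implementation.

def should_replicate(table_name, tables_list):
    # Empty list: replicate every table; otherwise only the listed ones.
    wanted = {entry.strip() for entry in tables_list.split(",") if entry.strip()}
    return not wanted or table_name in wanted

# Expectations taken from the new test (materialized_mysql_tables_list = ' a,c,d'):
assert should_replicate("a", " a,c,d")        # listed, so replicated
assert should_replicate("c", " a,c,d")        # listed, so replicated once created
assert not should_replicate("b", " a,c,d")    # the JSON table b is skipped
assert should_replicate("anything", "")       # no list, so everything is replicated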
diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index ff1c955d78b..35d6d6e72b6 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -235,6 +235,9 @@ int8_values = [0, 1, -1, 127, -128] uint8_values = [0, 1, 255] # string_values = ["'ClickHouse'", 'NULL'] string_values = ["'ClickHouse'"] +date_values=["'1970-01-01'"] +date2Date32_values=["'1925-01-01'", "'2283-11-11'"] +date2String_values=["'1000-01-01'", "'9999-12-31'"] decimal_values = [0, 0.123, 0.4, 5.67, 8.91011, 123456789.123, -0.123, -0.4, -5.67, -8.91011, -123456789.123] @@ -274,6 +277,9 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' pytest.param("common_types", "VARCHAR(10)", "Nullable(String)", string_values, "", id="common_types_20"), + pytest.param("common_types", "DATE", "Nullable(Date)", date_values, "", id="common_types_21"), + pytest.param("common_types", "DATE", "Nullable(Date32)", date2Date32_values, "date2Date32", id="common_types_22"), + pytest.param("common_types", "DATE", "Nullable(String)", date2String_values, "date2String", id="common_types_23"), pytest.param("decimal_default", "decimal NOT NULL", "Decimal(10, 0)", decimal_values, "decimal,datetime64", id="decimal_1"), diff --git a/tests/integration/test_part_log_table/configs/config_disk_name_test.xml b/tests/integration/test_part_log_table/configs/config_disk_name_test.xml new file mode 100644 index 00000000000..c8831031674 --- /dev/null +++ b/tests/integration/test_part_log_table/configs/config_disk_name_test.xml @@ -0,0 +1,30 @@ + + + + + local + /path1/ + + + local + /path2/ + + + + + +
+ test1 +
+
+
+ + +
+ test2 +
+
+
+
+
+
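The new parameters added to test_mysql_database_engine/test.py above exercise three mappings for a MySQL DATE column: plain Nullable(Date), Nullable(Date32) when the 'date2Date32' level is enabled, and Nullable(String) when 'date2String' is enabled. A small sketch of that mapping as the parameters imply it is given below; the level tokens are taken from the test, the precedence between the two levels is an assumption, and this is not the engine's actual type-conversion code.

def clickhouse_type_for_mysql_date(support_level):
    # support_level is a comma-separated list of level tokens, in the same style
    # as the test's settings column (e.g. "", "date2Date32", "date2String").
    levels = {part.strip() for part in support_level.split(",") if part.strip()}
    if "date2Date32" in levels:
        return "Nullable(Date32)"   # assumed to take precedence if both levels were given
    if "date2String" in levels:
        return "Nullable(String)"
    return "Nullable(Date)"

# The three combinations covered by the new test parameters:
assert clickhouse_type_for_mysql_date("") == "Nullable(Date)"
assert clickhouse_type_for_mysql_date("date2Date32") == "Nullable(Date32)"
assert clickhouse_type_for_mysql_date("date2String") == "Nullable(String)"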
diff --git a/tests/integration/test_part_log_table/test.py b/tests/integration/test_part_log_table/test.py index 050e8c831c7..eba909acf4a 100644 --- a/tests/integration/test_part_log_table/test.py +++ b/tests/integration/test_part_log_table/test.py @@ -6,6 +6,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance("node1", main_configs=["configs/config_without_standard_part_log.xml"]) node2 = cluster.add_instance("node2", main_configs=["configs/config_with_standard_part_log.xml"]) node3 = cluster.add_instance("node3", main_configs=["configs/config_with_non_standard_part_log.xml"]) +node4 = cluster.add_instance("node4", main_configs=["configs/config_disk_name_test.xml"]) @pytest.fixture(scope="module") @@ -40,3 +41,11 @@ def test_config_with_non_standard_part_log(start_cluster): node3.query("INSERT INTO test_table VALUES ('name', 1)") node3.query("SYSTEM FLUSH LOGS") assert node3.query("SELECT * FROM system.own_part_log") != "" + +def test_config_disk_name_test(start_cluster): + node4.query("CREATE TABLE test_table1(word String, value UInt64) ENGINE = MergeTree() ORDER BY word SETTINGS storage_policy = 'test1'") + node4.query("INSERT INTO test_table1(*) VALUES ('test1', 2)") + node4.query("CREATE TABLE test_table2(word String, value UInt64) ENGINE = MergeTree() ORDER BY word SETTINGS storage_policy = 'test2'") + node4.query("INSERT INTO test_table2(*) VALUES ('test2', 3)") + node4.query("SYSTEM FLUSH LOGS") + assert node4.query("SELECT DISTINCT disk_name FROM system.part_log ORDER by disk_name") == "test1\ntest2\n" diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 3226c040e8e..0115988222c 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -114,6 +114,8 @@ def test_add_new_table_to_replication(started_cluster): assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-222:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4,postgresql_replica_5,postgresql_replica_6,postgresql_replica_7\\'\n") + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10000, 10000)") + result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\npostgresql_replica_6\npostgresql_replica_7\n") check_several_tables_are_synchronized(instance, NUM_TABLES + 3) @@ -133,7 +135,7 @@ def test_remove_table_from_replication(started_cluster): assert(result[-59:] == "\\'postgres_database\\', \\'postgres\\', \\'mysecretpassword\\')\n") table_name = 'postgresql_replica_4' - instance.query(f'DETACH TABLE test_database.{table_name}'); + instance.query(f'DETACH TABLE test_database.{table_name} PERMANENTLY'); result = instance.query_and_get_error(f'SELECT * FROM test_database.{table_name}') assert("doesn't exist" in result) @@ -147,13 +149,15 @@ def test_remove_table_from_replication(started_cluster): instance.query(f'ATTACH TABLE test_database.{table_name}'); check_tables_are_synchronized(instance, table_name); check_several_tables_are_synchronized(instance, NUM_TABLES) + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from 
numbers(10000, 10000)") + check_tables_are_synchronized(instance, table_name); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-159:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") table_name = 'postgresql_replica_1' - instance.query(f'DETACH TABLE test_database.{table_name}'); + instance.query(f'DETACH TABLE test_database.{table_name} PERMANENTLY'); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") @@ -162,7 +166,7 @@ def test_remove_table_from_replication(started_cluster): cursor.execute(f'drop table if exists postgresql_replica_0;') # Removing from replication table which does not exist in PostgreSQL must be ok. - instance.query('DETACH TABLE test_database.postgresql_replica_0'); + instance.query('DETACH TABLE test_database.postgresql_replica_0 PERMANENTLY'); assert instance.contains_in_log("from publication, because table does not exist in PostgreSQL") @@ -236,7 +240,7 @@ def test_database_with_single_non_default_schema(started_cluster): print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" - instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name}") + instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name} PERMANENTLY") assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.{detached_table_name}") check_tables_are_synchronized(instance, detached_table_name, postgres_database=clickhouse_postgres_db); @@ -306,7 +310,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" - instance.query(f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`") + instance.query(f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}` PERMANENTLY") assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`") assert_show_tables("test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n") @@ -385,7 +389,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): detached_table_name = "postgresql_replica_1" detached_table_schema = "schema0" clickhouse_postgres_db = f'clickhouse_postgres_db0' - instance.query(f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`") + instance.query(f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}` PERMANENTLY") assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`") assert_show_tables("schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n") @@ -399,7 +403,7 @@ def test_table_override(started_cluster): 
create_postgres_table(cursor, table_name, template=postgres_table_template_5); instance.query(f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})") instance.query(f"insert into {table_name} select number, generateUUIDv4() from numbers(10)") - table_overrides = f" TABLE OVERRIDE {table_name} (COLUMNS (key Int32, value UUID))" + table_overrides = f" TABLE OVERRIDE {table_name} (COLUMNS (key Int32, value UUID) PARTITION BY key)" pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[f"materialized_postgresql_tables_list = '{table_name}'"], @@ -407,7 +411,7 @@ def test_table_override(started_cluster): assert_nested_table_is_created(instance, table_name, materialized_database) result = instance.query(f"show create table {materialized_database}.{table_name}") print(result) - expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nORDER BY tuple(key)" + expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nPARTITION BY key\\nORDER BY tuple(key)" assert(result.strip() == expected) time.sleep(5) query = f"select * from {materialized_database}.{table_name} order by key" diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 82b06ade6be..4e848dc2915 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -421,6 +421,16 @@ def test_schema_inference_with_globs(started_cluster): assert(sorted(result.split()) == ['0', '\\N']) +def test_insert_select_schema_inference(started_cluster): + node1.query(f"insert into table function hdfs('hdfs://hdfs1:9000/test.native.zst') select toUInt64(1) as x") + + result = node1.query(f"desc hdfs('hdfs://hdfs1:9000/test.native.zst')") + assert(result.strip() == 'x\tUInt64') + + result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/test.native.zst')") + assert(int(result) == 1) + + if __name__ == '__main__': cluster.start() input("Cluster created, press any key to destroy...") diff --git a/tests/integration/test_storage_postgresql/test.py b/tests/integration/test_storage_postgresql/test.py index 55be61e052b..854a1021b99 100644 --- a/tests/integration/test_storage_postgresql/test.py +++ b/tests/integration/test_storage_postgresql/test.py @@ -3,6 +3,7 @@ import pytest from multiprocessing.dummy import Pool from helpers.cluster import ClickHouseCluster +from helpers.postgres_utility import get_postgres_conn cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/named_collections.xml'], with_postgres=True) @@ -186,55 +187,65 @@ def test_non_default_scema(started_cluster): def test_concurrent_queries(started_cluster): - cursor = started_cluster.postgres_conn.cursor() - - node1.query(''' - CREATE TABLE test_table (key UInt32, value UInt32) - ENGINE = PostgreSQL('postgres1:5432', 'postgres', 'test_table', 'postgres', 'mysecretpassword')''') + conn = get_postgres_conn(started_cluster.postgres_ip, started_cluster.postgres_port, database=False) + cursor = conn.cursor() + database_name = 'concurrent_test' + cursor.execute(f'DROP DATABASE IF EXISTS {database_name}') + cursor.execute(f'CREATE DATABASE {database_name}') + 
conn = get_postgres_conn(started_cluster.postgres_ip, started_cluster.postgres_port, database=True, database_name=database_name) + cursor = conn.cursor() cursor.execute('CREATE TABLE test_table (key integer, value integer)') - prev_count = node1.count_in_log('New connection to postgres1:5432') + node1.query(f''' + CREATE TABLE test.test_table (key UInt32, value UInt32) + ENGINE = PostgreSQL(postgres1, database='{database_name}', table='test_table') + ''') + + node1.query(f''' + CREATE TABLE test.stat (numbackends UInt32, datname String) + ENGINE = PostgreSQL(postgres1, database='{database_name}', table='pg_stat_database') + ''') + def node_select(_): for i in range(20): - result = node1.query("SELECT * FROM test_table", user='default') - busy_pool = Pool(20) - p = busy_pool.map_async(node_select, range(20)) - p.wait() - count = node1.count_in_log('New connection to postgres1:5432') - logging.debug(f'count {count}, prev_count {prev_count}') - # 16 is default size for connection pool - assert(int(count) <= int(prev_count) + 16) + result = node1.query("SELECT * FROM test.test_table", user='default') def node_insert(_): - for i in range(5): - result = node1.query("INSERT INTO test_table SELECT number, number FROM numbers(1000)", user='default') - - busy_pool = Pool(5) - p = busy_pool.map_async(node_insert, range(5)) - p.wait() - result = node1.query("SELECT count() FROM test_table", user='default') - logging.debug(result) - assert(int(result) == 5 * 5 * 1000) + for i in range(20): + result = node1.query("INSERT INTO test.test_table SELECT number, number FROM numbers(1000)", user='default') def node_insert_select(_): - for i in range(5): - result = node1.query("INSERT INTO test_table SELECT number, number FROM numbers(1000)", user='default') - result = node1.query("SELECT * FROM test_table LIMIT 100", user='default') + for i in range(20): + result = node1.query("INSERT INTO test.test_table SELECT number, number FROM numbers(1000)", user='default') + result = node1.query("SELECT * FROM test.test_table LIMIT 100", user='default') - busy_pool = Pool(5) - p = busy_pool.map_async(node_insert_select, range(5)) + busy_pool = Pool(30) + p = busy_pool.map_async(node_select, range(30)) p.wait() - result = node1.query("SELECT count() FROM test_table", user='default') - logging.debug(result) - assert(int(result) == 5 * 5 * 1000 * 2) - node1.query('DROP TABLE test_table;') - cursor.execute('DROP TABLE test_table;') + count = int(node1.query(f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'")) + print(count) + assert(count <= 18) - count = node1.count_in_log('New connection to postgres1:5432') - logging.debug(f'count {count}, prev_count {prev_count}') - assert(int(count) <= int(prev_count) + 16) + busy_pool = Pool(30) + p = busy_pool.map_async(node_insert, range(30)) + p.wait() + + count = int(node1.query(f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'")) + print(count) + assert(count <= 18) + + busy_pool = Pool(30) + p = busy_pool.map_async(node_insert_select, range(30)) + p.wait() + + count = int(node1.query(f"SELECT numbackends FROM test.stat WHERE datname = '{database_name}'")) + print(count) + assert(count <= 18) + + node1.query('DROP TABLE test.test_table;') + node1.query('DROP TABLE test.stat;') def test_postgres_distributed(started_cluster): diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 7fb880119a7..fa183a365b1 100644 --- a/tests/integration/test_storage_s3/test.py +++ 
b/tests/integration/test_storage_s3/test.py @@ -818,8 +818,9 @@ def test_seekable_formats(started_cluster): instance.query("SYSTEM FLUSH LOGS") result = instance.query(f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM s3') AND memory_usage > 0 ORDER BY event_time desc") - print(result[:3]) - assert(int(result[:3]) < 200) + + result = result[:result.index('.')] + assert(int(result) < 200) def test_seekable_formats_url(started_cluster): @@ -842,8 +843,9 @@ def test_seekable_formats_url(started_cluster): instance.query("SYSTEM FLUSH LOGS") result = instance.query(f"SELECT formatReadableSize(memory_usage) FROM system.query_log WHERE startsWith(query, 'SELECT count() FROM url') AND memory_usage > 0 ORDER BY event_time desc") - print(result[:3]) - assert(int(result[:3]) < 200) + + result = result[:result.index('.')] + assert(int(result) < 200) def test_empty_file(started_cluster): @@ -886,7 +888,7 @@ def test_s3_schema_inference(started_cluster): result = instance.query(f"select count(*) from schema_inference") assert(int(result) == 5000000) - + table_function = f"url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_native', 'Native')" result = instance.query(f"desc {table_function}") assert result == "a\tInt32\t\t\t\t\t\nb\tString\t\t\t\t\t\n" @@ -949,7 +951,7 @@ def test_create_new_files_on_insert(started_cluster): instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(10) settings s3_truncate_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(20) settings s3_create_new_file_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(30) settings s3_create_new_file_on_insert=1") - + result = instance.query(f"select count() from test_multiple_inserts") assert(int(result) == 60) @@ -961,11 +963,11 @@ def test_create_new_files_on_insert(started_cluster): instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(10) settings s3_truncate_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(20) settings s3_create_new_file_on_insert=1") instance.query(f"insert into test_multiple_inserts select number, randomString(100) from numbers(30) settings s3_create_new_file_on_insert=1") - + result = instance.query(f"select count() from test_multiple_inserts") assert(int(result) == 60) - + def test_format_detection(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] @@ -1038,3 +1040,37 @@ def test_signatures(started_cluster): result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow', 'minio', 'minio123', 'Arrow')") assert(int(result) == 1) + +def test_select_columns(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + name = "test_table2" + structure = "id UInt32, value1 Int32, value2 Int32" + + instance.query(f"drop table if exists {name}") + instance.query(f"CREATE TABLE {name} ({structure}) ENGINE = S3(s3_conf1, format='Parquet')") + + limit = 10000000 + instance.query(f"INSERT INTO {name} SELECT * FROM generateRandom('{structure}') LIMIT {limit} SETTINGS s3_truncate_on_insert=1") + instance.query(f"SELECT value2 FROM {name}") + + instance.query("SYSTEM FLUSH LOGS") + result1 = instance.query(f"SELECT 
read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT value2 FROM {name}'") + + instance.query(f"SELECT * FROM {name}") + instance.query("SYSTEM FLUSH LOGS") + result2 = instance.query(f"SELECT read_bytes FROM system.query_log WHERE type='QueryFinish' and query LIKE 'SELECT * FROM {name}'") + + assert(int(result1) * 3 <= int(result2)) + + +def test_insert_select_schema_inference(started_cluster): + bucket = started_cluster.minio_bucket + instance = started_cluster.instances["dummy"] + + instance.query(f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native') select toUInt64(1) as x") + result = instance.query(f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')") + assert(result.strip() == 'x\tUInt64') + + result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')") + assert(int(result) == 1) diff --git a/tests/integration/test_timezone_config/test.py b/tests/integration/test_timezone_config/test.py index ac12eddc709..af7e3548e6a 100644 --- a/tests/integration/test_timezone_config/test.py +++ b/tests/integration/test_timezone_config/test.py @@ -17,3 +17,25 @@ def start_cluster(): def test_check_timezone_config(start_cluster): assert node.query("SELECT toDateTime(1111111111)") == "2005-03-17 17:58:31\n" + +def test_overflow_toDate(start_cluster): + assert node.query("SELECT toDate('2999-12-31','UTC')") == "2149-06-06\n" + assert node.query("SELECT toDate('2021-12-21','UTC')") == "2021-12-21\n" + assert node.query("SELECT toDate('1000-12-31','UTC')") == "1970-01-01\n" + +def test_overflow_toDate32(start_cluster): + assert node.query("SELECT toDate32('2999-12-31','UTC')") == "2283-11-11\n" + assert node.query("SELECT toDate32('2021-12-21','UTC')") == "2021-12-21\n" + assert node.query("SELECT toDate32('1000-12-31','UTC')") == "1925-01-01\n" + +def test_overflow_toDateTime(start_cluster): + assert node.query("SELECT toDateTime('2999-12-31 00:00:00','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT toDateTime('2106-02-07 06:28:15','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT toDateTime('1970-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + assert node.query("SELECT toDateTime('1000-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + +def test_overflow_parseDateTimeBestEffort(start_cluster): + assert node.query("SELECT parseDateTimeBestEffort('2999-12-31 00:00:00','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT parseDateTimeBestEffort('2106-02-07 06:28:15','UTC')") == "2106-02-07 06:28:15\n" + assert node.query("SELECT parseDateTimeBestEffort('1970-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" + assert node.query("SELECT parseDateTimeBestEffort('1000-01-01 00:00:00','UTC')") == "1970-01-01 00:00:00\n" diff --git a/tests/integration/test_user_zero_database_access/configs/users.xml b/tests/integration/test_user_zero_database_access/configs/users.xml index 8c8dfbb5b7e..25c598aa560 100644 --- a/tests/integration/test_user_zero_database_access/configs/users.xml +++ b/tests/integration/test_user_zero_database_access/configs/users.xml @@ -37,6 +37,24 @@ db1 + + + clickhouse + + ::/0 + + default + default + + + + + + ::/0 + + default + default + diff --git a/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py 
b/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py index dd3789cde57..d77e8383df7 100644 --- a/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py +++ b/tests/integration/test_user_zero_database_access/test_user_zero_database_access.py @@ -70,3 +70,17 @@ def test_user_zero_database_access(start_cluster): ["bash", "-c", "/usr/bin/clickhouse client --user 'default' --query 'DROP DATABASE test2'"], user='root') except Exception as ex: assert False, "user with full access rights can't drop database test2" + + try: + name = node.exec_in_container( + ["bash", "-c", "export CLICKHOUSE_USER=env_user_not_with_password && /usr/bin/clickhouse client --query 'SELECT currentUser()'"], user='root') + assert name.strip() == "env_user_not_with_password" + except Exception as ex: + assert False, "set env CLICKHOUSE_USER can not connect server" + + try: + name = node.exec_in_container( + ["bash", "-c", "export CLICKHOUSE_USER=env_user_with_password && export CLICKHOUSE_PASSWORD=clickhouse && /usr/bin/clickhouse client --query 'SELECT currentUser()'"], user='root') + assert name.strip() == "env_user_with_password" + except Exception as ex: + assert False, "set env CLICKHOUSE_USER CLICKHOUSE_PASSWORD can not connect server" diff --git a/tests/performance/date_time_long.xml b/tests/performance/date_time_long.xml index 0c3d85f9659..f210c807b12 100644 --- a/tests/performance/date_time_long.xml +++ b/tests/performance/date_time_long.xml @@ -83,7 +83,7 @@ time_zone UTC - Europe/Moscow + Asia/Istanbul Asia/Kolkata diff --git a/tests/performance/date_time_short.xml b/tests/performance/date_time_short.xml index 826e1619ab7..de859710670 100644 --- a/tests/performance/date_time_short.xml +++ b/tests/performance/date_time_short.xml @@ -18,7 +18,7 @@ time_zone - Europe/Moscow + Asia/Istanbul diff --git a/tests/performance/generate_table_function.xml b/tests/performance/generate_table_function.xml index bc49a7de1bd..c219d73b6cf 100644 --- a/tests/performance/generate_table_function.xml +++ b/tests/performance/generate_table_function.xml @@ -4,8 +4,8 @@ SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 0, 10, 10) LIMIT 1000000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 1000000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 100000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 
10, 10) LIMIT 1000000000); diff --git a/tests/performance/merge_tree_insert.xml b/tests/performance/merge_tree_insert.xml new file mode 100644 index 00000000000..1e987d27d50 --- /dev/null +++ b/tests/performance/merge_tree_insert.xml @@ -0,0 +1,41 @@ + + + + + integer_primary_key_table_name + + merge_tree_insert_1 + merge_tree_insert_2 + merge_tree_insert_3 + + + + + string_primary_key_table_name + + merge_tree_insert_4 + merge_tree_insert_5 + merge_tree_insert_6 + + + + + CREATE TABLE merge_tree_insert_1 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1) + CREATE TABLE merge_tree_insert_2 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2) + CREATE TABLE merge_tree_insert_3 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3) + CREATE TABLE merge_tree_insert_4 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1) + CREATE TABLE merge_tree_insert_5 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2) + CREATE TABLE merge_tree_insert_6 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3) + + INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 500000 + INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1000000 + INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1500000 + + INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 500000 + INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1000000 + INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1500000 + + DROP TABLE IF EXISTS {integer_primary_key_table_name} + DROP TABLE IF EXISTS {string_primary_key_table_name} + + diff --git a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql index f9cbf92db41..e29a166c1ee 100644 --- a/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql +++ b/tests/queries/0_stateless/00124_shard_distributed_with_many_replicas.sql @@ -1,5 +1,6 @@ -- Tags: replica, distributed +SET allow_experimental_parallel_reading_from_replicas = 0; SET max_parallel_replicas = 2; DROP TABLE IF EXISTS report; diff --git a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql index 16356046a36..c54593056cf 100644 --- a/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql +++ b/tests/queries/0_stateless/00135_duplicate_group_by_keys_segfault.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET max_rows_to_read = 1000000; SET read_overflow_mode = 'break'; SELECT concat(toString(number % 256 AS n), '') AS s, n, max(s) FROM system.numbers_mt GROUP BY s, n, n, n, n, n, n, n, n, n ORDER BY s, n; diff --git a/tests/queries/0_stateless/00284_external_aggregation.sql b/tests/queries/0_stateless/00284_external_aggregation.sql index cd9abec59a8..709c2d10b97 100644 --- a/tests/queries/0_stateless/00284_external_aggregation.sql +++ 
b/tests/queries/0_stateless/00284_external_aggregation.sql @@ -1,5 +1,7 @@ +-- Tags: long + SET max_bytes_before_external_group_by = 100000000; -SET max_memory_usage = 351000000; +SET max_memory_usage = 410000000; SELECT sum(k), sum(c) FROM (SELECT number AS k, count() AS c FROM (SELECT * FROM system.numbers LIMIT 10000000) GROUP BY k); SELECT sum(k), sum(c), max(u) FROM (SELECT number AS k, count() AS c, uniqArray(range(number % 16)) AS u FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k); diff --git a/tests/queries/0_stateless/00474_readonly_settings.reference b/tests/queries/0_stateless/00474_readonly_settings.reference index b1da40ce414..e2b45931965 100644 --- a/tests/queries/0_stateless/00474_readonly_settings.reference +++ b/tests/queries/0_stateless/00474_readonly_settings.reference @@ -2,13 +2,11 @@ "value": 4611686018427387904 "name": "value", "value": "4611686018427387904" -value -value -Cannot modify 'output_format_json_quote_64bit_integers' setting in readonly mode +OK +OK "name": "value", "value": "9223372036854775808" "name": "value", "value": 9223372036854775808 -value -value -Cannot modify 'output_format_json_quote_64bit_integers' setting in readonly mode +OK +OK diff --git a/tests/queries/0_stateless/00474_readonly_settings.sh b/tests/queries/0_stateless/00474_readonly_settings.sh index 0887ecfa14e..07b78c64a7e 100755 --- a/tests/queries/0_stateless/00474_readonly_settings.sh +++ b/tests/queries/0_stateless/00474_readonly_settings.sh @@ -9,13 +9,15 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" --output_format_json_quote_64bit_integers=0 | grep value $CLICKHOUSE_CLIENT --query="select toUInt64(pow(2, 62)) as value format JSON" --output_format_json_quote_64bit_integers=1 | grep value -$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' -$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' +$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=1 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT --readonly=1 --multiquery --query="set output_format_json_quote_64bit_integers=0 ; select toUInt64(pow(2, 63)) as value format JSON" --server_logs_file=/dev/null 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" | grep value ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" | grep value -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' +#${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&session_timeout=3600" -d 'SET readonly = 1' + +${CLICKHOUSE_CURL} -sS 
"${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode.' && echo "OK" || echo "FAIL" +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o -q 'value\|Cannot modify .* setting in readonly mode' && echo "OK" || echo "FAIL" -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=1" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode.' -${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=readonly&query=SELECT+toUInt64(pow(2,+63))+as+value+format+JSON&output_format_json_quote_64bit_integers=0" 2>&1 | grep -o 'value\|Cannot modify .* setting in readonly mode' diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index 3c1b7ed70e4..f9b241bbf1e 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -1,3 +1,5 @@ +-- Tags: long + SET joined_subquery_requires_alias = 0; -- This test (SELECT) without cache can take tens minutes diff --git a/tests/queries/0_stateless/00808_not_optimize_predicate.sql b/tests/queries/0_stateless/00808_not_optimize_predicate.sql index 7c1e57706e2..ba8f5eb5753 100644 --- a/tests/queries/0_stateless/00808_not_optimize_predicate.sql +++ b/tests/queries/0_stateless/00808_not_optimize_predicate.sql @@ -1,4 +1,5 @@ SET send_logs_level = 'fatal'; +SET convert_query_to_cnf = 0; DROP TABLE IF EXISTS test_00808; CREATE TABLE test_00808(date Date, id Int8, name String, value Int64, sign Int8) ENGINE = CollapsingMergeTree(sign) ORDER BY (id, date); diff --git a/tests/queries/0_stateless/00826_cross_to_inner_join.sql b/tests/queries/0_stateless/00826_cross_to_inner_join.sql index 392ade02ab7..ce0c8ea2bfc 100644 --- a/tests/queries/0_stateless/00826_cross_to_inner_join.sql +++ b/tests/queries/0_stateless/00826_cross_to_inner_join.sql @@ -1,4 +1,6 @@ SET enable_optimize_predicate_expression = 0; +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; select * from system.one l cross join system.one r; diff --git a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql index 58535f556d9..eabede3ff00 100644 --- a/tests/queries/0_stateless/00849_multiple_comma_join_2.sql +++ b/tests/queries/0_stateless/00849_multiple_comma_join_2.sql @@ -1,4 +1,5 @@ SET enable_optimize_predicate_expression = 0; +SET convert_query_to_cnf = 0; DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS t2; diff --git a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql index 0d82519e4d3..555e7a98380 100644 --- a/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql +++ b/tests/queries/0_stateless/00965_shard_unresolvable_addresses.sql @@ -1,5 +1,7 @@ -- Tags: shard +SET prefer_localhost_replica = 1; + SELECT count() FROM remote('127.0.0.1,localhos', system.one); -- { serverError 198 } SELECT count() FROM remote('127.0.0.1|localhos', system.one); diff --git a/tests/queries/0_stateless/00974_query_profiler.sql b/tests/queries/0_stateless/00974_query_profiler.sql index 24e4241b813..b697bd56800 100644 --- 
a/tests/queries/0_stateless/00974_query_profiler.sql +++ b/tests/queries/0_stateless/00974_query_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 -- Tag no-fasttest: Not sure why fail even in sequential mode. Disabled for now to make some progress. SET allow_introspection_functions = 1; diff --git a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql index af747c93678..7804ce32a5a 100644 --- a/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql +++ b/tests/queries/0_stateless/01010_pmj_right_table_memory_limits.sql @@ -1,4 +1,4 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-random-settings SET max_memory_usage = 32000000; SET join_on_disk_max_files_to_merge = 4; diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.reference index 3a668e6dcdb..d4fdcfea6a5 100644 --- a/tests/queries/0_stateless/01016_simhash_minhash.reference +++ b/tests/queries/0_stateless/01016_simhash_minhash.reference @@ -50,92 +50,92 @@ (14260447771268573594,3863279269132177973) uniqExact 6 ngramSimHash -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2857686823 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 
1 2857686823 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567843 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 ngramSimHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891240999 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3908359975 ngramSimHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3159676711 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2924795687 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3159676711 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3897874215 ngramSimHashCaseInsensitiveUTF8 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. 
Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891241255 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 wordShingleSimHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. 
All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 wordShingleSimHashCaseInsensitive -ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 wordShingleSimHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 wordShingleSimHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 ngramMinHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) ngramMinHashCaseInsensitive -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) ngramMinHashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) ngramMinHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) wordShingleMinHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) wordShingleMinHashCaseInsensitive -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) wordShingleMinHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) -wordShingleMinHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) +wordShingleMinHashCaseInsensitiveUTF8 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql index 01af9451381..1e77b487851 100644 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ b/tests/queries/0_stateless/01016_simhash_minhash.sql @@ -75,38 +75,38 @@ SELECT 'uniqExact', uniqExact(s) FROM defaults; SELECT 'ngramSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults 
GROUP BY h ORDER BY h; SELECT 'wordShingleSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'ngramMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT 'wordShingleMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 } SELECT wordShingleSimHash('foobar', 1001); -- { 
serverError 69 } diff --git a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql index d47dc6b8d5f..69bd15e3f54 100644 --- a/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql +++ b/tests/queries/0_stateless/01017_uniqCombined_memory_usage.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-msan, no-replicated-database +-- Tags: no-tsan, no-asan, no-msan, no-replicated-database, no-random-settings -- Tag no-tsan: Fine thresholds on memory usage -- Tag no-asan: Fine thresholds on memory usage -- Tag no-msan: Fine thresholds on memory usage @@ -7,6 +7,8 @@ -- sizeof(HLL) is (2^K * 6 / 8) -- hence max_memory_usage for 100 rows = (96<<10)*100 = 9830400 +SET use_uncompressed_cache = 0; + -- HashTable for UInt32 (used until (1<<13) elements), hence 8192 elements SELECT 'UInt32'; SET max_memory_usage = 4000000; @@ -19,6 +21,8 @@ SELECT 'UInt64'; SET max_memory_usage = 4000000; SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); -- { serverError 241 } SET max_memory_usage = 9830400; + + SELECT sum(u) FROM (SELECT intDiv(number, 4096) AS k, uniqCombined(reinterpretAsString(number % 4096)) u FROM numbers(4096 * 100) GROUP BY k); SELECT 'K=16'; diff --git a/tests/queries/0_stateless/01020_function_array_compact.sql b/tests/queries/0_stateless/01020_function_array_compact.sql index d4aaa4d3fca..29adb007dc4 100644 --- a/tests/queries/0_stateless/01020_function_array_compact.sql +++ b/tests/queries/0_stateless/01020_function_array_compact.sql @@ -7,5 +7,5 @@ select arrayCompact([1,1,2]); select arrayCompact([1,2,1]); select arrayCompact([2,1,1]); select arrayCompact([1,2,2,3,3,3,4,4,4,4,5,5,5,5,5]); -SELECT arrayCompact(x->0, [NULL]); -SELECT toString(arrayCompact(x->0, [NULL])); +SELECT arrayCompact(arrayMap(x->0, [NULL])); +SELECT toString(arrayCompact(arrayMap(x->0, [NULL]))); diff --git a/tests/queries/0_stateless/01025_array_compact_generic.reference b/tests/queries/0_stateless/01025_array_compact_generic.reference index d95e269cd3f..572c7ee140c 100644 --- a/tests/queries/0_stateless/01025_array_compact_generic.reference +++ b/tests/queries/0_stateless/01025_array_compact_generic.reference @@ -15,3 +15,6 @@ ['0','1','2'] ['0','1','2'] ['0','1','2'] +[(0,0),(3,1),(6,2),(9,0)] +[('0','0'),('3','1'),('6','2'),('9','0')] +[('0',0),('3',1),('6',2),('9',0)] diff --git a/tests/queries/0_stateless/01025_array_compact_generic.sql b/tests/queries/0_stateless/01025_array_compact_generic.sql index bea39bfbd44..4446d10e9d4 100644 --- a/tests/queries/0_stateless/01025_array_compact_generic.sql +++ b/tests/queries/0_stateless/01025_array_compact_generic.sql @@ -5,4 +5,7 @@ SELECT arrayCompact([1, 1, NULL, NULL, 2, 2, 2]); SELECT arrayCompact([1, 1, NULL, NULL, nan, nan, 2, 2, 2]); SELECT arrayCompact(['hello', '', '', '', 'world', 'world']); SELECT arrayCompact([[[]], [[], []], [[], []], [[]]]); -SELECT arrayCompact(x -> toString(intDiv(x, 3)), range(number)) FROM numbers(10); +SELECT arrayCompact(arrayMap(x -> toString(intDiv(x, 3)), range(number))) FROM numbers(10); +SELECT arrayCompact(x -> x.2, groupArray((number, intDiv(number, 3) % 3))) FROM numbers(10); +SELECT arrayCompact(x -> x.2, groupArray((toString(number), toString(intDiv(number, 3) % 3)))) FROM numbers(10); +SELECT arrayCompact(x -> x.2, groupArray((toString(number), intDiv(number, 3) % 3))) FROM numbers(10); diff --git 
a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql index 4eea4fd47c7..6d1c7fd5ef6 100644 --- a/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql +++ b/tests/queries/0_stateless/01034_prewhere_max_parallel_replicas_distributed.sql @@ -1,5 +1,7 @@ -- Tags: replica, distributed +set allow_experimental_parallel_reading_from_replicas=0; + drop table if exists test_max_parallel_replicas_lr; -- If you wonder why the table is named with "_lr" suffix in this test. diff --git a/tests/queries/0_stateless/01034_sample_final_distributed.sql b/tests/queries/0_stateless/01034_sample_final_distributed.sql index b784b35cbb3..a81fef645db 100644 --- a/tests/queries/0_stateless/01034_sample_final_distributed.sql +++ b/tests/queries/0_stateless/01034_sample_final_distributed.sql @@ -1,5 +1,7 @@ -- Tags: distributed +set allow_experimental_parallel_reading_from_replicas = 0; + drop table if exists sample_final; create table sample_final (CounterID UInt32, EventDate Date, EventTime DateTime, UserID UInt64, Sign Int8) engine = CollapsingMergeTree(Sign) order by (CounterID, EventDate, intHash32(UserID), EventTime) sample by intHash32(UserID); insert into sample_final select number / (8192 * 4), toDate('2019-01-01'), toDateTime('2019-01-01 00:00:01') + number, number / (8192 * 2), number % 3 = 1 ? -1 : 1 from numbers(1000000); diff --git a/tests/queries/0_stateless/01048_window_view_parser.reference b/tests/queries/0_stateless/01048_window_view_parser.reference index c055971bef3..947b68c3a89 100644 --- a/tests/queries/0_stateless/01048_window_view_parser.reference +++ b/tests/queries/0_stateless/01048_window_view_parser.reference @@ -1,34 +1,34 @@ ---TUMBLE--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1))`\nORDER BY `windowID(timestamp, toIntervalSecond(1))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, 
Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`plus(a, b)`, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, 
toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'))`\nSETTINGS index_granularity = 8192 ---HOP--- ||---WINDOW COLUMN NAME--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3))`\nSETTINGS index_granularity = 8192 ||---WINDOW COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 ||---IDENTIFIER--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `b` Int32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, 
toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, b)\nSETTINGS index_granularity = 8192 ||---FUNCTION--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `plus(a, b)` Int64,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`plus(a, b)`, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `plus(a, b)` Int64,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (`windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`, `plus(a, b)`)\nSETTINGS index_granularity = 8192 ||---TimeZone--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(1), toIntervalSecond(3), \'Asia/Shanghai\')`\nSETTINGS index_granularity = 8192 ||---DATA COLUMN ALIAS--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `b` Int32,\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY (b, `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`)\nSETTINGS index_granularity = 8192 ||---JOIN--- -CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nPRIMARY KEY `windowID(timestamp, 
toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 +CREATE TABLE test_01048.`.inner.wv`\n(\n `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))` UInt32,\n `count(a)` AggregateFunction(count, Int32),\n `count(mt_2.b)` AggregateFunction(count, Int32)\n)\nENGINE = AggregatingMergeTree\nORDER BY `windowID(timestamp, toIntervalSecond(\'1\'), toIntervalSecond(\'3\'))`\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql index d59b8fc30ac..6d2bb2964d6 100644 --- a/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql +++ b/tests/queries/0_stateless/01056_predicate_optimizer_bugs.sql @@ -1,5 +1,6 @@ SET enable_optimize_predicate_expression = 1; SET joined_subquery_requires_alias = 0; +SET convert_query_to_cnf = 0; -- https://github.com/ClickHouse/ClickHouse/issues/3885 -- https://github.com/ClickHouse/ClickHouse/issues/5485 diff --git a/tests/queries/0_stateless/01060_avro.reference b/tests/queries/0_stateless/01060_avro.reference index 224a369d993..a375ae280a9 100644 --- a/tests/queries/0_stateless/01060_avro.reference +++ b/tests/queries/0_stateless/01060_avro.reference @@ -42,6 +42,7 @@ not compatible = compression 1000 1000 +1000 = other 0 1000 diff --git a/tests/queries/0_stateless/01060_avro.sh b/tests/queries/0_stateless/01060_avro.sh index 1cfe5582d0a..3c70927db25 100755 --- a/tests/queries/0_stateless/01060_avro.sh +++ b/tests/queries/0_stateless/01060_avro.sh @@ -50,8 +50,12 @@ echo '=' compression cat "$DATA_DIR"/simple.null.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' cat "$DATA_DIR"/simple.deflate.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' -#snappy is optional -#cat $DATA_DIR/simple.snappy.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' +# snappy is optional +if [ "$( ${CLICKHOUSE_LOCAL} -q "SELECT value FROM system.build_options where name = 'USE_SNAPPY' LIMIT 1")" == "1" ]; then +cat $DATA_DIR/simple.snappy.avro | ${CLICKHOUSE_LOCAL} --input-format Avro --output-format CSV -S 'a Int64' -q 'select count() from table' +else +echo 1000 +fi echo '=' other #no data diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference index 55c1ee45827..de722f47f08 100644 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.reference @@ -1,3 +1,7 @@ -3 1990-01-01 12:00:05 -2 1990-01-01 12:00:10 -2 1990-01-01 12:00:15 +1 1 1990-01-01 12:00:05 +1 2 1990-01-01 12:00:05 +1 3 1990-01-01 12:00:05 +1 4 1990-01-01 12:00:10 +1 5 1990-01-01 12:00:10 +1 6 1990-01-01 12:00:15 +1 7 1990-01-01 12:00:15 diff --git a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh index e570f405f62..9163fe8af27 100755 --- a/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh +++ b/tests/queries/0_stateless/01060_window_view_event_tumble_to_asc.sh @@ -10,25 +10,25 @@ DROP TABLE IF EXISTS mt; DROP TABLE IF EXISTS dst; DROP TABLE IF EXISTS wv; -CREATE TABLE dst(count UInt64, w_end 
DateTime) Engine=MergeTree ORDER BY tuple(); -CREATE TABLE mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid; +CREATE TABLE dst(count UInt64, market Int32, w_end DateTime) Engine=MergeTree ORDER BY tuple(); +CREATE TABLE mt(a Int32, market Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple(); +CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid, market; -INSERT INTO mt VALUES (1, '1990/01/01 12:00:00'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:01'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:02'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:05'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:06'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:10'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:11'); -INSERT INTO mt VALUES (1, '1990/01/01 12:00:30'); +INSERT INTO mt VALUES (1, 1, '1990/01/01 12:00:00'); +INSERT INTO mt VALUES (1, 2, '1990/01/01 12:00:01'); +INSERT INTO mt VALUES (1, 3, '1990/01/01 12:00:02'); +INSERT INTO mt VALUES (1, 4, '1990/01/01 12:00:05'); +INSERT INTO mt VALUES (1, 5, '1990/01/01 12:00:06'); +INSERT INTO mt VALUES (1, 6, '1990/01/01 12:00:10'); +INSERT INTO mt VALUES (1, 7, '1990/01/01 12:00:11'); +INSERT INTO mt VALUES (1, 8, '1990/01/01 12:00:30'); EOF while true; do - $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "3" && break || sleep .5 ||: + $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "7" && break || sleep .5 ||: done -$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY w_end;" +$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY market, w_end;" $CLICKHOUSE_CLIENT --query="DROP TABLE wv" $CLICKHOUSE_CLIENT --query="DROP TABLE mt" $CLICKHOUSE_CLIENT --query="DROP TABLE dst" diff --git a/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql b/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql index 644190cbddf..6ec6e80692c 100644 --- a/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql +++ b/tests/queries/0_stateless/01083_cross_to_inner_with_like.sql @@ -1,3 +1,5 @@ +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS n; DROP TABLE IF EXISTS r; diff --git a/tests/queries/0_stateless/01086_odbc_roundtrip.sh b/tests/queries/0_stateless/01086_odbc_roundtrip.sh index 705746032f8..20066c6b34c 100755 --- a/tests/queries/0_stateless/01086_odbc_roundtrip.sh +++ b/tests/queries/0_stateless/01086_odbc_roundtrip.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -# Tags: no-asan, no-msan, no-fasttest +# Tags: no-asan, no-msan, no-fasttest, no-cpu-aarch64 # Tag no-msan: can't pass because odbc libraries are not instrumented +# Tag no-cpu-aarch64: clickhouse-odbc is not setup for arm CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01092_memory_profiler.sql b/tests/queries/0_stateless/01092_memory_profiler.sql index 9d042860ac0..3869bf941c0 100644 --- a/tests/queries/0_stateless/01092_memory_profiler.sql +++ b/tests/queries/0_stateless/01092_memory_profiler.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-parallel, no-fasttest, no-cpu-aarch64 SET allow_introspection_functions = 1; diff --git 
a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql index 4e011bf6b31..de93166d891 100644 --- a/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql +++ b/tests/queries/0_stateless/01099_parallel_distributed_insert_select.sql @@ -2,6 +2,8 @@ -- set insert_distributed_sync = 1; -- see https://github.com/ClickHouse/ClickHouse/issues/18971 +SET allow_experimental_parallel_reading_from_replicas = 0; -- see https://github.com/ClickHouse/ClickHouse/issues/34525 + DROP TABLE IF EXISTS local_01099_a; DROP TABLE IF EXISTS local_01099_b; DROP TABLE IF EXISTS distributed_01099_a; diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh index da99a13e97f..9b6e1e05f2d 100755 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-fasttest, no-cpu-aarch64 # Tag no-fasttest: avoid dependency on qemu -- invonvenient when running locally CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01154_move_partition_long.sh b/tests/queries/0_stateless/01154_move_partition_long.sh index 6b0b0773cb6..7cefac28e22 100755 --- a/tests/queries/0_stateless/01154_move_partition_long.sh +++ b/tests/queries/0_stateless/01154_move_partition_long.sh @@ -1,5 +1,7 @@ #!/usr/bin/env bash -# Tags: long, no-parallel +# Tags: long, no-parallel, no-s3-storage +# FIXME: s3 storage should work OK, it +# reproduces a bug which exists not only in the S3 version. 
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index 0b672cbddbf..e0546ec8117 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel +SET prefer_localhost_replica = 1; + DROP DATABASE IF EXISTS test_01155_ordinary; DROP DATABASE IF EXISTS test_01155_atomic; diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh index fecb1b8d8c0..c759d113f84 100755 --- a/tests/queries/0_stateless/01162_strange_mutations.sh +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "CREATE OR REPLACE VIEW t1 AS SELECT number * 10 AS id, nu for engine in "${engines[@]}" do $CLICKHOUSE_CLIENT -q "drop table if exists t" - $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine" + $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine" 2>&1| grep -Ev "Removing leftovers from table|removed by another replica" $CLICKHOUSE_CLIENT -q "select engine from system.tables where database=currentDatabase() and name='t'" $CLICKHOUSE_CLIENT -q "insert into t values (1)" $CLICKHOUSE_CLIENT -q "insert into t values (2)" @@ -25,7 +25,7 @@ do $CLICKHOUSE_CLIENT -q "drop table t" $CLICKHOUSE_CLIENT -q "drop table if exists test" - $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" + $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" 2>&1| grep -Ev "Removing leftovers from table|removed by another replica" $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" if [[ $engine == *"ReplicatedMergeTree"* ]]; then $CLICKHOUSE_CLIENT -q "ALTER TABLE test diff --git a/tests/queries/0_stateless/01186_conversion_to_nullable.reference b/tests/queries/0_stateless/01186_conversion_to_nullable.reference index 7a690240eb5..dc77029ec3b 100644 --- a/tests/queries/0_stateless/01186_conversion_to_nullable.reference +++ b/tests/queries/0_stateless/01186_conversion_to_nullable.reference @@ -12,7 +12,7 @@ \N 1970-01-01 \N -1970-01-01 +2149-06-06 2020-12-24 01:02:03 \N 1970-01-01 03:00:00 diff --git a/tests/queries/0_stateless/01187_set_profile_as_setting.sh b/tests/queries/0_stateless/01187_set_profile_as_setting.sh index ec07f4d3687..dacb609d790 100755 --- a/tests/queries/0_stateless/01187_set_profile_as_setting.sh +++ b/tests/queries/0_stateless/01187_set_profile_as_setting.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings unset CLICKHOUSE_LOG_COMMENT diff --git a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql index 73b87817bb3..242a253e67c 100644 --- a/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql +++ b/tests/queries/0_stateless/01271_optimize_arithmetic_operations_in_aggr_func_with_alias.sql @@ -1,4 +1,5 @@ set optimize_arithmetic_operations_in_aggregate_functions = 1; +SET convert_query_to_cnf = 0; explain syntax select min((n as a) + (1 as b)) c from (select number n from numbers(10)) where a > 0 and b > 0 having c > 0; select min((n as a) + (1 as b)) c from (select number n from 
numbers(10)) where a > 0 and b > 0 having c > 0; diff --git a/tests/queries/0_stateless/01275_parallel_mv.reference b/tests/queries/0_stateless/01275_parallel_mv.reference index a5987acafde..9021ae2bb1a 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.reference +++ b/tests/queries/0_stateless/01275_parallel_mv.reference @@ -2,8 +2,8 @@ set parallel_view_processing=1; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; -8 +select length(thread_ids) >= 8 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; +1 select count() from testX; 10 select count() from testXA; @@ -15,8 +15,8 @@ select count() from testXC; set parallel_view_processing=0; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; -5 +select length(thread_ids) >= 5 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; +1 select count() from testX; 20 select count() from testXA; diff --git a/tests/queries/0_stateless/01275_parallel_mv.sql b/tests/queries/0_stateless/01275_parallel_mv.sql index 32b43ce616f..27b8ef96e0b 100644 --- a/tests/queries/0_stateless/01275_parallel_mv.sql +++ b/tests/queries/0_stateless/01275_parallel_mv.sql @@ -1,3 +1,5 @@ +set max_threads = 0; + drop table if exists testX; drop table if exists testXA; drop table if exists testXB; @@ -13,7 +15,7 @@ create materialized view testXC engine=MergeTree order by tuple() as select slee set parallel_view_processing=1; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; +select length(thread_ids) >= 8 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '1'; select count() from testX; select count() from testXA; @@ -23,7 +25,7 @@ select count() from testXC; set parallel_view_processing=0; insert into testX select number from numbers(10) settings log_queries=1; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } system flush logs; -select length(thread_ids) from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; +select length(thread_ids) >= 5 from system.query_log where current_database = currentDatabase() and type != 'QueryStart' and query like '%insert into testX %' and Settings['parallel_view_processing'] = '0'; select count() from testX; select count() from testXA; diff --git 
a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh index bf201187f45..c9c01455e31 100755 --- a/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh +++ b/tests/queries/0_stateless/01281_group_by_limit_memory_tracking.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-replicated-database, no-parallel, no-fasttest +# Tags: no-replicated-database, no-parallel, no-fasttest, no-tsan, no-asan # Tag no-fasttest: max_memory_usage_for_user can interfere another queries running concurrently # Regression for MemoryTracker that had been incorrectly accounted diff --git a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql index 61db4376c91..59d8605ba1c 100644 --- a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql +++ b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS data_01283; +set remote_filesystem_read_method='read'; + CREATE TABLE data_01283 engine=MergeTree() ORDER BY key PARTITION BY key diff --git a/tests/queries/0_stateless/01293_show_settings.sql b/tests/queries/0_stateless/01293_show_settings.sql index 08f00ed201c..3e55ffb58d7 100644 --- a/tests/queries/0_stateless/01293_show_settings.sql +++ b/tests/queries/0_stateless/01293_show_settings.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + show settings like 'send_timeout'; SHOW SETTINGS ILIKE '%CONNECT_timeout%'; SHOW CHANGED SETTINGS ILIKE '%MEMORY%'; diff --git a/tests/queries/0_stateless/01293_system_distribution_queue.sql b/tests/queries/0_stateless/01293_system_distribution_queue.sql index 34158fb081c..9997f18f61d 100644 --- a/tests/queries/0_stateless/01293_system_distribution_queue.sql +++ b/tests/queries/0_stateless/01293_system_distribution_queue.sql @@ -1,4 +1,5 @@ -- Tags: no-parallel +set prefer_localhost_replica = 1; drop table if exists null_01293; drop table if exists dist_01293; diff --git a/tests/queries/0_stateless/01300_group_by_other_keys.sql b/tests/queries/0_stateless/01300_group_by_other_keys.sql index 22cff012e71..0e37ef55a6a 100644 --- a/tests/queries/0_stateless/01300_group_by_other_keys.sql +++ b/tests/queries/0_stateless/01300_group_by_other_keys.sql @@ -1,3 +1,5 @@ +set max_block_size = 65505; + set optimize_group_by_function_keys = 1; SELECT round(max(log(2) * number), 6) AS k FROM numbers(10000000) GROUP BY number % 2, number % 3, (number % 2 + number % 3) % 2 ORDER BY k; diff --git a/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql index cd41bb227eb..81bd2ad97a9 100644 --- a/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql +++ b/tests/queries/0_stateless/01308_row_policy_and_trivial_count_query.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY x; diff --git a/tests/queries/0_stateless/01323_too_many_threads_bug.sql b/tests/queries/0_stateless/01323_too_many_threads_bug.sql index 6033fe66cd3..5dbb5aca2ec 100644 --- a/tests/queries/0_stateless/01323_too_many_threads_bug.sql +++ b/tests/queries/0_stateless/01323_too_many_threads_bug.sql @@ -1,5 +1,7 @@ drop table if exists table_01323_many_parts; +set remote_filesystem_read_method='read'; + create table table_01323_many_parts (x UInt64) engine = MergeTree order by x partition by x % 
100; set max_partitions_per_insert_block = 100; insert into table_01323_many_parts select number from numbers(100000); diff --git a/tests/queries/0_stateless/01339_client_unrecognized_option.sh b/tests/queries/0_stateless/01339_client_unrecognized_option.sh index 00c153ec915..9f827ccb13e 100755 --- a/tests/queries/0_stateless/01339_client_unrecognized_option.sh +++ b/tests/queries/0_stateless/01339_client_unrecognized_option.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql b/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql index c2191d6ab96..b45b9c84b18 100644 --- a/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql +++ b/tests/queries/0_stateless/01386_negative_float_constant_key_condition.sql @@ -1,3 +1,5 @@ +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS t0; CREATE TABLE t0 diff --git a/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql b/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql index 306d94387a4..6b5c2ac8ffd 100644 --- a/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql +++ b/tests/queries/0_stateless/01415_overlimiting_threads_for_repica_bug.sql @@ -1,5 +1,6 @@ set log_queries = 1; set max_threads = 16; +set prefer_localhost_replica = 1; select sum(number) from remote('127.0.0.{1|2}', numbers_mt(1000000)) group by number % 2 order by number % 2; diff --git a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql index d7c681dc615..bc677698d88 100644 --- a/tests/queries/0_stateless/01457_create_as_table_function_structure.sql +++ b/tests/queries/0_stateless/01457_create_as_table_function_structure.sql @@ -1,5 +1,7 @@ -- Tags: no-parallel +SET prefer_localhost_replica = 1; + DROP DATABASE IF EXISTS test_01457; CREATE DATABASE test_01457; diff --git a/tests/queries/0_stateless/01473_event_time_microseconds.sql b/tests/queries/0_stateless/01473_event_time_microseconds.sql index 9c6b392a8aa..932acf48cc4 100644 --- a/tests/queries/0_stateless/01473_event_time_microseconds.sql +++ b/tests/queries/0_stateless/01473_event_time_microseconds.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 -- This file contains tests for the event_time_microseconds field for various tables. 
-- Note: Only event_time_microseconds for asynchronous_metric_log table is tested via diff --git a/tests/queries/0_stateless/01475_read_subcolumns.sql b/tests/queries/0_stateless/01475_read_subcolumns.sql index fb26b19ed30..4724bec9eff 100644 --- a/tests/queries/0_stateless/01475_read_subcolumns.sql +++ b/tests/queries/0_stateless/01475_read_subcolumns.sql @@ -1,4 +1,7 @@ -- Tags: no-s3-storage + +SET use_uncompressed_cache = 0; + SELECT '====array===='; DROP TABLE IF EXISTS t_arr; CREATE TABLE t_arr (a Array(UInt32)) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; diff --git a/tests/queries/bugs/01482_move_to_prewhere_and_cast.reference b/tests/queries/0_stateless/01482_move_to_prewhere_and_cast.reference similarity index 100% rename from tests/queries/bugs/01482_move_to_prewhere_and_cast.reference rename to tests/queries/0_stateless/01482_move_to_prewhere_and_cast.reference diff --git a/tests/queries/bugs/01482_move_to_prewhere_and_cast.sql b/tests/queries/0_stateless/01482_move_to_prewhere_and_cast.sql similarity index 91% rename from tests/queries/bugs/01482_move_to_prewhere_and_cast.sql rename to tests/queries/0_stateless/01482_move_to_prewhere_and_cast.sql index b81cf585b13..282363dcdd7 100644 --- a/tests/queries/bugs/01482_move_to_prewhere_and_cast.sql +++ b/tests/queries/0_stateless/01482_move_to_prewhere_and_cast.sql @@ -1,6 +1,3 @@ --- Tags: no-polymorphic-parts --- Tag no-polymorphic-parts: bug, shoud be fixed - DROP TABLE IF EXISTS APPLICATION; DROP TABLE IF EXISTS DATABASE_IO; @@ -22,9 +19,9 @@ ORDER BY Date; insert into table DATABASE_IO values ('AppA', 'BaseA', '2020-01-01 00:00:00', 1000); SELECT `APPLICATION`.`Name` AS `App`, - CAST(CAST(`DATABASE_IO`.`Date` AS DATE) AS DATE) AS `date` + CAST(CAST(`DATABASE_IO`.`Date` AS DATE) AS DATE) AS `date` FROM `DATABASE_IO` -INNER +INNER JOIN `APPLICATION` ON (`DATABASE_IO`.`Base` = `APPLICATION`.`Base`) WHERE ( CAST(CAST(`DATABASE_IO`.`Date` AS DATE) AS TIMESTAMP) >= toDateTime('2020-01-01 00:00:00') diff --git a/tests/queries/0_stateless/01492_format_readable_quantity.reference b/tests/queries/0_stateless/01492_format_readable_quantity.reference index e58a1954eee..247063b70a2 100644 --- a/tests/queries/0_stateless/01492_format_readable_quantity.reference +++ b/tests/queries/0_stateless/01492_format_readable_quantity.reference @@ -20,26 +20,26 @@ 178.48 million 178.48 million 178.48 million 485.17 million 485.17 million 485.17 million 1.32 billion 1.32 billion 1.32 billion -3.58 billion 3.58 billion -2.15 billion -9.74 billion 9.74 billion -2.15 billion -26.49 billion 26.49 billion -2.15 billion -72.00 billion 72.00 billion -2.15 billion -195.73 billion 195.73 billion -2.15 billion -532.05 billion 532.05 billion -2.15 billion -1.45 trillion 1.45 trillion -2.15 billion -3.93 trillion 3.93 trillion -2.15 billion -10.69 trillion 10.69 trillion -2.15 billion -29.05 trillion 29.05 trillion -2.15 billion -78.96 trillion 78.96 trillion -2.15 billion -214.64 trillion 214.64 trillion -2.15 billion -583.46 trillion 583.46 trillion -2.15 billion -1.59 quadrillion 1.59 quadrillion -2.15 billion -4.31 quadrillion 4.31 quadrillion -2.15 billion -11.72 quadrillion 11.72 quadrillion -2.15 billion -31.86 quadrillion 31.86 quadrillion -2.15 billion -86.59 quadrillion 86.59 quadrillion -2.15 billion -235.39 quadrillion 235.39 quadrillion -2.15 billion -639.84 quadrillion 639.84 quadrillion -2.15 billion -1739.27 quadrillion 1739.27 quadrillion -2.15 billion -4727.84 quadrillion 4727.84 quadrillion -2.15 billion -12851.60 
quadrillion 12851.60 quadrillion -2.15 billion +3.58 billion 3.58 billion 2.15 billion +9.74 billion 9.74 billion 2.15 billion +26.49 billion 26.49 billion 2.15 billion +72.00 billion 72.00 billion 2.15 billion +195.73 billion 195.73 billion 2.15 billion +532.05 billion 532.05 billion 2.15 billion +1.45 trillion 1.45 trillion 2.15 billion +3.93 trillion 3.93 trillion 2.15 billion +10.69 trillion 10.69 trillion 2.15 billion +29.05 trillion 29.05 trillion 2.15 billion +78.96 trillion 78.96 trillion 2.15 billion +214.64 trillion 214.64 trillion 2.15 billion +583.46 trillion 583.46 trillion 2.15 billion +1.59 quadrillion 1.59 quadrillion 2.15 billion +4.31 quadrillion 4.31 quadrillion 2.15 billion +11.72 quadrillion 11.72 quadrillion 2.15 billion +31.86 quadrillion 31.86 quadrillion 2.15 billion +86.59 quadrillion 86.59 quadrillion 2.15 billion +235.39 quadrillion 235.39 quadrillion 2.15 billion +639.84 quadrillion 639.84 quadrillion 2.15 billion +1739.27 quadrillion 1739.27 quadrillion 2.15 billion +4727.84 quadrillion 4727.84 quadrillion 2.15 billion +12851.60 quadrillion 12851.60 quadrillion 2.15 billion diff --git a/tests/queries/0_stateless/01492_format_readable_quantity.sql b/tests/queries/0_stateless/01492_format_readable_quantity.sql index 3931cde49df..93aa570ccc8 100644 --- a/tests/queries/0_stateless/01492_format_readable_quantity.sql +++ b/tests/queries/0_stateless/01492_format_readable_quantity.sql @@ -1,4 +1,4 @@ -WITH round(exp(number), 6) AS x, toUInt64(x) AS y, toInt32(x) AS z +WITH round(exp(number), 6) AS x, toUInt64(x) AS y, toInt32(min2(x, 2147483647)) AS z SELECT formatReadableQuantity(x), formatReadableQuantity(y), formatReadableQuantity(z) FROM system.numbers LIMIT 45; diff --git a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql index 8862037c82b..f9c227942ac 100644 --- a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql +++ b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql @@ -5,12 +5,11 @@ CREATE TABLE buf_dest (timestamp DateTime) ENGINE = MergeTree PARTITION BY toYYYYMMDD(timestamp) ORDER BY (timestamp); -CREATE TABLE buf (timestamp DateTime) Engine = Buffer(currentDatabase(), buf_dest, 16, 0.1, 0.1, 2000000, 20000000, 100000000, 300000000);; +CREATE TABLE buf (timestamp DateTime) Engine = Buffer(currentDatabase(), buf_dest, 16, 86400, 86400, 2000000, 20000000, 100000000, 300000000);; INSERT INTO buf (timestamp) VALUES (toDateTime('2020-01-01 00:05:00')); ---- wait for buffer to flush -SELECT sleep(1) from numbers(1) settings max_block_size=1 format Null; +OPTIMIZE TABLE buf; ALTER TABLE buf_dest ADD COLUMN s String; ALTER TABLE buf ADD COLUMN s String; diff --git a/tests/queries/0_stateless/01517_select_final_distributed.sql b/tests/queries/0_stateless/01517_select_final_distributed.sql index a3d1fcfc185..701828b0b38 100644 --- a/tests/queries/0_stateless/01517_select_final_distributed.sql +++ b/tests/queries/0_stateless/01517_select_final_distributed.sql @@ -1,5 +1,7 @@ -- Tags: distributed +SET allow_experimental_parallel_reading_from_replicas = 0; + DROP TABLE IF EXISTS test5346; CREATE TABLE test5346 (`Id` String, `Timestamp` DateTime, `updated` DateTime) diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index 25c47c008bd..ca9f296b6bf 100644 --- 
a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS select_final; SET do_not_merge_across_partitions_select_final = 1; -CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); +CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2); INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1, '' FROM numbers(2); @@ -31,6 +31,8 @@ INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(50 OPTIMIZE TABLE select_final FINAL; +SET remote_filesystem_read_method = 'read'; + SELECT max(x) FROM select_final FINAL; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/01526_max_untracked_memory.sh b/tests/queries/0_stateless/01526_max_untracked_memory.sh index 20c986f14ca..45fdb314fb2 100755 --- a/tests/queries/0_stateless/01526_max_untracked_memory.sh +++ b/tests/queries/0_stateless/01526_max_untracked_memory.sh @@ -1,9 +1,6 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan -# Tag no-tsan: requires TraceCollector, does not available under sanitizers -# Tag no-asan: requires TraceCollector, does not available under sanitizers -# Tag no-ubsan: requires TraceCollector, does not available under sanitizers -# Tag no-msan: requires TraceCollector, does not available under sanitizers +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-cpu-aarch64 +# requires TraceCollector, which is not available under sanitizers and aarch64 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01533_multiple_nested.sql b/tests/queries/0_stateless/01533_multiple_nested.sql index 40f287b4afd..03724ce0b46 100644 --- a/tests/queries/0_stateless/01533_multiple_nested.sql +++ b/tests/queries/0_stateless/01533_multiple_nested.sql @@ -3,6 +3,7 @@ DROP TABLE IF EXISTS nested; SET flatten_nested = 0; +SET use_uncompressed_cache = 0; CREATE TABLE nested ( diff --git a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql index 2b1a66147a4..04777f5b31c 100644 --- a/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql +++ b/tests/queries/0_stateless/01557_max_parallel_replicas_no_sample.sql @@ -1,5 +1,7 @@ -- Tags: replica +SET allow_experimental_parallel_reading_from_replicas=0; + DROP TABLE IF EXISTS t; CREATE TABLE t (x String) ENGINE = MergeTree ORDER BY x; INSERT INTO t VALUES ('Hello'); diff --git a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh index 118d0a4fb96..e54783e9655 100755 --- a/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh +++ b/tests/queries/0_stateless/01569_query_profiler_big_query_id.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql index 788c99da76d..bd3e651e0dc 
100644 --- a/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql +++ b/tests/queries/0_stateless/01582_move_to_prewhere_compact_parts.sql @@ -1,3 +1,6 @@ +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS prewhere_move; CREATE TABLE prewhere_move (x Int, y String) ENGINE = MergeTree ORDER BY tuple(); INSERT INTO prewhere_move SELECT number, toString(number) FROM numbers(1000); diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 8af2c4c6b25..655232fcdd4 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -1141,6 +1141,28 @@ from ( from numbers_mt(10000) ) settings max_block_size = 7; 49995000 +-- a test with aggregate function which is -state type +select bitmapCardinality(bs) +from + ( + select groupBitmapMergeState(bm) over (order by k asc rows between unbounded preceding and current row) as bs + from + ( + select + groupBitmapState(number) as bm, k + from + ( + select + number, + number % 3 as k + from numbers(3) + ) + group by k + ) + ); +1 +2 +3 -- -INT_MIN row offset that can lead to problems with negation, found when fuzzing -- under UBSan. Should be limited to at most INT_MAX. select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index e1e0842ad89..4a900045c6d 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -1,3 +1,5 @@ +-- Tags: long + -- { echo } -- just something basic @@ -442,6 +444,26 @@ from ( from numbers_mt(10000) ) settings max_block_size = 7; +-- a test with aggregate function which is -state type +select bitmapCardinality(bs) +from + ( + select groupBitmapMergeState(bm) over (order by k asc rows between unbounded preceding and current row) as bs + from + ( + select + groupBitmapState(number) as bm, k + from + ( + select + number, + number % 3 as k + from numbers(3) + ) + group by k + ) + ); + -- -INT_MIN row offset that can lead to problems with negation, found when fuzzing -- under UBSan. Should be limited to at most INT_MAX. select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index a73045f5a6f..750809da338 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -1,3 +1,5 @@ +SET use_uncompressed_cache = 0; + DROP TABLE IF EXISTS adaptive_table; --- If granularity of consequent blocks differs a lot, then adaptive @@ -20,6 +22,8 @@ OPTIMIZE TABLE adaptive_table FINAL; SELECT marks FROM system.parts WHERE table = 'adaptive_table' and database=currentDatabase() and active; +SET remote_fs_enable_cache = 0; + -- If we have computed granularity incorrectly than we will exceed this limit. 
SET max_memory_usage='30M'; diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql index 6bbf6fcec6a..7ec3153886c 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -2,6 +2,10 @@ drop table if exists data_01641; +-- Disable cache for s3 storage tests because it increases memory usage. +set remote_fs_enable_cache=0; +set remote_filesystem_read_method='read'; + create table data_01641 (key Int, value String) engine=MergeTree order by (key, repeat(value, 40)) settings old_parts_lifetime=0, min_bytes_for_wide_part=0; SET max_block_size = 1000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; @@ -9,6 +13,7 @@ insert into data_01641 select number, toString(number) from numbers(120000); -- Definitely should fail and it proves that memory is tracked in OPTIMIZE query. set max_memory_usage='10Mi', max_untracked_memory=0; + optimize table data_01641 final; -- { serverError 241 } drop table data_01641; diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index de3d3ac3eb6..b66d788a338 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -64,7 +64,7 @@ $CLICKHOUSE_CLIENT -q " settings enable_optimize_predicate_expression=0" echo "> one condition of filter should be pushed down after aggregating, other two conditions are ANDed" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s - 8 and s - 4 @@ -77,7 +77,7 @@ $CLICKHOUSE_CLIENT -q " settings enable_optimize_predicate_expression=0" echo "> two conditions of filter should be pushed down after aggregating and ANDed, one condition is aliased" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --convert_query_to_cnf=0 -q " explain actions = 1 select s, y from ( select sum(x) as s, y from (select number as x, number + 1 as y from numbers(10)) group by y ) where y != 0 and s != 8 and y - 4 @@ -127,7 +127,7 @@ $CLICKHOUSE_CLIENT -q " settings enable_optimize_predicate_expression=0" echo "> filter is pushed down before sorting steps" -$CLICKHOUSE_CLIENT -q " +$CLICKHOUSE_CLIENT --convert_query_to_cnf=0 -q " explain actions = 1 select x, y from ( select number % 2 as x, number % 3 as y from numbers(6) order by y desc ) where x != 0 and y != 0 diff --git a/tests/queries/0_stateless/01666_blns_long.sql b/tests/queries/0_stateless/01666_blns_long.sql index fd959cf0a73..74054551b18 100644 --- a/tests/queries/0_stateless/01666_blns_long.sql +++ b/tests/queries/0_stateless/01666_blns_long.sql @@ -27,6 +27,8 @@ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ +SET max_insert_threads = 0; + DROP TABLE IF EXISTS test; CREATE TABLE test diff --git a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql index 3f5c5c2f25b..d70665655ca 100644 --- a/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql +++ b/tests/queries/0_stateless/01671_aggregate_function_group_bitmap_data.sql @@ -1,3 +1,5 @@ +SET group_by_two_level_threshold = 10000; + CREATE TABLE group_bitmap_data_test ( `pickup_date` Date, diff --git a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql index ecc11c625e3..789892dbd38 100644 --- a/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql +++ b/tests/queries/0_stateless/01737_move_order_key_to_prewhere_select_final.sql @@ -1,3 +1,6 @@ +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS prewhere_move_select_final; CREATE TABLE prewhere_move_select_final (x Int, y Int, z Int) ENGINE = ReplacingMergeTree() ORDER BY (x, y); diff --git a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh index 02943cad583..e10032e04fd 100755 --- a/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh +++ b/tests/queries/0_stateless/01746_long_zstd_http_compression_json_format.sh @@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: zstd' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) FORMAT JSON" | zstd -d | tail -n30 | head -n23 +${CLICKHOUSE_CURL} -sS -H 'Accept-Encoding: zstd' "${CLICKHOUSE_URL}&enable_http_compression=1" -d "SELECT toDate('2020-12-12') as datetime, 'test-pipeline' as pipeline, 'clickhouse-test-host-001.clickhouse.com' as host, 'clickhouse' as home, 'clickhouse' as detail, number as row_number FROM numbers(1000000) SETTINGS max_block_size=65505 FORMAT JSON" | zstd -d | tail -n30 | head -n23 diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 9b76ca91780..15e00db0231 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -12,6 +12,9 @@ WITH _CAST(\'default\', \'Nullable(String)\') AS `id_2` SELECT `one`.`dummy`, ig optimize_skip_unused_shards_rewrite_in(0,) 0 0 WITH _CAST(\'default\', \'Nullable(String)\') AS `id_0` SELECT `one`.`dummy`, ignore(`id_0`) FROM `system`.`one` WHERE `dummy` IN tuple(0) +signed column +WITH _CAST(\'default\', \'Nullable(String)\') AS `key_signed` SELECT `key`, ignore(`key_signed`) FROM `default`.`data_01756_signed` WHERE `key` IN tuple(-1) +WITH _CAST(\'default\', \'Nullable(String)\') AS `key_signed` SELECT `key`, ignore(`key_signed`) FROM `default`.`data_01756_signed` WHERE `key` IN tuple(-2) 0 0 errors diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql 
b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index 220d5d91a0b..b0900073151 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -9,6 +9,7 @@ drop table if exists dist_01756; drop table if exists dist_01756_str; drop table if exists dist_01756_column; drop table if exists data_01756_str; +drop table if exists data_01756_signed; -- SELECT -- intHash64(0) % 2, @@ -83,6 +84,20 @@ select query from system.query_log where type = 'QueryFinish' order by query; +-- signed column +select 'signed column'; +create table data_01756_signed (key Int) engine=Null; +with (select currentDatabase()) as key_signed select *, ignore(key_signed) from cluster(test_cluster_two_shards, currentDatabase(), data_01756_signed, key) where key in (-1, -2); +system flush logs; +select query from system.query_log where + event_date >= yesterday() and + event_time > now() - interval 1 hour and + not is_initial_query and + query not like '%system%query_log%' and + query like concat('WITH%', currentDatabase(), '%AS `key_signed` %') and + type = 'QueryFinish' +order by query; + -- not tuple select * from dist_01756 where dummy in (0); select * from dist_01756 where dummy in ('0'); @@ -139,3 +154,4 @@ drop table dist_01756; drop table dist_01756_str; drop table dist_01756_column; drop table data_01756_str; +drop table data_01756_signed; diff --git a/tests/queries/0_stateless/01763_max_distributed_depth.sql b/tests/queries/0_stateless/01763_max_distributed_depth.sql index 12b2e368007..f50d15e7121 100644 --- a/tests/queries/0_stateless/01763_max_distributed_depth.sql +++ b/tests/queries/0_stateless/01763_max_distributed_depth.sql @@ -1,5 +1,7 @@ -- Tags: distributed +SET prefer_localhost_replica = 1; + DROP TABLE IF EXISTS tt6; CREATE TABLE tt6 @@ -13,6 +15,8 @@ CREATE TABLE tt6 ) ENGINE = Distributed('test_shard_localhost', '', 'tt7', rand()); +DROP TABLE IF EXISTS tt7; + CREATE TABLE tt7 as tt6 ENGINE = Distributed('test_shard_localhost', '', 'tt6', rand()); INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError 581 } @@ -28,3 +32,4 @@ INSERT INTO tt6 VALUES (1, 1, 1, 1, 'ok'); -- { serverError 306} SELECT * FROM tt6; -- { serverError 306 } DROP TABLE tt6; +DROP TABLE tt7; diff --git a/tests/queries/0_stateless/01786_explain_merge_tree.sh b/tests/queries/0_stateless/01786_explain_merge_tree.sh index 6be86f9ce02..eb47f065044 100755 --- a/tests/queries/0_stateless/01786_explain_merge_tree.sh +++ b/tests/queries/0_stateless/01786_explain_merge_tree.sh @@ -4,6 +4,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh +CLICKHOUSE_CLIENT="$CLICKHOUSE_CLIENT --optimize_move_to_prewhere=1 --convert_query_to_cnf=0" + $CLICKHOUSE_CLIENT -q "drop table if exists test_index" $CLICKHOUSE_CLIENT -q "drop table if exists idx" diff --git a/tests/queries/0_stateless/01798_uniq_theta_sketch.sql b/tests/queries/0_stateless/01798_uniq_theta_sketch.sql index bb400c5de14..eace83d5cfa 100644 --- a/tests/queries/0_stateless/01798_uniq_theta_sketch.sql +++ b/tests/queries/0_stateless/01798_uniq_theta_sketch.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET max_block_size = 65505; + SELECT 'uniqTheta many agrs'; SELECT diff --git a/tests/queries/0_stateless/01822_short_circuit.sql b/tests/queries/0_stateless/01822_short_circuit.sql index 48fff04921b..c7379d210eb 100644 --- a/tests/queries/0_stateless/01822_short_circuit.sql +++ b/tests/queries/0_stateless/01822_short_circuit.sql @@ -1,4 +1,5 @@ set short_circuit_function_evaluation = 'enable'; +set convert_query_to_cnf = 0; select if(number > 0, intDiv(number + 100, number), throwIf(number)) from numbers(10); select multiIf(number == 0, 0, number == 1, intDiv(1, number), number == 2, intDiv(1, number - 1), number == 3, intDiv(1, number - 2), intDiv(1, number - 3)) from numbers(10); diff --git a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql index e03972e818d..c4ef5516fc8 100644 --- a/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql +++ b/tests/queries/0_stateless/01824_move_to_prewhere_many_columns.sql @@ -1,3 +1,6 @@ +SET optimize_move_to_prewhere = 1; +SET convert_query_to_cnf = 0; + DROP TABLE IF EXISTS t_move_to_prewhere; CREATE TABLE t_move_to_prewhere (id UInt32, a UInt8, b UInt8, c UInt8, fat_string String) diff --git a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python index 929eaae8067..4f6878665aa 100644 --- a/tests/queries/0_stateless/01854_HTTP_dict_decompression.python +++ b/tests/queries/0_stateless/01854_HTTP_dict_decompression.python @@ -158,7 +158,7 @@ def test_select(dict_name="", schema="word String, counter UInt32", requests=[], COMPRESS_METHOD = requests[i] print(i, COMPRESS_METHOD, ADDING_ENDING, SEND_ENCODING) - check_answers("select * from {}".format(dict_name), answers[i]) + check_answers("SELECT * FROM {} ORDER BY word".format(dict_name), answers[i]) def main(): # first three for encoding, second three for url @@ -171,7 +171,7 @@ def main(): ] # This answers got experemently in non compressed mode and they are correct - answers = ['''This 152\nHello 1\nis 9283\ndata 555\nWorld 2\ntesting 2313213'''] * 5 + answers = ['''Hello 1\nThis 152\nWorld 2\ndata 555\nis 9283\ntesting 2313213'''] * 5 t = start_server(len(insert_requests)) t.start() diff --git a/tests/queries/0_stateless/01917_prewhere_column_type.sql b/tests/queries/0_stateless/01917_prewhere_column_type.sql index 5147e6093a9..c0bc0c3e36b 100644 --- a/tests/queries/0_stateless/01917_prewhere_column_type.sql +++ b/tests/queries/0_stateless/01917_prewhere_column_type.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS t1; CREATE TABLE t1 ( s String, f Float32, e UInt16 ) ENGINE = MergeTree ORDER BY tuple() SETTINGS min_bytes_for_wide_part = '100G'; diff --git a/tests/queries/0_stateless/01921_datatype_date32.reference b/tests/queries/0_stateless/01921_datatype_date32.reference index 2114f6f6b1e..8beaefbeb38 100644 --- 
a/tests/queries/0_stateless/01921_datatype_date32.reference +++ b/tests/queries/0_stateless/01921_datatype_date32.reference @@ -221,13 +221,13 @@ 1925-04-01 1925-04-01 2283-03-31 -1925-01-01 +2283-11-11 2021-09-22 -------addYears--------- 1926-01-01 1926-01-01 2283-11-11 -1925-01-01 +2283-11-11 2022-06-22 -------subtractSeconds--------- 1925-01-01 00:00:00.000 diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 7ea102e11e9..9f65cf73252 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -1,5 +1,9 @@ +-- Tags: no-random-settings + DROP TABLE IF EXISTS order_by_desc; +SET remote_fs_enable_cache=0; + CREATE TABLE order_by_desc (u UInt32, s String) ENGINE MergeTree ORDER BY u PARTITION BY u % 100 SETTINGS index_granularity = 1024; diff --git a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference index b856b079327..9896f9396b6 100644 --- a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference @@ -38,8 +38,16 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from d 1 0 1 0 1 0 +1 128 2 1 4 127 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 -- Int16, Int16 select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; 1 0 @@ -72,8 +80,14 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from 1 0 1 0 1 0 +1 32768 2 1 4 32767 +4 65535 +4 65535 +4 65535 +4 65535 +4 65535 -- Int32, Int32 select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; 1 0 @@ -100,8 +114,12 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; 1 0 1 0 +1 2147483648 2 1 4 2147483647 +4 4294967295 +4 4294967295 +4 4294967295 -- Int64, Int64 select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; 1 0 @@ -122,8 +140,10 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from -- UInt64, Int64 select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; 1 0 +1 9223372036854775808 2 1 4 9223372036854775807 +4 18446744073709551615 -- modulo(Int8) select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt8(id)%255) where id in (-1) order by _shard_num, id; 4 -1 diff --git a/tests/queries/0_stateless/01943_query_id_check.sql b/tests/queries/0_stateless/01943_query_id_check.sql index cb2ef090854..ad9e88e0478 100644 --- a/tests/queries/0_stateless/01943_query_id_check.sql +++ b/tests/queries/0_stateless/01943_query_id_check.sql @@ -1,6 +1,8 @@ -- Tags: no-replicated-database -- Tag no-replicated-database: Different query_id +SET prefer_localhost_replica = 1; + DROP TABLE IF EXISTS tmp; CREATE TABLE tmp 
ENGINE = TinyLog AS SELECT queryID(); diff --git a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql index fa2fc1800c1..184e6321988 100644 --- a/tests/queries/0_stateless/01951_distributed_push_down_limit.sql +++ b/tests/queries/0_stateless/01951_distributed_push_down_limit.sql @@ -1,5 +1,7 @@ -- Tags: distributed +set prefer_localhost_replica = 1; + -- { echo } explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=0; explain select * from remote('127.{1,2}', view(select * from numbers(1e6))) order by number limit 10 settings distributed_push_down_limit=1; diff --git a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql index d1f80b42e75..74b55b95315 100644 --- a/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01952_optimize_distributed_group_by_sharding_key.sql @@ -2,6 +2,7 @@ set optimize_skip_unused_shards=1; set optimize_distributed_group_by_sharding_key=1; +set prefer_localhost_replica=1; -- { echo } explain select distinct k1 from remote('127.{1,2}', view(select 1 k1, 2 k2, 3 v from numbers(2)), cityHash64(k1, k2)); -- not optimized diff --git a/tests/queries/0_stateless/02006_test_positional_arguments.sql b/tests/queries/0_stateless/02006_test_positional_arguments.sql index 54b55c4a9f8..7442ca6bbf6 100644 --- a/tests/queries/0_stateless/02006_test_positional_arguments.sql +++ b/tests/queries/0_stateless/02006_test_positional_arguments.sql @@ -1,3 +1,4 @@ +set group_by_two_level_threshold = 100000; set enable_positional_arguments = 1; drop table if exists test; diff --git a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh index f9a58818404..086419baa61 100755 --- a/tests/queries/0_stateless/02015_async_inserts_stress_long.sh +++ b/tests/queries/0_stateless/02015_async_inserts_stress_long.sh @@ -1,10 +1,13 @@ #!/usr/bin/env bash +# Tags: no-random-settings + set -e CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh + function insert1() { url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" @@ -24,6 +27,14 @@ function insert2() done } +function insert3() +{ + url="${CLICKHOUSE_URL}&async_insert=1&wait_for_async_insert=0" + while true; do + ${CLICKHOUSE_CURL} -sS "$url" -d "INSERT INTO FUNCTION remote('127.0.0.1', $CLICKHOUSE_DATABASE, async_inserts) VALUES (7, 'g') (8, 'h')" + done +} + function select1() { while true; do @@ -53,6 +64,7 @@ TIMEOUT=10 export -f insert1 export -f insert2 +export -f insert3 export -f select1 export -f select2 export -f truncate1 @@ -60,6 +72,7 @@ export -f truncate1 for _ in {1..5}; do timeout $TIMEOUT bash -c insert1 & timeout $TIMEOUT bash -c insert2 & + timeout $TIMEOUT bash -c insert3 & done timeout $TIMEOUT bash -c select1 & diff --git a/tests/queries/0_stateless/02030_tuple_filter.sql b/tests/queries/0_stateless/02030_tuple_filter.sql index 5efedeb8c0d..c19f538b8e1 100644 --- a/tests/queries/0_stateless/02030_tuple_filter.sql +++ b/tests/queries/0_stateless/02030_tuple_filter.sql @@ -5,6 +5,7 @@ CREATE TABLE test_tuple_filter (id UInt32, value String, log_date Date) Engine=M INSERT INTO test_tuple_filter VALUES (1,'A','2021-01-01'),(2,'B','2021-01-01'),(3,'C','2021-01-01'),(4,'D','2021-01-02'),(5,'E','2021-01-02'); SET force_primary_key = 1; +SET optimize_move_to_prewhere = 1; SELECT * FROM test_tuple_filter WHERE (id, value) = (1, 'A'); SELECT * FROM test_tuple_filter WHERE (1, 'A') = (id, value); diff --git a/tests/queries/0_stateless/02050_client_profile_events.sh b/tests/queries/0_stateless/02050_client_profile_events.sh index 459e8505e22..f8bcea0d1bb 100755 --- a/tests/queries/0_stateless/02050_client_profile_events.sh +++ b/tests/queries/0_stateless/02050_client_profile_events.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # do not print any ProfileEvents packets $CLICKHOUSE_CLIENT -q 'select * from numbers(1e5) format Null' |& grep -c 'SelectedRows' # print only last (and also number of rows to provide more info in case of failures) -$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception) 1> >(wc -l) +$CLICKHOUSE_CLIENT --max_block_size=65505 --print-profile-events --profile-events-delay-ms=-1 -q 'select * from numbers(1e5)' 2> >(grep -o -e '\[ 0 \] SelectedRows: .*$' -e Exception) 1> >(wc -l) # print everything profile_events="$($CLICKHOUSE_CLIENT --max_block_size 1 --print-profile-events -q 'select sleep(1) from numbers(2) format Null' |& grep -c 'SelectedRows')" test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference index 993dd9b1cde..4dff9ef38ef 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.reference @@ -9,3 +9,4 @@ 1 1 1 +1 diff --git a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql index 32f7f63f6d0..66ccb044549 100644 --- a/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql +++ b/tests/queries/0_stateless/02100_replaceRegexpAll_bug.sql @@ -12,3 +12,5 @@ SELECT '1,,' == replaceRegexpOne('1,,', '^[,]*|[,]*$', '') x; SELECT '5935,5998,6014' == trim(BOTH ', ' FROM '5935,5998,6014, ') x; SELECT '5935,5998,6014' == replaceRegexpAll('5935,5998,6014, ', concat('^[', 
regexpQuoteMeta(', '), ']*|[', regexpQuoteMeta(', '), ']*$'), '') AS x; + +SELECT trim(BOTH '"' FROM '2') == '2' diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index f97eaace72a..cecdd0498b1 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -36,7 +36,7 @@ CREATE TABLE system.part_moves_between_shards\n(\n `database` String,\n `t CREATE TABLE system.parts\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `secondary_indices_compressed_bytes` UInt64,\n `secondary_indices_uncompressed_bytes` UInt64,\n `secondary_indices_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `projections` Array(String),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.parts_columns\n(\n `partition` String,\n `name` String,\n `uuid` UUID,\n `part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `serialization_kind` String,\n `subcolumns.names` Array(String),\n `subcolumns.types` Array(String),\n `subcolumns.serializations` Array(String),\n `bytes` 
UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.privileges\n(\n `privilege` Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, \'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, 
\'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'SOURCES\' = 135, \'ALL\' = 136, \'NONE\' = 137),\n `aliases` Array(String),\n `level` Nullable(Enum8(\'GLOBAL\' = 0, \'DATABASE\' = 1, \'TABLE\' = 2, \'DICTIONARY\' = 3, \'VIEW\' = 4, \'COLUMN\' = 5)),\n `parent_group` Nullable(Enum16(\'SHOW DATABASES\' = 0, \'SHOW TABLES\' = 1, \'SHOW COLUMNS\' = 2, \'SHOW DICTIONARIES\' = 3, \'SHOW\' = 4, \'SELECT\' = 5, \'INSERT\' = 6, \'ALTER UPDATE\' = 7, \'ALTER DELETE\' = 8, \'ALTER ADD COLUMN\' = 9, \'ALTER MODIFY COLUMN\' = 10, \'ALTER DROP COLUMN\' = 11, \'ALTER COMMENT COLUMN\' = 12, \'ALTER CLEAR COLUMN\' = 13, \'ALTER RENAME COLUMN\' = 14, \'ALTER MATERIALIZE COLUMN\' = 15, \'ALTER COLUMN\' = 16, \'ALTER MODIFY COMMENT\' = 17, \'ALTER ORDER BY\' = 18, \'ALTER SAMPLE BY\' = 19, \'ALTER ADD INDEX\' = 20, \'ALTER DROP INDEX\' = 21, \'ALTER MATERIALIZE INDEX\' = 22, \'ALTER CLEAR INDEX\' = 23, \'ALTER INDEX\' = 24, \'ALTER ADD PROJECTION\' = 25, \'ALTER DROP PROJECTION\' = 26, \'ALTER MATERIALIZE PROJECTION\' = 27, \'ALTER CLEAR PROJECTION\' = 28, \'ALTER PROJECTION\' = 29, \'ALTER ADD CONSTRAINT\' = 30, \'ALTER DROP CONSTRAINT\' = 31, \'ALTER CONSTRAINT\' = 32, \'ALTER TTL\' = 33, \'ALTER MATERIALIZE TTL\' = 34, \'ALTER SETTINGS\' = 35, \'ALTER MOVE PARTITION\' = 36, \'ALTER FETCH PARTITION\' = 37, \'ALTER FREEZE PARTITION\' = 38, \'ALTER DATABASE SETTINGS\' = 39, \'ALTER TABLE\' = 40, \'ALTER DATABASE\' = 41, \'ALTER VIEW REFRESH\' = 42, \'ALTER VIEW MODIFY QUERY\' = 43, \'ALTER VIEW\' = 44, \'ALTER\' = 45, \'CREATE DATABASE\' = 46, \'CREATE TABLE\' = 47, \'CREATE VIEW\' = 48, \'CREATE DICTIONARY\' = 49, \'CREATE TEMPORARY TABLE\' = 50, \'CREATE FUNCTION\' = 51, \'CREATE\' = 52, \'DROP DATABASE\' = 53, \'DROP TABLE\' = 54, \'DROP VIEW\' = 55, \'DROP DICTIONARY\' = 56, \'DROP FUNCTION\' = 57, \'DROP\' = 58, \'TRUNCATE\' = 59, \'OPTIMIZE\' = 60, \'KILL QUERY\' = 61, \'MOVE PARTITION BETWEEN SHARDS\' = 62, \'CREATE USER\' = 63, \'ALTER USER\' = 64, \'DROP USER\' = 65, \'CREATE ROLE\' = 66, \'ALTER ROLE\' = 67, \'DROP ROLE\' = 68, \'ROLE ADMIN\' = 69, \'CREATE ROW POLICY\' = 70, \'ALTER ROW POLICY\' = 71, \'DROP ROW POLICY\' = 72, \'CREATE QUOTA\' = 73, \'ALTER QUOTA\' = 74, \'DROP QUOTA\' = 75, \'CREATE SETTINGS PROFILE\' = 76, \'ALTER SETTINGS PROFILE\' = 77, \'DROP SETTINGS PROFILE\' = 78, \'SHOW USERS\' = 79, \'SHOW ROLES\' = 80, \'SHOW ROW POLICIES\' = 81, \'SHOW QUOTAS\' = 82, \'SHOW SETTINGS PROFILES\' = 83, \'SHOW ACCESS\' = 84, \'ACCESS MANAGEMENT\' = 85, \'SYSTEM SHUTDOWN\' = 86, \'SYSTEM DROP DNS CACHE\' = 87, \'SYSTEM DROP MARK CACHE\' = 88, \'SYSTEM DROP UNCOMPRESSED CACHE\' = 89, \'SYSTEM DROP MMAP CACHE\' = 90, \'SYSTEM DROP COMPILED EXPRESSION CACHE\' = 91, \'SYSTEM DROP CACHE\' = 92, \'SYSTEM RELOAD CONFIG\' = 93, \'SYSTEM RELOAD SYMBOLS\' = 94, \'SYSTEM RELOAD DICTIONARY\' = 95, \'SYSTEM RELOAD MODEL\' = 96, \'SYSTEM RELOAD FUNCTION\' = 97, \'SYSTEM RELOAD EMBEDDED DICTIONARIES\' = 98, \'SYSTEM RELOAD\' = 99, \'SYSTEM RESTART DISK\' = 100, \'SYSTEM MERGES\' = 101, \'SYSTEM TTL MERGES\' = 102, \'SYSTEM FETCHES\' = 103, \'SYSTEM MOVES\' = 104, \'SYSTEM DISTRIBUTED SENDS\' = 105, \'SYSTEM REPLICATED SENDS\' = 106, \'SYSTEM SENDS\' = 107, \'SYSTEM REPLICATION QUEUES\' = 108, \'SYSTEM DROP REPLICA\' = 109, \'SYSTEM SYNC REPLICA\' = 110, \'SYSTEM RESTART REPLICA\' = 111, \'SYSTEM RESTORE REPLICA\' = 112, \'SYSTEM FLUSH DISTRIBUTED\' = 113, \'SYSTEM FLUSH LOGS\' = 114, 
\'SYSTEM FLUSH\' = 115, \'SYSTEM THREAD FUZZER\' = 116, \'SYSTEM\' = 117, \'dictGet\' = 118, \'addressToLine\' = 119, \'addressToLineWithInlines\' = 120, \'addressToSymbol\' = 121, \'demangle\' = 122, \'INTROSPECTION\' = 123, \'FILE\' = 124, \'URL\' = 125, \'REMOTE\' = 126, \'MONGO\' = 127, \'MYSQL\' = 128, \'POSTGRES\' = 129, \'SQLITE\' = 130, \'ODBC\' = 131, \'JDBC\' = 132, \'HDFS\' = 133, \'S3\' = 134, \'SOURCES\' = 135, \'ALL\' = 136, \'NONE\' = 137))\n)\nENGINE = SystemPrivileges()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' -CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' +CREATE TABLE system.processes\n(\n `is_initial_query` UInt8,\n `user` String,\n `query_id` String,\n `address` IPv6,\n `port` UInt16,\n `initial_user` String,\n `initial_query_id` String,\n `initial_address` IPv6,\n `initial_port` UInt16,\n `interface` UInt8,\n `os_user` String,\n `client_hostname` String,\n `client_name` String,\n `client_revision` UInt64,\n `client_version_major` UInt64,\n `client_version_minor` UInt64,\n `client_version_patch` UInt64,\n `http_method` UInt8,\n `http_user_agent` String,\n `http_referer` String,\n `forwarded_for` String,\n `quota_key` String,\n `distributed_depth` UInt64,\n `elapsed` Float64,\n `is_cancelled` UInt8,\n `read_rows` UInt64,\n `read_bytes` UInt64,\n `total_rows_approx` UInt64,\n `written_rows` UInt64,\n `written_bytes` UInt64,\n `memory_usage` Int64,\n `peak_memory_usage` Int64,\n `query` String,\n `thread_ids` Array(UInt64),\n `ProfileEvents` Map(String, UInt64),\n `Settings` Map(String, String),\n `current_database` String,\n `ProfileEvents.Names` Array(String),\n `ProfileEvents.Values` Array(UInt64),\n `Settings.Names` Array(String),\n `Settings.Values` Array(String)\n)\nENGINE = SystemProcesses()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` 
UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `is_frozen` UInt8,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `hash_of_all_files` String,\n `hash_of_uncompressed_files` String,\n `uncompressed_hash_of_compressed_files` String,\n `delete_ttl_info_min` DateTime,\n `delete_ttl_info_max` DateTime,\n `move_ttl_info.expression` Array(String),\n `move_ttl_info.min` Array(DateTime),\n `move_ttl_info.max` Array(DateTime),\n `default_compression_codec` String,\n `recompression_ttl_info.expression` Array(String),\n `recompression_ttl_info.min` Array(DateTime),\n `recompression_ttl_info.max` Array(DateTime),\n `group_by_ttl_info.expression` Array(String),\n `group_by_ttl_info.min` Array(DateTime),\n `group_by_ttl_info.max` Array(DateTime),\n `rows_where_ttl_info.expression` Array(String),\n `rows_where_ttl_info.min` Array(DateTime),\n `rows_where_ttl_info.max` Array(DateTime),\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionParts()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.projection_parts_columns\n(\n `partition` String,\n `name` String,\n `part_type` String,\n `parent_name` String,\n `parent_uuid` UUID,\n `parent_part_type` String,\n `active` UInt8,\n `marks` UInt64,\n `rows` UInt64,\n `bytes_on_disk` UInt64,\n `data_compressed_bytes` UInt64,\n `data_uncompressed_bytes` UInt64,\n `marks_bytes` UInt64,\n `parent_marks` UInt64,\n `parent_rows` UInt64,\n `parent_bytes_on_disk` UInt64,\n `parent_data_compressed_bytes` UInt64,\n `parent_data_uncompressed_bytes` UInt64,\n `parent_marks_bytes` UInt64,\n `modification_time` DateTime,\n `remove_time` DateTime,\n `refcount` UInt32,\n `min_date` Date,\n `max_date` Date,\n `min_time` DateTime,\n `max_time` DateTime,\n `partition_id` String,\n `min_block_number` Int64,\n `max_block_number` Int64,\n `level` UInt32,\n `data_version` UInt64,\n `primary_key_bytes_in_memory` UInt64,\n `primary_key_bytes_in_memory_allocated` UInt64,\n `database` String,\n `table` String,\n `engine` String,\n `disk_name` String,\n `path` String,\n `column` String,\n `type` String,\n `column_position` UInt64,\n `default_kind` String,\n `default_expression` String,\n `column_bytes_on_disk` UInt64,\n `column_data_compressed_bytes` UInt64,\n `column_data_uncompressed_bytes` UInt64,\n `column_marks_bytes` UInt64,\n `bytes` UInt64,\n `marks_size` UInt64\n)\nENGINE = SystemProjectionPartsColumns()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' CREATE TABLE system.quota_limits\n(\n `quota_name` String,\n `duration` UInt32,\n `is_randomized_interval` UInt8,\n `max_queries` Nullable(UInt64),\n `max_query_selects` Nullable(UInt64),\n `max_query_inserts` Nullable(UInt64),\n `max_errors` Nullable(UInt64),\n `max_result_rows` Nullable(UInt64),\n `max_result_bytes` Nullable(UInt64),\n `max_read_rows` Nullable(UInt64),\n `max_read_bytes` Nullable(UInt64),\n `max_execution_time` Nullable(Float64)\n)\nENGINE = SystemQuotaLimits()\nCOMMENT \'SYSTEM TABLE is built on the fly.\' diff --git a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql b/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql index 75f7f737e85..d0a55c6ba65 100644 --- a/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql 
+++ b/tests/queries/0_stateless/02131_multiply_row_policies_on_same_column.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS 02131_multiply_row_policies_on_same_column; CREATE TABLE 02131_multiply_row_policies_on_same_column (x UInt8) ENGINE = MergeTree ORDER BY x; INSERT INTO 02131_multiply_row_policies_on_same_column VALUES (1), (2), (3), (4); diff --git a/tests/queries/0_stateless/02136_scalar_progress.sh b/tests/queries/0_stateless/02136_scalar_progress.sh index 4608031f83d..9f4429b0caa 100755 --- a/tests/queries/0_stateless/02136_scalar_progress.sh +++ b/tests/queries/0_stateless/02136_scalar_progress.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000));" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" +$CLICKHOUSE_CURL -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0" -d "SELECT (SELECT max(number), count(number) FROM numbers(100000) settings max_block_size=65505);" -v 2>&1 | grep -E "X-ClickHouse-Summary|X-ClickHouse-Progress" diff --git a/tests/queries/0_stateless/02136_scalar_read_rows_json.sh b/tests/queries/0_stateless/02136_scalar_read_rows_json.sh index d589cb60086..34b4b6909b5 100755 --- a/tests/queries/0_stateless/02136_scalar_read_rows_json.sh +++ b/tests/queries/0_stateless/02136_scalar_read_rows_json.sh @@ -7,4 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo "#1" ${CLICKHOUSE_CLIENT} --query='SELECT count() FROM numbers(100) FORMAT JSON;' | grep -a -v "elapsed" echo "#2" -${CLICKHOUSE_CLIENT} --query='SELECT (SELECT max(number), count(number) FROM numbers(100000) as n) FORMAT JSON;' | grep -a -v "elapsed" | grep -v "_subquery" +${CLICKHOUSE_CLIENT} --query='SELECT (SELECT max(number), count(number) FROM numbers(100000) as n) SETTINGS max_block_size = 65505 FORMAT JSON;' | grep -a -v "elapsed" | grep -v "_subquery" diff --git a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference index e8183f05f5d..da7b788b157 100644 --- a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference +++ b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference @@ -1,3 +1,18 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql index 6725fa04114..ebbc6ce97e0 100644 --- a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql +++ b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql @@ -1,3 +1,21 @@ -select replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; -select replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; -select replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; +SELECT replaceRegexpAll(',,1,,', '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll(',,1', '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll('1,,', '^[,]*|[,]*$', ''); + +SELECT replaceRegexpAll(materialize(',,1,,'), '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll(materialize(',,1'), '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll(materialize('1,,'), 
'^[,]*|[,]*$', ''); + +SELECT replaceRegexpAll('a', 'z*', '') == 'a'; +SELECT replaceRegexpAll('aa', 'z*', '') == 'aa'; +SELECT replaceRegexpAll('aaq', 'z*', '') == 'aaq'; +SELECT replaceRegexpAll('aazq', 'z*', '') == 'aaq'; +SELECT replaceRegexpAll('aazzq', 'z*', '') == 'aaq'; +SELECT replaceRegexpAll('aazzqa', 'z*', '') == 'aaqa'; + +SELECT replaceRegexpAll(materialize('a'), 'z*', '') == 'a'; +SELECT replaceRegexpAll(materialize('aa'), 'z*', '') == 'aa'; +SELECT replaceRegexpAll(materialize('aaq'), 'z*', '') == 'aaq'; +SELECT replaceRegexpAll(materialize('aazq'), 'z*', '') == 'aaq'; +SELECT replaceRegexpAll(materialize('aazzq'), 'z*', '') == 'aaq'; +SELECT replaceRegexpAll(materialize('aazzqa'), 'z*', '') == 'aaqa'; diff --git a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh index 2801ec16a43..44de0e15370 100755 --- a/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh +++ b/tests/queries/0_stateless/02152_http_external_tables_memory_tracking.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -# Tags: no-tsan -# ^^^^^^^ +# Tags: no-tsan, no-cpu-aarch64 # TSan does not support tracing. +# trace_log doesn't work on aarch64 # Regression for proper release of Context, # via tracking memory of external tables. diff --git a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql index 69fa9ac5ee2..b75d3fa22e5 100644 --- a/tests/queries/0_stateless/02156_storage_merge_prewhere.sql +++ b/tests/queries/0_stateless/02156_storage_merge_prewhere.sql @@ -1,3 +1,5 @@ +SET optimize_move_to_prewhere = 1; + DROP TABLE IF EXISTS t_02156_mt1; DROP TABLE IF EXISTS t_02156_mt2; DROP TABLE IF EXISTS t_02156_log; diff --git a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql index baddea30ae3..12cae6af189 100644 --- a/tests/queries/0_stateless/02161_addressToLineWithInlines.sql +++ b/tests/queries/0_stateless/02161_addressToLineWithInlines.sql @@ -1,4 +1,4 @@ --- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug +-- Tags: no-tsan, no-asan, no-ubsan, no-msan, no-debug, no-cpu-aarch64 SELECT addressToLineWithInlines(1); -- { serverError 446 } diff --git a/tests/queries/0_stateless/02169_map_functions.reference b/tests/queries/0_stateless/02169_map_functions.reference new file mode 100644 index 00000000000..160aebbc852 --- /dev/null +++ b/tests/queries/0_stateless/02169_map_functions.reference @@ -0,0 +1,33 @@ +{} +{} +{} +{'key3':103} +{} +{} +{} +{'key3':100,'key2':101,'key4':102} {'key4':102} +{'key3':101,'key2':102,'key4':103} {'key2':102,'key4':103} +{'key3':102,'key2':103,'key4':104} {'key3':102,'key2':103,'key4':104} +{'key3':103,'key2':104,'key4':105} {'key3':103,'key2':104,'key4':105} +{'key1':1111,'key2':2222} {'key2':2222} +{'key1':1112,'key2':2224} {'key1':1112,'key2':2224} +{'key1':1113,'key2':2226} {'key1':1113,'key2':2226} +{'key3':101,'key2':102,'key4':103} +{'key3':102,'key2':103,'key4':104} +{'key3':103,'key2':104,'key4':105} +{'key3':104,'key2':105,'key4':106} +{'key1':1112,'key2':2223} +{'key1':1113,'key2':2225} +{'key1':1114,'key2':2227} +{} +{} +{} +{} +{} +{} +{} +{3:2,1:0,2:0} +{1:2,2:3} +{1:2,2:3} +{'x':'y','x':'y'} +{'x':'y','x':'y'} diff --git a/tests/queries/0_stateless/02169_map_functions.sql b/tests/queries/0_stateless/02169_map_functions.sql new file mode 100644 index 00000000000..4cccaa56722 --- /dev/null +++
b/tests/queries/0_stateless/02169_map_functions.sql @@ -0,0 +1,39 @@ +DROP TABLE IF EXISTS table_map; +CREATE TABLE table_map (id UInt32, col Map(String, UInt64)) engine = MergeTree() ORDER BY tuple(); +INSERT INTO table_map SELECT number, map('key1', number, 'key2', number * 2) FROM numbers(1111, 3); +INSERT INTO table_map SELECT number, map('key3', number, 'key2', number + 1, 'key4', number + 2) FROM numbers(100, 4); + +SELECT mapFilter((k, v) -> k like '%3' and v > 102, col) FROM table_map ORDER BY id; +SELECT col, mapFilter((k, v) -> ((v % 10) > 1), col) FROM table_map ORDER BY id ASC; +SELECT mapApply((k, v) -> (k, v + 1), col) FROM table_map ORDER BY id; +SELECT mapFilter((k, v) -> 0, col) from table_map; +SELECT mapApply((k, v) -> tuple(v + 9223372036854775806), col) FROM table_map; -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT mapUpdate(map(1, 3, 3, 2), map(1, 0, 2, 0)); +SELECT mapApply((x, y) -> (x, x + 1), map(1, 0, 2, 0)); +SELECT mapApply((x, y) -> (x, x + 1), materialize(map(1, 0, 2, 0))); +SELECT mapApply((x, y) -> ('x', 'y'), map(1, 0, 2, 0)); +SELECT mapApply((x, y) -> ('x', 'y'), materialize(map(1, 0, 2, 0))); + +SELECT mapApply(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapApply((x, y) -> (x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x, y) -> ('x'), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x) -> (x, x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x, y) -> (x, 1, 2), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapApply((x, y) -> (x, x + 1)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapApply(map(1, 0, 2, 0), (x, y) -> (x, x + 1)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapApply((x, y) -> (x, x+1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT mapFilter(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapFilter((x, y) -> (toInt32(x)), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> ('x'), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x) -> (x, x), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> (x, 1, 2), map(1, 0, 2, 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> (x, x + 1)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapFilter(map(1, 0, 2, 0), (x, y) -> (x > 0)); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT mapFilter((x, y) -> (x, x + 1), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +SELECT mapUpdate(); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } +SELECT mapUpdate(map(1, 3, 3, 2), map(1, 0, 2, 0), map(1, 0, 2, 0)); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } + +DROP TABLE table_map; diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.config.xml b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.config.xml new file mode 100644 index 00000000000..891fb45e4ba --- /dev/null +++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.config.xml @@ -0,0 +1,24 @@ + + + + trace + true + + + 9000 + 0 + 0 + . 
+ 0 + + + + + users.xml + + + + ./ + + + diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.reference b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh new file mode 100755 index 00000000000..693f1d817e3 --- /dev/null +++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-parallel, no-fasttest +# Tag no-tsan: requires jemalloc to track small allocations +# Tag no-asan: requires jemalloc to track small allocations +# Tag no-ubsan: requires jemalloc to track small allocations +# Tag no-msan: requires jemalloc to track small allocations + + + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml +sed -i 's/<\/password>/c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml + sed -i 's//1<\/access_management>/g' "$CURDIR"/users.xml + +server_opts=( + "--config-file=$CURDIR/$(basename "${BASH_SOURCE[0]}" .sh).config.xml" + "--" + # to avoid multiple listen sockets (complexity for port discovering) + "--listen_host=127.1" + # we will discover the real port later. + "--tcp_port=0" + "--shutdown_wait_unfinished=0" +) + +CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" &> clickhouse-server.stderr & +server_pid=$! + +server_port= +i=0 retries=300 +# wait until server will start to listen (max 30 seconds) +while [[ -z $server_port ]] && [[ $i -lt $retries ]]; do + server_port=$(lsof -n -a -P -i tcp -s tcp:LISTEN -p $server_pid 2>/dev/null | awk -F'[ :]' '/LISTEN/ { print $(NF-1) }') + ((++i)) + sleep 0.1 + if ! kill -0 $server_pid >& /dev/null; then + echo "No server (pid $server_pid)" + break + fi +done +if [[ -z $server_port ]]; then + echo "Cannot wait for LISTEN socket" >&2 + exit 1 +fi + +# wait for the server to start accepting tcp connections (max 30 seconds) +i=0 retries=300 +while ! $CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" --format Null -q 'select 1' 2>/dev/null && [[ $i -lt $retries ]]; do + sleep 0.1 + if ! kill -0 $server_pid >& /dev/null; then + echo "No server (pid $server_pid)" + break + fi +done + + +if ! 
$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" --format Null -q 'select 1'; then + echo "Cannot wait until server will start accepting connections on " >&2 + exit 1 +fi + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q " DROP USER IF EXISTS u_02207, u1_02207"; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u_02207 IDENTIFIED WITH double_sha1_hash BY '8DCDD69CE7D121DE8013062AEAEB2A148910D50E' +" + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q " CREATE USER u1_02207 IDENTIFIED BY 'qwe123'"; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u2_02207 HOST IP '127.1' IDENTIFIED WITH plaintext_password BY 'qwerty' " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u3_02207 HOST IP '127.1' IDENTIFIED WITH no_password " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u4_02207 HOST IP '127.1' NOT IDENTIFIED " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER IF NOT EXISTS u5_02207 " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q " DROP USER u_02207, u1_02207"; + + +# no sleep, since flushing to stderr should not be buffered. + grep 'User is not allowed to Create users' clickhouse-server.stderr + + +# send TERM and save the error code to ensure that it is 0 (EXIT_SUCCESS) +kill $server_pid +wait $server_pid +return_code=$? + +rm -f clickhouse-server.stderr +rm -f "$CURDIR"/users.xml + +exit $return_code diff --git a/tests/queries/0_stateless/02207_s3_content_type.reference b/tests/queries/0_stateless/02207_s3_content_type.reference index 2b0a5bcadc2..b015e4a148c 100644 --- a/tests/queries/0_stateless/02207_s3_content_type.reference +++ b/tests/queries/0_stateless/02207_s3_content_type.reference @@ -1,2 +1,2 @@ -ContentLength:2144451 +ContentLength:6888890 ContentType:binary/octet-stream diff --git a/tests/queries/0_stateless/02207_s3_content_type.sh b/tests/queries/0_stateless/02207_s3_content_type.sh index 5ede30e867c..ca75b36c688 100755 --- a/tests/queries/0_stateless/02207_s3_content_type.sh +++ b/tests/queries/0_stateless/02207_s3_content_type.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh $CLICKHOUSE_CLIENT --query " -INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/content-type.csv.gz', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; +INSERT INTO TABLE FUNCTION s3('http://localhost:11111/test/content-type.csv', 'test', 'testtest', 'CSV', 'number UInt64') SELECT number FROM numbers(1000000) SETTINGS s3_max_single_part_upload_size = 10000, s3_truncate_on_insert = 1; " -aws --endpoint-url http://localhost:11111 s3api head-object --bucket test --key content-type.csv.gz | grep Content | sed 's/[ \t,"]*//g' +aws --endpoint-url http://localhost:11111 s3api head-object --bucket test --key content-type.csv | grep Content | sed 's/[ \t,"]*//g' diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference new file mode 100644 index 00000000000..bd0c9cee464 --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.reference @@ -0,0 +1,74 @@ +1 +1 +alter_partition_version +alter_partition_version +block_numbers +block_numbers +blocks +blocks +columns +columns +columns +columns +failed_parts +failed_parts +flags +flags +host +host +is_active +is_active +is_lost +is_lost +last_part +last_part +leader_election +leader_election +leader_election-0 +leader_election-0 +log +log +log_pointer +log_pointer +max_processed_insert_time +max_processed_insert_time +metadata +metadata +metadata +metadata +metadata_version +metadata_version +min_unprocessed_insert_time +min_unprocessed_insert_time +mutation_pointer +mutation_pointer +mutations +mutations +nonincrement_block_numbers +nonincrement_block_numbers +parallel +parallel +part_moves_shard +part_moves_shard +parts +parts +pinned_part_uuids +pinned_part_uuids +queue +queue +quorum +quorum +replicas +replicas +shared +shared +shared +shared +table_shared_id +table_shared_id +temp +temp +zero_copy_hdfs +zero_copy_hdfs +zero_copy_s3 +zero_copy_s3 diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh new file mode 100755 index 00000000000..db94c59d2de --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +# Tags: no-replicated-database, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2" + +${CLICKHOUSE_CLIENT} -n -q" +CREATE TABLE sample_table ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted', '1') +ORDER BY tuple(); +" + +${CLICKHOUSE_CLIENT} -n -q" +CREATE TABLE sample_table_2 ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_2', '1') +ORDER BY tuple(); +" + +${CLICKHOUSE_CLIENT} --allow_unrestricted_reads_from_keeper=1 --query "SELECT name FROM (SELECT path, name FROM system.zookeeper ORDER BY name) WHERE path LIKE '%$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted%'"; + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2" diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference new file mode 100644 index 00000000000..f95d60dc07b --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.reference @@ -0,0 +1,75 @@ +1 +alter_partition_version +block_numbers +blocks +columns +columns +failed_parts +flags +host +is_active +is_lost +last_part +leader_election +leader_election-0 +log +log_pointer +max_processed_insert_time +metadata +metadata +metadata_version +min_unprocessed_insert_time +mutation_pointer +mutations +nonincrement_block_numbers +parallel +part_moves_shard +parts +pinned_part_uuids +queue +quorum +replicas +shared +shared +table_shared_id +temp +zero_copy_hdfs +zero_copy_s3 +------------------------- +1 +alter_partition_version +block_numbers +blocks +columns +columns +failed_parts +flags +host +is_active +is_lost +last_part +leader_election +leader_election-0 +log +log_pointer +max_processed_insert_time +metadata +metadata +metadata_version +min_unprocessed_insert_time +mutation_pointer +mutations +nonincrement_block_numbers +parallel +part_moves_shard +parts +pinned_part_uuids +queue +quorum +replicas +shared +shared +table_shared_id +temp +zero_copy_hdfs +zero_copy_s3 diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh new file mode 100755 index 00000000000..152d8344764 --- /dev/null +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh @@ -0,0 +1,32 @@ +#!/usr/bin/env bash +# Tags: no-replicated-database, zookeeper + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2;" + +${CLICKHOUSE_CLIENT} -n --query="CREATE TABLE sample_table ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like', '1') +ORDER BY tuple(); +DROP TABLE IF EXISTS sample_table;" + + +${CLICKHOUSE_CLIENT} -n --query "CREATE TABLE sample_table_2 ( + key UInt64 +) +ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like_2', '1') +ORDER BY tuple();" + +${CLICKHOUSE_CLIENT} --allow_unrestricted_reads_from_keeper=1 --query="SELECT name FROM (SELECT path, name FROM system.zookeeper WHERE path LIKE '/clickhouse%' ORDER BY name) WHERE path LIKE '%$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like%'" + +${CLICKHOUSE_CLIENT} --query="SELECT '-------------------------'" + +${CLICKHOUSE_CLIENT} --allow_unrestricted_reads_from_keeper=1 --query="SELECT name FROM (SELECT path, name FROM system.zookeeper WHERE path LIKE '/clickhouse/%' ORDER BY name) WHERE path LIKE '%$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like%'" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table;" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2;" diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.reference b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.reference new file mode 100644 index 00000000000..9e9e0082cb3 --- /dev/null +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.reference @@ -0,0 +1,3 @@ +CREATE TABLE default.test\n(\n `y` Nullable(String),\n `x` Nullable(Float64)\n)\nENGINE = File(\'JSONEachRow\', \'data.jsonl\') +OK +OK diff --git a/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh new file mode 100755 index 00000000000..1ba67fa77ea --- /dev/null +++ b/tests/queries/0_stateless/02222_create_table_without_columns_metadata.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +$CLICKHOUSE_CLIENT -q "insert into table function file(data.jsonl, 'JSONEachRow', 'x UInt32 default 42, y String') select number as x, 'String' as y from numbers(10)" + +$CLICKHOUSE_CLIENT -q "drop table if exists test" +$CLICKHOUSE_CLIENT -q "create table test engine=File(JSONEachRow, 'data.jsonl')" +$CLICKHOUSE_CLIENT -q "show create table test" +$CLICKHOUSE_CLIENT -q "detach table test" + +rm $USER_FILES_PATH/data.jsonl + +$CLICKHOUSE_CLIENT -q "attach table test" +$CLICKHOUSE_CLIENT -q "select * from test" 2>&1 | grep -q "FILE_DOESNT_EXIST" && echo "OK" || echo "FAIL" + + +$CLICKHOUSE_CLIENT -q "drop table test" +$CLICKHOUSE_CLIENT -q "create table test (x UInt64) engine=Memory()" + +$CLICKHOUSE_CLIENT -q "drop table if exists test_dist" +$CLICKHOUSE_CLIENT -q "create table test_dist engine=Distributed('test_shard_localhost', currentDatabase(), 'test')" + +$CLICKHOUSE_CLIENT -q "detach table test_dist" +$CLICKHOUSE_CLIENT -q "drop table test" +$CLICKHOUSE_CLIENT -q "attach table test_dist" +$CLICKHOUSE_CLIENT -q "select * from test_dist" 2>&1 | grep -q "UNKNOWN_TABLE" && echo "OK" || echo "FAIL" + diff --git a/tests/queries/0_stateless/02223_insert_select_schema_inference.reference b/tests/queries/0_stateless/02223_insert_select_schema_inference.reference new file mode 100644 index 00000000000..ef1eea12112 --- /dev/null +++ b/tests/queries/0_stateless/02223_insert_select_schema_inference.reference @@ -0,0 +1,13 @@ +x UInt32 +y String +d Date +0 0 1970-01-01 +1 1 1970-01-02 +2 2 1970-01-03 +3 3 1970-01-04 +4 4 1970-01-05 +5 5 1970-01-06 +6 6 1970-01-07 +7 7 1970-01-08 +8 8 1970-01-09 +9 9 1970-01-10 diff --git a/tests/queries/0_stateless/02223_insert_select_schema_inference.sql b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql new file mode 100644 index 00000000000..ff39ca83b9b --- /dev/null +++ b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql @@ -0,0 +1,5 @@ +drop table if exists test; +create table test (x UInt32, y String, d Date) engine=Memory() as select number as x, toString(number) as y, toDate(number) as d from numbers(10); +insert into table function file('data.native.zst') select * from test; +desc file('data.native.zst'); +select * from file('data.native.zst'); diff --git a/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.reference b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.reference new file mode 100644 index 00000000000..05fbb680c65 --- /dev/null +++ b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.reference @@ -0,0 +1,27 @@ +-- { echoOn } +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=1; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } +select * from dst_02224; +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=2; +select * from dst_02224; +1 +1 +truncate table dst_02224; +insert into function 
cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; +1 +1 +truncate table dst_02224; +insert into function remote('127.{1,2}', currentDatabase(), dst_02224, key) +select * from remote('127.{1,2}', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; +1 +1 diff --git a/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.sql b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.sql new file mode 100644 index 00000000000..023f220e930 --- /dev/null +++ b/tests/queries/0_stateless/02224_parallel_distributed_insert_select_cluster.sql @@ -0,0 +1,34 @@ +drop table if exists dst_02224; +drop table if exists src_02224; +create table dst_02224 (key Int) engine=Memory(); +create table src_02224 (key Int) engine=Memory(); +insert into src_02224 values (1); + +-- { echoOn } +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=1; -- { serverError TOO_LARGE_DISTRIBUTED_DEPTH } +select * from dst_02224; + +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=1, max_distributed_depth=2; +select * from dst_02224; + +truncate table dst_02224; +insert into function cluster('test_cluster_two_shards', currentDatabase(), dst_02224, key) +select * from cluster('test_cluster_two_shards', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; + +truncate table dst_02224; +insert into function remote('127.{1,2}', currentDatabase(), dst_02224, key) +select * from remote('127.{1,2}', currentDatabase(), src_02224, key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02224; +-- { echoOff } + +drop table src_02224; +drop table dst_02224; diff --git a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.reference b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.reference new file mode 100644 index 00000000000..98fb6a68656 --- /dev/null +++ b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.reference @@ -0,0 +1,4 @@ +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh new file mode 100755 index 00000000000..376a49fd820 --- /dev/null +++ b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash + +# NOTE: sh test is required since view() does not have current database + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists dst_02225; +drop table if exists src_02225; +create table dst_02225 (key Int) engine=Memory(); +create table src_02225 (key Int) engine=Memory(); +insert into src_02225 values (1); +" + +$CLICKHOUSE_CLIENT --param_database=$CLICKHOUSE_DATABASE -nm -q " +truncate table dst_02225; +insert into function remote('127.{1,2}', currentDatabase(), dst_02225, key) +select * from remote('127.{1,2}', view(select * from {database:Identifier}.src_02225), key) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02225; + +-- w/o sharding key +truncate table dst_02225; +insert into function remote('127.{1,2}', currentDatabase(), dst_02225, key) +select * from remote('127.{1,2}', view(select * from {database:Identifier}.src_02225)) +settings parallel_distributed_insert_select=2, max_distributed_depth=1; +select * from dst_02225; +" + +$CLICKHOUSE_CLIENT -nm -q " +drop table src_02225; +drop table dst_02225; +" diff --git a/tests/queries/0_stateless/02226_async_insert_table_function.reference b/tests/queries/0_stateless/02226_async_insert_table_function.reference new file mode 100644 index 00000000000..60d475a7393 --- /dev/null +++ b/tests/queries/0_stateless/02226_async_insert_table_function.reference @@ -0,0 +1,2 @@ +1 aaa +2 bbb diff --git a/tests/queries/0_stateless/02226_async_insert_table_function.sql b/tests/queries/0_stateless/02226_async_insert_table_function.sql new file mode 100644 index 00000000000..fc4aadfbfcd --- /dev/null +++ b/tests/queries/0_stateless/02226_async_insert_table_function.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t_async_insert_table_function; + +CREATE TABLE t_async_insert_table_function (id UInt32, s String) ENGINE = Memory; + +SET async_insert = 1; + +INSERT INTO function remote('127.0.0.1', currentDatabase(), t_async_insert_table_function) values (1, 'aaa') (2, 'bbb'); + +SELECT * FROM t_async_insert_table_function ORDER BY id; + +DROP TABLE t_async_insert_table_function; diff --git a/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.reference b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.reference new file mode 100644 index 00000000000..a1533c4e44a --- /dev/null +++ b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.reference @@ -0,0 +1,24 @@ +lc_bf_tokenbf +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_fixed_bf_tokenbf +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_ngram +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_fixed_ngram +1 K1 K1ZZZZZZ +2 K2 K2ZZZZZZ +lc_bf_tokenbf +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 +lc_fixed_bf_tokenbf +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 +lc_ngram +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 +lc_fixed_ngram +3 abCD3ef abCD3ef\0 +4 abCD4ef abCD4ef\0 diff --git a/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.sql b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.sql new file mode 100644 index 00000000000..d2b30f5e8f4 --- /dev/null +++ b/tests/queries/0_stateless/02226_low_cardinality_text_bloom_filter_index.sql @@ -0,0 +1,69 @@ +DROP TABLE IF EXISTS bf_tokenbf_lowcard_test; +DROP TABLE IF EXISTS bf_ngram_lowcard_test; + +CREATE TABLE bf_tokenbf_lowcard_test +( + row_id UInt32, + lc LowCardinality(String), + lc_fixed LowCardinality(FixedString(8)), + INDEX lc_bf_tokenbf lc TYPE tokenbf_v1(256,2,0) GRANULARITY 1, + INDEX lc_fixed_bf_tokenbf lc_fixed TYPE tokenbf_v1(256,2,0) GRANULARITY 1 +) Engine=MergeTree() ORDER BY row_id SETTINGS 
index_granularity = 1; + +CREATE TABLE bf_ngram_lowcard_test +( + row_id UInt32, + lc LowCardinality(String), + lc_fixed LowCardinality(FixedString(8)), + INDEX lc_ngram lc TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1, + INDEX lc_fixed_ngram lc_fixed TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1 +) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 1; + +INSERT INTO bf_tokenbf_lowcard_test VALUES (1, 'K1', 'K1ZZZZZZ'), (2, 'K2', 'K2ZZZZZZ'); +INSERT INTO bf_ngram_lowcard_test VALUES (1, 'K1', 'K1ZZZZZZ'), (2, 'K2', 'K2ZZZZZZ'); +INSERT INTO bf_tokenbf_lowcard_test VALUES (3, 'abCD3ef', 'abCD3ef'), (4, 'abCD4ef', 'abCD4ef'); +INSERT INTO bf_ngram_lowcard_test VALUES (3, 'abCD3ef', 'abCD3ef'), (4, 'abCD4ef', 'abCD4ef'); + +SELECT 'lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K1') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K2') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K3') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; + +SELECT 'lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K1ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K2ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K3ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; + +SELECT 'lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K1') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K2') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K3') SETTINGS force_data_skipping_indices='lc_ngram'; + +SELECT 'lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K1ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K2ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K3ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; + + +SELECT 'lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD3%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD4%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD5%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf'; + +SELECT 'lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD3%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD4%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; +SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD5%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf'; + +SELECT 'lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD3%') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD4%') SETTINGS force_data_skipping_indices='lc_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD5%') SETTINGS force_data_skipping_indices='lc_ngram'; + +SELECT 'lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD3%') SETTINGS 
force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD4%') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; +SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD5%') SETTINGS force_data_skipping_indices='lc_fixed_ngram'; + +DROP TABLE bf_tokenbf_lowcard_test; +DROP TABLE bf_ngram_lowcard_test; diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.reference b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh new file mode 100755 index 00000000000..2a163746e20 --- /dev/null +++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -nm -q " +drop table if exists data_02226; +create table data_02226 (key Int) engine=MergeTree() order by key +as select * from numbers(1); +" + +# Regression for: +# +# Logical error: 'Coordinator for parallel reading from replicas is not initialized'. +opts=( + --allow_experimental_parallel_reading_from_replicas 1 + --max_parallel_replicas 3 + + --iterations 1 +) +$CLICKHOUSE_BENCHMARK --query "select * from remote('127.1', $CLICKHOUSE_DATABASE, data_02226)" "${opts[@]}" >& /dev/null +ret=$? + +$CLICKHOUSE_CLIENT -nm -q " +drop table data_02226; +" + +exit $ret diff --git a/tests/queries/0_stateless/02226_s3_with_cache.reference b/tests/queries/0_stateless/02226_s3_with_cache.reference new file mode 100644 index 00000000000..214addac2d6 --- /dev/null +++ b/tests/queries/0_stateless/02226_s3_with_cache.reference @@ -0,0 +1,2 @@ +SELECT 1, * FROM test LIMIT 10 FORMAT Null; 1 0 1 +SELECT 2, * FROM test LIMIT 10 FORMAT Null; 0 1 0 diff --git a/tests/queries/0_stateless/02226_s3_with_cache.sql b/tests/queries/0_stateless/02226_s3_with_cache.sql new file mode 100644 index 00000000000..b3126a419df --- /dev/null +++ b/tests/queries/0_stateless/02226_s3_with_cache.sql @@ -0,0 +1,44 @@ +-- Tags: no-parallel, no-fasttest, long + +SET max_memory_usage='20G'; + +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + +SET remote_filesystem_read_method='threadpool'; + +SELECT 1, * FROM test LIMIT 10 FORMAT Null; + +SYSTEM FLUSH LOGS; +SELECT query, + ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, + ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download +FROM system.query_log +WHERE query LIKE 'SELECT 1, * FROM test LIMIT%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; + +SET remote_filesystem_read_method='read'; + +SELECT 2, * FROM test LIMIT 10 FORMAT Null; + +SYSTEM FLUSH LOGS; +SELECT query, + ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, + ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download +FROM system.query_log +WHERE query LIKE 'SELECT 2, * FROM test LIMIT%' +AND type = 'QueryFinish' +AND 
current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; + +SET remote_filesystem_read_method='threadpool'; + +SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference new file mode 100644 index 00000000000..5fd48ae580a --- /dev/null +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference @@ -0,0 +1 @@ +c1 Nullable(String) diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.sh b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.sh new file mode 100755 index 00000000000..314a60d6491 --- /dev/null +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "2020-02-01 16:00:00" | $CLICKHOUSE_LOCAL -q "desc table table" --input-format "CSV" --file=- + diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.reference b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql new file mode 100644 index 00000000000..a53b7f50e51 --- /dev/null +++ b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql @@ -0,0 +1,36 @@ +-- Tags: long + +drop table if exists buffer_02231; +drop table if exists out_02231; +drop table if exists in_02231; +drop table if exists mv_02231; + +-- To reproduce the leak of memory tracking of aggregate states, +-- a background flush is required. 
+create table buffer_02231 +( + key Int, + v1 AggregateFunction(groupArray, String) +) engine=Buffer(currentDatabase(), 'out_02231', + /* layers= */1, + /* min/max time */ 86400, 86400, + /* min/max rows */ 1e9, 1e9, + /* min/max bytes */ 1e12, 1e12, + /* flush time */ 1 +); +create table out_02231 as buffer_02231 engine=Null(); +create table in_02231 (number Int) engine=Null(); + +-- Create lots of INSERT blocks with MV +create materialized view mv_02231 to buffer_02231 as select + number as key, + groupArrayState(toString(number)) as v1 +from in_02231 +group by key; + +insert into in_02231 select * from numbers(10e6) settings max_memory_usage='300Mi'; + +drop table buffer_02231; +drop table out_02231; +drop table in_02231; +drop table mv_02231; diff --git a/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.reference b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.reference new file mode 100644 index 00000000000..bccd9864b30 --- /dev/null +++ b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.reference @@ -0,0 +1,32 @@ +Get hierarchy +[] +[1] +[2,1] +[3,1] +[4,2,1] +[] +Get is in hierarchy +1 +1 +0 +Get children +[1] +[2,3] +[4] +[] +[] +[] +Get all descendants +[1,2,3,4] +[2,3,4] +[4] +[] +[] +[] +Get descendants at first level +[1] +[2,3] +[4] +[] +[] +[] diff --git a/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.sql b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.sql new file mode 100644 index 00000000000..bc01b447338 --- /dev/null +++ b/tests/queries/0_stateless/02231_hierarchical_dictionaries_constant.sql @@ -0,0 +1,54 @@ +DROP TABLE IF EXISTS hierarchy_source_table; +CREATE TABLE hierarchy_source_table (id UInt64, parent_id UInt64) ENGINE = TinyLog; +INSERT INTO hierarchy_source_table VALUES (1, 0), (2, 1), (3, 1), (4, 2); + +DROP DICTIONARY IF EXISTS hierarchy_flat_dictionary; +CREATE DICTIONARY hierarchy_flat_dictionary +( + id UInt64, + parent_id UInt64 HIERARCHICAL +) +PRIMARY KEY id +SOURCE(CLICKHOUSE(TABLE 'hierarchy_source_table')) +LAYOUT(FLAT()) +LIFETIME(MIN 1 MAX 1000); + +SELECT 'Get hierarchy'; +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 0); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 1); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 2); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 3); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 4); +SELECT dictGetHierarchy('hierarchy_flat_dictionary', 5); + +SELECT 'Get is in hierarchy'; +SELECT dictIsIn('hierarchy_flat_dictionary', 1, 1); +SELECT dictIsIn('hierarchy_flat_dictionary', 2, 1); +SELECT dictIsIn('hierarchy_flat_dictionary', 2, 0); + +SELECT 'Get children'; +SELECT dictGetChildren('hierarchy_flat_dictionary', 0); +SELECT dictGetChildren('hierarchy_flat_dictionary', 1); +SELECT dictGetChildren('hierarchy_flat_dictionary', 2); +SELECT dictGetChildren('hierarchy_flat_dictionary', 3); +SELECT dictGetChildren('hierarchy_flat_dictionary', 4); +SELECT dictGetChildren('hierarchy_flat_dictionary', 5); + +SELECT 'Get all descendants'; +SELECT dictGetDescendants('hierarchy_flat_dictionary', 0); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 2); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 3); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 4); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 5); + +SELECT 'Get descendants at first level'; +SELECT dictGetDescendants('hierarchy_flat_dictionary', 0, 
1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 1, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 2, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 3, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 4, 1); +SELECT dictGetDescendants('hierarchy_flat_dictionary', 5, 1); + +DROP DICTIONARY hierarchy_flat_dictionary; +DROP TABLE hierarchy_source_table; diff --git a/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.reference b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.reference new file mode 100644 index 00000000000..f18e41e497e --- /dev/null +++ b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.reference @@ -0,0 +1,8 @@ +cnt +2 +t0 t0 +100 100 +0 0 +hit +1 +0 diff --git a/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.sql b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.sql new file mode 100644 index 00000000000..89383ed4ba3 --- /dev/null +++ b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_functions_to_subcolumns_alias; + +CREATE TABLE t_functions_to_subcolumns_alias (id UInt64, t Tuple(UInt64, String), m Map(String, UInt64)) ENGINE = Memory; +INSERT INTO t_functions_to_subcolumns_alias VALUES (1, (100, 'abc'), map('foo', 1, 'bar', 2)) (2, NULL, map()); + +SELECT count(id) AS cnt FROM t_functions_to_subcolumns_alias FORMAT TSVWithNames; +SELECT tupleElement(t, 1) as t0, t0 FROM t_functions_to_subcolumns_alias FORMAT TSVWithNames; +SELECT mapContains(m, 'foo') AS hit FROM t_functions_to_subcolumns_alias FORMAT TSVWithNames; + +DROP TABLE t_functions_to_subcolumns_alias; diff --git a/tests/queries/0_stateless/02232_partition_pruner_single_point.reference b/tests/queries/0_stateless/02232_partition_pruner_single_point.reference new file mode 100644 index 00000000000..1191247b6d9 --- /dev/null +++ b/tests/queries/0_stateless/02232_partition_pruner_single_point.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/02232_partition_pruner_single_point.sql b/tests/queries/0_stateless/02232_partition_pruner_single_point.sql new file mode 100644 index 00000000000..0400d0e1b59 --- /dev/null +++ b/tests/queries/0_stateless/02232_partition_pruner_single_point.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS lower_test; + +CREATE TABLE lower_test ( + a Int32, + b String +) ENGINE=MergeTree +PARTITION BY b +ORDER BY a; + +INSERT INTO lower_test (a,b) VALUES (1,'A'),(2,'B'),(3,'C'); + +SELECT a FROM lower_test WHERE lower(b) IN ('a','b') order by a; + +DROP TABLE lower_test; diff --git a/tests/queries/0_stateless/02233_data/test_setting_input_format_use_lowercase_column_name.orc b/tests/queries/0_stateless/02233_data/test_setting_input_format_use_lowercase_column_name.orc new file mode 100644 index 00000000000..136f9980064 Binary files /dev/null and b/tests/queries/0_stateless/02233_data/test_setting_input_format_use_lowercase_column_name.orc differ diff --git a/tests/queries/0_stateless/02233_data/test_setting_input_format_use_lowercase_column_name.parquet b/tests/queries/0_stateless/02233_data/test_setting_input_format_use_lowercase_column_name.parquet new file mode 100644 index 00000000000..922def77caf Binary files /dev/null and b/tests/queries/0_stateless/02233_data/test_setting_input_format_use_lowercase_column_name.parquet differ diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference 
b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference new file mode 100644 index 00000000000..5c383cb3035 --- /dev/null +++ b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference @@ -0,0 +1,6 @@ +Parquet +123 1 +456 2 +ORC +123 1 +456 2 diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh new file mode 100755 index 00000000000..9a4f40ca4ee --- /dev/null +++ b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "Parquet" +DATA_FILE=$CUR_DIR/02233_data/test_setting_input_format_use_lowercase_column_name.parquet +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (id String, score Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_use_lowercase_column_name=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" + +echo "ORC" +DATA_FILE=$CUR_DIR/02233_data/test_setting_input_format_use_lowercase_column_name.orc +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (id String, score Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_use_lowercase_column_name=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" +${CLICKHOUSE_CLIENT} --query="drop table orc_load" diff --git a/tests/queries/0_stateless/02233_with_total_empty_chunk.reference b/tests/queries/0_stateless/02233_with_total_empty_chunk.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02233_with_total_empty_chunk.sql b/tests/queries/0_stateless/02233_with_total_empty_chunk.sql new file mode 100644 index 00000000000..bf9ce85b6ed --- /dev/null +++ b/tests/queries/0_stateless/02233_with_total_empty_chunk.sql @@ -0,0 +1 @@ +SELECT (NULL, NULL, NULL, NULL, NULL, NULL, NULL) FROM numbers(0) GROUP BY number WITH TOTALS HAVING sum(number) <= arrayJoin([]); diff --git a/tests/queries/0_stateless/02234_column_function_short_circuit.reference b/tests/queries/0_stateless/02234_column_function_short_circuit.reference new file mode 100644 index 00000000000..2c08a29620e --- /dev/null +++ b/tests/queries/0_stateless/02234_column_function_short_circuit.reference @@ -0,0 +1,2 @@ +2.3 +4.3 diff --git a/tests/queries/0_stateless/02234_column_function_short_circuit.sql b/tests/queries/0_stateless/02234_column_function_short_circuit.sql new file mode 100644 index 00000000000..a6a36841073 --- /dev/null +++ b/tests/queries/0_stateless/02234_column_function_short_circuit.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS dict_table; +DROP TABLE IF EXISTS data_table; +DROP DICTIONARY IF EXISTS dict; + +create table dict_table +( + `strField` String, + `dateField` Date, + `float64Field` Float64 +) Engine Log(); + +insert into dict_table values ('SomeStr', toDate('2021-01-01'), 1.1), ('SomeStr2', toDate('2021-01-02'), 2.2); + +create dictionary dict +( + `strField` String, + `dateField` Date, + `float64Field` Float64 +) +PRIMARY KEY strField, 
dateField +SOURCE (CLICKHOUSE(TABLE 'dict_table')) +LIFETIME(MIN 300 MAX 360) +LAYOUT (COMPLEX_KEY_HASHED()); + +create table data_table +( + `float64Field1` Float64, + `float64Field2` Float64, + `strField1` String, + `strField2` String +) Engine Log(); + +insert into data_table values (1.1, 1.2, 'SomeStr', 'SomeStr'), (2.1, 2.2, 'SomeStr2', 'SomeStr2'); + +select round( + float64Field1 * if(strField1 != '', 1.0, dictGetFloat64('dict', 'float64Field', (strField1, toDate('2021-01-01')))) + + if(strField2 != '', 1.0, dictGetFloat64('dict', 'float64Field', (strField2, toDate('2021-01-01')))) * if(isFinite(float64Field2), float64Field2, 0), + 2) +from data_table; + +DROP DICTIONARY dict; +DROP TABLE dict_table; +DROP TABLE data_table; diff --git a/tests/queries/0_stateless/02234_position_case_insensitive_utf8.reference b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02234_position_case_insensitive_utf8.sql b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.sql new file mode 100644 index 00000000000..d77b13e7f97 --- /dev/null +++ b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.sql @@ -0,0 +1,2 @@ +SELECT positionCaseInsensitiveUTF8('Hello', materialize('%\xF0%')); +SELECT DISTINCT positionCaseInsensitiveUTF8(materialize('Hello'), '%\xF0%') FROM numbers(1000); diff --git a/tests/queries/0_stateless/02235_brotli_bug.reference b/tests/queries/0_stateless/02235_brotli_bug.reference new file mode 100644 index 00000000000..d59d3c7902c --- /dev/null +++ b/tests/queries/0_stateless/02235_brotli_bug.reference @@ -0,0 +1 @@ +1000000 999999 diff --git a/tests/queries/0_stateless/02235_brotli_bug.sh b/tests/queries/0_stateless/02235_brotli_bug.sh new file mode 100755 index 00000000000..39b1e555ed7 --- /dev/null +++ b/tests/queries/0_stateless/02235_brotli_bug.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: depends on brotli and bzip2 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS file" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE file (x UInt64) ENGINE = File(TSV, '${CLICKHOUSE_DATABASE}/data.tsv.br')" +${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE file" +${CLICKHOUSE_CLIENT} --query "INSERT INTO file SELECT * FROM numbers(1000000)" +${CLICKHOUSE_CLIENT} --max_read_buffer_size=8 --query "SELECT count(), max(x) FROM file" +${CLICKHOUSE_CLIENT} --query "DROP TABLE file" diff --git a/tests/queries/0_stateless/02235_check_table_sparse_serialization.reference b/tests/queries/0_stateless/02235_check_table_sparse_serialization.reference new file mode 100644 index 00000000000..35e27925057 --- /dev/null +++ b/tests/queries/0_stateless/02235_check_table_sparse_serialization.reference @@ -0,0 +1,4 @@ +all_1_1_0 a Default +all_2_2_0 a Sparse +all_1_1_0 1 +all_2_2_0 1 diff --git a/tests/queries/0_stateless/02235_check_table_sparse_serialization.sql b/tests/queries/0_stateless/02235_check_table_sparse_serialization.sql new file mode 100644 index 00000000000..0ac97404c46 --- /dev/null +++ b/tests/queries/0_stateless/02235_check_table_sparse_serialization.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS t_sparse_02235; + +CREATE TABLE t_sparse_02235 (a UInt8) ENGINE = MergeTree ORDER BY tuple() +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9; + +SYSTEM STOP MERGES t_sparse_02235; + +INSERT INTO t_sparse_02235 SELECT 1 FROM numbers(1000); +INSERT INTO t_sparse_02235 SELECT 0 FROM numbers(1000); + +SELECT name, column, serialization_kind FROM system.parts_columns +WHERE database = currentDatabase() AND table = 't_sparse_02235' +ORDER BY name, column; + +SET check_query_single_value_result = 0; +CHECK TABLE t_sparse_02235; + +DROP TABLE t_sparse_02235; diff --git a/tests/queries/1_stateful/00011_sorting.sql b/tests/queries/1_stateful/00011_sorting.sql index 381be7b7dd4..3e451360e1b 100644 --- a/tests/queries/1_stateful/00011_sorting.sql +++ b/tests/queries/1_stateful/00011_sorting.sql @@ -1 +1 @@ -SELECT EventTime::DateTime('Europe/Moscow') FROM test.hits ORDER BY EventTime DESC LIMIT 10 +SELECT EventTime::DateTime('Asia/Dubai') FROM test.hits ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00012_sorting_distributed.sql b/tests/queries/1_stateful/00012_sorting_distributed.sql index c71f643045d..2f852af1dba 100644 --- a/tests/queries/1_stateful/00012_sorting_distributed.sql +++ b/tests/queries/1_stateful/00012_sorting_distributed.sql @@ -1,3 +1,3 @@ -- Tags: distributed -SELECT EventTime::DateTime('Europe/Moscow') FROM remote('127.0.0.{1,2}', test, hits) ORDER BY EventTime DESC LIMIT 10 +SELECT EventTime::DateTime('Asia/Dubai') FROM remote('127.0.0.{1,2}', test, hits) ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql index 3e34d9d1348..63a833af114 100644 --- a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql +++ b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql @@ -1,4 +1,4 @@ -- Tags: replica, distributed SET max_parallel_replicas = 2; -SELECT EventTime::DateTime('Europe/Moscow') FROM remote('127.0.0.{1|2}', test, hits) ORDER BY EventTime DESC LIMIT 10 +SELECT EventTime::DateTime('Asia/Dubai') FROM remote('127.0.0.{1|2}', test, hits) ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql b/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql index 
241f0f9b13b..16c0097bf21 100644 --- a/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql +++ b/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS test.hits_snippet; -CREATE TABLE test.hits_snippet(EventTime DateTime('Europe/Moscow'), EventDate Date, CounterID UInt32, UserID UInt64, URL String, Referer String) ENGINE = MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192); +CREATE TABLE test.hits_snippet(EventTime DateTime('Asia/Dubai'), EventDate Date, CounterID UInt32, UserID UInt64, URL String, Referer String) ENGINE = MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192); SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; SET max_block_size = 4096; diff --git a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql index af5d932fecb..d652b1bc559 100644 --- a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql +++ b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql @@ -15,7 +15,7 @@ SELECT count() FROM test.hits WHERE EventDate IN (toDate('2014-03-18'), toDate(' SELECT count() FROM test.hits WHERE EventDate = concat('2014-0', '3-18'); DROP TABLE IF EXISTS test.hits_indexed_by_time; -CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Europe/Moscow')) ENGINE = MergeTree ORDER BY (EventDate, EventTime); +CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Asia/Dubai')) ENGINE = MergeTree ORDER BY (EventDate, EventTime); INSERT INTO test.hits_indexed_by_time SELECT EventDate, EventTime FROM test.hits; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = '2014-03-18 01:02:03'; @@ -25,12 +25,12 @@ SELECT count() FROM test.hits_indexed_by_time WHERE EventTime <= '2014-03-18 01: SELECT count() FROM test.hits_indexed_by_time WHERE EventTime >= '2014-03-18 01:02:03'; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime IN ('2014-03-18 01:02:03', '2014-03-19 04:05:06'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime < toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime > toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime <= toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime >= toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime IN (toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'), toDateTime('2014-03-19 04:05:06', 'Europe/Moscow')); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime < toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime > toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime <= toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime >= toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime IN (toDateTime('2014-03-18 
01:02:03', 'Asia/Dubai'), toDateTime('2014-03-19 04:05:06', 'Asia/Dubai')); SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = concat('2014-03-18 ', '01:02:03'); diff --git a/tests/queries/1_stateful/00075_left_array_join.sql b/tests/queries/1_stateful/00075_left_array_join.sql index 52a48462b9d..1fd045a26bf 100644 --- a/tests/queries/1_stateful/00075_left_array_join.sql +++ b/tests/queries/1_stateful/00075_left_array_join.sql @@ -1,2 +1,2 @@ -SELECT UserID, EventTime::DateTime('Europe/Moscow'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; -SELECT UserID, EventTime::DateTime('Europe/Moscow'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits LEFT ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; +SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; +SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits LEFT ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; diff --git a/tests/queries/1_stateful/00084_external_aggregation.sql b/tests/queries/1_stateful/00084_external_aggregation.sql index b3922eae049..816d95f4b8b 100644 --- a/tests/queries/1_stateful/00084_external_aggregation.sql +++ b/tests/queries/1_stateful/00084_external_aggregation.sql @@ -1,3 +1,5 @@ +-- Tags: no-random-settings + SET max_bytes_before_external_group_by = 200000000; SET max_memory_usage = 1500000000; diff --git a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql index c5952be83b6..1e476d3a27d 100644 --- a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql +++ b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql @@ -2,12 +2,12 @@ SET max_bytes_to_read = 600000000; SET optimize_move_to_prewhere = 1; -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -SELECT uniq(*) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; -WITH toTimeZone(EventTime, 'Europe/Moscow') AS xyz SELECT uniq(*) FROM test.hits WHERE xyz >= '2014-03-20 00:00:00' AND xyz < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; +SELECT uniq(*) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; +WITH toTimeZone(EventTime, 'Asia/Dubai') AS xyz SELECT uniq(*) FROM test.hits WHERE xyz >= '2014-03-20 00:00:00' AND xyz < '2014-03-21 
00:00:00' AND EventDate = '2014-03-21'; SET optimize_move_to_prewhere = 0; -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -- { serverError 307 } -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -- { serverError 307 } +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } diff --git a/tests/queries/1_stateful/00154_avro.sql b/tests/queries/1_stateful/00154_avro.sql index ea5d665a3b4..f608da629d2 100644 --- a/tests/queries/1_stateful/00154_avro.sql +++ b/tests/queries/1_stateful/00154_avro.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS test.avro; -SET max_threads = 1, max_block_size = 8192, min_insert_block_size_rows = 8192, min_insert_block_size_bytes = 1048576; -- lower memory usage +SET max_threads = 1, max_insert_threads = 0, max_block_size = 8192, min_insert_block_size_rows = 8192, min_insert_block_size_bytes = 1048576; -- lower memory usage CREATE TABLE test.avro AS test.hits ENGINE = File(Avro); INSERT INTO test.avro SELECT * FROM test.hits LIMIT 10000; diff --git a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference index 04107d74341..3b7b346e7e8 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference @@ -1,8 +1,8 @@ CSV, false -ea1c740f03f5dcc43a3044528ad0a98f - +6929aaeac016d22c20464e3be38c64cd - CSV, true -ea1c740f03f5dcc43a3044528ad0a98f - +6929aaeac016d22c20464e3be38c64cd - CSVWithNames, false -e986f353467c87b07e7143d7bff2daff - +1610d7eac24fb923cd973c99ab7e3a8d - CSVWithNames, true -e986f353467c87b07e7143d7bff2daff - +1610d7eac24fb923cd973c99ab7e3a8d - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh index a6b5620812d..1476d2892bf 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh @@ -10,10 +10,10 @@ for format in "${FORMATS[@]}" do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum done diff --git 
a/tests/queries/1_stateful/00159_parallel_formatting_http.reference b/tests/queries/1_stateful/00159_parallel_formatting_http.reference index 8eabf5d4f03..34ecd115748 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_http.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_http.reference @@ -1,12 +1,12 @@ TSV, false -6e4ce4996dd0e036d27cb0d2166c8e59 - +9e0a1b1db4d1e56b4b571a8824dde35b - TSV, true -6e4ce4996dd0e036d27cb0d2166c8e59 - +9e0a1b1db4d1e56b4b571a8824dde35b - CSV, false -ab6b3616f31e8a952c802ca92562e418 - +c9c6f633a59d349f9f8a14ee2f1cb1b3 - CSV, true -ab6b3616f31e8a952c802ca92562e418 - +c9c6f633a59d349f9f8a14ee2f1cb1b3 - JSONCompactEachRow, false -1651b540b43bd6c62446f4c340bf13c7 - +826e244bd6c547b52955dd69df61ea22 - JSONCompactEachRow, true -1651b540b43bd6c62446f4c340bf13c7 - +826e244bd6c547b52955dd69df61ea22 - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_http.sh b/tests/queries/1_stateful/00159_parallel_formatting_http.sh index 1dcae50812e..ea4a4d12867 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_http.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_http.sh @@ -10,8 +10,8 @@ FORMATS=('TSV' 'CSV' 'JSONCompactEachRow') for format in "${FORMATS[@]}" do echo "$format, false"; - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Europe/Moscow')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=false" -d' ' | md5sum + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Asia/Dubai')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=false" -d' ' | md5sum echo "$format, true"; - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Europe/Moscow')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=true" -d' ' | md5sum + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Asia/Dubai')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=true" -d' ' | md5sum done diff --git a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference index 7ad5359a30e..42e69ea3a0d 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference @@ -1,28 +1,28 @@ JSONEachRow, false -e0a3c9978a92a277f2fff4664f3c1749 - +c6b89185cc5b3dff5d3779e2e1551b81 - JSONEachRow, true -e0a3c9978a92a277f2fff4664f3c1749 - +c6b89185cc5b3dff5d3779e2e1551b81 - JSONCompactEachRow, false -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactEachRow, true -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactStringsEachRow, false -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactStringsEachRow, true -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactEachRowWithNames, false -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactEachRowWithNames, true -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactStringsEachRowWithNames, 
false -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactStringsEachRowWithNames, true -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactEachRowWithNamesAndTypes, false -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - JSONCompactEachRowWithNamesAndTypes, true -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - JSONCompactStringsEachRowWithNamesAndTypes, false -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - JSONCompactStringsEachRowWithNamesAndTypes, true -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh index f6c87eabfde..a96ed0c9b96 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh @@ -12,9 +12,9 @@ for format in "${FORMATS[@]}" do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum done diff --git a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference index 04d6db3b4af..91e3af03db8 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference @@ -1,12 +1,12 @@ TSV, false -8a984bbbfb127c430f67173f5371c6cb - +194d5061de4cae59489d989373f8effe - TSV, true -8a984bbbfb127c430f67173f5371c6cb - +194d5061de4cae59489d989373f8effe - TSVWithNames, false -ead321ed96754ff1aa39d112bc28c43d - +a6d327a3611288b3f973d00e6116f16e - TSVWithNames, true -ead321ed96754ff1aa39d112bc28c43d - +a6d327a3611288b3f973d00e6116f16e - TSKV, false -1735308ecea5c269846f36a55d5b335f - +c2e32a21c08aacf60bda21248ce4f73f - TSKV, true -1735308ecea5c269846f36a55d5b335f - +c2e32a21c08aacf60bda21248ce4f73f - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh index 02d083c0498..9d48774dd2d 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh @@ -11,9 +11,9 @@ for format in "${FORMATS[@]}" do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM 
test.hits ORDER BY a, b, c Format $format" | md5sum echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum done diff --git a/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference b/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference index fb0ba75c148..cd8c2e21b09 100644 --- a/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference +++ b/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference @@ -1,8 +1,8 @@ TSVWithNames, false -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - TSVWithNames, true -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - CSVWithNames, false -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - CSVWithNames, true -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - diff --git a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh index 777d95fa0af..a1136a47319 100755 --- a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh +++ b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh @@ -10,21 +10,21 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" for format in "${FORMATS[@]}" do # Columns are permuted - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum diff --git 
a/tests/queries/1_stateful/00163_column_oriented_formats.reference b/tests/queries/1_stateful/00163_column_oriented_formats.reference index cb20aca4392..cf29a217fe4 100644 --- a/tests/queries/1_stateful/00163_column_oriented_formats.reference +++ b/tests/queries/1_stateful/00163_column_oriented_formats.reference @@ -1,12 +1,12 @@ Parquet -6b397d4643bc1f920f3eb8aa87ee180c - +093d0270733e505af52436f9df4a779f - 7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - d8746733270cbeff7ab3550c9b944fb6 - Arrow -6b397d4643bc1f920f3eb8aa87ee180c - +093d0270733e505af52436f9df4a779f - 7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - d8746733270cbeff7ab3550c9b944fb6 - ORC -6b397d4643bc1f920f3eb8aa87ee180c - +093d0270733e505af52436f9df4a779f - 7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - d8746733270cbeff7ab3550c9b944fb6 - diff --git a/tests/queries/1_stateful/00163_column_oriented_formats.sh b/tests/queries/1_stateful/00163_column_oriented_formats.sh index 50ad20cbe92..803474c4fa7 100755 --- a/tests/queries/1_stateful/00163_column_oriented_formats.sh +++ b/tests/queries/1_stateful/00163_column_oriented_formats.sh @@ -11,7 +11,7 @@ for format in "${FORMATS[@]}" do echo $format $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 00163_column_oriented SYNC" - $CLICKHOUSE_CLIENT -q "CREATE TABLE 00163_column_oriented(ClientEventTime DateTime('Europe/Moscow'), MobilePhoneModel String, ClientIP6 FixedString(16)) ENGINE=File($format)" + $CLICKHOUSE_CLIENT -q "CREATE TABLE 00163_column_oriented(ClientEventTime DateTime('Asia/Dubai'), MobilePhoneModel String, ClientIP6 FixedString(16)) ENGINE=File($format)" $CLICKHOUSE_CLIENT -q "INSERT INTO 00163_column_oriented SELECT ClientEventTime, MobilePhoneModel, ClientIP6 FROM test.hits ORDER BY ClientEventTime, MobilePhoneModel, ClientIP6 LIMIT 100" $CLICKHOUSE_CLIENT -q "SELECT ClientEventTime from 00163_column_oriented" | md5sum $CLICKHOUSE_CLIENT -q "SELECT MobilePhoneModel from 00163_column_oriented" | md5sum diff --git a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference index 0c0367694b2..a2c69c24fa2 100644 --- a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference +++ b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference @@ -1,20 +1,20 @@ TSVWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - TSVWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - CSVWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - CSVWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONStringsEachRow, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONStringsEachRow, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactEachRowWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactEachRowWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactStringsEachRowWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactStringsEachRowWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - diff --git a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh index 
9fdca20d097..33562918f67 100755 --- a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh +++ b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh @@ -10,21 +10,21 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" for format in "${FORMATS[@]}" do # Columns are permuted - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum diff --git a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh index 699700bcd3e..276fc0274c2 100755 --- a/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh +++ b/tests/queries/1_stateful/00168_parallel_processing_on_replicas_part_1.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-tsan +# Tags: no-tsan, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/1_stateful/00170_s3_cache.reference b/tests/queries/1_stateful/00170_s3_cache.reference new file mode 100644 index 00000000000..9c9c3bc537f --- /dev/null +++ b/tests/queries/1_stateful/00170_s3_cache.reference @@ -0,0 +1,270 @@ +-- { echo } +SET max_memory_usage='20G'; +SELECT count() FROM test.hits_s3; +8873898 +SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; +30641 +SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; +329039 8873898 1400.8565027454677 +SELECT sum(UserID) FROM test.hits_s3 ; +15358948234638402412 +SELECT uniq(UserID) FROM test.hits_s3 ; +120665 +SELECT uniq(SearchPhrase) FROM test.hits_s3 ; +132591 +SELECT min(EventDate), 
max(EventDate) FROM test.hits_s3 ; +2014-03-17 2014-03-23 +SELECT AdvEngineID, count() FROM test.hits_s3 WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY AdvEngineID DESC; +62 7 +61 12 +58 83 +55 281 +52 454 +51 74 +50 353 +49 7 +48 224 +42 72 +41 76 +40 91 +35 2751 +32 141 +30 1832 +24 9 +22 3 +18 3 +16 1019 +12 1 +10 3 +4 10 +3 22948 +2 187 +SELECT RegionID, uniq(UserID) AS u FROM test.hits_s3 GROUP BY RegionID ORDER BY u DESC LIMIT 10; +196 9275 +8363 4624 +15887 4585 +241 4488 +207 3596 +3 3319 +12504 1594 +183 1592 +57 1251 +225 1177 +SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM test.hits_s3 GROUP BY RegionID ORDER BY c DESC LIMIT 10; +196 32570 1311992 1437.5239170665675 9275 +3 11425 428577 1424.2968801405582 3319 +241 8291 320659 1149.9956152797831 4488 +207 7360 285615 1264.5680093832607 3596 +15887 27514 197463 1392.8657064867848 4585 +8363 26522 197154 1361.9469247390364 4624 +183 13054 186914 1470.3840054784553 1592 +225 1817 164048 1404.8909831268898 1177 +40 1883 107154 1407.6735912798401 808 +57 2146 99424 1200.338721033151 1251 +SELECT MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +S820_ROW 7616 +iPhone 2 6111 +LG Optimus 4134 +Samsung Galaxy 813 +iPad HD 7 604 +Sams 558 +Samsung Galaxy Note 501 +iPad 2 434 +iPhone S720 393 +iPad 10 FHD 306 +SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +1 S820_ROW 7613 +7 iPhone 2 5993 +1 LG Optimus 4098 +5 Samsung Galaxy Note 499 +5 Sams 346 +5 Samsung Galaxy 273 +7 iPad HD 7 240 +5 iPad 213 +4 Sams 210 +7 Samsung Galaxy 189 +SELECT uniq(SearchPhrase), count() AS c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +1 3567 +1 2402 +1 2166 +1 1848 +1 1659 +1 1549 +1 1480 +1 1247 +1 1112 +1 1091 +SELECT uniq(SearchPhrase), uniq(UserID) AS u FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +1 786 +1 479 +1 320 +1 188 +1 181 +1 174 +1 173 +1 162 +1 159 +1 141 +SELECT SearchEngineID, uniq(SearchPhrase), count() AS c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +3 1 3490 +3 1 2166 +3 1 1599 +3 1 1549 +3 1 1530 +3 1 1442 +3 1 1247 +3 1 1112 +3 1 1091 +3 1 1064 +SELECT UserID, count() FROM test.hits_s3 GROUP BY UserID ORDER BY count() DESC LIMIT 10; +1205491256153864188 31519 +3228040076666004453 20688 +2543118835429830843 16329 +1961021224905272484 13484 +4322253409885123546 11186 +2034549784946942048 10970 +397859646441652491 8229 +8032089779962875762 8149 +1839265440135330496 7816 +5548175707459682622 7806 +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c DESC LIMIT 10; +2961521519262 1 56 +87878526839192 1 414 +87878526839192 1 15 +87878526839192 1 6 +87878526839192 1 6 +87878526839192 1 5 +87878526839192 1 5 +87878526839192 1 5 +87878526839192 1 4 +87878526839192 1 3 +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c LIMIT 10; +2961521519262 1 56 +87878526839192 1 1 +87878526839192 1 1 +87878526839192 1 1 +87878526839192 1 2 +87878526839192 1 3 +87878526839192 1 4 +87878526839192 1 5 +87878526839192 1 5 +87878526839192 1 5 +SELECT UserID, toMinute(EventTime) AS m, uniq(SearchPhrase) as u, count() as c 
FROM test.hits_s3 GROUP BY UserID, m, SearchPhrase ORDER BY UserID DESC LIMIT 10 FORMAT Null; +SELECT UserID FROM test.hits_s3 WHERE UserID = 12345678901234567890; +SELECT count() FROM test.hits_s3 WHERE URL LIKE '%metrika%'; +2348 +SELECT uniq(SearchPhrase) as u, max(URL) as m, count() AS c FROM test.hits_s3 WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u, m, c DESC LIMIT 10; +1 goal://delive/812metrika.com/kizi-bulochkomna 4 +1 goal://delive/812metrika.com/kizi-bulochkomna 2 +1 goal://delive/812metrika.com/kizi-bulochkomna 2 +1 goal://delive/812metrika.com/kizi-bulochkomna 2 +1 goal://mail.yandex.ru/yrs/ekonometrika/kermosure-batakte 2 +1 http:%2F%2F%2F2014/03/18/cid=54&metrika.com 1 +1 http:%2F%2Ffiles&order=0&metrikancy-podar 1 +1 http:%2F%2Fiteme.metrika 1 +1 http:%2F%2Fproduct/shop.rbc.ru/rostometrikatuvali-k-pensadabuga/nauka_30_m_610_730641%2F01%2Fannovsk/dom-drugie_zhalujsta-s-social 1 +1 http:%2F%2Fwww.kirovanny/donnel_mart]=creative=0&metrika.ru/socialog 1 +SELECT uniq(SearchPhrase), max(URL), max(Title), count() AS c, uniq(UserID) FROM test.hits_s3 WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +1 http://korer.ru/categories.ru/?vkb Яндекс: нашлось 184 тыс изображений програница 27 тыс. ответов в России - 1245 1 +1 http://korer.ru/categories.ru/?vkb Яндекс.Картинках, поиск на AVITO.ru • Знакомства вакансии на дом электриса 710 1 +1 http://yandsearch[run][min]=200 одного подаров в Краснодателя » Страница 2 - современно в Яндекс: нашлось 8 мартфонарнажатие и последник Красность рисунки на AVITO.ru. Часы VU+ Uno 696 310 +1 http://korer.ru/categories.ru/?vkb Яндекс: нашем качествует о тебя не следников PRAJNA Cerator.org.com / Shopogody - Read izle, Diva.BY 668 1 +1 http://yandex.ru/chechristana.ru/clck/jsredircnt=1377554 Яндекс.Новости в Санкт-Петербурге: 228-135 тыс. ответов цифр трудников на Весная 572 1 +1 https://dns-state=AiuY0DBWFJ4ePaEs статися водят? - Испании туре за неделки игрушенко — Ирина домашних услуг Россия) - Яндекс: нашлось 236 тыс изображений 546 54 +1 http://korer.ru/categories.ru/?vkb Яндекс.Новоришь всё о купить модели Виннис, ЧП. 
Соболєв і 457 1 +1 https://my.mail.ru/appliancePotr 芒果 | ТЕЛЕГРАФ - Яндекс.Почта Mail.Ru: Из-за смотреть 439 221 +1 http://korer.ru/categories.ru/?vkb Продажа плании онлайн бесплатно в Яндекс.Маркетинг - новости менеджера, 61 438 1 +1 http://korer.ru/categories.ru/?vkb Яндекс: нашем качестве: почалась 396 Hp) 5-dr 200000 для зимние восписок тили 395 1 +SELECT * FROM test.hits_s3 WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10 format Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10 FORMAT Null; +SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM test.hits_s3 WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25; +25703952 185.35847185332617 147211 +732797 145.03929351646454 475142 +792887 123.97688315087015 252197 +3807842 78.46108053235935 196033 +1704509 60.11621475966243 523264 +598875 20.267298451681793 337140 +SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, max(Referer) FROM test.hits_s3 WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25; +vk.com.ua 670.6812170535467 205447 https://vk.com.ua/health.mail.yandsearch?lr=213&msid=87&redircnt=1310461&with_photorcycle/users/424246b7dcbba51/offers +avito.ru 89.56139198679928 243623 https://avito.ru/стих по биатлона +vk.com 88.93009846053418 680171 https://vk.com/video +yandex.ru 85.79982623523495 554773 https://yandex.ru/yandsearch + 81.39774471008556 2237229 httpvmkNCAErJlhPSHlqdmtsWFc4MXZtLUR1Q3Y9tM8jq5BkkHRyeFVKWTEJ6dE9iQnYCex9 +m.auto.ru 58.542011573622986 118027 https://m.auto.ru/yoshka-sokaklari-60.html#/battle-ru11 +SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), 
sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM test.hits_s3; +12431057718 12439931616 12448805514 12457679412 12466553310 12475427208 12484301106 12493175004 12502048902 12510922800 12519796698 12528670596 12537544494 12546418392 12555292290 12564166188 12573040086 12581913984 12590787882 12599661780 12608535678 12617409576 12626283474 12635157372 12644031270 12652905168 12661779066 12670652964 12679526862 12688400760 12697274658 12706148556 12715022454 12723896352 12732770250 12741644148 12750518046 12759391944 12768265842 12777139740 12786013638 12794887536 12803761434 12812635332 12821509230 12830383128 12839257026 12848130924 12857004822 12865878720 12874752618 12883626516 12892500414 12901374312 12910248210 12919122108 12927996006 12936869904 12945743802 12954617700 12963491598 12972365496 12981239394 12990113292 12998987190 13007861088 13016734986 13025608884 13034482782 13043356680 13052230578 13061104476 13069978374 13078852272 13087726170 13096600068 13105473966 13114347864 13123221762 13132095660 13140969558 13149843456 13158717354 13167591252 13176465150 13185339048 13194212946 13203086844 13211960742 13220834640 +SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +3 1660732911 2564 21 1339 +3 1795610432 1808 49 1622 +3 442614592 1801 63 1622 +3 280750947 1722 92 1339 +3 1794713726 1565 143 1297 +3 2122160434 1449 29 1846 +3 2120191779 1431 117 1339 +3 3726560380 1338 37 1339 +3 1382059522 1212 25 1386 +3 2454020642 1108 25 1339 +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c, WatchID DESC LIMIT 10; +9223343978848462524 807160513 1 0 1339 +9223311592760478486 622798371 1 0 1622 +9223290551912005343 1399751135 1 0 1386 +9223283743622263900 4248624768 1 0 1339 +9223277679551805964 2079360072 1 0 1639 +9223250576755718785 471654323 1 0 1622 +9223247301332594153 2030669591 1 0 1297 +9223246228500137980 2156909056 1 0 467 +9223227691645120897 91683468 1 0 1846 +9223220893120643152 1357136342 1 0 1297 +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 GROUP BY WatchID, ClientIP ORDER BY c, WatchID DESC LIMIT 10; +9223371678237104442 1510763633 1 0 1622 +9223371583739401906 1316647510 1 0 1587 +9223369973176670469 1581144184 1 0 1297 +9223369447059354172 1759910327 1 0 1339 +9223368297061364285 1900808651 1 0 1339 +9223367627527921417 1250879542 1 0 1587 +9223367120605710467 818965311 1 0 1622 +9223365068732217887 287613368 1 0 1386 +9223364444623921469 697478885 1 0 1622 +9223363407092000972 76513606 1 0 1297 +SELECT URL, count() AS c FROM test.hits_s3 GROUP BY URL ORDER BY c DESC LIMIT 10; 
+http://public_search 311119 +http://auto.ru/chatay-barana.ru/traction.html#maybettaya 189442 +http://korer.ru/categories.ru/?vkb 142669 +http://main=hurriyet.com/iframe/frm_index.ru/photofunki-sayesilcipo-showthredir?from=&seatsTo=&purchynet.com/galaxy-nosti.ru/preso.tv/Archi.shtml?002 122598 +http://korablitz.ru/L_1OFFERS_CRD 45069 +http://bravoslava-230v 32907 +http://images.yandex.ru 22100 +http://doc/00003713844324&education.html?logi-38-rasstreferer_id 21145 +http://rutube.ru/patianu 19064 +http://search?win=11&pos=22&img_url=http:%2F%2Fcs411276 19060 +SELECT 1, URL, count() AS c FROM test.hits_s3 GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +1 http://public_search 311119 +1 http://auto.ru/chatay-barana.ru/traction.html#maybettaya 189442 +1 http://korer.ru/categories.ru/?vkb 142669 +1 http://main=hurriyet.com/iframe/frm_index.ru/photofunki-sayesilcipo-showthredir?from=&seatsTo=&purchynet.com/galaxy-nosti.ru/preso.tv/Archi.shtml?002 122598 +1 http://korablitz.ru/L_1OFFERS_CRD 45069 +1 http://bravoslava-230v 32907 +1 http://images.yandex.ru 22100 +1 http://doc/00003713844324&education.html?logi-38-rasstreferer_id 21145 +1 http://rutube.ru/patianu 19064 +1 http://search?win=11&pos=22&img_url=http:%2F%2Fcs411276 19060 +SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM test.hits_s3 GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10; +2950145570 2950145569 2950145568 2950145567 8149 +2408492821 2408492820 2408492819 2408492818 7770 +2494028488 2494028487 2494028486 2494028485 7696 +1688720600 1688720599 1688720598 1688720597 7681 +356903718 356903717 356903716 356903715 6817 +908127740 908127739 908127738 908127737 6624 +45907785 45907784 45907783 45907782 6556 +1567954933 1567954932 1567954931 1567954930 6203 +406416527 406416526 406416525 406416524 6015 +1410634230 1410634229 1410634228 1410634227 5742 +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews, Title DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? 
Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews, TraficSourceID DESC LIMIT 1000; +SELECT URLHash, EventDate, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100; +SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; +SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute; diff --git a/tests/queries/1_stateful/00170_s3_cache.sql b/tests/queries/1_stateful/00170_s3_cache.sql new file mode 100644 index 00000000000..af3fd402596 --- /dev/null +++ b/tests/queries/1_stateful/00170_s3_cache.sql @@ -0,0 +1,45 @@ +-- { echo } +SET max_memory_usage='20G'; +SELECT count() FROM test.hits_s3; +SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; +SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; +SELECT sum(UserID) FROM test.hits_s3 ; +SELECT uniq(UserID) FROM test.hits_s3 ; +SELECT uniq(SearchPhrase) FROM test.hits_s3 ; +SELECT min(EventDate), max(EventDate) FROM test.hits_s3 ; +SELECT AdvEngineID, count() FROM test.hits_s3 WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY AdvEngineID DESC; +SELECT RegionID, uniq(UserID) AS u FROM test.hits_s3 GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM test.hits_s3 GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT uniq(SearchPhrase), count() AS c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT uniq(SearchPhrase), uniq(UserID) AS u FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, uniq(SearchPhrase), count() AS c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, count() FROM test.hits_s3 GROUP BY UserID ORDER BY count() DESC LIMIT 10; +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c DESC LIMIT 10; +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c LIMIT 10; +SELECT UserID, toMinute(EventTime) AS m, uniq(SearchPhrase) as u, count() as c FROM test.hits_s3 GROUP BY UserID, m, SearchPhrase ORDER BY UserID DESC LIMIT 10 FORMAT Null; +SELECT UserID FROM test.hits_s3 WHERE UserID = 12345678901234567890; +SELECT count() FROM test.hits_s3 WHERE URL 
LIKE '%metrika%'; +SELECT uniq(SearchPhrase) as u, max(URL) as m, count() AS c FROM test.hits_s3 WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u, m, c DESC LIMIT 10; +SELECT uniq(SearchPhrase), max(URL), max(Title), count() AS c, uniq(UserID) FROM test.hits_s3 WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM test.hits_s3 WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10 format Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10 FORMAT Null; +SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM test.hits_s3 WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25; +SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, max(Referer) FROM test.hits_s3 WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25; +SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), 
sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM test.hits_s3; +SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c, WatchID DESC LIMIT 10; +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 GROUP BY WatchID, ClientIP ORDER BY c, WatchID DESC LIMIT 10; +SELECT URL, count() AS c FROM test.hits_s3 GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, count() AS c FROM test.hits_s3 GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM test.hits_s3 GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews, Title DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews, TraficSourceID DESC LIMIT 1000; +SELECT URLHash, EventDate, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100; +SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; +SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute; diff --git a/tests/queries/bugs/position_case_insensitive_utf8.sql b/tests/queries/bugs/position_case_insensitive_utf8.sql deleted file mode 100644 index 00ddd1b498d..00000000000 --- a/tests/queries/bugs/position_case_insensitive_utf8.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT positionCaseInsensitiveUTF8('Hello', materialize('%\xF0%')); -SELECT positionCaseInsensitiveUTF8(materialize('Hello'), '%\xF0%') FROM numbers(1000); diff --git a/tests/testflows/aes_encryption/configs/clickhouse/common.xml b/tests/testflows/aes_encryption/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- 
a/tests/testflows/aes_encryption/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - 0.0.0.0 - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/aes_encryption/configs/clickhouse/config.xml b/tests/testflows/aes_encryption/configs/clickhouse/config.xml deleted file mode 100644 index 9854f9f990e..00000000000 --- a/tests/testflows/aes_encryption/configs/clickhouse/config.xml +++ /dev/null @@ -1,436 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - /var/lib/clickhouse/access/ - - - users.xml - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 7200 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
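[Editor's note] In this rendering of the patch the XML markup of the deleted testflows files has been stripped, leaving only element values; this applies to every configs/clickhouse/common.xml, config.xml and users.xml hunk below as well. For orientation, the six-line common.xml removed above kept only the values Europe/Moscow, 0.0.0.0, /var/lib/clickhouse/ and /var/lib/clickhouse/tmp/. A minimal sketch of what that file plausibly contained, assuming the <yandex>-rooted layout used at the time (the element names are not visible in the hunk):

    <!-- hypothetical reconstruction of common.xml; only the values appear in the hunk -->
    <yandex>
        <timezone>Europe/Moscow</timezone>
        <listen_host>0.0.0.0</listen_host>
        <path>/var/lib/clickhouse/</path>
        <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
    </yandex>

The extended_precision_data_types and rbac copies differ only in listening on :: instead of 0.0.0.0.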
diff --git a/tests/testflows/aes_encryption/configs/clickhouse/users.xml b/tests/testflows/aes_encryption/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/aes_encryption/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/datetime64_extended_range/configs/clickhouse/common.xml b/tests/testflows/datetime64_extended_range/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- a/tests/testflows/datetime64_extended_range/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - 0.0.0.0 - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/datetime64_extended_range/configs/clickhouse/config.xml b/tests/testflows/datetime64_extended_range/configs/clickhouse/config.xml deleted file mode 100644 index a9a37875273..00000000000 --- a/tests/testflows/datetime64_extended_range/configs/clickhouse/config.xml +++ /dev/null @@ -1,436 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - /var/lib/clickhouse/access/ - - - users.xml - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 60 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
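[Editor's note] The repeated fragments system / query_log / toYYYYMM(event_date) / 7500 just above are what survives of the system-log table definitions in the deleted config.xml. A hedged sketch of one such block, with element names assumed from the stock ClickHouse server configuration rather than taken from this patch:

    <!-- assumed element names; the values system, query_log, toYYYYMM(event_date) and 7500 are visible in the hunk -->
    <query_log>
        <database>system</database>
        <table>query_log</table>
        <partition_by>toYYYYMM(event_date)</partition_by>
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
    </query_log>

The trace_log, query_thread_log and part_log blocks (and, in the rbac config further down, query_views_log) follow the same shape.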
diff --git a/tests/testflows/datetime64_extended_range/configs/clickhouse/users.xml b/tests/testflows/datetime64_extended_range/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/datetime64_extended_range/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/example/configs/clickhouse/common.xml b/tests/testflows/example/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- a/tests/testflows/example/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - 0.0.0.0 - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/example/configs/clickhouse/config.xml b/tests/testflows/example/configs/clickhouse/config.xml deleted file mode 100644 index 9854f9f990e..00000000000 --- a/tests/testflows/example/configs/clickhouse/config.xml +++ /dev/null @@ -1,436 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - /var/lib/clickhouse/access/ - - - users.xml - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 7200 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
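[Editor's note] Each deleted config.xml also opened with a logging section; the surviving values trace, /var/log/clickhouse-server/clickhouse-server.log, the matching .err.log path, 1000M and 10 correspond, under the usual layout (assumed, not shown in the hunk), to something like:

    <!-- assumed element names; only the values appear in the hunk -->
    <logger>
        <level>trace</level>
        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
        <size>1000M</size>
        <count>10</count>
    </logger>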
diff --git a/tests/testflows/example/configs/clickhouse/users.xml b/tests/testflows/example/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/example/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/extended_precision_data_types/configs/clickhouse/common.xml b/tests/testflows/extended_precision_data_types/configs/clickhouse/common.xml deleted file mode 100644 index 0ba01589b90..00000000000 --- a/tests/testflows/extended_precision_data_types/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - :: - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/extended_precision_data_types/configs/clickhouse/config.xml b/tests/testflows/extended_precision_data_types/configs/clickhouse/config.xml deleted file mode 100644 index 842a0573d49..00000000000 --- a/tests/testflows/extended_precision_data_types/configs/clickhouse/config.xml +++ /dev/null @@ -1,448 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - 0.0.0.0 - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - /var/lib/clickhouse/access/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - users.xml - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 60 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
diff --git a/tests/testflows/extended_precision_data_types/configs/clickhouse/users.xml b/tests/testflows/extended_precision_data_types/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/extended_precision_data_types/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/kerberos/configs/clickhouse/common.xml b/tests/testflows/kerberos/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- a/tests/testflows/kerberos/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - 0.0.0.0 - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/kerberos/configs/clickhouse/config.xml b/tests/testflows/kerberos/configs/clickhouse/config.xml deleted file mode 100644 index 0d2904eed48..00000000000 --- a/tests/testflows/kerberos/configs/clickhouse/config.xml +++ /dev/null @@ -1,440 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - 0.0.0.0 - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 60 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
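[Editor's note] The tail of the hunk above (click_cost, any, 0/3600, 86400/60, then max, 0/60, 3600/300, 86400/3600) is a Graphite rollup definition with its tags stripped. A plausible reconstruction, assuming the standard graphite_rollup schema (names are not visible in the patch):

    <!-- hypothetical reconstruction; only the regexp/function/age/precision values survive in the hunk -->
    <graphite_rollup_example>
        <pattern>
            <regexp>click_cost</regexp>
            <function>any</function>
            <retention><age>0</age><precision>3600</precision></retention>
            <retention><age>86400</age><precision>60</precision></retention>
        </pattern>
        <default>
            <function>max</function>
            <retention><age>0</age><precision>60</precision></retention>
            <retention><age>3600</age><precision>300</precision></retention>
            <retention><age>86400</age><precision>3600</precision></retention>
        </default>
    </graphite_rollup_example>

Some of the other deleted copies show 7200 instead of 60 as the second click_cost precision.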
diff --git a/tests/testflows/kerberos/configs/clickhouse/users.xml b/tests/testflows/kerberos/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/kerberos/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/ldap/authentication/configs/clickhouse/common.xml b/tests/testflows/ldap/authentication/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- a/tests/testflows/ldap/authentication/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - 0.0.0.0 - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/ldap/authentication/configs/clickhouse/config.xml b/tests/testflows/ldap/authentication/configs/clickhouse/config.xml deleted file mode 100644 index 53ffa10384e..00000000000 --- a/tests/testflows/ldap/authentication/configs/clickhouse/config.xml +++ /dev/null @@ -1,442 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 7200 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
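[Editor's note] The repeated localhost / 9000 pairs, the localhost / 9440 / 1 triple and the localhost / 1 pair in these hunks are the remains of the test cluster definitions. A sketch under the usual remote_servers layout; the cluster names below are purely illustrative and do not survive in the patch:

    <!-- illustrative cluster names; only host/port/secure values are visible in the hunk -->
    <remote_servers>
        <single_local_shard>
            <shard>
                <replica>
                    <host>localhost</host>
                    <port>9000</port>
                </replica>
            </shard>
        </single_local_shard>
        <single_local_shard_secure>
            <shard>
                <replica>
                    <host>localhost</host>
                    <port>9440</port>
                    <secure>1</secure>
                </replica>
            </shard>
        </single_local_shard_secure>
    </remote_servers>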
diff --git a/tests/testflows/ldap/authentication/configs/clickhouse/users.xml b/tests/testflows/ldap/authentication/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/ldap/authentication/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/ldap/external_user_directory/configs/clickhouse/common.xml b/tests/testflows/ldap/external_user_directory/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- a/tests/testflows/ldap/external_user_directory/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - 0.0.0.0 - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/ldap/external_user_directory/configs/clickhouse/config.xml b/tests/testflows/ldap/external_user_directory/configs/clickhouse/config.xml deleted file mode 100644 index 53ffa10384e..00000000000 --- a/tests/testflows/ldap/external_user_directory/configs/clickhouse/config.xml +++ /dev/null @@ -1,442 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 7200 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
diff --git a/tests/testflows/ldap/external_user_directory/configs/clickhouse/users.xml b/tests/testflows/ldap/external_user_directory/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/ldap/external_user_directory/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- a/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - 0.0.0.0 - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml deleted file mode 100644 index 53ffa10384e..00000000000 --- a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml +++ /dev/null @@ -1,442 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 7200 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/map_type/configs/clickhouse/config.xml b/tests/testflows/map_type/configs/clickhouse/config.xml deleted file mode 100644 index 842a0573d49..00000000000 --- a/tests/testflows/map_type/configs/clickhouse/config.xml +++ /dev/null @@ -1,448 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - 0.0.0.0 - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - /var/lib/clickhouse/access/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - users.xml - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 60 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
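[Editor's note] Three more settings are recognisable from their values in the hunk above: the dictionary config mask *_dictionary.xml, the distributed DDL queue path /clickhouse/task_queue/ddl, and the format schema directory /var/lib/clickhouse/format_schemas/. Under the standard element names (assumed, not shown here) they would read roughly:

    <!-- assumed element names; only the three values appear in the hunk -->
    <dictionaries_config>*_dictionary.xml</dictionaries_config>
    <distributed_ddl>
        <path>/clickhouse/task_queue/ddl</path>
    </distributed_ddl>
    <format_schema_path>/var/lib/clickhouse/format_schemas/</format_schema_path>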
diff --git a/tests/testflows/map_type/configs/clickhouse/users.xml b/tests/testflows/map_type/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/map_type/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/rbac/configs/clickhouse/common.xml b/tests/testflows/rbac/configs/clickhouse/common.xml deleted file mode 100644 index 0ba01589b90..00000000000 --- a/tests/testflows/rbac/configs/clickhouse/common.xml +++ /dev/null @@ -1,6 +0,0 @@ - - Europe/Moscow - :: - /var/lib/clickhouse/ - /var/lib/clickhouse/tmp/ - diff --git a/tests/testflows/rbac/configs/clickhouse/config.xml b/tests/testflows/rbac/configs/clickhouse/config.xml deleted file mode 100644 index f71f14f4733..00000000000 --- a/tests/testflows/rbac/configs/clickhouse/config.xml +++ /dev/null @@ -1,456 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - 0.0.0.0 - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - /var/lib/clickhouse/access/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - users.xml - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - query_views_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 7200 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
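[Editor's note] The users.xml hunk that follows (identical in every testflows suite touched by this patch) keeps only the values 10000000000, 0, random, ::/0, default, 3600 and a run of zeros. Assuming the stock users.xml layout of the time (the element names are not part of the patch), the default profile, user and quota plausibly looked like:

    <!-- hypothetical reconstruction; only the values appear in the hunk -->
    <yandex>
        <profiles>
            <default>
                <max_memory_usage>10000000000</max_memory_usage>
                <use_uncompressed_cache>0</use_uncompressed_cache>
                <load_balancing>random</load_balancing>
            </default>
        </profiles>
        <users>
            <default>
                <networks>
                    <ip>::/0</ip>
                </networks>
                <profile>default</profile>
                <quota>default</quota>
            </default>
        </users>
        <quotas>
            <default>
                <interval>
                    <duration>3600</duration>
                    <queries>0</queries>
                    <errors>0</errors>
                    <result_rows>0</result_rows>
                    <read_rows>0</read_rows>
                    <execution_time>0</execution_time>
                </interval>
            </default>
        </quotas>
    </yandex>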
diff --git a/tests/testflows/rbac/configs/clickhouse/users.xml b/tests/testflows/rbac/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/rbac/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/tests/testflows/window_functions/configs/clickhouse/config.xml b/tests/testflows/window_functions/configs/clickhouse/config.xml deleted file mode 100644 index 842a0573d49..00000000000 --- a/tests/testflows/window_functions/configs/clickhouse/config.xml +++ /dev/null @@ -1,448 +0,0 @@ - - - - - - trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log - 1000M - 10 - - - - 8123 - 9000 - - - - - - - - - /etc/clickhouse-server/server.crt - /etc/clickhouse-server/server.key - - /etc/clickhouse-server/dhparam.pem - none - true - true - sslv2,sslv3 - true - - - - true - true - sslv2,sslv3 - true - - - - RejectCertificateHandler - - - - - - - - - 9009 - - - - - - - - 0.0.0.0 - - - - - - - - - - - - 4096 - 3 - - - 100 - - - - - - 8589934592 - - - 5368709120 - - - - /var/lib/clickhouse/ - - - /var/lib/clickhouse/tmp/ - - - /var/lib/clickhouse/user_files/ - - - /var/lib/clickhouse/access/ - - - - - - users.xml - - - - /var/lib/clickhouse/access/ - - - - - users.xml - - - default - - - - - - default - - - - - - - - - false - - - - - - - - localhost - 9000 - - - - - - - localhost - 9000 - - - - - localhost - 9000 - - - - - - - localhost - 9440 - 1 - - - - - - - localhost - 9000 - - - - - localhost - 1 - - - - - - - - - - - - - - - - - 3600 - - - - 3600 - - - 60 - - - - - - - - - - system - query_log
- - toYYYYMM(event_date) - - 7500 -
- - - - system - trace_log
- - toYYYYMM(event_date) - 7500 -
- - - - system - query_thread_log
- toYYYYMM(event_date) - 7500 -
- - - - system - part_log
- toYYYYMM(event_date) - 7500 -
- - - - - - - - - - - - - - *_dictionary.xml - - - - - - - - - - /clickhouse/task_queue/ddl - - - - - - - - - - - - - - - - click_cost - any - - 0 - 3600 - - - 86400 - 60 - - - - max - - 0 - 60 - - - 3600 - 300 - - - 86400 - 3600 - - - - - - /var/lib/clickhouse/format_schemas/ - - - -
diff --git a/tests/testflows/window_functions/configs/clickhouse/users.xml b/tests/testflows/window_functions/configs/clickhouse/users.xml deleted file mode 100644 index c7d0ecae693..00000000000 --- a/tests/testflows/window_functions/configs/clickhouse/users.xml +++ /dev/null @@ -1,133 +0,0 @@ - - - - - - - - 10000000000 - - - 0 - - - random - - - - - 1 - - - - - - - - - - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - diff --git a/utils/check-marks/main.cpp b/utils/check-marks/main.cpp index 36b81509046..df6f6e5267e 100644 --- a/utils/check-marks/main.cpp +++ b/utils/check-marks/main.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include diff --git a/utils/check-mysql-binlog/main.cpp b/utils/check-mysql-binlog/main.cpp index 04dfb56ff08..7dd387ba5be 100644 --- a/utils/check-mysql-binlog/main.cpp +++ b/utils/check-mysql-binlog/main.cpp @@ -61,7 +61,9 @@ static DB::MySQLReplication::BinlogEventPtr parseSingleEventBody( } case DB::MySQLReplication::TABLE_MAP_EVENT: { - event = std::make_shared(std::move(header)); + DB::MySQLReplication::TableMapEventHeader map_event_header; + map_event_header.parse(*event_payload); + event = std::make_shared(std::move(header), map_event_header); event->parseEvent(*event_payload); last_table_map_event = std::static_pointer_cast(event); break; diff --git a/utils/check-style/check-style b/utils/check-style/check-style index d178778a410..6ebf53cb932 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -74,6 +74,8 @@ EXTERN_TYPES_EXCLUDES=( ProfileEvents::Type ProfileEvents::TypeEnum ProfileEvents::dumpToMapColumn + ProfileEvents::getProfileEvents + ProfileEvents::ThreadIdToCountersSnapshot ProfileEvents::LOCAL_NAME ProfileEvents::CountersIncrement diff --git a/utils/db-generator/query_db_generator.cpp b/utils/db-generator/query_db_generator.cpp index 7d71e13a6e9..dec1f6fe60f 100644 --- a/utils/db-generator/query_db_generator.cpp +++ b/utils/db-generator/query_db_generator.cpp @@ -857,7 +857,7 @@ FuncRet likeFunc(DB::ASTPtr ch, std::map & columns) { std::string value = applyVisitor(DB::FieldVisitorToString(), literal->value); std::string example{}; - for (size_t i = 0; i != value.size(); ++i) + for (size_t i = 0; i != value.size(); ++i) /// NOLINT { if (value[i] == '%') example += randomString(rng() % 10); diff --git a/utils/github-hook/hook.py b/utils/github-hook/hook.py deleted file mode 100644 index 1ea65f3c3ab..00000000000 --- a/utils/github-hook/hook.py +++ /dev/null @@ -1,320 +0,0 @@ -# -*- coding: utf-8 -*- -import json -import requests -import time -import os - -DB = 'gh-data' -RETRIES = 5 - -API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse/' - - -def _reverse_dict_with_list(source): - result = {} - for key, value in list(source.items()): - for elem in value: - result[elem] = key - return result - - -MARKER_TO_LABEL = { - '- New Feature': ['pr-feature'], - '- Bug Fix': ['pr-bugfix'], - '- Improvement': ['pr-improvement'], - '- Performance Improvement': ['pr-performance'], - '- Backward Incompatible Change': ['pr-backward-incompatible'], - '- Build/Testing/Packaging Improvement': ['pr-build'], - '- Documentation': ['pr-documentation', 'pr-doc-fix'], - '- Other': ['pr-other'], - '- Not for changelog': ['pr-not-for-changelog'] -} - -LABEL_TO_MARKER = _reverse_dict_with_list(MARKER_TO_LABEL) - -DOC_ALERT_LABELS = { - 'pr-feature' -} - - -def set_labels_for_pr(pull_request_number, labels, headers): - data = { - "labels": 
list(labels) - } - - for i in range(RETRIES): - try: - response = requests.put(API_URL + 'issues/' + str(pull_request_number) + '/labels', json=data, headers=headers) - response.raise_for_status() - break - except Exception as ex: - print(("Exception", ex)) - time.sleep(0.2) - - -def get_required_labels_from_desc(description, current_labels): - result = set([]) - # find first matching category - for marker, labels in list(MARKER_TO_LABEL.items()): - if marker in description: - if not any(label in current_labels for label in labels): - result.add(labels[0]) - break - - # if no category than leave as is - if not result: - return current_labels - - # save all old labels except category label - for label in current_labels: - if label not in result and label not in LABEL_TO_MARKER: - result.add(label) - - # if some of labels require doc alert - if any(label in result for label in DOC_ALERT_LABELS): - result.add('doc-alert') - - return result - - -def label_pull_request_event(response): - pull_request = response['pull_request'] - current_labels = set([label['name'] for label in pull_request['labels']]) - pr_description = pull_request['body'] if pull_request['body'] else '' - required_labels = get_required_labels_from_desc(pr_description, current_labels) - if not required_labels.issubset(current_labels): - token = os.getenv('GITHUB_TOKEN') - auth = {'Authorization': 'token ' + token} - set_labels_for_pr(pull_request['number'], required_labels, auth) - - -def process_issue_event(response): - issue = response['issue'] - return dict( - action=response['action'], - sender=response['sender']['login'], - updated_at=issue['updated_at'], - url=issue['url'], - number=issue['number'], - author=issue['user']['login'], - labels=[label['name'] for label in issue['labels']], - state=issue['state'], - assignees=[assignee['login'] for assignee in issue['assignees']], - created_at=issue['created_at'], - body=issue['body'] if issue['body'] else '', - title=issue['title'], - comments=issue['comments'], - raw_json=json.dumps(response),) - - -def process_issue_comment_event(response): - issue = response['issue'] - comment = response['comment'] - - return dict( - action='comment_' + response['action'], - sender=response['sender']['login'], - updated_at=issue['updated_at'], - url=issue['url'], - number=issue['number'], - author=issue['user']['login'], - labels=[label['name'] for label in issue['labels']], - state=issue['state'], - assignees=[assignee['login'] for assignee in issue['assignees']], - created_at=issue['created_at'], - body=issue['body'] if issue['body'] else '', - title=issue['title'], - comments=issue['comments'], - comment_body=comment['body'], - comment_author=comment['user']['login'], - comment_url=comment['url'], - comment_created_at=comment['created_at'], - comment_updated_at=comment['updated_at'], - raw_json=json.dumps(response),) - - -def process_pull_request_event(response): - pull_request = response['pull_request'] - result = dict( - updated_at=pull_request['updated_at'], - number=pull_request['number'], - action=response['action'], - sender=response['sender']['login'], - url=pull_request['url'], - author=pull_request['user']['login'], - labels=[label['name'] for label in pull_request['labels']], - state=pull_request['state'], - body=pull_request['body'] if pull_request['body'] else '', - title=pull_request['title'], - created_at=pull_request['created_at'], - assignees=[assignee['login'] for assignee in pull_request['assignees']], - requested_reviewers=[reviewer['login'] for reviewer in 
pull_request['requested_reviewers']], - head_repo=pull_request['head']['repo']['full_name'], - head_ref=pull_request['head']['ref'], - head_clone_url=pull_request['head']['repo']['clone_url'], - head_ssh_url=pull_request['head']['repo']['ssh_url'], - base_repo=pull_request['base']['repo']['full_name'], - base_ref=pull_request['base']['ref'], - base_clone_url=pull_request['base']['repo']['clone_url'], - base_ssh_url=pull_request['base']['repo']['ssh_url'], - raw_json=json.dumps(response), - ) - - if 'mergeable' in pull_request and pull_request['mergeable'] is not None: - result['mergeable'] = 1 if pull_request['mergeable'] else 0 - - if 'merged_by' in pull_request and pull_request['merged_by'] is not None: - result['merged_by'] = pull_request['merged_by']['login'] - - if 'merged_at' in pull_request and pull_request['merged_at'] is not None: - result['merged_at'] = pull_request['merged_at'] - - if 'closed_at' in pull_request and pull_request['closed_at'] is not None: - result['closed_at'] = pull_request['closed_at'] - - if 'merge_commit_sha' in pull_request and pull_request['merge_commit_sha'] is not None: - result['merge_commit_sha'] = pull_request['merge_commit_sha'] - - if 'draft' in pull_request: - result['draft'] = 1 if pull_request['draft'] else 0 - - for field in ['comments', 'review_comments', 'commits', 'additions', 'deletions', 'changed_files']: - if field in pull_request: - result[field] = pull_request[field] - - return result - - -def process_pull_request_review(response): - result = process_pull_request_event(response) - review = response['review'] - result['action'] = 'review_' + result['action'] - result['review_body'] = review['body'] if review['body'] is not None else '' - result['review_id'] = review['id'] - result['review_author'] = review['user']['login'] - result['review_commit_sha'] = review['commit_id'] - result['review_submitted_at'] = review['submitted_at'] - result['review_state'] = review['state'] - return result - - -def process_pull_request_review_comment(response): - result = process_pull_request_event(response) - comment = response['comment'] - result['action'] = 'review_comment_' + result['action'] - result['review_id'] = comment['pull_request_review_id'] - result['review_comment_path'] = comment['path'] - result['review_commit_sha'] = comment['commit_id'] - result['review_comment_body'] = comment['body'] - result['review_comment_author'] = comment['user']['login'] - result['review_comment_created_at'] = comment['created_at'] - result['review_comment_updated_at'] = comment['updated_at'] - return result - - -def process_push(response): - common_part = dict( - before_sha=response['before'], - after_sha=response['after'], - full_ref=response['ref'], - ref=response['ref'].split('/')[-1], - repo=response['repository']['full_name'], - pusher=response['pusher']['name'], - sender=response['sender']['login'], - pushed_at=response['repository']['pushed_at'], - raw_json=json.dumps(response), - ) - commits = response['commits'] - result = [] - for commit in commits: - commit_dict = common_part.copy() - commit_dict['sha'] = commit['id'] - commit_dict['tree_sha'] = commit['tree_id'] - commit_dict['author'] = commit['author']['name'] - commit_dict['committer'] = commit['committer']['name'] - commit_dict['message'] = commit['message'] - commit_dict['commited_at'] = commit['timestamp'] - result.append(commit_dict) - return result - - -def event_processor_dispatcher(headers, body, inserter): - if 'X-Github-Event' in headers: - if headers['X-Github-Event'] == 'issues': - result 
= process_issue_event(body) - inserter.insert_event_into(DB, 'issues', result) - elif headers['X-Github-Event'] == 'issue_comment': - result = process_issue_comment_event(body) - inserter.insert_event_into(DB, 'issues', result) - elif headers['X-Github-Event'] == 'pull_request': - result = process_pull_request_event(body) - inserter.insert_event_into(DB, 'pull_requests', result) - label_pull_request_event(body) - elif headers['X-Github-Event'] == 'pull_request_review': - result = process_pull_request_review(body) - inserter.insert_event_into(DB, 'pull_requests', result) - elif headers['X-Github-Event'] == 'pull_request_review_comment': - result = process_pull_request_review_comment(body) - inserter.insert_event_into(DB, 'pull_requests', result) - elif headers['X-Github-Event'] == 'push': - result = process_push(body) - inserter.insert_events_into(DB, 'commits', result) - - -class ClickHouseInserter(object): - def __init__(self, url, user, password): - self.url = url - self.auth = { - 'X-ClickHouse-User': user, - 'X-ClickHouse-Key': password - } - - def _insert_json_str_info(self, db, table, json_str): - params = { - 'database': db, - 'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table), - 'date_time_input_format': 'best_effort' - } - for i in range(RETRIES): - response = None - try: - response = requests.post(self.url, params=params, data=json_str, headers=self.auth, verify=False) - response.raise_for_status() - break - except Exception as ex: - print(("Cannot insert with exception %s", str(ex))) - if response: - print(("Response text %s", response.text)) - time.sleep(0.1) - else: - raise Exception("Cannot insert data into clickhouse") - - def insert_event_into(self, db, table, event): - event_str = json.dumps(event) - self._insert_json_str_info(db, table, event_str) - - def insert_events_into(self, db, table, events): - jsons = [] - for event in events: - jsons.append(json.dumps(event)) - - self._insert_json_str_info(db, table, ','.join(jsons)) - - -def test(event, context): - inserter = ClickHouseInserter( - os.getenv('CLICKHOUSE_URL'), - os.getenv('CLICKHOUSE_USER'), - os.getenv('CLICKHOUSE_PASSWORD')) - - body = json.loads(event['body'], strict=False) - headers = event['headers'] - event_processor_dispatcher(headers, body, inserter) - - return { - 'statusCode': 200, - 'headers': { - 'Content-Type': 'text/plain' - }, - 'isBase64Encoded': False, - } diff --git a/utils/graphite-rollup/CMakeLists.txt b/utils/graphite-rollup/CMakeLists.txt index 3cc0d3e756f..d5dca3db32e 100644 --- a/utils/graphite-rollup/CMakeLists.txt +++ b/utils/graphite-rollup/CMakeLists.txt @@ -17,7 +17,7 @@ target_include_directories( ${ClickHouse_SOURCE_DIR}/contrib/double-conversion ${ClickHouse_SOURCE_DIR}/contrib/dragonbox/include ${ClickHouse_SOURCE_DIR}/contrib/fmtlib/include ${ClickHouse_SOURCE_DIR}/contrib/cityhash102/include - ${RE2_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/re2_st + ${RE2_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/re2-cmake ) target_compile_definitions(graphite-rollup-bench PRIVATE RULES_DIR="${CMAKE_CURRENT_SOURCE_DIR}") diff --git a/utils/graphite-rollup/graphite-rollup-bench.cpp b/utils/graphite-rollup/graphite-rollup-bench.cpp index dabe0353b0f..4c11f90b3ff 100644 --- a/utils/graphite-rollup/graphite-rollup-bench.cpp +++ b/utils/graphite-rollup/graphite-rollup-bench.cpp @@ -35,7 +35,7 @@ std::vector loadMetrics(const std::string & metrics_file) throw std::runtime_error(strerror(errno)); } - while ((nread = getline(&line, &len, stream)) != -1) + while ((nread = getline(&line, 
&len, stream)) != -1) /// NOLINT { size_t l = strlen(line); if (l > 0) diff --git a/utils/memcpy-bench/memcpy-bench.cpp b/utils/memcpy-bench/memcpy-bench.cpp index 7f8e89b8355..8b75164eb60 100644 --- a/utils/memcpy-bench/memcpy-bench.cpp +++ b/utils/memcpy-bench/memcpy-bench.cpp @@ -673,7 +673,7 @@ static uint8_t * memcpy_my2(uint8_t * __restrict dst, const uint8_t * __restrict size -= padding; } - while (size >= 512) + while (size >= 512) /// NOLINT { __asm__( "vmovups (%[s]), %%ymm0\n" @@ -794,19 +794,19 @@ static uint8_t * memcpy_my2(uint8_t * __restrict dst, const uint8_t * __restrict return ret; } -extern "C" void * __memcpy_erms(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_sse2_unaligned(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_ssse3(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_ssse3_back(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx_unaligned(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx512_unaligned(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx512_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); -extern "C" void * __memcpy_avx512_no_vzeroupper(void * __restrict destination, const void * __restrict source, size_t size); +extern "C" void * __memcpy_erms(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_sse2_unaligned(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_ssse3(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_ssse3_back(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx_unaligned(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx512_unaligned(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx512_unaligned_erms(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT +extern "C" void * __memcpy_avx512_no_vzeroupper(void * __restrict destination, const void * __restrict source, size_t size); /// NOLINT #define VARIANT(N, NAME) \ - if (memcpy_variant == N) \ + if (memcpy_variant == (N)) \ return test(dst, src, size, iterations, num_threads, std::forward(generator), NAME, #NAME); template diff --git a/website/benchmark/dbms/index.html b/website/benchmark/dbms/index.html index b4e29098ead..a856bbb0502 100644 --- a/website/benchmark/dbms/index.html +++ b/website/benchmark/dbms/index.html @@ -15,7 +15,7 @@
- ClickHouse + ClickHouse

Performance comparison of analytical DBMS

diff --git a/website/benchmark/hardware/index.html b/website/benchmark/hardware/index.html index c6b1e2be275..42c87c334c0 100644 --- a/website/benchmark/hardware/index.html +++ b/website/benchmark/hardware/index.html @@ -15,7 +15,7 @@
- ClickHouse + ClickHouse

{{ title }}

@@ -85,6 +85,16 @@ Results for ThinkPad P15 are from Mikhail Shiryaev.
Results for RockPi4 are from Kirill Zholnay.
Results for Xeon 6266C are from David in Shanghai.
Results for SSDNodes and Cavium are from Lorenzo QXIP.
+Results for AMD EPYC 7662 64-Core Processor are from Evgeniy Kuts.
+Results for scaleway GP1-S 8x x86 64bit 32GB ram 300gb NVMe are from Dag Vilmar Tveit.
+Results for scaleway GP1-M 16x x86 64bit 64GB ram 600gb NVMe are from Dag Vilmar Tveit.
+Results for Intel(R) Core(TM) i5-4440 CPU @ 3.10GHz are from Peter, Chun-Sheng, Li.
+Results for MacBook Pro M1 are from Filatenkov Arthur.
+Results for AWS instance type im4gn.4xlarge are from Ananth Gundabattula (Darwinium).
+Results for AWS instance type im4gn.8xlarge are from Ananth Gundabattula (Darwinium).
+Results for AWS instance type im4gn.16xlarge are from Ananth Gundabattula (Darwinium).
+Results for AWS instance type i3.2xlarge are from Ananth Gundabattula (Darwinium).
+Results for 2x EPYC 7702 on ZFS mirror NVME are from Alibek A.

diff --git a/website/benchmark/hardware/results/amd_epyc_7662.json b/website/benchmark/hardware/results/amd_epyc_7662.json new file mode 100644 index 00000000000..436c0099992 --- /dev/null +++ b/website/benchmark/hardware/results/amd_epyc_7662.json @@ -0,0 +1,54 @@ +[ + { + "system": "AMD EPYC 7662", + "system_full": "AMD EPYC 7662 64-Core Processor", + "time": "2022-01-26 11:28:55", + "kind": "server", + "result": + [ + [0.001, 0.001, 0.001], + [0.037, 0.019, 0.020], + [0.082, 0.034, 0.026], + [0.298, 0.045, 0.038], + [0.424, 0.188, 0.178], + [0.594, 0.229, 0.227], + [0.037, 0.028, 0.032], + [0.060, 0.028, 0.027], + [0.496, 0.185, 0.192], + [0.611, 0.210, 0.214], + [0.400, 0.148, 0.137], + [0.424, 0.155, 0.144], + [0.639, 0.256, 0.239], + [0.944, 0.404, 0.309], + [0.699, 0.326, 0.288], + [0.461, 0.221, 0.216], + [1.176, 0.539, 0.561], + [1.070, 0.410, 0.426], + [2.080, 0.950, 0.866], + [0.351, 0.066, 0.130], + [3.248, 0.461, 0.313], + [3.612, 0.261, 0.231], + [6.720, 0.682, 0.671], + [6.300, 0.517, 0.488], + [0.982, 0.136, 0.125], + [0.531, 0.112, 0.109], + [1.006, 0.133, 0.118], + [3.184, 0.324, 0.310], + [2.799, 0.327, 0.308], + [0.569, 0.492, 0.493], + [0.900, 0.212, 0.221], + [1.925, 0.353, 0.326], + [2.489, 1.173, 1.248], + [3.626, 0.990, 0.897], + [3.743, 0.935, 0.915], + [0.419, 0.311, 0.339], + [0.278, 0.244, 0.236], + [0.111, 0.099, 0.098], + [0.139, 0.086, 0.084], + [0.664, 0.520, 0.552], + [0.072, 0.028, 0.036], + [0.050, 0.031, 0.022], + [0.005, 0.005, 0.011] + ] + } +] diff --git a/website/benchmark/hardware/results/amd_epyc_7702_zfs.json b/website/benchmark/hardware/results/amd_epyc_7702_zfs.json new file mode 100644 index 00000000000..9e7c15f579f --- /dev/null +++ b/website/benchmark/hardware/results/amd_epyc_7702_zfs.json @@ -0,0 +1,54 @@ +[ + { + "system": "2x EPYC 7702 on ZFS mirror NVME", + "system_full": "2x EPYC 7702 on ZFS mirror NVME, AMD EPYC 7702 64-Core Processor", + "time": "2022-01-14 21:07:13", + "kind": "server", + "result": + [ + [0.001, 0.002, 0.001], + [0.033, 0.021, 0.022], + [0.026, 0.022, 0.024], + [0.032, 0.024, 0.027], + [0.114, 0.115, 0.116], + [0.156, 0.150, 0.156], + [0.035, 0.023, 0.022], + [0.035, 0.023, 0.023], + [0.134, 0.148, 0.133], + [0.165, 0.150, 0.156], + [0.132, 0.087, 0.083], + [0.103, 0.124, 0.094], + [0.273, 0.221, 0.229], + [0.305, 0.263, 0.267], + [0.273, 0.267, 0.239], + [0.210, 0.228, 0.241], + [0.641, 0.518, 0.498], + [0.413, 0.423, 0.485], + [1.044, 0.991, 0.999], + [0.091, 0.144, 0.071], + [0.203, 0.190, 0.203], + [0.199, 0.210, 0.189], + [0.662, 0.753, 0.705], + [0.636, 0.461, 0.445], + [0.093, 0.079, 0.082], + [0.066, 0.070, 0.072], + [0.086, 0.080, 0.091], + [0.293, 0.280, 0.298], + [0.301, 0.258, 0.268], + [0.624, 0.611, 0.613], + [0.170, 0.168, 0.170], + [0.317, 0.269, 0.273], + [1.801, 1.071, 1.183], + [1.049, 1.080, 0.957], + [0.904, 0.892, 0.898], + [0.293, 0.288, 0.291], + [0.176, 0.173, 0.176], + [0.068, 0.068, 0.070], + [0.060, 0.060, 0.061], + [0.412, 0.388, 0.382], + [0.021, 0.019, 0.019], + [0.019, 0.022, 0.015], + [0.004, 0.010, 0.009] + ] + } +] diff --git a/website/benchmark/hardware/results/gp1_s_16x.json b/website/benchmark/hardware/results/gp1_s_16x.json new file mode 100644 index 00000000000..1353fc87d00 --- /dev/null +++ b/website/benchmark/hardware/results/gp1_s_16x.json @@ -0,0 +1,54 @@ +[ + { + "system": "scaleway GP1-S 8x x86", + "system_full": "scaleway GP1-M 16x x86 64bit 64GB ram 600gb NVMe", + "time": "2022-02-16 00:00:00", + "kind": "cloud", + "result": + [ + [0.005, 0.005, 0.036], + [0.039, 
0.026, 0.026], + [0.092, 0.046, 0.046], + [0.172, 0.056, 0.055], + [0.166, 0.126, 0.123], + [0.364, 0.272, 0.265], + [0.005, 0.006, 0.005], + [0.028, 0.027, 0.029], + [0.581, 0.49, 0.486], + [0.69, 0.549, 0.553], + [0.248, 0.178, 0.175], + [0.266, 0.208, 0.208], + [1.584, 1.017, 0.868], + [1.717, 1.113, 1.145], + [1.144, 1.084, 1.048], + [0.991, 0.92, 0.895], + [4.121, 2.639, 2.621], + [1.447, 1.348, 1.354], + [6.802, 6.466, 6.433], + [0.142, 0.057, 0.052], + [1.252, 0.743, 0.715], + [1.389, 0.823, 0.791], + [3.143, 2.225, 2.159], + [1.795, 0.871, 0.837], + [0.361, 0.236, 0.229], + [0.264, 0.211, 0.214], + [0.37, 0.24, 0.225], + [1.449, 0.967, 0.876], + [1.605, 1.206, 1.16 ], + [3.412, 3.388, 3.397], + [0.783, 0.628, 0.65 ], + [1.419, 1.134, 1.112], + [6.983, 6.843, 6.852], + [5.466, 5.082, 4.955], + [5.632, 4.972, 5.22 ], + [1.639, 1.604, 1.571], + [0.285, 0.298, 0.269], + [0.115, 0.115, 0.101], + [0.098, 0.1, 0.092], + [0.563, 0.562, 0.512], + [0.058, 0.039, 0.042], + [0.039, 0.039, 0.025], + [0.029, 0.012, 0.012] + ] + } +] diff --git a/website/benchmark/hardware/results/gp1_s_8x.json b/website/benchmark/hardware/results/gp1_s_8x.json new file mode 100644 index 00000000000..2bc008af54c --- /dev/null +++ b/website/benchmark/hardware/results/gp1_s_8x.json @@ -0,0 +1,54 @@ +[ + { + "system": "scaleway GP1-S 8x x86", + "system_full": "scaleway GP1-S 8x x86 64bit 32GB ram 300gb NVMe", + "time": "2022-02-16 00:00:00", + "kind": "cloud", + "result": + [ + [0.026, 0.004, 0.004], + [0.038, 0.026, 0.026], + [0.071, 0.058, 0.059], + [0.118, 0.072, 0.069], + [0.190, 0.151, 0.155], + [0.465, 0.438, 0.401], + [0.002, 0.004, 0.004], + [0.028, 0.029, 0.026], + [0.751, 0.672, 0.676], + [0.897, 0.845, 0.798], + [0.291, 0.234, 0.254], + [0.371, 0.297, 0.296], + [1.208, 1.041, 1.005], + [1.445, 1.400, 1.414], + [1.406, 1.317, 1.342], + [1.414, 1.242, 1.244], + [4.179, 3.849, 3.878], + [2.320, 2.275, 2.201], + [7.499, 7.424, 7.196], + [0.135, 0.077, 0.068], + [1.465, 1.075, 1.063], + [1.700, 1.221, 1.198], + [3.731, 2.959, 2.905], + [2.283, 1.401, 1.342], + [0.474, 0.377, 0.367], + [0.371, 0.314, 0.337], + [0.483, 0.357, 0.356], + [1.565, 1.194, 1.181], + [2.226, 1.815, 1.746], + [2.990, 2.971, 2.947], + [1.003, 0.815, 0.842], + [1.386, 1.127, 1.108], + [8.174, 7.690, 7.735], + [6.171, 5.802, 5.933], + [6.201, 5.774, 5.972], + [1.758, 1.642, 1.639], + [0.288, 0.273, 0.253], + [0.121, 0.125, 0.107], + [0.096, 0.082, 0.088], + [0.490, 0.461, 0.476], + [0.041, 0.037, 0.035], + [0.035, 0.031, 0.025], + [0.008, 0.011, 0.015] + ] + } +] diff --git a/website/benchmark/hardware/results/i3_2xlarge.json b/website/benchmark/hardware/results/i3_2xlarge.json new file mode 100644 index 00000000000..e716b99e8a2 --- /dev/null +++ b/website/benchmark/hardware/results/i3_2xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS i3.2xlarge", + "system_full": "AWS i3.2xlarge Intel(R) Xeon(R) CPU E5-2686 v4 @ 2.30GHz", + "time": "2022-01-02 03:16:35", + "kind": "cloud", + "result": + [ + [0.002, 0.002, 0.002], + [0.040, 0.023, 0.027], + [0.153, 0.084, 0.090], + [0.682, 0.113, 0.120], + [1.218, 0.227, 0.225], + [1.972, 0.708, 0.700], + [0.066, 0.052, 0.052], + [0.086, 0.037, 0.030], + [1.609, 1.123, 1.119], + [1.784, 1.231, 1.241], + [0.782, 0.444, 0.392], + [0.929, 0.504, 0.476], + [2.273, 1.649, 1.633], + [4.022, 2.181, 2.214], + [2.459, 2.022, 1.925], + [2.015, 1.621, 1.677], + [6.344, 5.439, 5.625], + [4.450, 3.724, 3.678], + [12.221, 10.922, 10.933], + [0.674, 0.139, 0.132], + [18.758, 2.164, 2.152], + [20.902, 2.440, 2.367], + 
[39.396, 5.476, 5.427], + [31.640, 2.759, 2.755], + [4.498, 0.647, 0.646], + [1.709, 0.627, 0.540], + [4.488, 0.665, 0.656], + [18.286, 2.023, 2.013], + [15.375, 2.896, 2.959], + [2.962, 2.899, 2.974], + [3.663, 1.299, 1.304], + [9.731, 1.922, 1.915], + [11.575, 10.394, 10.514], + [20.617, 8.121, 8.097], + [20.558, 8.088, 8.049], + [3.059, 2.780, 2.678], + [0.322, 0.244, 0.217], + [0.122, 0.082, 0.092], + [0.146, 0.073, 0.072], + [0.652, 0.473, 0.502], + [0.097, 0.025, 0.034], + [0.052, 0.025, 0.019], + [0.007, 0.004, 0.005] + ] + } +] diff --git a/website/benchmark/hardware/results/im4gn_16xlarge.json b/website/benchmark/hardware/results/im4gn_16xlarge.json new file mode 100644 index 00000000000..6db4f08021f --- /dev/null +++ b/website/benchmark/hardware/results/im4gn_16xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS im4gn.16xlarge", + "system_full": "AWS im4gn.16xlarge Neoverse-N1 4x7,500 NVMe SSD", + "time": "2022-01-04 01:04:37", + "kind": "cloud", + "result": + [ + [0.002, 0.001, 0.001], + [0.046, 0.017, 0.021], + [0.044, 0.021, 0.022], + [0.850, 0.064, 0.066], + [1.423, 0.076, 0.075], + [2.368, 0.141, 0.139], + [0.022, 0.013, 0.013], + [0.037, 0.038, 0.036], + [1.434, 0.138, 0.138], + [2.173, 0.159, 0.158], + [1.253, 0.089, 0.091], + [1.481, 0.102, 0.093], + [2.377, 0.211, 0.206], + [3.850, 0.272, 0.253], + [2.180, 0.276, 0.239], + [1.030, 0.242, 0.228], + [3.966, 0.564, 0.526], + [3.549, 0.404, 0.377], + [6.940, 1.389, 1.267], + [0.741, 0.225, 0.126], + [19.135, 0.398, 0.371], + [21.322, 0.330, 0.322], + [40.018, 0.727, 0.697], + [33.059, 1.592, 1.565], + [4.599, 0.098, 0.092], + [2.270, 0.089, 0.088], + [5.238, 0.098, 0.095], + [19.201, 0.358, 0.349], + [15.661, 0.430, 0.412], + [0.896, 0.876, 0.863], + [3.579, 0.223, 0.200], + [9.826, 0.344, 0.314], + [7.844, 2.085, 2.183], + [19.018, 1.143, 1.036], + [19.009, 1.203, 1.046], + [0.531, 0.325, 0.331], + [0.262, 0.221, 0.218], + [0.137, 0.101, 0.090], + [0.116, 0.099, 0.079], + [0.531, 0.468, 0.468], + [0.070, 0.025, 0.043], + [0.034, 0.020, 0.020], + [0.007, 0.004, 0.018] + ] + } +] diff --git a/website/benchmark/hardware/results/im4gn_4xlarge.json b/website/benchmark/hardware/results/im4gn_4xlarge.json new file mode 100644 index 00000000000..c3024c8dff2 --- /dev/null +++ b/website/benchmark/hardware/results/im4gn_4xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS im4gn.4xlarge", + "system_full": "AWS im4gn.4xlarge Neoverse-N1 1x7,500 NVMe SSD", + "time": "2022-01-02 06:59:48", + "kind": "cloud", + "result": + [ + [0.002, 0.002, 0.002], + [0.023, 0.013, 0.013], + [0.061, 0.026, 0.025], + [0.841, 0.033, 0.032], + [1.530, 0.086, 0.084], + [2.362, 0.291, 0.292], + [0.038, 0.029, 0.028], + [0.016, 0.015, 0.014], + [1.341, 0.302, 0.301], + [1.845, 0.376, 0.360], + [0.888, 0.184, 0.181], + [1.343, 0.215, 0.210], + [2.185, 0.469, 0.459], + [3.662, 0.603, 0.580], + [2.150, 0.587, 0.561], + [0.875, 0.458, 0.449], + [4.079, 1.425, 1.343], + [3.451, 0.927, 0.859], + [7.646, 2.890, 2.877], + [0.710, 0.107, 0.042], + [19.321, 0.696, 0.677], + [21.321, 0.740, 0.726], + [40.051, 1.625, 1.598], + [32.154, 0.842, 0.819], + [4.681, 0.240, 0.221], + [1.976, 0.197, 0.195], + [5.062, 0.241, 0.223], + [18.972, 0.643, 0.628], + [15.676, 0.978, 0.957], + [0.524, 0.505, 0.518], + [3.589, 0.460, 0.461], + [9.647, 0.674, 0.642], + [8.330, 3.414, 3.354], + [19.314, 2.296, 2.286], + [19.278, 2.311, 2.273], + [0.799, 0.753, 0.717], + [0.288, 0.222, 0.222], + [0.118, 0.101, 0.099], + [0.126, 0.085, 0.084], + [0.542, 0.480, 0.446], + [0.065, 0.025, 0.031], 
+ [0.046, 0.021, 0.020], + [0.006, 0.010, 0.017] + ] + } +] diff --git a/website/benchmark/hardware/results/im4gn_8xlarge.json b/website/benchmark/hardware/results/im4gn_8xlarge.json new file mode 100644 index 00000000000..117812b0162 --- /dev/null +++ b/website/benchmark/hardware/results/im4gn_8xlarge.json @@ -0,0 +1,54 @@ +[ + { + "system": "AWS im4gn.8xlarge", + "system_full": "AWS im4gn.8xlarge Neoverse-N1 2x7,500 NVMe SSD", + "time": "2022-01-03 22:23:27", + "kind": "cloud", + "result": + [ + [0.002, 0.001, 0.001], + [0.034, 0.010, 0.010], + [0.044, 0.016, 0.016], + [0.862, 0.020, 0.020], + [1.500, 0.069, 0.071], + [2.454, 0.174, 0.172], + [0.025, 0.017, 0.017], + [0.023, 0.023, 0.023], + [1.329, 0.182, 0.181], + [2.167, 0.216, 0.212], + [1.159, 0.125, 0.119], + [1.483, 0.127, 0.122], + [2.313, 0.268, 0.260], + [3.788, 0.361, 0.329], + [2.043, 0.343, 0.308], + [0.872, 0.321, 0.309], + [3.921, 0.879, 0.840], + [3.460, 0.587, 0.543], + [7.272, 1.517, 1.447], + [0.707, 0.078, 0.064], + [19.314, 0.425, 0.385], + [21.332, 0.414, 0.405], + [40.030, 0.945, 0.921], + [32.867, 0.513, 0.477], + [4.640, 0.130, 0.124], + [2.227, 0.115, 0.107], + [5.223, 0.134, 0.126], + [19.179, 0.371, 0.367], + [15.658, 0.557, 0.545], + [0.541, 0.558, 0.552], + [3.548, 0.273, 0.250], + [9.772, 0.384, 0.357], + [7.896, 2.431, 2.661], + [19.149, 1.389, 1.268], + [19.103, 1.342, 1.282], + [0.583, 0.530, 0.541], + [0.238, 0.233, 0.243], + [0.114, 0.098, 0.102], + [0.124, 0.092, 0.089], + [0.552, 0.471, 0.481], + [0.053, 0.025, 0.025], + [0.047, 0.057, 0.020], + [0.022, 0.032, 0.004] + ] + } +] diff --git a/website/benchmark/hardware/results/intel_core_i5_4440.json b/website/benchmark/hardware/results/intel_core_i5_4440.json new file mode 100644 index 00000000000..b70b9e08fd4 --- /dev/null +++ b/website/benchmark/hardware/results/intel_core_i5_4440.json @@ -0,0 +1,54 @@ +[ + { + "system": "Intel(R) Core(TM) i5-4440 CPU @ 3.10GHz", + "system_full": "Intel(R) Core(TM) i5-4440 CPU @ 3.10GHz", + "time": "2022-01-06 08:48:45", + "kind": "server", + "result": + [ + [0.002, 0.001, 0.001], + [0.136, 0.021, 0.020], + [1.102, 0.061, 0.055], + [2.669, 0.089, 0.084], + [2.646, 0.198, 0.192], + [4.018, 0.606, 0.600], + [0.115, 0.034, 0.044], + [0.210, 0.018, 0.018], + [4.655, 1.002, 1.004], + [6.715, 1.139, 1.150], + [3.235, 0.351, 0.352], + [3.850, 0.410, 0.408], + [4.446, 1.579, 1.570], + [7.112, 2.031, 2.061], + [5.658, 1.812, 1.804], + [3.528, 1.600, 1.599], + [9.216, 5.029, 5.031], + [7.023, 2.968, 3.362], + [17.412, 9.705, 9.695], + [2.717, 0.110, 0.100], + [28.586, 1.907, 1.870], + [34.064, 2.178, 2.172], + [67.172, 5.105, 5.101], + [79.885, 2.579, 2.540], + [9.176, 0.572, 0.560], + [4.050, 0.496, 0.492], + [8.918, 0.575, 0.568], + [28.731, 2.089, 2.058], + [24.174, 2.956, 3.043], + [5.103, 5.010, 5.007], + [10.075, 1.188, 1.197], + [18.485, 1.966, 1.954], + [19.455, 10.855, 10.917], + [31.320, 7.848, 7.831], + [30.794, 7.871, 7.877], + [3.360, 2.777, 2.778], + [0.371, 0.166, 0.180], + [0.259, 0.064, 0.083], + [0.275, 0.060, 0.058], + [1.024, 0.380, 0.378], + [0.198, 0.025, 0.025], + [0.162, 0.023, 0.015], + [0.059, 0.006, 0.007] + ] + } +] diff --git a/website/benchmark/hardware/results/macbook_pro_m1_2021.json b/website/benchmark/hardware/results/macbook_pro_m1_2021.json new file mode 100644 index 00000000000..516940e1ef2 --- /dev/null +++ b/website/benchmark/hardware/results/macbook_pro_m1_2021.json @@ -0,0 +1,54 @@ +[ + { + "system": "MacBook Pro M1", + "system_full": "MacBook Pro M1 Max 16\" 2022, 64 GiB RAM, 1 TB 
SSD", + "time": "2022-02-27 00:00:00", + "kind": "laptop", + "result": + [ + [0.012, 0.001, 0.001], + [0.096, 0.012, 0.010], + [0.043, 0.022, 0.023], + [0.063, 0.031, 0.030], + [0.099, 0.070, 0.070], + [0.229, 0.197, 0.195], + [0.012, 0.001, 0.001], + [0.027, 0.012, 0.011], + [0.340, 0.301, 0.306], + [0.439, 0.383, 0.386], + [0.169, 0.134, 0.136], + [0.197, 0.160, 0.162], + [0.475, 0.435, 0.432], + [0.615, 0.557, 0.553], + [0.553, 0.502, 0.507], + [0.490, 0.445, 0.439], + [1.392, 1.260, 1.254], + [0.865, 0.833, 0.835], + [2.285, 2.180, 2.194], + [0.064, 0.035, 0.033], + [0.761, 0.650, 0.651], + [0.867, 0.715, 0.718], + [1.753, 1.478, 1.499], + [1.037, 0.737, 0.735], + [0.251, 0.201, 0.202], + [0.208, 0.172, 0.174], + [0.254, 0.202, 0.201], + [0.733, 0.598, 0.603], + [0.995, 0.882, 0.879], + [0.562, 0.545, 0.545], + [0.431, 0.371, 0.371], + [0.586, 0.490, 0.490], + [2.882, 2.664, 2.656], + [2.255, 2.147, 2.146], + [2.248, 2.137, 2.154], + [0.659, 0.638, 0.631], + [0.125, 0.108, 0.108], + [0.070, 0.052, 0.052], + [0.060, 0.042, 0.042], + [0.250, 0.229, 0.228], + [0.030, 0.013, 0.012], + [0.026, 0.011, 0.010], + [0.017, 0.003, 0.003] + ] + } +] diff --git a/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md b/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md deleted file mode 100644 index 2c0463687b4..00000000000 --- a/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: 'ClickHouse Meetup в Москве, 21 ноября 2016' -image: 'https://blog-images.clickhouse.com/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016/main.jpg' -date: '2016-11-22' -tags: ['мероприятия', 'meetup', 'Москва'] ---- - -[Посмотреть видео](https://events.yandex.ru/lib/talks/4351/) diff --git a/website/blog/ru/2016/clickhouse-na-highload-2016.md b/website/blog/ru/2016/clickhouse-na-highload-2016.md deleted file mode 100644 index 7dacbde140a..00000000000 --- a/website/blog/ru/2016/clickhouse-na-highload-2016.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -title: 'ClickHouse на HighLoad++ 2016' -image: 'https://blog-images.clickhouse.com/ru/2016/clickhouse-na-highload-2016/main.jpg' -date: '2016-12-10' -tags: ['мероприятия', 'конференции', 'Москва', 'HighLoad++'] ---- - -![iframe](https://www.youtube.com/embed/TAiCXHgZn50) - -[Расшифровка доклада](https://habrahabr.ru/post/322724/) - -![iframe](https://www.youtube.com/embed/tf38TPvwjJ4) - -[Расшифровка доклада](https://habrahabr.ru/post/322620/) diff --git a/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md b/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md deleted file mode 100644 index d90a7b9c4bb..00000000000 --- a/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'ClickHouse на встрече про инфраструктуру хранения и обработки данных в Яндексе' -image: 'https://blog-images.clickhouse.com/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse/main.jpg' -date: '2016-10-16' -tags: ['мероприятия', 'инфраструктура'] ---- - -![iframe](https://www.youtube.com/embed/Ho4_dQk7dAg) - -[Страница мероприятия «Яндекс изнутри: инфраструктура хранения и обработки данных»](https://events.yandex.ru/events/meetings/15-oct-2016/), прошедшего 15 октября 2016 года. 
diff --git a/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md b/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md deleted file mode 100644 index e7216f47408..00000000000 --- a/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'Яндекс открывает ClickHouse' -image: 'https://blog-images.clickhouse.com/ru/2016/yandeks-otkryvaet-clickhouse/main.jpg' -date: '2016-06-15' -tags: ['анонс', 'GitHub', 'лицензия'] ---- - -Сегодня внутренняя разработка компании Яндекс — [аналитическая СУБД ClickHouse](https://clickhouse.com/), стала доступна каждому. Исходники опубликованы на [GitHub](https://github.com/ClickHouse/ClickHouse) под лицензией Apache 2.0. - -ClickHouse позволяет выполнять аналитические запросы в интерактивном режиме по данным, обновляемым в реальном времени. Система способна масштабироваться до десятков триллионов записей и петабайт хранимых данных. Использование ClickHouse открывает возможности, которые раньше было даже трудно представить: вы можете сохранять весь поток данных без предварительной агрегации и быстро получать отчёты в любых разрезах. ClickHouse разработан в Яндексе для задач [Яндекс.Метрики](https://metrika.yandex.ru/) — второй по величине системы веб-аналитики в мире. diff --git a/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md b/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md deleted file mode 100644 index adab2fd7676..00000000000 --- a/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md +++ /dev/null @@ -1,14 +0,0 @@ ---- -title: 'ClickHouse MeetUp едет в Минск!' -image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-edet-v-minsk/main.jpg' -date: '2017-06-13' -tags: ['мероприятия', 'meetup', 'Минск', 'Беларусь', 'анонс'] ---- - -29 июня в Минске впервые выступят с докладами создатели СУБД ClickHоuse и те, кто ежедневно использует её для решения аналитических задач. Докладчики расскажут о последних изменениях и предстоящих обновлениях СУБД, а также о нюансах работы с ней. - -Встреча будет интересна администраторам ClickHouse и тем, кто пока только присматривается к системе. Мы приглашаем белорусских пользователей также поделиться своим опытом использования ClickHоuse и выступить на встрече с блиц-докладами: при регистрации мы предложим вам такую возможность! - -Участие в мероприятии бесплатное, но необходимо заранее зарегистрироваться: количество мест в зале ограничено. - -Посмотреть программу и подать заявку на участие можно на [странице встречи](https://events.yandex.ru/events/meetings/29-june-2017). 
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md deleted file mode 100644 index b7441b7ac30..00000000000 --- a/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: 'ClickHouse Meetup в Екатеринбурге, 16 мая 2017' -image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017/main.jpg' -date: '2017-05-17' -tags: ['мероприятия', 'meetup', 'Екатеринбург'] ---- - -[Посмотреть презентацию](https://presentations.clickhouse.com/meetup6/) diff --git a/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md b/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md deleted file mode 100644 index 8cd3375abe9..00000000000 --- a/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md +++ /dev/null @@ -1,16 +0,0 @@ ---- -title: 'ClickHouse MeetUp в Минске: итоги' -image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-minske-itogi/main.jpg' -date: '2017-06-19' -tags: ['мероприятия', 'meetup', 'Минск', 'Беларусь'] ---- - -Недавно в Минске мы встретились с пользователями ClickHouse и техническими специалистами, кто только знакомится с СУБД. - -Мы делимся с вами презентациями докладчиков и будем рады ответить на вопросы в [чате ClickHouse в Телеграме](https://t.me/clickhouse_ru). - -[История создания ClickHouse, новости и планы по развитию](https://presentations.clickhouse.com/meetup7/), Алексей Миловидов - -[Использование ClickHouse для мониторинга связности сети](https://presentations.clickhouse.com/meetup7/netmon.pdf), Дмитрий Липин - -[Разбираемся во внутреннем устройстве ClickHouse](https://presentations.clickhouse.com/meetup7/internals.pdf), Виталий Людвиченко diff --git a/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md deleted file mode 100644 index e8bbf23c2c4..00000000000 --- a/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'ClickHouse Meetup в Новосибирске, 3 апреля 2017' -image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017/main.jpg' -date: '2017-04-04' -tags: ['мероприятия', 'meetup', 'Новосибирск'] ---- - -[Презентация Алексея Миловидова](https://presentations.clickhouse.com/meetup4/) - -[Презентация Марии Мансуровой](https://presentations.clickhouse.com/meetup4/clickhouse_for_analysts.pdf) diff --git a/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md deleted file mode 100644 index 16bf2822746..00000000000 --- a/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: 'ClickHouse Meetup в Санкт-Петербурге, 28 февраля 2017' -image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017/main.jpg' -date: '2017-03-01' -tags: ['мероприятия', 'meetup', 'Санкт-Петербург'] ---- - -![iframe](https://www.youtube.com/embed/CVrwp4Zoex4) diff --git a/website/blog/ru/2017/clickhouse-na-uwdc-2017.md b/website/blog/ru/2017/clickhouse-na-uwdc-2017.md deleted file mode 100644 index 1806f5fb6ba..00000000000 --- a/website/blog/ru/2017/clickhouse-na-uwdc-2017.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'ClickHouse на UWDC 2017' -image: 
'https://blog-images.clickhouse.com/ru/2017/clickhouse-na-uwdc-2017/main.jpg' -date: '2017-05-20' -tags: ['мероприятия', 'конференции', 'Челябинск'] ---- - -![iframe](https://www.youtube.com/embed/isYA4e5zg1M?t=2h8m15s) - -[Посмотреть презентацию](https://presentations.clickhouse.com/uwdc/) diff --git a/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md deleted file mode 100644 index a4dbff081ff..00000000000 --- a/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: 'ClickHouse Meetup в Лимассоле, 7 мая 2019' -image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/main.jpg' -date: '2019-05-14' -tags: ['мероприятия', 'meetup', 'Лимассол', 'Кипр', 'Европа'] ---- - -Первый ClickHouse Meetup под открытым небом прошел в сердце Лимассола, второго по размеру города Кипра, на крыше, любезно предоставленной Exness Group. С крыши открывались сногсшибательные виды, но докладчики отлично справлялись с конкуренцией с ними за внимание аудитории. Более ста человек присоединилось к мероприятие, что в очередной раз подтверждает высокий интерес к ClickHouse по всему земному шару. Контент мероприятия также доступен в формате [видеозаписи](https://www.youtube.com/watch?v=_rpU-TvSfZ8). - -[Кирилл Шваков](https://github.com/kshvakov) сыграл ключевую роль в том, чтобы данное мероприятие стало возможным: наладил коммуникацию с ClickHouse сообществом на Кипре, нашел отличную площадку и докладчиков. Большинство ClickHouse митапов по всему миру происходят благодаря активным участникам сообщества таким как Кирилл. Если вы хотите помочь нам организовать ClickHouse митап в своём регионе, пожалуйста свяжитесь с командой ClickHouse в Яндексе через [эту форму](https://clickhouse.com/#meet) или любым другим удобным способом. - -![Кирилл Шваков](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/1.jpg) - -Кирилл широко известен благодаря его замечательногму [ClickHouse Go Driver](https://github.com/clickhouse/clickhouse-go), работающему по нативному протоколу, а его открывающий доклад был о его опыте оптимизации ClickHouse запросов и решению реальных прикладных задач в Integros и Wisebits. [Слайды](https://presentations.clickhouse.com/meetup22/strategies.pdf). [Полные тексты запросов](https://github.com/kshvakov/ClickHouse-Meetup-Exness). - -Мероприятие началось ранним вечером… -![Вечер в Лимассоле](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/2.jpg) - -…но природе потребовалось всего около часа, чтобы включить «ночной режим». Зато проецируемые слайды стало заметно легче читать. 
-![Ночь в Лимассоле](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/3.jpg) - -Сергей Томилов с его коллегами из Exness Platform Team поделились деталями об эволюции их систем для анализа логов и метрик, а также как они в итоге стали использовать ClickHouse для долгосрочного хранения и анализа данных([слайды](https://presentations.clickhouse.com/meetup22/exness.pdf)): -![Сергей Томилов](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/4.jpg) - -Алексей Миловидов из команды ClickHouse в Яндексе продемонстрировал функциональность из недавних релизов ClickHouse, а также рассказал о том, что стоит ждать в ближайшем будущем([слайды](https://presentations.clickhouse.com/meetup22/new_features/)): -![Алексей Миловидов](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/5.jpg) - -Александр Зайцев, технический директор Altinity, показал обзор того, как можно интегрировать ClickHouse в окружения, работающие на Kubernetes([слайды](https://presentations.clickhouse.com/meetup22/kubernetes.pdf)): -![Александр Зайцев](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/6.jpg) - -Владимир Гончаров, бекенд разработчик из Aloha Browser, закрывал ClickHouse Limassol Meetup демонстрацией нескольких проектов для интеграции других opensource продуктов для анализа логов с ClickHouse ([слайды](https://presentations.clickhouse.com/meetup22/aloha.pdf)): -![Владимир Гончаров](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/7.jpg) - -К сожалению, приближалась полнось и только самые «морозостойкие» любители ClickHouse продержались всё мероприятие, так стало заметно холодать. - -![Лимассол](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/8.jpg) - -Больше фотографий с мероприятия доступно в [коротком послесловии от Exness](https://www.facebook.com/events/386638262181785/permalink/402167077295570/). diff --git a/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md deleted file mode 100644 index 7e82fd653d7..00000000000 --- a/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'ClickHouse Meetup в Москве, 5 сентября 2019' -image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019/main.jpg' -date: '2019-09-06' -tags: ['мероприятия', 'meetup', 'Москва'] ---- - -![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3gYSwohnKFUozYy9QdUpcT_) - -[Слайды опубликованы на GitHub](https://github.com/clickhouse/clickhouse-presentations/tree/master/meetup28). diff --git a/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md deleted file mode 100644 index a90efdca645..00000000000 --- a/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: 'ClickHouse Meetup в Новосибирске, 26 июня 2019' -image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019/main.jpg' -date: '2019-06-05' -tags: ['мероприятия', 'meetup', 'Новосибирск'] ---- - -Изюминкой второго ClickHouse митапа в Новосибирске были два низкоуровневых доклада с погружением во внутренности ClickHouse, а остальная часть контента была очень прикладной с конкретными сценариями. 
Любезно предоставленный S7 зал на сто человек был полон до самого завершения последнего доклада где-то ближе к полуночи. - -![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3ionSVt-NYC9Vu_83xxhb4J) - -Как обычно, [все слайды опубликованы на GitHub](https://presentations.clickhouse.com/meetup25). diff --git a/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md deleted file mode 100644 index bef157ade4e..00000000000 --- a/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: 'ClickHouse Meetup в Санкт-Петербурге, 27 июля 2019' -image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019/main.jpg' -date: '2019-08-01' -tags: ['мероприятия', 'meetup', 'Санкт-Петербург'] ---- - -![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3j3X7TWrKmnEPcfEG901W-T) - -[Слайды опубликованы на GitHub](https://github.com/ClickHouse/clickhouse-presentations/tree/master/meetup27). diff --git a/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md b/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md deleted file mode 100644 index e6897f17156..00000000000 --- a/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md +++ /dev/null @@ -1,12 +0,0 @@ ---- -title: 'ClickHouse Meetup в Минске, 11 июля 2019' -image: 'https://blog-images.clickhouse.com/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019/main.jpg' -date: '2019-07-12' -tags: ['мероприятия', 'meetup', 'Минск', 'Беларусь'] ---- - -![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3hLz6dmyu6gM_X871FG9eCc) - -[Все слайды опубликованы на GitHub](https://github.com/ClickHouse/clickhouse-presentations/tree/master/meetup26). 
- -![Минск](https://blog-images.clickhouse.com/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019/1.jpg) diff --git a/website/blog/ru/index.md b/website/blog/ru/index.md deleted file mode 100644 index 227a69408dc..00000000000 --- a/website/blog/ru/index.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -is_index: true ---- diff --git a/website/blog/ru/redirects.txt b/website/blog/ru/redirects.txt deleted file mode 100644 index 4e34d53af3d..00000000000 --- a/website/blog/ru/redirects.txt +++ /dev/null @@ -1,15 +0,0 @@ -yandeks-otkryvaet-clickhouse.md 2016/yandeks-otkryvaet-clickhouse.md -clickhouse-meetup-v-moskve-21-noyabrya-2016.md 2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md -clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md 2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md -clickhouse-na-highload-2016.md 2016/clickhouse-na-highload-2016.md -clickhouse-meetup-v-novosibirske-3-aprelya-2017.md 2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md -clickhouse-meetup-v-minske-itogi.md 2017/clickhouse-meetup-v-minske-itogi.md -clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md 2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md -clickhouse-meetup-v-ekaterinburge-16-maya-2017.md 2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md -clickhouse-na-uwdc-2017.md 2017/clickhouse-na-uwdc-2017.md -clickhouse-meetup-edet-v-minsk.md 2017/clickhouse-meetup-edet-v-minsk.md -clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md 2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md -clickhouse-meetup-v-moskve-5-sentyabrya-2019.md 2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md -clickhouse-meetup-v-novosibirske-26-iyunya-2019.md 2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md -clickrouse-meetup-v-minske-11-iyulya-2019.md 2019/clickrouse-meetup-v-minske-11-iyulya-2019.md -clickhouse-meetup-v-limassole-7-maya-2019.md 2019/clickhouse-meetup-v-limassole-7-maya-2019.md diff --git a/website/css/greenhouse.css b/website/css/greenhouse.css deleted file mode 100644 index 76812a169e8..00000000000 --- a/website/css/greenhouse.css +++ /dev/null @@ -1 +0,0 @@ -#main{padding-bottom:0;padding-top:0}#wrapper{max-width:1078px;padding:0}body>#wrapper>#main>#wrapper>#content,body>#wrapper>#main>#wrapper>#logo,body>#wrapper>#main>#wrapper>h1{display:none}body>#wrapper>#main>#wrapper>#board_title{margin-top:0}body>#wrapper>#main>#logo{margin-top:80px}body>#wrapper>#main>:last-child{margin-bottom:120px} \ No newline at end of file diff --git a/website/js/base.js b/website/js/base.js index 52b801eb98f..6704231c69d 100644 --- a/website/js/base.js +++ b/website/js/base.js @@ -55,7 +55,7 @@ $('pre').each(function(_, element) { $(element).prepend( - 'Copy' + 'Copy' ); }); @@ -85,6 +85,9 @@ $(element).append( '' ); + $(element).append( + '' + ); }); } }); diff --git a/website/sitemap-static.xml b/website/sitemap-static.xml index b5b5f3aa0d5..88888e31b3b 100644 --- a/website/sitemap-static.xml +++ b/website/sitemap-static.xml @@ -17,7 +17,7 @@ weekly - https://clickhouse.com/codebrowser/html_report/ClickHouse/index.html + https://clickhouse.com/codebrowser/ClickHouse/index.html daily diff --git a/website/src/scss/greenhouse.scss b/website/src/scss/greenhouse.scss deleted file mode 100644 index 710b606fa15..00000000000 --- a/website/src/scss/greenhouse.scss +++ /dev/null @@ -1,27 +0,0 @@ -#main { - padding-bottom: 0; - padding-top: 0; -} - -#wrapper { - max-width: 1078px; - padding: 0; -} - -body > #wrapper > #main > #wrapper 
> #logo, -body > #wrapper > #main > #wrapper > h1, -body > #wrapper > #main > #wrapper > #content { - display: none; -} - -body > #wrapper > #main > #wrapper > #board_title { - margin-top: 0; -} - -body > #wrapper > #main > #logo { - margin-top: 80px; -} - -body > #wrapper > #main > :last-child { - margin-bottom: 120px; -} diff --git a/website/templates/common_css.html b/website/templates/common_css.html index ac10b233f25..b26b2bf973e 100644 --- a/website/templates/common_css.html +++ b/website/templates/common_css.html @@ -1,4 +1,4 @@ - + {% for src in extra_css %} diff --git a/website/templates/common_js.html b/website/templates/common_js.html index 72421f00562..93e35d37918 100644 --- a/website/templates/common_js.html +++ b/website/templates/common_js.html @@ -1,4 +1,4 @@ - + {% for src in extra_js %} diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 018d533e893..07aa05d28b1 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -7,7 +7,7 @@ {% if title %}{{ title }}{% else %}{{ _('ClickHouse - fast open-source OLAP DBMS') }}{% endif %} - + @@ -15,7 +15,7 @@ {% if page and page.meta.image %} {% else %} - + {% endif %} diff --git a/website/templates/docs/amp.html b/website/templates/docs/amp.html index 5d2777af188..dc7dd7acb49 100644 --- a/website/templates/docs/amp.html +++ b/website/templates/docs/amp.html @@ -20,7 +20,7 @@ diff --git a/website/templates/docs/nav.html b/website/templates/docs/nav.html index 4d57d282796..afac39c2fab 100644 --- a/website/templates/docs/nav.html +++ b/website/templates/docs/nav.html @@ -1,7 +1,7 @@