diff --git a/.github/codecov.yml b/.github/codecov.yml deleted file mode 100644 index f185c5e2dcc..00000000000 --- a/.github/codecov.yml +++ /dev/null @@ -1,17 +0,0 @@ -codecov: - max_report_age: "off" - strict_yaml_branch: "master" - -ignore: - - "contrib" - - "docs" - - "benchmark" - - "tests" - - "docker" - - "debian" - - "cmake" - -comment: false - -github_checks: - annotations: false diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml deleted file mode 100644 index 9f3f944c696..00000000000 --- a/.github/workflows/anchore-analysis.yml +++ /dev/null @@ -1,43 +0,0 @@ -# This workflow checks out code, performs an Anchore container image -# vulnerability and compliance scan, and integrates the results with -# GitHub Advanced Security code scanning feature. For more information on -# the Anchore scan action usage and parameters, see -# https://github.com/anchore/scan-action. For more information on -# Anchore container image scanning in general, see -# https://docs.anchore.com. - -name: Docker Container Scan (clickhouse-server) - -env: - # Force the stdout and stderr streams to be unbuffered - PYTHONUNBUFFERED: 1 - -"on": - pull_request: - paths: - - docker/server/Dockerfile - - .github/workflows/anchore-analysis.yml - schedule: - - cron: '0 21 * * *' - -jobs: - Anchore-Build-Scan: - runs-on: ubuntu-latest - steps: - - name: Checkout the code - uses: actions/checkout@v2 - - name: Build the Docker image - run: | - cd docker/server - perl -pi -e 's|=\$version||g' Dockerfile - docker build . --file Dockerfile --tag localbuild/testimage:latest - - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled - uses: anchore/scan-action@v2 - id: scan - with: - image: "localbuild/testimage:latest" - acs-report-enable: true - - name: Upload Anchore Scan Report - uses: github/codeql-action/upload-sarif@v1 - with: - sarif_file: ${{ steps.scan.outputs.sarif }} diff --git a/.gitmodules b/.gitmodules index 2a1859d5d26..6c9e66f9cbc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ [submodule "contrib/poco"] path = contrib/poco - url = https://github.com/ClickHouse-Extras/poco.git + url = https://github.com/ClickHouse/poco.git branch = clickhouse [submodule "contrib/zstd"] path = contrib/zstd @@ -10,13 +10,13 @@ url = https://github.com/lz4/lz4.git [submodule "contrib/librdkafka"] path = contrib/librdkafka - url = https://github.com/ClickHouse-Extras/librdkafka.git + url = https://github.com/ClickHouse/librdkafka.git [submodule "contrib/cctz"] path = contrib/cctz - url = https://github.com/ClickHouse-Extras/cctz.git + url = https://github.com/ClickHouse/cctz.git [submodule "contrib/zlib-ng"] path = contrib/zlib-ng - url = https://github.com/ClickHouse-Extras/zlib-ng.git + url = https://github.com/ClickHouse/zlib-ng.git branch = clickhouse-2.0.x [submodule "contrib/googletest"] path = contrib/googletest @@ -32,51 +32,51 @@ url = https://github.com/google/re2.git [submodule "contrib/llvm"] path = contrib/llvm - url = https://github.com/ClickHouse-Extras/llvm + url = https://github.com/ClickHouse/llvm [submodule "contrib/mariadb-connector-c"] path = contrib/mariadb-connector-c - url = https://github.com/ClickHouse-Extras/mariadb-connector-c.git + url = https://github.com/ClickHouse/mariadb-connector-c.git [submodule "contrib/jemalloc"] path = contrib/jemalloc url = https://github.com/jemalloc/jemalloc.git [submodule "contrib/unixodbc"] path = contrib/unixodbc - url = 
https://github.com/ClickHouse-Extras/UnixODBC.git + url = https://github.com/ClickHouse/UnixODBC.git [submodule "contrib/protobuf"] path = contrib/protobuf - url = https://github.com/ClickHouse-Extras/protobuf.git + url = https://github.com/ClickHouse/protobuf.git branch = v3.13.0.1 [submodule "contrib/boost"] path = contrib/boost - url = https://github.com/ClickHouse-Extras/boost.git + url = https://github.com/ClickHouse/boost.git [submodule "contrib/base64"] path = contrib/base64 - url = https://github.com/ClickHouse-Extras/Turbo-Base64.git + url = https://github.com/ClickHouse/Turbo-Base64.git [submodule "contrib/arrow"] path = contrib/arrow - url = https://github.com/ClickHouse-Extras/arrow.git + url = https://github.com/ClickHouse/arrow.git branch = blessed/release-6.0.1 [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift.git [submodule "contrib/libhdfs3"] path = contrib/libhdfs3 - url = https://github.com/ClickHouse-Extras/libhdfs3.git + url = https://github.com/ClickHouse/libhdfs3.git [submodule "contrib/libxml2"] path = contrib/libxml2 url = https://github.com/GNOME/libxml2.git [submodule "contrib/libgsasl"] path = contrib/libgsasl - url = https://github.com/ClickHouse-Extras/libgsasl.git + url = https://github.com/ClickHouse/libgsasl.git [submodule "contrib/libcxx"] path = contrib/libcxx - url = https://github.com/ClickHouse-Extras/libcxx.git + url = https://github.com/ClickHouse/libcxx.git [submodule "contrib/libcxxabi"] path = contrib/libcxxabi - url = https://github.com/ClickHouse-Extras/libcxxabi.git + url = https://github.com/ClickHouse/libcxxabi.git [submodule "contrib/snappy"] path = contrib/snappy - url = https://github.com/ClickHouse-Extras/snappy.git + url = https://github.com/ClickHouse/snappy.git [submodule "contrib/cppkafka"] path = contrib/cppkafka url = https://github.com/mfontanini/cppkafka.git @@ -85,95 +85,95 @@ url = https://github.com/google/brotli.git [submodule "contrib/h3"] path = contrib/h3 - url = https://github.com/ClickHouse-Extras/h3 + url = https://github.com/ClickHouse/h3 [submodule "contrib/hyperscan"] path = contrib/hyperscan - url = https://github.com/ClickHouse-Extras/hyperscan.git + url = https://github.com/ClickHouse/hyperscan.git [submodule "contrib/libunwind"] path = contrib/libunwind - url = https://github.com/ClickHouse-Extras/libunwind.git + url = https://github.com/ClickHouse/libunwind.git [submodule "contrib/simdjson"] path = contrib/simdjson url = https://github.com/simdjson/simdjson.git [submodule "contrib/rapidjson"] path = contrib/rapidjson - url = https://github.com/ClickHouse-Extras/rapidjson + url = https://github.com/ClickHouse/rapidjson [submodule "contrib/fastops"] path = contrib/fastops - url = https://github.com/ClickHouse-Extras/fastops + url = https://github.com/ClickHouse/fastops [submodule "contrib/orc"] path = contrib/orc - url = https://github.com/ClickHouse-Extras/orc + url = https://github.com/ClickHouse/orc [submodule "contrib/sparsehash-c11"] path = contrib/sparsehash-c11 url = https://github.com/sparsehash/sparsehash-c11.git [submodule "contrib/grpc"] path = contrib/grpc - url = https://github.com/ClickHouse-Extras/grpc.git + url = https://github.com/ClickHouse/grpc.git branch = v1.33.2 [submodule "contrib/aws"] path = contrib/aws - url = https://github.com/ClickHouse-Extras/aws-sdk-cpp.git + url = https://github.com/ClickHouse/aws-sdk-cpp.git [submodule "aws-c-event-stream"] path = contrib/aws-c-event-stream - url = https://github.com/ClickHouse-Extras/aws-c-event-stream.git + 
url = https://github.com/ClickHouse/aws-c-event-stream.git [submodule "aws-c-common"] path = contrib/aws-c-common - url = https://github.com/ClickHouse-Extras/aws-c-common.git + url = https://github.com/ClickHouse/aws-c-common.git [submodule "aws-checksums"] path = contrib/aws-checksums - url = https://github.com/ClickHouse-Extras/aws-checksums.git + url = https://github.com/ClickHouse/aws-checksums.git [submodule "contrib/curl"] path = contrib/curl url = https://github.com/curl/curl.git [submodule "contrib/icudata"] path = contrib/icudata - url = https://github.com/ClickHouse-Extras/icudata.git + url = https://github.com/ClickHouse/icudata.git [submodule "contrib/icu"] path = contrib/icu url = https://github.com/unicode-org/icu.git [submodule "contrib/flatbuffers"] path = contrib/flatbuffers - url = https://github.com/ClickHouse-Extras/flatbuffers.git + url = https://github.com/ClickHouse/flatbuffers.git [submodule "contrib/replxx"] path = contrib/replxx - url = https://github.com/ClickHouse-Extras/replxx.git + url = https://github.com/ClickHouse/replxx.git [submodule "contrib/avro"] path = contrib/avro - url = https://github.com/ClickHouse-Extras/avro.git + url = https://github.com/ClickHouse/avro.git ignore = untracked [submodule "contrib/msgpack-c"] path = contrib/msgpack-c url = https://github.com/msgpack/msgpack-c [submodule "contrib/libcpuid"] path = contrib/libcpuid - url = https://github.com/ClickHouse-Extras/libcpuid.git + url = https://github.com/ClickHouse/libcpuid.git [submodule "contrib/openldap"] path = contrib/openldap - url = https://github.com/ClickHouse-Extras/openldap.git + url = https://github.com/ClickHouse/openldap.git [submodule "contrib/AMQP-CPP"] path = contrib/AMQP-CPP - url = https://github.com/ClickHouse-Extras/AMQP-CPP.git + url = https://github.com/ClickHouse/AMQP-CPP.git [submodule "contrib/cassandra"] path = contrib/cassandra - url = https://github.com/ClickHouse-Extras/cpp-driver.git + url = https://github.com/ClickHouse/cpp-driver.git branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv - url = https://github.com/ClickHouse-Extras/libuv.git + url = https://github.com/ClickHouse/libuv.git branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/ClickHouse-Extras/sentry-native.git + url = https://github.com/ClickHouse/sentry-native.git [submodule "contrib/krb5"] path = contrib/krb5 - url = https://github.com/ClickHouse-Extras/krb5 + url = https://github.com/ClickHouse/krb5 [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl - url = https://github.com/ClickHouse-Extras/cyrus-sasl + url = https://github.com/ClickHouse/cyrus-sasl branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring @@ -184,7 +184,7 @@ url = https://github.com/danlark1/miniselect [submodule "contrib/rocksdb"] path = contrib/rocksdb - url = https://github.com/ClickHouse-Extras/rocksdb.git + url = https://github.com/ClickHouse/rocksdb.git [submodule "contrib/xz"] path = contrib/xz url = https://github.com/xz-mirror/xz @@ -194,53 +194,53 @@ branch = lts_2021_11_02 [submodule "contrib/dragonbox"] path = contrib/dragonbox - url = https://github.com/ClickHouse-Extras/dragonbox.git + url = https://github.com/ClickHouse/dragonbox.git [submodule "contrib/fast_float"] path = contrib/fast_float url = https://github.com/fastfloat/fast_float [submodule "contrib/libpq"] path = contrib/libpq - url = 
https://github.com/ClickHouse-Extras/libpq + url = https://github.com/ClickHouse/libpq [submodule "contrib/boringssl"] path = contrib/boringssl - url = https://github.com/ClickHouse-Extras/boringssl.git + url = https://github.com/ClickHouse/boringssl.git branch = MergeWithUpstream [submodule "contrib/NuRaft"] path = contrib/NuRaft - url = https://github.com/ClickHouse-Extras/NuRaft.git + url = https://github.com/ClickHouse/NuRaft.git [submodule "contrib/nanodbc"] path = contrib/nanodbc - url = https://github.com/ClickHouse-Extras/nanodbc.git + url = https://github.com/ClickHouse/nanodbc.git [submodule "contrib/datasketches-cpp"] path = contrib/datasketches-cpp - url = https://github.com/ClickHouse-Extras/datasketches-cpp.git + url = https://github.com/ClickHouse/datasketches-cpp.git [submodule "contrib/yaml-cpp"] path = contrib/yaml-cpp - url = https://github.com/ClickHouse-Extras/yaml-cpp.git + url = https://github.com/ClickHouse/yaml-cpp.git [submodule "contrib/cld2"] path = contrib/cld2 - url = https://github.com/ClickHouse-Extras/cld2.git + url = https://github.com/ClickHouse/cld2.git [submodule "contrib/libstemmer_c"] path = contrib/libstemmer_c - url = https://github.com/ClickHouse-Extras/libstemmer_c.git + url = https://github.com/ClickHouse/libstemmer_c.git [submodule "contrib/wordnet-blast"] path = contrib/wordnet-blast - url = https://github.com/ClickHouse-Extras/wordnet-blast.git + url = https://github.com/ClickHouse/wordnet-blast.git [submodule "contrib/lemmagen-c"] path = contrib/lemmagen-c - url = https://github.com/ClickHouse-Extras/lemmagen-c.git + url = https://github.com/ClickHouse/lemmagen-c.git [submodule "contrib/libpqxx"] path = contrib/libpqxx - url = https://github.com/ClickHouse-Extras/libpqxx.git + url = https://github.com/ClickHouse/libpqxx.git [submodule "contrib/sqlite-amalgamation"] path = contrib/sqlite-amalgamation url = https://github.com/azadkuh/sqlite-amalgamation [submodule "contrib/s2geometry"] path = contrib/s2geometry - url = https://github.com/ClickHouse-Extras/s2geometry.git + url = https://github.com/ClickHouse/s2geometry.git [submodule "contrib/bzip2"] path = contrib/bzip2 - url = https://github.com/ClickHouse-Extras/bzip2.git + url = https://github.com/ClickHouse/bzip2.git [submodule "contrib/magic_enum"] path = contrib/magic_enum url = https://github.com/Neargye/magic_enum @@ -249,16 +249,16 @@ url = https://github.com/google/libprotobuf-mutator [submodule "contrib/sysroot"] path = contrib/sysroot - url = https://github.com/ClickHouse-Extras/sysroot.git + url = https://github.com/ClickHouse/sysroot.git [submodule "contrib/nlp-data"] path = contrib/nlp-data - url = https://github.com/ClickHouse-Extras/nlp-data.git + url = https://github.com/ClickHouse/nlp-data.git [submodule "contrib/hive-metastore"] path = contrib/hive-metastore - url = https://github.com/ClickHouse-Extras/hive-metastore + url = https://github.com/ClickHouse/hive-metastore [submodule "contrib/azure"] path = contrib/azure - url = https://github.com/ClickHouse-Extras/azure-sdk-for-cpp.git + url = https://github.com/ClickHouse/azure-sdk-for-cpp.git [submodule "contrib/minizip-ng"] path = contrib/minizip-ng url = https://github.com/zlib-ng/minizip-ng diff --git a/CMakeLists.txt b/CMakeLists.txt index c0b6604c8f9..9649fc32d74 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,7 +248,9 @@ endif() if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") set(USE_DEBUG_HELPERS ON) endif() + option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) +option(BUILD_STANDALONE_KEEPER 
"Build keeper as small standalone binary" OFF) # Create BuildID when using lld. For other linkers it is created by default. if (LINKER_NAME MATCHES "lld$") @@ -263,6 +265,11 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE)) set (USE_BINARY_HASH 1) endif () +# Allows to build stripped binary in a separate directory +if (OBJCOPY_PATH AND READELF_PATH) + set(BUILD_STRIPPED_BINARIES_PREFIX "" CACHE STRING "Build stripped binaries with debug info in separate directory") +endif() + cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd @@ -285,8 +292,13 @@ include(cmake/cpu_features.cmake) set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables") # Reproducible builds -set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") -set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") +# If turned `ON`, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). +option(ENABLE_BUILD_PATH_MAPPING "Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ON) + +if (ENABLE_BUILD_PATH_MAPPING) + set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") + set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.") +endif() if (${CMAKE_VERSION} VERSION_LESS "3.12.4") # CMake < 3.12 doesn't support setting 20 as a C++ standard version. diff --git a/base/base/StringRef.h b/base/base/StringRef.h index 171861e9ba7..f300a2d63df 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -46,9 +46,9 @@ struct StringRef constexpr StringRef(const char * data_, size_t size_) : data(data_), size(size_) {} - StringRef(const std::string & s) : data(s.data()), size(s.size()) {} + StringRef(const std::string & s) : data(s.data()), size(s.size()) {} /// NOLINT constexpr explicit StringRef(std::string_view s) : data(s.data()), size(s.size()) {} - constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {} + constexpr StringRef(const char * data_) : StringRef(std::string_view{data_}) {} /// NOLINT constexpr StringRef() = default; std::string toString() const { return std::string(data, size); } diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 6996b63b5dd..0260c6380f4 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -18,7 +18,7 @@ #include "Common/config_version.h" #include -#if USE_SENTRY +#if USE_SENTRY && !defined(KEEPER_STANDALONE_BUILD) # include # include diff --git a/base/loggers/CMakeLists.txt b/base/loggers/CMakeLists.txt index 22be002e069..148c4f84f68 100644 --- a/base/loggers/CMakeLists.txt +++ b/base/loggers/CMakeLists.txt @@ -1,5 +1,13 @@ include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake") add_headers_and_sources(loggers .) + +# Standard version depends on DBMS and works with text log add_library(loggers ${loggers_sources} ${loggers_headers}) +target_compile_definitions(loggers PUBLIC WITH_TEXT_LOG=1) target_link_libraries(loggers PRIVATE dbms clickhouse_common_io) target_include_directories(loggers PUBLIC ..) 
+ +# Lightweight version doesn't work with the text log and also doesn't depend on DBMS +add_library(loggers_no_text_log ${loggers_sources} ${loggers_headers}) +target_link_libraries(loggers_no_text_log PRIVATE clickhouse_common_io) +target_include_directories(loggers_no_text_log PUBLIC ..) diff --git a/base/loggers/Loggers.cpp b/base/loggers/Loggers.cpp index 2f2eadea28f..7c627ad2272 100644 --- a/base/loggers/Loggers.cpp +++ b/base/loggers/Loggers.cpp @@ -9,7 +9,11 @@ #include #include #include -#include + +#ifdef WITH_TEXT_LOG + #include +#endif + #include namespace fs = std::filesystem; @@ -30,17 +34,21 @@ static std::string createDirectory(const std::string & file) return path; }; +#ifdef WITH_TEXT_LOG void Loggers::setTextLog(std::shared_ptr log, int max_priority) { text_log = log; text_log_max_priority = max_priority; } +#endif void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { +#ifdef WITH_TEXT_LOG if (split) if (auto log = text_log.lock()) split->addTextLog(log, text_log_max_priority); +#endif auto current_logger = config.getString("logger", ""); if (config_logger == current_logger) //-V1051 diff --git a/base/loggers/Loggers.h b/base/loggers/Loggers.h index a859c32fa89..22b2b5e2c69 100644 --- a/base/loggers/Loggers.h +++ b/base/loggers/Loggers.h @@ -7,10 +7,12 @@ #include #include "OwnSplitChannel.h" +#ifdef WITH_TEXT_LOG namespace DB { class TextLog; } +#endif namespace Poco::Util { @@ -27,7 +29,9 @@ public: /// Close log files. On next log write files will be reopened. void closeLogs(Poco::Logger & logger); +#ifdef WITH_TEXT_LOG void setTextLog(std::shared_ptr log, int max_priority); +#endif private: Poco::AutoPtr log_file; @@ -37,8 +41,10 @@ private: /// Previous value of logger element in config. It is used to reinitialize loggers whenever the value changed.
std::string config_logger; +#ifdef WITH_TEXT_LOG std::weak_ptr text_log; int text_log_max_priority = -1; +#endif Poco::AutoPtr split; }; diff --git a/base/loggers/OwnSplitChannel.cpp b/base/loggers/OwnSplitChannel.cpp index 2267b8f425d..b255d89f124 100644 --- a/base/loggers/OwnSplitChannel.cpp +++ b/base/loggers/OwnSplitChannel.cpp @@ -20,10 +20,13 @@ namespace DB { void OwnSplitChannel::log(const Poco::Message & msg) { + +#ifdef WITH_TEXT_LOG auto logs_queue = CurrentThread::getInternalTextLogsQueue(); if (channels.empty() && (logs_queue == nullptr || msg.getPriority() > logs_queue->max_priority)) return; +#endif if (auto * masker = SensitiveDataMasker::getInstance()) { @@ -86,6 +89,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) channel.first->log(msg); // ordinary child } +#ifdef WITH_TEXT_LOG auto logs_queue = CurrentThread::getInternalTextLogsQueue(); /// Log to "TCP queue" if message is not too noisy @@ -137,6 +141,7 @@ void OwnSplitChannel::logSplit(const Poco::Message & msg) if (text_log_locked) text_log_locked->add(elem); } +#endif } @@ -145,12 +150,14 @@ void OwnSplitChannel::addChannel(Poco::AutoPtr channel, const std channels.emplace(name, ExtendedChannelPtrPair(std::move(channel), dynamic_cast(channel.get()))); } +#ifdef WITH_TEXT_LOG void OwnSplitChannel::addTextLog(std::shared_ptr log, int max_priority) { std::lock_guard lock(text_log_mutex); text_log = log; text_log_max_priority.store(max_priority, std::memory_order_relaxed); } +#endif void OwnSplitChannel::setLevel(const std::string & name, int level) { diff --git a/base/loggers/OwnSplitChannel.h b/base/loggers/OwnSplitChannel.h index 364a6346ede..72027f66afd 100644 --- a/base/loggers/OwnSplitChannel.h +++ b/base/loggers/OwnSplitChannel.h @@ -7,10 +7,12 @@ #include #include "ExtendedLogChannel.h" +#ifdef WITH_TEXT_LOG namespace DB { class TextLog; } +#endif namespace DB { @@ -25,7 +27,9 @@ public: /// Adds a child channel void addChannel(Poco::AutoPtr channel, const std::string & name); +#ifdef WITH_TEXT_LOG void addTextLog(std::shared_ptr log, int max_priority); +#endif void setLevel(const std::string & name, int level); @@ -40,8 +44,10 @@ private: std::mutex text_log_mutex; +#ifdef WITH_TEXT_LOG std::weak_ptr text_log; std::atomic text_log_max_priority = -1; +#endif }; } diff --git a/cmake/strip.sh b/cmake/strip.sh new file mode 100755 index 00000000000..de596887159 --- /dev/null +++ b/cmake/strip.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +BINARY_PATH=$1 +BINARY_NAME=$(basename "$BINARY_PATH") +DESTINATION_STRIPPED_DIR=$2 +OBJCOPY_PATH=${3:-objcopy} +READELF_PATH=${4:-readelf} + +# Extract the GNU build-id: debuggers locate detached debug info by this hash. +BUILD_ID=$($READELF_PATH -n "$BINARY_PATH" | sed -n '/Build ID/ { s/.*: //p; q; }') +BUILD_ID_PREFIX=${BUILD_ID:0:2} +BUILD_ID_SUFFIX=${BUILD_ID:2} +TEMP_BINARY_PATH="${BINARY_PATH}_temp" + +DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id" +DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin" + +mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX" +mkdir -p "$DESTINATION_STRIP_BINARY_DIR" + +# Split the debug info out, link it back via .gnu_debuglink, then strip the binary. +$OBJCOPY_PATH --only-keep-debug "$BINARY_PATH" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" + +touch "$TEMP_BINARY_PATH" +$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$BINARY_PATH" "$TEMP_BINARY_PATH" +$OBJCOPY_PATH --strip-all "$TEMP_BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" +rm -f "$TEMP_BINARY_PATH" diff --git a/cmake/strip_binary.cmake b/cmake/strip_binary.cmake new file mode 100644 index 00000000000..e430807772d ---
/dev/null +++ b/cmake/strip_binary.cmake @@ -0,0 +1,26 @@ +macro(clickhouse_strip_binary) + set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH) + + cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN}) + + if (NOT DEFINED STRIP_TARGET) + message(FATAL_ERROR "A target name must be provided for stripping binary") + endif() + + if (NOT DEFINED STRIP_BINARY_PATH) + message(FATAL_ERROR "A binary path name must be provided for stripping binary") + endif() + + + if (NOT DEFINED STRIP_DESTINATION_DIR) + message(FATAL_ERROR "Destination directory for stripped binary must be provided") + endif() + + add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD + COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH} + COMMENT "Stripping clickhouse binary" VERBATIM + ) + + install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) + install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse) +endmacro() diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 69a37304f58..d6fddd0509e 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -169,3 +169,33 @@ if (OBJCOPY_PATH) else () message (FATAL_ERROR "Cannot find objcopy.") endif () + +# Readelf (FIXME copypaste) + +if (COMPILER_GCC) + find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf") +else () + find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf") +endif () + +if (NOT READELF_PATH AND OS_DARWIN) + find_program (BREW_PATH NAMES "brew") + if (BREW_PATH) + execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX) + if (LLVM_PREFIX) + find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH) + endif () + if (NOT READELF_PATH) + execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX) + if (BINUTILS_PREFIX) + find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH) + endif () + endif () + endif () +endif () + +if (READELF_PATH) + message (STATUS "Using readelf: ${READELF_PATH}") +else () + message (FATAL_ERROR "Cannot find readelf.") +endif () diff --git a/contrib/icu b/contrib/icu index faa2f9f9e1f..a56dde820dc 160000 --- a/contrib/icu +++ b/contrib/icu @@ -1 +1 @@ -Subproject commit faa2f9f9e1fe74c5ed00eba371d2830134cdbea1 +Subproject commit a56dde820dc35665a66f2e9ee8ba58e75049b668 diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index ae19ef20e38..9c34228e2a0 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -212,7 +212,9 @@ set(ICUUC_SOURCES "${ICU_SOURCE_DIR}/common/ubiditransform.cpp" "${ICU_SOURCE_DIR}/common/pluralmap.cpp" "${ICU_SOURCE_DIR}/common/static_unicode_sets.cpp" -"${ICU_SOURCE_DIR}/common/restrace.cpp") +"${ICU_SOURCE_DIR}/common/restrace.cpp" +"${ICU_SOURCE_DIR}/common/emojiprops.cpp" +"${ICU_SOURCE_DIR}/common/lstmbe.cpp") set(ICUI18N_SOURCES "${ICU_SOURCE_DIR}/i18n/ucln_in.cpp" @@ -398,7 +400,6 @@ set(ICUI18N_SOURCES "${ICU_SOURCE_DIR}/i18n/sharedbreakiterator.cpp" "${ICU_SOURCE_DIR}/i18n/scientificnumberformatter.cpp" "${ICU_SOURCE_DIR}/i18n/dayperiodrules.cpp" -"${ICU_SOURCE_DIR}/i18n/nounit.cpp" "${ICU_SOURCE_DIR}/i18n/number_affixutils.cpp" 
"${ICU_SOURCE_DIR}/i18n/number_compact.cpp" "${ICU_SOURCE_DIR}/i18n/number_decimalquantity.cpp" @@ -446,12 +447,21 @@ set(ICUI18N_SOURCES "${ICU_SOURCE_DIR}/i18n/formattedvalue.cpp" "${ICU_SOURCE_DIR}/i18n/formattedval_iterimpl.cpp" "${ICU_SOURCE_DIR}/i18n/formattedval_sbimpl.cpp" -"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp") +"${ICU_SOURCE_DIR}/i18n/formatted_string_builder.cpp" +"${ICU_SOURCE_DIR}/i18n/measunit_extra.cpp" +"${ICU_SOURCE_DIR}/i18n/number_symbolswrapper.cpp" +"${ICU_SOURCE_DIR}/i18n/number_usageprefs.cpp" +"${ICU_SOURCE_DIR}/i18n/numrange_capi.cpp" +"${ICU_SOURCE_DIR}/i18n/pluralranges.cpp" +"${ICU_SOURCE_DIR}/i18n/units_complexconverter.cpp" +"${ICU_SOURCE_DIR}/i18n/units_converter.cpp" +"${ICU_SOURCE_DIR}/i18n/units_data.cpp" +"${ICU_SOURCE_DIR}/i18n/units_router.cpp") file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) set(ICUDATA_SOURCES - "${ICUDATA_SOURCE_DIR}/icudt66l_dat.S" + "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" # Without this cmake can incorrectly detects library type (OBJECT) instead of SHARED/STATIC ) diff --git a/contrib/icudata b/contrib/icudata index f020820388e..72d9a4a7feb 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit f020820388e3faafb44cc643574a2d563dfde572 +Subproject commit 72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5 diff --git a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h index d06243c5239..64c4f4956b6 100644 --- a/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc-cmake/include/jemalloc/jemalloc.h @@ -4,12 +4,21 @@ extern "C" { #endif +#if !defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wredundant-decls" +#endif + #include #include #include #include #include +#if !defined(__clang__) +#pragma GCC diagnostic pop +#endif + #ifdef __cplusplus } #endif diff --git a/contrib/llvm-cmake/CMakeLists.txt b/contrib/llvm-cmake/CMakeLists.txt index e859df65b6f..6ff07f0e016 100644 --- a/contrib/llvm-cmake/CMakeLists.txt +++ b/contrib/llvm-cmake/CMakeLists.txt @@ -1,4 +1,8 @@ -if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") +# During cross-compilation in our CI we have to use llvm-tblgen and other building tools +# tools to be build for host architecture and everything else for target architecture (e.g. AArch64) +# Possible workaround is to use llvm-tblgen from some package... 
+# But lets just enable LLVM for native builds +if (CMAKE_CROSSCOMPILING OR SANITIZE STREQUAL "undefined") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) @@ -22,9 +26,6 @@ set (LLVM_LIBRARY_DIRS "${ClickHouse_BINARY_DIR}/contrib/llvm/llvm") set (REQUIRED_LLVM_LIBRARIES LLVMExecutionEngine LLVMRuntimeDyld - LLVMX86CodeGen - LLVMX86Desc - LLVMX86Info LLVMAsmPrinter LLVMDebugInfoDWARF LLVMGlobalISel @@ -56,6 +57,12 @@ set (REQUIRED_LLVM_LIBRARIES LLVMDemangle ) +if (ARCH_AMD64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) +elseif (ARCH_AARCH64) + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) +endif () + #function(llvm_libs_all REQUIRED_LLVM_LIBRARIES) # llvm_map_components_to_libnames (result all) # if (USE_STATIC_LIBRARIES OR NOT "LLVM" IN_LIST result) diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 7c16e69a99b..93e7cebb857 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -13,6 +13,17 @@ COPY s3downloader /s3downloader ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" ENV DATASETS="hits visits" +ENV EXPORT_S3_STORAGE_POLICIES=1 + +# Download Minio-related binaries +RUN arch=${TARGETARCH:-amd64} \ + && wget "https://dl.min.io/server/minio/release/linux-${arch}/minio" \ + && chmod +x ./minio \ + && wget "https://dl.min.io/client/mc/release/linux-${arch}/mc" \ + && chmod +x ./mc +ENV MINIO_ROOT_USER="clickhouse" +ENV MINIO_ROOT_PASSWORD="clickhouse" +COPY setup_minio.sh / COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index 3f3240a0b7f..77dc61e6cd0 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -17,6 +17,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # install test configs /usr/share/clickhouse-test/config/install.sh +./setup_minio.sh + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -93,6 +95,8 @@ else clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" + clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, 
IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" + clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" fi clickhouse-client --query "SHOW TABLES FROM test" diff --git a/docker/test/stateful/setup_minio.sh b/docker/test/stateful/setup_minio.sh new file mode 100755 index 00000000000..5758d905197 --- /dev/null +++ b/docker/test/stateful/setup_minio.sh @@ -0,0 +1,66 @@ +#!/bin/bash + +# TODO: Make this file shared with stateless tests +# +# Usage for local run: +# +# ./docker/test/stateful/setup_minio.sh ./tests/ +# + +set -e -x -a -u + +ls -lha + +mkdir -p ./minio_data + +if [ ! -f ./minio ]; then + echo 'MinIO binary not found, downloading...' + + BINARY_TYPE=$(uname -s | tr '[:upper:]' '[:lower:]') + + wget "https://dl.min.io/server/minio/release/${BINARY_TYPE}-amd64/minio" \ + && chmod +x ./minio \ + && wget "https://dl.min.io/client/mc/release/${BINARY_TYPE}-amd64/mc" \ + && chmod +x ./mc +fi + +MINIO_ROOT_USER=${MINIO_ROOT_USER:-clickhouse} +MINIO_ROOT_PASSWORD=${MINIO_ROOT_PASSWORD:-clickhouse} + +./minio server --address ":11111" ./minio_data & + +while ! curl -v --silent http://localhost:11111 2>&1 | grep AccessDenied +do + echo "Trying to connect to minio" + sleep 1 +done + +lsof -i :11111 + +sleep 5 + +./mc alias set clickminio http://localhost:11111 clickhouse clickhouse +./mc admin user add clickminio test testtest +./mc admin policy set clickminio readwrite user=test +./mc mb clickminio/test + + +# Upload data to Minio. 
By default after unpacking all tests will be in +# /usr/share/clickhouse-test/queries + +TEST_PATH=${1:-/usr/share/clickhouse-test} +MINIO_DATA_PATH=${TEST_PATH}/queries/1_stateful/data_minio + +# Iterating over a glob would make the FILE variable a full path to each file, not a filename +# shellcheck disable=SC2045 +for FILE in $(ls "${MINIO_DATA_PATH}"); do + echo "$FILE"; + ./mc cp "${MINIO_DATA_PATH}"/"$FILE" clickminio/test/"$FILE"; done + +mkdir -p ~/.aws +cat <<EOT >> ~/.aws/credentials +[default] +aws_access_key_id=clickhouse +aws_secret_access_key=clickhouse +EOT diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index bfc6763e8c5..68c08c23b3f 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -60,6 +60,7 @@ RUN arch=${TARGETARCH:-amd64} \ ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" +ENV EXPORT_S3_STORAGE_POLICIES=1 COPY run.sh / COPY setup_minio.sh / diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index 4e0b6741061..1f39202e743 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -29,5 +29,6 @@ COPY run.sh / ENV DATASETS="hits visits" ENV S3_URL="https://clickhouse-datasets.s3.yandex.net" +ENV EXPORT_S3_STORAGE_POLICIES=1 CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index de77dec03b9..65c5fb9e40f 100755 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -173,6 +173,8 @@ quit configure +./setup_minio.sh + start # shellcheck disable=SC2086 # No quotes because I want to split it into words. @@ -188,6 +190,8 @@ clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits" clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits" +clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16,
BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" +clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM test.hits" clickhouse-client --query "SHOW TABLES FROM test" ./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" \ diff --git a/docs/README.md b/docs/README.md index cd5c1af0cbd..b328a3ee125 100644 --- a/docs/README.md +++ b/docs/README.md @@ -38,7 +38,7 @@ Writing the docs is extremely useful for project's users and developers, and gro The documentation contains information about all the aspects of the ClickHouse lifecycle: developing, testing, installing, operating, and using. The base language of the documentation is English. The English version is the most actual. All other languages are supported as much as they can by contributors from different countries. -At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, Japanese, and Farsi. We store the documentation besides the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs). +At the moment, [documentation](https://clickhouse.com/docs) exists in English, Russian, Chinese, and Japanese. We store the documentation alongside the ClickHouse source code in the [GitHub repository](https://github.com/ClickHouse/ClickHouse/tree/master/docs). Each language lays in the corresponding folder. Files that are not translated from English are the symbolic links to the English ones. diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md index b5b558464ba..81887eb8b8e 100644 --- a/docs/en/development/continuous-integration.md +++ b/docs/en/development/continuous-integration.md @@ -190,15 +190,3 @@ Runs randomly generated queries to catch program errors. If it fails, ask a main ## Performance Tests Measure changes in query performance. This is the longest check that takes just below 6 hours to run. The performance test report is described in detail [here](https://github.com/ClickHouse/ClickHouse/tree/master/docker/test/performance-comparison#how-to-read-the-report). - - - -# QA - -> What is a `Task (private network)` item on status pages? 
- -It's a link to the Yandex's internal job system. Yandex employees can see the check's start time and its more verbose status. - -> Where the tests are run - -Somewhere on Yandex internal infrastructure. diff --git a/docs/en/development/contrib.md b/docs/en/development/contrib.md index 07969f8ef6a..6c12a3d9055 100644 --- a/docs/en/development/contrib.md +++ b/docs/en/development/contrib.md @@ -40,8 +40,8 @@ The list of third-party libraries: | grpc | [Apache](https://github.com/ClickHouse-Extras/grpc/blob/60c986e15cae70aade721d26badabab1f822fdd6/LICENSE) | | h3 | [Apache](https://github.com/ClickHouse-Extras/h3/blob/c7f46cfd71fb60e2fefc90e28abe81657deff735/LICENSE) | | hyperscan | [Boost](https://github.com/ClickHouse-Extras/hyperscan/blob/e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa/LICENSE) | -| icu | [Public Domain](https://github.com/unicode-org/icu/blob/faa2f9f9e1fe74c5ed00eba371d2830134cdbea1/icu4c/LICENSE) | -| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/f020820388e3faafb44cc643574a2d563dfde572/LICENSE) | +| icu | [Public Domain](https://github.com/unicode-org/icu/blob/a56dde820dc35665a66f2e9ee8ba58e75049b668/icu4c/LICENSE) | +| icudata | [Public Domain](https://github.com/ClickHouse-Extras/icudata/blob/72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5/LICENSE) | | jemalloc | [BSD 2-clause](https://github.com/ClickHouse-Extras/jemalloc/blob/e6891d9746143bf2cf617493d880ba5a0b9a3efd/COPYING) | | krb5 | [MIT](https://github.com/ClickHouse-Extras/krb5/blob/5149dea4e2be0f67707383d2682b897c14631374/src/lib/gssapi/LICENSE) | | libc-headers | [LGPL](https://github.com/ClickHouse-Extras/libc-headers/blob/a720b7105a610acbd7427eea475a5b6810c151eb/LICENSE) | diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index f7d7100d181..9d1836b0ff2 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -243,7 +243,7 @@ List of tasks: https://github.com/ClickHouse/ClickHouse/issues?q=is%3Aopen+is%3A ## Test Data {#test-data} -Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized data from Yandex.Metrica. It requires additionally some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks. +Developing ClickHouse often requires loading realistic datasets. It is particularly important for performance testing. We have a specially prepared set of anonymized web analytics data. It additionally requires some 3GB of free disk space. Note that this data is not required to accomplish most of the development tasks. sudo apt install wget xz-utils @@ -270,7 +270,7 @@ Navigate to your fork repository in GitHub’s UI. If you have been developing i A pull request can be created even if the work is not completed yet. In this case please put the word “WIP” (work in progress) at the beginning of the title, it can be changed later. This is useful for cooperative reviewing and discussion of changes as well as for running all of the available tests. It is important that you provide a brief description of your changes, it will later be used for generating release changelogs. -Testing will commence as soon as Yandex employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. 
And the main set of tests will report itself within an hour. +Testing will commence as soon as ClickHouse employees label your PR with a tag “can be tested”. The results of some first checks (e.g. code style) will come in within several minutes. Build check results will arrive within half an hour. And the main set of tests will report itself within an hour. The system will prepare ClickHouse binary builds for your pull request individually. To retrieve these builds click the “Details” link next to “ClickHouse build check” entry in the list of checks. There you will find direct links to the built .deb packages of ClickHouse which you can deploy even on your production servers (if you have no fear). diff --git a/docs/en/development/style.md b/docs/en/development/style.md index 1c863d6b914..03121880555 100644 --- a/docs/en/development/style.md +++ b/docs/en/development/style.md @@ -404,9 +404,9 @@ enum class CompressionMethod }; ``` -**15.** All names must be in English. Transliteration of Russian words is not allowed. +**15.** All names must be in English. Transliteration of Hebrew words is not allowed. - not Stroka + not T_PAAMAYIM_NEKUDOTAYIM **16.** Abbreviations are acceptable if they are well known (when you can easily find the meaning of the abbreviation in Wikipedia or in a search engine). diff --git a/docs/en/development/tests.md b/docs/en/development/tests.md index 44a552d2a61..be9fc7907af 100644 --- a/docs/en/development/tests.md +++ b/docs/en/development/tests.md @@ -11,7 +11,7 @@ Functional tests are the most simple and convenient to use. Most of ClickHouse f Each functional test sends one or multiple queries to the running ClickHouse server and compares the result with reference. -Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from Yandex.Metrica and it is available to general public. +Tests are located in `queries` directory. There are two subdirectories: `stateless` and `stateful`. Stateless tests run queries without any preloaded test data - they often create small synthetic datasets on the fly, within the test itself. Stateful tests require preloaded test data from ClickHouse and it is available to the general public. Each test can be one of two types: `.sql` and `.sh`. `.sql` test is the simple SQL script that is piped to `clickhouse-client --multiquery --testmode`. `.sh` test is a script that is run by itself. SQL tests are generally preferable to `.sh` tests. You should use `.sh` tests only when you have to test some feature that cannot be exercised from pure SQL, such as piping some input data into `clickhouse-client` or testing `clickhouse-local`. @@ -133,44 +133,6 @@ If the system clickhouse-server is already running and you do not want to stop i `clickhouse` binary has almost no dependencies and works across wide range of Linux distributions. To quick and dirty test your changes on a server, you can simply `scp` your fresh built `clickhouse` binary to your server and then run it as in examples above. -## Testing Environment {#testing-environment} - -Before publishing release as stable we deploy it on testing environment. Testing environment is a cluster that process 1/39 part of [Yandex.Metrica](https://metrica.yandex.com/) data. We share our testing environment with Yandex.Metrica team. 
ClickHouse is upgraded without downtime on top of existing data. We look at first that data is processed successfully without lagging from realtime, the replication continue to work and there is no issues visible to Yandex.Metrica team. First check can be done in the following way: - -``` sql -SELECT hostName() AS h, any(version()), any(uptime()), max(UTCEventTime), count() FROM remote('example01-01-{1..3}t', merge, hits) WHERE EventDate >= today() - 2 GROUP BY h ORDER BY h; -``` - -In some cases we also deploy to testing environment of our friend teams in Yandex: Market, Cloud, etc. Also we have some hardware servers that are used for development purposes. - -## Load Testing {#load-testing} - -After deploying to testing environment we run load testing with queries from production cluster. This is done manually. - -Make sure you have enabled `query_log` on your production cluster. - -Collect query log for a day or more: - -``` bash -$ clickhouse-client --query="SELECT DISTINCT query FROM system.query_log WHERE event_date = today() AND query LIKE '%ym:%' AND query NOT LIKE '%system.query_log%' AND type = 2 AND is_initial_query" > queries.tsv -``` - -This is a way complicated example. `type = 2` will filter queries that are executed successfully. `query LIKE '%ym:%'` is to select relevant queries from Yandex.Metrica. `is_initial_query` is to select only queries that are initiated by client, not by ClickHouse itself (as parts of distributed query processing). - -`scp` this log to your testing cluster and run it as following: - -``` bash -$ clickhouse benchmark --concurrency 16 < queries.tsv -``` - -(probably you also want to specify a `--user`) - -Then leave it for a night or weekend and go take a rest. - -You should check that `clickhouse-server` does not crash, memory footprint is bounded and performance not degrading over time. - -Precise query execution timings are not recorded and not compared due to high variability of queries and environment. - ## Build Tests {#build-tests} Build tests allow to check that build is not broken on various alternative configurations and on some foreign systems. These tests are automated as well. @@ -259,13 +221,13 @@ Thread Fuzzer (please don't mix up with Thread Sanitizer) is another kind of fuz ## Security Audit -People from Yandex Security Team did some basic overview of ClickHouse capabilities from the security standpoint. +Our Security Team did some basic overview of ClickHouse capabilities from the security standpoint. ## Static Analyzers {#static-analyzers} We run `clang-tidy` on per-commit basis. `clang-static-analyzer` checks are also enabled. `clang-tidy` is also used for some style checks. -We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. Also you can read [the article in russian](https://habr.com/company/yandex/blog/342018/). +We have evaluated `clang-tidy`, `Coverity`, `cppcheck`, `PVS-Studio`, `tscancode`, `CodeQL`. You will find instructions for usage in `tests/instructions/` directory. If you use `CLion` as an IDE, you can leverage some `clang-tidy` checks out of the box. @@ -310,12 +272,6 @@ Alternatively you can try `uncrustify` tool to reformat your code. Configuration We also use `codespell` to find typos in code. It is automated as well. -## Metrica B2B Tests {#metrica-b2b-tests} - -Each ClickHouse release is tested with Yandex Metrica and AppMetrica engines. 
Testing and stable versions of ClickHouse are deployed on VMs and run with a small copy of Metrica engine that is processing fixed sample of input data. Then results of two instances of Metrica engine are compared together. - -These tests are automated by separate team. Due to high number of moving parts, tests are fail most of the time by completely unrelated reasons, that are very difficult to figure out. Most likely these tests have negative value for us. Nevertheless these tests was proved to be useful in about one or two times out of hundreds. - ## Test Coverage {#test-coverage} We also track test coverage but only for functional tests and only for clickhouse-server. It is performed on daily basis. diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index 3dc14c87be7..d7dcf21cb02 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -76,7 +76,7 @@ When working with the `MaterializedMySQL` database engine, [ReplacingMergeTree]( | FLOAT | [Float32](../../sql-reference/data-types/float.md) | | DOUBLE | [Float64](../../sql-reference/data-types/float.md) | | DECIMAL, NEWDECIMAL | [Decimal](../../sql-reference/data-types/decimal.md) | -| DATE, NEWDATE | [Date](../../sql-reference/data-types/date.md) | +| DATE, NEWDATE | [Date32](../../sql-reference/data-types/date32.md) | | DATETIME, TIMESTAMP | [DateTime](../../sql-reference/data-types/datetime.md) | | DATETIME2, TIMESTAMP2 | [DateTime64](../../sql-reference/data-types/datetime64.md) | | YEAR | [UInt16](../../sql-reference/data-types/int-uint.md) | diff --git a/docs/en/engines/database-engines/mysql.md b/docs/en/engines/database-engines/mysql.md index c5a1bba44b2..df4965b1f8c 100644 --- a/docs/en/engines/database-engines/mysql.md +++ b/docs/en/engines/database-engines/mysql.md @@ -49,6 +49,8 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') All other MySQL data types are converted into [String](../../sql-reference/data-types/string.md). +Because the ClickHouse `Date` type has a different range than the MySQL date type, you can use the setting `mysql_datatypes_support_level` to modify the mapping from the MySQL date type to the ClickHouse date type when a MySQL date falls outside the ClickHouse `Date` range: `date2Date32` (convert MySQL's date type to ClickHouse `Date32`), `date2String` (convert MySQL's date type to ClickHouse `String`; this is usually used when your MySQL data contains dates before 1925), or `default` (convert MySQL's date type to ClickHouse `Date`). + [Nullable](../../sql-reference/data-types/nullable.md) is supported.
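For example, a minimal sketch of switching the mapping for a session (the MySQL connection parameters and table name here are hypothetical, not part of this patch):

``` sql
-- Map out-of-range MySQL dates to Date32 instead of Date;
-- 'date2String' and 'default' can be substituted the same way.
SET mysql_datatypes_support_level = 'date2Date32';

-- Hypothetical MySQL server and credentials.
CREATE DATABASE mysql_db ENGINE = MySQL('localhost:3306', 'my_database', 'user', 'password');

DESCRIBE TABLE mysql_db.my_table; -- MySQL DATE columns are now reported as Date32
```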
## Global Variables Support {#global-variables-support} diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 691666cffef..c7301a55bf0 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -26,7 +26,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) - ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip') + ENGINE=S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip') SETTINGS input_format_with_names_use_header = 0; INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); @@ -75,19 +75,19 @@ Create table with files named `file-000.csv`, `file-001.csv`, … , `file-999.cs ``` sql CREATE TABLE big_table (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/my_folder/file-{000..999}.csv', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/my_folder/file-{000..999}.csv', 'CSV'); ``` **Example with wildcards 2** Suppose we have several files in CSV format with the following URIs on S3: -- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_1.csv' -- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_2.csv' -- 'https://storage.yandexcloud.net/my-bucket/some_folder/some_file_3.csv' -- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_1.csv' -- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_2.csv' -- 'https://storage.yandexcloud.net/my-bucket/another_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/some_folder/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/another_folder/some_file_3.csv' There are several ways to make a table consisting of all six files: @@ -96,21 +96,21 @@ There are several ways to make a table consisting of all six files: ``` sql CREATE TABLE table_with_range (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/some_file_{1..3}', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/some_file_{1..3}', 'CSV'); ``` 2. Take all files with `some_file_` prefix (there should be no extra files with such prefix in both folders): ``` sql CREATE TABLE table_with_question_mark (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/some_file_?', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/some_file_?', 'CSV'); ``` 3. 
Take all the files in both folders (all files should satisfy format and schema described in query): ``` sql CREATE TABLE table_with_asterisk (name String, value UInt32) - ENGINE = S3('https://storage.yandexcloud.net/my-bucket/{some,another}_folder/*', 'CSV'); + ENGINE = S3('https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/{some,another}_folder/*', 'CSV'); ``` ## S3-related Settings {#settings} @@ -142,7 +142,7 @@ The following settings can be specified in configuration file for given endpoint ``` xml - https://storage.yandexcloud.net/my-test-bucket-768/ + https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/ diff --git a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md index 5ac2105e9fd..b58e90a3d92 100644 --- a/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md +++ b/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md @@ -55,27 +55,28 @@ WHERE table = 'visits' ``` ``` text -┌─partition─┬─name───────────┬─active─┐ -│ 201901 │ 201901_1_3_1 │ 0 │ -│ 201901 │ 201901_1_9_2 │ 1 │ -│ 201901 │ 201901_8_8_0 │ 0 │ -│ 201901 │ 201901_9_9_0 │ 0 │ -│ 201902 │ 201902_4_6_1 │ 1 │ -│ 201902 │ 201902_10_10_0 │ 1 │ -│ 201902 │ 201902_11_11_0 │ 1 │ -└───────────┴────────────────┴────────┘ +┌─partition─┬─name──────────────┬─active─┐ +│ 201901 │ 201901_1_3_1 │ 0 │ +│ 201901 │ 201901_1_9_2_11 │ 1 │ +│ 201901 │ 201901_8_8_0 │ 0 │ +│ 201901 │ 201901_9_9_0 │ 0 │ +│ 201902 │ 201902_4_6_1_11 │ 1 │ +│ 201902 │ 201902_10_10_0_11 │ 1 │ +│ 201902 │ 201902_11_11_0_11 │ 1 │ +└───────────┴───────────────────┴────────┘ ``` The `partition` column contains the names of the partitions. There are two partitions in this example: `201901` and `201902`. You can use this column value to specify the partition name in [ALTER … PARTITION](../../../sql-reference/statements/alter/partition.md) queries. The `name` column contains the names of the partition data parts. You can use this column to specify the name of the part in the [ALTER ATTACH PART](../../../sql-reference/statements/alter/partition.md#alter_attach-partition) query. -Let’s break down the name of the first part: `201901_1_3_1`: +Let’s break down the name of the part: `201901_1_9_2_11`: - `201901` is the partition name. - `1` is the minimum number of the data block. -- `3` is the maximum number of the data block. -- `1` is the chunk level (the depth of the merge tree it is formed from). +- `9` is the maximum number of the data block. +- `2` is the chunk level (the depth of the merge tree it is formed from). +- `11` is the mutation version (if a part mutated) !!! info "Info" The parts of old-type tables have the name: `20190117_20190123_2_2_0` (minimum date - maximum date - minimum block number - maximum block number - level). 
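As an illustration of these name components, a minimal sketch of inspecting them through `system.parts` columns (assuming the `default.visits` table from the example above):

```sql
-- partition, min/max block numbers and level are the pieces concatenated
-- into the part name; mutated parts carry the mutation version as a suffix.
SELECT partition, name, active, min_block_number, max_block_number, level
FROM system.parts
WHERE database = 'default' AND table = 'visits';
```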
@@ -89,16 +90,16 @@ OPTIMIZE TABLE visits PARTITION 201902; ``` ``` text -┌─partition─┬─name───────────┬─active─┐ -│ 201901 │ 201901_1_3_1 │ 0 │ -│ 201901 │ 201901_1_9_2 │ 1 │ -│ 201901 │ 201901_8_8_0 │ 0 │ -│ 201901 │ 201901_9_9_0 │ 0 │ -│ 201902 │ 201902_4_6_1 │ 0 │ -│ 201902 │ 201902_4_11_2 │ 1 │ -│ 201902 │ 201902_10_10_0 │ 0 │ -│ 201902 │ 201902_11_11_0 │ 0 │ -└───────────┴────────────────┴────────┘ +┌─partition─┬─name─────────────┬─active─┐ +│ 201901 │ 201901_1_3_1 │ 0 │ +│ 201901 │ 201901_1_9_2_11 │ 1 │ +│ 201901 │ 201901_8_8_0 │ 0 │ +│ 201901 │ 201901_9_9_0 │ 0 │ +│ 201902 │ 201902_4_6_1 │ 0 │ +│ 201902 │ 201902_4_11_2_11 │ 1 │ +│ 201902 │ 201902_10_10_0 │ 0 │ +│ 201902 │ 201902_11_11_0 │ 0 │ +└───────────┴──────────────────┴────────┘ ``` Inactive parts will be deleted approximately 10 minutes after merging. @@ -109,12 +110,12 @@ Another way to view a set of parts and partitions is to go into the directory of /var/lib/clickhouse/data/default/visits$ ls -l total 40 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 201901_1_3_1 -drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2 +drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201901_1_9_2_11 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_8_8_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 15:52 201901_9_9_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_10_10_0 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:17 201902_11_11_0 -drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2 +drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 16:19 201902_4_11_2_11 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 5 12:09 201902_4_6_1 drwxr-xr-x 2 clickhouse clickhouse 4096 Feb 1 16:48 detached ``` diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 10373761869..a0acda5d5c6 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -802,7 +802,7 @@ Configuration markup: s3 - https://storage.yandexcloud.net/my-bucket/root-path/ + https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ your_access_key_id your_secret_access_key @@ -856,7 +856,7 @@ S3 disk can be configured as `main` or `cold` storage: s3 - https://storage.yandexcloud.net/my-bucket/root-path/ + https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/ your_access_key_id your_secret_access_key diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md index 7cd58d35362..d574bd9449e 100644 --- a/docs/en/engines/table-engines/mergetree-family/replication.md +++ b/docs/en/engines/table-engines/mergetree-family/replication.md @@ -97,7 +97,7 @@ ZooKeeper is not used in `SELECT` queries because replication does not affect th For each `INSERT` query, approximately ten entries are added to ZooKeeper through several transactions. (To be more precise, this is for each inserted block of data; an INSERT query contains one block or one block per `max_insert_block_size = 1048576` rows.) This leads to slightly longer latencies for `INSERT` compared to non-replicated tables. But if you follow the recommendations to insert data in batches of no more than one `INSERT` per second, it does not create any problems. The entire ClickHouse cluster used for coordinating one ZooKeeper cluster has a total of several hundred `INSERTs` per second. 
The throughput on data inserts (the number of rows per second) is just as high as for non-replicated data. -For very large clusters, you can use different ZooKeeper clusters for different shards. However, this hasn’t proven necessary on the Yandex.Metrica cluster (approximately 300 servers). +For very large clusters, you can use different ZooKeeper clusters for different shards. However, in our experience this has not proven necessary, even on production clusters of approximately 300 servers. Replication is asynchronous and multi-master. `INSERT` queries (as well as `ALTER`) can be sent to any available server. Data is inserted on the server where the query is run, and then it is copied to the other servers. Because it is asynchronous, recently inserted data appears on the other replicas with some latency. If part of the replicas are not available, the data is written when they become available. If a replica is available, the latency is the amount of time it takes to transfer the block of compressed data over the network. The number of threads performing background tasks for replicated tables can be set by [background_schedule_pool_size](../../../operations/settings/settings.md#background_schedule_pool_size) setting. @@ -111,7 +111,7 @@ Data blocks are deduplicated. For multiple writes of the same data block (data b During replication, only the source data to insert is transferred over the network. Further data transformation (merging) is coordinated and performed on all the replicas in the same way. This minimizes network usage, which means that replication works well when replicas reside in different datacenters. (Note that duplicating data in different datacenters is the main goal of replication.) -You can have any number of replicas of the same data. Yandex.Metrica uses double replication in production. Each server uses RAID-5 or RAID-6, and RAID-10 in some cases. This is a relatively reliable and convenient solution. +You can have any number of replicas of the same data. In our experience, a relatively reliable and convenient solution is double replication in production, with each server using RAID-5 or RAID-6 (and RAID-10 in some cases). The system monitors data synchronicity on replicas and is able to recover after a failure. Failover is automatic (for small differences in data) or semi-automatic (when data differs too much, which may indicate a configuration error). @@ -163,7 +163,7 @@ Example: 05 02 - example05-02-1.yandex.ru + example05-02-1 ``` @@ -172,7 +172,7 @@ In this case, the path consists of the following parts: `/clickhouse/tables/` is the common prefix. We recommend using exactly this one. -`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the Yandex.Metrica cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier. +`{layer}-{shard}` is the shard identifier. In this example it consists of two parts, since the example cluster uses bi-level sharding. For most tasks, you can leave just the {shard} substitution, which will be expanded to the shard identifier. `table_name` is the name of the node for the table in ZooKeeper. It is a good idea to make it the same as the table name. It is defined explicitly, because in contrast to the table name, it does not change after a RENAME query. *HINT*: you could add a database name in front of `table_name` as well. E.g.
`db_name.table_name` diff --git a/docs/en/engines/table-engines/special/distributed.md b/docs/en/engines/table-engines/special/distributed.md index 4d2454298f2..5072465687e 100644 --- a/docs/en/engines/table-engines/special/distributed.md +++ b/docs/en/engines/table-engines/special/distributed.md @@ -197,7 +197,7 @@ A simple remainder from the division is a limited solution for sharding and isn You should be concerned about the sharding scheme in the following cases: - Queries are used that require joining data (`IN` or `JOIN`) by a specific key. If data is sharded by this key, you can use local `IN` or `JOIN` instead of `GLOBAL IN` or `GLOBAL JOIN`, which is much more efficient. -- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, as we’ve done in Yandex.Metrica, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries. +- A large number of servers is used (hundreds or more) with a large number of small queries, for example, queries for data of individual clients (e.g. websites, advertisers, or partners). In order for the small queries to not affect the entire cluster, it makes sense to locate data for a single client on a single shard. Alternatively, you can set up bi-level sharding: divide the entire cluster into “layers”, where a layer may consist of multiple shards. Data for a single client is located on a single layer, but shards can be added to a layer as necessary, and data is randomly distributed within them. `Distributed` tables are created for each layer, and a single shared distributed table is created for global queries. Data is written asynchronously. When inserted in the table, the data block is just written to the local file system. The data is sent to the remote servers in the background as soon as possible. The periodicity for sending data is managed by the [distributed_directory_monitor_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_sleep_time_ms) and [distributed_directory_monitor_max_sleep_time_ms](../../../operations/settings/settings.md#distributed_directory_monitor_max_sleep_time_ms) settings. The `Distributed` engine sends each file with inserted data separately, but you can enable batch sending of files with the [distributed_directory_monitor_batch_inserts](../../../operations/settings/settings.md#distributed_directory_monitor_batch_inserts) setting. This setting improves cluster performance by better utilizing local server and network resources. You should check whether data is sent successfully by checking the list of files (data waiting to be sent) in the table directory: `/var/lib/clickhouse/data/database/table/`. The number of threads performing background tasks can be set by [background_distributed_schedule_pool_size](../../../operations/settings/settings.md#background_distributed_schedule_pool_size) setting. 
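To make the bi-level sharding discussion above concrete, here is a minimal sketch of a `Distributed` table that keeps each client's data on a single shard; the cluster name `my_cluster`, the local table `hits_local`, and the `ClientID` column are assumptions for illustration:

```sql
-- Route each row to a shard by a hash of the client identifier,
-- so queries for one client touch only one shard.
CREATE TABLE hits_all AS hits_local
ENGINE = Distributed(my_cluster, default, hits_local, cityHash64(ClientID));
```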
diff --git a/docs/en/faq/general/mapreduce.md b/docs/en/faq/general/mapreduce.md index 7d25d308d14..30cae65cba2 100644 --- a/docs/en/faq/general/mapreduce.md +++ b/docs/en/faq/general/mapreduce.md @@ -6,7 +6,7 @@ toc_priority: 110 # Why Not Use Something Like MapReduce? {#why-not-use-something-like-mapreduce} -We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Yandex uses its in-house solution, YT. +We can refer to systems like MapReduce as distributed computing systems in which the reduce operation is based on distributed sorting. The most common open-source solution in this class is [Apache Hadoop](http://hadoop.apache.org). Large IT companies often have proprietary in-house solutions. These systems aren’t appropriate for online queries due to their high latency. In other words, they can’t be used as the back-end for a web interface. These types of systems aren’t useful for real-time data updates. Distributed sorting isn’t the best way to perform reduce operations if the result of the operation and all the intermediate results (if there are any) are located in the RAM of a single server, which is usually the case for online queries. In such a case, a hash table is an optimal way to perform reduce operations. A common approach to optimizing map-reduce tasks is pre-aggregation (partial reduce) using a hash table in RAM. The user performs this optimization manually. Distributed sorting is one of the main causes of reduced performance when running simple map-reduce tasks. diff --git a/docs/en/faq/general/ne-tormozit.md b/docs/en/faq/general/ne-tormozit.md index 26ae741216d..e8dc7388eff 100644 --- a/docs/en/faq/general/ne-tormozit.md +++ b/docs/en/faq/general/ne-tormozit.md @@ -9,7 +9,7 @@ toc_priority: 11 This question usually arises when people see official ClickHouse t-shirts. They have large words **“ClickHouse не тормозит”** on the front. -Before ClickHouse became open-source, it has been developed as an in-house storage system by the largest Russian IT company, [Yandex](https://yandex.com/company/). That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. +Before ClickHouse became open-source, it was developed as an in-house storage system by the largest Russian IT company, Yandex. That’s why it initially got its slogan in Russian, which is “не тормозит” (pronounced as “ne tormozit”). After the open-source release we first produced some of those t-shirts for events in Russia and it was a no-brainer to use the slogan as-is. One of the following batches of those t-shirts was supposed to be given away on events outside of Russia and we tried to make the English version of the slogan. Unfortunately, the Russian language is kind of elegant in terms of expressing stuff and there was a restriction of limited space on a t-shirt, so we failed to come up with good enough translation (most options appeared to be either long or inaccurate) and decided to keep the slogan in Russian even on t-shirts produced for international events. It appeared to be a great decision because people all over the world get positively surprised and curious when they see it.
diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index 6dae6c20073..d4c9bab2441 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -11,7 +11,7 @@ This section describes how to obtain example datasets and import them into Click The list of documented datasets: - [GitHub Events](../../getting-started/example-datasets/github-events.md) -- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md) +- [Anonymized Web Analytics Dataset](../../getting-started/example-datasets/metrica.md) - [Recipes](../../getting-started/example-datasets/recipes.md) - [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) - [WikiStat](../../getting-started/example-datasets/wikistat.md) diff --git a/docs/en/getting-started/example-datasets/metrica.md b/docs/en/getting-started/example-datasets/metrica.md index 483220d12ee..d9d8beb0181 100644 --- a/docs/en/getting-started/example-datasets/metrica.md +++ b/docs/en/getting-started/example-datasets/metrica.md @@ -1,11 +1,11 @@ --- toc_priority: 15 -toc_title: Yandex.Metrica Data +toc_title: Web Analytics Data --- -# Anonymized Yandex.Metrica Data {#anonymized-yandex-metrica-data} +# Anonymized Web Analytics Data {#anonymized-web-analytics-data} -Dataset consists of two tables containing anonymized data about hits (`hits_v1`) and visits (`visits_v1`) of Yandex.Metrica. You can read more about Yandex.Metrica in [ClickHouse history](../../introduction/history.md) section. +The dataset consists of two tables containing anonymized web analytics data about hits (`hits_v1`) and visits (`visits_v1`). The dataset consists of two tables, either of them can be downloaded as a compressed `tsv.xz` file or as prepared partitions. In addition to that, an extended version of the `hits` table containing 100 million rows is available as TSV at https://datasets.clickhouse.com/hits/tsv/hits_100m_obfuscated_v1.tsv.xz and as prepared partitions at https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz. @@ -73,6 +73,6 @@ clickhouse-client --query "SELECT COUNT(*) FROM datasets.visits_v1" ## Example Queries {#example-queries} -[ClickHouse tutorial](../../getting-started/tutorial.md) is based on Yandex.Metrica dataset and the recommended way to get started with this dataset is to just go through tutorial. +[The ClickHouse tutorial](../../getting-started/tutorial.md) is based on this web analytics dataset, and the recommended way to get started with this dataset is to go through the tutorial. Additional examples of queries to these tables can be found among [stateful tests](https://github.com/ClickHouse/ClickHouse/tree/master/tests/queries/1_stateful) of ClickHouse (they are named `test.hits` and `test.visits` there). diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md index 64810d3fa37..a7825988695 100644 --- a/docs/en/getting-started/example-datasets/nyc-taxi.md +++ b/docs/en/getting-started/example-datasets/nyc-taxi.md @@ -375,7 +375,7 @@ Q3: 0.051 sec. Q4: 0.072 sec. In this case, the query processing time is determined above all by network latency. -We ran queries using a client located in a Yandex datacenter in Finland on a cluster in Russia, which added about 20 ms of latency. +We ran queries using a client located in a different datacenter than where the cluster was located, which added about 20 ms of latency.
## Summary {#summary} diff --git a/docs/en/getting-started/playground.md b/docs/en/getting-started/playground.md index 90e3eedb764..6c44f250242 100644 --- a/docs/en/getting-started/playground.md +++ b/docs/en/getting-started/playground.md @@ -5,11 +5,12 @@ toc_title: Playground # ClickHouse Playground {#clickhouse-playground} +!!! warning "Warning" + This service is deprecated and will be replaced in the foreseeable future. + [ClickHouse Playground](https://play.clickhouse.com) allows people to experiment with ClickHouse by running queries instantly, without setting up their server or cluster. Several example datasets are available in Playground as well as sample queries that show ClickHouse features. There’s also a selection of ClickHouse LTS releases to experiment with. -ClickHouse Playground gives the experience of m2.small [Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse) instance (4 vCPU, 32 GB RAM) hosted in [Yandex.Cloud](https://cloud.yandex.com/). More information about [cloud providers](../commercial/cloud.md). - You can make queries to Playground using any HTTP client, for example [curl](https://curl.haxx.se) or [wget](https://www.gnu.org/software/wget/), or set up a connection using [JDBC](../interfaces/jdbc.md) or [ODBC](../interfaces/odbc.md) drivers. More information about software products that support ClickHouse is available [here](../interfaces/index.md). ## Credentials {#credentials} @@ -56,11 +57,3 @@ TCP endpoint example with [CLI](../interfaces/cli.md): ``` bash clickhouse client --secure -h play-api.clickhouse.com --port 9440 -u playground --password clickhouse -q "SELECT 'Play ClickHouse\!'" ``` - -## Implementation Details {#implementation-details} - -ClickHouse Playground web interface makes requests via ClickHouse [HTTP API](../interfaces/http.md). -The Playground backend is just a ClickHouse cluster without any additional server-side application. As mentioned above, ClickHouse HTTPS and TCP/TLS endpoints are also publicly available as a part of the Playground, both are proxied through [Cloudflare Spectrum](https://www.cloudflare.com/products/cloudflare-spectrum/) to add an extra layer of protection and improved global connectivity. - -!!! warning "Warning" - Exposing the ClickHouse server to the public internet in any other situation is **strongly not recommended**. Make sure it listens only on a private network and is covered by a properly configured firewall. diff --git a/docs/en/getting-started/tutorial.md b/docs/en/getting-started/tutorial.md index e08b319f2a4..9f43cc8769d 100644 --- a/docs/en/getting-started/tutorial.md +++ b/docs/en/getting-started/tutorial.md @@ -80,7 +80,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv ## Import Sample Dataset {#import-sample-dataset} -Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. +Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use some anonymized web analytics data.
There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. ### Download and Extract Table Data {#download-and-extract-table-data} @@ -105,7 +105,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r 2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md). 3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed. -Yandex.Metrica is a web analytics service, and sample dataset does not cover its full functionality, so there are only two tables to create: +There are two tables to create: - `hits` is a table with each action done by all users on all websites covered by the service. - `visits` is a table that contains pre-built sessions instead of individual actions. @@ -533,19 +533,19 @@ Example config for a cluster with three shards, one replica each: - example-perftest01j.yandex.ru + example-perftest01j 9000 - example-perftest02j.yandex.ru + example-perftest02j 9000 - example-perftest03j.yandex.ru + example-perftest03j 9000 @@ -591,15 +591,15 @@ Example config for a cluster of one shard containing three replicas: - example-perftest01j.yandex.ru + example-perftest01j 9000 - example-perftest02j.yandex.ru + example-perftest02j 9000 - example-perftest03j.yandex.ru + example-perftest03j 9000 @@ -617,15 +617,15 @@ ZooKeeper locations are specified in the configuration file: ``` xml - zoo01.yandex.ru + zoo01 2181 - zoo02.yandex.ru + zoo02 2181 - zoo03.yandex.ru + zoo03 2181 diff --git a/docs/en/guides/apply-catboost-model.md b/docs/en/guides/apply-catboost-model.md index 9fd48fcc62d..859703a31df 100644 --- a/docs/en/guides/apply-catboost-model.md +++ b/docs/en/guides/apply-catboost-model.md @@ -5,7 +5,7 @@ toc_title: Applying CatBoost Models # Applying a Catboost Model in ClickHouse {#applying-catboost-model-in-clickhouse} -[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at [Yandex](https://yandex.com/company/) for machine learning. +[CatBoost](https://catboost.ai) is a free and open-source gradient boosting library developed at Yandex for machine learning. With this instruction, you will learn to apply pre-trained models in ClickHouse by running model inference from SQL. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index f266d0e6354..058c9b6fd4a 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -300,7 +300,7 @@ Result: Search phrase Count 8267016 bathroom interior design 2166 - yandex 1655 + clickhouse 1655 spring 2014 fashion 1549 freeform photos 1480 @@ -371,7 +371,7 @@ Similar to TabSeparated, but outputs a value in name=value format. Names are esc ``` text SearchPhrase= count()=8267016 SearchPhrase=bathroom interior design count()=2166 -SearchPhrase=yandex count()=1655 +SearchPhrase=clickhouse count()=1655 SearchPhrase=2014 spring fashion count()=1549 SearchPhrase=freeform photos count()=1480 SearchPhrase=angelina jolie count()=1245 @@ -1060,7 +1060,7 @@ XML format is suitable only for output, not for parsing. 
Example: 2166 - yandex + clickhouse 1655 diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 7b73cec22a0..e747b93a1a6 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -12,7 +12,7 @@ ClickHouse provides three network interfaces (they can be optionally wrapped in - [Native TCP](../interfaces/tcp.md), which has less overhead. - [gRPC](grpc.md). -In most cases it is recommended to use appropriate tool or library instead of interacting with those directly. Officially supported by Yandex are the following: +In most cases it is recommended to use an appropriate tool or library instead of interacting with those directly. The following are officially supported by ClickHouse: - [Command-line client](../interfaces/cli.md) - [JDBC driver](../interfaces/jdbc.md) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 4a4eee770dc..3aac78f0878 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -6,7 +6,7 @@ toc_title: Integrations # Integration Libraries from Third-party Developers {#integration-libraries-from-third-party-developers} !!! warning "Disclaimer" - Yandex does **not** maintain the tools and libraries listed below and haven’t done any extensive testing to ensure their quality. + ClickHouse, Inc. does **not** maintain the tools and libraries listed below and has not done extensive testing to ensure their quality. ## Infrastructure Products {#infrastructure-products} diff --git a/docs/en/introduction/performance.md b/docs/en/introduction/performance.md index 6ae37086181..684b4ee4179 100644 --- a/docs/en/introduction/performance.md +++ b/docs/en/introduction/performance.md @@ -5,7 +5,7 @@ toc_title: Performance # Performance {#performance} -According to internal testing results at Yandex, ClickHouse shows the best performance (both the highest throughput for long queries and the lowest latency on short queries) for comparable operating scenarios among systems of its class that were available for testing. You can view the test results on a [separate page](https://clickhouse.com/benchmark/dbms/). +ClickHouse shows the best performance (both the highest throughput for long queries and the lowest latency on short queries) for comparable operating scenarios among systems of its class that were available for testing. You can view the test results on a [separate page](https://clickhouse.com/benchmark/dbms/). Numerous independent benchmarks came to similar conclusions. They are not difficult to find using an internet search, or you can see [our small collection of related links](https://clickhouse.com/#independent-benchmarks). diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md new file mode 100644 index 00000000000..dce7938f98b --- /dev/null +++ b/docs/en/operations/named-collections.md @@ -0,0 +1,229 @@ +--- +toc_priority: 69 +toc_title: "Named connections" +--- + +# Storing details for connecting to external sources in configuration files {#named-collections} + +Details for connecting to external sources (dictionaries, tables, table functions) can be saved +in configuration files and thus simplify the creation of objects and hide credentials +from users with only SQL access. + +Parameters can be set in XML (for example, `<format>CSV</format>`) and overridden in SQL (for example, `, format = 'TSV'`). +Parameters in SQL are overridden using the `key = value` format: for example, `compression_method = 'gzip'`.
+
+Named connections are stored in the `config.xml` file of the ClickHouse server in the `<named_collections>` section and are applied when ClickHouse starts.
+
+Example of configuration:
+```xml
+$ cat /etc/clickhouse-server/config.d/named_collections.xml
+<clickhouse>
+    <named_collections>
+    ...
+    </named_collections>
+</clickhouse>
+```
+
+## Named connections for accessing S3
+
+For a description of the parameters, see [s3 Table Function](../sql-reference/table-functions/s3.md).
+
+Example of configuration:
+```xml
+<clickhouse>
+    <named_collections>
+        <s3_mydata>
+            <access_key_id>AKIAIOSFODNN7EXAMPLE</access_key_id>
+            <secret_access_key>wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY</secret_access_key>
+            <format>CSV</format>
+        </s3_mydata>
+    </named_collections>
+</clickhouse>
+```
+
+### Example of using named connections with the s3 function
+
+```sql
+INSERT INTO FUNCTION s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz',
+   format = 'TSV', structure = 'number UInt64', compression_method = 'gzip')
+SELECT * FROM numbers(10000);
+
+SELECT count()
+FROM s3(s3_mydata, url = 'https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz')
+
+┌─count()─┐
+│   10000 │
+└─────────┘
+1 rows in set. Elapsed: 0.279 sec. Processed 10.00 thousand rows, 90.00 KB (35.78 thousand rows/s., 322.02 KB/s.)
+```
+
+### Example of using named connections with an S3 table
+
+```sql
+CREATE TABLE s3_engine_table (number Int64)
+ENGINE=S3(s3_mydata, url='https://s3.us-east-1.amazonaws.com/yourbucket/mydata/test_file.tsv.gz', format = 'TSV')
+SETTINGS input_format_with_names_use_header = 0;
+
+SELECT * FROM s3_engine_table LIMIT 3;
+┌─number─┐
+│      0 │
+│      1 │
+│      2 │
+└────────┘
+```
+
+## Named connections for accessing MySQL database
+
+For a description of the parameters, see [mysql](../sql-reference/table-functions/mysql.md).
+
+Example of configuration:
+```xml
+<clickhouse>
+    <named_collections>
+        <mymysql>
+            <user>myuser</user>
+            <password>mypass</password>
+            <host>127.0.0.1</host>
+            <port>3306</port>
+            <database>test</database>
+            <connection_pool_size>8</connection_pool_size>
+            <on_duplicate_clause>1</on_duplicate_clause>
+            <replace_query>1</replace_query>
+        </mymysql>
+    </named_collections>
+</clickhouse>
+```
+
+### Example of using named connections with the mysql function
+
+```sql
+SELECT count() FROM mysql(mymysql, table = 'test');
+
+┌─count()─┐
+│       3 │
+└─────────┘
+```
+
+### Example of using named connections with a MySQL table
+
+```sql
+CREATE TABLE mytable(A Int64) ENGINE = MySQL(mymysql, table = 'test', connection_pool_size=3, replace_query=0);
+SELECT count() FROM mytable;
+
+┌─count()─┐
+│       3 │
+└─────────┘
+```
+
+### Example of using named connections with a database with engine MySQL
+
+```sql
+CREATE DATABASE mydatabase ENGINE = MySQL(mymysql);
+
+SHOW TABLES FROM mydatabase;
+
+┌─name───┐
+│ source │
+│ test   │
+└────────┘
+```
+
+### Example of using named connections with an external dictionary with source MySQL
+
+```sql
+CREATE DICTIONARY dict (A Int64, B String)
+PRIMARY KEY A
+SOURCE(MYSQL(NAME mymysql TABLE 'source'))
+LIFETIME(MIN 1 MAX 2)
+LAYOUT(HASHED());
+
+SELECT dictGet('dict', 'B', 2);
+
+┌─dictGet('dict', 'B', 2)─┐
+│ two                     │
+└─────────────────────────┘
+```
+
+## Named connections for accessing PostgreSQL database
+
+For a description of the parameters, see [postgresql](../sql-reference/table-functions/postgresql.md).
+
+Example of configuration:
+```xml
+<clickhouse>
+    <named_collections>
+        <mypg>
+            <user>pguser</user>
+            <password>jw8s0F4</password>
+            <host>127.0.0.1</host>
+            <port>5432</port>
+            <database>test</database>
+            <schema>test_schema</schema>
+            <connection_pool_size>8</connection_pool_size>
+        </mypg>
+    </named_collections>
+</clickhouse>
+```
+
+### Example of using named connections with the postgresql function
+
+```sql
+SELECT * FROM postgresql(mypg, table = 'test');
+
+┌─a─┬─b───┐
+│ 2 │ two │
+│ 1 │ one │
+└───┴─────┘
+
+
+SELECT * FROM postgresql(mypg, table = 'test', schema = 'public');
+
+┌─a─┐
+│ 1 │
+│ 2 │
+│ 3 │
+└───┘
+```
+
+
+### Example of using named connections with a table with engine PostgreSQL
+
+```sql
+CREATE TABLE mypgtable (a Int64) ENGINE = PostgreSQL(mypg, table = 'test', schema = 'public');
+
+SELECT * FROM mypgtable;
+
+┌─a─┐
+│ 1 │
+│ 2 │
+│ 3 │
+└───┘
+```
+
+### Example of using named connections with a database with engine PostgreSQL
+
+```sql
+CREATE DATABASE mydatabase ENGINE = PostgreSQL(mypg);
+
+SHOW TABLES FROM mydatabase
+
+┌─name─┐
+│ test │
+└──────┘
+```
+
+### Example of using named connections with an external dictionary with source POSTGRESQL
+
+```sql
+CREATE DICTIONARY dict (a Int64, b String)
+PRIMARY KEY a
+SOURCE(POSTGRESQL(NAME mypg TABLE test))
+LIFETIME(MIN 1 MAX 2)
+LAYOUT(HASHED());
+
+SELECT dictGet('dict', 'b', 2);
+
+┌─dictGet('dict', 'b', 2)─┐
+│ two                     │
+└─────────────────────────┘
+```
diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md index 3c29ebc3270..e410b1b2dfd 100644 --- a/docs/en/operations/performance-test.md +++ b/docs/en/operations/performance-test.md @@ -59,7 +59,7 @@ wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/cl chmod a+x benchmark-new.sh wget https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/clickhouse/queries.sql ``` -3. Download test data according to the [Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md) instruction (“hits” table containing 100 million rows). +3. Download the [web analytics dataset](../getting-started/example-datasets/metrica.md) (“hits” table containing 100 million rows). ```bash wget https://datasets.clickhouse.com/hits/partitions/hits_100m_obfuscated_v1.tar.xz tar xvf hits_100m_obfuscated_v1.tar.xz -C . @@ -78,6 +78,6 @@ mv hits_100m_obfuscated_v1/* . ```bash ./benchmark-new.sh hits_100m_obfuscated ``` -7. Send the numbers and the info about your hardware configuration to clickhouse-feedback@yandex-team.com +7. Send the numbers and the info about your hardware configuration to feedback@clickhouse.com All the results are published here: https://clickhouse.com/benchmark/hardware/ diff --git a/docs/en/operations/quotas.md b/docs/en/operations/quotas.md index eec8961b595..6c6fbbf9cfb 100644 --- a/docs/en/operations/quotas.md +++ b/docs/en/operations/quotas.md @@ -101,7 +101,7 @@ Quotas can use the “quota key” feature to report on resources for multiple k - + none true true @@ -367,6 +367,10 @@ /var/lib/clickhouse/tmp/ + + + + ` - + int needs explicit cast /// 2.
customized types needs explicit cast template - enable_if_not_field_or_bool_or_stringlike_t & + enable_if_not_field_or_bool_or_stringlike_t & /// NOLINT operator=(T && rhs); Field & operator= (bool rhs) @@ -409,7 +409,7 @@ public: template const auto & get() const { - auto mutable_this = const_cast *>(this); + auto * mutable_this = const_cast *>(this); return mutable_this->get(); } @@ -422,7 +422,7 @@ public: template const T & reinterpret() const { - auto mutable_this = const_cast *>(this); + auto * mutable_this = const_cast *>(this); return mutable_this->reinterpret(); } @@ -887,7 +887,7 @@ Field::Field(T && rhs, enable_if_not_field_or_bool_or_stringlike_t) //-V730 } template -Field::enable_if_not_field_or_bool_or_stringlike_t & +Field::enable_if_not_field_or_bool_or_stringlike_t & /// NOLINT Field::operator=(T && rhs) { auto && val = castToNearestFieldType(std::forward(rhs)); @@ -986,10 +986,10 @@ String toString(const Field & x); template <> struct fmt::formatter { - constexpr auto parse(format_parse_context & ctx) + static constexpr auto parse(format_parse_context & ctx) { - auto it = ctx.begin(); - auto end = ctx.end(); + const auto * it = ctx.begin(); + const auto * end = ctx.end(); /// Only support {}. if (it != end && *it != '}') diff --git a/src/Core/MySQL/MySQLGtid.cpp b/src/Core/MySQL/MySQLGtid.cpp index bfd0bd02b45..43fa90b6160 100644 --- a/src/Core/MySQL/MySQLGtid.cpp +++ b/src/Core/MySQL/MySQLGtid.cpp @@ -21,7 +21,7 @@ void GTIDSet::tryMerge(size_t i) intervals.erase(intervals.begin() + i + 1, intervals.begin() + i + 1 + 1); } -void GTIDSets::parse(const String gtid_format) +void GTIDSets::parse(String gtid_format) { if (gtid_format.empty()) { diff --git a/src/Core/MySQL/MySQLGtid.h b/src/Core/MySQL/MySQLGtid.h index c8a571d2569..45eeaf02fa2 100644 --- a/src/Core/MySQL/MySQLGtid.h +++ b/src/Core/MySQL/MySQLGtid.h @@ -35,7 +35,7 @@ class GTIDSets public: std::vector sets; - void parse(const String gtid_format_); + void parse(String gtid_format_); void update(const GTID & other); String toString() const; diff --git a/src/Core/MySQL/MySQLReplication.cpp b/src/Core/MySQL/MySQLReplication.cpp index 49cc201955a..1c1f6535550 100644 --- a/src/Core/MySQL/MySQLReplication.cpp +++ b/src/Core/MySQL/MySQLReplication.cpp @@ -431,7 +431,7 @@ namespace MySQLReplication UInt32 i24 = 0; payload.readStrict(reinterpret_cast(&i24), 3); - const DayNum date_day_number(DateLUT::instance().makeDayNum( + const ExtendedDayNum date_day_number(DateLUT::instance().makeDayNum( static_cast((i24 >> 9) & 0x7fff), static_cast((i24 >> 5) & 0xf), static_cast(i24 & 0x1f)).toUnderType()); row.push_back(Field(date_day_number.toUnderType())); diff --git a/src/Core/PostgreSQLProtocol.h b/src/Core/PostgreSQLProtocol.h index dd26bf41b4a..6ccdcb4d524 100644 --- a/src/Core/PostgreSQLProtocol.h +++ b/src/Core/PostgreSQLProtocol.h @@ -152,7 +152,7 @@ private: WriteBuffer * out; public: - MessageTransport(WriteBuffer * out_) : in(nullptr), out(out_) {} + explicit MessageTransport(WriteBuffer * out_) : in(nullptr), out(out_) {} MessageTransport(ReadBuffer * in_, WriteBuffer * out_): in(in_), out(out_) {} @@ -257,7 +257,7 @@ public: Int32 payload_size; FirstMessage() = delete; - FirstMessage(int payload_size_) : payload_size(payload_size_) {} + explicit FirstMessage(int payload_size_) : payload_size(payload_size_) {} }; class CancelRequest : public FirstMessage @@ -266,7 +266,7 @@ public: Int32 process_id = 0; Int32 secret_key = 0; - CancelRequest(int payload_size_) : FirstMessage(payload_size_) {} + explicit 
CancelRequest(int payload_size_) : FirstMessage(payload_size_) {} void deserialize(ReadBuffer & in) override { @@ -391,7 +391,7 @@ public: // includes username, may also include database and other runtime parameters std::unordered_map parameters; - StartupMessage(Int32 payload_size_) : FirstMessage(payload_size_) {} + explicit StartupMessage(Int32 payload_size_) : FirstMessage(payload_size_) {} void deserialize(ReadBuffer & in) override { @@ -643,7 +643,7 @@ private: const std::vector & fields_descr; public: - RowDescription(const std::vector & fields_descr_) : fields_descr(fields_descr_) {} + explicit RowDescription(const std::vector & fields_descr_) : fields_descr(fields_descr_) {} void serialize(WriteBuffer & out) const override { @@ -673,7 +673,7 @@ class StringField : public ISerializable private: String str; public: - StringField(String str_) : str(str_) {} + explicit StringField(String str_) : str(str_) {} void serialize(WriteBuffer & out) const override { @@ -703,7 +703,7 @@ private: const std::vector> & row; public: - DataRow(const std::vector> & row_) : row(row_) {} + explicit DataRow(const std::vector> & row_) : row(row_) {} void serialize(WriteBuffer & out) const override { @@ -886,7 +886,7 @@ private: std::unordered_map> type_to_method = {}; public: - AuthenticationManager(const std::vector> & auth_methods) + explicit AuthenticationManager(const std::vector> & auth_methods) { for (const std::shared_ptr & method : auth_methods) { diff --git a/src/Core/QualifiedTableName.h b/src/Core/QualifiedTableName.h index 4642465f461..3310130629d 100644 --- a/src/Core/QualifiedTableName.h +++ b/src/Core/QualifiedTableName.h @@ -72,7 +72,7 @@ struct QualifiedTableName QualifiedTableName name; if (pos == std::string::npos) { - name.table = std::move(maybe_qualified_name); + name.table = maybe_qualified_name; } else if (maybe_qualified_name.find('.', pos + 1) != std::string::npos) { @@ -119,7 +119,7 @@ namespace fmt template <> struct formatter { - constexpr auto parse(format_parse_context & ctx) + static constexpr auto parse(format_parse_context & ctx) { return ctx.begin(); } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 18e75224e7a..0f06030c720 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -475,7 +475,7 @@ class IColumn; M(Bool, allow_experimental_database_materialized_mysql, false, "Allow to create database with Engine=MaterializedMySQL(...).", 0) \ M(Bool, allow_experimental_database_materialized_postgresql, false, "Allow to create database with Engine=MaterializedPostgreSQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \ - M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ + M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal', 'datetime64', 'date2Date32' or 'date2String'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precision are seen as String on ClickHouse's side.", 0) \ M(Bool, optimize_trivial_insert_select, true, "Optimize trivial 'INSERT INTO table SELECT ... 
FROM TABLES' query", 0) \ M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \ M(Bool, enable_global_with_statement, true, "Propagate WITH statements to UNION queries and all subqueries", 0) \ @@ -547,8 +547,10 @@ class IColumn; M(Milliseconds, async_insert_busy_timeout_ms, 200, "Maximum time to wait before dumping collected data per query since the first data appeared", 0) \ M(Milliseconds, async_insert_stale_timeout_ms, 0, "Maximum time to wait before dumping collected data per query since the last data appeared. Zero means no timeout at all", 0) \ \ - M(Int64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ - M(Int64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ + M(UInt64, remote_fs_read_max_backoff_ms, 10000, "Max wait time when trying to read data for remote disk", 0) \ + M(UInt64, remote_fs_read_backoff_max_tries, 5, "Max attempts to read with backoff", 0) \ + M(Bool, remote_fs_enable_cache, true, "Use cache for remote filesystem. This setting does not turn on/off cache for disks (must be done via disk config), but allows to bypass cache for some queries if intended", 0) \ + M(UInt64, remote_fs_cache_max_wait_sec, 5, "Allow to wait at most this number of seconds for download of current remote_fs_buffer_size bytes, and skip cache if exceeded", 0) \ \ M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \ M(UInt64, http_retry_initial_backoff_ms, 100, "Min milliseconds for backoff, when retrying read via http", 0) \ @@ -608,6 +610,7 @@ class IColumn; M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \ + M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \ M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \ M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \ M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 1b0f6c96954..ddd1c29785c 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -106,7 +106,9 @@ IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultTableEngine, ErrorCodes::BAD_ARGUMENTS IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL, {{"decimal", MySQLDataTypesSupport::DECIMAL}, - {"datetime64", MySQLDataTypesSupport::DATETIME64}}) + {"datetime64", MySQLDataTypesSupport::DATETIME64}, + {"date2Date32", MySQLDataTypesSupport::DATE2DATE32}, + {"date2String", MySQLDataTypesSupport::DATE2STRING}}) IMPLEMENT_SETTING_ENUM(UnionMode, ErrorCodes::UNKNOWN_UNION, {{"", UnionMode::Unspecified}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 27994529a0b..47bd4b9a928 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -138,7 +138,8 @@ enum class MySQLDataTypesSupport { DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when
applicable DATETIME64, // convert MySQL's DATETIME and TIMESTAMP and ClickHouse DateTime64 if precision is > 0 or range is greater that for DateTime. - // ENUM + DATE2DATE32, // convert MySQL's date type to ClickHouse Date32 + DATE2STRING // convert MySQL's date type to ClickHouse String(This is usually used when your mysql date is less than 1925) }; DECLARE_SETTING_MULTI_ENUM(MySQLDataTypesSupport) diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index b27763ad0d6..474786eb963 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -43,7 +43,7 @@ struct SettingFieldNumber SettingFieldNumber & operator=(Type x) { value = x; changed = true; return *this; } SettingFieldNumber & operator=(const Field & f); - operator Type() const { return value; } + operator Type() const { return value; } /// NOLINT explicit operator Field() const { return value; } String toString() const; @@ -75,7 +75,7 @@ struct SettingFieldMaxThreads SettingFieldMaxThreads & operator=(UInt64 x) { is_auto = !x; value = is_auto ? getAuto() : x; changed = true; return *this; } SettingFieldMaxThreads & operator=(const Field & f); - operator UInt64() const { return value; } + operator UInt64() const { return value; } /// NOLINT explicit operator Field() const { return value; } /// Writes "auto()" instead of simple "" if `is_auto==true`. @@ -118,10 +118,10 @@ struct SettingFieldTimespan SettingFieldTimespan & operator =(UInt64 x) { *this = Poco::Timespan{static_cast(x * microseconds_per_unit)}; return *this; } SettingFieldTimespan & operator =(const Field & f); - operator Poco::Timespan() const { return value; } + operator Poco::Timespan() const { return value; } /// NOLINT template > - operator std::chrono::duration() const { return std::chrono::duration_cast>(std::chrono::microseconds(value.totalMicroseconds())); } + operator std::chrono::duration() const { return std::chrono::duration_cast>(std::chrono::microseconds(value.totalMicroseconds())); } /// NOLINT explicit operator UInt64() const { return value.totalMicroseconds() / microseconds_per_unit; } explicit operator Field() const { return operator UInt64(); } @@ -158,7 +158,7 @@ struct SettingFieldString SettingFieldString & operator =(const char * str) { *this = std::string_view{str}; return *this; } SettingFieldString & operator =(const Field & f) { *this = f.safeGet(); return *this; } - operator const String &() const { return value; } + operator const String &() const { return value; } /// NOLINT explicit operator Field() const { return value; } const String & toString() const { return value; } @@ -181,7 +181,7 @@ public: SettingFieldChar & operator =(char c) { value = c; changed = true; return *this; } SettingFieldChar & operator =(const Field & f); - operator char() const { return value; } + operator char() const { return value; } /// NOLINT explicit operator Field() const { return toString(); } String toString() const { return String(&value, 1); } @@ -207,7 +207,7 @@ struct SettingFieldURI SettingFieldURI & operator =(const char * str) { *this = Poco::URI{str}; return *this; } SettingFieldURI & operator =(const Field & f) { *this = f.safeGet(); return *this; } - operator const Poco::URI &() const { return value; } + operator const Poco::URI &() const { return value; } /// NOLINT explicit operator String() const { return toString(); } explicit operator Field() const { return toString(); } @@ -244,7 +244,7 @@ struct SettingFieldEnum SettingFieldEnum & operator =(EnumType x) { value = x; changed = true; return *this; } 
SettingFieldEnum & operator =(const Field & f) { *this = Traits::fromString(f.safeGet()); return *this; } - operator EnumType() const { return value; } + operator EnumType() const { return value; } /// NOLINT explicit operator Field() const { return toString(); } String toString() const { return Traits::toString(value); } @@ -272,12 +272,15 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) *this = Traits::fromString(SettingFieldEnumHelpers::readBinary(in)); } +/// NOLINTNEXTLINE #define DECLARE_SETTING_ENUM(ENUM_TYPE) \ DECLARE_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE) +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ IMPLEMENT_SETTING_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__) +/// NOLINTNEXTLINE #define DECLARE_SETTING_ENUM_WITH_RENAME(NEW_NAME, ENUM_TYPE) \ struct SettingField##NEW_NAME##Traits \ { \ @@ -288,6 +291,7 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) \ using SettingField##NEW_NAME = SettingFieldEnum; +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ const String & SettingField##NEW_NAME##Traits::toString(typename SettingField##NEW_NAME::EnumType value) \ { \ @@ -346,7 +350,7 @@ struct SettingFieldMultiEnum explicit SettingFieldMultiEnum(StorageType s) : value(s) {} explicit SettingFieldMultiEnum(const Field & f) : value(parseValueFromString(f.safeGet())) {} - operator ValueType() const { return value; } + operator ValueType() const { return value; } /// NOLINT explicit operator StorageType() const { return value.getValue(); } explicit operator Field() const { return toString(); } @@ -368,7 +372,7 @@ struct SettingFieldMultiEnum } } - if (result.size() > 0) + if (!result.empty()) result.erase(result.size() - separator.size()); return result; @@ -415,9 +419,11 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) parseFromString(SettingFieldEnumHelpers::readBinary(in)); } +/// NOLINTNEXTLINE #define DECLARE_SETTING_MULTI_ENUM(ENUM_TYPE) \ DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE) +/// NOLINTNEXTLINE #define DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, NEW_NAME) \ struct SettingField##NEW_NAME##Traits \ { \ @@ -429,9 +435,11 @@ void SettingFieldMultiEnum::readBinary(ReadBuffer & in) \ using SettingField##NEW_NAME = SettingFieldMultiEnum; +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_MULTI_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__) +/// NOLINTNEXTLINE #define IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) 
\ IMPLEMENT_SETTING_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)\ size_t SettingField##NEW_NAME##Traits::getEnumSize() {\ diff --git a/src/Core/SortCursor.h b/src/Core/SortCursor.h index dd804bd4675..a5daba9fbee 100644 --- a/src/Core/SortCursor.h +++ b/src/Core/SortCursor.h @@ -53,7 +53,7 @@ struct SortCursorImpl */ IColumn::Permutation * permutation = nullptr; - SortCursorImpl() {} + SortCursorImpl() = default; SortCursorImpl(const Block & block, const SortDescription & desc_, size_t order_ = 0, IColumn::Permutation * perm = nullptr) : desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size()) @@ -140,7 +140,7 @@ struct SortCursorHelper const Derived & derived() const { return static_cast(*this); } - SortCursorHelper(SortCursorImpl * impl_) : impl(impl_) {} + explicit SortCursorHelper(SortCursorImpl * impl_) : impl(impl_) {} SortCursorImpl * operator-> () { return impl; } const SortCursorImpl * operator-> () const { return impl; } @@ -245,7 +245,7 @@ public: SortingHeap() = default; template - SortingHeap(Cursors & cursors) + explicit SortingHeap(Cursors & cursors) { size_t size = cursors.size(); queue.reserve(size); diff --git a/src/Core/examples/mysql_protocol.cpp b/src/Core/examples/mysql_protocol.cpp index 1b81d856c9a..396bc6f7e9b 100644 --- a/src/Core/examples/mysql_protocol.cpp +++ b/src/Core/examples/mysql_protocol.cpp @@ -330,7 +330,7 @@ int main(int argc, char ** argv) /// Connect to the master. slave.connect(); - slave.startBinlogDumpGTID(slave_id, replicate_db, gtid_sets, binlog_checksum); + slave.startBinlogDumpGTID(slave_id, replicate_db, {}, gtid_sets, binlog_checksum); WriteBufferFromOStream cerr(std::cerr); diff --git a/src/Core/tests/gtest_settings.cpp b/src/Core/tests/gtest_settings.cpp index 8833d86c397..46d8f9665dc 100644 --- a/src/Core/tests/gtest_settings.cpp +++ b/src/Core/tests/gtest_settings.cpp @@ -53,6 +53,29 @@ GTEST_TEST(SettingMySQLDataTypesSupport, WithDECIMAL) ASSERT_EQ(Field("decimal"), setting); } +GTEST_TEST(SettingMySQLDataTypesSupport, WithDATE) +{ + SettingMySQLDataTypesSupport setting; + setting = String("date2Date32"); + ASSERT_EQ(4, setting.value.getValue()); + + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATE2DATE32)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + + ASSERT_EQ("date2Date32", setting.toString()); + ASSERT_EQ(Field("date2Date32"), setting); + + setting = String("date2String"); + ASSERT_EQ(8, setting.value.getValue()); + + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATE2STRING)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATE2DATE32)); + + ASSERT_EQ("date2String", setting.toString()); + ASSERT_EQ(Field("date2String"), setting); +} + GTEST_TEST(SettingMySQLDataTypesSupport, With1) { // Setting can be initialized with int value corresponding to DECIMAL diff --git a/src/DataTypes/DataTypeArray.h b/src/DataTypes/DataTypeArray.h index 564dbba8503..122ac8e03a3 100644 --- a/src/DataTypes/DataTypeArray.h +++ b/src/DataTypes/DataTypeArray.h @@ -17,7 +17,7 @@ private: public: static constexpr bool is_parametric = true; - DataTypeArray(const DataTypePtr & nested_); + explicit DataTypeArray(const DataTypePtr & nested_); TypeIndex getTypeId() const override { return TypeIndex::Array; } diff --git a/src/DataTypes/DataTypeCustom.h b/src/DataTypes/DataTypeCustom.h index 55796e3cc7a..e8e4160af07 100644 --- a/src/DataTypes/DataTypeCustom.h +++ 
b/src/DataTypes/DataTypeCustom.h @@ -19,7 +19,7 @@ class IColumn; class IDataTypeCustomName { public: - virtual ~IDataTypeCustomName() {} + virtual ~IDataTypeCustomName() = default; virtual String getName() const = 0; }; @@ -33,7 +33,7 @@ struct DataTypeCustomDesc DataTypeCustomNamePtr name; SerializationPtr serialization; - DataTypeCustomDesc( + explicit DataTypeCustomDesc( DataTypeCustomNamePtr name_, SerializationPtr serialization_ = nullptr) : name(std::move(name_)) @@ -49,7 +49,7 @@ class DataTypeCustomFixedName : public IDataTypeCustomName private: String name; public: - DataTypeCustomFixedName(String name_) : name(name_) {} + explicit DataTypeCustomFixedName(String name_) : name(name_) {} String getName() const override { return name; } }; diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index dc054144e14..926dfd9cc82 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -34,7 +34,7 @@ public: DataTypeCustomSimpleAggregateFunction(const AggregateFunctionPtr & function_, const DataTypes & argument_types_, const Array & parameters_) : function(function_), argument_types(argument_types_), parameters(parameters_) {} - const AggregateFunctionPtr getFunction() const { return function; } + AggregateFunctionPtr getFunction() const { return function; } String getName() const override; static void checkSupportedFunctions(const AggregateFunctionPtr & function); }; diff --git a/src/DataTypes/DataTypeDateTime.h b/src/DataTypes/DataTypeDateTime.h index 57052144216..91a09ff7cb9 100644 --- a/src/DataTypes/DataTypeDateTime.h +++ b/src/DataTypes/DataTypeDateTime.h @@ -16,7 +16,7 @@ namespace DB * * To cast from/to text format, time zone may be specified explicitly or implicit time zone may be used. * - * Time zone may be specified explicitly as type parameter, example: DateTime('Europe/Moscow'). + * Time zone may be specified explicitly as type parameter, example: DateTime('Pacific/Pitcairn'). * As it does not affect the internal representation of values, * all types with different time zones are equivalent and may be used interchangingly. * Time zone only affects parsing and displaying in text formats. 
@@ -48,4 +48,3 @@ public: }; } - diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index bdb39978825..9e37de8a35b 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -172,14 +172,14 @@ inline auto decimalResultType(const DecimalType<T> & tx, const DecimalType<U> & } template <typename T, typename U, template <typename> typename DecimalType> -inline const DecimalType<T> decimalResultType(const DecimalType<T> & tx, const DataTypeNumber<U> & ty) +inline DecimalType<T> decimalResultType(const DecimalType<T> & tx, const DataTypeNumber<U> & ty) { const auto result_trait = DecimalUtils::binaryOpResult(tx, ty); return DecimalType<T>(result_trait.precision, result_trait.scale); } template <typename T, typename U, template <typename> typename DecimalType> -inline const DecimalType<U> decimalResultType(const DataTypeNumber<T> & tx, const DecimalType<U> & ty) +inline DecimalType<U> decimalResultType(const DataTypeNumber<T> & tx, const DecimalType<U> & ty) { const auto result_trait = DecimalUtils::binaryOpResult(tx, ty); return DecimalType<U>(result_trait.precision, result_trait.scale); diff --git a/src/DataTypes/DataTypeFactory.h b/src/DataTypes/DataTypeFactory.h index 81d7d991bdc..e7b638b6d7b 100644 --- a/src/DataTypes/DataTypeFactory.h +++ b/src/DataTypes/DataTypeFactory.h @@ -51,7 +51,6 @@ public: private: const Value & findCreatorByName(const String & family_name) const; -private: DataTypesDictionary data_types; /// Case insensitive data types will be additionally added here with lowercased name. diff --git a/src/DataTypes/DataTypeFixedString.h b/src/DataTypes/DataTypeFixedString.h index a53fde42b29..7c089866b23 100644 --- a/src/DataTypes/DataTypeFixedString.h +++ b/src/DataTypes/DataTypeFixedString.h @@ -29,7 +29,7 @@ public: static constexpr bool is_parametric = true; static constexpr auto type_id = TypeIndex::FixedString; - DataTypeFixedString(size_t n_) : n(n_) + explicit DataTypeFixedString(size_t n_) : n(n_) { if (n == 0) throw Exception("FixedString size must be positive", ErrorCodes::ARGUMENT_OUT_OF_BOUND); diff --git a/src/DataTypes/DataTypeFunction.h b/src/DataTypes/DataTypeFunction.h index 489ed4545f4..888bcb6a775 100644 --- a/src/DataTypes/DataTypeFunction.h +++ b/src/DataTypes/DataTypeFunction.h @@ -19,7 +19,7 @@ public: bool isParametric() const override { return true; } /// Some types could be still unknown. 
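A large share of the hunks in this patch, like DataTypeFixedString above, simply add explicit to single-argument constructors. The motivation is to stop silent implicit conversions; a minimal sketch with a hypothetical FixedStringSketch type (not the real class):

#include <cstddef>

struct FixedStringSketch
{
    std::size_t n;
    explicit FixedStringSketch(std::size_t n_) : n(n_) {}
};

void takesType(const FixedStringSketch &) {}

int main()
{
    // takesType(16);                 // without `explicit` this would compile,
                                      // silently constructing FixedStringSketch(16)
    takesType(FixedStringSketch(16)); // with `explicit`, the intent must be spelled out
}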
- DataTypeFunction(const DataTypes & argument_types_ = DataTypes(), const DataTypePtr & return_type_ = nullptr) + explicit DataTypeFunction(const DataTypes & argument_types_ = DataTypes(), const DataTypePtr & return_type_ = nullptr) : argument_types(argument_types_), return_type(return_type_) {} std::string doGetName() const override; diff --git a/src/DataTypes/DataTypeInterval.h b/src/DataTypes/DataTypeInterval.h index 9ef6237ec41..83d89a73460 100644 --- a/src/DataTypes/DataTypeInterval.h +++ b/src/DataTypes/DataTypeInterval.h @@ -25,7 +25,7 @@ public: IntervalKind getKind() const { return kind; } - DataTypeInterval(IntervalKind kind_) : kind(kind_) {} + explicit DataTypeInterval(IntervalKind kind_) : kind(kind_) {} std::string doGetName() const override { return fmt::format("Interval{}", kind.toString()); } const char * getFamilyName() const override { return "Interval"; } diff --git a/src/DataTypes/DataTypeMap.h b/src/DataTypes/DataTypeMap.h index 04377f85cfb..65bdd93ca4d 100644 --- a/src/DataTypes/DataTypeMap.h +++ b/src/DataTypes/DataTypeMap.h @@ -23,7 +23,7 @@ private: public: static constexpr bool is_parametric = true; - DataTypeMap(const DataTypes & elems); + explicit DataTypeMap(const DataTypes & elems); DataTypeMap(const DataTypePtr & key_type_, const DataTypePtr & value_type_); TypeIndex getTypeId() const override { return TypeIndex::Map; } diff --git a/src/DataTypes/DataTypeTuple.h b/src/DataTypes/DataTypeTuple.h index c56e87ca22d..db122aae5df 100644 --- a/src/DataTypes/DataTypeTuple.h +++ b/src/DataTypes/DataTypeTuple.h @@ -26,7 +26,7 @@ private: public: static constexpr bool is_parametric = true; - DataTypeTuple(const DataTypes & elems); + explicit DataTypeTuple(const DataTypes & elems); DataTypeTuple(const DataTypes & elems, const Strings & names, bool serialize_names_ = true); static bool canBeCreatedWithNames(const Strings & names); diff --git a/src/DataTypes/DataTypesDecimal.h b/src/DataTypes/DataTypesDecimal.h index fb590dd1d4b..0ec29e3c5f4 100644 --- a/src/DataTypes/DataTypesDecimal.h +++ b/src/DataTypes/DataTypesDecimal.h @@ -60,26 +60,26 @@ inline const DataTypeDecimal<T> * checkDecimal(const IDataType & data_type) inline UInt32 getDecimalScale(const IDataType & data_type, UInt32 default_value = std::numeric_limits<UInt32>::max()) { - if (auto * decimal_type = checkDecimal<Decimal32>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal32>(data_type)) return decimal_type->getScale(); - if (auto * decimal_type = checkDecimal<Decimal64>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal64>(data_type)) return decimal_type->getScale(); - if (auto * decimal_type = checkDecimal<Decimal128>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal128>(data_type)) return decimal_type->getScale(); - if (auto * decimal_type = checkDecimal<Decimal256>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal256>(data_type)) return decimal_type->getScale(); return default_value; } inline UInt32 getDecimalPrecision(const IDataType & data_type) { - if (auto * decimal_type = checkDecimal<Decimal32>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal32>(data_type)) return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal<Decimal64>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal64>(data_type)) return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal<Decimal128>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal128>(data_type)) return decimal_type->getPrecision(); - if (auto * decimal_type = checkDecimal<Decimal256>(data_type)) + if (const auto * decimal_type = checkDecimal<Decimal256>(data_type)) return decimal_type->getPrecision(); 
return 0; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 5bc089e085f..36e1ce8ddd5 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -318,12 +318,12 @@ struct WhichDataType { TypeIndex idx; - constexpr WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) : idx(idx_) {} - constexpr WhichDataType(const IDataType & data_type) : idx(data_type.getTypeId()) {} - constexpr WhichDataType(const IDataType * data_type) : idx(data_type->getTypeId()) {} + constexpr WhichDataType(TypeIndex idx_ = TypeIndex::Nothing) : idx(idx_) {} /// NOLINT + constexpr WhichDataType(const IDataType & data_type) : idx(data_type.getTypeId()) {} /// NOLINT + constexpr WhichDataType(const IDataType * data_type) : idx(data_type->getTypeId()) {} /// NOLINT // shared ptr -> is non-constexpr in gcc - WhichDataType(const DataTypePtr & data_type) : idx(data_type->getTypeId()) {} + WhichDataType(const DataTypePtr & data_type) : idx(data_type->getTypeId()) {} /// NOLINT constexpr bool isUInt8() const { return idx == TypeIndex::UInt8; } constexpr bool isUInt16() const { return idx == TypeIndex::UInt16; } diff --git a/src/DataTypes/Native.h b/src/DataTypes/Native.h index b72e479cb1d..3a635d2e240 100644 --- a/src/DataTypes/Native.h +++ b/src/DataTypes/Native.h @@ -201,7 +201,7 @@ static inline llvm::Value * nativeCast(llvm::IRBuilder<> & b, const DataTypePtr return nativeCast(b, from, value, n_to); } -static inline std::pair<llvm::Value *, llvm::Value *> nativeCastToCommon(llvm::IRBuilder<> & b, const DataTypePtr & lhs_type, llvm::Value * lhs, const DataTypePtr & rhs_type, llvm::Value * rhs) +static inline std::pair<llvm::Value *, llvm::Value *> nativeCastToCommon(llvm::IRBuilder<> & b, const DataTypePtr & lhs_type, llvm::Value * lhs, const DataTypePtr & rhs_type, llvm::Value * rhs) /// NOLINT { llvm::Type * common; diff --git a/src/DataTypes/Serializations/ISerialization.h b/src/DataTypes/Serializations/ISerialization.h index b1fd4d0a9da..86d4eab289a 100644 --- a/src/DataTypes/Serializations/ISerialization.h +++ b/src/DataTypes/Serializations/ISerialization.h @@ -145,7 +145,7 @@ public: /// Flag, that may help to traverse substream paths. 
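Note that WhichDataType above gets /// NOLINT rather than explicit: its converting constructors are implicit on purpose, so call sites can pass a TypeIndex, an IDataType reference, or a pointer interchangeably. A simplified standalone model of that trade-off (not the real class):

// Simplified model: implicit construction keeps dispatch helpers terse.
enum class TypeIndexSketch { Nothing, UInt8, UInt16 };

struct WhichSketch
{
    TypeIndexSketch idx;
    constexpr WhichSketch(TypeIndexSketch idx_ = TypeIndexSketch::Nothing) : idx(idx_) {} // implicit by design
    constexpr bool isUInt8() const { return idx == TypeIndexSketch::UInt8; }
};

constexpr bool isUInt8(TypeIndexSketch t) { return WhichSketch(t).isUInt8(); } // terse call site

static_assert(isUInt8(TypeIndexSketch::UInt8));
static_assert(!isUInt8(TypeIndexSketch::Nothing));

int main() {}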
mutable bool visited = false; - Substream(Type type_) : type(type_) {} + Substream(Type type_) : type(type_) {} /// NOLINT String toString() const; }; diff --git a/src/DataTypes/Serializations/SerializationArray.h b/src/DataTypes/Serializations/SerializationArray.h index cd8cac54881..3769f8a4513 100644 --- a/src/DataTypes/Serializations/SerializationArray.h +++ b/src/DataTypes/Serializations/SerializationArray.h @@ -11,7 +11,7 @@ private: SerializationPtr nested; public: - SerializationArray(const SerializationPtr & nested_) : nested(nested_) {} + explicit SerializationArray(const SerializationPtr & nested_) : nested(nested_) {} void serializeBinary(const Field & field, WriteBuffer & ostr) const override; void deserializeBinary(Field & field, ReadBuffer & istr) const override; @@ -71,7 +71,7 @@ private: { const ColumnPtr offsets; - SubcolumnCreator(const ColumnPtr & offsets_) : offsets(offsets_) {} + explicit SubcolumnCreator(const ColumnPtr & offsets_) : offsets(offsets_) {} DataTypePtr create(const DataTypePtr & prev) const override; SerializationPtr create(const SerializationPtr & prev) const override; diff --git a/src/DataTypes/Serializations/SerializationBool.h b/src/DataTypes/Serializations/SerializationBool.h index a9f4c6404b3..a5aa0ca80a2 100644 --- a/src/DataTypes/Serializations/SerializationBool.h +++ b/src/DataTypes/Serializations/SerializationBool.h @@ -10,7 +10,7 @@ namespace DB class SerializationBool final : public SerializationWrapper { public: - SerializationBool(const SerializationPtr & nested_); + explicit SerializationBool(const SerializationPtr & nested_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationCustomSimpleText.h b/src/DataTypes/Serializations/SerializationCustomSimpleText.h index ba7c712f86c..21d6f8af650 100644 --- a/src/DataTypes/Serializations/SerializationCustomSimpleText.h +++ b/src/DataTypes/Serializations/SerializationCustomSimpleText.h @@ -15,7 +15,7 @@ class IColumn; class SerializationCustomSimpleText : public SerializationWrapper { public: - SerializationCustomSimpleText(const SerializationPtr & nested_); + explicit SerializationCustomSimpleText(const SerializationPtr & nested_); // Methods that subclasses must override in order to get full serialization/deserialization support. 
virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override = 0; diff --git a/src/DataTypes/Serializations/SerializationDateTime.h b/src/DataTypes/Serializations/SerializationDateTime.h index 75334592422..f4a142483e5 100644 --- a/src/DataTypes/Serializations/SerializationDateTime.h +++ b/src/DataTypes/Serializations/SerializationDateTime.h @@ -11,7 +11,7 @@ namespace DB class SerializationDateTime final : public SerializationNumber, public TimezoneMixin { public: - SerializationDateTime(const TimezoneMixin & time_zone_); + explicit SerializationDateTime(const TimezoneMixin & time_zone_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override; diff --git a/src/DataTypes/Serializations/SerializationEnum.h b/src/DataTypes/Serializations/SerializationEnum.h index dfa9e74c7a1..bdd769b59c5 100644 --- a/src/DataTypes/Serializations/SerializationEnum.h +++ b/src/DataTypes/Serializations/SerializationEnum.h @@ -14,7 +14,7 @@ public: using typename SerializationNumber::ColumnType; using typename EnumValues::Values; - SerializationEnum(const Values & values_) : EnumValues(values_) {} + explicit SerializationEnum(const Values & values_) : EnumValues(values_) {} void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; diff --git a/src/DataTypes/Serializations/SerializationFixedString.h b/src/DataTypes/Serializations/SerializationFixedString.h index 82559d10800..c3c08b20419 100644 --- a/src/DataTypes/Serializations/SerializationFixedString.h +++ b/src/DataTypes/Serializations/SerializationFixedString.h @@ -12,7 +12,7 @@ private: size_t n; public: - SerializationFixedString(size_t n_) : n(n_) {} + explicit SerializationFixedString(size_t n_) : n(n_) {} size_t getN() const { return n; } void serializeBinary(const Field & field, WriteBuffer & ostr) const override; diff --git a/src/DataTypes/Serializations/SerializationIP.h b/src/DataTypes/Serializations/SerializationIP.h index a7bf1aeb2c6..282105b6b1e 100644 --- a/src/DataTypes/Serializations/SerializationIP.h +++ b/src/DataTypes/Serializations/SerializationIP.h @@ -8,7 +8,7 @@ namespace DB class SerializationIPv4 final : public SerializationCustomSimpleText { public: - SerializationIPv4(const SerializationPtr & nested_); + explicit SerializationIPv4(const SerializationPtr & nested_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override; @@ -17,7 +17,7 @@ public: class SerializationIPv6 : public SerializationCustomSimpleText { public: - SerializationIPv6(const SerializationPtr & nested_); + explicit SerializationIPv6(const SerializationPtr & nested_); void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override; void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, bool whole) const override; diff --git a/src/DataTypes/Serializations/SerializationLowCardinality.h b/src/DataTypes/Serializations/SerializationLowCardinality.h index 5f8a2a95a25..0a3597e86c7 100644 
--- a/src/DataTypes/Serializations/SerializationLowCardinality.h +++ b/src/DataTypes/Serializations/SerializationLowCardinality.h @@ -15,7 +15,7 @@ private: SerializationPtr dict_inner_serialization; public: - SerializationLowCardinality(const DataTypePtr & dictionary_type); + explicit SerializationLowCardinality(const DataTypePtr & dictionary_type); void enumerateStreams( SubstreamPath & path, diff --git a/src/DataTypes/Serializations/SerializationNothing.h b/src/DataTypes/Serializations/SerializationNothing.h index 4a062931ac2..2de93a29763 100644 --- a/src/DataTypes/Serializations/SerializationNothing.h +++ b/src/DataTypes/Serializations/SerializationNothing.h @@ -14,7 +14,7 @@ namespace ErrorCodes class SerializationNothing : public SimpleTextSerialization { private: - [[noreturn]] void throwNoSerialization() const + [[noreturn]] static void throwNoSerialization() { throw Exception("Serialization is not implemented", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/DataTypes/Serializations/SerializationNullable.h b/src/DataTypes/Serializations/SerializationNullable.h index eb3e9bfb430..c22f2f57786 100644 --- a/src/DataTypes/Serializations/SerializationNullable.h +++ b/src/DataTypes/Serializations/SerializationNullable.h @@ -11,7 +11,7 @@ private: SerializationPtr nested; public: - SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {} + explicit SerializationNullable(const SerializationPtr & nested_) : nested(nested_) {} void enumerateStreams( SubstreamPath & path, @@ -96,7 +96,7 @@ private: { const ColumnPtr null_map; - SubcolumnCreator(const ColumnPtr & null_map_) : null_map(null_map_) {} + explicit SubcolumnCreator(const ColumnPtr & null_map_) : null_map(null_map_) {} DataTypePtr create(const DataTypePtr & prev) const override; SerializationPtr create(const SerializationPtr & prev) const override; diff --git a/src/DataTypes/Serializations/SerializationSparse.h b/src/DataTypes/Serializations/SerializationSparse.h index 51d9df2cb5d..54ab4853360 100644 --- a/src/DataTypes/Serializations/SerializationSparse.h +++ b/src/DataTypes/Serializations/SerializationSparse.h @@ -23,7 +23,7 @@ namespace DB class SerializationSparse final : public ISerialization { public: - SerializationSparse(const SerializationPtr & nested_); + explicit SerializationSparse(const SerializationPtr & nested_); Kind getKind() const override { return Kind::SPARSE; } diff --git a/src/DataTypes/Serializations/SerializationWrapper.h b/src/DataTypes/Serializations/SerializationWrapper.h index 4cdcffc21a8..43fc7e9914a 100644 --- a/src/DataTypes/Serializations/SerializationWrapper.h +++ b/src/DataTypes/Serializations/SerializationWrapper.h @@ -14,7 +14,7 @@ protected: SerializationPtr nested_serialization; public: - SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {} + explicit SerializationWrapper(const SerializationPtr & nested_serialization_) : nested_serialization(nested_serialization_) {} const SerializationPtr & getNested() const { return nested_serialization; } diff --git a/src/DataTypes/convertMySQLDataType.cpp b/src/DataTypes/convertMySQLDataType.cpp index ee897de9597..7e2f2e7c6b9 100644 --- a/src/DataTypes/convertMySQLDataType.cpp +++ b/src/DataTypes/convertMySQLDataType.cpp @@ -7,6 +7,7 @@ #include #include #include "DataTypeDate.h" +#include "DataTypeDate32.h" #include "DataTypeDateTime.h" #include "DataTypeDateTime64.h" #include "DataTypeEnum.h" @@ -73,7 +74,14 @@ DataTypePtr convertMySQLDataType(MultiEnum type_support, else 
if (type_name == "double") res = std::make_shared<DataTypeFloat64>(); else if (type_name == "date") - res = std::make_shared<DataTypeDate>(); + { + if (type_support.isSet(MySQLDataTypesSupport::DATE2DATE32)) + res = std::make_shared<DataTypeDate32>(); + else if (type_support.isSet(MySQLDataTypesSupport::DATE2STRING)) + res = std::make_shared<DataTypeString>(); + else + res = std::make_shared<DataTypeDate>(); + } else if (type_name == "binary") res = std::make_shared<DataTypeFixedString>(length); else if (type_name == "datetime" || type_name == "timestamp") diff --git a/src/Databases/DatabaseReplicatedWorker.h b/src/Databases/DatabaseReplicatedWorker.h index 773612e403c..6b957e567ff 100644 --- a/src/Databases/DatabaseReplicatedWorker.h +++ b/src/Databases/DatabaseReplicatedWorker.h @@ -30,7 +30,7 @@ public: void shutdown() override; static String enqueueQueryImpl(const ZooKeeperPtr & zookeeper, DDLLogEntry & entry, - DatabaseReplicated * const database, bool committed = false); + DatabaseReplicated * const database, bool committed = false); /// NOLINT private: bool initializeMainThread() override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 9ad33bd228f..f95653feb20 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -51,8 +51,8 @@ public: /// - it maintains a list of tables but tables are loaded lazily). virtual const StoragePtr & table() const = 0; - IDatabaseTablesIterator(const String & database_name_) : database_name(database_name_) { } - IDatabaseTablesIterator(String && database_name_) : database_name(std::move(database_name_)) { } + explicit IDatabaseTablesIterator(const String & database_name_) : database_name(database_name_) { } + explicit IDatabaseTablesIterator(String && database_name_) : database_name(std::move(database_name_)) { } virtual ~IDatabaseTablesIterator() = default; @@ -61,7 +61,7 @@ public: const String & databaseName() const { assert(!database_name.empty()); return database_name; } protected: - const String database_name; + String database_name; }; /// Copies list of tables and iterates through such snapshot. @@ -72,7 +72,7 @@ private: Tables::iterator it; protected: - DatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && other) + DatabaseTablesSnapshotIterator(DatabaseTablesSnapshotIterator && other) noexcept : IDatabaseTablesIterator(std::move(other.database_name)) { size_t idx = std::distance(other.tables.begin(), other.it); @@ -118,7 +118,7 @@ class IDatabase : public std::enable_shared_from_this<IDatabase> { public: IDatabase() = delete; - IDatabase(String database_name_) : database_name(std::move(database_name_)) {} + explicit IDatabase(String database_name_) : database_name(std::move(database_name_)) {} /// Get name of database engine. virtual String getEngineName() const = 0; @@ -129,7 +129,7 @@ public: /// Load a set of existing tables. /// You can call only once, right after the object is created. - virtual void loadStoredObjects( + virtual void loadStoredObjects( /// NOLINT ContextMutablePtr /*context*/, bool /*force_restore*/, bool /*force_attach*/ = false, @@ -175,7 +175,7 @@ public: /// Get an iterator that allows you to pass through all the tables. /// It is possible to have "hidden" tables that are not visible when passing through, but are visible if you get them by name using the functions above. 
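Two details of the date mapping above are easy to miss: the new gtest assertions expect 4 and 8 because date2Date32 and date2String occupy bits 2 and 3 of the setting's bitmask, and DATE2DATE32 is checked first, so Date32 wins when a user enables both. A standalone sketch with illustrative names (not the real MultiEnum or ClickHouse type factories):

#include <cassert>
#include <cstdint>
#include <string>

enum Bit : std::uint64_t
{
    DECIMAL     = 1u << 0, // "decimal"     -> 1
    DATETIME64  = 1u << 1, // "datetime64"  -> 2
    DATE2DATE32 = 1u << 2, // "date2Date32" -> 4, as the WithDATE test asserts
    DATE2STRING = 1u << 3, // "date2String" -> 8, as the WithDATE test asserts
};

std::string mapMySQLDate(std::uint64_t type_support)
{
    if (type_support & DATE2DATE32)
        return "Date32";
    if (type_support & DATE2STRING)
        return "String";
    return "Date";
}

int main()
{
    assert(mapMySQLDate(0) == "Date");                           // default mapping
    assert(mapMySQLDate(DATE2STRING) == "String");
    assert(mapMySQLDate(DATE2DATE32 | DATE2STRING) == "Date32"); // precedence
}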
- virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) const = 0; + virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) const = 0; /// NOLINT /// Is the database empty. virtual bool empty() const = 0; @@ -191,7 +191,7 @@ public: } /// Delete the table from the database, drop table and delete the metadata. - virtual void dropTable( + virtual void dropTable( /// NOLINT ContextPtr /*context*/, const String & /*name*/, [[maybe_unused]] bool no_delay = false) @@ -202,7 +202,7 @@ public: /// Add a table to the database, but do not add it to the metadata. The database may not support this method. /// /// Note: ATTACH TABLE statement actually uses createTable method. - virtual void attachTable(ContextPtr /* context */, const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {}) + virtual void attachTable(ContextPtr /* context */, const String & /*name*/, const StoragePtr & /*table*/, [[maybe_unused]] const String & relative_table_path = {}) /// NOLINT { throw Exception("There is no ATTACH TABLE query for Database" + getEngineName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Databases/MySQL/MaterializedMySQLSyncThread.h b/src/Databases/MySQL/MaterializedMySQLSyncThread.h index c7781595a85..163a3732fb9 100644 --- a/src/Databases/MySQL/MaterializedMySQLSyncThread.h +++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.h @@ -67,12 +67,12 @@ private: // USE MySQL ERROR CODE: // https://dev.mysql.com/doc/mysql-errors/5.7/en/server-error-reference.html - const int ER_ACCESS_DENIED_ERROR = 1045; - const int ER_DBACCESS_DENIED_ERROR = 1044; - const int ER_BAD_DB_ERROR = 1049; + const int ER_ACCESS_DENIED_ERROR = 1045; /// NOLINT + const int ER_DBACCESS_DENIED_ERROR = 1044; /// NOLINT + const int ER_BAD_DB_ERROR = 1049; /// NOLINT // https://dev.mysql.com/doc/mysql-errors/8.0/en/client-error-reference.html - const int CR_SERVER_LOST = 2013; + const int CR_SERVER_LOST = 2013; /// NOLINT struct Buffers { @@ -88,7 +88,7 @@ private: using BufferAndSortingColumnsPtr = std::shared_ptr<BufferAndSortingColumns>; std::unordered_map<String, BufferAndSortingColumnsPtr> data; - Buffers(const String & database_) : database(database_) {} + explicit Buffers(const String & database_) : database(database_) {} void commit(ContextPtr context); diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index dba8bf64798..dd125294615 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes extern const int QUERY_NOT_ALLOWED; extern const int UNKNOWN_TABLE; extern const int BAD_ARGUMENTS; + extern const int NOT_IMPLEMENTED; } DatabaseMaterializedPostgreSQL::DatabaseMaterializedPostgreSQL( @@ -309,8 +310,12 @@ void DatabaseMaterializedPostgreSQL::attachTable(ContextPtr context_, const Stri } } +StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr, const String &) +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH TABLE not allowed, use DETACH PERMANENTLY"); +} -StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, const String & table_name) +void DatabaseMaterializedPostgreSQL::detachTablePermanently(ContextPtr, const String & table_name) { /// If there is query context then we need to detach materialized storage. 
/// If there is no query context then we need to detach internal storage from atomic database. @@ -360,11 +365,6 @@ StoragePtr DatabaseMaterializedPostgreSQL::detachTable(ContextPtr context_, cons } materialized_tables.erase(table_name); - return nullptr; - } - else - { - return DatabaseAtomic::detachTable(context_, table_name); } } diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index 40ff0d9262d..08420f4ba5e 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -51,6 +51,8 @@ public: void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override; + void detachTablePermanently(ContextPtr context, const String & table_name) override; + StoragePtr detachTable(ContextPtr context, const String & table_name) override; void dropTable(ContextPtr local_context, const String & name, bool no_delay) override; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index d43bde0b886..ce1ed98b977 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -174,7 +174,7 @@ StoragePtr DatabasePostgreSQL::tryGetTable(const String & table_name, ContextPtr } -StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, const bool table_checked) const +StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, bool table_checked) const { if (!cache_tables || !cached_tables.count(table_name)) { @@ -194,7 +194,7 @@ StoragePtr DatabasePostgreSQL::fetchTable(const String & table_name, ContextPtr, if (cache_tables) cached_tables[table_name] = storage; - return std::move(storage); + return storage; } if (table_checked || checkPostgresTable(table_name)) @@ -414,7 +414,7 @@ ASTPtr DatabasePostgreSQL::getCreateTableQueryImpl(const String & table_name, Co assert(storage_engine_arguments->children.size() >= 2); storage_engine_arguments->children.insert(storage_engine_arguments->children.begin() + 2, std::make_shared<ASTLiteral>(table_id.table_name)); - return std::move(create_table_query); + return create_table_query; } diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index d41dbff1f54..3397dcc8076 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -81,7 +81,7 @@ private: bool checkPostgresTable(const String & table_name) const; - StoragePtr fetchTable(const String & table_name, ContextPtr context, const bool table_checked) const; + StoragePtr fetchTable(const String & table_name, ContextPtr context, bool table_checked) const; void removeOutdatedTables(); diff --git a/src/Dictionaries/CacheDictionaryUpdateQueue.h b/src/Dictionaries/CacheDictionaryUpdateQueue.h index 7725ce7588f..d6a195ca7b8 100644 --- a/src/Dictionaries/CacheDictionaryUpdateQueue.h +++ b/src/Dictionaries/CacheDictionaryUpdateQueue.h @@ -75,7 +75,7 @@ private: friend class CacheDictionaryUpdateQueue; std::atomic<bool> is_done{false}; - std::exception_ptr current_exception{nullptr}; + std::exception_ptr current_exception{nullptr}; /// NOLINT /// While UpdateUnit is alive, it is accounted in update_queue size. 
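The two `return std::move(...)` removals above follow the usual guideline: std::move on a local in a return statement disables copy elision (NRVO), while the implicit-move rules already make the plain return at least as cheap. A minimal sketch with a hypothetical Widget type:

#include <memory>

struct Widget {};

std::shared_ptr<Widget> plainReturn()
{
    auto p = std::make_shared<Widget>();
    return p;            // NRVO, or implicit move as a fallback
}

std::shared_ptr<Widget> pessimized()
{
    auto p = std::make_shared<Widget>();
    return std::move(p); // blocks NRVO; at best the same move
}

int main()
{
    auto a = plainReturn();
    auto b = pessimized();
}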
CurrentMetrics::Increment alive_batch{CurrentMetrics::CacheDictionaryUpdateQueueBatches}; diff --git a/src/Dictionaries/CassandraHelpers.h b/src/Dictionaries/CassandraHelpers.h index 30111e11686..3b90d46acdf 100644 --- a/src/Dictionaries/CassandraHelpers.h +++ b/src/Dictionaries/CassandraHelpers.h @@ -23,8 +23,8 @@ class ObjectHolder CassT * ptr = nullptr; public: template <typename... Args> - ObjectHolder(Args &&... args) : ptr(Ctor(std::forward<Args>(args)...)) {} - ObjectHolder(CassT * ptr_) : ptr(ptr_) {} + ObjectHolder(Args &&... args) : ptr(Ctor(std::forward<Args>(args)...)) {} /// NOLINT + ObjectHolder(CassT * ptr_) : ptr(ptr_) {} /// NOLINT ObjectHolder(const ObjectHolder &) = delete; ObjectHolder & operator = (const ObjectHolder &) = delete; @@ -46,8 +46,8 @@ public: } /// For implicit conversion when passing object to driver library functions - operator CassT * () { return ptr; } - operator const CassT * () const { return ptr; } + operator CassT * () { return ptr; } /// NOLINT + operator const CassT * () const { return ptr; } /// NOLINT }; } diff --git a/src/Dictionaries/DictionaryHelpers.h b/src/Dictionaries/DictionaryHelpers.h index f2d7febfa8e..80b15eb2569 100644 --- a/src/Dictionaries/DictionaryHelpers.h +++ b/src/Dictionaries/DictionaryHelpers.h @@ -187,7 +187,7 @@ private: DataTypes dictionary_attributes_types; }; -static inline void insertDefaultValuesIntoColumns( +static inline void insertDefaultValuesIntoColumns( /// NOLINT MutableColumns & columns, const DictionaryStorageFetchRequest & fetch_request, size_t row_index) @@ -206,7 +206,7 @@ static inline void insertDefaultValuesIntoColumns( /// Deserialize column value and insert it in columns. /// Skip unnecessary columns that were not requested from deserialization. -static inline void deserializeAndInsertIntoColumns( +static inline void deserializeAndInsertIntoColumns( /// NOLINT MutableColumns & columns, const DictionaryStorageFetchRequest & fetch_request, const char * place_for_serialized_columns) diff --git a/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h index 198f13e0f32..c2e36f59e1e 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/HierarchiesProvider.h @@ -14,7 +14,7 @@ private: FileUpdatesTracker updates_tracker; public: - RegionsHierarchyDataSource(const std::string & path_) : path(path_), updates_tracker(path_) {} + explicit RegionsHierarchyDataSource(const std::string & path_) : path(path_), updates_tracker(path_) {} bool isModified() const override; @@ -40,7 +40,7 @@ public: * For example, if /opt/geo/regions_hierarchy.txt is specified, * then the /opt/geo/regions_hierarchy_ua.txt file will also be loaded, if any, it will be accessible by the `ua` key. 
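The CassandraHelpers hunk above deliberately keeps ObjectHolder's converting constructor and conversion operators implicit (hence the NOLINTs): the holder wraps a C driver handle and must be passable straight back into C functions. A standalone toy with the same shape, against a fake C API (not the real Cassandra driver):

#include <cstdlib>

struct cass_obj;                                   // opaque C-style handle
inline cass_obj * cass_obj_new() { return static_cast<cass_obj *>(std::malloc(1)); }
inline void cass_obj_free(cass_obj * p) { std::free(p); }
inline void cass_obj_use(const cass_obj *) {}

template <typename T, void (*Dtor)(T *)>
class HolderSketch
{
    T * ptr = nullptr;
public:
    HolderSketch(T * ptr_) : ptr(ptr_) {}          // intentionally implicit, like ObjectHolder
    HolderSketch(const HolderSketch &) = delete;
    HolderSketch & operator=(const HolderSketch &) = delete;
    ~HolderSketch() { if (ptr) Dtor(ptr); }

    operator T * () { return ptr; }                // implicit handle access for C calls
    operator const T * () const { return ptr; }
};

int main()
{
    HolderSketch<cass_obj, cass_obj_free> obj = cass_obj_new(); // ok in C++17: guaranteed elision
    cass_obj_use(obj);                             // converts implicitly; freed on scope exit
}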
*/ - RegionsHierarchiesDataProvider(const std::string & path_); + explicit RegionsHierarchiesDataProvider(const std::string & path_); std::vector<std::string> listCustomHierarchies() const override; diff --git a/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h b/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h index 85dd8ce58b7..64f393ada62 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h +++ b/src/Dictionaries/Embedded/GeodataProviders/HierarchyFormatReader.h @@ -11,7 +11,7 @@ private: DB::ReadBufferPtr input; public: - RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} + explicit RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} bool readNext(RegionEntry & entry) override; }; diff --git a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h index 0606896c951..f7d51135440 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/IHierarchiesProvider.h @@ -27,7 +27,7 @@ public: virtual IRegionsHierarchyReaderPtr createReader() = 0; - virtual ~IRegionsHierarchyDataSource() {} + virtual ~IRegionsHierarchyDataSource() = default; }; using IRegionsHierarchyDataSourcePtr = std::shared_ptr<IRegionsHierarchyDataSource>; @@ -42,7 +42,7 @@ public: virtual IRegionsHierarchyDataSourcePtr getDefaultHierarchySource() const = 0; virtual IRegionsHierarchyDataSourcePtr getHierarchySource(const std::string & name) const = 0; - virtual ~IRegionsHierarchiesDataProvider() {} + virtual ~IRegionsHierarchiesDataProvider() = default; }; using IRegionsHierarchiesDataProviderPtr = std::shared_ptr<IRegionsHierarchiesDataProvider>; diff --git a/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h index 26de5d9116b..679c14d546b 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/INamesProvider.h @@ -10,7 +10,7 @@ class ILanguageRegionsNamesReader public: virtual bool readNext(RegionNameEntry & entry) = 0; - virtual ~ILanguageRegionsNamesReader() {} + virtual ~ILanguageRegionsNamesReader() = default; }; using ILanguageRegionsNamesReaderPtr = std::unique_ptr<ILanguageRegionsNamesReader>; @@ -32,7 +32,7 @@ public: virtual std::string getSourceName() const = 0; - virtual ~ILanguageRegionsNamesDataSource() {} + virtual ~ILanguageRegionsNamesDataSource() = default; }; using ILanguageRegionsNamesDataSourcePtr = std::unique_ptr<ILanguageRegionsNamesDataSource>; @@ -45,7 +45,7 @@ public: /// Returns nullptr if the language data does not exist. 
virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const = 0; - virtual ~IRegionsNamesDataProvider() {} + virtual ~IRegionsNamesDataProvider() = default; }; using IRegionsNamesDataProviderPtr = std::unique_ptr; diff --git a/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h b/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h index 573569ab115..49d324d434e 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h +++ b/src/Dictionaries/Embedded/GeodataProviders/NamesFormatReader.h @@ -11,7 +11,7 @@ private: DB::ReadBufferPtr input; public: - LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} + explicit LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {} bool readNext(RegionNameEntry & entry) override; }; diff --git a/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h b/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h index c380fcb7d1d..2d49cceab86 100644 --- a/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h +++ b/src/Dictionaries/Embedded/GeodataProviders/NamesProvider.h @@ -39,7 +39,7 @@ private: std::string directory; public: - RegionsNamesDataProvider(const std::string & directory_); + explicit RegionsNamesDataProvider(const std::string & directory_); ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const override; diff --git a/src/Dictionaries/Embedded/RegionsHierarchies.h b/src/Dictionaries/Embedded/RegionsHierarchies.h index 67cd7c2a658..925b7b490ff 100644 --- a/src/Dictionaries/Embedded/RegionsHierarchies.h +++ b/src/Dictionaries/Embedded/RegionsHierarchies.h @@ -8,7 +8,7 @@ /** Contains several hierarchies of regions. * Used to support several different perspectives on the ownership of regions by countries. - * First of all, for the Crimea (Russian and Ukrainian points of view). + * First of all, for the Falklands/Malvinas (UK and Argentina points of view). */ class RegionsHierarchies { @@ -17,7 +17,7 @@ private: Container data; public: - RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider); + explicit RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider); /** Reloads, if necessary, all hierarchies of regions. */ @@ -27,7 +27,6 @@ public: elem.second.reload(); } - const RegionsHierarchy & get(const std::string & key) const { auto it = data.find(key); diff --git a/src/Dictionaries/Embedded/RegionsHierarchy.h b/src/Dictionaries/Embedded/RegionsHierarchy.h index 45d6c5246ca..508bca0d1e1 100644 --- a/src/Dictionaries/Embedded/RegionsHierarchy.h +++ b/src/Dictionaries/Embedded/RegionsHierarchy.h @@ -49,7 +49,7 @@ private: IRegionsHierarchyDataSourcePtr data_source; public: - RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_); + explicit RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_); /// Reloads, if necessary, the hierarchy of regions. Not threadsafe. 
void reload(); diff --git a/src/Dictionaries/Embedded/RegionsNames.h b/src/Dictionaries/Embedded/RegionsNames.h index ff60c274401..ec06a0b1a33 100644 --- a/src/Dictionaries/Embedded/RegionsNames.h +++ b/src/Dictionaries/Embedded/RegionsNames.h @@ -40,7 +40,7 @@ class RegionsNames public: enum class Language : size_t { - #define M(NAME, FALLBACK, NUM) NAME = NUM, + #define M(NAME, FALLBACK, NUM) NAME = (NUM), FOR_EACH_LANGUAGE(M) #undef M }; @@ -78,7 +78,7 @@ private: static std::string dumpSupportedLanguagesNames(); public: - RegionsNames(IRegionsNamesDataProviderPtr data_provider); + explicit RegionsNames(IRegionsNamesDataProviderPtr data_provider); StringRef getRegionName(RegionID region_id, Language language) const { @@ -104,7 +104,7 @@ public: #define M(NAME, FALLBACK, NUM) \ if (0 == language.compare(#NAME)) \ return Language::NAME; - FOR_EACH_LANGUAGE(M) + FOR_EACH_LANGUAGE(M) /// NOLINT #undef M throw Poco::Exception("Unsupported language for region name. Supported languages are: " + dumpSupportedLanguagesNames() + "."); } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index 26144821a0e..cb2419633bf 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -32,13 +32,11 @@ FlatDictionary::FlatDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, Configuration configuration_, BlockPtr update_field_loaded_block_) : IDictionary(dict_id_) , dict_struct(dict_struct_) , source_ptr{std::move(source_ptr_)} - , dict_lifetime(dict_lifetime_) , configuration(configuration_) , loaded_keys(configuration.initial_array_size, false) , update_field_loaded_block(std::move(update_field_loaded_block_)) @@ -604,18 +602,19 @@ void registerDictionaryFlat(DictionaryFactory & factory) static constexpr size_t default_max_array_size = 500000; String dictionary_layout_prefix = config_prefix + ".layout" + ".flat"; + const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; FlatDictionary::Configuration configuration { .initial_array_size = config.getUInt64(dictionary_layout_prefix + ".initial_array_size", default_initial_array_size), .max_array_size = config.getUInt64(dictionary_layout_prefix + ".max_array_size", default_max_array_size), - .require_nonempty = config.getBool(config_prefix + ".require_nonempty", false) + .require_nonempty = config.getBool(config_prefix + ".require_nonempty", false), + .dict_lifetime = dict_lifetime }; const auto dict_id = StorageID::fromDictionaryConfig(config, config_prefix); - const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"}; - return std::make_unique<FlatDictionary>(dict_id, dict_struct, std::move(source_ptr), dict_lifetime, std::move(configuration)); + return std::make_unique<FlatDictionary>(dict_id, dict_struct, std::move(source_ptr), std::move(configuration)); }; factory.registerLayout("flat", create_layout, false); diff --git a/src/Dictionaries/FlatDictionary.h b/src/Dictionaries/FlatDictionary.h index 2578fef3ecb..f342c38802d 100644 --- a/src/Dictionaries/FlatDictionary.h +++ b/src/Dictionaries/FlatDictionary.h @@ -26,13 +26,13 @@ public: size_t initial_array_size; size_t max_array_size; bool require_nonempty; + DictionaryLifetime dict_lifetime; }; FlatDictionary( const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, Configuration configuration_, BlockPtr update_field_loaded_block_ = 
nullptr); @@ -58,12 +58,12 @@ public: std::shared_ptr<const IExternalLoadable> clone() const override { - return std::make_shared<FlatDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), dict_lifetime, configuration, update_field_loaded_block); + return std::make_shared<FlatDictionary>(getDictionaryID(), dict_struct, source_ptr->clone(), configuration, update_field_loaded_block); } DictionarySourcePtr getSource() const override { return source_ptr; } - const DictionaryLifetime & getLifetime() const override { return dict_lifetime; } + const DictionaryLifetime & getLifetime() const override { return configuration.dict_lifetime; } const DictionaryStructure & getStructure() const override { return dict_struct; } @@ -159,7 +159,6 @@ private: const DictionaryStructure dict_struct; const DictionarySourcePtr source_ptr; - const DictionaryLifetime dict_lifetime; const Configuration configuration; std::vector<Attribute> attributes; diff --git a/src/Dictionaries/ICacheDictionaryStorage.h b/src/Dictionaries/ICacheDictionaryStorage.h index b094d76a9a7..a4990528a4e 100644 --- a/src/Dictionaries/ICacheDictionaryStorage.h +++ b/src/Dictionaries/ICacheDictionaryStorage.h @@ -22,7 +22,7 @@ struct KeyState , fetched_column_index(fetched_column_index_) {} - KeyState(State state_) + KeyState(State state_) /// NOLINT : state(state_) {} diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index 042153f0971..c18dbcfbea7 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -150,7 +150,7 @@ public: auto & key_column_to_cast = key_columns[key_attribute_type_index]; ColumnWithTypeAndName column_to_cast = {key_column_to_cast, key_type, ""}; - auto casted_column = castColumnAccurate(std::move(column_to_cast), key_attribute_type); + auto casted_column = castColumnAccurate(column_to_cast, key_attribute_type); key_column_to_cast = std::move(casted_column); key_type = key_attribute_type; } diff --git a/src/Dictionaries/IPAddressDictionary.h b/src/Dictionaries/IPAddressDictionary.h index 8dddc988caa..894af5ceb71 100644 --- a/src/Dictionaries/IPAddressDictionary.h +++ b/src/Dictionaries/IPAddressDictionary.h @@ -26,7 +26,7 @@ public: const StorageID & dict_id_, const DictionaryStructure & dict_struct_, DictionarySourcePtr source_ptr_, - const DictionaryLifetime dict_lifetime_, + const DictionaryLifetime dict_lifetime_, /// NOLINT bool require_nonempty_); std::string getKeyDescription() const { return key_description; } @@ -160,7 +160,7 @@ private: template <typename T> static void createAttributeImpl(Attribute & attribute, const Field & null_value); - static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); + static Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value); /// NOLINT template <typename T> void getItemsByTwoKeyColumnsImpl( @@ -177,7 +177,7 @@ private: DefaultValueExtractor & default_value_extractor) const; template <typename T> - void setAttributeValueImpl(Attribute & attribute, const T value); + void setAttributeValueImpl(Attribute & attribute, const T value); /// NOLINT void setAttributeValue(Attribute & attribute, const Field & value); diff --git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 0aca7cd8af0..9d6d6ae0501 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -38,7 +38,7 @@ public: SlabsPolygonIndex() = default; /** Builds an index by splitting all edges with all points x coordinates. 
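With the lifetime folded into FlatDictionary::Configuration (see the FlatDictionary hunks above), the factory now fills a single aggregate with designated initializers, which keeps every field labeled at the call site. A simplified sketch of that style with stand-in structs and invented values:

#include <cstddef>

struct LifetimeSketch { std::size_t min_sec = 0; std::size_t max_sec = 0; };

struct ConfigurationSketch
{
    std::size_t initial_array_size;
    std::size_t max_array_size;
    bool require_nonempty;
    LifetimeSketch dict_lifetime;   // travels inside the config, not as a separate constructor parameter
};

int main()
{
    ConfigurationSketch configuration
    {
        .initial_array_size = 1024,
        .max_array_size = 500000,
        .require_nonempty = false,
        .dict_lifetime = {.min_sec = 10, .max_sec = 60},
    };
    return configuration.require_nonempty ? 1 : 0;
}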
*/ - SlabsPolygonIndex(const std::vector & polygons); + explicit SlabsPolygonIndex(const std::vector & polygons); /** Finds polygon id the same way as IPolygonIndex. */ bool find(const Point & point, size_t & id) const; @@ -179,7 +179,7 @@ class GridRoot : public ICell { public: GridRoot(size_t min_intersections_, size_t max_depth_, const std::vector & polygons_): - kMinIntersections(min_intersections_), kMaxDepth(max_depth_), polygons(polygons_) + k_min_intersections(min_intersections_), k_max_depth(max_depth_), polygons(polygons_) { setBoundingBox(); std::vector order(polygons.size()); @@ -209,8 +209,8 @@ private: std::unique_ptr> root = nullptr; Coord min_x = 0, min_y = 0; Coord max_x = 0, max_y = 0; - const size_t kMinIntersections; - const size_t kMaxDepth; + const size_t k_min_intersections; + const size_t k_max_depth; const std::vector & polygons; @@ -236,7 +236,7 @@ private: } #endif size_t intersections = possible_ids.size() - covered; - if (intersections <= kMinIntersections || depth++ == kMaxDepth) + if (intersections <= k_min_intersections || depth++ == k_max_depth) return std::make_unique(possible_ids, polygons, current_box, covered); auto x_shift = (current_max_x - current_min_x) / DividedCell::kSplit; auto y_shift = (current_max_y - current_min_y) / DividedCell::kSplit; diff --git a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp index d3777f8ca00..fb07d8c356b 100644 --- a/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp +++ b/src/Disks/AzureBlobStorage/DiskAzureBlobStorage.cpp @@ -53,7 +53,7 @@ DiskAzureBlobStorage::DiskAzureBlobStorage( std::shared_ptr blob_container_client_, SettingsPtr settings_, GetDiskSettings settings_getter_) : - IDiskRemote(name_, "", metadata_disk_, "DiskAzureBlobStorage", settings_->thread_pool_size), + IDiskRemote(name_, "", metadata_disk_, nullptr, "DiskAzureBlobStorage", settings_->thread_pool_size), blob_container_client(blob_container_client_), current_settings(std::move(settings_)), settings_getter(settings_getter_) {} @@ -70,13 +70,11 @@ std::unique_ptr DiskAzureBlobStorage::readFile( LOG_TEST(log, "Read from file by path: {}", backQuote(metadata_disk->getPath() + path)); - bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; - auto reader_impl = std::make_unique( path, blob_container_client, metadata, settings->max_single_read_retries, - settings->max_single_download_retries, read_settings, threadpool_read); + settings->max_single_download_retries, read_settings); - if (threadpool_read) + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto reader = getThreadPoolReader(); return std::make_unique(reader, read_settings, std::move(reader_impl)); diff --git a/src/Disks/DiskCacheWrapper.cpp b/src/Disks/DiskCacheWrapper.cpp index 7cacab98af5..3519b1212a4 100644 --- a/src/Disks/DiskCacheWrapper.cpp +++ b/src/Disks/DiskCacheWrapper.cpp @@ -144,11 +144,13 @@ DiskCacheWrapper::readFile( } } + auto current_read_settings = settings; /// Do not use RemoteFSReadMethod::threadpool for index and mark files. /// Here it does not make sense since the files are small. /// Note: enabling `threadpool` read requires to call setReadUntilEnd(). - auto current_read_settings = settings; current_read_settings.remote_fs_method = RemoteFSReadMethod::read; + /// Disable data cache. 
+ current_read_settings.remote_fs_enable_cache = false; if (metadata->status == DOWNLOADING) { diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index e49e9cf6726..44fdbb77323 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Disks/DiskMemory.h b/src/Disks/DiskMemory.h index eef7b78502d..fe108f53c68 100644 --- a/src/Disks/DiskMemory.h +++ b/src/Disks/DiskMemory.h @@ -22,7 +22,7 @@ class WriteBufferFromFileBase; class DiskMemory : public IDisk { public: - DiskMemory(const String & name_) : name(name_), disk_path("memory://" + name_ + '/') {} + explicit DiskMemory(const String & name_) : name(name_), disk_path("memory://" + name_ + '/') {} const String & getName() const override { return name; } @@ -97,7 +97,6 @@ private: void createDirectoriesImpl(const String & path); void replaceFileImpl(const String & from_path, const String & to_path); -private: friend class WriteIndirectBuffer; enum class FileType @@ -112,7 +111,7 @@ private: String data; FileData(FileType type_, String data_) : type(type_), data(std::move(data_)) {} - explicit FileData(FileType type_) : type(type_), data("") {} + explicit FileData(FileType type_) : type(type_) {} }; using Files = std::unordered_map; /// file path -> file data diff --git a/src/Disks/DiskRestartProxy.cpp b/src/Disks/DiskRestartProxy.cpp index beeb76bd91b..43011a4cf72 100644 --- a/src/Disks/DiskRestartProxy.cpp +++ b/src/Disks/DiskRestartProxy.cpp @@ -41,6 +41,8 @@ public: swap(*impl); } + String getInfoForLog() override { return impl->getInfoForLog(); } + private: ReadLock lock; }; diff --git a/src/Disks/DiskSelector.h b/src/Disks/DiskSelector.h index 0cd1267c6ef..a2fce4b14d1 100644 --- a/src/Disks/DiskSelector.h +++ b/src/Disks/DiskSelector.h @@ -19,7 +19,7 @@ class DiskSelector { public: DiskSelector(const Poco::Util::AbstractConfiguration & config, const String & config_prefix, ContextPtr context); - DiskSelector(const DiskSelector & from) : disks(from.disks) { } + DiskSelector(const DiskSelector & from) = default; DiskSelectorPtr updateFromConfig( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Disks/DiskWebServer.cpp b/src/Disks/DiskWebServer.cpp index 7c94a5b98b1..f3039d9af2e 100644 --- a/src/Disks/DiskWebServer.cpp +++ b/src/Disks/DiskWebServer.cpp @@ -168,11 +168,9 @@ std::unique_ptr<ReadBufferFromFileBase> DiskWebServer::readFile(const String & p RemoteMetadata meta(path, remote_path); meta.remote_fs_objects.emplace_back(std::make_pair(remote_path, iter->second.size)); - bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + auto web_impl = std::make_unique(path, url, meta, getContext(), read_settings); - auto web_impl = std::make_unique(path, url, meta, getContext(), threadpool_read, read_settings); - - if (threadpool_read) + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto reader = IDiskRemote::getThreadPoolReader(); return std::make_unique<AsynchronousReadIndirectBufferFromRemoteFS>(reader, read_settings, std::move(web_impl), min_bytes_for_seek); diff --git a/src/Disks/DiskWebServer.h b/src/Disks/DiskWebServer.h index bda8c8adaad..e2da0b2a1e1 100644 --- a/src/Disks/DiskWebServer.h +++ b/src/Disks/DiskWebServer.h @@ -38,7 +38,7 @@ namespace ErrorCodes * * To get files for upload run: * clickhouse static-files-disk-uploader --metadata-path --output-dir - * (--metadata-path can be found in query: `select 
data_paths from system.tables where name='';`) /// NOLINT * * When loading files by they must be loaded into /store/ path, but config must contain only . * diff --git a/src/Disks/HDFS/DiskHDFS.cpp b/src/Disks/HDFS/DiskHDFS.cpp index 51691806089..7f60b219a4b 100644 --- a/src/Disks/HDFS/DiskHDFS.cpp +++ b/src/Disks/HDFS/DiskHDFS.cpp @@ -65,7 +65,7 @@ DiskHDFS::DiskHDFS( SettingsPtr settings_, DiskPtr metadata_disk_, const Poco::Util::AbstractConfiguration & config_) - : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, "DiskHDFS", settings_->thread_pool_size) + : IDiskRemote(disk_name_, hdfs_root_path_, metadata_disk_, nullptr, "DiskHDFS", settings_->thread_pool_size) , config(config_) , hdfs_builder(createHDFSBuilder(hdfs_root_path_, config)) , hdfs_fs(createHDFSFS(hdfs_builder.get())) @@ -82,7 +82,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskHDFS::readFile(const String & path, "Read from file by path: {}. Existing HDFS objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - auto hdfs_impl = std::make_unique(path, config, remote_fs_root_path, metadata, read_settings.remote_fs_buffer_size); + auto hdfs_impl = std::make_unique(path, config, remote_fs_root_path, metadata, read_settings); auto buf = std::make_unique<ReadIndirectBufferFromRemoteFS>(std::move(hdfs_impl)); return std::make_unique<SeekAvoidingReadBuffer>(std::move(buf), settings->min_bytes_for_seek); } diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h index 5068ac5dde9..d7d94cd03d7 100644 --- a/src/Disks/IDisk.h +++ b/src/Disks/IDisk.h @@ -158,14 +158,14 @@ public: virtual void listFiles(const String & path, std::vector<String> & file_names) = 0; /// Open the file for read and return ReadBufferFromFileBase object. - virtual std::unique_ptr<ReadBufferFromFileBase> readFile( + virtual std::unique_ptr<ReadBufferFromFileBase> readFile( /// NOLINT const String & path, const ReadSettings & settings = ReadSettings{}, std::optional<size_t> read_hint = {}, std::optional<size_t> file_size = {}) const = 0; /// Open the file for write and return WriteBufferFromFileBase object. - virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( + virtual std::unique_ptr<WriteBufferFromFileBase> writeFile( /// NOLINT const String & path, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, WriteMode mode = WriteMode::Rewrite) = 0; @@ -354,7 +354,7 @@ public: virtual UInt64 getSize() const = 0; /// Get i-th disk where reservation take place. 
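The /// NOLINT markers on readFile, writeFile and getDisk above silence clang-tidy's complaint about default arguments on virtual functions. The complaint is legitimate in general: defaults are bound statically, so an override can observe a different value than the caller intended. A minimal demonstration with made-up classes:

#include <cassert>

struct Base
{
    virtual int f(int x = 1) const { return x; }
    virtual ~Base() = default;
};

struct Derived : Base
{
    int f(int x = 2) const override { return x; }
};

int main()
{
    Derived d;
    const Base & b = d;
    assert(b.f() == 1); // dispatches to Derived::f, but uses Base's default argument
    assert(d.f() == 2); // a direct call uses Derived's default
}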
- virtual DiskPtr getDisk(size_t i = 0) const = 0; + virtual DiskPtr getDisk(size_t i = 0) const = 0; /// NOLINT /// Get all disks, used in reservation virtual Disks getDisks() const = 0; diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index 2a9aded039b..c7d9eb93a60 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -26,12 +27,12 @@ namespace ErrorCodes extern const int PATH_ACCESS_DENIED;; extern const int FILE_DOESNT_EXIST; extern const int BAD_FILE_TYPE; + extern const int MEMORY_LIMIT_EXCEEDED; } IDiskRemote::Metadata IDiskRemote::Metadata::readMetadata(const String & remote_fs_root_path_, DiskPtr metadata_disk_, const String & metadata_file_path_) { - Metadata result(remote_fs_root_path_, metadata_disk_, metadata_file_path_); result.load(); return result; @@ -139,6 +140,9 @@ void IDiskRemote::Metadata::load() if (e.code() == ErrorCodes::UNKNOWN_FORMAT) throw; + if (e.code() == ErrorCodes::MEMORY_LIMIT_EXCEEDED) + throw; + throw Exception("Failed to read metadata file", e, ErrorCodes::UNKNOWN_FORMAT); } } @@ -281,7 +285,16 @@ void IDiskRemote::removeMetadata(const String & path, RemoteFSPathKeeperPtr fs_p if (metadata.ref_count == 0) { for (const auto & [remote_fs_object_path, _] : metadata.remote_fs_objects) + { fs_paths_keeper->addPath(remote_fs_root_path + remote_fs_object_path); + + if (cache) + { + auto key = cache->hash(remote_fs_object_path); + cache->remove(key); + } + } + return false; } else /// In other case decrement number of references, save metadata and delete hardlink. @@ -377,6 +390,7 @@ IDiskRemote::IDiskRemote( const String & name_, const String & remote_fs_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, const String & log_name_, size_t thread_pool_size) : IDisk(std::make_unique(log_name_, thread_pool_size)) @@ -384,6 +398,7 @@ IDiskRemote::IDiskRemote( , name(name_) , remote_fs_root_path(remote_fs_root_path_) , metadata_disk(metadata_disk_) + , cache(cache_) { } @@ -439,6 +454,7 @@ void IDiskRemote::removeSharedFile(const String & path, bool delete_metadata_onl { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); removeMetadata(path, fs_paths_keeper); + if (!delete_metadata_only) removeFromRemoteFS(fs_paths_keeper); } @@ -447,6 +463,7 @@ void IDiskRemote::removeSharedFile(const String & path, bool delete_metadata_onl void IDiskRemote::removeSharedFileIfExists(const String & path, bool delete_metadata_only) { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); + if (metadata_disk->exists(path)) { removeMetadata(path, fs_paths_keeper); @@ -473,6 +490,7 @@ void IDiskRemote::removeSharedRecursive(const String & path, bool delete_metadat { RemoteFSPathKeeperPtr fs_paths_keeper = createFSPathKeeper(); removeMetadataRecursive(path, fs_paths_keeper); + if (!delete_metadata_only) removeFromRemoteFS(fs_paths_keeper); } diff --git a/src/Disks/IDiskRemote.h b/src/Disks/IDiskRemote.h index bdb09804a6c..82e76b8f68d 100644 --- a/src/Disks/IDiskRemote.h +++ b/src/Disks/IDiskRemote.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -12,7 +13,6 @@ #include #include -namespace fs = std::filesystem; namespace CurrentMetrics { @@ -27,7 +27,7 @@ namespace DB class RemoteFSPathKeeper { public: - RemoteFSPathKeeper(size_t chunk_limit_) : chunk_limit(chunk_limit_) {} + explicit RemoteFSPathKeeper(size_t chunk_limit_) : chunk_limit(chunk_limit_) {} virtual ~RemoteFSPathKeeper() = default; @@ -55,6 +55,7 @@ public: const String 
& name_, const String & remote_fs_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, const String & log_name_, size_t thread_pool_size); @@ -162,6 +163,7 @@ protected: const String remote_fs_root_path; DiskPtr metadata_disk; + FileCachePtr cache; private: void removeMetadata(const String & path, RemoteFSPathKeeperPtr fs_paths_keeper); diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index 9e3425e8986..e693a8e9ea8 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -48,6 +48,11 @@ AsynchronousReadIndirectBufferFromRemoteFS::AsynchronousReadIndirectBufferFromRe , prefetch_buffer(settings_.remote_fs_buffer_size) , min_bytes_for_seek(min_bytes_for_seek_) , must_read_until_position(settings_.must_read_until_position) +#ifndef NDEBUG + , log(&Poco::Logger::get("AsynchronousBufferFromRemoteFS")) +#else + , log(&Poco::Logger::get("AsyncBuffer(" + impl->getFileName() + ")")) +#endif { ProfileEvents::increment(ProfileEvents::RemoteFSBuffers); } @@ -59,6 +64,12 @@ String AsynchronousReadIndirectBufferFromRemoteFS::getFileName() const } +String AsynchronousReadIndirectBufferFromRemoteFS::getInfoForLog() +{ + return impl->getInfoForLog(); +} + + bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() { /** @@ -76,8 +87,8 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() return false; if (file_offset_of_buffer_end > *read_until_position) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {})", - file_offset_of_buffer_end, *read_until_position); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Read beyond last offset ({} > {}, info: {})", + file_offset_of_buffer_end, *read_until_position, impl->getInfoForLog()); } else if (must_read_until_position) throw Exception(ErrorCodes::LOGICAL_ERROR, @@ -125,8 +136,11 @@ void AsynchronousReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t pos if (prefetch_future.valid()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Prefetch is valid in readUntilPosition"); - read_until_position = position; - impl->setReadUntilPosition(*read_until_position); + if (position > read_until_position) + { + read_until_position = position; + impl->setReadUntilPosition(*read_until_position); + } } @@ -157,8 +171,10 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() auto result = prefetch_future.get(); size = result.size; offset = result.offset; + LOG_TEST(log, "Current size: {}, offset: {}", size, offset); + /// If prefetch_future is valid, size should always be greater than zero. - assert(offset < size && size > 0); + assert(offset < size); ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); } @@ -173,7 +189,10 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() auto result = readInto(memory.data(), memory.size()).get(); size = result.size; auto offset = result.offset; - assert(offset < size || size == 0); + + LOG_TEST(log, "Current size: {}, offset: {}", size, offset); + assert(offset < size); + if (size) { /// Adjust the working buffer so that it ignores `offset` bytes. 
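The new guard in setReadUntilPosition above makes the read boundary grow-only: shrinking it could invalidate a prefetch that was already issued against the old boundary, while growing it is always safe. A simplified model of that invariant (not the real buffer class):

#include <cassert>
#include <cstddef>
#include <optional>

struct ReadBoundarySketch
{
    std::optional<std::size_t> read_until_position;

    void setReadUntilPosition(std::size_t position)
    {
        if (!read_until_position || position > *read_until_position)
            read_until_position = position; // the real code forwards to impl->setReadUntilPosition here
    }
};

int main()
{
    ReadBoundarySketch b;
    b.setReadUntilPosition(100);
    b.setReadUntilPosition(50);  // ignored: never shrink below an already-issued prefetch
    assert(*b.read_until_position == 100);
    b.setReadUntilPosition(200); // growing is fine
    assert(*b.read_until_position == 200);
}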
@@ -181,7 +200,9 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::nextImpl() } } - file_offset_of_buffer_end = impl->offset(); + file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); + assert(file_offset_of_buffer_end == impl->getImplementationBufferOffset()); + prefetch_future = {}; return size; } diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h index c9b81c98e61..48c4ff3b4f0 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.h @@ -5,6 +5,7 @@ #include #include +namespace Poco { class Logger; } namespace DB { @@ -44,10 +45,12 @@ public: void prefetch() override; - void setReadUntilPosition(size_t position) override; + void setReadUntilPosition(size_t position) override; /// [..., position). void setReadUntilEnd() override; + String getInfoForLog() override; + private: bool nextImpl() override; @@ -76,6 +79,8 @@ private: std::optional read_until_position; bool must_read_until_position; + + Poco::Logger * log; }; } diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp new file mode 100644 index 00000000000..a9039ce0a2f --- /dev/null +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.cpp @@ -0,0 +1,744 @@ +#include "CachedReadBufferFromRemoteFS.h" + +#include +#include +#include +#include +#include + + +namespace ProfileEvents +{ + extern const Event RemoteFSReadBytes; + extern const Event RemoteFSCacheReadBytes; + extern const Event RemoteFSCacheDownloadBytes; +} + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int LOGICAL_ERROR; +} + +CachedReadBufferFromRemoteFS::CachedReadBufferFromRemoteFS( + const String & remote_fs_object_path_, + FileCachePtr cache_, + RemoteFSFileReaderCreator remote_file_reader_creator_, + const ReadSettings & settings_, + size_t read_until_position_) + : SeekableReadBuffer(nullptr, 0) +#ifndef NDEBUG + , log(&Poco::Logger::get("CachedReadBufferFromRemoteFS(" + remote_fs_object_path_ + ")")) +#else + , log(&Poco::Logger::get("CachedReadBufferFromRemoteFS")) +#endif + , cache_key(cache_->hash(remote_fs_object_path_)) + , remote_fs_object_path(remote_fs_object_path_) + , cache(cache_) + , settings(settings_) + , read_until_position(read_until_position_) + , remote_file_reader_creator(remote_file_reader_creator_) +{ +} + +void CachedReadBufferFromRemoteFS::initialize(size_t offset, size_t size) +{ + file_segments_holder.emplace(cache->getOrSet(cache_key, offset, size)); + + /** + * Segments in returned list are ordered in ascending order and represent a full contiguous + * interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY. 
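+ *
+ * An illustrative example (editor's addition, sizes hypothetical): with a maximum
+ * file segment size of 100, getOrSet(key, offset = 150, size = 250) may return three
+ * segments covering [150, 199] DOWNLOADED, [200, 299] DOWNLOADING (another query is
+ * filling it) and [300, 399] EMPTY -- ascending, contiguous, no holes.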
+ */ + if (file_segments_holder->file_segments.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "List of file segments cannot be empty"); + + LOG_TEST(log, "Having {} file segments to read", file_segments_holder->file_segments.size()); + current_file_segment_it = file_segments_holder->file_segments.begin(); + + initialized = true; +} + +SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t offset) const +{ + return std::make_shared(cache->getPathInLocalCache(cache_key, offset), settings.local_fs_buffer_size); +} + +SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_) +{ + switch (read_type_) + { + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + { + /** + * Each downloader is elected to download at most buffer_size bytes and then any other can + * continue. The one who continues the download should reuse the download buffer. + * + * TODO: Also the implementation (s3, hdfs, web) buffer might be passed through file segments. + * E.g. consider that for query1 and query2 we need intersecting ranges like this: + * + * [___________] -- read_range_1 for query1 + * [_______________] -- read_range_2 for query2 + * ^___________^______^ + * | segment1 | segment2 + * + * So query2 can reuse the implementation buffer which downloaded segment1. + * The implementation buffer from segment1 is passed to segment2 once segment1 is loaded. + */ + + auto remote_fs_segment_reader = file_segment->getRemoteFileReader(); + + if (remote_fs_segment_reader) + return remote_fs_segment_reader; + + remote_fs_segment_reader = remote_file_reader_creator(); + file_segment->setRemoteFileReader(remote_fs_segment_reader); + + /// TODO: add a check for pending data + return remote_fs_segment_reader; + } + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + { + /// The result buffer is owned only by the current buffer -- not shareable like in the case above. + + if (remote_file_reader && remote_file_reader->getFileOffsetOfBufferEnd() == file_offset_of_buffer_end) + return remote_file_reader; + + remote_file_reader = remote_file_reader_creator(); + return remote_file_reader; + } + default: + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot use remote filesystem reader with read type: {}", toString(read_type_)); + } +} + +SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(FileSegmentPtr & file_segment) +{ + auto range = file_segment->range(); + + /// Each wait() call has a timeout of 1 second.
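+ /// (So a DOWNLOADING segment is waited on for at most remote_fs_cache_max_wait_sec
+ /// seconds in total before the cache is bypassed for it.)
+ ///
+ /// Editor's summary of the loop below (an overview, not part of the original patch) --
+ /// how a segment's state maps to the way it is read:
+ ///     SKIP_CACHE                            -> REMOTE_FS_READ_BYPASS_CACHE
+ ///     DOWNLOADED                            -> CACHED
+ ///     DOWNLOADING                           -> wait(), then re-dispatch on the new state
+ ///     EMPTY / PARTIALLY_DOWNLOADED          -> if this buffer wins getOrSetDownloader():
+ ///                                              REMOTE_FS_READ_AND_PUT_IN_CACHE (predownloading
+ ///                                              if the download offset lags behind), or CACHED
+ ///                                              when the needed bytes are already on disk;
+ ///                                              otherwise re-dispatch on the new state
+ ///     PARTIALLY_DOWNLOADED_NO_CONTINUATION  -> CACHED if the download offset is already past
+ ///                                              file_offset_of_buffer_end, else bypass the cache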
+ size_t wait_download_max_tries = settings.remote_fs_cache_max_wait_sec; + size_t wait_download_tries = 0; + + auto download_state = file_segment->state(); + while (true) + { + switch (download_state) + { + case FileSegment::State::SKIP_CACHE: + { + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + case FileSegment::State::EMPTY: + { + auto downloader_id = file_segment->getOrSetDownloader(); + if (downloader_id == file_segment->getCallerId()) + { + if (file_offset_of_buffer_end == file_segment->getDownloadOffset()) + { + read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + else + { + /// segment{k} + /// cache: [______|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); + bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); + + read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + } + else + { + download_state = file_segment->state(); + continue; + } + } + case FileSegment::State::DOWNLOADING: + { + if (wait_download_tries++ < wait_download_max_tries) + { + download_state = file_segment->wait(); + } + else + { + download_state = FileSegment::State::SKIP_CACHE; + } + + continue; + } + case FileSegment::State::DOWNLOADED: + { + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + case FileSegment::State::PARTIALLY_DOWNLOADED: + { + auto downloader_id = file_segment->getOrSetDownloader(); + if (downloader_id == file_segment->getCallerId()) + { + size_t download_offset = file_segment->getDownloadOffset(); + bool can_start_from_cache = download_offset > file_offset_of_buffer_end; + + LOG_TEST(log, "Current download offset: {}, file offset of buffer end: {}", download_offset, file_offset_of_buffer_end); + + if (can_start_from_cache) + { + /// segment{k} + /// cache: [______|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + read_type = ReadType::CACHED; + file_segment->resetDownloader(); + return getCacheReadBuffer(range.left); + } + + if (download_offset < file_offset_of_buffer_end) + { + /// segment{1} + /// cache: [_____|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + assert(file_offset_of_buffer_end > file_segment->getDownloadOffset()); + bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset(); + } + + download_offset = file_segment->getDownloadOffset(); + can_start_from_cache = download_offset > file_offset_of_buffer_end; + assert(!can_start_from_cache); + + read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + + download_state = file_segment->state(); + continue; + } + case FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION: + { + size_t download_offset = file_segment->getDownloadOffset(); + bool can_start_from_cache = download_offset > file_offset_of_buffer_end; + + if (can_start_from_cache) + { + read_type = ReadType::CACHED; + return getCacheReadBuffer(range.left); + } + else + { + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + return getRemoteFSReadBuffer(file_segment, read_type); + } + } + } + } +} + +SeekableReadBufferPtr 
CachedReadBufferFromRemoteFS::getImplementationBuffer(FileSegmentPtr & file_segment) +{ + assert(!file_segment->isDownloader()); + assert(file_offset_of_buffer_end >= file_segment->range().left); + + auto range = file_segment->range(); + bytes_to_predownload = 0; + + auto read_buffer_for_file_segment = getReadBufferForFileSegment(file_segment); + + [[maybe_unused]] auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + assert(download_current_segment == file_segment->isDownloader()); + + assert(file_segment->range() == range); + assert(file_offset_of_buffer_end >= range.left && file_offset_of_buffer_end <= range.right); + + LOG_TEST(log, "Current file segment: {}, read type: {}, current file offset: {}", + range.toString(), toString(read_type), file_offset_of_buffer_end); + + read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right] + + switch (read_type) + { + case ReadType::CACHED: + { + size_t seek_offset = file_offset_of_buffer_end - range.left; + read_buffer_for_file_segment->seek(seek_offset, SEEK_SET); + + auto * file_reader = assert_cast(read_buffer_for_file_segment.get()); + size_t file_size = file_reader->size(); + auto state = file_segment->state(); + + LOG_TEST(log, "Cache file: {}. Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}", + file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset()); + + assert(file_size > 0); + break; + } + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + { + read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET); + break; + } + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + { + assert(file_segment->isDownloader()); + + if (bytes_to_predownload) + { + size_t download_offset = file_segment->getDownloadOffset(); + read_buffer_for_file_segment->seek(download_offset, SEEK_SET); + } + else + { + read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET); + } + + auto impl_range = read_buffer_for_file_segment->getRemainingReadRange(); + auto download_offset = file_segment->getDownloadOffset(); + if (download_offset != static_cast(read_buffer_for_file_segment->getPosition())) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, " + "implementation buffer reading until: {}, file segment info: {}", + file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(), + impl_range.left, *impl_range.right, file_segment->getInfoForLog()); + + break; + } + } + + return read_buffer_for_file_segment; +} + +bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext() +{ + LOG_TEST(log, "Completed segment: {}", (*current_file_segment_it)->range().toString()); + + auto file_segment_it = current_file_segment_it++; + auto & file_segment = *file_segment_it; + + [[maybe_unused]] const auto & range = file_segment->range(); + assert(file_offset_of_buffer_end > range.right); + + LOG_TEST(log, "Removing file segment: {}, downloader: {}, state: {}", + file_segment->range().toString(), file_segment->getDownloader(), file_segment->state()); + + /// Do not hold pointer to file segment if it is not needed anymore + /// so can become releasable and can be evicted from cache. 
+ file_segments_holder->file_segments.erase(file_segment_it); + + if (current_file_segment_it == file_segments_holder->file_segments.end()) + return false; + + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + + LOG_TEST(log, "New segment: {}", (*current_file_segment_it)->range().toString()); + return true; +} + +void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment) +{ + if (bytes_to_predownload) + { + /// Consider this case. Some user needed segment [a, b] and downloaded it partially. + /// But before it called complete(state) or its holder called complete(), + /// some other user, who needed segment [a', b'], a < a' < b', started waiting on [a, b] to be + /// downloaded because it intersects with the range it needs. + /// But then the first downloader fails and the second must continue. In this case we need to + /// download from offset a'' < a', but return the buffer from offset a'. + LOG_TEST(log, "Bytes to predownload: {}, caller_id: {}", bytes_to_predownload, FileSegment::getCallerId()); + + assert(implementation_buffer->getFileOffsetOfBufferEnd() == file_segment->getDownloadOffset()); + + while (true) + { + if (!bytes_to_predownload || implementation_buffer->eof()) + { + if (bytes_to_predownload) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Failed to predownload remaining {} bytes. Current file segment: {}, current download offset: {}, expected: {}, eof: {}", + bytes_to_predownload, file_segment->range().toString(), file_segment->getDownloadOffset(), file_offset_of_buffer_end, implementation_buffer->eof()); + + auto result = implementation_buffer->hasPendingData(); + + if (result) + { + nextimpl_working_buffer_offset = implementation_buffer->offset(); + + auto download_offset = file_segment->getDownloadOffset(); + if (download_offset != static_cast(implementation_buffer->getPosition()) || download_offset != file_offset_of_buffer_end) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Buffer's offsets mismatch after predownloading; download offset: {}, cached buffer offset: {}, implementation buffer offset: {}, " + "file segment info: {}", download_offset, file_offset_of_buffer_end, implementation_buffer->getPosition(), file_segment->getInfoForLog()); + } + + break; + } + + size_t current_predownload_size = std::min(implementation_buffer->buffer().size(), bytes_to_predownload); + + if (file_segment->reserve(current_predownload_size)) + { + LOG_TEST(log, "Left to predownload: {}, buffer size: {}", bytes_to_predownload, implementation_buffer->buffer().size()); + + file_segment->write(implementation_buffer->buffer().begin(), current_predownload_size); + + bytes_to_predownload -= current_predownload_size; + implementation_buffer->position() += current_predownload_size; + } + else + { + /// We were predownloading: + /// segment{1} + /// cache: [_____|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + /// But space reservation failed. + /// So get the working and internal buffer from the predownload buffer, get a new download buffer, + /// return the buffer back, seek to the actual position. + /// We could reuse the predownload buffer and just seek to the needed position, but for now + /// seek is only allowed once for ReadBufferFromS3 - before the call to nextImpl. + /// TODO: allow seek more than once with seek avoiding.
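+ /// Editor's step-by-step reading of the fallback below (not part of the original patch):
+ ///     1. file_segment->complete(PARTIALLY_DOWNLOADED_NO_CONTINUATION) -- give up the download;
+ ///     2. swap(*implementation_buffer) + working_buffer.resize(0) -- take our memory back
+ ///        from the predownload buffer and mark it empty;
+ ///     3. implementation_buffer = getRemoteFSReadBuffer(file_segment, REMOTE_FS_READ_BYPASS_CACHE)
+ ///        -- create a fresh bypass reader;
+ ///     4. swap(*implementation_buffer) + seek(file_offset_of_buffer_end, SEEK_SET) -- lend it
+ ///        our memory and position it where the caller actually reads.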
+ + bytes_to_predownload = 0; + file_segment->complete(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); + + read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + + swap(*implementation_buffer); + working_buffer.resize(0); + position() = working_buffer.end(); + + implementation_buffer = getRemoteFSReadBuffer(file_segment, read_type); + + swap(*implementation_buffer); + + implementation_buffer->seek(file_offset_of_buffer_end, SEEK_SET); + + LOG_TEST( + log, "Predownload failed because of space limit. Will read from remote filesystem starting from offset: {}", + file_offset_of_buffer_end); + + break; + } + } + } +} + +bool CachedReadBufferFromRemoteFS::updateImplementationBufferIfNeeded() +{ + auto & file_segment = *current_file_segment_it; + auto current_read_range = file_segment->range(); + auto current_state = file_segment->state(); + + assert(current_read_range.left <= file_offset_of_buffer_end); + assert(!file_segment->isDownloader()); + + if (file_offset_of_buffer_end > current_read_range.right) + { + return completeFileSegmentAndGetNext(); + } + + if (read_type == ReadType::CACHED && current_state != FileSegment::State::DOWNLOADED) + { + /// If the current read_type is ReadType::CACHED and the file segment is not DOWNLOADED, + /// it means the following case, e.g. we started from CacheReadBuffer and continue with RemoteFSReadBuffer. + /// segment{k} + /// cache: [______|___________ + /// ^ + /// download_offset + /// requested_range: [__________] + /// ^ + /// file_offset_of_buffer_end + + auto download_offset = file_segment->getDownloadOffset(); + if (download_offset == file_offset_of_buffer_end) + { + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + + return true; + } + else if (download_offset < file_offset_of_buffer_end) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected {} >= {} ({})", download_offset, file_offset_of_buffer_end, getInfoForLog()); + } + + if (read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE) + { + /** + * ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE means that on the previous getImplementationBuffer() call + * the current buffer successfully called file_segment->getOrSetDownloader() and became the downloader + * for this file segment. However, the downloader's term has a lifespan of 1 nextImpl() call, + * e.g. the downloader reads buffer_size bytes and calls completeBatchAndResetDownloader() and some other + * thread can become the downloader if it calls getOrSetDownloader() faster. + * + * So the downloader is committed to download only buffer_size bytes and then is not the downloader anymore, + * because there is no guarantee at a higher level that the current buffer will not disappear without + * being destructed till the end of the query or without finishing the read range which it was supposed + * to read (by the marks range given to it). Therefore, each nextImpl() call, in case of + * READ_AND_PUT_IN_CACHE, starts with getOrSetDownloader().
+ */ + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + } + + return true; +} + +bool CachedReadBufferFromRemoteFS::nextImpl() +{ + try + { + return nextImplStep(); + } + catch (Exception & e) + { + e.addMessage("Cache info: {}", getInfoForLog()); + throw; + } +} + +bool CachedReadBufferFromRemoteFS::nextImplStep() +{ + if (IFileCache::shouldBypassCache()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Using cache when not allowed"); + + if (!initialized) + initialize(file_offset_of_buffer_end, getTotalSizeToRead()); + + if (current_file_segment_it == file_segments_holder->file_segments.end()) + return false; + + SCOPE_EXIT({ + if (current_file_segment_it == file_segments_holder->file_segments.end()) + return; + + auto & file_segment = *current_file_segment_it; + + bool download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + if (download_current_segment) + { + try + { + bool file_segment_already_completed = !file_segment->isDownloader(); + if (!file_segment_already_completed) + file_segment->completeBatchAndResetDownloader(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + + assert(!file_segment->isDownloader()); + }); + + bytes_to_predownload = 0; + + if (implementation_buffer) + { + bool can_read_further = updateImplementationBufferIfNeeded(); + if (!can_read_further) + return false; + } + else + { + implementation_buffer = getImplementationBuffer(*current_file_segment_it); + } + + assert(!internal_buffer.empty()); + swap(*implementation_buffer); + + auto & file_segment = *current_file_segment_it; + auto current_read_range = file_segment->range(); + + LOG_TEST(log, "Current segment: {}, downloader: {}, current count: {}, position: {}", + current_read_range.toString(), file_segment->getDownloader(), implementation_buffer->count(), implementation_buffer->getPosition()); + + assert(current_read_range.left <= file_offset_of_buffer_end); + assert(current_read_range.right >= file_offset_of_buffer_end); + + bool result = false; + size_t size = 0; + + size_t needed_to_predownload = bytes_to_predownload; + if (needed_to_predownload) + { + predownload(file_segment); + + result = implementation_buffer->hasPendingData(); + size = implementation_buffer->available(); + } + + auto download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; + if (download_current_segment != file_segment->isDownloader()) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect segment state. Having read type: {}, Caller id: {}, downloader id: {}, file segment state: {}", + toString(read_type), file_segment->getCallerId(), file_segment->getDownloader(), file_segment->state()); + + if (!result) + { + result = implementation_buffer->next(); + size = implementation_buffer->buffer().size(); + } + + if (result) + { + if (download_current_segment) + { + assert(file_offset_of_buffer_end + size - 1 <= file_segment->range().right); + + if (file_segment->reserve(size)) + { + file_segment->write(needed_to_predownload ? 
implementation_buffer->position() : implementation_buffer->buffer().begin(), size); + } + else + { + download_current_segment = false; + file_segment->complete(FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); + LOG_DEBUG(log, "No space left in cache, will continue without cache download"); + } + } + + switch (read_type) + { + case ReadType::CACHED: + { + ProfileEvents::increment(ProfileEvents::RemoteFSCacheReadBytes, size); + break; + } + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + { + ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size); + break; + } + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + { + ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, size); + ProfileEvents::increment(ProfileEvents::RemoteFSCacheDownloadBytes, size); + break; + } + } + + if (std::next(current_file_segment_it) == file_segments_holder->file_segments.end()) + { + size_t remaining_size_to_read = std::min(current_read_range.right, read_until_position - 1) - file_offset_of_buffer_end + 1; + size = std::min(size, remaining_size_to_read); + implementation_buffer->buffer().resize(nextimpl_working_buffer_offset + size); + } + + file_offset_of_buffer_end += size; + } + + swap(*implementation_buffer); + + if (download_current_segment) + file_segment->completeBatchAndResetDownloader(); + + assert(!file_segment->isDownloader()); + + LOG_TEST(log, + "Key: {}. Returning with {} bytes, buffer position: {} (offset: {}, predownloaded: {}), " + "buffer available: {}, current range: {}, current offset: {}, file segment state: {}, download offset: {}, read_type: {}, " + "reading until position: {}, started with offset: {}, remaining ranges: {}", + getHexUIntLowercase(cache_key), working_buffer.size(), getPosition(), offset(), needed_to_predownload, + available(), current_read_range.toString(), + file_offset_of_buffer_end, FileSegment::stateToString(file_segment->state()), file_segment->getDownloadOffset(), toString(read_type), + read_until_position, first_offset, file_segments_holder->toString()); + + if (size == 0 && file_offset_of_buffer_end < read_until_position) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Having zero bytes, but range is not finished: file offset: {}, reading until: {}", + file_offset_of_buffer_end, read_until_position); + return result; +} + +off_t CachedReadBufferFromRemoteFS::seek(off_t offset, int whence) +{ + if (initialized) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, + "Seek is allowed only before first read attempt from the buffer"); + + if (whence != SEEK_SET) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET allowed"); + + first_offset = offset; + file_offset_of_buffer_end = offset; + size_t size = getTotalSizeToRead(); + initialize(offset, size); + + return offset; +} + +size_t CachedReadBufferFromRemoteFS::getTotalSizeToRead() +{ + /// Last position should be guaranteed to be set, as at least we always know file size. + if (!read_until_position) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Last position was not set"); + + /// On this level should be guaranteed that read size is non-zero. + if (file_offset_of_buffer_end >= read_until_position) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Read boundaries mismatch. 
Expected {} < {}", + file_offset_of_buffer_end, read_until_position); + + return read_until_position - file_offset_of_buffer_end; +} + +void CachedReadBufferFromRemoteFS::setReadUntilPosition(size_t) +{ + throw Exception(ErrorCodes::LOGICAL_ERROR, "Method `setReadUntilPosition()` not allowed"); +} + +off_t CachedReadBufferFromRemoteFS::getPosition() +{ + return file_offset_of_buffer_end - available(); +} + +std::optional CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset() const +{ + if (!file_segments_holder) + throw Exception(ErrorCodes::LOGICAL_ERROR, "File segments holder not initialized"); + + const auto & file_segments = file_segments_holder->file_segments; + for (auto it = file_segments.rbegin(); it != file_segments.rend(); ++it) + { + const auto & file_segment = *it; + if (file_segment->state() != FileSegment::State::DOWNLOADED) + return file_segment->range().right; + } + + return std::nullopt; +} + +String CachedReadBufferFromRemoteFS::getInfoForLog() +{ + return fmt::format("Buffer path: {}, hash key: {}, file_offset_of_buffer_end: {}, internal buffer remaining read range: {}, file segment info: {}", + remote_fs_object_path, getHexUIntLowercase(cache_key), file_offset_of_buffer_end, + (implementation_buffer ? + std::to_string(implementation_buffer->getRemainingReadRange().left) + '-' + (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None") + : "None"), + (current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog())); +} + +} diff --git a/src/Disks/IO/CachedReadBufferFromRemoteFS.h b/src/Disks/IO/CachedReadBufferFromRemoteFS.h new file mode 100644 index 00000000000..3d03debcd01 --- /dev/null +++ b/src/Disks/IO/CachedReadBufferFromRemoteFS.h @@ -0,0 +1,103 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class CachedReadBufferFromRemoteFS : public SeekableReadBuffer +{ +public: + using RemoteFSFileReaderCreator = std::function; + + CachedReadBufferFromRemoteFS( + const String & remote_fs_object_path_, + FileCachePtr cache_, + RemoteFSFileReaderCreator remote_file_reader_creator_, + const ReadSettings & settings_, + size_t read_until_position_); + + bool nextImpl() override; + + off_t seek(off_t off, int whence) override; + + off_t getPosition() override; + + size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } + + String getInfoForLog() override; + + void setReadUntilPosition(size_t position) override; + +private: + void initialize(size_t offset, size_t size); + + SeekableReadBufferPtr getImplementationBuffer(FileSegmentPtr & file_segment); + + SeekableReadBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment); + + SeekableReadBufferPtr getCacheReadBuffer(size_t offset) const; + + std::optional getLastNonDownloadedOffset() const; + + bool updateImplementationBufferIfNeeded(); + + void predownload(FileSegmentPtr & file_segment); + + bool nextImplStep(); + + enum class ReadType + { + CACHED, + REMOTE_FS_READ_BYPASS_CACHE, + REMOTE_FS_READ_AND_PUT_IN_CACHE, + }; + + SeekableReadBufferPtr getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_); + + size_t getTotalSizeToRead(); + bool completeFileSegmentAndGetNext(); + + Poco::Logger * log; + IFileCache::Key cache_key; + String remote_fs_object_path; + FileCachePtr cache; + ReadSettings settings; + + size_t read_until_position; + size_t file_offset_of_buffer_end = 0; + size_t 
bytes_to_predownload = 0; + + RemoteFSFileReaderCreator remote_file_reader_creator; + + /// Remote read buffer, which can only be owned by current buffer. + FileSegment::RemoteFileReaderPtr remote_file_reader; + + std::optional file_segments_holder; + FileSegments::iterator current_file_segment_it; + + SeekableReadBufferPtr implementation_buffer; + bool initialized = false; + + ReadType read_type = ReadType::REMOTE_FS_READ_BYPASS_CACHE; + + static String toString(ReadType type) + { + switch (type) + { + case ReadType::CACHED: + return "CACHED"; + case ReadType::REMOTE_FS_READ_BYPASS_CACHE: + return "REMOTE_FS_READ_BYPASS_CACHE"; + case ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE: + return "REMOTE_FS_READ_AND_PUT_IN_CACHE"; + } + } + size_t first_offset = 0; +}; + +} diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp index 574845642bf..8f91804bbbe 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.cpp @@ -16,51 +16,79 @@ #include #endif +#include #include #include #include +#include namespace fs = std::filesystem; namespace DB { -#if USE_AWS_S3 -SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const String & path, size_t read_until_position_) const +namespace ErrorCodes { - return std::make_unique(client_ptr, bucket, - fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, settings, threadpool_read, read_until_position_); + extern const int LOGICAL_ERROR; +} + +#if USE_AWS_S3 +SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const String & path, size_t file_size) +{ + current_path = path; + + auto cache = settings.remote_fs_cache; + bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache(); + + auto remote_file_reader_creator = [=, this]() + { + return std::make_unique( + client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries, + settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true); + }; + + if (with_cache) + { + return std::make_shared( + path, cache, remote_file_reader_creator, settings, read_until_position ? 
read_until_position : file_size); + } + + return remote_file_reader_creator(); } #endif #if USE_AZURE_BLOB_STORAGE -SeekableReadBufferPtr ReadBufferFromAzureBlobStorageGather::createImplementationBuffer(const String & path, size_t read_until_position_) const +SeekableReadBufferPtr ReadBufferFromAzureBlobStorageGather::createImplementationBuffer(const String & path, size_t /* file_size */) { + current_path = path; return std::make_unique(blob_container_client, path, max_single_read_retries, - max_single_download_retries, settings.remote_fs_buffer_size, threadpool_read, read_until_position_); + max_single_download_retries, settings.remote_fs_buffer_size, /* use_external_buffer */true, read_until_position); } #endif -SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBuffer(const String & path, size_t read_until_position_) const +SeekableReadBufferPtr ReadBufferFromWebServerGather::createImplementationBuffer(const String & path, size_t /* file_size */) { - return std::make_unique(fs::path(uri) / path, context, settings, threadpool_read, read_until_position_); + current_path = path; + return std::make_unique(fs::path(uri) / path, context, settings, /* use_external_buffer */true, read_until_position); } #if USE_HDFS -SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBuffer(const String & path, size_t read_until_position_) const +SeekableReadBufferPtr ReadBufferFromHDFSGather::createImplementationBuffer(const String & path, size_t /* file_size */) { - return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, buf_size, read_until_position_); + return std::make_unique(hdfs_uri, fs::path(hdfs_directory) / path, config, settings.remote_fs_buffer_size); } #endif -ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const String & path_) +ReadBufferFromRemoteFSGather::ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const ReadSettings & settings_, const String & path_) : ReadBuffer(nullptr, 0) , metadata(metadata_) + , settings(settings_) , canonical_path(path_) + , log(&Poco::Logger::get("ReadBufferFromRemoteFSGather")) { } @@ -75,8 +103,8 @@ ReadBufferFromRemoteFSGather::ReadResult ReadBufferFromRemoteFSGather::readInto( file_offset_of_buffer_end = offset; bytes_to_ignore = ignore; - if (bytes_to_ignore) - assert(initialized()); + + assert(!bytes_to_ignore || initialized()); auto result = nextImpl(); @@ -100,11 +128,8 @@ void ReadBufferFromRemoteFSGather::initialize() /// Do not create a new buffer if we already have what we need. if (!current_buf || current_buf_idx != i) { - current_buf = createImplementationBuffer(file_path, read_until_position); current_buf_idx = i; - - if (auto * in = dynamic_cast(current_buf.get())) - in->setReadType(SeekableReadBufferWithSize::ReadType::DISK_READ); + current_buf = createImplementationBuffer(file_path, size); } current_buf->seek(current_buf_offset, SEEK_SET); @@ -133,22 +158,34 @@ bool ReadBufferFromRemoteFSGather::nextImpl() else return false; + if (!moveToNextBuffer()) + return false; + + return readImpl(); +} + + +bool ReadBufferFromRemoteFSGather::moveToNextBuffer() +{ /// If there is no available buffers - nothing to read. 
if (current_buf_idx + 1 >= metadata.remote_fs_objects.size()) return false; ++current_buf_idx; - const auto & current_path = metadata.remote_fs_objects[current_buf_idx].first; - current_buf = createImplementationBuffer(current_path, read_until_position); + const auto & [path, size] = metadata.remote_fs_objects[current_buf_idx]; + current_buf = createImplementationBuffer(path, size); - return readImpl(); + return true; } + bool ReadBufferFromRemoteFSGather::readImpl() { swap(*current_buf); + bool result = false; + /** * Lazy seek is performed here. * In asynchronous buffer when seeking to offset in range [pos, pos + min_bytes_for_seek] @@ -157,33 +194,50 @@ bool ReadBufferFromRemoteFSGather::readImpl() if (bytes_to_ignore) { current_buf->ignore(bytes_to_ignore); + result = current_buf->hasPendingData(); file_offset_of_buffer_end += bytes_to_ignore; bytes_to_ignore = 0; } - bool result = current_buf->hasPendingData(); - if (result) + if (!result) + result = current_buf->next(); + + if (metadata.remote_fs_objects.size() == 1) { - /// bytes_to_ignore already added. - file_offset_of_buffer_end += current_buf->available(); + file_offset_of_buffer_end = current_buf->getFileOffsetOfBufferEnd(); } else { - result = current_buf->next(); - if (result) - file_offset_of_buffer_end += current_buf->buffer().size(); + /// For log family engines there are multiple s3 files for the same clickhouse file + file_offset_of_buffer_end += current_buf->available(); } swap(*current_buf); + /// Required for non-async reads. + if (result) + { + assert(available()); + nextimpl_working_buffer_offset = offset(); + } + return result; } +size_t ReadBufferFromRemoteFSGather::getFileOffsetOfBufferEnd() const +{ + return file_offset_of_buffer_end; +} + + void ReadBufferFromRemoteFSGather::setReadUntilPosition(size_t position) { - read_until_position = position; - reset(); + if (position != read_until_position) + { + read_until_position = position; + reset(); + } } @@ -194,7 +248,7 @@ void ReadBufferFromRemoteFSGather::reset() String ReadBufferFromRemoteFSGather::getFileName() const { - return canonical_path; + return current_path; } @@ -206,4 +260,21 @@ size_t ReadBufferFromRemoteFSGather::getFileSize() const return size; } +String ReadBufferFromRemoteFSGather::getInfoForLog() +{ + if (!current_buf) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get info: buffer not initialized"); + + return current_buf->getInfoForLog(); +} + +size_t ReadBufferFromRemoteFSGather::getImplementationBufferOffset() const +{ + if (!current_buf) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Buffer not initialized"); + + return current_buf->getFileOffsetOfBufferEnd(); +} + + } diff --git a/src/Disks/IO/ReadBufferFromRemoteFSGather.h b/src/Disks/IO/ReadBufferFromRemoteFSGather.h index ddd651f47a1..25bfe0b7e16 100644 --- a/src/Disks/IO/ReadBufferFromRemoteFSGather.h +++ b/src/Disks/IO/ReadBufferFromRemoteFSGather.h @@ -9,13 +9,9 @@ #include #endif -namespace Aws -{ -namespace S3 -{ -class S3Client; -} -} +namespace Aws { namespace S3 { class S3Client; } } + +namespace Poco { class Logger; } namespace DB { @@ -29,7 +25,10 @@ class ReadBufferFromRemoteFSGather : public ReadBuffer friend class ReadIndirectBufferFromRemoteFS; public: - explicit ReadBufferFromRemoteFSGather(const RemoteMetadata & metadata_, const String & path_); + ReadBufferFromRemoteFSGather( + const RemoteMetadata & metadata_, + const ReadSettings & settings_, + const String & path_); String getFileName() const; @@ -47,15 +46,27 @@ public: size_t getFileSize() const; - size_t 
offset() const { return file_offset_of_buffer_end; } + size_t getFileOffsetOfBufferEnd() const; bool initialized() const { return current_buf != nullptr; } + String getInfoForLog(); + + size_t getImplementationBufferOffset() const; + protected: - virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const = 0; + virtual SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) = 0; RemoteMetadata metadata; + ReadSettings settings; + + bool use_external_buffer; + + size_t read_until_position = 0; + + String current_path; + private: bool nextImpl() override; @@ -63,6 +74,8 @@ private: bool readImpl(); + bool moveToNextBuffer(); + SeekableReadBufferPtr current_buf; size_t current_buf_idx = 0; @@ -76,9 +89,9 @@ private: */ size_t bytes_to_ignore = 0; - size_t read_until_position = 0; - String canonical_path; + + Poco::Logger * log; }; @@ -93,25 +106,20 @@ public: const String & bucket_, IDiskRemote::Metadata metadata_, size_t max_single_read_retries_, - const ReadSettings & settings_, - bool threadpool_read_ = false) - : ReadBufferFromRemoteFSGather(metadata_, path_) + const ReadSettings & settings_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , client_ptr(std::move(client_ptr_)) , bucket(bucket_) , max_single_read_retries(max_single_read_retries_) - , settings(settings_) - , threadpool_read(threadpool_read_) { } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: std::shared_ptr client_ptr; String bucket; UInt64 max_single_read_retries; - ReadSettings settings; - bool threadpool_read; }; #endif @@ -127,25 +135,20 @@ public: IDiskRemote::Metadata metadata_, size_t max_single_read_retries_, size_t max_single_download_retries_, - const ReadSettings & settings_, - bool threadpool_read_ = false) - : ReadBufferFromRemoteFSGather(metadata_, path_) + const ReadSettings & settings_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , blob_container_client(blob_container_client_) , max_single_read_retries(max_single_read_retries_) , max_single_download_retries(max_single_download_retries_) - , settings(settings_) - , threadpool_read(threadpool_read_) { } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: std::shared_ptr blob_container_client; size_t max_single_read_retries; size_t max_single_download_retries; - ReadSettings settings; - bool threadpool_read; }; #endif @@ -158,23 +161,18 @@ public: const String & uri_, RemoteMetadata metadata_, ContextPtr context_, - size_t threadpool_read_, const ReadSettings & settings_) - : ReadBufferFromRemoteFSGather(metadata_, path_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , uri(uri_) , context(context_) - , threadpool_read(threadpool_read_) - , settings(settings_) { } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: String uri; ContextPtr context; - bool threadpool_read; - ReadSettings settings; }; @@ -188,23 +186,21 @@ public: const Poco::Util::AbstractConfiguration & config_, const String & hdfs_uri_, 
IDiskRemote::Metadata metadata_, - size_t buf_size_) - : ReadBufferFromRemoteFSGather(metadata_, path_) + const ReadSettings & settings_) + : ReadBufferFromRemoteFSGather(metadata_, settings_, path_) , config(config_) - , buf_size(buf_size_) { const size_t begin_of_path = hdfs_uri_.find('/', hdfs_uri_.find("//") + 2); hdfs_directory = hdfs_uri_.substr(begin_of_path); hdfs_uri = hdfs_uri_.substr(0, begin_of_path); } - SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t read_until_position) const override; + SeekableReadBufferPtr createImplementationBuffer(const String & path, size_t file_size) override; private: const Poco::Util::AbstractConfiguration & config; String hdfs_uri; String hdfs_directory; - size_t buf_size; }; #endif diff --git a/src/Disks/IO/ReadBufferFromWebServer.h b/src/Disks/IO/ReadBufferFromWebServer.h index 7285a94b0d8..ea746fb75a1 100644 --- a/src/Disks/IO/ReadBufferFromWebServer.h +++ b/src/Disks/IO/ReadBufferFromWebServer.h @@ -30,6 +30,8 @@ public: off_t getPosition() override; + size_t getFileOffsetOfBufferEnd() const override { return offset; } + private: std::unique_ptr initialize(); diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp index cbf265ce741..699f8380cb8 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.cpp @@ -13,7 +13,9 @@ namespace ErrorCodes ReadIndirectBufferFromRemoteFS::ReadIndirectBufferFromRemoteFS( - std::shared_ptr impl_) : impl(std::move(impl_)) + std::shared_ptr impl_) + : ReadBufferFromFileBase(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0) + , impl(impl_) { } @@ -30,6 +32,18 @@ String ReadIndirectBufferFromRemoteFS::getFileName() const } +void ReadIndirectBufferFromRemoteFS::setReadUntilPosition(size_t position) +{ + impl->setReadUntilPosition(position); +} + + +void ReadIndirectBufferFromRemoteFS::setReadUntilEnd() +{ + impl->setReadUntilPosition(impl->getFileSize()); +} + + off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) { if (whence == SEEK_CUR) @@ -66,6 +80,7 @@ off_t ReadIndirectBufferFromRemoteFS::seek(off_t offset_, int whence) impl->reset(); resetWorkingBuffer(); + file_offset_of_buffer_end = impl->file_offset_of_buffer_end; return impl->file_offset_of_buffer_end; } @@ -74,11 +89,21 @@ bool ReadIndirectBufferFromRemoteFS::nextImpl() { /// Transfer current position and working_buffer to actual ReadBuffer swap(*impl); + + assert(!impl->hasPendingData()); /// Position and working_buffer will be updated in next() call auto result = impl->next(); /// and assigned to current buffer. 
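+ /// Editor's note on the swap idiom here (explanatory, not part of the original patch):
+ /// the pair of swap(*impl) calls lends this buffer's memory to the gather buffer so
+ /// that impl->next() fills our working_buffer in place:
+ ///
+ ///     swap(*impl); /// impl temporarily owns our internal/working buffer
+ ///     impl->next(); /// reads directly into it and updates impl's own offsets
+ ///     swap(*impl); /// take the (re)filled buffer back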
swap(*impl); + if (result) + { + file_offset_of_buffer_end += available(); + BufferBase::set(working_buffer.begin() + offset(), available(), 0); + } + + assert(file_offset_of_buffer_end == impl->file_offset_of_buffer_end); + return result; } diff --git a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h index 0c8b1b4dd21..a0669be411f 100644 --- a/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h +++ b/src/Disks/IO/ReadIndirectBufferFromRemoteFS.h @@ -27,10 +27,16 @@ public: String getFileName() const override; + void setReadUntilPosition(size_t position) override; + + void setReadUntilEnd() override; + private: bool nextImpl() override; std::shared_ptr impl; + + size_t file_offset_of_buffer_end = 0; }; } diff --git a/src/IO/ThreadPoolReader.cpp b/src/Disks/IO/ThreadPoolReader.cpp similarity index 91% rename from src/IO/ThreadPoolReader.cpp rename to src/Disks/IO/ThreadPoolReader.cpp index 0c2791c6f68..e39f6057445 100644 --- a/src/IO/ThreadPoolReader.cpp +++ b/src/Disks/IO/ThreadPoolReader.cpp @@ -1,4 +1,4 @@ -#include +#include "ThreadPoolReader.h" #include #include #include @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -184,9 +185,26 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMiss); - auto task = std::make_shared>([request, fd] + ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() + ? CurrentThread::get().getThreadGroup() + : MainThreadStatus::getInstance().getThreadGroup(); + + ContextPtr query_context; + if (CurrentThread::isInitialized()) + query_context = CurrentThread::get().getQueryContext(); + + auto task = std::make_shared>([request, fd, running_group, query_context] { + ThreadStatus thread_status; + + if (query_context) + thread_status.attachQueryContext(query_context); + + if (running_group) + thread_status.attachQuery(running_group); + setThreadName("ThreadPoolRead"); + Stopwatch watch(CLOCK_MONOTONIC); size_t bytes_read = 0; @@ -219,6 +237,9 @@ std::future ThreadPoolReader::submit(Request reques ProfileEvents::increment(ProfileEvents::ThreadPoolReaderPageCacheMissElapsedMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::DiskReadElapsedMicroseconds, watch.elapsedMicroseconds()); + if (running_group) + thread_status.detachQuery(); + return Result{ .size = bytes_read, .offset = request.ignore }; }); diff --git a/src/IO/ThreadPoolReader.h b/src/Disks/IO/ThreadPoolReader.h similarity index 100% rename from src/IO/ThreadPoolReader.h rename to src/Disks/IO/ThreadPoolReader.h diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index 4be55ff3ecf..bdb012a6376 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -41,9 +42,28 @@ ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queu std::future ThreadPoolRemoteFSReader::submit(Request request) { - auto task = std::make_shared>([request] + ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() + ? 
CurrentThread::get().getThreadGroup() + : MainThreadStatus::getInstance().getThreadGroup(); + + ContextPtr query_context; + if (CurrentThread::isInitialized()) + query_context = CurrentThread::get().getQueryContext(); + + auto task = std::make_shared>([request, running_group, query_context] { + ThreadStatus thread_status; + + /// Save query context if any, because cache implementation needs it. + if (query_context) + thread_status.attachQueryContext(query_context); + + /// To be able to pass ProfileEvents. + if (running_group) + thread_status.attachQuery(running_group); + setThreadName("VFSRead"); + CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; auto * remote_fs_fd = assert_cast(request.descriptor.get()); @@ -54,6 +74,9 @@ std::future ThreadPoolRemoteFSReader::submit(Reques ProfileEvents::increment(ProfileEvents::RemoteFSReadMicroseconds, watch.elapsedMicroseconds()); ProfileEvents::increment(ProfileEvents::RemoteFSReadBytes, bytes_read); + if (running_group) + thread_status.detachQuery(); + return Result{ .size = bytes_read, .offset = offset }; }); diff --git a/src/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp similarity index 98% rename from src/IO/createReadBufferFromFileBase.cpp rename to src/Disks/IO/createReadBufferFromFileBase.cpp index b83bfdbf3a8..4ff492e4013 100644 --- a/src/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -1,9 +1,9 @@ -#include +#include #include #include #include #include -#include +#include #include #include diff --git a/src/IO/createReadBufferFromFileBase.h b/src/Disks/IO/createReadBufferFromFileBase.h similarity index 100% rename from src/IO/createReadBufferFromFileBase.h rename to src/Disks/IO/createReadBufferFromFileBase.h diff --git a/src/Disks/LocalDirectorySyncGuard.h b/src/Disks/LocalDirectorySyncGuard.h index 34e4cb9e657..cb891461e85 100644 --- a/src/Disks/LocalDirectorySyncGuard.h +++ b/src/Disks/LocalDirectorySyncGuard.h @@ -17,8 +17,8 @@ class LocalDirectorySyncGuard final : public ISyncGuard public: /// NOTE: If you have already opened descriptor, it's preferred to use /// this constructor instead of constructor with path. 
- LocalDirectorySyncGuard(int fd_) : fd(fd_) {} - LocalDirectorySyncGuard(const String & full_path); + explicit LocalDirectorySyncGuard(int fd_) : fd(fd_) {} + explicit LocalDirectorySyncGuard(const String & full_path); ~LocalDirectorySyncGuard() override; private: diff --git a/src/Disks/RemoteDisksCommon.cpp b/src/Disks/RemoteDisksCommon.cpp index 1402e3f62c8..36f2aed3e7c 100644 --- a/src/Disks/RemoteDisksCommon.cpp +++ b/src/Disks/RemoteDisksCommon.cpp @@ -1,12 +1,13 @@ #include #include +#include +#include namespace DB { namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; +{extern const int BAD_ARGUMENTS; } std::shared_ptr wrapWithCache( @@ -26,6 +27,14 @@ std::shared_ptr wrapWithCache( return std::make_shared(disk, cache_disk, cache_file_predicate); } +static String getDiskMetadataPath( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context) +{ + return config.getString(config_prefix + ".metadata_path", context->getPath() + "disks/" + name + "/"); +} std::pair prepareForLocalMetadata( const String & name, @@ -34,10 +43,40 @@ std::pair prepareForLocalMetadata( ContextPtr context) { /// where the metadata files are stored locally - auto metadata_path = config.getString(config_prefix + ".metadata_path", context->getPath() + "disks/" + name + "/"); + auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); fs::create_directories(metadata_path); auto metadata_disk = std::make_shared(name + "-metadata", metadata_path, 0); return std::make_pair(metadata_path, metadata_disk); } + +FileCachePtr getCachePtrForDisk( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context) +{ + bool data_cache_enabled = config.getBool(config_prefix + ".data_cache_enabled", false); + if (!data_cache_enabled) + return nullptr; + + auto cache_base_path = config.getString(config_prefix + ".data_cache_path", fs::path(context->getPath()) / "disks" / name / "data_cache/"); + if (!fs::exists(cache_base_path)) + fs::create_directories(cache_base_path); + + LOG_INFO(&Poco::Logger::get("Disk(" + name + ")"), "Disk registered with cache path: {}", cache_base_path); + + auto metadata_path = getDiskMetadataPath(name, config, config_prefix, context); + if (metadata_path == cache_base_path) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Metadata path and cache base path must be different: {}", metadata_path); + + size_t max_cache_size = config.getUInt64(config_prefix + ".data_cache_max_size", 1024*1024*1024); + size_t max_cache_elements = config.getUInt64(config_prefix + ".data_cache_max_elements", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_ELEMENTS); + size_t max_file_segment_size = config.getUInt64(config_prefix + ".max_file_segment_size", REMOTE_FS_OBJECTS_CACHE_DEFAULT_MAX_FILE_SEGMENT_SIZE); + + auto cache = FileCacheFactory::instance().getOrCreate(cache_base_path, max_cache_size, max_cache_elements, max_file_segment_size); + cache->initialize(); + return cache; +} + } diff --git a/src/Disks/RemoteDisksCommon.h b/src/Disks/RemoteDisksCommon.h index 0d057b44d18..661d4e293df 100644 --- a/src/Disks/RemoteDisksCommon.h +++ b/src/Disks/RemoteDisksCommon.h @@ -21,4 +21,10 @@ std::pair prepareForLocalMetadata( const String & config_prefix, ContextPtr context); +FileCachePtr getCachePtrForDisk( + const String & name, + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + ContextPtr context); + } diff --git a/src/Disks/S3/DiskS3.cpp 
b/src/Disks/S3/DiskS3.cpp index 5d61285981b..de63f3ed82f 100644 --- a/src/Disks/S3/DiskS3.cpp +++ b/src/Disks/S3/DiskS3.cpp @@ -153,10 +153,11 @@ DiskS3::DiskS3( String bucket_, String s3_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, ContextPtr context_, SettingsPtr settings_, GetDiskSettings settings_getter_) - : IDiskRemote(name_, s3_root_path_, metadata_disk_, "DiskS3", settings_->thread_pool_size) + : IDiskRemote(name_, s3_root_path_, metadata_disk_, std::move(cache_), "DiskS3", settings_->thread_pool_size) , bucket(std::move(bucket_)) , current_settings(std::move(settings_)) , settings_getter(settings_getter_) @@ -223,17 +224,18 @@ std::unique_ptr DiskS3::readFile(const String & path, co LOG_TEST(log, "Read from file by path: {}. Existing S3 objects: {}", backQuote(metadata_disk->getPath() + path), metadata.remote_fs_objects.size()); - bool threadpool_read = read_settings.remote_fs_method == RemoteFSReadMethod::threadpool; + ReadSettings disk_read_settings{read_settings}; + if (cache) + disk_read_settings.remote_fs_cache = cache; auto s3_impl = std::make_unique( - path, - settings->client, bucket, metadata, - settings->s3_max_single_read_retries, read_settings, threadpool_read); + path, settings->client, bucket, metadata, + settings->s3_max_single_read_retries, disk_read_settings); - if (threadpool_read) + if (read_settings.remote_fs_method == RemoteFSReadMethod::threadpool) { auto reader = getThreadPoolReader(); - return std::make_unique(reader, read_settings, std::move(s3_impl)); + return std::make_unique(reader, disk_read_settings, std::move(s3_impl)); } else { diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h index 698fa6173c2..2de1600d906 100644 --- a/src/Disks/S3/DiskS3.h +++ b/src/Disks/S3/DiskS3.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB @@ -73,6 +74,7 @@ public: String bucket_, String s3_root_path_, DiskPtr metadata_disk_, + FileCachePtr cache_, ContextPtr context_, SettingsPtr settings_, GetDiskSettings settings_getter_); diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 9b2e7137d53..2b5fe3c5a81 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -19,6 +19,7 @@ #include "Disks/DiskRestartProxy.h" #include "Disks/DiskLocal.h" #include "Disks/RemoteDisksCommon.h" +#include namespace DB { @@ -178,18 +179,21 @@ void registerDiskS3(DiskFactory & factory) S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); if (uri.key.empty()) - throw Exception("Empty S3 path specified in disk configuration", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No key in S3 uri: {}", uri.uri.toString()); if (uri.key.back() != '/') - throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "S3 path must end with '/', but '{}' doesn't.", uri.key); auto [metadata_path, metadata_disk] = prepareForLocalMetadata(name, config, config_prefix, context); + FileCachePtr cache = getCachePtrForDisk(name, config, config_prefix, context); + std::shared_ptr s3disk = std::make_shared( name, uri.bucket, uri.key, metadata_disk, + std::move(cache), context, getSettings(config, config_prefix, context), getSettings); diff --git a/src/Disks/TemporaryFileOnDisk.h b/src/Disks/TemporaryFileOnDisk.h index c854a600146..b82cb7d2254 100644 --- a/src/Disks/TemporaryFileOnDisk.h +++ b/src/Disks/TemporaryFileOnDisk.h @@ -15,7 +15,7 @@ using DiskPtr = std::shared_ptr; class
TemporaryFileOnDisk { public: - TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_ = "tmp"); + explicit TemporaryFileOnDisk(const DiskPtr & disk_, const String & prefix_ = "tmp"); ~TemporaryFileOnDisk(); DiskPtr getDisk() const { return disk; } diff --git a/src/Disks/tests/gtest_disk_encrypted.cpp b/src/Disks/tests/gtest_disk_encrypted.cpp index d03128a6b33..fd3cc1acbe5 100644 --- a/src/Disks/tests/gtest_disk_encrypted.cpp +++ b/src/Disks/tests/gtest_disk_encrypted.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Formats/CapnProtoUtils.h b/src/Formats/CapnProtoUtils.h index 51c152de17f..47fe3ada7cd 100644 --- a/src/Formats/CapnProtoUtils.h +++ b/src/Formats/CapnProtoUtils.h @@ -18,14 +18,14 @@ struct DestructorCatcher { T impl; template - DestructorCatcher(Arg && ... args) : impl(kj::fwd(args)...) {} + explicit DestructorCatcher(Arg && ... args) : impl(kj::fwd(args)...) {} ~DestructorCatcher() noexcept try { } catch (...) { return; } }; class CapnProtoSchemaParser : public DestructorCatcher { public: - CapnProtoSchemaParser() {} + CapnProtoSchemaParser() = default; capnp::StructSchema getMessageSchema(const FormatSchemaInfo & schema_info); }; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 3fea8d3eb7b..08554cf7e07 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -89,6 +89,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; format_settings.null_as_default = settings.input_format_null_as_default; + format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name; format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.parquet.import_nested = settings.input_format_parquet_import_nested; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 751b3c51fa8..4881c1a43c8 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -32,6 +32,7 @@ struct FormatSettings bool null_as_default = true; bool decimal_trailing_zeros = false; bool defaults_for_omitted_fields = true; + bool use_lowercase_column_name = false; bool seekable_read = true; UInt64 max_rows_to_read_for_schema_inference = 100; diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h index 6f71baa8b40..8d304e2ffd8 100644 --- a/src/Formats/JSONEachRowUtils.h +++ b/src/Formats/JSONEachRowUtils.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include diff --git a/src/Formats/MarkInCompressedFile.h b/src/Formats/MarkInCompressedFile.h index ceefde43615..1cd545e1a03 100644 --- a/src/Formats/MarkInCompressedFile.h +++ b/src/Formats/MarkInCompressedFile.h @@ -33,7 +33,7 @@ struct MarkInCompressedFile return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + ")"; } - String toStringWithRows(size_t rows_num) + String toStringWithRows(size_t rows_num) const { return "(" + DB::toString(offset_in_compressed_file) + "," + DB::toString(offset_in_decompressed_block) + "," + DB::toString(rows_num) + ")"; } @@ -43,7 +43,7 @@ struct MarkInCompressedFile class MarksInCompressedFile : public PODArray { public: - 
MarksInCompressedFile(size_t n) : PODArray(n) {} + explicit MarksInCompressedFile(size_t n) : PODArray(n) {} void read(ReadBuffer & buffer, size_t from, size_t count) { diff --git a/src/Formats/MsgPackExtensionTypes.h b/src/Formats/MsgPackExtensionTypes.h index 139d2f9047b..2f7d28eb5bf 100644 --- a/src/Formats/MsgPackExtensionTypes.h +++ b/src/Formats/MsgPackExtensionTypes.h @@ -5,7 +5,7 @@ namespace DB enum class MsgPackExtensionTypes { - UUID = 0x02, + UUIDType = 0x02, }; } diff --git a/src/Formats/ParsedTemplateFormatString.h b/src/Formats/ParsedTemplateFormatString.h index c5617d0f0ef..5d7ee820f2f 100644 --- a/src/Formats/ParsedTemplateFormatString.h +++ b/src/Formats/ParsedTemplateFormatString.h @@ -28,7 +28,7 @@ struct ParsedTemplateFormatString /// For diagnostic info Strings column_names; - typedef std::function<std::optional<size_t>(const String &)> ColumnIdxGetter; + using ColumnIdxGetter = std::function<std::optional<size_t>(const String &)>; ParsedTemplateFormatString() = default; ParsedTemplateFormatString(const FormatSchemaInfo & schema, const ColumnIdxGetter & idx_by_name, bool allow_indexes = true); diff --git a/src/Formats/ProtobufReader.h b/src/Formats/ProtobufReader.h index 0df139eeacd..2e2a71a7d11 100644 --- a/src/Formats/ProtobufReader.h +++ b/src/Formats/ProtobufReader.h @@ -16,7 +16,7 @@ class ReadBuffer; class ProtobufReader { public: - ProtobufReader(ReadBuffer & in_); + explicit ProtobufReader(ReadBuffer & in_); void startMessage(bool with_length_delimiter_); void endMessage(bool ignore_errors); diff --git a/src/Formats/ProtobufWriter.h b/src/Formats/ProtobufWriter.h index c564db110cc..1dcc8f4ef7c 100644 --- a/src/Formats/ProtobufWriter.h +++ b/src/Formats/ProtobufWriter.h @@ -16,7 +16,7 @@ class WriteBuffer; class ProtobufWriter { public: - ProtobufWriter(WriteBuffer & out_); + explicit ProtobufWriter(WriteBuffer & out_); ~ProtobufWriter(); void startMessage(); diff --git a/src/Formats/RowInputMissingColumnsFiller.h b/src/Formats/RowInputMissingColumnsFiller.h index 0eaefd4e814..9785d8bed62 100644 --- a/src/Formats/RowInputMissingColumnsFiller.h +++ b/src/Formats/RowInputMissingColumnsFiller.h @@ -14,7 +14,7 @@ class RowInputMissingColumnsFiller { public: /// Makes a column filler which checks nested structures while adding default values to columns.
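Most hunks in this patch follow the same pattern as the constructor below: a single-argument constructor gains the explicit keyword so the compiler can no longer use it as an implicit conversion. A minimal sketch of the failure mode being closed off (the Filler type and its use are illustrative, not from this patch):

    #include <cstddef>

    struct Filler
    {
        explicit Filler(std::size_t column_count) : columns(column_count) {}
        std::size_t columns;
    };

    void consume(const Filler &) {}

    int main()
    {
        // consume(42);       // without 'explicit' this would silently build a Filler from an integer
        consume(Filler{42});  // with 'explicit', the conversion must be spelled out
    }
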
- RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types); + explicit RowInputMissingColumnsFiller(const NamesAndTypesList & names_and_types); RowInputMissingColumnsFiller(const Names & names, const DataTypes & types); RowInputMissingColumnsFiller(size_t count, const std::string_view * names, const DataTypePtr * types); diff --git a/src/Functions/CountSubstringsImpl.h b/src/Functions/CountSubstringsImpl.h index 6668ca0a392..fc6e4a0e671 100644 --- a/src/Functions/CountSubstringsImpl.h +++ b/src/Functions/CountSubstringsImpl.h @@ -83,7 +83,7 @@ struct CountSubstringsImpl { res = 0; - if (needle.size() == 0) + if (needle.empty()) return; auto start = std::max(start_pos, UInt64(1)); diff --git a/src/Functions/DivisionUtils.h b/src/Functions/DivisionUtils.h index 2e601888ecc..c246f7fd31a 100644 --- a/src/Functions/DivisionUtils.h +++ b/src/Functions/DivisionUtils.h @@ -6,6 +6,7 @@ #include #include +#include "config_core.h" #include diff --git a/src/Functions/DummyJSONParser.h b/src/Functions/DummyJSONParser.h index c14aacece86..77b958d1429 100644 --- a/src/Functions/DummyJSONParser.h +++ b/src/Functions/DummyJSONParser.h @@ -2,6 +2,8 @@ #include #include +#include + namespace DB { @@ -22,25 +24,25 @@ struct DummyJSONParser class Element { public: - Element() {} - bool isInt64() const { return false; } - bool isUInt64() const { return false; } - bool isDouble() const { return false; } - bool isString() const { return false; } - bool isArray() const { return false; } - bool isObject() const { return false; } - bool isBool() const { return false; } - bool isNull() const { return false; } + Element() = default; + static bool isInt64() { return false; } + static bool isUInt64() { return false; } + static bool isDouble() { return false; } + static bool isString() { return false; } + static bool isArray() { return false; } + static bool isObject() { return false; } + static bool isBool() { return false; } + static bool isNull() { return false; } - Int64 getInt64() const { return 0; } - UInt64 getUInt64() const { return 0; } - double getDouble() const { return 0; } - bool getBool() const { return false; } - std::string_view getString() const { return {}; } - Array getArray() const { return {}; } - Object getObject() const { return {}; } + static Int64 getInt64() { return 0; } + static UInt64 getUInt64() { return 0; } + static double getDouble() { return 0; } + static bool getBool() { return false; } + static std::string_view getString() { return {}; } + static Array getArray() { return {}; } + static Object getObject() { return {}; } - Element getElement() { return {}; } + static Element getElement() { return {}; } }; /// References an array in a JSON document. 
@@ -52,14 +54,14 @@ struct DummyJSONParser public: Element operator*() const { return {}; } Iterator & operator++() { return *this; } - Iterator operator++(int) { return *this; } + Iterator operator++(int) { return *this; } /// NOLINT friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator!=(const Iterator &, const Iterator &) { return false; } }; - Iterator begin() const { return {}; } - Iterator end() const { return {}; } - size_t size() const { return 0; } + static Iterator begin() { return {}; } + static Iterator end() { return {}; } + static size_t size() { return 0; } Element operator[](size_t) const { return {}; } }; @@ -74,15 +76,15 @@ struct DummyJSONParser public: KeyValuePair operator*() const { return {}; } Iterator & operator++() { return *this; } - Iterator operator++(int) { return *this; } + Iterator operator++(int) { return *this; } /// NOLINT friend bool operator==(const Iterator &, const Iterator &) { return true; } friend bool operator!=(const Iterator &, const Iterator &) { return false; } }; - Iterator begin() const { return {}; } - Iterator end() const { return {}; } - size_t size() const { return 0; } - bool find(const std::string_view &, Element &) const { return false; } + static Iterator begin() { return {}; } + static Iterator end() { return {}; } + static size_t size() { return 0; } + bool find(const std::string_view &, Element &) const { return false; } /// NOLINT #if 0 /// Optional: Provides access to an object's element by index. @@ -91,7 +93,7 @@ struct DummyJSONParser }; /// Parses a JSON document, returns the reference to its root element if succeeded. - bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; } + bool parse(const std::string_view &, Element &) { throw Exception{"Functions JSON* are not supported", ErrorCodes::NOT_IMPLEMENTED}; } /// NOLINT #if 0 /// Optional: Allocates memory to parse JSON documents faster. 
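DummyJSONParser above is the stand-in used when ClickHouse is built without a real JSON parser library: every probe reports an empty document and parse() throws, so the JSON* functions can still be registered but fail cleanly at runtime. Turning the no-op accessors into static functions, as the hunks do, is free precisely because they touch no state. A condensed sketch of the pattern (names and the exception type are illustrative):

    #include <stdexcept>
    #include <string_view>

    struct StubParser
    {
        struct Element
        {
            static bool isNull() { return true; }               // every type probe reports "nothing here"
            static std::string_view getString() { return {}; }  // every getter yields an empty value
        };

        /// The one reachable entry point must fail loudly.
        bool parse(std::string_view, Element &) { throw std::runtime_error("JSON functions are not supported"); }
    };
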
diff --git a/src/Functions/EmptyImpl.h b/src/Functions/EmptyImpl.h index 60daa66ea03..6f5c4f7a7dc 100644 --- a/src/Functions/EmptyImpl.h +++ b/src/Functions/EmptyImpl.h @@ -2,6 +2,7 @@ #include #include +#include #include diff --git a/src/Functions/FunctionBitTestMany.h b/src/Functions/FunctionBitTestMany.h index 808c3711631..e49af4c166f 100644 --- a/src/Functions/FunctionBitTestMany.h +++ b/src/Functions/FunctionBitTestMany.h @@ -5,6 +5,7 @@ #include #include #include +#include #include diff --git a/src/Functions/FunctionCustomWeekToSomething.h b/src/Functions/FunctionCustomWeekToSomething.h index 542062151ce..6ed751fd889 100644 --- a/src/Functions/FunctionCustomWeekToSomething.h +++ b/src/Functions/FunctionCustomWeekToSomething.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/FunctionSQLJSON.h b/src/Functions/FunctionSQLJSON.h index d860da62b9d..56d29e0c776 100644 --- a/src/Functions/FunctionSQLJSON.h +++ b/src/Functions/FunctionSQLJSON.h @@ -242,7 +242,7 @@ public: GeneratorJSONPath generator_json_path(query_ptr); Element current_element = root; VisitorStatus status; - Element res; + while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted) { if (status == VisitorStatus::Ok) diff --git a/src/Functions/FunctionSnowflake.h b/src/Functions/FunctionSnowflake.h index 1ba15433e94..f4a62e509ed 100644 --- a/src/Functions/FunctionSnowflake.h +++ b/src/Functions/FunctionSnowflake.h @@ -24,7 +24,7 @@ namespace ErrorCodes * https://blog.twitter.com/engineering/en_us/a/2010/announcing-snowflake * https://ws-dl.blogspot.com/2019/08/2019-08-03-tweetedat-finding-tweet.html */ -static constexpr long snowflake_epoch = 1288834974657L; +static constexpr size_t snowflake_epoch = 1288834974657L; static constexpr int time_shift = 22; class FunctionDateTimeToSnowflake : public IFunction @@ -33,7 +33,7 @@ private: const char * name; public: - FunctionDateTimeToSnowflake(const char * name_) : name(name_) { } + explicit FunctionDateTimeToSnowflake(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -74,7 +74,7 @@ private: const char * name; public: - FunctionSnowflakeToDateTime(const char * name_) : name(name_) { } + explicit FunctionSnowflakeToDateTime(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -84,7 +84,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() < 1 || arguments.size() > 2) + if (arguments.empty() || arguments.size() > 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name); if (!typeid_cast(arguments[0].type.get())) @@ -122,7 +122,7 @@ private: const char * name; public: - FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { } + explicit FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -163,7 +163,7 @@ private: const char * name; public: - FunctionSnowflakeToDateTime64(const char * name_) : name(name_) { } + explicit FunctionSnowflakeToDateTime64(const char * name_) : name(name_) { } String getName() const override { return name; } size_t getNumberOfArguments() const override { return 0; } @@ -173,7 +173,7 @@ public: DataTypePtr getReturnTypeImpl(const 
ColumnsWithTypeAndName & arguments) const override { - if (arguments.size() < 1 || arguments.size() > 2) + if (arguments.empty() || arguments.size() > 2) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} takes one or two arguments", name); if (!typeid_cast(arguments[0].type.get())) diff --git a/src/Functions/FunctionStringOrArrayToT.h b/src/Functions/FunctionStringOrArrayToT.h index 3bf1f0a5d34..cda5da5c177 100644 --- a/src/Functions/FunctionStringOrArrayToT.h +++ b/src/Functions/FunctionStringOrArrayToT.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/FunctionsBitmap.h b/src/Functions/FunctionsBitmap.h index 775a39f4d08..1e48588892a 100644 --- a/src/Functions/FunctionsBitmap.h +++ b/src/Functions/FunctionsBitmap.h @@ -421,7 +421,7 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast*>(data_ptr_0); const UInt64 range_start = is_column_const[1] ? (*container1)[0] : (*container1)[i]; @@ -615,7 +615,7 @@ private: size_t to_end; for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast *>(data_ptr_0); if (is_column_const[1]) @@ -923,7 +923,7 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? (*container0)[0] : (*container0)[i]; const UInt64 data1 = is_column_const[1] ? (*container1)[0] : (*container1)[i]; const AggregateFunctionGroupBitmapData & bitmap_data_0 = *reinterpret_cast *>(data_ptr_0); @@ -1030,8 +1030,8 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; - const AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; + AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i]; const AggregateFunctionGroupBitmapData & bitmap_data_1 = *reinterpret_cast *>(data_ptr_0); const AggregateFunctionGroupBitmapData & bitmap_data_2 @@ -1178,8 +1178,8 @@ private: for (size_t i = 0; i < input_rows_count; ++i) { - const AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; - const AggregateDataPtr data_ptr_1 = is_column_const[1] ? container1[0] : container1[i]; + AggregateDataPtr data_ptr_0 = is_column_const[0] ? container0[0] : container0[i]; + AggregateDataPtr data_ptr_1 = is_column_const[1] ? 
container1[0] : container1[i]; // bitmapAnd(RoaringBitMap, SmallSet) is slower than bitmapAnd(SmallSet, RoaringBitMap), so we can exchange the position of two arguments for the speed auto * bm_1 = reinterpret_cast *>(data_ptr_0); diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index a0c7fc643d2..0d0195eb2d7 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -137,7 +137,7 @@ struct NumComparisonImpl template struct StringComparisonImpl { - static void NO_INLINE string_vector_string_vector( + static void NO_INLINE string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -157,7 +157,7 @@ struct StringComparisonImpl } } - static void NO_INLINE string_vector_fixed_string_vector( + static void NO_INLINE string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -175,7 +175,7 @@ struct StringComparisonImpl } } - static void NO_INLINE string_vector_constant( + static void NO_INLINE string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -193,7 +193,7 @@ struct StringComparisonImpl } } - static void fixed_string_vector_string_vector( + static void fixed_string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -201,7 +201,7 @@ struct StringComparisonImpl StringComparisonImpl::string_vector_fixed_string_vector(b_data, b_offsets, a_data, a_n, c); } - static void NO_INLINE fixed_string_vector_fixed_string_vector_16( + static void NO_INLINE fixed_string_vector_fixed_string_vector_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -212,7 +212,7 @@ struct StringComparisonImpl c[j] = Op::apply(memcmp16(&a_data[i], &b_data[i]), 0); } - static void NO_INLINE fixed_string_vector_constant_16( + static void NO_INLINE fixed_string_vector_constant_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -223,7 +223,7 @@ struct StringComparisonImpl c[j] = Op::apply(memcmp16(&a_data[i], &b_data[0]), 0); } - static void NO_INLINE fixed_string_vector_fixed_string_vector( + static void NO_INLINE fixed_string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -250,7 +250,7 @@ struct StringComparisonImpl } } - static void NO_INLINE fixed_string_vector_constant( + static void NO_INLINE fixed_string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -273,7 +273,7 @@ struct StringComparisonImpl } } - static void constant_string_vector( + static void constant_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -281,7 +281,7 @@ struct StringComparisonImpl 
StringComparisonImpl::string_vector_constant(b_data, b_offsets, a_data, a_size, c); } - static void constant_fixed_string_vector( + static void constant_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -295,7 +295,7 @@ struct StringComparisonImpl template struct StringEqualsImpl { - static void NO_INLINE string_vector_string_vector( + static void NO_INLINE string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -318,7 +318,7 @@ struct StringEqualsImpl } } - static void NO_INLINE string_vector_fixed_string_vector( + static void NO_INLINE string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -338,7 +338,7 @@ struct StringEqualsImpl } } - static void NO_INLINE string_vector_constant( + static void NO_INLINE string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Offsets & a_offsets, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -358,7 +358,7 @@ struct StringEqualsImpl } } - static void NO_INLINE fixed_string_vector_fixed_string_vector_16( + static void NO_INLINE fixed_string_vector_fixed_string_vector_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -371,7 +371,7 @@ struct StringEqualsImpl b_data.data() + i * 16); } - static void NO_INLINE fixed_string_vector_constant_16( + static void NO_INLINE fixed_string_vector_constant_16( /// NOLINT const ColumnString::Chars & a_data, const ColumnString::Chars & b_data, PaddedPODArray & c) @@ -384,7 +384,7 @@ struct StringEqualsImpl b_data.data()); } - static void NO_INLINE fixed_string_vector_fixed_string_vector( + static void NO_INLINE fixed_string_vector_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) @@ -410,7 +410,7 @@ struct StringEqualsImpl } } - static void NO_INLINE fixed_string_vector_constant( + static void NO_INLINE fixed_string_vector_constant( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, ColumnString::Offset b_size, PaddedPODArray & c) @@ -427,7 +427,7 @@ struct StringEqualsImpl } } - static void fixed_string_vector_string_vector( + static void fixed_string_vector_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_n, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -435,7 +435,7 @@ struct StringEqualsImpl string_vector_fixed_string_vector(b_data, b_offsets, a_data, a_n, c); } - static void constant_string_vector( + static void constant_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const ColumnString::Chars & b_data, const ColumnString::Offsets & b_offsets, PaddedPODArray & c) @@ -443,7 +443,7 @@ struct StringEqualsImpl string_vector_constant(b_data, b_offsets, a_data, a_size, c); } - static void constant_fixed_string_vector( + static void constant_fixed_string_vector( /// NOLINT const ColumnString::Chars & a_data, ColumnString::Offset a_size, const 
ColumnString::Chars & b_data, ColumnString::Offset b_n, PaddedPODArray & c) diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index f75d67032f2..5e11cab7e79 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -542,7 +542,7 @@ struct ToDateTime64TransformUnsigned const DateTime64::NativeType scale_multiplier = 1; - ToDateTime64TransformUnsigned(UInt32 scale = 0) + ToDateTime64TransformUnsigned(UInt32 scale = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale)) {} @@ -559,7 +559,7 @@ struct ToDateTime64TransformSigned const DateTime64::NativeType scale_multiplier = 1; - ToDateTime64TransformSigned(UInt32 scale = 0) + ToDateTime64TransformSigned(UInt32 scale = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale)) {} @@ -577,7 +577,7 @@ struct ToDateTime64TransformFloat const UInt32 scale = 1; - ToDateTime64TransformFloat(UInt32 scale_ = 0) + ToDateTime64TransformFloat(UInt32 scale_ = 0) /// NOLINT : scale(scale_) {} @@ -615,7 +615,7 @@ struct FromDateTime64Transform const DateTime64::NativeType scale_multiplier = 1; - FromDateTime64Transform(UInt32 scale) + FromDateTime64Transform(UInt32 scale) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale)) {} @@ -639,7 +639,7 @@ struct ToDateTime64Transform const DateTime64::NativeType scale_multiplier = 1; - ToDateTime64Transform(UInt32 scale = 0) + ToDateTime64Transform(UInt32 scale = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier<DateTime64::NativeType>(scale)) {} diff --git a/src/Functions/FunctionsEmbeddedDictionaries.h b/src/Functions/FunctionsEmbeddedDictionaries.h index 0f75750354a..c6ea886b4a8 100644 --- a/src/Functions/FunctionsEmbeddedDictionaries.h +++ b/src/Functions/FunctionsEmbeddedDictionaries.h @@ -593,7 +593,7 @@ public: size_t getNumberOfArguments() const override { return 0; } /// For the purpose of query optimization, we assume this function to be injective - /// even in face of fact that there are many different cities named Moscow. + /// even in the face of the fact that there are many different cities named Paris.
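The injectivity flag that follows is not mere documentation: ClickHouse may eliminate an injective function from GROUP BY keys, rewriting GROUP BY f(key) as GROUP BY key, since distinct inputs are guaranteed distinct outputs. The comment records that for region names this guarantee is only assumed, not real. A toy statement of the contract (the type is illustrative, not from this file):

    #include <cstdint>
    #include <string>

    struct ToyRegionToName
    {
        /// Claiming injectivity promises: apply(a) == apply(b) implies a == b,
        /// which is what licenses dropping the call from a GROUP BY key.
        static bool isInjective() { return true; }
        static std::string apply(uint64_t region_id) { return "region_" + std::to_string(region_id); }
    };
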
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index 7e26de574aa..6a701d7b864 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -897,7 +897,9 @@ private: result = std::move(dictionary_get_result_column); } else - result = ColumnNullable::create(std::move(dictionary_get_result_column), std::move(is_key_in_dictionary_column_mutable)); + { + result = ColumnNullable::create(dictionary_get_result_column, std::move(is_key_in_dictionary_column_mutable)); + } } return result; diff --git a/src/Functions/FunctionsLogical.h b/src/Functions/FunctionsLogical.h index 7d4f5489e86..140981faf9f 100644 --- a/src/Functions/FunctionsLogical.h +++ b/src/Functions/FunctionsLogical.h @@ -7,6 +7,7 @@ #include #include #include +#include #if USE_EMBEDDED_COMPILER @@ -147,7 +148,6 @@ public: static constexpr auto name = Name::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } -public: String getName() const override { return name; @@ -189,7 +189,7 @@ public: result = Impl::apply(b, result, nativeBoolCast(b, types[i], values[i])); return b.CreateSelect(result, b.getInt8(1), b.getInt8(0)); } - constexpr bool breakOnTrue = Impl::isSaturatedValue(true); + constexpr bool break_on_true = Impl::isSaturatedValue(true); auto * next = b.GetInsertBlock(); auto * stop = llvm::BasicBlock::Create(next->getContext(), "", next->getParent()); b.SetInsertPoint(stop); @@ -205,7 +205,7 @@ public: if (i + 1 < types.size()) { next = llvm::BasicBlock::Create(next->getContext(), "", next->getParent()); - b.CreateCondBr(truth, breakOnTrue ? stop : next, breakOnTrue ? next : stop); + b.CreateCondBr(truth, break_on_true ? stop : next, break_on_true ? 
next : stop); } } b.CreateBr(stop); @@ -223,7 +223,6 @@ public: static constexpr auto name = Name::name; static FunctionPtr create(ContextPtr) { return std::make_shared(); } -public: String getName() const override { return name; diff --git a/src/Functions/FunctionsStringArray.h b/src/Functions/FunctionsStringArray.h index b1de017120c..a1256598f1b 100644 --- a/src/Functions/FunctionsStringArray.h +++ b/src/Functions/FunctionsStringArray.h @@ -93,7 +93,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } @@ -152,7 +152,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } @@ -211,7 +211,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } @@ -328,7 +328,7 @@ public: } /// Returns the position of the argument, that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 1; } @@ -399,7 +399,7 @@ public: } /// Returns the position of the argument that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 1; } @@ -482,7 +482,7 @@ public: } /// Returns the position of the argument that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 1; } @@ -567,7 +567,7 @@ public: } /// Returns the position of the argument that is the column of strings - size_t getStringsArgumentPosition() + static size_t getStringsArgumentPosition() { return 0; } diff --git a/src/Functions/FunctionsTonalityClassification.cpp b/src/Functions/FunctionsTonalityClassification.cpp index 5dbd6d0356d..2d8e47b9bcb 100644 --- a/src/Functions/FunctionsTonalityClassification.cpp +++ b/src/Functions/FunctionsTonalityClassification.cpp @@ -24,7 +24,7 @@ struct FunctionDetectTonalityImpl UInt64 count_words = 0; String word; - /// Select all Russian words from the string + /// Select all words from the string for (size_t ind = 0; ind < str_len; ++ind) { /// Split words by whitespaces and punctuation signs @@ -36,7 +36,7 @@ struct FunctionDetectTonalityImpl word.push_back(str[ind]); ++ind; } - /// Try to find a russian word in the tonality dictionary + /// Try to find a word in the tonality dictionary const auto * it = emotional_dict.find(word); if (it != emotional_dict.end()) { diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h index 2d4544b2167..d08248e71fc 100644 --- a/src/Functions/GatherUtils/Algorithms.h +++ b/src/Functions/GatherUtils/Algorithms.h @@ -203,7 +203,7 @@ void concat(const std::vector> & array_sources, Si size_t sources_num = array_sources.size(); std::vector is_const(sources_num); - auto checkAndGetSizeToReserve = [] (auto source, IArraySource * array_source) + auto check_and_get_size_to_reserve = [] (auto source, IArraySource * array_source) { if (source == nullptr) throw Exception("Concat function expected " + demangle(typeid(Source).name()) + " or " @@ -215,17 +215,17 @@ void concat(const std::vector> & array_sources, Si size_t size_to_reserve = 0; for (auto i : collections::range(0, sources_num)) { - auto & source = array_sources[i]; + const auto & source = array_sources[i]; is_const[i] = 
source->isConst(); if (is_const[i]) - size_to_reserve += checkAndGetSizeToReserve(typeid_cast *>(source.get()), source.get()); + size_to_reserve += check_and_get_size_to_reserve(typeid_cast *>(source.get()), source.get()); else - size_to_reserve += checkAndGetSizeToReserve(typeid_cast(source.get()), source.get()); + size_to_reserve += check_and_get_size_to_reserve(typeid_cast(source.get()), source.get()); } sink.reserve(size_to_reserve); - auto writeNext = [& sink] (auto source) + auto write_next = [& sink] (auto source) { writeSlice(source->getWhole(), sink); source->next(); @@ -235,11 +235,11 @@ void concat(const std::vector> & array_sources, Si { for (auto i : collections::range(0, sources_num)) { - auto & source = array_sources[i]; + const auto & source = array_sources[i]; if (is_const[i]) - writeNext(static_cast *>(source.get())); + write_next(static_cast *>(source.get())); else - writeNext(static_cast(source.get())); + write_next(static_cast(source.get())); } sink.next(); } @@ -576,31 +576,31 @@ bool sliceHasImplSubstr(const FirstSliceType & first, const SecondSliceType & se [](const SecondSliceType & pattern, size_t i, size_t j) { return isEqualUnary(pattern, i, j); }); } - size_t firstCur = 0; - size_t secondCur = 0; - while (firstCur < first.size && secondCur < second.size) + size_t first_cur = 0; + size_t second_cur = 0; + while (first_cur < first.size && second_cur < second.size) { - const bool is_first_null = has_first_null_map && first_null_map[firstCur]; - const bool is_second_null = has_second_null_map && second_null_map[secondCur]; + const bool is_first_null = has_first_null_map && first_null_map[first_cur]; + const bool is_second_null = has_second_null_map && second_null_map[second_cur]; const bool cond_both_null_match = is_first_null && is_second_null; const bool cond_both_not_null = !is_first_null && !is_second_null; - if (cond_both_null_match || (cond_both_not_null && isEqual(first, second, firstCur, secondCur))) + if (cond_both_null_match || (cond_both_not_null && isEqual(first, second, first_cur, second_cur))) { - ++firstCur; - ++secondCur; + ++first_cur; + ++second_cur; } - else if (secondCur > 0) + else if (second_cur > 0) { - secondCur = prefix_function[secondCur - 1]; + second_cur = prefix_function[second_cur - 1]; } else { - ++firstCur; + ++first_cur; } } - return secondCur == second.size; + return second_cur == second.size; } diff --git a/src/Functions/GatherUtils/Selectors.h b/src/Functions/GatherUtils/Selectors.h index bbe631a6a3a..5793701e93a 100644 --- a/src/Functions/GatherUtils/Selectors.h +++ b/src/Functions/GatherUtils/Selectors.h @@ -131,7 +131,7 @@ struct ArrayAndValueSourceSelectorBySink : public ArraySinkSelector).name()) + " but got " + demangle(typeid(*source_ptr).name()), ErrorCodes::LOGICAL_ERROR); }; - auto checkTypeAndCallConcat = [& sink, & checkType, & args ...] (auto array_source_ptr, auto value_source_ptr) + auto check_type_and_call_concat = [& sink, & check_type, & args ...] 
(auto array_source_ptr, auto value_source_ptr) { - checkType(array_source_ptr); - checkType(value_source_ptr); + check_type(array_source_ptr); + check_type(value_source_ptr); Base::selectArrayAndValueSourceBySink(*array_source_ptr, *value_source_ptr, sink, args ...); }; if (array_source.isConst() && value_source.isConst()) - checkTypeAndCallConcat(typeid_cast *>(&array_source), + check_type_and_call_concat(typeid_cast *>(&array_source), typeid_cast *>(&value_source)); else if (array_source.isConst()) - checkTypeAndCallConcat(typeid_cast *>(&array_source), + check_type_and_call_concat(typeid_cast *>(&array_source), typeid_cast(&value_source)); else if (value_source.isConst()) - checkTypeAndCallConcat(typeid_cast(&array_source), + check_type_and_call_concat(typeid_cast(&array_source), typeid_cast *>(&value_source)); else - checkTypeAndCallConcat(typeid_cast(&array_source), + check_type_and_call_concat(typeid_cast(&array_source), typeid_cast(&value_source)); } }; diff --git a/src/Functions/GatherUtils/Slices.h b/src/Functions/GatherUtils/Slices.h index 7951178497a..22f475adf59 100644 --- a/src/Functions/GatherUtils/Slices.h +++ b/src/Functions/GatherUtils/Slices.h @@ -26,7 +26,7 @@ struct NullableSlice : public Slice const UInt8 * null_map = nullptr; NullableSlice() = default; - NullableSlice(const Slice & base) : Slice(base) {} + NullableSlice(const Slice & base) : Slice(base) {} /// NOLINT }; template diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 7d1241be7d1..13e3de99552 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -184,7 +184,7 @@ struct ConstSource : public Base virtual void accept(ArraySourceVisitor & visitor) // override { - if constexpr (std::is_base_of::value) + if constexpr (std::is_base_of_v) visitor.visit(*this); else throw Exception( @@ -194,7 +194,7 @@ struct ConstSource : public Base virtual void accept(ValueSourceVisitor & visitor) // override { - if constexpr (std::is_base_of::value) + if constexpr (std::is_base_of_v) visitor.visit(*this); else throw Exception( diff --git a/src/Functions/GeoHash.h b/src/Functions/GeoHash.h index d97eda31cef..071bc5072a4 100644 --- a/src/Functions/GeoHash.h +++ b/src/Functions/GeoHash.h @@ -37,8 +37,8 @@ struct GeohashesInBoxPreparedArgs }; GeohashesInBoxPreparedArgs geohashesInBoxPrepare( - const Float64 longitude_min, - const Float64 latitude_min, + Float64 longitude_min, + Float64 latitude_min, Float64 longitude_max, Float64 latitude_max, uint8_t precision); diff --git a/src/Functions/GregorianDate.h b/src/Functions/GregorianDate.h index b44b6c0dd13..ef2b9e6eede 100644 --- a/src/Functions/GregorianDate.h +++ b/src/Functions/GregorianDate.h @@ -32,13 +32,13 @@ namespace DB /** Construct from date in text form 'YYYY-MM-DD' by reading from * ReadBuffer. */ - GregorianDate(ReadBuffer & in); + explicit GregorianDate(ReadBuffer & in); /** Construct from Modified Julian Day. The type T is an * integral type which should be at least 32 bits wide, and * should preferably signed. */ - GregorianDate(is_integer auto mjd); + explicit GregorianDate(is_integer auto mjd); /** Convert to Modified Julian Day. 
The type T is an integral type * which should be at least 32 bits wide, and should preferably @@ -65,15 +65,15 @@ namespace DB return month_; } - uint8_t day_of_month() const noexcept + uint8_t day_of_month() const noexcept /// NOLINT { return day_of_month_; } private: - YearT year_; - uint8_t month_; - uint8_t day_of_month_; + YearT year_; /// NOLINT + uint8_t month_; /// NOLINT + uint8_t day_of_month_; /// NOLINT }; /** ISO 8601 Ordinal Date. YearT is an integral type which should @@ -89,7 +89,7 @@ namespace DB * integral type which should be at least 32 bits wide, and * should preferably signed. */ - OrdinalDate(is_integer auto mjd); + explicit OrdinalDate(is_integer auto mjd); /** Convert to Modified Julian Day. The type T is an integral * type which should be at least 32 bits wide, and should @@ -109,8 +109,8 @@ namespace DB } private: - YearT year_; - uint16_t day_of_year_; + YearT year_; /// NOLINT + uint16_t day_of_year_; /// NOLINT }; class MonthDay @@ -134,14 +134,14 @@ namespace DB return month_; } - uint8_t day_of_month() const noexcept + uint8_t day_of_month() const noexcept /// NOLINT { return day_of_month_; } private: - uint8_t month_; - uint8_t day_of_month_; + uint8_t month_; /// NOLINT + uint8_t day_of_month_; /// NOLINT }; } @@ -183,13 +183,13 @@ namespace gd template static inline constexpr I div(I x, J y) { - const auto y_ = static_cast(y); - if (x > 0 && y_ < 0) - return ((x - 1) / y_) - 1; - else if (x < 0 && y_ > 0) - return ((x + 1) / y_) - 1; + const auto y_cast = static_cast(y); + if (x > 0 && y_cast < 0) + return ((x - 1) / y_cast) - 1; + else if (x < 0 && y_cast > 0) + return ((x + 1) / y_cast) - 1; else - return x / y_; + return x / y_cast; } /** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x. @@ -197,10 +197,10 @@ namespace gd template static inline constexpr I mod(I x, J y) { - const auto y_ = static_cast(y); - const auto r = x % y_; - if ((x > 0 && y_ < 0) || (x < 0 && y_ > 0)) - return r == 0 ? static_cast(0) : r + y_; + const auto y_cast = static_cast(y); + const auto r = x % y_cast; + if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0)) + return r == 0 ? static_cast(0) : r + y_cast; else return r; } @@ -210,8 +210,8 @@ namespace gd template static inline constexpr I min(I x, J y) { - const auto y_ = static_cast(y); - return x < y_ ? x : y_; + const auto y_cast = static_cast(y); + return x < y_cast ? 
x : y_cast; } static inline char readDigit(ReadBuffer & in) diff --git a/src/Functions/IFunction.h b/src/Functions/IFunction.h index 71af6149774..7b272fef53d 100644 --- a/src/Functions/IFunction.h +++ b/src/Functions/IFunction.h @@ -120,7 +120,7 @@ public: virtual ~IFunctionBase() = default; - virtual ColumnPtr execute( + virtual ColumnPtr execute( /// NOLINT const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count, bool dry_run = false) const { return prepare(arguments)->execute(arguments, result_type, input_rows_count, dry_run); diff --git a/src/Functions/ITupleFunction.h b/src/Functions/ITupleFunction.h index 836e5d273fc..0dbbb81aab9 100644 --- a/src/Functions/ITupleFunction.h +++ b/src/Functions/ITupleFunction.h @@ -1,6 +1,11 @@ #pragma once +#include +#include #include +#include +#include + namespace DB { diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h b/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h index 2c9482b665e..3a5e121b989 100644 --- a/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathMemberAccess.h @@ -11,7 +11,6 @@ public: ASTPtr clone() const override { return std::make_shared(*this); } -public: /// Member name to lookup in json document (in path: $.some_key.another_key. ...) String member_name; }; diff --git a/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h b/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h index 746c6211f29..083d4b8e3ab 100644 --- a/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h +++ b/src/Functions/JSONPath/ASTs/ASTJSONPathRange.h @@ -12,7 +12,6 @@ public: ASTPtr clone() const override { return std::make_shared(*this); } -public: /// Ranges to lookup in json array ($[0, 1, 2, 4 to 9]) /// Range is represented as /// Single index is represented as diff --git a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h index 291150f6df4..fe00f06bbbf 100644 --- a/src/Functions/JSONPath/Generator/GeneratorJSONPath.h +++ b/src/Functions/JSONPath/Generator/GeneratorJSONPath.h @@ -25,7 +25,7 @@ public: * Traverses children ASTs of ASTJSONPathQuery and creates a vector of corresponding visitors * @param query_ptr_ pointer to ASTJSONPathQuery */ - GeneratorJSONPath(ASTPtr query_ptr_) + explicit GeneratorJSONPath(ASTPtr query_ptr_) { query_ptr = query_ptr_; const auto * path = query_ptr->as(); diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h b/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h index 5fe35e75a84..8446e1ff3be 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathMemberAccess.h @@ -10,7 +10,7 @@ template class VisitorJSONPathMemberAccess : public IVisitor { public: - VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_) + explicit VisitorJSONPathMemberAccess(ASTPtr member_access_ptr_) : member_access_ptr(member_access_ptr_->as()) { } const char * getName() const override { return "VisitorJSONPathMemberAccess"; } diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h b/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h index 40d4f6ad95e..708a71f7cf4 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathRange.h @@ -10,7 +10,7 @@ template class VisitorJSONPathRange : public IVisitor { public: - VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as()) + explicit 
VisitorJSONPathRange(ASTPtr range_ptr_) : range_ptr(range_ptr_->as()) { current_range = 0; current_index = range_ptr->ranges[current_range].first; @@ -20,7 +20,6 @@ public: VisitorStatus apply(typename JSONParser::Element & element) const override { - typename JSONParser::Element result; typename JSONParser::Array array = element.getArray(); element = array[current_index]; return VisitorStatus::Ok; diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h b/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h index 5c48c12782f..71569d3c0a0 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathRoot.h @@ -10,7 +10,7 @@ template class VisitorJSONPathRoot : public IVisitor { public: - VisitorJSONPathRoot(ASTPtr) { } + explicit VisitorJSONPathRoot(ASTPtr) { } const char * getName() const override { return "VisitorJSONPathRoot"; } diff --git a/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h b/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h index 4a54a76c199..0c297f64316 100644 --- a/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h +++ b/src/Functions/JSONPath/Generator/VisitorJSONPathStar.h @@ -10,7 +10,7 @@ template class VisitorJSONPathStar : public IVisitor { public: - VisitorJSONPathStar(ASTPtr) + explicit VisitorJSONPathStar(ASTPtr) { current_index = 0; } @@ -19,7 +19,6 @@ public: VisitorStatus apply(typename JSONParser::Element & element) const override { - typename JSONParser::Element result; typename JSONParser::Array array = element.getArray(); element = array[current_index]; return VisitorStatus::Ok; diff --git a/src/Functions/LeftRight.h b/src/Functions/LeftRight.h index 054e76b7792..a82182a52e7 100644 --- a/src/Functions/LeftRight.h +++ b/src/Functions/LeftRight.h @@ -12,6 +12,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/LowerUpperImpl.h b/src/Functions/LowerUpperImpl.h index cf614850e66..a7c38a7f904 100644 --- a/src/Functions/LowerUpperImpl.h +++ b/src/Functions/LowerUpperImpl.h @@ -31,7 +31,7 @@ private: #ifdef __SSE2__ const auto bytes_sse = sizeof(__m128i); - const auto src_end_sse = src_end - (src_end - src) % bytes_sse; + const auto * src_end_sse = src_end - (src_end - src) % bytes_sse; const auto v_not_case_lower_bound = _mm_set1_epi8(not_case_lower_bound - 1); const auto v_not_case_upper_bound = _mm_set1_epi8(not_case_upper_bound + 1); diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index 4c155034b3d..a7475870dab 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -16,61 +16,58 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -namespace +/// xor or do nothing +template +UInt8 xor_or_identity(const UInt8 c, const int mask) { - /// xor or do nothing - template - UInt8 xor_or_identity(const UInt8 c, const int mask) - { - return c ^ mask; - } + return c ^ mask; +} - template <> - inline UInt8 xor_or_identity(const UInt8 c, const int) - { - return c; - } +template <> +inline UInt8 xor_or_identity(const UInt8 c, const int) +{ + return c; +} - /// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array - template - inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) +/// It is caller's responsibility to ensure the presence of a valid cyrillic sequence in array +template +inline void UTF8CyrillicToCase(const UInt8 *& src, UInt8 *& dst) +{ + if (src[0] == 0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) { - if (src[0] == 
0xD0u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// ѐёђѓєѕіїјљњћќѝўџ - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x10); - } - else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) - { - /// А-П - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) - { - /// а-п - *dst++ = *src++; - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) - { - /// Р-Я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } - else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) - { - /// р-я - *dst++ = xor_or_identity(*src++, 0x1); - *dst++ = xor_or_identity(*src++, 0x20); - } + /// ЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD1u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) + { + /// ѐёђѓєѕіїјљњћќѝўџ + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x10); + } + else if (src[0] == 0xD0u && (src[1] >= 0x90u && src[1] <= 0x9Fu)) + { + /// А-П + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xB0u && src[1] <= 0xBFu)) + { + /// а-п + *dst++ = *src++; + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD0u && (src[1] >= 0xA0u && src[1] <= 0xAFu)) + { + /// Р-Я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); + } + else if (src[0] == 0xD1u && (src[1] >= 0x80u && src[1] <= 0x8Fu)) + { + /// р-я + *dst++ = xor_or_identity(*src++, 0x1); + *dst++ = xor_or_identity(*src++, 0x20); } } @@ -171,7 +168,7 @@ private: { #ifdef __SSE2__ static constexpr auto bytes_sse = sizeof(__m128i); - auto src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse; + const auto * src_end_sse = src + (src_end - src) / bytes_sse * bytes_sse; /// SSE2 packed comparison operate on signed types, hence compare (c < 0) instead of (c > 0x7f) const auto v_zero = _mm_setzero_si128(); @@ -216,7 +213,7 @@ private: else { /// UTF-8 - const auto expected_end = src + bytes_sse; + const auto * expected_end = src + bytes_sse; while (src < expected_end) toCase(src, src_end, dst); diff --git a/src/Functions/MultiMatchAllIndicesImpl.h b/src/Functions/MultiMatchAllIndicesImpl.h index c2e64671d1f..f3e67008707 100644 --- a/src/Functions/MultiMatchAllIndicesImpl.h +++ b/src/Functions/MultiMatchAllIndicesImpl.h @@ -3,6 +3,7 @@ #include #include #include +#include #include "Regexps.h" #include "config_functions.h" diff --git a/src/Functions/PerformanceAdaptors.h b/src/Functions/PerformanceAdaptors.h index 9ef6454d085..bcc195e988e 100644 --- a/src/Functions/PerformanceAdaptors.h +++ b/src/Functions/PerformanceAdaptors.h @@ -72,7 +72,7 @@ namespace detail return size() == 0; } - void emplace_back() + void emplace_back() /// NOLINT { data.emplace_back(); } @@ -198,7 +198,7 @@ class ImplementationSelector : WithContext public: using ImplementationPtr = std::shared_ptr; - ImplementationSelector(ContextPtr context_) : WithContext(context_) {} + explicit ImplementationSelector(ContextPtr context_) : WithContext(context_) {} /* Select the best implementation based on previous runs. 
* If FunctionInterface is IFunction, then "executeImpl" method of the implementation will be called diff --git a/src/Functions/PolygonUtils.h b/src/Functions/PolygonUtils.h index 1a340c517dc..de4bb2d48de 100644 --- a/src/Functions/PolygonUtils.h +++ b/src/Functions/PolygonUtils.h @@ -53,14 +53,14 @@ UInt64 getPolygonAllocatedBytes(const Polygon & polygon) using RingType = typename Polygon::ring_type; using ValueType = typename RingType::value_type; - auto sizeOfRing = [](const RingType & ring) { return sizeof(ring) + ring.capacity() * sizeof(ValueType); }; + auto size_of_ring = [](const RingType & ring) { return sizeof(ring) + ring.capacity() * sizeof(ValueType); }; - size += sizeOfRing(polygon.outer()); + size += size_of_ring(polygon.outer()); const auto & inners = polygon.inners(); size += sizeof(inners) + inners.capacity() * sizeof(RingType); for (auto & inner : inners) - size += sizeOfRing(inner); + size += size_of_ring(inner); return size; } diff --git a/src/Functions/RapidJSONParser.h b/src/Functions/RapidJSONParser.h index 0e791fe744f..2d8514868e5 100644 --- a/src/Functions/RapidJSONParser.h +++ b/src/Functions/RapidJSONParser.h @@ -23,8 +23,8 @@ struct RapidJSONParser class Element { public: - ALWAYS_INLINE Element() {} - ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {} + ALWAYS_INLINE Element() = default; + ALWAYS_INLINE Element(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT ALWAYS_INLINE bool isInt64() const { return ptr->IsInt64(); } ALWAYS_INLINE bool isUInt64() const { return ptr->IsUint64(); } @@ -54,17 +54,17 @@ struct RapidJSONParser class Iterator { public: - ALWAYS_INLINE Iterator(const rapidjson::Value::ConstValueIterator & it_) : it(it_) {} - ALWAYS_INLINE Element operator*() const { return *it; } + ALWAYS_INLINE Iterator(const rapidjson::Value::ConstValueIterator & it_) : it(it_) {} /// NOLINT + ALWAYS_INLINE Element operator*() const { return *it; } /// NOLINT ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it == right.it; } ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); } private: rapidjson::Value::ConstValueIterator it; }; - ALWAYS_INLINE Array(const rapidjson::Value & value_) : ptr(&value_) {} + ALWAYS_INLINE Array(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return ptr->Begin(); } ALWAYS_INLINE Iterator end() const { return ptr->End(); } ALWAYS_INLINE size_t size() const { return ptr->Size(); } @@ -83,17 +83,17 @@ struct RapidJSONParser class Iterator { public: - ALWAYS_INLINE Iterator(const rapidjson::Value::ConstMemberIterator & it_) : it(it_) {} + ALWAYS_INLINE Iterator(const rapidjson::Value::ConstMemberIterator & it_) : it(it_) {} /// NOLINT ALWAYS_INLINE KeyValuePair operator *() const { std::string_view key{it->name.GetString(), it->name.GetStringLength()}; return {key, it->value}; } ALWAYS_INLINE Iterator & operator ++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator ++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator ==(const Iterator & left, const Iterator & right) { return left.it 
== right.it; } ALWAYS_INLINE friend bool operator !=(const Iterator & left, const Iterator & right) { return !(left == right); } private: rapidjson::Value::ConstMemberIterator it; }; - ALWAYS_INLINE Object(const rapidjson::Value & value_) : ptr(&value_) {} + ALWAYS_INLINE Object(const rapidjson::Value & value_) : ptr(&value_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return ptr->MemberBegin(); } ALWAYS_INLINE Iterator end() const { return ptr->MemberEnd(); } ALWAYS_INLINE size_t size() const { return ptr->MemberCount(); } diff --git a/src/Functions/ReplaceRegexpImpl.h b/src/Functions/ReplaceRegexpImpl.h index 5d2549239c8..549edf70dff 100644 --- a/src/Functions/ReplaceRegexpImpl.h +++ b/src/Functions/ReplaceRegexpImpl.h @@ -33,8 +33,8 @@ struct ReplaceRegexpImpl /// Otherwise - paste this string verbatim. std::string literal; - Instruction(int substitution_num_) : substitution_num(substitution_num_) {} - Instruction(std::string literal_) : literal(std::move(literal_)) {} + Instruction(int substitution_num_) : substitution_num(substitution_num_) {} /// NOLINT + Instruction(std::string literal_) : literal(std::move(literal_)) {} /// NOLINT }; using Instructions = std::vector; @@ -137,8 +137,14 @@ struct ReplaceRegexpImpl if (replace_one) can_finish_current_string = true; - else if (match.length() == 0) - ++match_pos; /// Step one character to avoid infinite loop. + + if (match.length() == 0) + { + /// Step one character to avoid infinite loop + ++match_pos; + if (match_pos >= static_cast(input.length())) + can_finish_current_string = true; + } } else can_finish_current_string = true; diff --git a/src/Functions/SimdJSONParser.h b/src/Functions/SimdJSONParser.h index be85d74619b..3abeb85fb56 100644 --- a/src/Functions/SimdJSONParser.h +++ b/src/Functions/SimdJSONParser.h @@ -28,8 +28,8 @@ struct SimdJSONParser class Element { public: - ALWAYS_INLINE Element() {} - ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} + ALWAYS_INLINE Element() {} /// NOLINT + ALWAYS_INLINE Element(const simdjson::dom::element & element_) : element(element_) {} /// NOLINT ALWAYS_INLINE bool isInt64() const { return element.type() == simdjson::dom::element_type::INT64; } ALWAYS_INLINE bool isUInt64() const { return element.type() == simdjson::dom::element_type::UINT64; } @@ -61,17 +61,17 @@ struct SimdJSONParser class Iterator { public: - ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {} + ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator & it_) : it(it_) {} /// NOLINT ALWAYS_INLINE Element operator*() const { return *it; } ALWAYS_INLINE Iterator & operator++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; } ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); } private: simdjson::dom::array::iterator it; }; - ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {} + ALWAYS_INLINE Array(const simdjson::dom::array & array_) : array(array_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return array.begin(); } ALWAYS_INLINE Iterator end() const { return array.end(); } ALWAYS_INLINE size_t size() const { return array.size(); } @@ -90,17 +90,17 @@ struct SimdJSONParser class Iterator { public: 
- ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {} + ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator & it_) : it(it_) {} /// NOLINT ALWAYS_INLINE KeyValuePair operator*() const { const auto & res = *it; return {res.key, res.value}; } ALWAYS_INLINE Iterator & operator++() { ++it; return *this; } - ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } + ALWAYS_INLINE Iterator operator++(int) { auto res = *this; ++it; return res; } /// NOLINT ALWAYS_INLINE friend bool operator!=(const Iterator & left, const Iterator & right) { return left.it != right.it; } ALWAYS_INLINE friend bool operator==(const Iterator & left, const Iterator & right) { return !(left != right); } private: simdjson::dom::object::iterator it; }; - ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {} + ALWAYS_INLINE Object(const simdjson::dom::object & object_) : object(object_) {} /// NOLINT ALWAYS_INLINE Iterator begin() const { return object.begin(); } ALWAYS_INLINE Iterator end() const { return object.end(); } ALWAYS_INLINE size_t size() const { return object.size(); } diff --git a/src/Functions/TargetSpecific.h b/src/Functions/TargetSpecific.h index fa230a56fb7..d7fa55fbb08 100644 --- a/src/Functions/TargetSpecific.h +++ b/src/Functions/TargetSpecific.h @@ -89,6 +89,7 @@ String toString(TargetArch arch); #if ENABLE_MULTITARGET_CODE && defined(__GNUC__) && defined(__x86_64__) +/// NOLINTNEXTLINE #define USE_MULTITARGET_CODE 1 #if defined(__clang__) @@ -183,6 +184,7 @@ namespace TargetSpecific::Default { \ __VA_ARGS__ \ } +/// NOLINTNEXTLINE #define DECLARE_MULTITARGET_CODE(...) \ DECLARE_DEFAULT_CODE (__VA_ARGS__) \ DECLARE_SSE42_SPECIFIC_CODE (__VA_ARGS__) \ @@ -191,23 +193,23 @@ DECLARE_AVX2_SPECIFIC_CODE (__VA_ARGS__) \ DECLARE_AVX512F_SPECIFIC_CODE(__VA_ARGS__) DECLARE_DEFAULT_CODE( - constexpr auto BuildArch = TargetArch::Default; + constexpr auto BuildArch = TargetArch::Default; /// NOLINT ) // DECLARE_DEFAULT_CODE DECLARE_SSE42_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::SSE42; + constexpr auto BuildArch = TargetArch::SSE42; /// NOLINT ) // DECLARE_SSE42_SPECIFIC_CODE DECLARE_AVX_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX; + constexpr auto BuildArch = TargetArch::AVX; /// NOLINT ) // DECLARE_AVX_SPECIFIC_CODE DECLARE_AVX2_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX2; + constexpr auto BuildArch = TargetArch::AVX2; /// NOLINT ) // DECLARE_AVX2_SPECIFIC_CODE DECLARE_AVX512F_SPECIFIC_CODE( - constexpr auto BuildArch = TargetArch::AVX512F; + constexpr auto BuildArch = TargetArch::AVX512F; /// NOLINT ) // DECLARE_AVX512F_SPECIFIC_CODE } diff --git a/src/Functions/TransformDateTime64.h b/src/Functions/TransformDateTime64.h index 4eab2a491c7..b05bdab65ad 100644 --- a/src/Functions/TransformDateTime64.h +++ b/src/Functions/TransformDateTime64.h @@ -44,7 +44,7 @@ public: static constexpr auto name = Transform::name; // non-explicit constructor to allow creating from scale value (or with no scale at all), indispensable in some contexts. 
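The NOLINT added to the constructor below silences clang-tidy's explicit-constructor check rather than fixing it, because, as the comment says, call sites deliberately rely on building the transform from a bare scale value. A small sketch of the call shape being preserved (the Wrapper type is illustrative, not the real transform):

    template <typename Impl>
    struct Wrapper
    {
        Wrapper(unsigned scale = 0) : scale_multiplier(scale) {}  // implicit on purpose, as in the patch
        unsigned scale_multiplier;
    };

    void run(Wrapper<int> w) { (void)w; }

    int main()
    {
        run(3);  // a bare scale converts implicitly; 'explicit' would force run(Wrapper<int>(3))
    }
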
- TransformDateTime64(UInt32 scale_ = 0) + TransformDateTime64(UInt32 scale_ = 0) /// NOLINT : scale_multiplier(DecimalUtils::scaleMultiplier(scale_)) {} diff --git a/src/Functions/URL/ExtractFirstSignificantSubdomain.h b/src/Functions/URL/ExtractFirstSignificantSubdomain.h index 4f9b1ec3c6c..70c9c25e4f3 100644 --- a/src/Functions/URL/ExtractFirstSignificantSubdomain.h +++ b/src/Functions/URL/ExtractFirstSignificantSubdomain.h @@ -49,11 +49,11 @@ struct ExtractFirstSignificantSubdomain res_data = tmp; res_size = domain_length; - auto begin = tmp; - auto end = begin + domain_length; + const auto * begin = tmp; + const auto * end = begin + domain_length; const char * last_3_periods[3]{}; - auto pos = find_first_symbols<'.'>(begin, end); + const auto * pos = find_first_symbols<'.'>(begin, end); while (pos < end) { last_3_periods[2] = last_3_periods[1]; @@ -74,7 +74,7 @@ struct ExtractFirstSignificantSubdomain if (!last_3_periods[2]) last_3_periods[2] = begin - 1; - auto end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end); + const auto * end_of_level_domain = find_first_symbols<'/'>(last_3_periods[0], end); if (!end_of_level_domain) { end_of_level_domain = end; @@ -117,12 +117,12 @@ struct ExtractFirstSignificantSubdomain res_data = tmp; res_size = domain_length; - auto begin = tmp; - auto end = begin + domain_length; + const auto * begin = tmp; + const auto * end = begin + domain_length; const char * last_2_periods[2]{}; const char * prev = begin - 1; - auto pos = find_first_symbols<'.'>(begin, end); + const auto * pos = find_first_symbols<'.'>(begin, end); while (pos < end) { if (lookup(pos + 1, end - pos - 1)) diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 8a76d52741b..5d78500c252 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -20,7 +20,7 @@ namespace ErrorCodes struct FirstSignificantSubdomainCustomLookup { const TLDList & tld_list; - FirstSignificantSubdomainCustomLookup(const std::string & tld_list_name) + explicit FirstSignificantSubdomainCustomLookup(const std::string & tld_list_name) : tld_list(TLDListsHolder::getInstance().getTldList(tld_list_name)) { } diff --git a/src/Functions/URL/domain.h b/src/Functions/URL/domain.h index d43be198043..18efe969216 100644 --- a/src/Functions/URL/domain.h +++ b/src/Functions/URL/domain.h @@ -8,9 +8,6 @@ namespace DB { -namespace -{ - inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const Pos & start_of_host) { if (!dot_pos || start_of_host >= pos || pos - dot_pos == 1) @@ -23,8 +20,6 @@ inline StringRef checkAndReturnHost(const Pos & pos, const Pos & dot_pos, const return StringRef(start_of_host, pos - start_of_host); } -} - /// Extracts host from given url. /// /// @return empty StringRef if the host is not valid (i.e. it does not have dot, or there no symbol after dot). 
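The domain.h hunk above removes an anonymous namespace that wrapped an inline helper in a header. In a header, an anonymous namespace gives every including translation unit its own internal-linkage copy of the function, which bloats the binary and trips clang-tidy; a plain inline function is a single shared entity. Reduced illustration with a hypothetical helper:

/// BAD in a header: each .cpp that includes this gets a private copy:
/// namespace { inline int hostLength(const char * b, const char * e) { return int(e - b); } }

/// GOOD in a header: one inline entity shared by all translation units, same call syntax:
inline int hostLength(const char * b, const char * e) { return int(e - b); }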
@@ -79,7 +74,7 @@ exloop: if ((scheme_end - pos) > 2 && *pos == ':' && *(pos + 1) == '/' && *(pos } Pos dot_pos = nullptr; - auto start_of_host = pos; + const auto * start_of_host = pos; for (; pos < end; ++pos) { switch (*pos) diff --git a/src/Functions/VectorExtension.h b/src/Functions/VectorExtension.h index cb4347e3031..fbcbae6b0b6 100644 --- a/src/Functions/VectorExtension.h +++ b/src/Functions/VectorExtension.h @@ -6,27 +6,27 @@ namespace DB::VectorExtension { -typedef UInt64 UInt64x2 __attribute__ ((vector_size (sizeof(UInt64) * 2))); -typedef UInt64 UInt64x4 __attribute__ ((vector_size (sizeof(UInt64) * 4))); -typedef UInt64 UInt64x8 __attribute__ ((vector_size (sizeof(UInt64) * 8))); +using UInt64x2 = UInt64 __attribute__ ((vector_size (sizeof(UInt64) * 2))); +using UInt64x4 = UInt64 __attribute__ ((vector_size (sizeof(UInt64) * 4))); +using UInt64x8 = UInt64 __attribute__ ((vector_size (sizeof(UInt64) * 8))); -typedef UInt32 UInt32x2 __attribute__ ((vector_size (sizeof(UInt32) * 2))); -typedef UInt32 UInt32x4 __attribute__ ((vector_size (sizeof(UInt32) * 4))); -typedef UInt32 UInt32x8 __attribute__ ((vector_size (sizeof(UInt32) * 8))); -typedef UInt32 UInt32x16 __attribute__ ((vector_size (sizeof(UInt32) * 16))); +using UInt32x2 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 2))); +using UInt32x4 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 4))); +using UInt32x8 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 8))); +using UInt32x16 = UInt32 __attribute__ ((vector_size (sizeof(UInt32) * 16))); -typedef UInt16 UInt16x2 __attribute__ ((vector_size (sizeof(UInt16) * 2))); -typedef UInt16 UInt16x4 __attribute__ ((vector_size (sizeof(UInt16) * 4))); -typedef UInt16 UInt16x8 __attribute__ ((vector_size (sizeof(UInt16) * 8))); -typedef UInt16 UInt16x16 __attribute__ ((vector_size (sizeof(UInt16) * 16))); -typedef UInt16 UInt16x32 __attribute__ ((vector_size (sizeof(UInt16) * 32))); +using UInt16x2 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 2))); +using UInt16x4 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 4))); +using UInt16x8 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 8))); +using UInt16x16 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 16))); +using UInt16x32 = UInt16 __attribute__ ((vector_size (sizeof(UInt16) * 32))); -typedef UInt8 UInt8x2 __attribute__ ((vector_size (sizeof(UInt8) * 2))); -typedef UInt8 UInt8x4 __attribute__ ((vector_size (sizeof(UInt8) * 4))); -typedef UInt8 UInt8x8 __attribute__ ((vector_size (sizeof(UInt8) * 8))); -typedef UInt8 UInt8x16 __attribute__ ((vector_size (sizeof(UInt8) * 16))); -typedef UInt8 UInt8x32 __attribute__ ((vector_size (sizeof(UInt8) * 32))); -typedef UInt8 UInt8x64 __attribute__ ((vector_size (sizeof(UInt8) * 64))); +using UInt8x2 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 2))); +using UInt8x4 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 4))); +using UInt8x8 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 8))); +using UInt8x16 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 16))); +using UInt8x32 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 32))); +using UInt8x64 = UInt8 __attribute__ ((vector_size (sizeof(UInt8) * 64))); namespace detail { diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index 4d03c52460f..d6a62a966ae 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -252,7 +252,7 @@ ColumnPtr 
FunctionArrayEnumerateRankedExtended::executeImpl( ColumnPtr result_nested_array = std::move(res_nested); for (ssize_t depth = arrays_depths.max_array_depth - 1; depth >= 0; --depth) - result_nested_array = ColumnArray::create(std::move(result_nested_array), offsetsptr_by_depth[depth]); + result_nested_array = ColumnArray::create(result_nested_array, offsetsptr_by_depth[depth]); return result_nested_array; } diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index c231ddbb373..35c731dfc78 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -432,7 +432,7 @@ public: const auto & map_array_column = map_column.getNestedColumn(); auto offsets = map_array_column.getOffsetsPtr(); auto keys = map_column.getNestedData().getColumnPtr(0); - auto array_column = ColumnArray::create(std::move(keys), std::move(offsets)); + auto array_column = ColumnArray::create(keys, offsets); const auto & type_map = assert_cast(*arguments[0].type); auto array_type = std::make_shared(type_map.getKeyType()); diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index f2779a2fe58..f1b849b64f0 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -477,7 +477,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable columns.reserve(args); for (const auto & arg : arrays.args) { - if constexpr (std::is_same::value) + if constexpr (std::is_same_v) columns.push_back(arg.nested_column); else columns.push_back(checkAndGetColumn(arg.nested_column)); @@ -530,7 +530,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable { value = &map[columns[arg_num]->getElement(i)]; } - else if constexpr (std::is_same::value || std::is_same::value) + else if constexpr (std::is_same_v || std::is_same_v) value = &map[columns[arg_num]->getDataAt(i)]; else { @@ -566,7 +566,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable ++result_offset; if constexpr (is_numeric_column) result_data.insertValue(pair.getKey()); - else if constexpr (std::is_same::value || std::is_same::value) + else if constexpr (std::is_same_v || std::is_same_v) result_data.insertData(pair.getKey().data, pair.getKey().size); else result_data.deserializeAndInsertFromArena(pair.getKey().data); diff --git a/src/Functions/array/arrayScalarProduct.h b/src/Functions/array/arrayScalarProduct.h index 87161038d4c..4e3eab2faf8 100644 --- a/src/Functions/array/arrayScalarProduct.h +++ b/src/Functions/array/arrayScalarProduct.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/array/hasAllAny.h b/src/Functions/array/hasAllAny.h index cd55fea3521..3ba8bb6156f 100644 --- a/src/Functions/array/hasAllAny.h +++ b/src/Functions/array/hasAllAny.h @@ -44,7 +44,7 @@ public: { for (auto i : collections::range(0, arguments.size())) { - auto array_type = typeid_cast(arguments[i].get()); + const auto * array_type = typeid_cast(arguments[i].get()); if (!array_type) throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type " + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index b928254e454..f743cfb5b5d 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -225,7 +225,7 @@ private: for (size_t j = 0; j < len; ++j) { KeyType key; - if constexpr 
(std::is_same::value) + if constexpr (std::is_same_v) { if (const auto * col_fixed = checkAndGetColumn(arg.key_column.get())) key = col_fixed->getDataAt(offset + j).toString(); diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index fa75e305af4..057dedab6e4 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -55,7 +55,7 @@ public: static constexpr auto Kind = Impl::Kind; static constexpr auto name = Impl::Name; - FunctionExtractAllGroups(ContextPtr context_) + explicit FunctionExtractAllGroups(ContextPtr context_) : context(context_) {} diff --git a/src/Functions/formatReadable.h b/src/Functions/formatReadable.h index 7c0d6c5c817..0378e1f82f2 100644 --- a/src/Functions/formatReadable.h +++ b/src/Functions/formatReadable.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index 1f7be1a6374..f0743486584 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -179,7 +179,7 @@ float distance(float lon1deg, float lat1deg, float lon2deg, float lat2deg) /// Why comparing only difference in longitude? /// If longitudes are different enough, there is a big difference between great circle line and a line with constant latitude. - /// (Remember how a plane flies from Moscow to New York) + /// (Remember how a plane flies from Amsterdam to New York) /// But if longitude is close but latitude is different enough, there is no difference between meridian and great circle line. float latitude_midpoint = (lat1deg + lat2deg + 180) * METRIC_LUT_SIZE / 360; // [-90, 90] degrees -> [0, METRIC_LUT_SIZE] indexes @@ -326,4 +326,3 @@ void registerFunctionGeoDistance(FunctionFactory & factory) } } - diff --git a/src/Functions/timezoneOf.cpp b/src/Functions/timezoneOf.cpp index 03c9e27a3a8..97e025bc0e0 100644 --- a/src/Functions/timezoneOf.cpp +++ b/src/Functions/timezoneOf.cpp @@ -21,7 +21,7 @@ namespace /** timezoneOf(x) - get the name of the timezone of DateTime data type. - * Example: Europe/Moscow. + * Example: Pacific/Pitcairn. 
*/ class FunctionTimezoneOf : public IFunction { @@ -74,4 +74,3 @@ void registerFunctionTimezoneOf(FunctionFactory & factory) } } - diff --git a/src/Functions/toFixedString.h b/src/Functions/toFixedString.h index 129e3e0e8b2..cbd29784271 100644 --- a/src/Functions/toFixedString.h +++ b/src/Functions/toFixedString.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace DB diff --git a/src/IO/AIO.h b/src/IO/AIO.h index b8609c8853c..202939638b7 100644 --- a/src/IO/AIO.h +++ b/src/IO/AIO.h @@ -26,17 +26,17 @@ int io_setup(unsigned nr, aio_context_t * ctxp); int io_destroy(aio_context_t ctx); /// last argument is an array of pointers technically speaking -int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]); +int io_submit(aio_context_t ctx, long nr, struct iocb * iocbpp[]); /// NOLINT -int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout); +int io_getevents(aio_context_t ctx, long min_nr, long max_nr, io_event * events, struct timespec * timeout); /// NOLINT struct AIOContext : private boost::noncopyable { aio_context_t ctx = 0; - AIOContext() {} - AIOContext(unsigned int nr_events); + AIOContext() = default; + explicit AIOContext(unsigned int nr_events); ~AIOContext(); AIOContext(AIOContext && rhs) noexcept; AIOContext & operator=(AIOContext && rhs) noexcept; diff --git a/src/IO/Archives/IArchiveWriter.h b/src/IO/Archives/IArchiveWriter.h index 6879d470b62..3856d16fb89 100644 --- a/src/IO/Archives/IArchiveWriter.h +++ b/src/IO/Archives/IArchiveWriter.h @@ -29,7 +29,7 @@ public: /// Sets compression method and level. /// Changing them will affect next file in the archive. - virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} + virtual void setCompression(int /* compression_method */, int /* compression_level */ = kDefaultCompressionLevel) {} /// NOLINT /// Sets password. If the password is not empty it will enable encryption in the archive. 
virtual void setPassword(const String & /* password */) {} diff --git a/src/IO/AsynchronousReader.h b/src/IO/AsynchronousReader.h index e79e72f3bec..4583f594c37 100644 --- a/src/IO/AsynchronousReader.h +++ b/src/IO/AsynchronousReader.h @@ -32,7 +32,7 @@ public: struct LocalFileDescriptor : public IFileDescriptor { - LocalFileDescriptor(int fd_) : fd(fd_) {} + explicit LocalFileDescriptor(int fd_) : fd(fd_) {} int fd; }; diff --git a/src/IO/BitHelpers.h b/src/IO/BitHelpers.h index d15297637a3..b96f43bdeff 100644 --- a/src/IO/BitHelpers.h +++ b/src/IO/BitHelpers.h @@ -52,8 +52,7 @@ public: bits_count(0) {} - ~BitReader() - {} + ~BitReader() = default; // reads bits_to_read high-bits from bits_buffer inline UInt64 readBits(UInt8 bits_to_read) diff --git a/src/IO/BrotliReadBuffer.h b/src/IO/BrotliReadBuffer.h index 44a7dc7ddbd..cbb919e15ae 100644 --- a/src/IO/BrotliReadBuffer.h +++ b/src/IO/BrotliReadBuffer.h @@ -10,7 +10,7 @@ namespace DB class BrotliReadBuffer : public BufferWithOwnMemory { public: - BrotliReadBuffer( + explicit BrotliReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/Bzip2ReadBuffer.h b/src/IO/Bzip2ReadBuffer.h index de1e61ee388..cd5fadf9c82 100644 --- a/src/IO/Bzip2ReadBuffer.h +++ b/src/IO/Bzip2ReadBuffer.h @@ -10,7 +10,7 @@ namespace DB class Bzip2ReadBuffer : public BufferWithOwnMemory { public: - Bzip2ReadBuffer( + explicit Bzip2ReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/CascadeWriteBuffer.h b/src/IO/CascadeWriteBuffer.h index db0d1e7a5a8..ebd4f262aa2 100644 --- a/src/IO/CascadeWriteBuffer.h +++ b/src/IO/CascadeWriteBuffer.h @@ -31,7 +31,7 @@ public: using WriteBufferConstructor = std::function; using WriteBufferConstructors = std::vector; - CascadeWriteBuffer(WriteBufferPtrs && prepared_sources_, WriteBufferConstructors && lazy_sources_ = {}); + explicit CascadeWriteBuffer(WriteBufferPtrs && prepared_sources_, WriteBufferConstructors && lazy_sources_ = {}); void nextImpl() override; diff --git a/src/IO/DoubleConverter.h b/src/IO/DoubleConverter.h index 0896aca717e..75429967390 100644 --- a/src/IO/DoubleConverter.h +++ b/src/IO/DoubleConverter.h @@ -29,9 +29,6 @@ template <> struct DoubleToStringConverterFlags template class DoubleConverter : private boost::noncopyable { - DoubleConverter(const DoubleConverter &) = delete; - DoubleConverter & operator=(const DoubleConverter &) = delete; - DoubleConverter() = default; public: diff --git a/src/IO/FileEncryptionCommon.h b/src/IO/FileEncryptionCommon.h index 28d924e6d81..bb6c8d14893 100644 --- a/src/IO/FileEncryptionCommon.h +++ b/src/IO/FileEncryptionCommon.h @@ -56,7 +56,7 @@ public: /// Adds a specified offset to the counter. 
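Most hunks in this stretch add explicit to single-argument constructors (the clang-tidy google-explicit-constructor check). What that prevents, in a self-contained sketch with made-up types:

#include <cstddef>

struct ImplicitBuffer { ImplicitBuffer(size_t) {} };            /// converting constructor
struct ExplicitBuffer { explicit ExplicitBuffer(size_t) {} };   /// conversion must be written out

void consume(const ImplicitBuffer &) {}
void consumeExplicit(const ExplicitBuffer &) {}

int main()
{
    consume(4096);                          /// compiles: the integer silently becomes a buffer
    /// consumeExplicit(4096);              /// would not compile
    consumeExplicit(ExplicitBuffer{4096});  /// the intent is now visible at the call site
}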
InitVector & operator++() { ++counter; return *this; } - InitVector operator++(int) { InitVector res = *this; ++counter; return res; } + InitVector operator++(int) { InitVector res = *this; ++counter; return res; } /// NOLINT InitVector & operator+=(size_t offset) { counter += offset; return *this; } InitVector operator+(size_t offset) const { InitVector res = *this; return res += offset; } diff --git a/src/IO/HashingWriteBuffer.h b/src/IO/HashingWriteBuffer.h index bd00a2b12da..bf636deeb07 100644 --- a/src/IO/HashingWriteBuffer.h +++ b/src/IO/HashingWriteBuffer.h @@ -17,7 +17,7 @@ class IHashingBuffer : public BufferWithOwnMemory public: using uint128 = CityHash_v1_0_2::uint128; - IHashingBuffer(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) + explicit IHashingBuffer(size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) : BufferWithOwnMemory(block_size_), block_pos(0), block_size(block_size_), state(0, 0) { } @@ -66,7 +66,7 @@ private: } public: - HashingWriteBuffer( + explicit HashingWriteBuffer( WriteBuffer & out_, size_t block_size_ = DBMS_DEFAULT_HASHING_BLOCK_SIZE) : IHashingBuffer(block_size_), out(out_) diff --git a/src/IO/LZMAInflatingReadBuffer.h b/src/IO/LZMAInflatingReadBuffer.h index 2d676eeeeb3..920345ee09c 100644 --- a/src/IO/LZMAInflatingReadBuffer.h +++ b/src/IO/LZMAInflatingReadBuffer.h @@ -11,7 +11,7 @@ namespace DB class LZMAInflatingReadBuffer : public BufferWithOwnMemory { public: - LZMAInflatingReadBuffer( + explicit LZMAInflatingReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/Lz4DeflatingWriteBuffer.h b/src/IO/Lz4DeflatingWriteBuffer.h index a27cb42a6e7..68873b5f8ee 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.h +++ b/src/IO/Lz4DeflatingWriteBuffer.h @@ -29,7 +29,7 @@ private: void finalizeBefore() override; void finalizeAfter() override; - LZ4F_preferences_t kPrefs; + LZ4F_preferences_t kPrefs; /// NOLINT LZ4F_compressionContext_t ctx; void * in_data; diff --git a/src/IO/Lz4InflatingReadBuffer.h b/src/IO/Lz4InflatingReadBuffer.h index d4d81f8765c..9921939d453 100644 --- a/src/IO/Lz4InflatingReadBuffer.h +++ b/src/IO/Lz4InflatingReadBuffer.h @@ -14,7 +14,7 @@ namespace DB class Lz4InflatingReadBuffer : public BufferWithOwnMemory { public: - Lz4InflatingReadBuffer( + explicit Lz4InflatingReadBuffer( std::unique_ptr in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/MMapReadBufferFromFileDescriptor.h b/src/IO/MMapReadBufferFromFileDescriptor.h index 03718a61a6c..1715c2200fb 100644 --- a/src/IO/MMapReadBufferFromFileDescriptor.h +++ b/src/IO/MMapReadBufferFromFileDescriptor.h @@ -18,7 +18,7 @@ public: off_t seek(off_t off, int whence) override; protected: - MMapReadBufferFromFileDescriptor() {} + MMapReadBufferFromFileDescriptor() = default; void init(); MMappedFileDescriptor mapped; diff --git a/src/IO/MMappedFileCache.h b/src/IO/MMappedFileCache.h index adbb85a18cf..fe5e7e8e1f7 100644 --- a/src/IO/MMappedFileCache.h +++ b/src/IO/MMappedFileCache.h @@ -27,7 +27,7 @@ private: using Base = LRUCache; public: - MMappedFileCache(size_t max_size_in_bytes) + explicit MMappedFileCache(size_t max_size_in_bytes) : Base(max_size_in_bytes) {} /// Calculate key from path to file and offset. 
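The operator++(int) lines above carry NOLINT because some clang-tidy checks (cert-dcl21-cpp, for one) want post-increment to return a const object, while these classes keep the canonical pair. That canonical pattern, reduced to its skeleton:

struct CounterExample
{
    int value = 0;
    CounterExample & operator++() { ++value; return *this; }  /// pre-increment: mutate, return self
    CounterExample operator++(int)                            /// post-increment: copy, mutate, return the old state
    {
        CounterExample old = *this;
        ++value;
        return old;
    }
};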
diff --git a/src/IO/MMappedFileDescriptor.h b/src/IO/MMappedFileDescriptor.h index 01dc7e1866c..2611093643f 100644 --- a/src/IO/MMappedFileDescriptor.h +++ b/src/IO/MMappedFileDescriptor.h @@ -22,7 +22,7 @@ public: MMappedFileDescriptor(int fd_, size_t offset_); /// Makes empty object that can be initialized with `set`. - MMappedFileDescriptor() {} + MMappedFileDescriptor() = default; virtual ~MMappedFileDescriptor(); @@ -40,10 +40,11 @@ public: void set(int fd_, size_t offset_, size_t length_); void set(int fd_, size_t offset_); -protected: MMappedFileDescriptor(const MMappedFileDescriptor &) = delete; MMappedFileDescriptor(MMappedFileDescriptor &&) = delete; +protected: + void init(); int fd = -1; diff --git a/src/IO/MemoryReadWriteBuffer.h b/src/IO/MemoryReadWriteBuffer.h index f9c11084f62..bcaf9a9a965 100644 --- a/src/IO/MemoryReadWriteBuffer.h +++ b/src/IO/MemoryReadWriteBuffer.h @@ -18,7 +18,7 @@ class MemoryWriteBuffer : public WriteBuffer, public IReadableWriteBuffer, boost public: /// Use max_total_size_ = 0 for unlimited storage - MemoryWriteBuffer( + explicit MemoryWriteBuffer( size_t max_total_size_ = 0, size_t initial_chunk_size_ = DBMS_DEFAULT_BUFFER_SIZE, double growth_rate_ = 2.0, diff --git a/src/IO/MySQLPacketPayloadWriteBuffer.h b/src/IO/MySQLPacketPayloadWriteBuffer.h index f54bec06dfb..d4ce8a8955e 100644 --- a/src/IO/MySQLPacketPayloadWriteBuffer.h +++ b/src/IO/MySQLPacketPayloadWriteBuffer.h @@ -13,7 +13,7 @@ class MySQLPacketPayloadWriteBuffer : public WriteBuffer public: MySQLPacketPayloadWriteBuffer(WriteBuffer & out_, size_t payload_length_, uint8_t & sequence_id_); - bool remainingPayloadSize() { return total_left; } + bool remainingPayloadSize() const { return total_left; } protected: void nextImpl() override; diff --git a/src/IO/NullWriteBuffer.h b/src/IO/NullWriteBuffer.h index 233268474d3..615a9bf5cef 100644 --- a/src/IO/NullWriteBuffer.h +++ b/src/IO/NullWriteBuffer.h @@ -11,7 +11,7 @@ namespace DB class NullWriteBuffer : public BufferWithOwnMemory, boost::noncopyable { public: - NullWriteBuffer(size_t buf_size = 16<<10, char * existing_memory = nullptr, size_t alignment = false); + explicit NullWriteBuffer(size_t buf_size = 16<<10, char * existing_memory = nullptr, size_t alignment = false); void nextImpl() override; }; diff --git a/src/IO/Progress.h b/src/IO/Progress.h index 77187aea8f9..4f1a3df0ffd 100644 --- a/src/IO/Progress.h +++ b/src/IO/Progress.h @@ -56,7 +56,7 @@ struct FileProgress size_t read_bytes; size_t total_bytes_to_read; - FileProgress(size_t read_bytes_, size_t total_bytes_to_read_ = 0) : read_bytes(read_bytes_), total_bytes_to_read(total_bytes_to_read_) {} + explicit FileProgress(size_t read_bytes_, size_t total_bytes_to_read_ = 0) : read_bytes(read_bytes_), total_bytes_to_read(total_bytes_to_read_) {} }; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 4a2e208c7b3..b620f0c49c6 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -229,9 +229,11 @@ public: virtual void prefetch() {} /** - * For reading from remote filesystem, when it matters how much we read. + * Set upper bound for read range [..., position). + * Required for reading from remote filesystem, when it matters how much we read. 
*/ virtual void setReadUntilPosition(size_t /* position */) {} + virtual void setReadUntilEnd() {} protected: diff --git a/src/IO/ReadBufferFromAzureBlobStorage.h b/src/IO/ReadBufferFromAzureBlobStorage.h index 53749ad3199..78d973747ba 100644 --- a/src/IO/ReadBufferFromAzureBlobStorage.h +++ b/src/IO/ReadBufferFromAzureBlobStorage.h @@ -33,6 +33,8 @@ public: bool nextImpl() override; + size_t getFileOffsetOfBufferEnd() const override { return offset; } + private: void initialize(); diff --git a/src/IO/ReadBufferFromEmptyFile.h b/src/IO/ReadBufferFromEmptyFile.h index 311aee1559b..0a14c07dd5c 100644 --- a/src/IO/ReadBufferFromEmptyFile.h +++ b/src/IO/ReadBufferFromEmptyFile.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB { diff --git a/src/IO/ReadBufferFromFile.h b/src/IO/ReadBufferFromFile.h index ff19fa40fdf..52b18b94616 100644 --- a/src/IO/ReadBufferFromFile.h +++ b/src/IO/ReadBufferFromFile.h @@ -49,6 +49,10 @@ public: { return file_name; } + + Range getRemainingReadRange() const override { return Range{ .left = file_offset_of_buffer_end, .right = std::nullopt }; } + + size_t getFileOffsetOfBufferEnd() const override { return file_offset_of_buffer_end; } }; @@ -57,7 +61,7 @@ public: class ReadBufferFromFilePRead : public ReadBufferFromFile { public: - ReadBufferFromFilePRead( + explicit ReadBufferFromFilePRead( const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, @@ -80,7 +84,7 @@ private: OpenedFileCache::OpenedFilePtr file; public: - ReadBufferFromFilePReadWithDescriptorsCache( + explicit ReadBufferFromFilePReadWithDescriptorsCache( const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 188cdd709b5..ba1502fb9aa 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -27,7 +27,7 @@ protected: std::string getFileName() const override; public: - ReadBufferFromFileDescriptor( + explicit ReadBufferFromFileDescriptor( int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, @@ -70,7 +70,7 @@ private: class ReadBufferFromFileDescriptorPRead : public ReadBufferFromFileDescriptor { public: - ReadBufferFromFileDescriptorPRead( + explicit ReadBufferFromFileDescriptorPRead( int fd_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 869432b9484..93bbe02c9cd 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -42,7 +42,8 @@ ReadBufferFromS3::ReadBufferFromS3( UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer_, - size_t read_until_position_) + size_t read_until_position_, + bool restricted_seek_) : SeekableReadBufferWithSize(nullptr, 0) , client_ptr(std::move(client_ptr_)) , bucket(bucket_) @@ -51,6 +52,7 @@ ReadBufferFromS3::ReadBufferFromS3( , read_settings(settings_) , use_external_buffer(use_external_buffer_) , read_until_position(read_until_position_) + , restricted_seek(restricted_seek_) { } @@ -152,10 +154,14 @@ bool ReadBufferFromS3::nextImpl() off_t ReadBufferFromS3::seek(off_t offset_, int whence) { - bool restricted_seek = read_type == SeekableReadBufferWithSize::ReadType::DISK_READ; + if (offset_ == offset && whence == SEEK_SET) + return offset; if (impl && restricted_seek) - throw Exception("Seek is allowed only before first read attempt from the buffer.", 
ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception( + ErrorCodes::CANNOT_SEEK_THROUGH_FILE, + "Seek is allowed only before first read attempt from the buffer (current offset: {}, new offset: {}, reading until position: {}, available: {})", + offset, offset_, read_until_position, available()); if (whence != SEEK_SET) throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); @@ -219,6 +225,15 @@ off_t ReadBufferFromS3::getPosition() return offset - available(); } +void ReadBufferFromS3::setReadUntilPosition(size_t position) +{ + if (position != static_cast(read_until_position)) + { + read_until_position = position; + impl.reset(); + } +} + std::unique_ptr ReadBufferFromS3::initialize() { Aws::S3::Model::GetObjectRequest req; @@ -249,7 +264,9 @@ std::unique_ptr ReadBufferFromS3::initialize() if (outcome.IsSuccess()) { read_result = outcome.GetResultWithOwnership(); - return std::make_unique(read_result.GetBody(), read_settings.remote_fs_buffer_size); + + size_t buffer_size = use_external_buffer ? 0 : read_settings.remote_fs_buffer_size; + return std::make_unique(read_result.GetBody(), buffer_size); } else throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); diff --git a/src/IO/ReadBufferFromS3.h b/src/IO/ReadBufferFromS3.h index e903ba11118..157b6d46b6d 100644 --- a/src/IO/ReadBufferFromS3.h +++ b/src/IO/ReadBufferFromS3.h @@ -31,6 +31,7 @@ private: String key; UInt64 max_single_read_retries; off_t offset = 0; + Aws::S3::Model::GetObjectResult read_result; std::unique_ptr impl; @@ -44,7 +45,8 @@ public: UInt64 max_single_read_retries_, const ReadSettings & settings_, bool use_external_buffer = false, - size_t read_until_position_ = 0); + size_t read_until_position_ = 0, + bool restricted_seek_ = false); bool nextImpl() override; @@ -54,6 +56,12 @@ public: std::optional getTotalSize() override; + void setReadUntilPosition(size_t position) override; + + Range getRemainingReadRange() const override { return Range{ .left = static_cast(offset), .right = read_until_position }; } + + size_t getFileOffsetOfBufferEnd() const override { return offset; } + private: std::unique_ptr initialize(); @@ -62,6 +70,10 @@ private: bool use_external_buffer; off_t read_until_position = 0; + + /// There is different seek policy for disk seek and for non-disk seek + /// (non-disk seek is applied for seekable input formats: orc, arrow, parquet). + bool restricted_seek; }; } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 69942953925..492765b1b4d 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -106,7 +106,7 @@ inline void readChar(char & x, ReadBuffer & buf) template inline void readPODBinary(T & x, ReadBuffer & buf) { - buf.readStrict(reinterpret_cast(&x), sizeof(x)); + buf.readStrict(reinterpret_cast(&x), sizeof(x)); /// NOLINT } template @@ -612,7 +612,7 @@ void readStringUntilNewlineInto(Vector & s, ReadBuffer & buf); struct NullOutput { void append(const char *, size_t) {} - void push_back(char) {} + void push_back(char) {} /// NOLINT }; void parseUUID(const UInt8 * src36, UInt8 * dst16); @@ -1279,7 +1279,6 @@ inline void readTextWithSizeSuffix(T & x, ReadBuffer & buf) default: return; } - return; } /// Read something from text format and trying to parse the suffix. 
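The ReadBufferFromS3 changes above replace the old read_type heuristic with an explicit restricted_seek constructor flag, make seeking to the current offset a cheap no-op, and let setReadUntilPosition() reset the read session only when the bound actually changes. A condensed model of the new seek contract (illustrative, not the class's real code):

#include <stdexcept>

struct RemoteReadBufferModel
{
    long offset = 0;
    bool started_reading = false;
    bool restricted_seek = false;   /// set for disk-style reads; seekable input formats (orc, arrow, parquet) leave it off

    long seek(long new_offset)
    {
        if (new_offset == offset)
            return offset;          /// no-op: keeps the current read session alive
        if (started_reading && restricted_seek)
            throw std::logic_error("Seek is allowed only before first read attempt from the buffer");
        offset = new_offset;        /// the real buffer would also drop its S3 session here
        return offset;
    }
};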
diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index e290cbab36b..e321eecf104 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -76,9 +77,13 @@ struct ReadSettings size_t remote_fs_read_max_backoff_ms = 10000; size_t remote_fs_read_backoff_max_tries = 4; + bool remote_fs_enable_cache = true; + size_t remote_fs_cache_max_wait_sec = 1; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; + FileCachePtr remote_fs_cache; + size_t http_max_tries = 1; size_t http_retry_initial_backoff_ms = 100; size_t http_retry_max_backoff_ms = 1600; diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 4e08a595484..fe4def7fc49 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -320,11 +320,22 @@ namespace detail } catch (...) { - if (response.getStatus() == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND + auto http_status = response.getStatus(); + + if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url) { initialization_error = InitializeError::SKIP_NOT_FOUND_URL; } + else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN + || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED) + { + initialization_error = InitializeError::NON_RETRIABLE_ERROR; + exception = std::current_exception(); + } else { throw; diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index 2647e254626..defd029f05a 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -49,13 +49,13 @@ class PocoHTTPResponse : public Aws::Http::Standard::StandardHttpResponse public: using SessionPtr = HTTPSessionPtr; - PocoHTTPResponse(const std::shared_ptr request) + explicit PocoHTTPResponse(const std::shared_ptr request) : Aws::Http::Standard::StandardHttpResponse(request) , body_stream(request->GetResponseStreamFactory()) { } - void SetResponseBody(Aws::IStream & incoming_stream, SessionPtr & session_) + void SetResponseBody(Aws::IStream & incoming_stream, SessionPtr & session_) /// NOLINT { body_stream = Aws::Utils::Stream::ResponseStream( Aws::New>("http result streambuf", session_, incoming_stream.rdbuf()) diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 72774499445..97cb4f74f90 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -49,7 +49,6 @@ public: private: ClientFactory(); -private: Aws::SDKOptions aws_options; }; diff --git a/src/IO/SeekableReadBuffer.h b/src/IO/SeekableReadBuffer.h index 2dc901ccfd9..3a46630350a 100644 --- a/src/IO/SeekableReadBuffer.h +++ b/src/IO/SeekableReadBuffer.h @@ -6,6 +6,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + + class SeekableReadBuffer : public ReadBuffer { public: @@ -32,6 +38,26 @@ public: * @return Offset from the begin of the underlying buffer / file corresponds to the buffer current position. */ virtual off_t getPosition() = 0; + + struct Range + { + size_t left; + std::optional right; + }; + + /** + * Returns a struct, where `left` is current read position in file and `right` is the + * last included offset for reading according to setReadUntilPosition() or setReadUntilEnd(). + * E.g. 
next nextImpl() call will read within range [left, right]. + */ + virtual Range getRemainingReadRange() const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getRemainingReadRange() not implemented"); + } + + virtual String getInfoForLog() { return ""; } + + virtual size_t getFileOffsetOfBufferEnd() const { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method getFileOffsetOfBufferEnd() not implemented"); } }; using SeekableReadBufferPtr = std::shared_ptr; @@ -48,22 +74,7 @@ public: /// set std::nullopt in case it is impossible to find out total size. virtual std::optional getTotalSize() = 0; - /** - * Some buffers might have different seek restrictions according to where it is used. - * For example, ReadBufferFromS3 and ReadBufferFromWebServer, when used for reading - * from remote disks, require some additional invariants and restrictions, which - * are not needed in other cases. - */ - enum class ReadType - { - DEFAULT, - DISK_READ - }; - - void setReadType(ReadType type) { read_type = type; } - protected: - ReadType read_type = ReadType::DEFAULT; std::optional file_size; }; diff --git a/src/IO/UncompressedCache.h b/src/IO/UncompressedCache.h index 5826b7f020a..93ca1235a42 100644 --- a/src/IO/UncompressedCache.h +++ b/src/IO/UncompressedCache.h @@ -42,7 +42,7 @@ private: using Base = LRUCache; public: - UncompressedCache(size_t max_size_in_bytes) + explicit UncompressedCache(size_t max_size_in_bytes) : Base(max_size_in_bytes) {} /// Calculate key from path to file and offset. diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 50fc158ba76..29c8a60c935 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -132,7 +132,7 @@ inline void readVarUIntImpl(UInt64 & x, ReadBuffer & istr) if (istr.eof()) throwReadAfterEOF(); - UInt64 byte = *istr.position(); + UInt64 byte = *istr.position(); /// NOLINT ++istr.position(); x |= (byte & 0x7F) << (7 * i); @@ -172,7 +172,7 @@ inline const char * readVarUInt(UInt64 & x, const char * istr, size_t size) if (istr == end) throwReadAfterEOF(); - UInt64 byte = *istr; + UInt64 byte = *istr; /// NOLINT ++istr; x |= (byte & 0x7F) << (7 * i); diff --git a/src/IO/WriteBufferFromFile.h b/src/IO/WriteBufferFromFile.h index 988b0be7d00..3363a568bac 100644 --- a/src/IO/WriteBufferFromFile.h +++ b/src/IO/WriteBufferFromFile.h @@ -28,7 +28,7 @@ protected: CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite}; public: - WriteBufferFromFile( + explicit WriteBufferFromFile( const std::string & file_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, int flags = -1, @@ -37,7 +37,7 @@ public: size_t alignment = 0); /// Use pre-opened file descriptor. - WriteBufferFromFile( + explicit WriteBufferFromFile( int & fd, /// Will be set to -1 if constructor didn't throw and ownership of file descriptor is passed to the object. 
const std::string & original_file_name = {}, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, diff --git a/src/IO/WriteBufferFromFileDescriptor.h b/src/IO/WriteBufferFromFileDescriptor.h index b065e22cf95..cc69567932f 100644 --- a/src/IO/WriteBufferFromFileDescriptor.h +++ b/src/IO/WriteBufferFromFileDescriptor.h @@ -11,7 +11,7 @@ namespace DB class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase { public: - WriteBufferFromFileDescriptor( + explicit WriteBufferFromFileDescriptor( int fd_ = -1, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h b/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h index 53e01c3cb26..2803dd4e8bf 100644 --- a/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h +++ b/src/IO/WriteBufferFromFileDescriptorDiscardOnFailure.h @@ -17,7 +17,7 @@ protected: public: using WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor; - ~WriteBufferFromFileDescriptorDiscardOnFailure() override {} + ~WriteBufferFromFileDescriptorDiscardOnFailure() override = default; }; } diff --git a/src/IO/WriteBufferFromOStream.h b/src/IO/WriteBufferFromOStream.h index ea3301fef18..f8b45c2fa59 100644 --- a/src/IO/WriteBufferFromOStream.h +++ b/src/IO/WriteBufferFromOStream.h @@ -12,7 +12,7 @@ namespace DB class WriteBufferFromOStream : public BufferWithOwnMemory { public: - WriteBufferFromOStream( + explicit WriteBufferFromOStream( std::ostream & ostr_, size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, @@ -21,7 +21,7 @@ public: ~WriteBufferFromOStream() override; protected: - WriteBufferFromOStream(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0); + explicit WriteBufferFromOStream(size_t size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, size_t alignment = 0); void nextImpl() override; diff --git a/src/IO/WriteBufferFromPocoSocket.h b/src/IO/WriteBufferFromPocoSocket.h index 2fb203189f3..295ca16ecaf 100644 --- a/src/IO/WriteBufferFromPocoSocket.h +++ b/src/IO/WriteBufferFromPocoSocket.h @@ -14,7 +14,7 @@ namespace DB class WriteBufferFromPocoSocket : public BufferWithOwnMemory { public: - WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + explicit WriteBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); ~WriteBufferFromPocoSocket() override; diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 8b89626ee18..a4fbcbcdeeb 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -6,6 +6,7 @@ # include # include +# include # include # include @@ -14,8 +15,6 @@ # include -# include - namespace Aws::S3 { class S3Client; diff --git a/src/IO/WriteBufferFromTemporaryFile.h b/src/IO/WriteBufferFromTemporaryFile.h index 642c36b9be6..06e2911db26 100644 --- a/src/IO/WriteBufferFromTemporaryFile.h +++ b/src/IO/WriteBufferFromTemporaryFile.h @@ -20,7 +20,7 @@ public: ~WriteBufferFromTemporaryFile() override; private: - WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); + explicit WriteBufferFromTemporaryFile(std::unique_ptr && tmp_file); std::shared_ptr getReadBufferImpl() override; diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 23ae3a70ef3..d74b366b8e2 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -67,7 +67,7 @@ private: void finalizeImpl() override final { vector.resize( - ((position() - 
reinterpret_cast(vector.data())) + ((position() - reinterpret_cast(vector.data())) /// NOLINT + sizeof(typename VectorType::value_type) - 1) /// Align up. / sizeof(typename VectorType::value_type)); diff --git a/src/IO/WriteBufferValidUTF8.h b/src/IO/WriteBufferValidUTF8.h index 8b33593c930..daaf0427f88 100644 --- a/src/IO/WriteBufferValidUTF8.h +++ b/src/IO/WriteBufferValidUTF8.h @@ -16,7 +16,7 @@ class WriteBufferValidUTF8 final : public BufferWithOwnMemory<WriteBuffer> public: static const size_t DEFAULT_SIZE; - WriteBufferValidUTF8( + explicit WriteBufferValidUTF8( WriteBuffer & output_buffer_, bool group_replacements_ = true, const char * replacement_ = "\xEF\xBF\xBD", diff --git a/src/IO/WriteHelpers.cpp b/src/IO/WriteHelpers.cpp index b41f621e0b9..9433d31027c 100644 --- a/src/IO/WriteHelpers.cpp +++ b/src/IO/WriteHelpers.cpp @@ -7,7 +7,7 @@ namespace DB { template <typename IteratorSrc, typename IteratorDst> -void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes) +void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes) { size_t src_pos = 0; size_t dst_pos = 0; diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index b4f1c48e327..7d4e1eba3ca 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -80,7 +80,7 @@ inline void writeChar(char c, size_t n, WriteBuffer & buf) template <typename T> inline void writePODBinary(const T & x, WriteBuffer & buf) { - buf.write(reinterpret_cast<const char *>(&x), sizeof(x)); + buf.write(reinterpret_cast<const char *>(&x), sizeof(x)); /// NOLINT } template @@ -663,7 +663,7 @@ inline void writeXMLStringForTextElement(const StringRef & s, WriteBuffer & buf) } template <typename IteratorSrc, typename IteratorDst> -void formatHex(IteratorSrc src, IteratorDst dst, const size_t num_bytes); +void formatHex(IteratorSrc src, IteratorDst dst, size_t num_bytes); void formatUUID(const UInt8 * src16, UInt8 * dst36); void formatUUID(std::reverse_iterator src16, UInt8 * dst36); diff --git a/src/IO/WriteIntText.h b/src/IO/WriteIntText.h index b8d2acc7d5d..c9a4cb0241a 100644 --- a/src/IO/WriteIntText.h +++ b/src/IO/WriteIntText.h @@ -5,22 +5,19 @@ #include -namespace -{ - template <typename T> constexpr size_t max_int_width = 20; - template <> inline constexpr size_t max_int_width<UInt8> = 3; /// 255 - template <> inline constexpr size_t max_int_width<Int8> = 4; /// -128 - template <> inline constexpr size_t max_int_width<UInt16> = 5; /// 65535 - template <> inline constexpr size_t max_int_width<Int16> = 6; /// -32768 - template <> inline constexpr size_t max_int_width<UInt32> = 10; /// 4294967295 - template <> inline constexpr size_t max_int_width<Int32> = 11; /// -2147483648 - template <> inline constexpr size_t max_int_width<UInt64> = 20; /// 18446744073709551615 - template <> inline constexpr size_t max_int_width<Int64> = 20; /// -9223372036854775808 - template <> inline constexpr size_t max_int_width<UInt128> = 39; /// 340282366920938463463374607431768211455 - template <> inline constexpr size_t max_int_width<Int128> = 40; /// -170141183460469231731687303715884105728 - template <> inline constexpr size_t max_int_width<UInt256> = 78; /// 115792089237316195423570985008687907853269984665640564039457584007913129639935 - template <> inline constexpr size_t max_int_width<Int256> = 78; /// -57896044618658097711785492504343953926634992332820282019728792003956564819968 -} +template <typename T> constexpr size_t max_int_width = 20; +template <> inline constexpr size_t max_int_width<UInt8> = 3; /// 255 +template <> inline constexpr size_t max_int_width<Int8> = 4; /// -128 +template <> inline constexpr size_t max_int_width<UInt16> = 5; /// 65535 +template <> inline constexpr size_t max_int_width<Int16> = 6; /// -32768 +template <> inline constexpr size_t max_int_width<UInt32> = 10; /// 4294967295 +template <> inline constexpr size_t max_int_width<Int32> = 11; /// -2147483648 +template <> inline constexpr size_t max_int_width<UInt64> = 20; /// 18446744073709551615 +template <> inline constexpr size_t max_int_width<Int64> = 20; /// -9223372036854775808 +template <> inline constexpr size_t max_int_width<UInt128> = 39; /// 340282366920938463463374607431768211455 +template <> inline constexpr size_t max_int_width<Int128> = 40; /// -170141183460469231731687303715884105728 +template <> inline constexpr size_t max_int_width<UInt256> = 78; /// 115792089237316195423570985008687907853269984665640564039457584007913129639935 +template <> inline constexpr size_t max_int_width<Int256> = 78; /// -57896044618658097711785492504343953926634992332820282019728792003956564819968 namespace DB diff --git a/src/IO/ZstdInflatingReadBuffer.h b/src/IO/ZstdInflatingReadBuffer.h index ec80b860e0e..7f246b02127 100644 --- a/src/IO/ZstdInflatingReadBuffer.h +++ b/src/IO/ZstdInflatingReadBuffer.h @@ -16,7 +16,7 @@ namespace ErrorCodes class ZstdInflatingReadBuffer : public BufferWithOwnMemory<ReadBuffer> { public: - ZstdInflatingReadBuffer( + explicit ZstdInflatingReadBuffer( std::unique_ptr<ReadBuffer> in_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE, char * existing_memory = nullptr, diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 1c5b1cdb0c9..b6be7adbbee 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -154,7 +154,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) if (likely(!buf.eof() && buf.position() + MAX_LENGTH <= buf.buffer().end())) { - auto initial_position = buf.position(); + auto * initial_position = buf.position(); auto res = fast_float::from_chars(initial_position, buf.buffer().end(), x); if (unlikely(res.ec != std::errc())) diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 33d85afb7c3..cad451f8ef5 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -14,6 +14,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + +} namespace { void updateUserFromQueryImpl( @@ -22,7 +27,7 @@ namespace const std::shared_ptr<ASTUserNameWithHost> & override_name, const std::optional<RolesOrUsersSet> & override_default_roles, const std::optional<SettingsProfileElements> & override_settings, - const std::optional<RolesOrUsersSet> & override_grantees) + const std::optional<RolesOrUsersSet> & override_grantees, bool allow_no_password, bool allow_plaintext_password) { if (override_name) user.setName(override_name->toString()); @@ -30,10 +35,15 @@ namespace user.setName(query.new_name); else if (query.names->size() == 1) user.setName(query.names->front()->toString()); - if (query.auth_data) + { user.auth_data = *query.auth_data; - + /// Creating or altering a user with auth type PLAINTEXT_PASSWORD or NO_PASSWORD must be rejected if the corresponding allow_* setting is unset in the server configuration. + if ((query.auth_data->getType() == AuthenticationType::PLAINTEXT_PASSWORD && !allow_plaintext_password) || (query.auth_data->getType() == AuthenticationType::NO_PASSWORD && !allow_no_password)) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "User is not allowed to ALTER/CREATE USERS with type " + toString(query.auth_data->getType()) + ". Please configure the user with auth type " + + "SHA256_PASSWORD or DOUBLE_SHA1_PASSWORD, or enable the setting allow_plaintext_and_no_password in the server configuration to allow auth type " + toString(query.auth_data->getType()) + ". "
+ + "It is not recommended to use " + toString(query.auth_data->getType()) + "."); + } if (override_name && !override_name->host_pattern.empty()) { user.allowed_client_hosts = AllowedClientHosts{}; @@ -75,13 +85,14 @@ namespace } } - BlockIO InterpreterCreateUserQuery::execute() { const auto & query = query_ptr->as(); auto & access_control = getContext()->getAccessControl(); auto access = getContext()->getAccess(); access->checkAccess(query.alter ? AccessType::ALTER_USER : AccessType::CREATE_USER); + bool allow_plaintext_password = access_control.isPlaintextPasswordAllowed(); + bool allow_no_password = access_control.isNoPasswordAllowed(); std::optional default_roles_from_query; if (query.default_roles) @@ -93,10 +104,8 @@ BlockIO InterpreterCreateUserQuery::execute() access->checkAdminOption(role); } } - if (!query.cluster.empty()) return executeDDLQueryOnCluster(query_ptr, getContext()); - std::optional settings_from_query; if (query.settings) settings_from_query = SettingsProfileElements{*query.settings, access_control}; @@ -110,7 +119,7 @@ BlockIO InterpreterCreateUserQuery::execute() auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr { auto updated_user = typeid_cast>(entity->clone()); - updateUserFromQueryImpl(*updated_user, query, {}, default_roles_from_query, settings_from_query, grantees_from_query); + updateUserFromQueryImpl(*updated_user, query, {}, default_roles_from_query, settings_from_query, grantees_from_query, allow_no_password, allow_plaintext_password); return updated_user; }; @@ -129,7 +138,7 @@ BlockIO InterpreterCreateUserQuery::execute() for (const auto & name : *query.names) { auto new_user = std::make_shared(); - updateUserFromQueryImpl(*new_user, query, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}); + updateUserFromQueryImpl(*new_user, query, name, default_roles_from_query, settings_from_query, RolesOrUsersSet::AllTag{}, allow_no_password, allow_plaintext_password); new_users.emplace_back(std::move(new_user)); } @@ -157,9 +166,9 @@ BlockIO InterpreterCreateUserQuery::execute() } -void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query) +void InterpreterCreateUserQuery::updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password, bool allow_plaintext_password) { - updateUserFromQueryImpl(user, query, {}, {}, {}, {}); + updateUserFromQueryImpl(user, query, {}, {}, {}, {}, allow_no_password, allow_plaintext_password); } } diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.h b/src/Interpreters/Access/InterpreterCreateUserQuery.h index 7d357924d35..42d911c712b 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.h +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.h @@ -17,7 +17,7 @@ public: BlockIO execute() override; - static void updateUserFromQuery(User & user, const ASTCreateUserQuery & query); + static void updateUserFromQuery(User & user, const ASTCreateUserQuery & query, bool allow_no_password=true, bool allow_plaintext_password=true); private: ASTPtr query_ptr; diff --git a/src/Interpreters/AddIndexConstraintsOptimizer.h b/src/Interpreters/AddIndexConstraintsOptimizer.h index 228d8d8ad1a..9ed4a8978c8 100644 --- a/src/Interpreters/AddIndexConstraintsOptimizer.h +++ b/src/Interpreters/AddIndexConstraintsOptimizer.h @@ -23,8 +23,7 @@ using StorageMetadataPtr = std::shared_ptr; class AddIndexConstraintsOptimizer final { public: - AddIndexConstraintsOptimizer( - const StorageMetadataPtr & metadata_snapshot); 
+ explicit AddIndexConstraintsOptimizer(const StorageMetadataPtr & metadata_snapshot); void perform(CNFQuery & cnf_query); diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index 00d2f76f043..5904cc48084 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -42,9 +42,6 @@ using Sizes = std::vector; /// 2,1,1 /// -namespace -{ - template constexpr auto getBitmapSize() { @@ -62,8 +59,6 @@ constexpr auto getBitmapSize() 0))); } -} - template void fillFixedBatch(size_t num_rows, const T * source, T * dest) { @@ -255,7 +250,7 @@ static inline T ALWAYS_INLINE packFixed( /// Hash a set of keys into a UInt128 value. -static inline UInt128 ALWAYS_INLINE hash128( +static inline UInt128 ALWAYS_INLINE hash128( /// NOLINT size_t i, size_t keys_size, const ColumnRawPtrs & key_columns) { UInt128 key; @@ -269,29 +264,9 @@ static inline UInt128 ALWAYS_INLINE hash128( return key; } - -/// Copy keys to the pool. Then put into pool StringRefs to them and return the pointer to the first. -static inline StringRef * ALWAYS_INLINE placeKeysInPool( - size_t keys_size, StringRefs & keys, Arena & pool) -{ - for (size_t j = 0; j < keys_size; ++j) - { - char * place = pool.alloc(keys[j].size); - memcpySmallAllowReadWriteOverflow15(place, keys[j].data, keys[j].size); - keys[j].data = place; - } - - /// Place the StringRefs on the newly copied keys in the pool. - char * res = pool.alignedAlloc(keys_size * sizeof(StringRef), alignof(StringRef)); - memcpySmallAllowReadWriteOverflow15(res, keys.data(), keys_size * sizeof(StringRef)); - - return reinterpret_cast(res); -} - - /** Serialize keys into a continuous chunk of memory. */ -static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( +static inline StringRef ALWAYS_INLINE serializeKeysToPoolContiguous( /// NOLINT size_t i, size_t keys_size, const ColumnRawPtrs & key_columns, Arena & pool) { const char * begin = nullptr; diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 3457e01f98f..f03bf45fbc6 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -660,7 +660,7 @@ struct AggregatedDataVariants : private boost::noncopyable case Type::without_key: break; #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: NAME = std::make_unique(); break; + case Type::NAME: (NAME) = std::make_unique(); break; APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } @@ -677,7 +677,7 @@ struct AggregatedDataVariants : private boost::noncopyable case Type::without_key: return 1; #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return NAME->data.size() + (without_key != nullptr); + case Type::NAME: return (NAME)->data.size() + (without_key != nullptr); APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } @@ -694,7 +694,7 @@ struct AggregatedDataVariants : private boost::noncopyable case Type::without_key: return 1; #define M(NAME, IS_TWO_LEVEL) \ - case Type::NAME: return NAME->data.size(); + case Type::NAME: return (NAME)->data.size(); APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M } @@ -753,6 +753,7 @@ struct AggregatedDataVariants : private boost::noncopyable M(low_cardinality_key_string) \ M(low_cardinality_key_fixed_string) \ + /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ M(key8) \ M(key16) \ @@ -766,6 +767,7 @@ struct AggregatedDataVariants : private boost::noncopyable M(low_cardinality_key8) \ M(low_cardinality_key16) \ + /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) \ 
APPLY_FOR_VARIANTS_NOT_CONVERTIBLE_TO_TWO_LEVEL(M) \ APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) \ @@ -787,6 +789,7 @@ struct AggregatedDataVariants : private boost::noncopyable void convertToTwoLevel(); + /// NOLINTNEXTLINE #define APPLY_FOR_VARIANTS_TWO_LEVEL(M) \ M(key32_two_level) \ M(key64_two_level) \ @@ -1341,7 +1344,7 @@ private: template Method & getDataVariant(AggregatedDataVariants & variants); #define M(NAME, IS_TWO_LEVEL) \ - template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant(AggregatedDataVariants & variants) { return *variants.NAME; } + template <> inline decltype(AggregatedDataVariants::NAME)::element_type & getDataVariant(AggregatedDataVariants & variants) { return *variants.NAME; } /// NOLINT APPLY_FOR_AGGREGATED_VARIANTS(M) diff --git a/src/Interpreters/BloomFilter.h b/src/Interpreters/BloomFilter.h index 279ab6947ec..1fb9895cc27 100644 --- a/src/Interpreters/BloomFilter.h +++ b/src/Interpreters/BloomFilter.h @@ -31,7 +31,7 @@ public: using UnderType = UInt64; using Container = std::vector; - BloomFilter(const BloomFilterParameters & params); + explicit BloomFilter(const BloomFilterParameters & params); /// size -- size of filter in bytes. /// hashes -- number of used hash functions. /// seed -- random seed for hash functions generation. diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 827e7d27409..75af25e842e 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -19,7 +19,7 @@ namespace ErrorCodes } -void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) const +void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", ErrorCodes::LOGICAL_ERROR); @@ -99,7 +99,7 @@ void ClientInfo::write(WriteBuffer & out, const UInt64 server_protocol_revision) } -void ClientInfo::read(ReadBuffer & in, const UInt64 client_protocol_revision) +void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 3ce740c6436..0b40c78becc 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -119,8 +119,8 @@ public: * Only values that are not calculated automatically or passed separately are serialized. * Revisions are passed to use format that server will understand or client was used. */ - void write(WriteBuffer & out, const UInt64 server_protocol_revision) const; - void read(ReadBuffer & in, const UInt64 client_protocol_revision); + void write(WriteBuffer & out, UInt64 server_protocol_revision) const; + void read(ReadBuffer & in, UInt64 client_protocol_revision); /// Initialize parameters on client initiating query. 
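The ClientInfo hunks above drop const from by-value parameters in declarations. Top-level const on a value parameter is not part of the function's type, so in a declaration it is pure noise; at most it constrains the definition's body:

#include <cstdint>

void writeExample(uint64_t revision);        /// declaration: a const here would be silently ignored

void writeExample(const uint64_t revision)   /// same function; const only forbids reassigning the local copy
{
    /// revision = 0;                        /// would not compile
    (void)revision;
}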
void setInitialQuery(); diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index 248d212ebf0..e9f26c21089 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -63,7 +63,6 @@ public: /// is used to set a limit on the size of the timeout static Poco::Timespan saturate(Poco::Timespan v, Poco::Timespan limit); -public: using SlotToShard = std::vector; struct Address @@ -192,7 +191,6 @@ public: /// Name of directory for asynchronous write to StorageDistributed if has_internal_replication const std::string & insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const; - public: ShardInfoInsertPathForInternalReplication insert_path_for_internal_replication; /// Number of the shard, the indexation begins with 1 UInt32 shard_num = 0; diff --git a/src/Interpreters/ComparisonGraph.cpp b/src/Interpreters/ComparisonGraph.cpp index e236de67fdc..37d603b4923 100644 --- a/src/Interpreters/ComparisonGraph.cpp +++ b/src/Interpreters/ComparisonGraph.cpp @@ -159,7 +159,7 @@ ComparisonGraph::CompareResult ComparisonGraph::pathToCompareResult(Path path, b __builtin_unreachable(); } -std::optional ComparisonGraph::findPath(const size_t start, const size_t finish) const +std::optional ComparisonGraph::findPath(size_t start, size_t finish) const { const auto it = dists.find(std::make_pair(start, finish)); if (it == std::end(dists)) @@ -232,7 +232,7 @@ ComparisonGraph::CompareResult ComparisonGraph::compare(const ASTPtr & left, con return CompareResult::UNKNOWN; } -bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const +bool ComparisonGraph::isPossibleCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const { const auto result = compare(left, right); @@ -267,7 +267,7 @@ bool ComparisonGraph::isPossibleCompare(const CompareResult expected, const ASTP return possible_pairs.contains({expected, result}); } -bool ComparisonGraph::isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const +bool ComparisonGraph::isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const { const auto result = compare(left, right); @@ -324,12 +324,12 @@ std::optional ComparisonGraph::getComponentId(const ASTPtr & ast) const } } -bool ComparisonGraph::hasPath(const size_t left, const size_t right) const +bool ComparisonGraph::hasPath(size_t left, size_t right) const { return findPath(left, right) || findPath(right, left); } -ASTs ComparisonGraph::getComponent(const size_t id) const +ASTs ComparisonGraph::getComponent(size_t id) const { return graph.vertices[id].asts; } @@ -387,7 +387,7 @@ ComparisonGraph::CompareResult ComparisonGraph::functionNameToCompareResult(cons return it == std::end(relation_to_compare) ? 
CompareResult::UNKNOWN : it->second; } -ComparisonGraph::CompareResult ComparisonGraph::inverseCompareResult(const CompareResult result) +ComparisonGraph::CompareResult ComparisonGraph::inverseCompareResult(CompareResult result) { static const std::unordered_map<CompareResult, CompareResult> inverse_relations = { @@ -486,7 +486,7 @@ std::vector<ASTs> ComparisonGraph::getVertices() const void ComparisonGraph::dfsComponents( const Graph & reversed_graph, size_t v, - OptionalIndices & components, const size_t component) + OptionalIndices & components, size_t component) { components[v] = component; for (const auto & edge : reversed_graph.edges[v]) diff --git a/src/Interpreters/ComparisonGraph.h b/src/Interpreters/ComparisonGraph.h index 20d6f135a0d..3891fbf51cf 100644 --- a/src/Interpreters/ComparisonGraph.h +++ b/src/Interpreters/ComparisonGraph.h @@ -17,7 +17,7 @@ class ComparisonGraph { public: /// atomic_formulas are extracted from constraints. - ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas); + explicit ComparisonGraph(const std::vector<ASTPtr> & atomic_formulas); enum class CompareResult { @@ -32,15 +32,15 @@ public: static CompareResult atomToCompareResult(const CNFQuery::AtomicFormula & atom); static CompareResult functionNameToCompareResult(const std::string & name); - static CompareResult inverseCompareResult(const CompareResult result); + static CompareResult inverseCompareResult(CompareResult result); CompareResult compare(const ASTPtr & left, const ASTPtr & right) const; /// It's possible that left <expected> right - bool isPossibleCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; + bool isPossibleCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; /// It's always true that left <expected> right - bool isAlwaysCompare(const CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; + bool isAlwaysCompare(CompareResult expected, const ASTPtr & left, const ASTPtr & right) const; /// Returns all expressions from component to which @ast belongs if any. std::vector<ASTPtr> getEqual(const ASTPtr & ast) const; @@ -52,11 +52,11 @@ public: std::optional<std::size_t> getComponentId(const ASTPtr & ast) const; /// Returns all expressions from component. - std::vector<ASTPtr> getComponent(const std::size_t id) const; + std::vector<ASTPtr> getComponent(size_t id) const; size_t getNumOfComponents() const { return graph.vertices.size(); } - bool hasPath(const size_t left, const size_t right) const; + bool hasPath(size_t left, size_t right) const; /// Find constants lessOrEqual and greaterOrEqual. /// For int and double linear programming can be applied here. @@ -131,7 +131,7 @@ private: /// Assigns index of component for each vertex. static void dfsComponents( const Graph & reversed_graph, size_t v, - OptionalIndices & components, const size_t component); + OptionalIndices & components, size_t component); enum class Path { @@ -140,7 +140,7 @@ }; static CompareResult pathToCompareResult(Path path, bool inverse); - std::optional<Path> findPath(const size_t start, const size_t finish) const; + std::optional<Path> findPath(size_t start, size_t finish) const; /// Calculate @dists. 
static std::map, Path> buildDistsFromGraph(const Graph & g); diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index d6b328a0380..a043ec24082 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -297,6 +298,17 @@ struct ContextSharedPart ~ContextSharedPart() { + /// Wait for thread pool for background writes, + /// since it may use per-user MemoryTracker which will be destroyed here. + try + { + IDiskRemote::getThreadPoolWriter().wait(); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + try { shutdown(); @@ -3236,6 +3248,8 @@ ReadSettings Context::getReadSettings() const res.remote_fs_read_max_backoff_ms = settings.remote_fs_read_max_backoff_ms; res.remote_fs_read_backoff_max_tries = settings.remote_fs_read_backoff_max_tries; + res.remote_fs_enable_cache = settings.remote_fs_enable_cache; + res.remote_fs_cache_max_wait_sec = settings.remote_fs_cache_max_wait_sec; res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; diff --git a/src/Interpreters/CrashLog.h b/src/Interpreters/CrashLog.h index 61503d95cee..930515c43ea 100644 --- a/src/Interpreters/CrashLog.h +++ b/src/Interpreters/CrashLog.h @@ -41,7 +41,7 @@ class CrashLog : public SystemLog public: static void initialize(std::shared_ptr crash_log_) { - crash_log = std::move(crash_log_); + crash_log = crash_log_; } }; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index c2a46277015..34b42a3397c 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -107,7 +107,7 @@ struct TemporaryTableHolder : boost::noncopyable, WithContext StoragePtr getTable() const; - operator bool () const { return id != UUIDHelpers::Nil; } + operator bool () const { return id != UUIDHelpers::Nil; } /// NOLINT IDatabase * temporary_tables = nullptr; UUID id = UUIDHelpers::Nil; diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h index 0eeab8348fd..3966653235a 100644 --- a/src/Interpreters/GetAggregatesVisitor.h +++ b/src/Interpreters/GetAggregatesVisitor.h @@ -2,6 +2,9 @@ #include #include +#include +#include +#include namespace DB { diff --git a/src/Interpreters/IInterpreterUnionOrSelectQuery.h b/src/Interpreters/IInterpreterUnionOrSelectQuery.h index 1f59dd36354..7906ab189fc 100644 --- a/src/Interpreters/IInterpreterUnionOrSelectQuery.h +++ b/src/Interpreters/IInterpreterUnionOrSelectQuery.h @@ -47,7 +47,7 @@ public: /// then we can cache the scalars forever (for any query that doesn't use the virtual storage either), but if it does use the virtual /// storage then we can only keep the scalar result around while we are working with that source block /// You can find more details about this under ExecuteScalarSubqueriesMatcher::visit - bool usesViewSource() { return uses_view_source; } + bool usesViewSource() const { return uses_view_source; } protected: ASTPtr query_ptr; diff --git a/src/Interpreters/IJoin.h b/src/Interpreters/IJoin.h index ba8367b57e3..2a3171adccd 100644 --- a/src/Interpreters/IJoin.h +++ b/src/Interpreters/IJoin.h @@ -25,7 +25,7 @@ public: /// Add block of data from right hand of JOIN. /// @returns false, if some limit was exceeded and you should not insert more data. 
- virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; + virtual bool addJoinedBlock(const Block & block, bool check_limits = true) = 0; /// NOLINT virtual void checkTypesOfKeys(const Block & block) const = 0; diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.h b/src/Interpreters/InJoinSubqueriesPreprocessor.h index d2166185d2b..e4ec3c81ed2 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.h +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.h @@ -43,7 +43,7 @@ public: /// These methods could be overridden for the need of the unit test. virtual bool hasAtLeastTwoShards(const IStorage & table) const; virtual std::pair getRemoteDatabaseAndTableName(const IStorage & table) const; - virtual ~CheckShardsAndTables() {} + virtual ~CheckShardsAndTables() = default; }; InJoinSubqueriesPreprocessor( diff --git a/src/Interpreters/LogicalExpressionsOptimizer.h b/src/Interpreters/LogicalExpressionsOptimizer.h index 1a04b199a13..4991d31f8b1 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/src/Interpreters/LogicalExpressionsOptimizer.h @@ -29,7 +29,7 @@ class LogicalExpressionsOptimizer final { const UInt64 optimize_min_equality_disjunction_chain_length; - ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_) + explicit ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_) : optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_) {} }; @@ -68,7 +68,6 @@ private: using DisjunctiveEqualityChainsMap = std::map; using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type; -private: /** Collect information about all the equations in the OR chains (not necessarily homogeneous). * This information is grouped by the expression that is on the left side of the equation. */ @@ -92,12 +91,10 @@ private: /// Restore the original column order after optimization. void reorderColumns(); -private: using ParentNodes = std::vector; using FunctionParentMap = std::unordered_map; using ColumnToPosition = std::unordered_map; -private: ASTSelectQuery * select_query; const ExtractedSettings settings; /// Information about the OR-chains inside the query. 
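A pattern repeated across these headers is marking single-argument constructors `explicit`, with a `/// NOLINT` opt-out wherever the implicit conversion is intentional (as seen in several hunks below). A minimal self-contained sketch of the implicit-conversion pitfall this closes off; the names are illustrative, not taken from the diff:

    using UInt64 = unsigned long long;

    struct ExtractedSettings
    {
        explicit ExtractedSettings(UInt64 chain_length_) : chain_length(chain_length_) {}
        UInt64 chain_length;
    };

    inline void runOptimizer(const ExtractedSettings &) {}

    void caller()
    {
        runOptimizer(ExtractedSettings(3));  // conversion spelled out at the call site
        // runOptimizer(3);                  // no longer compiles once the ctor is explicit
    }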
diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index df74a94ee57..515ef6c3058 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -130,6 +130,9 @@ static NamesAndTypesList getColumnsList(const ASTExpressionList * columns_defini child = new_child; } } + + if (type_name_upper == "DATE") + data_type_function->name = "Date32"; } if (is_nullable) data_type = makeASTFunction("Nullable", data_type); @@ -335,7 +338,7 @@ static ASTPtr getPartitionPolicy(const NamesAndTypesList & primary_keys) if (which.isNullable()) throw Exception("LOGICAL ERROR: MySQL primary key must be not null, it is a bug.", ErrorCodes::LOGICAL_ERROR); - if (which.isDate() || which.isDateTime() || which.isDateTime64()) + if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64()) { /// In any case, date or datetime is always the best partitioning key return makeASTFunction("toYYYYMM", std::make_shared(primary_key.name)); diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index 680b9bd5606..71578bd5db7 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -39,7 +39,7 @@ TEST(MySQLCreateRewritten, ColumnsDataType) { {"TINYINT", "Int8"}, {"SMALLINT", "Int16"}, {"MEDIUMINT", "Int32"}, {"INT", "Int32"}, {"INTEGER", "Int32"}, {"BIGINT", "Int64"}, {"FLOAT", "Float32"}, {"DOUBLE", "Float64"}, - {"VARCHAR(10)", "String"}, {"CHAR(10)", "String"}, {"Date", "Date"}, {"DateTime", "DateTime"}, + {"VARCHAR(10)", "String"}, {"CHAR(10)", "String"}, {"Date", "Date32"}, {"DateTime", "DateTime"}, {"TIMESTAMP", "DateTime"}, {"BOOLEAN", "Bool"}, {"BIT", "UInt64"}, {"SET", "UInt64"}, {"YEAR", "UInt16"}, {"TIME", "Int64"}, {"GEOMETRY", "String"} }; @@ -104,7 +104,7 @@ TEST(MySQLCreateRewritten, PartitionPolicy) {"MEDIUMINT", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"INT", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"INTEGER", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"BIGINT", "Int64", " PARTITION BY intDiv(key, 18446744073709551)"}, {"FLOAT", "Float32", ""}, {"DOUBLE", "Float64", ""}, {"VARCHAR(10)", "String", ""}, {"CHAR(10)", "String", ""}, - {"Date", "Date", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, + {"Date", "Date32", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"TIMESTAMP", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"BOOLEAN", "Bool", " PARTITION BY key"} }; @@ -135,7 +135,7 @@ TEST(MySQLCreateRewritten, OrderbyPolicy) {"MEDIUMINT", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"INT", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"INTEGER", "Int32", " PARTITION BY intDiv(key, 4294967)"}, {"BIGINT", "Int64", " PARTITION BY intDiv(key, 18446744073709551)"}, {"FLOAT", "Float32", ""}, {"DOUBLE", "Float64", ""}, {"VARCHAR(10)", "String", ""}, {"CHAR(10)", "String", ""}, - {"Date", "Date", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, + {"Date", "Date32", " PARTITION BY toYYYYMM(key)"}, {"DateTime", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"TIMESTAMP", "DateTime", " PARTITION BY toYYYYMM(key)"}, {"BOOLEAN", "Bool", " PARTITION BY key"} }; diff --git a/src/Interpreters/OpenTelemetrySpanLog.h b/src/Interpreters/OpenTelemetrySpanLog.h index 
f4b3a388b54..aa99a9f8e4b 100644 --- a/src/Interpreters/OpenTelemetrySpanLog.h +++ b/src/Interpreters/OpenTelemetrySpanLog.h @@ -25,7 +25,7 @@ struct OpenTelemetrySpan struct OpenTelemetrySpanLogElement : public OpenTelemetrySpan { OpenTelemetrySpanLogElement() = default; - OpenTelemetrySpanLogElement(const OpenTelemetrySpan & span) + explicit OpenTelemetrySpanLogElement(const OpenTelemetrySpan & span) : OpenTelemetrySpan(span) {} static std::string name() { return "OpenTelemetrySpanLog"; } @@ -44,7 +44,7 @@ public: struct OpenTelemetrySpanHolder : public OpenTelemetrySpan { - OpenTelemetrySpanHolder(const std::string & _operation_name); + explicit OpenTelemetrySpanHolder(const std::string & _operation_name); void addAttribute(const std::string& name, UInt64 value); void addAttribute(const std::string& name, const std::string& value); void addAttribute(const Exception & e); diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h index 05d0330196b..ad98f92bafd 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.h @@ -10,7 +10,7 @@ namespace DB class OptimizeIfWithConstantConditionVisitor { public: - OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) + explicit OptimizeIfWithConstantConditionVisitor(Aliases & aliases_) : aliases(aliases_) {} diff --git a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp index ecfda4cd0c1..991b449196d 100644 --- a/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp +++ b/src/Interpreters/OptimizeShardingKeyRewriteInVisitor.cpp @@ -55,7 +55,12 @@ bool shardContains( data.sharding_key_column_name); /// The value from IN can be non-numeric, /// but in this case it should be convertible to numeric type, let's try. - sharding_value = convertFieldToType(sharding_value, DataTypeUInt64()); + /// + /// NOTE: that conversion should not be done for signed types, + /// since it uses accurate cast, that will return Null, + /// but we need static_cast<> (as createBlockSelector()). + if (!isInt64OrUInt64FieldType(sharding_value.getType())) + sharding_value = convertFieldToType(sharding_value, DataTypeUInt64()); /// In case of conversion is not possible (NULL), shard cannot contain the value anyway. 
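/// Worked example for the NOTE above (hypothetical value, not from the diff): for a
/// signed sharding key, Field(Int64(-1)) does not fit into UInt64, so the accurate
/// cast inside convertFieldToType() yields Null and the row would be treated as not
/// belonging to the shard, while createBlockSelector() actually routes it via
/// static_cast<UInt64>(-1) == 0xFFFFFFFFFFFFFFFF. Skipping the conversion for
/// Int64/UInt64 field types keeps shardContains() consistent with the real routing.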
if (sharding_value.isNull()) return false; diff --git a/src/Interpreters/PartLog.cpp b/src/Interpreters/PartLog.cpp index f89f836871a..4947b50513c 100644 --- a/src/Interpreters/PartLog.cpp +++ b/src/Interpreters/PartLog.cpp @@ -46,6 +46,7 @@ NamesAndTypesList PartLogElement::getNamesAndTypes() {"table", std::make_shared()}, {"part_name", std::make_shared()}, {"partition_id", std::make_shared()}, + {"disk_name", std::make_shared()}, {"path_on_disk", std::make_shared()}, {"rows", std::make_shared()}, @@ -79,6 +80,7 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(table_name); columns[i++]->insert(part_name); columns[i++]->insert(partition_id); + columns[i++]->insert(disk_name); columns[i++]->insert(path_on_disk); columns[i++]->insert(rows); @@ -155,6 +157,7 @@ bool PartLog::addNewParts( elem.table_name = table_id.table_name; elem.partition_id = part->info.partition_id; elem.part_name = part->name; + elem.disk_name = part->volume->getDisk()->getName(); elem.path_on_disk = part->getFullPath(); elem.bytes_compressed_on_disk = part->getBytesOnDisk(); diff --git a/src/Interpreters/PartLog.h b/src/Interpreters/PartLog.h index bdd1db4334a..5f502edb339 100644 --- a/src/Interpreters/PartLog.h +++ b/src/Interpreters/PartLog.h @@ -32,6 +32,7 @@ struct PartLogElement String table_name; String part_name; String partition_id; + String disk_name; String path_on_disk; /// Size of the part diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 493b2ba81a9..c90c271679c 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -216,7 +216,7 @@ struct ProcessListForUserInfo /// Data about queries for one user. struct ProcessListForUser { - ProcessListForUser(ProcessList * global_process_list); + explicit ProcessListForUser(ProcessList * global_process_list); /// query_id -> ProcessListElement(s). There can be multiple queries with the same query_id as long as all queries except one are cancelled. 
using QueryToElement = std::unordered_map; diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index eebcff62cde..f532d869789 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -25,7 +25,7 @@ class QueryNormalizer bool prefer_column_name_to_alias; template - ExtractedSettings(const T & settings) + ExtractedSettings(const T & settings) /// NOLINT : max_ast_depth(settings.max_ast_depth) , max_expanded_ast_elements(settings.max_expanded_ast_elements) , prefer_column_name_to_alias(settings.prefer_column_name_to_alias) diff --git a/src/Interpreters/RedundantFunctionsInOrderByVisitor.h b/src/Interpreters/RedundantFunctionsInOrderByVisitor.h index 09362ea6be2..60c9fcf2a24 100644 --- a/src/Interpreters/RedundantFunctionsInOrderByVisitor.h +++ b/src/Interpreters/RedundantFunctionsInOrderByVisitor.h @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace DB { diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.h b/src/Interpreters/ReplaceQueryParameterVisitor.h index cb3d0f668d8..dd785cd768e 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.h +++ b/src/Interpreters/ReplaceQueryParameterVisitor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace DB diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 842f61cbdd2..a34c81d52e2 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -83,6 +83,7 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) const auto & column_type = columns.get(name_in_storage).type; TypeIndex column_type_id = column_type->getTypeId(); + const auto & alias = function.tryGetAlias(); if (arguments.size() == 1) { @@ -91,7 +92,10 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) { const auto & [type_id, subcolumn_name, transformer] = it->second; if (column_type_id == type_id) + { ast = transformer(name_in_storage, subcolumn_name); + ast->setAlias(alias); + } } } else @@ -116,6 +120,7 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) return; ast = transformToSubcolumn(name_in_storage, subcolumn_name); + ast->setAlias(alias); } else { @@ -124,7 +129,10 @@ void RewriteFunctionToSubcolumnData::visit(ASTFunction & function, ASTPtr & ast) { const auto & [type_id, subcolumn_name, transformer] = it->second; if (column_type_id == type_id) + { ast = transformer(name_in_storage, subcolumn_name, arguments[1]); + ast->setAlias(alias); + } } } } diff --git a/src/Interpreters/RowRefs.h b/src/Interpreters/RowRefs.h index cae20b98caf..baff903e121 100644 --- a/src/Interpreters/RowRefs.h +++ b/src/Interpreters/RowRefs.h @@ -26,7 +26,7 @@ struct RowRef const Block * block = nullptr; SizeT row_num = 0; - RowRef() {} + RowRef() {} /// NOLINT RowRef(const Block * block_, size_t row_num_) : block(block_), row_num(row_num_) {} }; @@ -42,7 +42,7 @@ struct RowRefList : RowRef Batch * next; RowRef row_refs[MAX_SIZE]; - Batch(Batch * parent) + explicit Batch(Batch * parent) : next(parent) {} @@ -52,7 +52,7 @@ struct RowRefList : RowRef { if (full()) { - auto batch = pool.alloc(); + auto * batch = pool.alloc(); *batch = Batch(this); batch->insert(std::move(row_ref), pool); return batch; @@ -66,7 +66,7 @@ struct RowRefList : RowRef class ForwardIterator { public: - ForwardIterator(const RowRefList * begin) + explicit ForwardIterator(const 
RowRefList * begin) : root(begin) , first(true) , batch(root->next) @@ -115,7 +115,7 @@ struct RowRefList : RowRef size_t position; }; - RowRefList() {} + RowRefList() {} /// NOLINT RowRefList(const Block * block_, size_t row_num_) : RowRef(block_, row_num_) {} ForwardIterator begin() const { return ForwardIterator(this); } @@ -221,7 +221,7 @@ public: T asof_value; RowRef row_ref; - Entry(T v) : asof_value(v) {} + explicit Entry(T v) : asof_value(v) {} Entry(T v, RowRef rr) : asof_value(v), row_ref(rr) {} }; @@ -241,8 +241,8 @@ public: Entry::LookupPtr, Entry::LookupPtr>; - AsofRowRefs() {} - AsofRowRefs(TypeIndex t); + AsofRowRefs() = default; + explicit AsofRowRefs(TypeIndex t); static std::optional getTypeSize(const IColumn & asof_column, size_t & type_size); diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index f1fcfde25c0..29ba24c2e4c 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -41,9 +41,9 @@ struct StorageID assertNotEmpty(); } - StorageID(const ASTQueryWithTableAndOutput & query); - StorageID(const ASTTableIdentifier & table_identifier_node); - StorageID(const ASTPtr & node); + StorageID(const ASTQueryWithTableAndOutput & query); /// NOLINT + StorageID(const ASTTableIdentifier & table_identifier_node); /// NOLINT + StorageID(const ASTPtr & node); /// NOLINT String getDatabaseName() const; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index a3f6f7bf36b..f7c03ac6e1a 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -103,7 +103,7 @@ private: friend class TreeRewriter; - const SizeLimits size_limits; + SizeLimits size_limits; const size_t default_max_bytes = 0; const bool join_use_nulls = false; const size_t max_joined_block_rows = 0; @@ -114,7 +114,7 @@ private: const String temporary_files_codec = "LZ4"; /// the limit has no technical reasons, it supposed to improve safety - const size_t MAX_DISJUNCTS = 16; + const size_t MAX_DISJUNCTS = 16; /// NOLINT ASTs key_asts_left; ASTs key_asts_right; diff --git a/src/Interpreters/TraceCollector.h b/src/Interpreters/TraceCollector.h index 3a9edf676be..b3f11ca5756 100644 --- a/src/Interpreters/TraceCollector.h +++ b/src/Interpreters/TraceCollector.h @@ -18,7 +18,7 @@ class TraceLog; class TraceCollector { public: - TraceCollector(std::shared_ptr trace_log_); + explicit TraceCollector(std::shared_ptr trace_log_); ~TraceCollector(); static inline void collect(TraceType trace_type, const StackTrace & stack_trace, Int64 size) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 0d7d56058b9..6016d54c7dc 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -303,7 +303,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt } /// 'select * from a join b using id' should result one 'id' column -void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(const ASTPtr ast, Data & data) +void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(ASTPtr ast, Data & data) { const auto & table_join = ast->as(); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 0f35d052ed2..9c46d926eca 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -52,7 +52,7 @@ private: static void visit(ASTExpressionList &, const ASTPtr &, Data &); static 
void visit(ASTFunction &, const ASTPtr &, Data &); - static void extractJoinUsingColumns(const ASTPtr ast, Data & data); + static void extractJoinUsingColumns(ASTPtr ast, Data & data); }; /// Visits AST for names qualification. diff --git a/src/Interpreters/TreeCNFConverter.h b/src/Interpreters/TreeCNFConverter.h index 0c090c8d56b..a5d42e6b989 100644 --- a/src/Interpreters/TreeCNFConverter.h +++ b/src/Interpreters/TreeCNFConverter.h @@ -36,7 +36,7 @@ public: using OrGroup = std::set; using AndGroup = std::set; - CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { } + CNFQuery(AndGroup && statements_) : statements(std::move(statements_)) { } /// NOLINT template CNFQuery & filterAlwaysTrueGroups(P predicate_is_unknown) /// delete always true groups @@ -91,7 +91,7 @@ public: CNFQuery & appendGroup(AndGroup&& and_group) { for (auto && or_group : and_group) - statements.emplace(std::move(or_group)); + statements.emplace(or_group); return *this; } diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index c8a2d0903f2..3281445022e 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -41,15 +41,14 @@ struct PartialSortingLessImpl explicit PartialSortingLessImpl(const ColumnsWithSortDescriptions & columns_) : columns(columns_) { } - inline bool operator()(size_t a, size_t b) const + ALWAYS_INLINE int compare(size_t lhs, size_t rhs) const { + int res = 0; + for (const auto & elem : columns) { - int res; - if (elem.column_const) { - res = 0; continue; } @@ -57,52 +56,37 @@ struct PartialSortingLessImpl { if (isCollationRequired(elem.description)) { - res = elem.column->compareAtWithCollation(a, b, *elem.column, elem.description.nulls_direction, *elem.description.collator); + res = elem.column->compareAtWithCollation(lhs, rhs, *elem.column, elem.description.nulls_direction, *elem.description.collator); } else { - res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); + res = elem.column->compareAt(lhs, rhs, *elem.column, elem.description.nulls_direction); } } else { - res = elem.column->compareAt(a, b, *elem.column, elem.description.nulls_direction); + res = elem.column->compareAt(lhs, rhs, *elem.column, elem.description.nulls_direction); } res *= elem.description.direction; - if (res < 0) - return true; - else if (res > 0) - return false; + + if (res != 0) + break; } - return false; + + return res; + } + + ALWAYS_INLINE bool operator()(size_t lhs, size_t rhs) const + { + int res = compare(lhs, rhs); + return res < 0; } }; using PartialSortingLess = PartialSortingLessImpl; using PartialSortingLessWithCollation = PartialSortingLessImpl; -} - -void convertTupleColumnIntoSortDescriptions( - const ColumnTuple * tuple, const SortColumnDescription & description, ColumnsWithSortDescriptions & result) -{ - for (const auto & column : tuple->getColumns()) - { - if (const auto * subtuple = typeid_cast(column.get())) - { - convertTupleColumnIntoSortDescriptions(subtuple, description, result); - } - else - { - result.emplace_back(ColumnWithSortDescription{column.get(), description, isColumnConst(*column)}); - - if (isCollationRequired(description) && !result.back().column->isCollationSupported()) - result.back().description.collator = nullptr; - } - } -} - ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, const SortDescription & description) { size_t size = description.size(); @@ -127,16 +111,13 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c 
ErrorCodes::BAD_COLLATION); } - if (const auto * tuple = typeid_cast(column)) - convertTupleColumnIntoSortDescriptions(tuple, sort_column_description, result); - else - result.emplace_back(ColumnWithSortDescription{column, sort_column_description, isColumnConst(*column)}); + result.emplace_back(ColumnWithSortDescription{column, sort_column_description, isColumnConst(*column)}); } return result; } -void sortBlock(Block & block, const SortDescription & description, UInt64 limit) +void getBlockSortPermutationImpl(const Block & block, const SortDescription & description, IColumn::PermutationSortStability stability, UInt64 limit, IColumn::Permutation & permutation) { if (!block) return; @@ -152,25 +133,24 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) break; } } - if (all_const) - return; - IColumn::Permutation permutation; + if (unlikely(all_const)) + return; /// If only one column to sort by if (columns_with_sort_descriptions.size() == 1) { auto & column_with_sort_description = columns_with_sort_descriptions[0]; - bool reverse = column_with_sort_description.description.direction == -1; + IColumn::PermutationSortDirection direction = column_with_sort_description.description.direction == -1 ? IColumn::PermutationSortDirection::Descending : IColumn::PermutationSortDirection::Ascending; int nan_direction_hint = column_with_sort_description.description.nulls_direction; const auto & column = column_with_sort_description.column; if (isCollationRequired(column_with_sort_description.description)) column->getPermutationWithCollation( - *column_with_sort_description.description.collator, reverse, limit, nan_direction_hint, permutation); + *column_with_sort_description.description.collator, direction, stability, limit, nan_direction_hint, permutation); else - column->getPermutation(reverse, limit, nan_direction_hint, permutation); + column->getPermutation(direction, stability, limit, nan_direction_hint, permutation); } else { @@ -197,21 +177,32 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) continue; bool is_collation_required = isCollationRequired(column_with_sort_description.description); - bool reverse = column_with_sort_description.description.direction < 0; + IColumn::PermutationSortDirection direction = column_with_sort_description.description.direction == -1 ? 
IColumn::PermutationSortDirection::Descending : IColumn::PermutationSortDirection::Ascending; int nan_direction_hint = column_with_sort_description.description.nulls_direction; const auto & column = column_with_sort_description.column; if (is_collation_required) { column->updatePermutationWithCollation( - *column_with_sort_description.description.collator, reverse, limit, nan_direction_hint, permutation, ranges); + *column_with_sort_description.description.collator, direction, stability, limit, nan_direction_hint, permutation, ranges); } else { - column->updatePermutation(reverse, limit, nan_direction_hint, permutation, ranges); + column->updatePermutation(direction, stability, limit, nan_direction_hint, permutation, ranges); } } } +} + +} + +void sortBlock(Block & block, const SortDescription & description, UInt64 limit) +{ + IColumn::Permutation permutation; + getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Unstable, limit, permutation); + + if (permutation.empty()) + return; size_t columns = block.columns(); for (size_t i = 0; i < columns; ++i) @@ -221,19 +212,31 @@ void sortBlock(Block & block, const SortDescription & description, UInt64 limit) } } +void stableSortBlock(Block & block, const SortDescription & description) +{ + if (!block) + return; + + IColumn::Permutation permutation; + getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Stable, 0, permutation); + + if (permutation.empty()) + return; + + size_t columns = block.columns(); + for (size_t i = 0; i < columns; ++i) + { + auto & column_to_sort = block.getByPosition(i).column; + column_to_sort = column_to_sort->permute(permutation, 0); + } +} + void stableGetPermutation(const Block & block, const SortDescription & description, IColumn::Permutation & out_permutation) { if (!block) return; - size_t size = block.rows(); - out_permutation.resize(size); - for (size_t i = 0; i < size; ++i) - out_permutation[i] = i; - - ColumnsWithSortDescriptions columns_with_sort_desc = getColumnsWithSortDescription(block, description); - - std::stable_sort(out_permutation.begin(), out_permutation.end(), PartialSortingLess(columns_with_sort_desc)); + getBlockSortPermutationImpl(block, description, IColumn::PermutationSortStability::Stable, 0, out_permutation); } bool isAlreadySorted(const Block & block, const SortDescription & description) @@ -270,21 +273,4 @@ bool isAlreadySorted(const Block & block, const SortDescription & description) return true; } - -void stableSortBlock(Block & block, const SortDescription & description) -{ - if (!block) - return; - - IColumn::Permutation permutation; - stableGetPermutation(block, description, permutation); - - size_t columns = block.columns(); - for (size_t i = 0; i < columns; ++i) - { - auto & column_to_sort = block.safeGetByPosition(i).column; - column_to_sort = column_to_sort->permute(permutation, 0); - } -} - } diff --git a/src/Parsers/ASTAssignment.h b/src/Parsers/ASTAssignment.h index 88d4bb96c15..a37a31ae38e 100644 --- a/src/Parsers/ASTAssignment.h +++ b/src/Parsers/ASTAssignment.h @@ -28,6 +28,7 @@ public: protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override { + settings.ostr << (settings.hilite ? hilite_identifier : ""); settings.writeIdentifier(column_name); settings.ostr << (settings.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index a0070892b79..04755a02399 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -51,7 +51,7 @@ public: void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; - bool empty() + bool empty() const { return (!columns || columns->children.empty()) && (!indices || indices->children.empty()) && (!constraints || constraints->children.empty()) && (!projections || projections->children.empty()); diff --git a/src/Parsers/ASTFunctionWithKeyValueArguments.h b/src/Parsers/ASTFunctionWithKeyValueArguments.h index 5820e8564ac..4b745e2c1a2 100644 --- a/src/Parsers/ASTFunctionWithKeyValueArguments.h +++ b/src/Parsers/ASTFunctionWithKeyValueArguments.h @@ -19,7 +19,6 @@ public: /// Value is closed in brackets (HOST '127.0.0.1') bool second_with_brackets; -public: explicit ASTPair(bool second_with_brackets_) : second_with_brackets(second_with_brackets_) { @@ -54,7 +53,6 @@ public: { } -public: String getID(char delim) const override; ASTPtr clone() const override; diff --git a/src/Parsers/ASTHelpers.h b/src/Parsers/ASTHelpers.h index 086b361bf85..0b3db8e02d5 100644 --- a/src/Parsers/ASTHelpers.h +++ b/src/Parsers/ASTHelpers.h @@ -6,7 +6,7 @@ namespace DB { -static inline bool isFunctionCast(const ASTFunction * function) +static inline bool isFunctionCast(const ASTFunction * function) /// NOLINT { if (function) return function->name == "CAST" || function->name == "_CAST"; diff --git a/src/Parsers/ASTProjectionSelectQuery.h b/src/Parsers/ASTProjectionSelectQuery.h index 71334c50868..d93c10b6e39 100644 --- a/src/Parsers/ASTProjectionSelectQuery.h +++ b/src/Parsers/ASTProjectionSelectQuery.h @@ -26,10 +26,10 @@ public: ASTPtr & refSelect() { return getExpression(Expression::SELECT); } - const ASTPtr with() const { return getExpression(Expression::WITH); } - const ASTPtr select() const { return getExpression(Expression::SELECT); } - const ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } - const ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } + ASTPtr with() const { return getExpression(Expression::WITH); } + ASTPtr select() const { return getExpression(Expression::SELECT); } + ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } + ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } /// Set/Reset/Remove expression. 
void setExpression(Expression expr, ASTPtr && ast); diff --git a/src/Parsers/ASTQueryWithOnCluster.h b/src/Parsers/ASTQueryWithOnCluster.h index b309ae5e847..c5daaa6ce37 100644 --- a/src/Parsers/ASTQueryWithOnCluster.h +++ b/src/Parsers/ASTQueryWithOnCluster.h @@ -17,7 +17,7 @@ public: /// new_database should be used by queries that refer to default db /// and default_database is specified for remote server - virtual ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const = 0; + virtual ASTPtr getRewrittenASTWithoutOnCluster(const std::string & new_database = {}) const = 0; /// NOLINT /// Returns a query prepared for execution on remote server std::string getRewrittenQueryWithoutOnCluster(const std::string & new_database = {}) const; diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 1c631783fdb..9a8f1dbd2e7 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -91,21 +91,21 @@ public: ASTPtr & refWhere() { return getExpression(Expression::WHERE); } ASTPtr & refHaving() { return getExpression(Expression::HAVING); } - const ASTPtr with() const { return getExpression(Expression::WITH); } - const ASTPtr select() const { return getExpression(Expression::SELECT); } - const ASTPtr tables() const { return getExpression(Expression::TABLES); } - const ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); } - const ASTPtr where() const { return getExpression(Expression::WHERE); } - const ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } - const ASTPtr having() const { return getExpression(Expression::HAVING); } - const ASTPtr window() const { return getExpression(Expression::WINDOW); } - const ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } - const ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); } - const ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); } - const ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); } - const ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); } - const ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); } - const ASTPtr settings() const { return getExpression(Expression::SETTINGS); } + ASTPtr with() const { return getExpression(Expression::WITH); } + ASTPtr select() const { return getExpression(Expression::SELECT); } + ASTPtr tables() const { return getExpression(Expression::TABLES); } + ASTPtr prewhere() const { return getExpression(Expression::PREWHERE); } + ASTPtr where() const { return getExpression(Expression::WHERE); } + ASTPtr groupBy() const { return getExpression(Expression::GROUP_BY); } + ASTPtr having() const { return getExpression(Expression::HAVING); } + ASTPtr window() const { return getExpression(Expression::WINDOW); } + ASTPtr orderBy() const { return getExpression(Expression::ORDER_BY); } + ASTPtr limitByOffset() const { return getExpression(Expression::LIMIT_BY_OFFSET); } + ASTPtr limitByLength() const { return getExpression(Expression::LIMIT_BY_LENGTH); } + ASTPtr limitBy() const { return getExpression(Expression::LIMIT_BY); } + ASTPtr limitOffset() const { return getExpression(Expression::LIMIT_OFFSET); } + ASTPtr limitLength() const { return getExpression(Expression::LIMIT_LENGTH); } + ASTPtr settings() const { return getExpression(Expression::SETTINGS); } bool hasFiltration() const { return where() || prewhere() || having(); } diff --git a/src/Parsers/ASTTTLElement.h 
b/src/Parsers/ASTTTLElement.h index a396a4c54e0..9705cafbce3 100644 --- a/src/Parsers/ASTTTLElement.h +++ b/src/Parsers/ASTTTLElement.h @@ -37,8 +37,8 @@ public: ASTPtr clone() const override; - const ASTPtr ttl() const { return getExpression(ttl_expr_pos); } - const ASTPtr where() const { return getExpression(where_expr_pos); } + ASTPtr ttl() const { return getExpression(ttl_expr_pos); } + ASTPtr where() const { return getExpression(where_expr_pos); } void setTTL(ASTPtr && ast) { setExpression(ttl_expr_pos, std::forward(ast)); } void setWhere(ASTPtr && ast) { setExpression(where_expr_pos, std::forward(ast)); } @@ -50,7 +50,6 @@ private: int ttl_expr_pos; int where_expr_pos; -private: void setExpression(int & pos, ASTPtr && ast); ASTPtr getExpression(int pos, bool clone = false) const; }; diff --git a/src/Parsers/ASTTableOverrides.cpp b/src/Parsers/ASTTableOverrides.cpp index 8fc21db218f..0f34a9fb247 100644 --- a/src/Parsers/ASTTableOverrides.cpp +++ b/src/Parsers/ASTTableOverrides.cpp @@ -93,7 +93,7 @@ ASTPtr ASTTableOverrideList::tryGetTableOverride(const String & name) const return children[it->second]; } -void ASTTableOverrideList::setTableOverride(const String & name, const ASTPtr ast) +void ASTTableOverrideList::setTableOverride(const String & name, ASTPtr ast) { auto it = positions.find(name); if (it == positions.end()) diff --git a/src/Parsers/ASTTableOverrides.h b/src/Parsers/ASTTableOverrides.h index c0603f7a8e0..c47260789d8 100644 --- a/src/Parsers/ASTTableOverrides.h +++ b/src/Parsers/ASTTableOverrides.h @@ -40,7 +40,7 @@ public: String getID(char) const override { return "TableOverrideList"; } ASTPtr clone() const override; void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; - void setTableOverride(const String & name, const ASTPtr ast); + void setTableOverride(const String & name, ASTPtr ast); void removeTableOverride(const String & name); ASTPtr tryGetTableOverride(const String & name) const; bool hasOverride(const String & name) const; diff --git a/src/Parsers/ASTUseQuery.h b/src/Parsers/ASTUseQuery.h index 4e4a13c2a7f..16d449f905f 100644 --- a/src/Parsers/ASTUseQuery.h +++ b/src/Parsers/ASTUseQuery.h @@ -25,7 +25,6 @@ protected: void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override { settings.ostr << (settings.hilite ? hilite_keyword : "") << "USE " << (settings.hilite ? 
hilite_none : "") << backQuoteIfNeed(database); - return; } }; diff --git a/src/Parsers/Access/ASTUserNameWithHost.h b/src/Parsers/Access/ASTUserNameWithHost.h index ada9bfb0673..bd28b42b48a 100644 --- a/src/Parsers/Access/ASTUserNameWithHost.h +++ b/src/Parsers/Access/ASTUserNameWithHost.h @@ -23,7 +23,7 @@ public: void concatParts(); ASTUserNameWithHost() = default; - ASTUserNameWithHost(const String & name_) : base_name(name_) {} + explicit ASTUserNameWithHost(const String & name_) : base_name(name_) {} String getID(char) const override { return "UserNameWithHost"; } ASTPtr clone() const override { return std::make_shared(*this); } void formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const override; @@ -39,7 +39,7 @@ public: auto begin() const { return names.begin(); } auto end() const { return names.end(); } auto front() const { return *begin(); } - void push_back(const String & name_) { names.push_back(std::make_shared(name_)); } + void push_back(const String & name_) { names.push_back(std::make_shared(name_)); } /// NOLINT Strings toStrings() const; void concatParts(); diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 58fac2341cf..d2911754b24 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -33,7 +33,8 @@ class ParserToken : public IParserBase private: TokenType token_type; public: - ParserToken(TokenType token_type_) : token_type(token_type_) {} + ParserToken(TokenType token_type_) : token_type(token_type_) {} /// NOLINT + protected: const char * getName() const override { return "token"; } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 358fe778f91..86d0fd0f861 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -207,7 +207,7 @@ private: ParserPtr elem_parser; public: - ParserCastExpression(ParserPtr && elem_parser_) + explicit ParserCastExpression(ParserPtr && elem_parser_) : elem_parser(std::move(elem_parser_)) { } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index fdf821c4a0b..bd8167c64fe 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -69,7 +69,7 @@ public: } /** Get the text that identifies this element. 
*/ - virtual String getID(char delimiter = '_') const = 0; + virtual String getID(char delimiter = '_') const = 0; /// NOLINT ASTPtr ptr() { return shared_from_this(); } diff --git a/src/Parsers/ParserExplainQuery.h b/src/Parsers/ParserExplainQuery.h index a1865e30239..ba30e97a58f 100644 --- a/src/Parsers/ParserExplainQuery.h +++ b/src/Parsers/ParserExplainQuery.h @@ -14,7 +14,7 @@ protected: const char * getName() const override { return "EXPLAIN"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - ParserExplainQuery(const char* end_) : end(end_) {} + explicit ParserExplainQuery(const char* end_) : end(end_) {} }; } diff --git a/src/Parsers/ParserQueryWithOutput.h b/src/Parsers/ParserQueryWithOutput.h index 854d5a74ffd..1fd7bec1eea 100644 --- a/src/Parsers/ParserQueryWithOutput.h +++ b/src/Parsers/ParserQueryWithOutput.h @@ -15,7 +15,7 @@ protected: const char * getName() const override { return "Query with output"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; public: - ParserQueryWithOutput(const char * end_) : end(end_) {} + explicit ParserQueryWithOutput(const char * end_) : end(end_) {} }; } diff --git a/src/Parsers/ParserTablesInSelectQuery.h b/src/Parsers/ParserTablesInSelectQuery.h index 9e5b591ccbe..772f1992f4d 100644 --- a/src/Parsers/ParserTablesInSelectQuery.h +++ b/src/Parsers/ParserTablesInSelectQuery.h @@ -21,7 +21,7 @@ protected: class ParserTablesInSelectQueryElement : public IParserBase { public: - ParserTablesInSelectQueryElement(bool is_first_) : is_first(is_first_) {} + explicit ParserTablesInSelectQueryElement(bool is_first_) : is_first(is_first_) {} protected: const char * getName() const override { return "table, table function, subquery or list of joined tables"; } diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index e70ba57a267..1c9240ba114 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -90,7 +90,7 @@ public: bool hasRows() const { return num_rows > 0; } bool hasColumns() const { return !columns.empty(); } bool empty() const { return !hasRows() && !hasColumns(); } - operator bool() const { return !empty(); } + operator bool() const { return !empty(); } /// NOLINT void addColumn(ColumnPtr column); void addColumn(size_t position, ColumnPtr column); diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 67a8eb88d61..2d35809e26a 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -14,7 +14,7 @@ namespace DB class ISchemaReader { public: - ISchemaReader(ReadBuffer & in_) : in(in_) {} + explicit ISchemaReader(ReadBuffer & in_) : in(in_) {} virtual NamesAndTypesList readSchema() = 0; diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index d649c52557f..e06eab04f1b 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -44,7 +44,7 @@ class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFil public: RandomAccessFileFromSeekableReadBuffer(SeekableReadBuffer & in_, off_t file_size_); - RandomAccessFileFromSeekableReadBuffer(SeekableReadBufferWithSize & in_); + explicit RandomAccessFileFromSeekableReadBuffer(SeekableReadBufferWithSize & in_); arrow::Result GetSize() override; diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index ecaa485c3d6..5c367bb69f0 100644 
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -32,7 +32,6 @@ #include #include - /// UINT16 and UINT32 are processed separately, see comments in readColumnFromArrowColumn. #define FOR_ARROW_NUMERIC_TYPES(M) \ M(arrow::Type::UINT8, DB::UInt8) \ @@ -66,9 +65,9 @@ namespace ErrorCodes extern const int DUPLICATE_COLUMN; extern const int THERE_IS_NO_COLUMN; extern const int UNKNOWN_EXCEPTION; + extern const int INCORRECT_NUMBER_OF_COLUMNS; } - /// Inserts numeric data right into internal column data to reduce an overhead template > static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr & arrow_column, const String & column_name) @@ -532,6 +531,9 @@ void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptrsecond->length(); columns_list.reserve(header.rows()); diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 1e8ee4aebb9..7a598de1f6a 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -61,7 +61,7 @@ private: , target_column_idx(target_column_idx_) , deserialize_fn(deserialize_fn_) {} - Action(SkipFn skip_fn_) + explicit Action(SkipFn skip_fn_) : type(Skip) , skip_fn(skip_fn_) {} diff --git a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h index 288b36508ce..12dc5eda2b3 100644 --- a/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h +++ b/src/Processors/Formats/Impl/CapnProtoRowOutputFormat.h @@ -15,7 +15,7 @@ namespace DB class CapnProtoOutputStream : public kj::OutputStream { public: - CapnProtoOutputStream(WriteBuffer & out_); + explicit CapnProtoOutputStream(WriteBuffer & out_); void write(const void * buffer, size_t size) override; diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h index 6659243df63..c5d4f033258 100644 --- a/src/Processors/Formats/Impl/ConstantExpressionTemplate.h +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.h @@ -3,6 +3,8 @@ #include #include #include +#include +#include namespace DB { diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp index c4645e0d63d..4a2c4209acf 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowOutputFormat.cpp @@ -55,9 +55,9 @@ void JSONCompactEachRowRowOutputFormat::writeRowEndDelimiter() void JSONCompactEachRowRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { writeChar('\n', out); - size_t num_columns = columns.size(); + size_t columns_size = columns.size(); writeRowStartDelimiter(); - for (size_t i = 0; i < num_columns; ++i) + for (size_t i = 0; i < columns_size; ++i) { if (i != 0) writeFieldDelimiter(); diff --git a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp index 8130b2b4cb1..61ac25ca441 100644 --- a/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONRowOutputFormat.cpp @@ -154,9 +154,9 @@ void JSONRowOutputFormat::writeBeforeTotals() void JSONRowOutputFormat::writeTotals(const Columns & columns, size_t row_num) { - size_t num_columns = columns.size(); + size_t columns_size = columns.size(); - for (size_t i = 0; i < num_columns; ++i) + for (size_t 
i = 0; i < columns_size; ++i) { if (i != 0) writeTotalsFieldDelimiter(); diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 56fc5d7857b..607e6f36767 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -353,7 +353,7 @@ bool MsgPackVisitor::visit_nil() bool MsgPackVisitor::visit_ext(const char * value, uint32_t size) { int8_t type = *value; - if (*value == int8_t(MsgPackExtensionTypes::UUID)) + if (*value == int8_t(MsgPackExtensionTypes::UUIDType)) { insertUUID(info_stack.top().column, info_stack.top().type, value + 1, size - 1); return true; @@ -496,11 +496,12 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) case msgpack::type::object_type::EXT: { msgpack::object_ext object_ext = object.via.ext; - if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUID)) + if (object_ext.type() == int8_t(MsgPackExtensionTypes::UUIDType)) return std::make_shared(); throw Exception(ErrorCodes::BAD_ARGUMENTS, "Msgpack extension type {%x} is not supported", object_ext.type()); } } + __builtin_unreachable(); } DataTypes MsgPackSchemaReader::readRowAndGetDataTypes() diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index edec9774b5f..e53aafb4e56 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -199,7 +199,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr writeBinaryBigEndian(value.toUnderType().items[0], buf); writeBinaryBigEndian(value.toUnderType().items[1], buf); StringRef uuid_ext = buf.stringRef(); - packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUID)); + packer.pack_ext(sizeof(UUID), int8_t(MsgPackExtensionTypes::UUIDType)); packer.pack_ext_body(uuid_ext.data, uuid_ext.size); return; } @@ -213,8 +213,8 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr void MsgPackRowOutputFormat::write(const Columns & columns, size_t row_num) { - size_t num_columns = columns.size(); - for (size_t i = 0; i < num_columns; ++i) + size_t columns_size = columns.size(); + for (size_t i = 0; i < columns_size; ++i) { serializeField(*columns[i], types[i], row_num); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 4950e1fb952..aa9f7874ae8 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -1,4 +1,5 @@ #include "ORCBlockInputFormat.h" +#include #if USE_ORC #include @@ -52,6 +53,9 @@ Chunk ORCBlockInputFormat::generate() if (!table || !table->num_rows()) return res; + if (format_settings.use_lowercase_column_name) + table = *table->RenameColumns(include_column_names); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table); /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. 
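/// Note on the generate() hunk above (a sketch, not diff code): arrow::Table::RenameColumns()
/// takes exactly one name per physical column and returns an arrow::Result, roughly:
///
///     if (auto renamed = table->RenameColumns(include_column_names); renamed.ok())
///         table = *renamed;
///
/// which is why the prepareReader() hunk below fills include_column_names once per
/// nested index, in lockstep with include_indices, rather than once per logical column.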
@@ -69,6 +73,7 @@ void ORCBlockInputFormat::resetParser() file_reader.reset(); include_indices.clear(); + include_column_names.clear(); block_missing_values.clear(); } @@ -120,6 +125,20 @@ static void getFileReaderAndSchema( if (!read_schema_result.ok()) throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); schema = std::move(read_schema_result).ValueOrDie(); + + if (format_settings.use_lowercase_column_name) + { + std::vector> fields; + fields.reserve(schema->num_fields()); + for (int i = 0; i < schema->num_fields(); ++i) + { + const auto& field = schema->field(i); + auto name = field->name(); + boost::to_lower(name); + fields.push_back(field->WithName(name)); + } + schema = arrow::schema(fields, schema->metadata()); + } } void ORCBlockInputFormat::prepareReader() @@ -148,9 +167,11 @@ void ORCBlockInputFormat::prepareReader() const auto & name = schema->field(i)->name(); if (getPort().getHeader().has(name) || nested_table_names.contains(name)) { - column_names.push_back(name); for (int j = 0; j != indexes_count; ++j) + { include_indices.push_back(index + j); + include_column_names.push_back(name); + } } index += indexes_count; } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index bb136d02d6e..bd2151d78ff 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -45,10 +45,9 @@ private: std::unique_ptr arrow_column_to_ch_column; - std::vector column_names; - // indices of columns to read from ORC file std::vector include_indices; + std::vector include_column_names; std::vector missing_columns; BlockMissingValues block_missing_values; diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h index 2ffee597e8f..f69fd1c0aab 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.h @@ -17,7 +17,7 @@ class WriteBuffer; class ORCOutputStream : public orc::OutputStream { public: - ORCOutputStream(WriteBuffer & out_); + explicit ORCOutputStream(WriteBuffer & out_); uint64_t getLength() const override; uint64_t getNaturalWriteSize() const override; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 3f0d9980573..548bf0138f5 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -1,4 +1,6 @@ #include "ParquetBlockInputFormat.h" +#include + #if USE_PARQUET #include @@ -13,9 +15,6 @@ #include "ArrowColumnToCHColumn.h" #include -#include - - namespace DB { @@ -57,6 +56,9 @@ Chunk ParquetBlockInputFormat::generate() throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; + if (format_settings.use_lowercase_column_name) + table = *table->RenameColumns(column_names); + ++row_group_current; arrow_column_to_ch_column->arrowTableToCHChunk(res, table); @@ -76,6 +78,7 @@ void ParquetBlockInputFormat::resetParser() file_reader.reset(); column_indices.clear(); + column_names.clear(); row_group_current = 0; block_missing_values.clear(); } @@ -120,6 +123,20 @@ static void getFileReaderAndSchema( return; THROW_ARROW_NOT_OK(parquet::arrow::OpenFile(std::move(arrow_file), arrow::default_memory_pool(), &file_reader)); THROW_ARROW_NOT_OK(file_reader->GetSchema(&schema)); + + if 
(format_settings.use_lowercase_column_name) + { + std::vector> fields; + fields.reserve(schema->num_fields()); + for (int i = 0; i < schema->num_fields(); ++i) + { + const auto& field = schema->field(i); + auto name = field->name(); + boost::to_lower(name); + fields.push_back(field->WithName(name)); + } + schema = arrow::schema(fields, schema->metadata()); + } } void ParquetBlockInputFormat::prepareReader() @@ -150,7 +167,10 @@ void ParquetBlockInputFormat::prepareReader() if (getPort().getHeader().has(name) || nested_table_names.contains(name)) { for (int j = 0; j != indexes_count; ++j) + { column_indices.push_back(index + j); + column_names.push_back(name); + } } index += indexes_count; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index 1faadaa3d21..eba9aac29f2 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -40,6 +40,7 @@ private: int row_group_total = 0; // indices of columns to read from Parquet file std::vector column_indices; + std::vector column_names; std::unique_ptr arrow_column_to_ch_column; int row_group_current = 0; std::vector missing_columns; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 75c630d0607..04f24bbb3e4 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -22,7 +22,7 @@ class ReadBuffer; class RegexpFieldExtractor { public: - RegexpFieldExtractor(const FormatSettings & format_settings); + explicit RegexpFieldExtractor(const FormatSettings & format_settings); /// Return true if row was successfully parsed and row fields were extracted. bool parseRow(PeekableReadBuffer & buf); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index ed67a8256bc..abab5b02c96 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -53,7 +53,7 @@ public: bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; - FormatSettings::EscapingRule getEscapingRule() + FormatSettings::EscapingRule getEscapingRule() const { return is_raw ? 
FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped; } diff --git a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp index 0905e4243cd..468770e2515 100644 --- a/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/VerticalRowOutputFormat.cpp @@ -141,7 +141,7 @@ void VerticalRowOutputFormat::writeSpecialRow(const Columns & columns, size_t ro row_number = 0; field_number = 0; - size_t num_columns = columns.size(); + size_t columns_size = columns.size(); writeCString(title, out); writeCString(":\n", out); @@ -151,7 +151,7 @@ void VerticalRowOutputFormat::writeSpecialRow(const Columns & columns, size_t ro writeCString("─", out); writeChar('\n', out); - for (size_t i = 0; i < num_columns; ++i) + for (size_t i = 0; i < columns_size; ++i) writeField(*columns[i], *serializations[i], row_num); } diff --git a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h index 35cfded4214..ff8f113d9a6 100644 --- a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h +++ b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h @@ -1,5 +1,7 @@ #pragma once +#include + namespace DB { diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 2c6d08ed287..c0f595fa539 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -89,7 +89,7 @@ inline static const Patterns & selectPatternsForMetricType(const Graphite::Param Graphite::RollupRule selectPatternForPath( const Graphite::Params & params, - const StringRef path) + StringRef path) { const Graphite::Pattern * first_match = &undef_pattern; diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index dc39cb46386..05306ebe30f 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -147,7 +147,7 @@ struct Params using RollupRule = std::pair; -Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, const StringRef path); +Graphite::RollupRule selectPatternForPath(const Graphite::Params & params, StringRef path); void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params); diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 9bf33d72f31..89da346980d 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -1,5 +1,11 @@ #pragma once +#include +#include +#include +#include + + namespace DB { diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 9f27b440be5..7cb25f3930e 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -214,7 +214,7 @@ protected: public: using Data = State::Data; - Port(Block header_) : header(std::move(header_)) {} + Port(Block header_) : header(std::move(header_)) {} /// NOLINT Port(Block header_, IProcessor * processor_) : header(std::move(header_)), processor(processor_) {} void setUpdateInfo(UpdateInfo * info) { update_info = info; } @@ -303,12 +303,12 @@ public: Chunk ALWAYS_INLINE pull(bool set_not_needed = false) { - auto data_ = pullData(set_not_needed); + auto pull_data = pullData(set_not_needed); - if (data_.exception) - std::rethrow_exception(data_.exception); + if (pull_data.exception) + 
std::rethrow_exception(pull_data.exception); - return std::move(data_.chunk); + return std::move(pull_data.chunk); } bool ALWAYS_INLINE isFinished() const @@ -396,7 +396,7 @@ public: void ALWAYS_INLINE pushException(std::exception_ptr exception) { - pushData({.chunk = {}, .exception = std::move(exception)}); + pushData({.chunk = {}, .exception = exception}); } void ALWAYS_INLINE pushData(Data data_) diff --git a/src/Processors/QueueBuffer.h b/src/Processors/QueueBuffer.h index 826f4a22b8b..6856e214823 100644 --- a/src/Processors/QueueBuffer.h +++ b/src/Processors/QueueBuffer.h @@ -17,7 +17,7 @@ private: public: String getName() const override { return "QueueBuffer"; } - QueueBuffer(Block header) + explicit QueueBuffer(Block header) : IAccumulatingTransform(header, header) { } diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index 538aba9d1f3..a9b408064d9 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -225,6 +225,10 @@ namespace assert_cast<ColumnUInt16 &>(column).insertValue(UInt16(value.getDate().getDayNum())); read_bytes_size += 2; break; + case ValueType::vtDate32: + assert_cast<ColumnInt32 &>(column).insertValue(Int32(value.getDate().getExtenedDayNum())); + read_bytes_size += 4; + break; case ValueType::vtDateTime: { ReadBufferFromString in(value); diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.h b/src/Processors/Transforms/AggregatingInOrderTransform.h index 929ab98d6e6..e4c217a8f81 100644 --- a/src/Processors/Transforms/AggregatingInOrderTransform.h +++ b/src/Processors/Transforms/AggregatingInOrderTransform.h @@ -11,7 +11,7 @@ namespace DB struct ChunkInfoWithAllocatedBytes : public ChunkInfo { - ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) + explicit ChunkInfoWithAllocatedBytes(Int64 allocated_bytes_) : allocated_bytes(allocated_bytes_) {} Int64 allocated_bytes; }; diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h index 01df264005b..d7917fc95a7 100644 --- a/src/Processors/Transforms/AggregatingTransform.h +++ b/src/Processors/Transforms/AggregatingTransform.h @@ -12,7 +12,7 @@ class AggregatedArenasChunkInfo : public ChunkInfo { public: Arenas arenas; - AggregatedArenasChunkInfo(Arenas arenas_) + explicit AggregatedArenasChunkInfo(Arenas arenas_) : arenas(std::move(arenas_)) {} }; diff --git a/src/Processors/Transforms/ColumnGathererTransform.h b/src/Processors/Transforms/ColumnGathererTransform.h index 2d013e596ce..da6dc877abf 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.h +++ b/src/Processors/Transforms/ColumnGathererTransform.h @@ -20,7 +20,7 @@ struct RowSourcePart RowSourcePart() = default; - RowSourcePart(size_t source_num, bool skip_flag = false) + explicit RowSourcePart(size_t source_num, bool skip_flag = false) { static_assert(sizeof(*this) == 1, "Size of RowSourcePart is too big due to compiler settings"); setSourceNum(source_num); diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index d01a809e666..34e5231c626 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -266,7 +266,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) last_bucket_number[input] = bucket; } } - else if (const auto * in_order_info = typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get())) + else if
(typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get())) { single_level_chunks.emplace_back(std::move(chunk)); } @@ -334,7 +334,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) blocks_list.emplace_back(std::move(block)); } - else if (const auto * in_order_info = typeid_cast<const ChunkInfoWithAllocatedBytes *>(cur_info.get())) + else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(cur_info.get())) { Block block = header.cloneWithColumns(cur_chunk.detachColumns()); block.info.is_overflows = false; diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index dd2b315d53c..11d32278caf 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -49,7 +49,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[agg_info->bucket_num].emplace_back(std::move(block)); } - else if (const auto * in_order_info = typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get())) + else if (typeid_cast<const ChunkInfoWithAllocatedBytes *>(info.get())) { auto block = getInputPort().getHeader().cloneWithColumns(chunk.getColumns()); block.info.is_overflows = false; diff --git a/src/Processors/Transforms/PostgreSQLSource.cpp b/src/Processors/Transforms/PostgreSQLSource.cpp index 88f092a2533..a31cd879257 100644 --- a/src/Processors/Transforms/PostgreSQLSource.cpp +++ b/src/Processors/Transforms/PostgreSQLSource.cpp @@ -28,7 +28,7 @@ PostgreSQLSource::PostgreSQLSource( postgres::ConnectionHolderPtr connection_holder_, const std::string & query_str_, const Block & sample_block, - const UInt64 max_block_size_) + UInt64 max_block_size_) : SourceWithProgress(sample_block.cloneEmpty()) , query_str(query_str_) , max_block_size(max_block_size_) @@ -43,7 +43,7 @@ PostgreSQLSource::PostgreSQLSource( std::shared_ptr<T> tx_, const std::string & query_str_, const Block & sample_block, - const UInt64 max_block_size_, + UInt64 max_block_size_, bool auto_commit_) : SourceWithProgress(sample_block.cloneEmpty()) , query_str(query_str_) diff --git a/src/Processors/Transforms/PostgreSQLSource.h b/src/Processors/Transforms/PostgreSQLSource.h index c7e55c09c32..bd6203042bb 100644 --- a/src/Processors/Transforms/PostgreSQLSource.h +++ b/src/Processors/Transforms/PostgreSQLSource.h @@ -24,7 +24,7 @@ public: postgres::ConnectionHolderPtr connection_holder_, const String & query_str_, const Block & sample_block, - const UInt64 max_block_size_); + UInt64 max_block_size_); String getName() const override { return "PostgreSQL"; } @@ -33,7 +33,7 @@ protected: std::shared_ptr<T> tx_, const std::string & query_str_, const Block & sample_block, - const UInt64 max_block_size_, + UInt64 max_block_size_, bool auto_commit_); String query_str; diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 0b7797da24f..45e972afa3f 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -138,7 +138,7 @@ IProcessor::Status TotalsHavingTransform::prepare() if (!totals_output.canPush()) return Status::PortFull; - if (!totals) + if (!total_prepared) return Status::Ready; totals_output.push(std::move(totals)); @@ -312,6 +312,8 @@ void TotalsHavingTransform::prepareTotals() /// Note: after expression totals may have several rows if `arrayJoin` was used in expression.
totals = Chunk(block.getColumns(), num_rows); } + + total_prepared = true; } } diff --git a/src/Processors/Transforms/TotalsHavingTransform.h b/src/Processors/Transforms/TotalsHavingTransform.h index 03635054c65..6b4afb2fa8b 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.h +++ b/src/Processors/Transforms/TotalsHavingTransform.h @@ -46,6 +46,7 @@ protected: void transform(Chunk & chunk) override; bool finished_transform = false; + bool total_prepared = false; Chunk totals; private: diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index df34c592819..3c96e12e869 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -7,6 +7,7 @@ #include #include #include +#include <Columns/ColumnAggregateFunction.h> #include #include #include @@ -22,6 +23,7 @@ namespace ErrorCodes { extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; + extern const int ILLEGAL_COLUMN; } // Interface for true window functions. It's not much of an interface, they just @@ -986,7 +988,23 @@ void WindowTransform::writeOutCurrentRow() auto * buf = ws.aggregate_function_state.data(); // FIXME does it also allocate the result on the arena? // We'll have to pass it out with blocks then... - a->insertResultInto(buf, *result_column, arena.get()); + + if (a->isState()) + { + /// AggregateFunction's states should be inserted into column using specific way + auto * res_col_aggregate_function = typeid_cast<ColumnAggregateFunction *>(result_column); + if (!res_col_aggregate_function) + { + throw Exception("State function " + a->getName() + " inserts results into non-state column ", + ErrorCodes::ILLEGAL_COLUMN); + } + res_col_aggregate_function->insertFrom(buf); + } + else + { + a->insertResultInto(buf, *result_column, arena.get()); + } + } } diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 077979e83b9..d536c8780d2 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -245,7 +245,6 @@ public: return RowNumber{first_block_number, 0}; } -public: /* * Data (formerly) inherited from ISimpleTransform, needed for the * implementation of the IProcessor interface. */ @@ -349,10 +348,10 @@ public: template <> struct fmt::formatter<DB::RowNumber> { - constexpr auto parse(format_parse_context & ctx) + static constexpr auto parse(format_parse_context & ctx) { - auto it = ctx.begin(); - auto end = ctx.end(); + const auto * it = ctx.begin(); + const auto * end = ctx.end(); /// Only support {}.
if (it != end && *it != '}') diff --git a/src/QueryPipeline/SizeLimits.h b/src/QueryPipeline/SizeLimits.h index ce7e1795475..fc052714b0c 100644 --- a/src/QueryPipeline/SizeLimits.h +++ b/src/QueryPipeline/SizeLimits.h @@ -26,7 +26,7 @@ struct SizeLimits UInt64 max_bytes = 0; OverflowMode overflow_mode = OverflowMode::THROW; - SizeLimits() {} + SizeLimits() = default; SizeLimits(UInt64 max_rows_, UInt64 max_bytes_, OverflowMode overflow_mode_) : max_rows(max_rows_), max_bytes(max_bytes_), overflow_mode(overflow_mode_) {} diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 078dcb04595..3236b35d5ae 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -40,7 +40,7 @@ static inline bool checkExpression(const StringRef & match_str, const std::pair< return match_str == expression.first; } -static inline auto methodsFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) +static inline auto methodsFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) /// NOLINT { std::vector methods; Poco::StringTokenizer tokenizer(config.getString(config_path), ","); @@ -64,7 +64,7 @@ static inline auto getExpression(const std::string & expression) return std::make_pair(expression, compiled_regex); } -static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) +static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const std::string & config_path) /// NOLINT { return [expression = getExpression(config.getString(config_path))](const HTTPServerRequest & request) { @@ -75,7 +75,7 @@ static inline auto urlFilter(Poco::Util::AbstractConfiguration & config, const s }; } -static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, const std::string & prefix) +static inline auto headersFilter(Poco::Util::AbstractConfiguration & config, const std::string & prefix) /// NOLINT { std::unordered_map> headers_expression; Poco::Util::AbstractConfiguration::Keys headers_name; diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 3e354cfd18f..655d17e61fa 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -202,25 +202,30 @@ struct SocketInterruptablePollWrapper #endif }; -KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_) +KeeperTCPHandler::KeeperTCPHandler( + const Poco::Util::AbstractConfiguration & config_ref, + std::shared_ptr keeper_dispatcher_, + Poco::Timespan receive_timeout_, + Poco::Timespan send_timeout_, + const Poco::Net::StreamSocket & socket_) : Poco::Net::TCPServerConnection(socket_) - , server(server_) , log(&Poco::Logger::get("KeeperTCPHandler")) - , global_context(Context::createCopy(server.context())) - , keeper_dispatcher(global_context->getKeeperDispatcher()) + , keeper_dispatcher(keeper_dispatcher_) , operation_timeout( 0, - global_context->getConfigRef().getUInt( + config_ref.getUInt( "keeper_server.coordination_settings.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000) , min_session_timeout( 0, - global_context->getConfigRef().getUInt( + config_ref.getUInt( "keeper_server.coordination_settings.min_session_timeout_ms", Coordination::DEFAULT_MIN_SESSION_TIMEOUT_MS) * 1000) , max_session_timeout( 0, - global_context->getConfigRef().getUInt( + config_ref.getUInt( "keeper_server.coordination_settings.session_timeout_ms", 
Coordination::DEFAULT_MAX_SESSION_TIMEOUT_MS) * 1000) , poll_wrapper(std::make_unique(socket_)) + , send_timeout(send_timeout_) + , receive_timeout(receive_timeout_) , responses(std::make_unique(std::numeric_limits::max())) , last_op(std::make_unique(EMPTY_LAST_OP)) { @@ -289,11 +294,9 @@ void KeeperTCPHandler::runImpl() { setThreadName("KeeperHandler"); ThreadStatus thread_status; - auto global_receive_timeout = global_context->getSettingsRef().receive_timeout; - auto global_send_timeout = global_context->getSettingsRef().send_timeout; - socket().setReceiveTimeout(global_receive_timeout); - socket().setSendTimeout(global_send_timeout); + socket().setReceiveTimeout(receive_timeout); + socket().setSendTimeout(send_timeout); socket().setNoDelay(true); in = std::make_shared(socket()); diff --git a/src/Server/KeeperTCPHandler.h b/src/Server/KeeperTCPHandler.h index b8cccafeca5..9895c335c96 100644 --- a/src/Server/KeeperTCPHandler.h +++ b/src/Server/KeeperTCPHandler.h @@ -48,7 +48,12 @@ private: static std::unordered_set connections; public: - KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_); + KeeperTCPHandler( + const Poco::Util::AbstractConfiguration & config_ref, + std::shared_ptr keeper_dispatcher_, + Poco::Timespan receive_timeout_, + Poco::Timespan send_timeout_, + const Poco::Net::StreamSocket & socket_); void run() override; KeeperConnectionStats & getConnectionStats(); @@ -58,9 +63,7 @@ public: ~KeeperTCPHandler() override; private: - IServer & server; Poco::Logger * log; - ContextPtr global_context; std::shared_ptr keeper_dispatcher; Poco::Timespan operation_timeout; Poco::Timespan min_session_timeout; @@ -69,6 +72,8 @@ private: int64_t session_id{-1}; Stopwatch session_stopwatch; SocketInterruptablePollWrapperPtr poll_wrapper; + Poco::Timespan send_timeout; + Poco::Timespan receive_timeout; ThreadSafeResponseQueuePtr responses; diff --git a/src/Server/KeeperTCPHandlerFactory.h b/src/Server/KeeperTCPHandlerFactory.h index 58dc73d7c27..76309ffc119 100644 --- a/src/Server/KeeperTCPHandlerFactory.h +++ b/src/Server/KeeperTCPHandlerFactory.h @@ -10,11 +10,17 @@ namespace DB { +using ConfigGetter = std::function; + class KeeperTCPHandlerFactory : public TCPServerConnectionFactory { private: - IServer & server; + ConfigGetter config_getter; + std::shared_ptr keeper_dispatcher; Poco::Logger * log; + Poco::Timespan receive_timeout; + Poco::Timespan send_timeout; + class DummyTCPHandler : public Poco::Net::TCPServerConnection { public: @@ -23,9 +29,17 @@ private: }; public: - KeeperTCPHandlerFactory(IServer & server_, bool secure) - : server(server_) + KeeperTCPHandlerFactory( + ConfigGetter config_getter_, + std::shared_ptr keeper_dispatcher_, + Poco::Timespan receive_timeout_, + Poco::Timespan send_timeout_, + bool secure) + : config_getter(config_getter_) + , keeper_dispatcher(keeper_dispatcher_) , log(&Poco::Logger::get(std::string{"KeeperTCP"} + (secure ? "S" : "") + "HandlerFactory")) + , receive_timeout(receive_timeout_) + , send_timeout(send_timeout_) { } @@ -34,7 +48,7 @@ public: try { LOG_TRACE(log, "Keeper request. 
Address: {}", socket.peerAddress().toString()); - return new KeeperTCPHandler(server, socket); + return new KeeperTCPHandler(config_getter(), keeper_dispatcher, receive_timeout, send_timeout, socket); } catch (const Poco::Net::NetException &) { diff --git a/src/Server/ProtocolServerAdapter.cpp b/src/Server/ProtocolServerAdapter.cpp index b41ad2376f1..dbc676432f5 100644 --- a/src/Server/ProtocolServerAdapter.cpp +++ b/src/Server/ProtocolServerAdapter.cpp @@ -1,7 +1,7 @@ #include #include -#if USE_GRPC +#if USE_GRPC && !defined(KEEPER_STANDALONE_BUILD) #include #endif @@ -37,7 +37,7 @@ ProtocolServerAdapter::ProtocolServerAdapter( { } -#if USE_GRPC +#if USE_GRPC && !defined(KEEPER_STANDALONE_BUILD) class ProtocolServerAdapter::GRPCServerAdapterImpl : public Impl { public: diff --git a/src/Server/ProtocolServerAdapter.h b/src/Server/ProtocolServerAdapter.h index 9b3b1af0301..90aec7471ee 100644 --- a/src/Server/ProtocolServerAdapter.h +++ b/src/Server/ProtocolServerAdapter.h @@ -21,7 +21,7 @@ public: ProtocolServerAdapter & operator =(ProtocolServerAdapter && src) = default; ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr tcp_server_); -#if USE_GRPC +#if USE_GRPC && !defined(KEEPER_STANDALONE_BUILD) ProtocolServerAdapter(const std::string & listen_host_, const char * port_name_, const std::string & description_, std::unique_ptr grpc_server_); #endif @@ -52,7 +52,7 @@ private: class Impl { public: - virtual ~Impl() {} + virtual ~Impl() = default; virtual void start() = 0; virtual void stop() = 0; virtual bool isStopping() const = 0; diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index b6ce9fa7507..153b8c35ea4 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include diff --git a/src/Server/TCPHandlerFactory.h b/src/Server/TCPHandlerFactory.h index 03b2592198d..6e27dfc93bd 100644 --- a/src/Server/TCPHandlerFactory.h +++ b/src/Server/TCPHandlerFactory.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include diff --git a/src/Storages/Cache/ExternalDataSourceCache.h b/src/Storages/Cache/ExternalDataSourceCache.h index a25686b49c1..5ffb2b20fc7 100644 --- a/src/Storages/Cache/ExternalDataSourceCache.h +++ b/src/Storages/Cache/ExternalDataSourceCache.h @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/Cache/RemoteFileCachePolicy.h b/src/Storages/Cache/RemoteFileCachePolicy.h index 7d742d6ea14..5c212264bd2 100644 --- a/src/Storages/Cache/RemoteFileCachePolicy.h +++ b/src/Storages/Cache/RemoteFileCachePolicy.h @@ -1,6 +1,10 @@ #pragma once + +#include + namespace DB { + struct RemoteFileCacheWeightFunction { size_t operator()(const RemoteCacheController & cache) const { return cache.getFileSize(); } @@ -14,4 +18,5 @@ struct RemoteFileCacheReleaseFunction controller->close(); } }; + } diff --git a/src/Storages/CompressionCodecSelector.h b/src/Storages/CompressionCodecSelector.h index 746b3ce37ee..4c088924cdb 100644 --- a/src/Storages/CompressionCodecSelector.h +++ b/src/Storages/CompressionCodecSelector.h @@ -69,7 +69,7 @@ private: std::vector elements; public: - CompressionCodecSelector() {} /// Always returns the default method. + CompressionCodecSelector() = default; /// Always returns the default method. 
CompressionCodecSelector(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix) { @@ -78,7 +78,7 @@ public: for (const auto & name : keys) { - if (!startsWith(name.data(), "case")) + if (!startsWith(name, "case")) throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); elements.emplace_back(config, config_prefix + "." + name); diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 0ad55162fb2..902307fc828 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -184,6 +184,11 @@ off_t ReadBufferFromHDFS::getPosition() return impl->getPosition() - available(); } +size_t ReadBufferFromHDFS::getFileOffsetOfBufferEnd() const +{ + return impl->getPosition(); +} + } #endif diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index aa20e20fa48..e8cdcb27360 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -39,6 +39,8 @@ public: std::optional getTotalSize() override; + size_t getFileOffsetOfBufferEnd() const override; + private: std::unique_ptr impl; }; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 30acbcdf62b..ae470cdccc9 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -405,7 +405,7 @@ ProducerBufferPtr StorageKafka::createWriteBuffer(const Block & header) } -ConsumerBufferPtr StorageKafka::createReadBuffer(const size_t consumer_number) +ConsumerBufferPtr StorageKafka::createReadBuffer(size_t consumer_number) { cppkafka::Configuration conf; diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index 62de3e5183d..03e90b1f6c3 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -120,7 +120,7 @@ private: HandleKafkaErrorMode handle_error_mode; SettingsChanges createSettingsAdjustments(); - ConsumerBufferPtr createReadBuffer(const size_t consumer_number); + ConsumerBufferPtr createReadBuffer(size_t consumer_number); /// If named_collection is specified. 
String collection_name; diff --git a/src/Storages/LiveView/LiveViewEventsSource.h b/src/Storages/LiveView/LiveViewEventsSource.h index 77ee06c702c..1f9f8bfb785 100644 --- a/src/Storages/LiveView/LiveViewEventsSource.h +++ b/src/Storages/LiveView/LiveViewEventsSource.h @@ -44,7 +44,7 @@ public: : SourceWithProgress({ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), "version")}), storage(std::move(storage_)), blocks_ptr(std::move(blocks_ptr_)), blocks_metadata_ptr(std::move(blocks_metadata_ptr_)), - active_ptr(std::move(active_ptr_)), has_limit(has_limit_), + active_ptr(active_ptr_), has_limit(has_limit_), limit(limit_), heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000) { diff --git a/src/Storages/LiveView/LiveViewSource.h b/src/Storages/LiveView/LiveViewSource.h index ec726359581..8d63890f603 100644 --- a/src/Storages/LiveView/LiveViewSource.h +++ b/src/Storages/LiveView/LiveViewSource.h @@ -26,7 +26,7 @@ public: : SourceWithProgress(storage_->getHeader()) , storage(std::move(storage_)), blocks_ptr(std::move(blocks_ptr_)), blocks_metadata_ptr(std::move(blocks_metadata_ptr_)), - active_ptr(std::move(active_ptr_)), + active_ptr(active_ptr_), has_limit(has_limit_), limit(limit_), heartbeat_interval_usec(heartbeat_interval_sec_ * 1000000) { diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index b87d3f051d0..01293a1e5d7 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -18,9 +18,9 @@ public: QueryProcessingStage::Enum to_stage_) : IStorage(table_id_), pipes(std::move(pipes_)), to_stage(to_stage_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); } static StoragePtr createStorage(const StorageID & table_id, const ColumnsDescription & columns, Pipes pipes, QueryProcessingStage::Enum to_stage) diff --git a/src/Storages/LiveView/TemporaryLiveViewCleaner.h b/src/Storages/LiveView/TemporaryLiveViewCleaner.h index 3fe0079a46f..9cc5933eb89 100644 --- a/src/Storages/LiveView/TemporaryLiveViewCleaner.h +++ b/src/Storages/LiveView/TemporaryLiveViewCleaner.h @@ -31,7 +31,7 @@ public: private: friend std::unique_ptr<TemporaryLiveViewCleaner>::deleter_type; - TemporaryLiveViewCleaner(ContextMutablePtr global_context_); + explicit TemporaryLiveViewCleaner(ContextMutablePtr global_context_); ~TemporaryLiveViewCleaner(); void backgroundThreadFunc(); diff --git a/src/Storages/MarkCache.h b/src/Storages/MarkCache.h index 06143e954f8..a3f92650426 100644 --- a/src/Storages/MarkCache.h +++ b/src/Storages/MarkCache.h @@ -40,7 +40,7 @@ private: using Base = LRUCache<UInt128, MarksInCompressedFile, UInt128TrivialHash, MarksWeightFunction>; public: - MarkCache(size_t max_size_in_bytes) + explicit MarkCache(size_t max_size_in_bytes) : Base(max_size_in_bytes) {} /// Calculate key from path to file and offset.
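A large share of this diff just adds `explicit` to single-argument constructors (QueueBuffer, MarkCache, TemporaryLiveViewCleaner, the merge selectors, and many more). A small self-contained illustration, not ClickHouse code, of the silent conversion such constructors otherwise permit:

```cpp
// Why the diff marks one-argument constructors explicit: without it, a bare
// integer can silently construct a temporary object. Cache is a made-up type.
#include <cstddef>
#include <iostream>

struct Cache
{
    explicit Cache(std::size_t max_size_in_bytes) : max_size(max_size_in_bytes) {}
    std::size_t max_size;
};

static void useCache(const Cache & cache)
{
    std::cout << cache.max_size << '\n';
}

int main()
{
    Cache cache(1024);
    useCache(cache);   // fine: a real Cache is passed
    // useCache(4096); // no longer compiles; before `explicit`, the integer
                       // would have constructed a temporary Cache unnoticed.
}
```

Where an implicit conversion is intended (for example `Port(Block header_)` and `Range(const FieldRef & point)` earlier in the diff), the constructor keeps its old form and gains a `/// NOLINT` to silence the same clang-tidy check instead.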
diff --git a/src/Storages/MergeTree/ActiveDataPartSet.h b/src/Storages/MergeTree/ActiveDataPartSet.h index 0b747ab83b9..8ab03625d5c 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.h +++ b/src/Storages/MergeTree/ActiveDataPartSet.h @@ -22,15 +22,14 @@ using Strings = std::vector; class ActiveDataPartSet { public: - ActiveDataPartSet(MergeTreeDataFormatVersion format_version_) : format_version(format_version_) {} + explicit ActiveDataPartSet(MergeTreeDataFormatVersion format_version_) : format_version(format_version_) {} ActiveDataPartSet(MergeTreeDataFormatVersion format_version_, const Strings & names); - ActiveDataPartSet(const ActiveDataPartSet & other) - : format_version(other.format_version) - , part_info_to_name(other.part_info_to_name) - {} + ActiveDataPartSet(const ActiveDataPartSet & other) = default; - ActiveDataPartSet(ActiveDataPartSet && other) noexcept { swap(other); } + ActiveDataPartSet & operator=(const ActiveDataPartSet & other) = default; + + ActiveDataPartSet(ActiveDataPartSet && other) noexcept = default; void swap(ActiveDataPartSet & other) noexcept { @@ -38,16 +37,6 @@ public: std::swap(part_info_to_name, other.part_info_to_name); } - ActiveDataPartSet & operator=(const ActiveDataPartSet & other) - { - if (&other != this) - { - ActiveDataPartSet tmp(other); - swap(tmp); - } - return *this; - } - /// Returns true if the part was actually added. If out_replaced_parts != nullptr, it will contain /// parts that were replaced from the set by the newly added part. bool add(const String & name, Strings * out_replaced_parts = nullptr); diff --git a/src/Storages/MergeTree/AllMergeSelector.cpp b/src/Storages/MergeTree/AllMergeSelector.cpp index 79080df1570..5e406c6e4f7 100644 --- a/src/Storages/MergeTree/AllMergeSelector.cpp +++ b/src/Storages/MergeTree/AllMergeSelector.cpp @@ -8,7 +8,7 @@ namespace DB AllMergeSelector::PartsRange AllMergeSelector::select( const PartsRanges & parts_ranges, - const size_t /*max_total_size_to_merge*/) + size_t /*max_total_size_to_merge*/) { size_t min_partition_size = 0; PartsRanges::const_iterator best_partition; diff --git a/src/Storages/MergeTree/AllMergeSelector.h b/src/Storages/MergeTree/AllMergeSelector.h index d3b399b2fc5..6cd3bb6f3fa 100644 --- a/src/Storages/MergeTree/AllMergeSelector.h +++ b/src/Storages/MergeTree/AllMergeSelector.h @@ -13,7 +13,7 @@ public: /// Parameter max_total_size_to_merge is ignored. 
PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; }; } diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h index 81aded5e45c..baf3e281257 100644 --- a/src/Storages/MergeTree/BackgroundProcessList.h +++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -26,7 +26,7 @@ public: BackgroundProcessListEntry(const BackgroundProcessListEntry &) = delete; BackgroundProcessListEntry & operator=(const BackgroundProcessListEntry &) = delete; - BackgroundProcessListEntry(BackgroundProcessListEntry &&) = default; + BackgroundProcessListEntry(BackgroundProcessListEntry &&) noexcept = default; BackgroundProcessListEntry(BackgroundProcessList & list_, const typename container_t::iterator it_, const CurrentMetrics::Metric & metric) : list(list_), it{it_}, metric_increment{metric} diff --git a/src/Storages/MergeTree/BoolMask.h b/src/Storages/MergeTree/BoolMask.h index c26a0ed6c58..11f9238aa28 100644 --- a/src/Storages/MergeTree/BoolMask.h +++ b/src/Storages/MergeTree/BoolMask.h @@ -6,7 +6,7 @@ struct BoolMask bool can_be_true = false; bool can_be_false = false; - BoolMask() {} + BoolMask() = default; BoolMask(bool can_be_true_, bool can_be_false_) : can_be_true(can_be_true_), can_be_false(can_be_false_) {} BoolMask operator &(const BoolMask & m) const diff --git a/src/Storages/MergeTree/ColumnSizeEstimator.h b/src/Storages/MergeTree/ColumnSizeEstimator.h index 61c0ac64dbd..597dc80e525 100644 --- a/src/Storages/MergeTree/ColumnSizeEstimator.h +++ b/src/Storages/MergeTree/ColumnSizeEstimator.h @@ -1,6 +1,7 @@ #pragma once -#include "Storages/MergeTree/IMergeTreeDataPart.h" +#include +#include namespace DB diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6ff985aac37..a78d9050c94 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/MergeTree/KeyCondition.h b/src/Storages/MergeTree/KeyCondition.h index dee46ae52ce..afe4a9f3e20 100644 --- a/src/Storages/MergeTree/KeyCondition.h +++ b/src/Storages/MergeTree/KeyCondition.h @@ -31,7 +31,7 @@ struct FieldRef : public Field /// Create as explicit field without block. template - FieldRef(T && value) : Field(std::forward(value)) {} + FieldRef(T && value) : Field(std::forward(value)) {} /// NOLINT /// Create as reference to field in block. FieldRef(ColumnsWithTypeAndName * columns_, size_t row_idx_, size_t column_idx_) @@ -60,10 +60,10 @@ public: bool right_included = false; /// includes the right border /// The whole universe (not null). - Range() {} + Range() {} /// NOLINT /// One point. - Range(const FieldRef & point) + Range(const FieldRef & point) /// NOLINT : left(point), right(point), left_included(true), right_included(true) {} /// A bounded two-sided range. 
@@ -313,8 +313,8 @@ private: ALWAYS_TRUE, }; - RPNElement() {} - RPNElement(Function function_) : function(function_) {} + RPNElement() = default; + RPNElement(Function function_) : function(function_) {} /// NOLINT RPNElement(Function function_, size_t key_column_) : function(function_), key_column(key_column_) {} RPNElement(Function function_, size_t key_column_, const Range & range_) : function(function_), range(range_), key_column(key_column_) {} diff --git a/src/Storages/MergeTree/LeaderElection.h b/src/Storages/MergeTree/LeaderElection.h index b05026d52f9..6d3281c8c61 100644 --- a/src/Storages/MergeTree/LeaderElection.h +++ b/src/Storages/MergeTree/LeaderElection.h @@ -1,11 +1,13 @@ #pragma once +#include #include #include #include #include #include +namespace fs = std::filesystem; namespace zkutil { diff --git a/src/Storages/MergeTree/LevelMergeSelector.cpp b/src/Storages/MergeTree/LevelMergeSelector.cpp index 7bcfbf6160a..16947277463 100644 --- a/src/Storages/MergeTree/LevelMergeSelector.cpp +++ b/src/Storages/MergeTree/LevelMergeSelector.cpp @@ -105,7 +105,7 @@ void selectWithinPartition( LevelMergeSelector::PartsRange LevelMergeSelector::select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) + size_t max_total_size_to_merge) { Estimator estimator; diff --git a/src/Storages/MergeTree/LevelMergeSelector.h b/src/Storages/MergeTree/LevelMergeSelector.h index 5849b34e320..f4080c379c4 100644 --- a/src/Storages/MergeTree/LevelMergeSelector.h +++ b/src/Storages/MergeTree/LevelMergeSelector.h @@ -21,7 +21,7 @@ public: PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; private: const Settings settings; diff --git a/src/Storages/MergeTree/MergeSelector.h b/src/Storages/MergeTree/MergeSelector.h index aac805823a9..c55f738f879 100644 --- a/src/Storages/MergeTree/MergeSelector.h +++ b/src/Storages/MergeTree/MergeSelector.h @@ -63,7 +63,7 @@ public: */ virtual PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) = 0; + size_t max_total_size_to_merge) = 0; virtual ~IMergeSelector() = default; }; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index c462c34aa83..2e906ecfce0 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -86,7 +86,6 @@ protected: void initializeRangeReaders(MergeTreeReadTask & task); -protected: const MergeTreeData & storage; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 07d51d25700..dadccd2f9dc 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -118,9 +118,9 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetada MergeTreeReadTask::MergeTreeReadTask( - const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, const size_t part_index_in_query_, + const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, size_t part_index_in_query_, const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, - const NamesAndTypesList & pre_columns_, const bool remove_prewhere_column_, const bool should_reorder_, + const NamesAndTypesList & pre_columns_, bool remove_prewhere_column_, bool 
should_reorder_, MergeTreeBlockSizePredictorPtr && size_predictor_) : data_part{data_part_}, mark_ranges{mark_ranges_}, part_index_in_query{part_index_in_query_}, ordered_names{ordered_names_}, column_name_set{column_name_set_}, columns{columns_}, pre_columns{pre_columns_}, diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index b931a13c027..1f70ca72f39 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -55,9 +55,9 @@ struct MergeTreeReadTask bool isFinished() const { return mark_ranges.empty() && range_reader.isCurrentRangeFinished(); } MergeTreeReadTask( - const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, const size_t part_index_in_query_, + const MergeTreeData::DataPartPtr & data_part_, const MarkRanges & mark_ranges_, size_t part_index_in_query_, const Names & ordered_names_, const NameSet & column_name_set_, const NamesAndTypesList & columns_, - const NamesAndTypesList & pre_columns_, const bool remove_prewhere_column_, const bool should_reorder_, + const NamesAndTypesList & pre_columns_, bool remove_prewhere_column_, bool should_reorder_, MergeTreeBlockSizePredictorPtr && size_predictor_); }; @@ -86,7 +86,7 @@ struct MergeTreeBlockSizePredictor void startBlock(); /// Updates statistic for more accurate prediction - void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = DECAY()); + void update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay = calculateDecay()); /// Return current block size (after update()) inline size_t getBlockSize() const @@ -112,7 +112,7 @@ struct MergeTreeBlockSizePredictor : 0; } - inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = DECAY()) + inline void updateFilteredRowsRation(size_t rows_was_read, size_t rows_was_filtered, double decay = calculateDecay()) { double alpha = std::pow(1. - decay, rows_was_read); double current_ration = rows_was_filtered / std::max(1.0, static_cast(rows_was_read)); @@ -125,7 +125,7 @@ struct MergeTreeBlockSizePredictor /// After n=NUM_UPDATES_TO_TARGET_WEIGHT updates v_{n} = (1 - TARGET_WEIGHT) * v_{0} + TARGET_WEIGHT * v_{target} static constexpr double TARGET_WEIGHT = 0.5; static constexpr size_t NUM_UPDATES_TO_TARGET_WEIGHT = 8192; - static double DECAY() { return 1. - std::pow(TARGET_WEIGHT, 1. / NUM_UPDATES_TO_TARGET_WEIGHT); } + static double calculateDecay() { return 1. - std::pow(TARGET_WEIGHT, 1. 
/ NUM_UPDATES_TO_TARGET_WEIGHT); } protected: diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 1f598be6896..e76c2888315 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5818,6 +5818,7 @@ try if (result_part) { + part_log_elem.disk_name = result_part->volume->getDisk()->getName(); part_log_elem.path_on_disk = result_part->getFullPath(); part_log_elem.bytes_compressed_on_disk = result_part->getBytesOnDisk(); part_log_elem.rows = result_part->rows_count; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 0040d1c903f..bdb4b542744 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -900,7 +900,7 @@ public: /// Lock part in zookeeper for shared data in several nodes /// Overridden in StorageReplicatedMergeTree - virtual void lockSharedData(const IMergeTreeDataPart &, bool = false) const {} + virtual void lockSharedData(const IMergeTreeDataPart &, bool = false) const {} /// NOLINT /// Unlock shared data part in zookeeper /// Overridden in StorageReplicatedMergeTree diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 31d61adcc11..9a60e4c6078 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -184,7 +184,6 @@ private: bool need_remove_expired_values, const MergeTreeData::MergingParams & merging_params) const; -private: MergeTreeData & data; const size_t max_tasks_count; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 06f1fb06f25..15acb88aa0f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -26,7 +26,7 @@ struct MergeTreeDataPartChecksum UInt64 uncompressed_size {}; uint128 uncompressed_hash {}; - MergeTreeDataPartChecksum() {} + MergeTreeDataPartChecksum() = default; MergeTreeDataPartChecksum(UInt64 file_size_, uint128 file_hash_) : file_size(file_size_), file_hash(file_hash_) {} MergeTreeDataPartChecksum(UInt64 file_size_, uint128 file_hash_, UInt64 uncompressed_size_, uint128 uncompressed_hash_) : file_size(file_size_), file_hash(file_hash_), is_compressed(true), diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.h b/src/Storages/MergeTree/MergeTreeDataPartType.h index fecd9d00cdc..7cf23c7a045 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.h +++ b/src/Storages/MergeTree/MergeTreeDataPartType.h @@ -25,7 +25,7 @@ public: }; MergeTreeDataPartType() : value(UNKNOWN) {} - MergeTreeDataPartType(Value value_) : value(value_) {} + MergeTreeDataPartType(Value value_) : value(value_) {} /// NOLINT bool operator==(const MergeTreeDataPartType & other) const { diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 2057dec957e..7b194de8103 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -552,7 +552,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseASTEquals( for (const auto & f : value_field.get()) { - if ((f.isNull() && !is_nullable) || f.IsDecimal(f.getType())) + if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) return false; mutable_column->insert(convertFieldToType(f, *actual_type, 
value_type.get())); diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 5c6559ba298..27fd701c67b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -38,7 +38,7 @@ public: ALWAYS_TRUE, }; - RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) {} + RPNElement(Function function_ = FUNCTION_UNKNOWN) : function(function_) {} /// NOLINT Function function = FUNCTION_UNKNOWN; std::vector> predicate; diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.h b/src/Storages/MergeTree/MergeTreeIndexFullText.h index 1826719df0b..5f5956553dc 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.h +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.h @@ -102,7 +102,7 @@ private: ALWAYS_TRUE, }; - RPNElement( + RPNElement( /// NOLINT Function function_ = FUNCTION_UNKNOWN, size_t key_column_ = 0, std::unique_ptr && const_bloom_filter_ = nullptr) : function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {} diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index 6658730b7c1..088029d9e8e 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -18,7 +18,7 @@ MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & { } -MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & index_name_, const bool met_) +MergeTreeIndexGranuleHypothesis::MergeTreeIndexGranuleHypothesis(const String & index_name_, bool met_) : index_name(index_name_), is_empty(false), met(met_) { } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h index 43b56d9559f..578bb6f3f7a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.h +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.h @@ -16,7 +16,7 @@ struct MergeTreeIndexGranuleHypothesis : public IMergeTreeIndexGranule MergeTreeIndexGranuleHypothesis( const String & index_name_, - const bool met_); + bool met_); void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -55,7 +55,7 @@ private: class MergeTreeIndexHypothesis : public IMergeTreeIndex { public: - MergeTreeIndexHypothesis( + explicit MergeTreeIndexHypothesis( const IndexDescription & index_) : IMergeTreeIndex(index_) {} diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.h b/src/Storages/MergeTree/MergeTreeIndexMinMax.h index 0e05e25fb36..9f78c86a498 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.h +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.h @@ -68,7 +68,7 @@ private: class MergeTreeIndexMinMax : public IMergeTreeIndex { public: - MergeTreeIndexMinMax(const IndexDescription & index_) + explicit MergeTreeIndexMinMax(const IndexDescription & index_) : IMergeTreeIndex(index_) {} @@ -83,7 +83,7 @@ public: bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; const char* getSerializedFileExtension() const override { return ".idx2"; } - MergeTreeIndexFormat getDeserializedFormat(const DiskPtr disk, const std::string & path_prefix) const override; + MergeTreeIndexFormat getDeserializedFormat(const DiskPtr disk, const std::string & path_prefix) const override; /// NOLINT }; } diff --git 
a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index a761fc3124e..984a2bb7762 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -29,7 +29,7 @@ struct MergeTreeIndexFormat MergeTreeIndexVersion version; const char* extension; - operator bool() const { return version != 0; } + operator bool() const { return version != 0; } /// NOLINT }; /// Stores some info about a single block of data. @@ -122,7 +122,7 @@ using MergeTreeIndexMergedConditions = std::vector &, String * reason)>; public: - MergeTreePartsMover(MergeTreeData * data_) + explicit MergeTreePartsMover(MergeTreeData * data_) : data(data_) , log(&Poco::Logger::get("MergeTreePartsMover")) { @@ -59,7 +59,6 @@ public: /// merge or mutation. void swapClonedPart(const std::shared_ptr & cloned_parts) const; -public: /// Can stop background moves and moves from queries ActionBlocker moves_blocker; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index c89affb5365..3c31ffa7c97 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -18,9 +18,9 @@ namespace ErrorCodes namespace DB { MergeTreeReadPool::MergeTreeReadPool( - const size_t threads_, - const size_t sum_marks_, - const size_t min_marks_for_concurrent_read_, + size_t threads_, + size_t sum_marks_, + size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, @@ -28,7 +28,7 @@ MergeTreeReadPool::MergeTreeReadPool( const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, - const bool do_not_steal_tasks_) + bool do_not_steal_tasks_) : backoff_settings{backoff_settings_} , backoff_state{threads_} , data{data_} @@ -45,7 +45,7 @@ MergeTreeReadPool::MergeTreeReadPool( } -MergeTreeReadTaskPtr MergeTreeReadPool::getTask(const size_t min_marks_to_read, const size_t thread, const Names & ordered_names) +MergeTreeReadTaskPtr MergeTreeReadPool::getTask(size_t min_marks_to_read, size_t thread, const Names & ordered_names) { const std::lock_guard lock{mutex}; @@ -149,7 +149,7 @@ Block MergeTreeReadPool::getHeader() const return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals(), data.getStorageID()); } -void MergeTreeReadPool::profileFeedback(const ReadBufferFromFileBase::ProfileInfo info) +void MergeTreeReadPool::profileFeedback(ReadBufferFromFileBase::ProfileInfo info) { if (backoff_settings.min_read_latency_ms == 0 || do_not_steal_tasks) return; @@ -232,8 +232,8 @@ std::vector MergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & void MergeTreeReadPool::fillPerThreadInfo( - const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, - const RangesInDataParts & parts, const size_t min_marks_for_concurrent_read) + size_t threads, size_t sum_marks, std::vector per_part_sum_marks, + const RangesInDataParts & parts, size_t min_marks_for_concurrent_read) { threads_tasks.resize(threads); if (parts.empty()) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index aac4d5016a2..4ab4393ef5a 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -40,7 +40,7 @@ public: size_t min_concurrency = 1; /// Constants above is just an example. 
- BackoffSettings(const Settings & settings) + explicit BackoffSettings(const Settings & settings) : min_read_latency_ms(settings.read_backoff_min_latency_ms.totalMilliseconds()), max_throughput(settings.read_backoff_max_throughput), min_interval_between_events_ms(settings.read_backoff_min_interval_between_events_ms.totalMilliseconds()), @@ -63,27 +63,27 @@ private: Stopwatch time_since_prev_event {CLOCK_MONOTONIC_COARSE}; size_t num_events = 0; - BackoffState(size_t threads) : current_threads(threads) {} + explicit BackoffState(size_t threads) : current_threads(threads) {} }; BackoffState backoff_state; public: MergeTreeReadPool( - const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_, + size_t threads_, size_t sum_marks_, size_t min_marks_for_concurrent_read_, RangesInDataParts && parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, - const bool do_not_steal_tasks_ = false); + bool do_not_steal_tasks_ = false); - MergeTreeReadTaskPtr getTask(const size_t min_marks_to_read, const size_t thread, const Names & ordered_names); + MergeTreeReadTaskPtr getTask(size_t min_marks_to_read, size_t thread, const Names & ordered_names); /** Each worker could call this method and pass information about read performance. * If read performance is too low, pool could decide to lower number of threads: do not assign more tasks to several threads. * This allows to overcome excessive load to disk subsystem, when reads are not from page cache. */ - void profileFeedback(const ReadBufferFromFileBase::ProfileInfo info); + void profileFeedback(ReadBufferFromFileBase::ProfileInfo info); Block getHeader() const; @@ -91,8 +91,8 @@ private: std::vector fillPerPartInfo(const RangesInDataParts & parts); void fillPerThreadInfo( - const size_t threads, const size_t sum_marks, std::vector per_part_sum_marks, - const RangesInDataParts & parts, const size_t min_marks_for_concurrent_read); + size_t threads, size_t sum_marks, std::vector per_part_sum_marks, + const RangesInDataParts & parts, size_t min_marks_for_concurrent_read); const MergeTreeData & data; StorageMetadataPtr metadata_snapshot; diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index 7eefdd9335b..a7405140c6d 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -58,7 +58,6 @@ private: /// current row at which we stop reading size_t current_row = 0; -private: /// Closes readers and unlock part locks void finish(); }; diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index b120c230005..c1cc3b6ed3c 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -122,7 +122,7 @@ void MergeTreeSettings::sanityCheck(const Settings & query_settings) const { throw Exception( ErrorCodes::BAD_ARGUMENTS, - "min_bytes_to_rebalance_partition_over_jbod: {} is lower than specified max_bytes_to_merge_at_max_space_in_pool / 150: {}", + "min_bytes_to_rebalance_partition_over_jbod: {} is lower than specified max_bytes_to_merge_at_max_space_in_pool / 1024: {}", min_bytes_to_rebalance_partition_over_jbod, max_bytes_to_merge_at_max_space_in_pool / 1024); } diff --git a/src/Storages/MergeTree/MergeTreeSettings.h 
b/src/Storages/MergeTree/MergeTreeSettings.h index 15f250bc9cd..f6931def490 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -151,6 +151,7 @@ struct Settings; M(UInt64, replicated_max_parallel_fetches_for_table, 0, "Obsolete setting, does nothing.", 0) \ M(Bool, write_final_mark, true, "Obsolete setting, does nothing.", 0) /// Settings that should not change after the creation of a table. + /// NOLINTNEXTLINE #define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ M(index_granularity) diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp index 6a44da06f1f..145d292138a 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.cpp @@ -13,15 +13,15 @@ namespace ErrorCodes } MergeTreeThreadSelectProcessor::MergeTreeThreadSelectProcessor( - const size_t thread_, + size_t thread_, const MergeTreeReadPoolPtr & pool_, - const size_t min_marks_to_read_, - const UInt64 max_block_size_rows_, + size_t min_marks_to_read_, + UInt64 max_block_size_rows_, size_t preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, - const bool use_uncompressed_cache_, + bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h index 110c4fa34e6..ae25ca2a88a 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeThreadSelectProcessor.h @@ -15,15 +15,15 @@ class MergeTreeThreadSelectProcessor final : public MergeTreeBaseSelectProcessor { public: MergeTreeThreadSelectProcessor( - const size_t thread_, + size_t thread_, const std::shared_ptr & pool_, - const size_t min_marks_to_read_, - const UInt64 max_block_size_, + size_t min_marks_to_read_, + UInt64 max_block_size_, size_t preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot_, - const bool use_uncompressed_cache_, + bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, ExpressionActionsSettings actions_settings, const MergeTreeReaderSettings & reader_settings_, diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 4aa7aa532a8..fa14fea94d1 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -79,8 +79,6 @@ private: /// Transform Conditions list to WHERE or PREWHERE expression. 
static ASTPtr reconstruct(const Conditions & conditions); - void optimizeConjunction(ASTSelectQuery & select, ASTFunction * const fun) const; - void optimizeArbitrary(ASTSelectQuery & select) const; UInt64 getIdentifiersColumnSize(const NameSet & identifiers) const; diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h index 14ef91c0777..07c5c55d873 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h @@ -63,7 +63,7 @@ public: }; EntryState(): value(TODO) {} - EntryState(Value value_): value(value_) {} + EntryState(Value value_): value(value_) {} /// NOLINT Value value; @@ -173,7 +173,6 @@ private: void removePins(const Entry & entry, zkutil::ZooKeeperPtr zk); void syncStateFromZK(); -private: StorageReplicatedMergeTree & storage; String zookeeper_path; diff --git a/src/Storages/MergeTree/RPNBuilder.h b/src/Storages/MergeTree/RPNBuilder.h index d63781db67d..183808c9290 100644 --- a/src/Storages/MergeTree/RPNBuilder.h +++ b/src/Storages/MergeTree/RPNBuilder.h @@ -22,8 +22,8 @@ public: using AtomFromASTFunc = std::function< bool(const ASTPtr & node, ContextPtr context, Block & block_with_constants, RPNElement & out)>; - RPNBuilder(const SelectQueryInfo & query_info, ContextPtr context_, const AtomFromASTFunc & atomFromAST_) - : WithContext(context_), atomFromAST(atomFromAST_) + RPNBuilder(const SelectQueryInfo & query_info, ContextPtr context_, const AtomFromASTFunc & atom_from_ast_) + : WithContext(context_), atom_from_ast(atom_from_ast_) { /** Evaluation of expressions that depend only on constants. * For the index to be used, if it is written, for example `WHERE Date = toDate(now())`. @@ -79,7 +79,7 @@ private: } } - if (!atomFromAST(node, getContext(), block_with_constants, element)) + if (!atom_from_ast(node, getContext(), block_with_constants, element)) { element.function = RPNElement::FUNCTION_UNKNOWN; } @@ -114,7 +114,7 @@ private: return true; } - const AtomFromASTFunc & atomFromAST; + const AtomFromASTFunc & atom_from_ast; Block block_with_constants; RPN rpn; }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h b/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h index e5d3dd0a737..aa58e16a716 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAltersSequence.h @@ -29,7 +29,6 @@ private: bool data_finished = false; }; -private: /// alter_version -> AlterState. 
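The `/// NOLINT` markers added throughout this patch (for example on `EntryState(Value value_)` above) suppress clang-tidy on a single line where the flagged pattern is intentional. A simplified sketch of that case, with shortened hypothetical names rather than the real class:

```cpp
#include <cassert>

// The single-argument constructor is deliberately implicit so call sites
// can pass a bare Value; clang-tidy's explicit-constructor check is
// silenced with a NOLINT marker on that one line only.
struct EntryState
{
    enum Value { TODO, DONE };

    EntryState() : value(TODO) {}
    EntryState(Value value_) : value(value_) {} // NOLINT(google-explicit-constructor)

    Value value;
};

bool isDone(EntryState state) { return state.value == EntryState::DONE; }

int main()
{
    assert(isDone(EntryState::DONE)); // Value converts implicitly, as intended
    return 0;
}
```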
std::map queue_state; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index 509b52ec07f..861de620926 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -24,7 +24,7 @@ class StorageReplicatedMergeTree; class ReplicatedMergeTreeCleanupThread { public: - ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreeCleanupThread(StorageReplicatedMergeTree & storage_); void start() { task->activateAndSchedule(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h index 70eacbee102..91f5824f8fc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeMergeStrategyPicker.h @@ -42,7 +42,7 @@ struct ReplicatedMergeTreeLogEntryData; class ReplicatedMergeTreeMergeStrategyPicker: public boost::noncopyable { public: - ReplicatedMergeTreeMergeStrategyPicker(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreeMergeStrategyPicker(StorageReplicatedMergeTree & storage_); /// triggers refreshing the cached state (list of replicas etc.) /// used when we get new merge event from the zookeeper queue ( see queueUpdatingTask() etc ) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index a8ce4fedd6d..7c2c2401bf0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -30,7 +30,7 @@ class StorageReplicatedMergeTree; class ReplicatedMergeTreePartCheckThread { public: - ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreePartCheckThread(StorageReplicatedMergeTree & storage_); ~ReplicatedMergeTreePartCheckThread(); /// Processing of the queue to be checked is done in the background thread, which you must first start. 
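The dominant change in the hunks above marks single-argument constructors `explicit`, so a bare value can no longer be silently converted into a heavyweight object at a call site. A hedged stand-in example (hypothetical names, not the real thread classes):

```cpp
#include <iostream>
#include <string>

// Hypothetical stand-in for the background-thread classes touched above.
struct CleanupThread
{
    explicit CleanupThread(const std::string & storage_) : storage(storage_) {}
    std::string storage;
};

void start(const CleanupThread & thread) { std::cout << "cleaning " << thread.storage << '\n'; }

int main()
{
    start(CleanupThread{"test_table"}); // construction must be spelled out
    // start("test_table");             // would not compile: the ctor is explicit
    return 0;
}
```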
@@ -42,12 +42,12 @@ public: { ReplicatedMergeTreePartCheckThread * parent; - TemporarilyStop(ReplicatedMergeTreePartCheckThread * parent_) : parent(parent_) + explicit TemporarilyStop(ReplicatedMergeTreePartCheckThread * parent_) : parent(parent_) { parent->stop(); } - TemporarilyStop(TemporarilyStop && old) : parent(old.parent) + TemporarilyStop(TemporarilyStop && old) noexcept : parent(old.parent) { old.parent = nullptr; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 2bc1a538bae..c5798aaefe5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -971,7 +971,7 @@ ReplicatedMergeTreeQueue::StringSet ReplicatedMergeTreeQueue::moveSiblingPartsFo return parts_for_merge; } -bool ReplicatedMergeTreeQueue::checkReplaceRangeCanBeRemoved(const MergeTreePartInfo & part_info, const LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const +bool ReplicatedMergeTreeQueue::checkReplaceRangeCanBeRemoved(const MergeTreePartInfo & part_info, LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const { if (entry_ptr->type != LogEntry::REPLACE_RANGE) return false; @@ -1515,7 +1515,7 @@ ReplicatedMergeTreeQueue::SelectedEntryPtr ReplicatedMergeTreeQueue::selectEntry bool ReplicatedMergeTreeQueue::processEntry( std::function get_zookeeper, LogEntryPtr & entry, - const std::function func) + std::function func) { std::exception_ptr saved_exception; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index ce0afcc0343..ae0ca806344 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -184,7 +184,7 @@ private: /// Check that entry_ptr is REPLACE_RANGE entry and can be removed from queue because current entry covers it bool checkReplaceRangeCanBeRemoved( - const MergeTreePartInfo & part_info, const LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const; + const MergeTreePartInfo & part_info, LogEntryPtr entry_ptr, const ReplicatedMergeTreeLogEntryData & current) const; /// Ensures that only one thread is simultaneously updating mutations. std::mutex update_mutations_mutex; @@ -366,7 +366,7 @@ public: * If there was an exception during processing, it saves it in `entry`. * Returns true if there were no exceptions during the processing. */ - bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, const std::function func); + bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, std::function func); /// Count the number of merges and mutations of single parts in the queue. 
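The `TemporarilyStop` move constructor above also gains `noexcept`. Beyond documenting that moving cannot throw, this is what lets standard containers move elements instead of copying them on reallocation (they fall back to copies via `std::move_if_noexcept` otherwise). A minimal sketch in the same spirit, not the real guard:

```cpp
#include <type_traits>
#include <utility>

// Move-only RAII guard modeled loosely on TemporarilyStop.
struct Guard
{
    explicit Guard(int * counter_) : counter(counter_) { ++*counter; }
    Guard(Guard && old) noexcept : counter(old.counter) { old.counter = nullptr; }
    ~Guard() { if (counter) --*counter; }
    int * counter;
};

// Containers such as std::vector only move (rather than copy) elements on
// reallocation when the move constructor is noexcept.
static_assert(std::is_nothrow_move_constructible_v<Guard>);

int main()
{
    int stopped = 0;
    Guard a{&stopped};
    Guard b{std::move(a)};      // ownership transferred, a.counter is null
    return stopped == 1 ? 0 : 1;
}
```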
OperationsInQueue countMergesAndPartMutations() const; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h index f560850a6c6..4cdcc936e21 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQuorumEntry.h @@ -23,8 +23,8 @@ struct ReplicatedMergeTreeQuorumEntry size_t required_number_of_replicas{}; std::set replicas; - ReplicatedMergeTreeQuorumEntry() {} - ReplicatedMergeTreeQuorumEntry(const String & str) + ReplicatedMergeTreeQuorumEntry() = default; + explicit ReplicatedMergeTreeQuorumEntry(const String & str) { fromString(str); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h index e62cff4baf6..99e56ffb366 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.h @@ -22,7 +22,7 @@ class StorageReplicatedMergeTree; class ReplicatedMergeTreeRestartingThread { public: - ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); + explicit ReplicatedMergeTreeRestartingThread(StorageReplicatedMergeTree & storage_); void start() { task->activateAndSchedule(); } diff --git a/src/Storages/MergeTree/SimpleMergeSelector.cpp b/src/Storages/MergeTree/SimpleMergeSelector.cpp index 0775e021c76..434d44022df 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.cpp +++ b/src/Storages/MergeTree/SimpleMergeSelector.cpp @@ -202,7 +202,7 @@ void selectWithinPartition( SimpleMergeSelector::PartsRange SimpleMergeSelector::select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) + size_t max_total_size_to_merge) { Estimator estimator; diff --git a/src/Storages/MergeTree/SimpleMergeSelector.h b/src/Storages/MergeTree/SimpleMergeSelector.h index 3e104d1319a..11ffe8b672a 100644 --- a/src/Storages/MergeTree/SimpleMergeSelector.h +++ b/src/Storages/MergeTree/SimpleMergeSelector.h @@ -152,7 +152,7 @@ public: PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; private: const Settings settings; diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 729b545e9a0..1dc1bd1eca4 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -80,7 +80,7 @@ public: protected: /// Used in part mutation. 
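In the `ReplicatedMergeTreeQuorumEntry` hunk above, an empty user-written constructor body becomes `= default` (the idiomatic spelling for "no custom logic") and the parsing constructor from `String` becomes explicit. A simplified sketch of both changes, with a stand-in for the real parsing logic:

```cpp
#include <cstddef>
#include <string>

struct QuorumEntry
{
    std::size_t required_number_of_replicas = 0;

    QuorumEntry() = default;                    // instead of QuorumEntry() {}
    explicit QuorumEntry(const std::string & s) // no silent String -> entry
    {
        required_number_of_replicas = s.size(); // stand-in for fromString(s)
    }
};

int main()
{
    QuorumEntry empty;
    QuorumEntry parsed{"abc"};
    return parsed.required_number_of_replicas == 3
        && empty.required_number_of_replicas == 0 ? 0 : 1;
}
```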
- StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) + explicit StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_) : IStorage(getIDFromPart(part_)) , parts({part_}) , storage(part_->storage) diff --git a/src/Storages/MergeTree/TTLMergeSelector.cpp b/src/Storages/MergeTree/TTLMergeSelector.cpp index 6a42ce039ac..d5657aa680d 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.cpp +++ b/src/Storages/MergeTree/TTLMergeSelector.cpp @@ -18,7 +18,7 @@ const String & getPartitionIdForPart(const ITTLMergeSelector::Part & part_info) IMergeSelector::PartsRange ITTLMergeSelector::select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) + size_t max_total_size_to_merge) { using Iterator = IMergeSelector::PartsRange::const_iterator; Iterator best_begin; diff --git a/src/Storages/MergeTree/TTLMergeSelector.h b/src/Storages/MergeTree/TTLMergeSelector.h index d41ba6f519d..88dc1fffee2 100644 --- a/src/Storages/MergeTree/TTLMergeSelector.h +++ b/src/Storages/MergeTree/TTLMergeSelector.h @@ -30,7 +30,7 @@ public: PartsRange select( const PartsRanges & parts_ranges, - const size_t max_total_size_to_merge) override; + size_t max_total_size_to_merge) override; /// Get TTL value for part, may depend on child type and some settings in /// constructor. diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index db040584536..5b963a544c8 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -645,6 +645,10 @@ void MaterializedPostgreSQLConsumer::addNested( assert(!storages.contains(postgres_table_name)); storages.emplace(postgres_table_name, nested_storage_info); + auto it = deleted_tables.find(postgres_table_name); + if (it != deleted_tables.end()) + deleted_tables.erase(it); + /// Replication consumer will read the WAL and check for the currently processed table whether it is allowed to start applying /// changes to this table.
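The `addNested()` change just above clears a stale entry from `deleted_tables` when a table is re-added, and the `removeNested()` hunk that follows applies the same look-up-first pattern (the hunk resumes below with the `waiting_list` update). A small self-contained example of the erase-if-present idiom with a C++17 if-initializer:

```cpp
#include <cassert>
#include <map>
#include <string>

int main()
{
    std::map<std::string, int> storages{{"t1", 1}, {"t2", 2}};

    // Erase-if-present: the iterator also tells us whether the key
    // actually existed before removal.
    if (auto it = storages.find("t1"); it != storages.end())
        storages.erase(it);

    // Note that erase(key) already tolerates missing keys and reports
    // how many elements were removed (0 or 1 for std::map).
    assert(storages.erase("no_such_table") == 0);
    assert(storages.size() == 1);
    return 0;
}
```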
waiting_list[postgres_table_name] = table_start_lsn; @@ -663,7 +667,9 @@ void MaterializedPostgreSQLConsumer::updateNested(const String & table_name, Sto void MaterializedPostgreSQLConsumer::removeNested(const String & postgres_table_name) { - storages.erase(postgres_table_name); + auto it = storages.find(postgres_table_name); + if (it != storages.end()) + storages.erase(it); deleted_tables.insert(postgres_table_name); } @@ -727,6 +733,7 @@ bool MaterializedPostgreSQLConsumer::readFromReplicationSlot() { if (e.code() == ErrorCodes::POSTGRESQL_REPLICATION_INTERNAL_ERROR) continue; + throw; } } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h index 41284b5b5a1..a01f9394190 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.h @@ -126,7 +126,7 @@ private: static Int64 getLSNValue(const std::string & lsn) { UInt32 upper_half, lower_half; - std::sscanf(lsn.data(), "%X/%X", &upper_half, &lower_half); + std::sscanf(lsn.data(), "%X/%X", &upper_half, &lower_half); /// NOLINT return (static_cast(upper_half) << 32) + lower_half; } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index c72dec824f0..582a568cb48 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -291,11 +291,11 @@ Pipe StorageMaterializedPostgreSQL::read( std::shared_ptr StorageMaterializedPostgreSQL::getMaterializedColumnsDeclaration( - const String name, const String type, UInt64 default_value) + String name, String type, UInt64 default_value) { auto column_declaration = std::make_shared(); - column_declaration->name = name; + column_declaration->name = std::move(name); column_declaration->type = makeASTFunction(type); column_declaration->default_specifier = "MATERIALIZED"; @@ -352,7 +352,7 @@ ASTPtr StorageMaterializedPostgreSQL::getColumnDeclaration(const DataTypePtr & d ast_expression->name = "DateTime64"; ast_expression->arguments = std::make_shared(); ast_expression->arguments->children.emplace_back(std::make_shared(UInt32(6))); - return std::move(ast_expression); + return ast_expression; } return std::make_shared(data_type->getName()); @@ -382,8 +382,6 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( PostgreSQLTableStructurePtr table_structure, const ASTTableOverride * table_override) { auto create_table_query = std::make_shared(); - if (table_override) - applyTableOverrideToCreateQuery(*table_override, create_table_query.get()); auto table_id = getStorageID(); create_table_query->setTable(getNestedTableName()); @@ -496,12 +494,37 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( constraints = metadata_snapshot->getConstraints(); } - columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); - columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); - create_table_query->set(create_table_query->columns_list, columns_declare_list); - create_table_query->set(create_table_query->storage, storage); + if (table_override) + { + if (auto * columns = table_override->columns) + { + if (columns->columns) + { + for (const auto & override_column_ast : columns->columns->children) + { + auto * override_column = override_column_ast->as(); + if (override_column->name == "_sign" || 
override_column->name == "_version") + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot override _sign and _version columns"); + } + } + } + + create_table_query->set(create_table_query->columns_list, columns_declare_list); + + applyTableOverrideToCreateQuery(*table_override, create_table_query.get()); + + create_table_query->columns_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); + create_table_query->columns_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); + } + else + { + columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_sign", "Int8", 1)); + columns_declare_list->columns->children.emplace_back(getMaterializedColumnsDeclaration("_version", "UInt64", 1)); + create_table_query->set(create_table_query->columns_list, columns_declare_list); + } + /// Add columns _sign and _version, so that they can be accessed from the nested ReplacingMergeTree table if needed. ordinary_columns_and_types.push_back({"_sign", std::make_shared()}); ordinary_columns_and_types.push_back({"_version", std::make_shared()}); @@ -511,7 +534,7 @@ ASTPtr StorageMaterializedPostgreSQL::getCreateNestedTableQuery( storage_metadata.setConstraints(constraints); setInMemoryMetadata(storage_metadata); - return std::move(create_table_query); + return create_table_query; } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h index ff9b95cad7c..e6ce3bbdf65 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.h @@ -135,7 +135,7 @@ protected: private: static std::shared_ptr getMaterializedColumnsDeclaration( - const String name, const String type, UInt64 default_value); + String name, String type, UInt64 default_value); ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const; diff --git a/src/Storages/RabbitMQ/RabbitMQHandler.h b/src/Storages/RabbitMQ/RabbitMQHandler.h index 8f355c4a0dc..25b32a29f58 100644 --- a/src/Storages/RabbitMQ/RabbitMQHandler.h +++ b/src/Storages/RabbitMQ/RabbitMQHandler.h @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB { diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index c1f2e14da7c..ead0d6b1260 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -474,6 +474,14 @@ static void appendBlock(const Block & from, Block & to) const IColumn & col_from = *from.getByPosition(column_no).column.get(); last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column)); + /// In case of ColumnAggregateFunction aggregate states will + /// be allocated from the query context but can be destroyed from the + /// server context (in case of background flush), and thus memory + /// will be leaked from the query, but only tracked memory, not + /// memory itself. + /// + /// To avoid this, prohibit sharing the aggregate states.
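The comment block above explains why the Buffer flush must not share aggregate states with the query's block; the hunk continues below with the actual `last_col->ensureOwnership()` call. A toy sketch of the underlying idea, using plain `shared_ptr` rather than ClickHouse's real COW column machinery:

```cpp
#include <cassert>
#include <memory>
#include <vector>

// Toy stand-in for a column; real ClickHouse columns are COW-backed and
// aggregate states carry memory-tracker attribution.
struct Column
{
    std::vector<int> data;
};

using ColumnPtr = std::shared_ptr<Column>;

// Hypothetical analogue of IColumn::ensureOwnership(): if the data is
// shared with another holder, take a private copy so this context alone
// owns (and accounts for) the memory.
void ensureOwnership(ColumnPtr & col)
{
    if (col.use_count() > 1)
        col = std::make_shared<Column>(*col);
}

int main()
{
    ColumnPtr query_side = std::make_shared<Column>(Column{{1, 2, 3}});
    ColumnPtr buffer_side = query_side;   // shared across contexts

    ensureOwnership(buffer_side);         // buffer now owns a private copy
    assert(buffer_side.use_count() == 1 && query_side.use_count() == 1);
    return 0;
}
```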
+ last_col->ensureOwnership(); last_col->insertRangeFrom(col_from, 0, rows); to.getByPosition(column_no).column = std::move(last_col); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index 8ba98c3000f..ae874649b40 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -38,7 +38,7 @@ public: void drop() override { nested_storage->drop(); } private: - [[noreturn]] void throwNotAllowed() const + [[noreturn]] static void throwNotAllowed() { throw Exception("This method is not allowed for MaterializedMySQL", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Storages/StorageMongoDB.h b/src/Storages/StorageMongoDB.h index 16b85364c5c..0edfb558759 100644 --- a/src/Storages/StorageMongoDB.h +++ b/src/Storages/StorageMongoDB.h @@ -22,7 +22,7 @@ public: StorageMongoDB( const StorageID & table_id_, const std::string & host_, - short unsigned int port_, + uint16_t port_, const std::string & database_name_, const std::string & collection_name_, const std::string & username_, @@ -49,7 +49,7 @@ private: void connectIfNotConnected(); const std::string host; - const short unsigned int port; + const uint16_t port; /// NOLINT const std::string database_name; const std::string collection_name; const std::string username; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 5fef7f984e4..82baa98834d 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -61,11 +61,11 @@ protected: const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment) : IStorage(table_id_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_description_); - metadata_.setConstraints(constraints_); - metadata_.setComment(comment); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_description_); + storage_metadata.setConstraints(constraints_); + storage_metadata.setComment(comment); + setInMemoryMetadata(storage_metadata); } }; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c53a4963fbe..332abd83095 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7150,9 +7150,9 @@ void StorageReplicatedMergeTree::createTableSharedID() if (!zookeeper->tryGet(zookeeper_table_id_path, id)) { UUID table_id_candidate; - auto storage_id = getStorageID(); - if (storage_id.uuid != UUIDHelpers::Nil) - table_id_candidate = storage_id.uuid; + auto local_storage_id = getStorageID(); + if (local_storage_id.uuid != UUIDHelpers::Nil) + table_id_candidate = local_storage_id.uuid; else table_id_candidate = UUIDHelpers::generateV4(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 056671dc164..935bd048603 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -283,7 +283,7 @@ public: // Return table id, common for different replicas String getTableSharedID() const; - static const String getDefaultZooKeeperName() { return default_zookeeper_name; } + static String getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. void checkBrokenDisks(); @@ -837,6 +837,7 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const * PS. 
Perhaps it would be better to add a flag to the DataPart that a part is inserted into ZK. * But here it's too easy to get confused with the consistency of this flag. */ +/// NOLINTNEXTLINE #define MAX_AGE_OF_LOCAL_PART_THAT_WASNT_ADDED_TO_ZOOKEEPER (5 * 60) } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index c685a542d13..ec506ad0cd0 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -233,7 +233,7 @@ StorageS3Source::StorageS3Source( const ColumnsDescription & columns_, UInt64 max_block_size_, UInt64 max_single_read_retries_, - const String compression_hint_, + String compression_hint_, const std::shared_ptr & client_, const String & bucket_, std::shared_ptr file_iterator_) @@ -245,7 +245,7 @@ StorageS3Source::StorageS3Source( , columns_desc(columns_) , max_block_size(max_block_size_) , max_single_read_retries(max_single_read_retries_) - , compression_hint(compression_hint_) + , compression_hint(std::move(compression_hint_)) , client(client_) , sample_block(sample_block_) , format_settings(format_settings_) diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index cecf7f50860..b2283687e2b 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -71,7 +71,7 @@ public: const ColumnsDescription & columns_, UInt64 max_block_size_, UInt64 max_single_read_retries_, - const String compression_hint_, + String compression_hint_, const std::shared_ptr & client_, const String & bucket, std::shared_ptr file_iterator_); diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index 0b7ab30fa24..8054762d389 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace DB @@ -148,7 +149,7 @@ public: if (nested) StorageProxy::renameInMemory(new_table_id); else - IStorage::renameInMemory(new_table_id); + IStorage::renameInMemory(new_table_id); /// NOLINT } bool isView() const override { return false; } diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 5ca23434356..cd36a10aae7 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -40,7 +40,7 @@ public: size_t max_block_size, unsigned num_streams) override; - void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) const + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); } diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 33086498730..d78c8179a71 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -32,11 +32,11 @@ protected: virtual void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const = 0; public: - IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) + explicit IStorageSystemOneBlock(const StorageID & table_id_) : IStorage(table_id_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(ColumnsDescription(Self::getNamesAndTypes(), Self::getNamesAndAliases())); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription(Self::getNamesAndTypes(), Self::getNamesAndAliases())); + 
setInMemoryMetadata(storage_metadata); } Pipe read( diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.h b/src/Storages/System/StorageSystemDataSkippingIndices.h index d86890f5e27..4af2398a04b 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.h +++ b/src/Storages/System/StorageSystemDataSkippingIndices.h @@ -26,7 +26,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemDataSkippingIndices(const StorageID & table_id_); + explicit StorageSystemDataSkippingIndices(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemDetachedParts.h b/src/Storages/System/StorageSystemDetachedParts.h index ece9d495500..51ee93a2f15 100644 --- a/src/Storages/System/StorageSystemDetachedParts.h +++ b/src/Storages/System/StorageSystemDetachedParts.h @@ -29,8 +29,8 @@ protected: SelectQueryInfo & query_info, ContextPtr context, QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) override; + size_t /*max_block_size*/, + unsigned /*num_streams*/) override; }; } diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index 2541dedd8fc..1404d6023d4 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ b/src/Storages/System/StorageSystemDisks.h @@ -32,7 +32,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemDisks(const StorageID & table_id_); + explicit StorageSystemDisks(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index edee6b7c9d9..9e50bada540 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -23,7 +23,7 @@ struct StoragesInfo bool need_inactive_parts = false; MergeTreeData * data = nullptr; - operator bool() const { return storage != nullptr; } + operator bool() const { return storage != nullptr; } /// NOLINT MergeTreeData::DataPartsVector getParts(MergeTreeData::DataPartStateVector & state, bool has_state_column, bool require_projection_parts = false) const; }; diff --git a/src/Storages/System/StorageSystemPartsColumns.h b/src/Storages/System/StorageSystemPartsColumns.h index be9533a6f70..9cdd2befb40 100644 --- a/src/Storages/System/StorageSystemPartsColumns.h +++ b/src/Storages/System/StorageSystemPartsColumns.h @@ -21,7 +21,7 @@ public: std::string getName() const override { return "SystemPartsColumns"; } protected: - StorageSystemPartsColumns(const StorageID & table_id_); + explicit StorageSystemPartsColumns(const StorageID & table_id_); void processNextStorage( ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; diff --git a/src/Storages/System/StorageSystemProjectionPartsColumns.h b/src/Storages/System/StorageSystemProjectionPartsColumns.h index 61cc8179637..ade07b70a23 100644 --- a/src/Storages/System/StorageSystemProjectionPartsColumns.h +++ b/src/Storages/System/StorageSystemProjectionPartsColumns.h @@ -21,7 +21,7 @@ public: std::string getName() const override { return "SystemProjectionPartsColumns"; } protected: - StorageSystemProjectionPartsColumns(const StorageID & table_id_); + explicit StorageSystemProjectionPartsColumns(const StorageID & table_id_); void processNextStorage( ContextPtr context, MutableColumns & columns, std::vector & columns_mask, const StoragesInfo & info, bool has_state_column) override; }; diff 
--git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index cf457efe250..500b4e97546 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -30,7 +30,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemReplicas(const StorageID & table_id_); + explicit StorageSystemReplicas(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemStackTrace.h b/src/Storages/System/StorageSystemStackTrace.h index a5827e32e6f..da4315d3ffa 100644 --- a/src/Storages/System/StorageSystemStackTrace.h +++ b/src/Storages/System/StorageSystemStackTrace.h @@ -27,7 +27,7 @@ public: String getName() const override { return "SystemStackTrace"; } static NamesAndTypesList getNamesAndTypes(); - StorageSystemStackTrace(const StorageID & table_id_); + explicit StorageSystemStackTrace(const StorageID & table_id_); protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index f202299db1f..28730ce33c4 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -32,7 +32,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemStoragePolicies(const StorageID & table_id_); + explicit StorageSystemStoragePolicies(const StorageID & table_id_); }; } diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index 808dc862e8d..23f3aedb164 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -30,7 +30,7 @@ public: bool isSystemStorage() const override { return true; } protected: - StorageSystemTables(const StorageID & table_id_); + explicit StorageSystemTables(const StorageID & table_id_); }; } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 1cce6c4b292..f844772983a 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -183,7 +183,7 @@ StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, Context upload_part_size_multiply_parts_count_threshold, max_single_part_upload_size, max_connections, - getActualTableStructure(context), + columns, ConstraintsDescription{}, String{}, context, diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index 4ea97d68ded..c318e163689 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -240,6 +240,7 @@ def main(): "https://s3.amazonaws.com/clickhouse-builds/" + url.replace("+", "%2B").replace(" ", "%20") ) + success = len(build_urls) > 0 create_json_artifact( TEMP_PATH, build_name, @@ -247,9 +248,13 @@ def main(): build_urls, build_config, 0, - len(build_urls) > 0, + success, ) - return + # Fail the build job if it did not succeed + if not success: + sys.exit(1) + else: + sys.exit(0) image_name = get_image_name(build_config) docker_image = get_image_with_version(IMAGES_PATH, image_name) diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json deleted file mode 100644 index 19afdd172d5..00000000000 --- a/tests/ci/ci_config.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "build_config": [ - { - "compiler": "clang-13", - "build-type": "", - "sanitizer": "", - "package-type": "deb", - "bundled": "bundled", - "splitted": "unsplitted", - "alien_pkgs": true, - "tidy": "disable", - "with_coverage": false -
}, - { - "compiler": "clang-13", - "build-type": "", - "sanitizer": "", - "package-type": "performance", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "gcc-11", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - }, - { - "compiler": "clang-13", - "build-type": "", - "sanitizer": "", - "package-type": "binary", - "bundled": "bundled", - "splitted": "unsplitted", - "tidy": "disable", - "with_coverage": false - } - ], - "tests_config": { - "Testflows check": { - "required_build_properties": { - "compiler": "clang-13", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "Release": { - "required_build_properties": { - "compiler": "clang-13", - "package_type": "deb", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - }, - "ClickHouse Keeper Jepsen": { - "required_build_properties": { - "compiler": "clang-13", - "package_type": "binary", - "build_type": "relwithdebuginfo", - "sanitizer": "none", - "bundled": "bundled", - "splitted": "unsplitted", - "clang-tidy": "disable", - "with_coverage": false - } - } - } -} diff --git a/tests/config/config.d/s3_storage_policy_by_default.xml b/tests/config/config.d/s3_storage_policy_by_default.xml index 6ce997a2c16..b4a2d697c78 100644 --- a/tests/config/config.d/s3_storage_policy_by_default.xml +++ b/tests/config/config.d/s3_storage_policy_by_default.xml @@ -6,6 +6,8 @@ http://localhost:11111/test/test/ clickhouse clickhouse + 1 + 22548578304 diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml new file mode 100644 index 00000000000..2e43f735605 --- /dev/null +++ b/tests/config/config.d/storage_conf.xml @@ -0,0 +1,23 @@ + + + + + s3 + http://localhost:11111/test/00170_test/ + clickhouse + clickhouse + 1 + 22548578304 + + + + + +
+ s3_cache +
+
+
+
+
+
diff --git a/tests/config/install.sh index 3d0f6828430..4e8252f32e0 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -79,6 +79,15 @@ fi if [[ -n "$USE_DATABASE_ORDINARY" ]] && [[ "$USE_DATABASE_ORDINARY" -eq 1 ]]; then ln -sf $SRC_PATH/users.d/database_ordinary.xml $DEST_SERVER_PATH/users.d/ fi + +if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ +fi + +if [[ -n "$EXPORT_S3_STORAGE_POLICIES" ]]; then + ln -sf $SRC_PATH/config.d/storage_conf.xml $DEST_SERVER_PATH/config.d/ +fi + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then ln -sf $SRC_PATH/users.d/database_replicated.xml $DEST_SERVER_PATH/users.d/ ln -sf $SRC_PATH/config.d/database_replicated.xml $DEST_SERVER_PATH/config.d/ @@ -108,8 +117,4 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo chgrp clickhouse /var/lib/clickhouse2 fi -if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then - ln -sf $SRC_PATH/config.d/s3_storage_policy_by_default.xml $DEST_SERVER_PATH/config.d/ -fi - ln -sf $SRC_PATH/client_config.xml $DEST_CLIENT_PATH/config.xml diff --git a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py index 0f66fd5dcdf..fef2b8a6ffb 100644 --- a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py @@ -1218,4 +1218,28 @@ def materialized_database_settings_materialized_mysql_tables_list(clickhouse_nod check_query(clickhouse_node, "SELECT COUNT() FROM test_database.c FORMAT TSV", "2\n") clickhouse_node.query("DROP DATABASE test_database") - mysql_node.query("DROP DATABASE test_database") \ No newline at end of file + mysql_node.query("DROP DATABASE test_database") + + +def materialized_database_mysql_date_type_to_date32(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database") + mysql_node.query("CREATE TABLE test_database.a (a INT(11) NOT NULL PRIMARY KEY, b date DEFAULT NULL)") + # dates earlier than 1925 are not supported for now (they come back as 1970-01-01) + mysql_node.query("INSERT INTO test_database.a VALUES(1, '1900-04-16')") + # test dates from 1925 onwards + mysql_node.query("INSERT INTO test_database.a VALUES(2, '1925-03-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(3, '1971-02-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(4, '2101-05-16')") + + clickhouse_node.query("CREATE DATABASE test_database ENGINE = MaterializedMySQL('{}:3306', 'test_database', 'root', 'clickhouse')".format(service_name)) + check_query(clickhouse_node, "SELECT b from test_database.a order by a FORMAT TSV", "1970-01-01\n1925-03-16\n1971-02-16\n2101-05-16\n") + + mysql_node.query("INSERT INTO test_database.a VALUES(5, '1925-04-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(6, '2022-02-16')") + mysql_node.query("INSERT INTO test_database.a VALUES(7, '2283-11-11')") + + check_query(clickhouse_node, "SELECT b from test_database.a order by a FORMAT TSV", "1970-01-01\n1925-03-16\n1971-02-16\n2101-05-16\n1925-04-16\n2022-02-16\n" + + "2283-11-11\n") + diff --git
a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 3f9d4d5ce40..027f874596d 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -261,3 +261,7 @@ def test_materialized_database_support_all_kinds_of_mysql_datatype(started_clust def test_materialized_database_settings_materialized_mysql_tables_list(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): materialize_with_ddl.materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, started_mysql_8_0, "mysql80") materialize_with_ddl.materialized_database_settings_materialized_mysql_tables_list(clickhouse_node, started_mysql_5_7, "mysql57") + +def test_materialized_database_mysql_date_type_to_date32(started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node): + materialize_with_ddl.materialized_database_mysql_date_type_to_date32(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.materialized_database_mysql_date_type_to_date32(clickhouse_node, started_mysql_5_7, "mysql57") \ No newline at end of file diff --git a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml index a0fe0a6f609..2f1b8275a0b 100644 --- a/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml +++ b/tests/integration/test_merge_tree_s3/configs/config.d/storage_conf.xml @@ -19,6 +19,14 @@ local / + + s3 + http://minio1:9001/root/data/ + minio + minio123 + 33554432 + 1 +
@@ -38,6 +46,13 @@ + + +
+ s3_with_cache +
+
+
diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py index ff1c955d78b..35d6d6e72b6 100644 --- a/tests/integration/test_mysql_database_engine/test.py +++ b/tests/integration/test_mysql_database_engine/test.py @@ -235,6 +235,9 @@ int8_values = [0, 1, -1, 127, -128] uint8_values = [0, 1, 255] # string_values = ["'ClickHouse'", 'NULL'] string_values = ["'ClickHouse'"] +date_values=["'1970-01-01'"] +date2Date32_values=["'1925-01-01'", "'2283-11-11'"] +date2String_values=["'1000-01-01'", "'9999-12-31'"] decimal_values = [0, 0.123, 0.4, 5.67, 8.91011, 123456789.123, -0.123, -0.4, -5.67, -8.91011, -123456789.123] @@ -274,6 +277,9 @@ timestamp_values_no_subsecond = ["'2015-05-18 07:40:01'", "'2019-09-16 19:20:11' pytest.param("common_types", "VARCHAR(10)", "Nullable(String)", string_values, "", id="common_types_20"), + pytest.param("common_types", "DATE", "Nullable(Date)", date_values, "", id="common_types_21"), + pytest.param("common_types", "DATE", "Nullable(Date32)", date2Date32_values, "date2Date32", id="common_types_22"), + pytest.param("common_types", "DATE", "Nullable(String)", date2String_values, "date2String", id="common_types_23"), pytest.param("decimal_default", "decimal NOT NULL", "Decimal(10, 0)", decimal_values, "decimal,datetime64", id="decimal_1"), diff --git a/tests/integration/test_part_log_table/configs/config_disk_name_test.xml b/tests/integration/test_part_log_table/configs/config_disk_name_test.xml new file mode 100644 index 00000000000..c8831031674 --- /dev/null +++ b/tests/integration/test_part_log_table/configs/config_disk_name_test.xml @@ -0,0 +1,30 @@ + + + + + local + /path1/ + + + local + /path2/ + + + + + +
+ test1 +
+
+
+ + +
+ test2 +
+
+
+
+
+
diff --git a/tests/integration/test_part_log_table/test.py b/tests/integration/test_part_log_table/test.py index 050e8c831c7..eba909acf4a 100644 --- a/tests/integration/test_part_log_table/test.py +++ b/tests/integration/test_part_log_table/test.py @@ -6,6 +6,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance("node1", main_configs=["configs/config_without_standard_part_log.xml"]) node2 = cluster.add_instance("node2", main_configs=["configs/config_with_standard_part_log.xml"]) node3 = cluster.add_instance("node3", main_configs=["configs/config_with_non_standard_part_log.xml"]) +node4 = cluster.add_instance("node4", main_configs=["configs/config_disk_name_test.xml"]) @pytest.fixture(scope="module") @@ -40,3 +41,11 @@ def test_config_with_non_standard_part_log(start_cluster): node3.query("INSERT INTO test_table VALUES ('name', 1)") node3.query("SYSTEM FLUSH LOGS") assert node3.query("SELECT * FROM system.own_part_log") != "" + +def test_config_disk_name_test(start_cluster): + node4.query("CREATE TABLE test_table1(word String, value UInt64) ENGINE = MergeTree() ORDER BY word SETTINGS storage_policy = 'test1'") + node4.query("INSERT INTO test_table1(*) VALUES ('test1', 2)") + node4.query("CREATE TABLE test_table2(word String, value UInt64) ENGINE = MergeTree() ORDER BY word SETTINGS storage_policy = 'test2'") + node4.query("INSERT INTO test_table2(*) VALUES ('test2', 3)") + node4.query("SYSTEM FLUSH LOGS") + assert node4.query("SELECT DISTINCT disk_name FROM system.part_log ORDER by disk_name") == "test1\ntest2\n" diff --git a/tests/integration/test_postgresql_replica_database_engine_2/test.py b/tests/integration/test_postgresql_replica_database_engine_2/test.py index 3226c040e8e..0115988222c 100644 --- a/tests/integration/test_postgresql_replica_database_engine_2/test.py +++ b/tests/integration/test_postgresql_replica_database_engine_2/test.py @@ -114,6 +114,8 @@ def test_add_new_table_to_replication(started_cluster): assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-222:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4,postgresql_replica_5,postgresql_replica_6,postgresql_replica_7\\'\n") + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from numbers(10000, 10000)") + result = instance.query("SHOW TABLES FROM test_database") assert(result == "postgresql_replica_0\npostgresql_replica_1\npostgresql_replica_2\npostgresql_replica_3\npostgresql_replica_4\npostgresql_replica_5\npostgresql_replica_6\npostgresql_replica_7\n") check_several_tables_are_synchronized(instance, NUM_TABLES + 3) @@ -133,7 +135,7 @@ def test_remove_table_from_replication(started_cluster): assert(result[-59:] == "\\'postgres_database\\', \\'postgres\\', \\'mysecretpassword\\')\n") table_name = 'postgresql_replica_4' - instance.query(f'DETACH TABLE test_database.{table_name}'); + instance.query(f'DETACH TABLE test_database.{table_name} PERMANENTLY'); result = instance.query_and_get_error(f'SELECT * FROM test_database.{table_name}') assert("doesn't exist" in result) @@ -147,13 +149,15 @@ def test_remove_table_from_replication(started_cluster): instance.query(f'ATTACH TABLE test_database.{table_name}'); check_tables_are_synchronized(instance, table_name); check_several_tables_are_synchronized(instance, NUM_TABLES) + instance.query(f"INSERT INTO postgres_database.{table_name} SELECT number, number from 
numbers(10000, 10000)") + check_tables_are_synchronized(instance, table_name); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-159:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_1,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") table_name = 'postgresql_replica_1' - instance.query(f'DETACH TABLE test_database.{table_name}'); + instance.query(f'DETACH TABLE test_database.{table_name} PERMANENTLY'); result = instance.query('SHOW CREATE DATABASE test_database') assert(result[:63] == "CREATE DATABASE test_database\\nENGINE = MaterializedPostgreSQL(") assert(result[-138:] == ")\\nSETTINGS materialized_postgresql_tables_list = \\'postgresql_replica_0,postgresql_replica_2,postgresql_replica_3,postgresql_replica_4\\'\n") @@ -162,7 +166,7 @@ def test_remove_table_from_replication(started_cluster): cursor.execute(f'drop table if exists postgresql_replica_0;') # Removing from replication table which does not exist in PostgreSQL must be ok. - instance.query('DETACH TABLE test_database.postgresql_replica_0'); + instance.query('DETACH TABLE test_database.postgresql_replica_0 PERMANENTLY'); assert instance.contains_in_log("from publication, because table does not exist in PostgreSQL") @@ -236,7 +240,7 @@ def test_database_with_single_non_default_schema(started_cluster): print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" - instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name}") + instance.query(f"DETACH TABLE {materialized_db}.{detached_table_name} PERMANENTLY") assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.{detached_table_name}") check_tables_are_synchronized(instance, detached_table_name, postgres_database=clickhouse_postgres_db); @@ -306,7 +310,7 @@ def test_database_with_multiple_non_default_schemas_1(started_cluster): print('DETACH-ATTACH') detached_table_name = "postgresql_replica_1" - instance.query(f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`") + instance.query(f"DETACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}` PERMANENTLY") assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.`{schema_name}.{detached_table_name}`") assert_show_tables("test_schema.postgresql_replica_0\ntest_schema.postgresql_replica_1\ntest_schema.postgresql_replica_2\ntest_schema.postgresql_replica_3\ntest_schema.postgresql_replica_4\n") @@ -385,7 +389,7 @@ def test_database_with_multiple_non_default_schemas_2(started_cluster): detached_table_name = "postgresql_replica_1" detached_table_schema = "schema0" clickhouse_postgres_db = f'clickhouse_postgres_db0' - instance.query(f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`") + instance.query(f"DETACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}` PERMANENTLY") assert not instance.contains_in_log("from publication, because table does not exist in PostgreSQL") instance.query(f"ATTACH TABLE {materialized_db}.`{detached_table_schema}.{detached_table_name}`") assert_show_tables("schema0.postgresql_replica_0\nschema0.postgresql_replica_1\nschema1.postgresql_replica_0\nschema1.postgresql_replica_1\n") @@ -399,7 +403,7 @@ def test_table_override(started_cluster): 
create_postgres_table(cursor, table_name, template=postgres_table_template_5); instance.query(f"create table {table_name}(key Int32, value UUID) engine = PostgreSQL (postgres1, table={table_name})") instance.query(f"insert into {table_name} select number, generateUUIDv4() from numbers(10)") - table_overrides = f" TABLE OVERRIDE {table_name} (COLUMNS (key Int32, value UUID))" + table_overrides = f" TABLE OVERRIDE {table_name} (COLUMNS (key Int32, value UUID) PARTITION BY key)" pg_manager.create_materialized_db( ip=started_cluster.postgres_ip, port=started_cluster.postgres_port, settings=[f"materialized_postgresql_tables_list = '{table_name}'"], @@ -407,7 +411,7 @@ def test_table_override(started_cluster): assert_nested_table_is_created(instance, table_name, materialized_database) result = instance.query(f"show create table {materialized_database}.{table_name}") print(result) - expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nORDER BY tuple(key)" + expected = "CREATE TABLE test_database.table_override\\n(\\n `key` Int32,\\n `value` UUID,\\n `_sign` Int8() MATERIALIZED 1,\\n `_version` UInt64() MATERIALIZED 1\\n)\\nENGINE = ReplacingMergeTree(_version)\\nPARTITION BY key\\nORDER BY tuple(key)" assert(result.strip() == expected) time.sleep(5) query = f"select * from {materialized_database}.{table_name} order by key" diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index b2c7bbc1510..fa183a365b1 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1068,9 +1068,9 @@ def test_insert_select_schema_inference(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] - instance.query(f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow') select toUInt64(1) as x settings s3_truncate_on_insert=1") - result = instance.query(f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") + instance.query(f"insert into function s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native') select toUInt64(1) as x") + result = instance.query(f"desc s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')") assert(result.strip() == 'x\tUInt64') - result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')") + result = instance.query(f"select * from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_insert_select.native')") assert(int(result) == 1) diff --git a/tests/performance/date_time_long.xml b/tests/performance/date_time_long.xml index 0c3d85f9659..f210c807b12 100644 --- a/tests/performance/date_time_long.xml +++ b/tests/performance/date_time_long.xml @@ -83,7 +83,7 @@ time_zone UTC - Europe/Moscow + Asia/Istanbul Asia/Kolkata diff --git a/tests/performance/date_time_short.xml b/tests/performance/date_time_short.xml index 826e1619ab7..de859710670 100644 --- a/tests/performance/date_time_short.xml +++ b/tests/performance/date_time_short.xml @@ -18,7 +18,7 @@ time_zone - Europe/Moscow + Asia/Istanbul diff --git a/tests/performance/generate_table_function.xml b/tests/performance/generate_table_function.xml index 
bc49a7de1bd..c219d73b6cf 100644 --- a/tests/performance/generate_table_function.xml +++ b/tests/performance/generate_table_function.xml @@ -4,8 +4,8 @@ SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Enum8(\'hello\' = 1, \'world\' = 5)', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Enum8(\'hello\' = 1, \'world\' = 5)))', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(Enum16(\'h\' = 1, \'w\' = 5 , \'o\' = -200))', 0, 10, 10) LIMIT 1000000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Europe/Moscow\')', 0, 10, 10) LIMIT 1000000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Europe/Moscow\')', 0, 10, 10) LIMIT 100000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d Date, dt DateTime, dtm DateTime(\'Asia/Istanbul\')', 0, 10, 10) LIMIT 1000000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('dt64 DateTime64, dts64 DateTime64(6), dtms64 DateTime64(6 ,\'Asia/Istanbul\')', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('f32 Float32, f64 Float64', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('d32 Decimal32(4), d64 Decimal64(8), d128 Decimal128(16)', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000); diff --git a/tests/performance/merge_tree_insert.xml b/tests/performance/merge_tree_insert.xml new file mode 100644 index 00000000000..1e987d27d50 --- /dev/null +++ b/tests/performance/merge_tree_insert.xml @@ -0,0 +1,41 @@ + + + + + integer_primary_key_table_name + + merge_tree_insert_1 + merge_tree_insert_2 + merge_tree_insert_3 + + + + + string_primary_key_table_name + + merge_tree_insert_4 + merge_tree_insert_5 + merge_tree_insert_6 + + + + + CREATE TABLE merge_tree_insert_1 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1) + CREATE TABLE merge_tree_insert_2 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2) + CREATE TABLE merge_tree_insert_3 (value_1 UInt64, value_2 UInt64, value_3 UInt64) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3) + CREATE TABLE merge_tree_insert_4 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1) + CREATE TABLE merge_tree_insert_5 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2) + CREATE TABLE merge_tree_insert_6 (value_1 String, value_2 String, value_3 String) ENGINE = MergeTree ORDER BY (value_1, value_2, value_3) + + INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 500000 + INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1000000 + INSERT INTO {integer_primary_key_table_name} SELECT rand64(0), rand64(1), rand64(2) FROM system.numbers LIMIT 1500000 + + INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 500000 + INSERT INTO {string_primary_key_table_name} SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1000000 + INSERT INTO {string_primary_key_table_name} 
SELECT toString(rand64(0)), toString(rand64(1)), toString(rand64(2)) FROM system.numbers LIMIT 1500000 + + DROP TABLE IF EXISTS {integer_primary_key_table_name} + DROP TABLE IF EXISTS {string_primary_key_table_name} + + diff --git a/tests/queries/0_stateless/00632_get_sample_block_cache.sql b/tests/queries/0_stateless/00632_get_sample_block_cache.sql index 3c1b7ed70e4..f9b241bbf1e 100644 --- a/tests/queries/0_stateless/00632_get_sample_block_cache.sql +++ b/tests/queries/0_stateless/00632_get_sample_block_cache.sql @@ -1,3 +1,5 @@ +-- Tags: long + SET joined_subquery_requires_alias = 0; -- This test (SELECT) without cache can take tens minutes diff --git a/tests/queries/0_stateless/01162_strange_mutations.sh b/tests/queries/0_stateless/01162_strange_mutations.sh index fecb1b8d8c0..c759d113f84 100755 --- a/tests/queries/0_stateless/01162_strange_mutations.sh +++ b/tests/queries/0_stateless/01162_strange_mutations.sh @@ -11,7 +11,7 @@ $CLICKHOUSE_CLIENT -q "CREATE OR REPLACE VIEW t1 AS SELECT number * 10 AS id, nu for engine in "${engines[@]}" do $CLICKHOUSE_CLIENT -q "drop table if exists t" - $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine" + $CLICKHOUSE_CLIENT -q "create table t (n int) engine=$engine" 2>&1| grep -Ev "Removing leftovers from table|removed by another replica" $CLICKHOUSE_CLIENT -q "select engine from system.tables where database=currentDatabase() and name='t'" $CLICKHOUSE_CLIENT -q "insert into t values (1)" $CLICKHOUSE_CLIENT -q "insert into t values (2)" @@ -25,7 +25,7 @@ do $CLICKHOUSE_CLIENT -q "drop table t" $CLICKHOUSE_CLIENT -q "drop table if exists test" - $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" + $CLICKHOUSE_CLIENT -q "CREATE TABLE test ENGINE=$engine AS SELECT number + 100 AS n, 0 AS test FROM numbers(50)" 2>&1| grep -Ev "Removing leftovers from table|removed by another replica" $CLICKHOUSE_CLIENT -q "select count(), sum(n), sum(test) from test" if [[ $engine == *"ReplicatedMergeTree"* ]]; then $CLICKHOUSE_CLIENT -q "ALTER TABLE test diff --git a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql index 61db4376c91..59d8605ba1c 100644 --- a/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql +++ b/tests/queries/0_stateless/01283_max_threads_simple_query_optimization.sql @@ -1,5 +1,7 @@ DROP TABLE IF EXISTS data_01283; +set remote_filesystem_read_method='read'; + CREATE TABLE data_01283 engine=MergeTree() ORDER BY key PARTITION BY key diff --git a/tests/queries/0_stateless/01323_too_many_threads_bug.sql b/tests/queries/0_stateless/01323_too_many_threads_bug.sql index 6033fe66cd3..5dbb5aca2ec 100644 --- a/tests/queries/0_stateless/01323_too_many_threads_bug.sql +++ b/tests/queries/0_stateless/01323_too_many_threads_bug.sql @@ -1,5 +1,7 @@ drop table if exists table_01323_many_parts; +set remote_filesystem_read_method='read'; + create table table_01323_many_parts (x UInt64) engine = MergeTree order by x partition by x % 100; set max_partitions_per_insert_block = 100; insert into table_01323_many_parts select number from numbers(100000); diff --git a/tests/queries/bugs/01482_move_to_prewhere_and_cast.reference b/tests/queries/0_stateless/01482_move_to_prewhere_and_cast.reference similarity index 100% rename from tests/queries/bugs/01482_move_to_prewhere_and_cast.reference rename to tests/queries/0_stateless/01482_move_to_prewhere_and_cast.reference diff --git 
a/tests/queries/bugs/01482_move_to_prewhere_and_cast.sql b/tests/queries/0_stateless/01482_move_to_prewhere_and_cast.sql similarity index 91% rename from tests/queries/bugs/01482_move_to_prewhere_and_cast.sql rename to tests/queries/0_stateless/01482_move_to_prewhere_and_cast.sql index b81cf585b13..282363dcdd7 100644 --- a/tests/queries/bugs/01482_move_to_prewhere_and_cast.sql +++ b/tests/queries/0_stateless/01482_move_to_prewhere_and_cast.sql @@ -1,6 +1,3 @@ --- Tags: no-polymorphic-parts --- Tag no-polymorphic-parts: bug, shoud be fixed - DROP TABLE IF EXISTS APPLICATION; DROP TABLE IF EXISTS DATABASE_IO; @@ -22,9 +19,9 @@ ORDER BY Date; insert into table DATABASE_IO values ('AppA', 'BaseA', '2020-01-01 00:00:00', 1000); SELECT `APPLICATION`.`Name` AS `App`, - CAST(CAST(`DATABASE_IO`.`Date` AS DATE) AS DATE) AS `date` + CAST(CAST(`DATABASE_IO`.`Date` AS DATE) AS DATE) AS `date` FROM `DATABASE_IO` -INNER +INNER JOIN `APPLICATION` ON (`DATABASE_IO`.`Base` = `APPLICATION`.`Base`) WHERE ( CAST(CAST(`DATABASE_IO`.`Date` AS DATE) AS TIMESTAMP) >= toDateTime('2020-01-01 00:00:00') diff --git a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql index 8862037c82b..f9c227942ac 100644 --- a/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql +++ b/tests/queries/0_stateless/01506_buffer_table_alter_block_structure_2.sql @@ -5,12 +5,11 @@ CREATE TABLE buf_dest (timestamp DateTime) ENGINE = MergeTree PARTITION BY toYYYYMMDD(timestamp) ORDER BY (timestamp); -CREATE TABLE buf (timestamp DateTime) Engine = Buffer(currentDatabase(), buf_dest, 16, 0.1, 0.1, 2000000, 20000000, 100000000, 300000000);; +CREATE TABLE buf (timestamp DateTime) Engine = Buffer(currentDatabase(), buf_dest, 16, 86400, 86400, 2000000, 20000000, 100000000, 300000000);; INSERT INTO buf (timestamp) VALUES (toDateTime('2020-01-01 00:05:00')); ---- wait for buffer to flush -SELECT sleep(1) from numbers(1) settings max_block_size=1 format Null; +OPTIMIZE TABLE buf; ALTER TABLE buf_dest ADD COLUMN s String; ALTER TABLE buf ADD COLUMN s String; diff --git a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql index 25c47c008bd..ca9f296b6bf 100644 --- a/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql +++ b/tests/queries/0_stateless/01524_do_not_merge_across_partitions_select_final.sql @@ -2,7 +2,7 @@ DROP TABLE IF EXISTS select_final; SET do_not_merge_across_partitions_select_final = 1; -CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); +CREATE TABLE select_final (t DateTime, x Int32, string String) ENGINE = ReplacingMergeTree() PARTITION BY toYYYYMM(t) ORDER BY (x, t); INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(2); INSERT INTO select_final SELECT toDate('2000-01-01'), number + 1, '' FROM numbers(2); @@ -31,6 +31,8 @@ INSERT INTO select_final SELECT toDate('2000-01-01'), number, '' FROM numbers(50 OPTIMIZE TABLE select_final FINAL; +SET remote_filesystem_read_method = 'read'; + SELECT max(x) FROM select_final FINAL; SYSTEM FLUSH LOGS; diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference index 8af2c4c6b25..655232fcdd4 100644 --- a/tests/queries/0_stateless/01591_window_functions.reference +++ 
b/tests/queries/0_stateless/01591_window_functions.reference @@ -1141,6 +1141,28 @@ from ( from numbers_mt(10000) ) settings max_block_size = 7; 49995000 +-- a test with an aggregate function of -State type +select bitmapCardinality(bs) +from + ( + select groupBitmapMergeState(bm) over (order by k asc rows between unbounded preceding and current row) as bs + from + ( + select + groupBitmapState(number) as bm, k + from + ( + select + number, + number % 3 as k + from numbers(3) + ) + group by k + ) + ); +1 +2 +3 -- -INT_MIN row offset that can lead to problems with negation, found when fuzzing -- under UBSan. Should be limited to at most INT_MAX. select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql index e1e0842ad89..4a900045c6d 100644 --- a/tests/queries/0_stateless/01591_window_functions.sql +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -1,3 +1,5 @@ +-- Tags: long + -- { echo } -- just something basic @@ -442,6 +444,26 @@ from ( from numbers_mt(10000) ) settings max_block_size = 7; +-- a test with an aggregate function of -State type +select bitmapCardinality(bs) +from + ( + select groupBitmapMergeState(bm) over (order by k asc rows between unbounded preceding and current row) as bs + from + ( + select + groupBitmapState(number) as bm, k + from + ( + select + number, + number % 3 as k + from numbers(3) + ) + group by k + ) + ); + -- -INT_MIN row offset that can lead to problems with negation, found when fuzzing -- under UBSan. Should be limited to at most INT_MAX. select count() over (rows between 2147483648 preceding and 2147493648 following) from numbers(2); -- { serverError 36 } diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql index 9f26302e564..750809da338 100644 --- a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -22,6 +22,8 @@ OPTIMIZE TABLE adaptive_table FINAL; SELECT marks FROM system.parts WHERE table = 'adaptive_table' and database=currentDatabase() and active; +SET remote_fs_enable_cache = 0; + -- If we have computed granularity incorrectly then we will exceed this limit. SET max_memory_usage='30M'; diff --git a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql index 6bbf6fcec6a..7ec3153886c 100644 --- a/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql +++ b/tests/queries/0_stateless/01641_memory_tracking_insert_optimize.sql @@ -2,6 +2,10 @@ drop table if exists data_01641; +-- Disable cache for s3 storage tests because it increases memory usage. +set remote_fs_enable_cache=0; +set remote_filesystem_read_method='read'; + create table data_01641 (key Int, value String) engine=MergeTree order by (key, repeat(value, 40)) settings old_parts_lifetime=0, min_bytes_for_wide_part=0; SET max_block_size = 1000, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; @@ -9,6 +13,7 @@ insert into data_01641 select number, toString(number) from numbers(120000); -- Definitely should fail and it proves that memory is tracked in the OPTIMIZE query.
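An aside on the assertion idiom used in the statements that follow (a sketch, not part of the test): error 241 is MEMORY_LIMIT_EXCEEDED, so setting a deliberately small max_memory_usage and expecting 241 demonstrates that an operation's allocations are charged to the query's memory tracker:

-- Any sufficiently large tracked allocation under a tight cap must fail.
SET max_memory_usage = '10Mi';
SELECT groupArray(toString(number)) FROM numbers(10000000) FORMAT Null; -- { serverError 241 }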
set max_memory_usage='10Mi', max_untracked_memory=0; + optimize table data_01641 final; -- { serverError 241 } drop table data_01641; diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 9b76ca91780..15e00db0231 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -12,6 +12,9 @@ WITH _CAST(\'default\', \'Nullable(String)\') AS `id_2` SELECT `one`.`dummy`, ig optimize_skip_unused_shards_rewrite_in(0,) 0 0 WITH _CAST(\'default\', \'Nullable(String)\') AS `id_0` SELECT `one`.`dummy`, ignore(`id_0`) FROM `system`.`one` WHERE `dummy` IN tuple(0) +signed column +WITH _CAST(\'default\', \'Nullable(String)\') AS `key_signed` SELECT `key`, ignore(`key_signed`) FROM `default`.`data_01756_signed` WHERE `key` IN tuple(-1) +WITH _CAST(\'default\', \'Nullable(String)\') AS `key_signed` SELECT `key`, ignore(`key_signed`) FROM `default`.`data_01756_signed` WHERE `key` IN tuple(-2) 0 0 errors diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index 220d5d91a0b..b0900073151 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -9,6 +9,7 @@ drop table if exists dist_01756; drop table if exists dist_01756_str; drop table if exists dist_01756_column; drop table if exists data_01756_str; +drop table if exists data_01756_signed; -- SELECT -- intHash64(0) % 2, @@ -83,6 +84,20 @@ select query from system.query_log where type = 'QueryFinish' order by query; +-- signed column +select 'signed column'; +create table data_01756_signed (key Int) engine=Null; +with (select currentDatabase()) as key_signed select *, ignore(key_signed) from cluster(test_cluster_two_shards, currentDatabase(), data_01756_signed, key) where key in (-1, -2); +system flush logs; +select query from system.query_log where + event_date >= yesterday() and + event_time > now() - interval 1 hour and + not is_initial_query and + query not like '%system%query_log%' and + query like concat('WITH%', currentDatabase(), '%AS `key_signed` %') and + type = 'QueryFinish' +order by query; + -- not tuple select * from dist_01756 where dummy in (0); select * from dist_01756 where dummy in ('0'); @@ -139,3 +154,4 @@ drop table dist_01756; drop table dist_01756_str; drop table dist_01756_column; drop table data_01756_str; +drop table data_01756_signed; diff --git a/tests/queries/0_stateless/01926_order_by_desc_limit.sql b/tests/queries/0_stateless/01926_order_by_desc_limit.sql index 9ee7f4a6aff..9f65cf73252 100644 --- a/tests/queries/0_stateless/01926_order_by_desc_limit.sql +++ b/tests/queries/0_stateless/01926_order_by_desc_limit.sql @@ -2,6 +2,8 @@ DROP TABLE IF EXISTS order_by_desc; +SET remote_fs_enable_cache=0; + CREATE TABLE order_by_desc (u UInt32, s String) ENGINE MergeTree ORDER BY u PARTITION BY u % 100 SETTINGS index_granularity = 1024; diff --git a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference index b856b079327..9896f9396b6 100644 --- a/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference +++ 
b/tests/queries/0_stateless/01930_optimize_skip_unused_shards_rewrite_in.reference @@ -38,8 +38,16 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt8(id) id from d 1 0 1 0 1 0 +1 128 2 1 4 127 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 +4 255 -- Int16, Int16 select _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt16(id)) where id in (0, 1, 0x7fff) order by _shard_num, id; 1 0 @@ -72,8 +80,14 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt16(id) id from 1 0 1 0 1 0 +1 32768 2 1 4 32767 +4 65535 +4 65535 +4 65535 +4 65535 +4 65535 -- Int32, Int32 select _shard_num, * from remote('127.{1..4}', view(select toInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff) order by _shard_num, id; 1 0 @@ -100,8 +114,12 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from select _shard_num, * from remote('127.{1..4}', view(select toUInt32(id) id from data), toInt32(id)) where id in (0, 1, 0x7fffffff, 0x80000000, 0xffffffff) order by _shard_num, id; 1 0 1 0 +1 2147483648 2 1 4 2147483647 +4 4294967295 +4 4294967295 +4 4294967295 -- Int64, Int64 select _shard_num, * from remote('127.{1..4}', view(select toInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff) order by _shard_num, id; 1 0 @@ -122,8 +140,10 @@ select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from -- UInt64, Int64 select _shard_num, * from remote('127.{1..4}', view(select toUInt64(id) id from data), toInt64(id)) where id in (0, 1, 0x7fffffffffffffff, 0x8000000000000000, 0xffffffffffffffff) order by _shard_num, id; 1 0 +1 9223372036854775808 2 1 4 9223372036854775807 +4 18446744073709551615 -- modulo(Int8) select distinct _shard_num, * from remote('127.{1..4}', view(select toInt16(id) id from data), toInt8(id)%255) where id in (-1) order by _shard_num, id; 4 -1 diff --git a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference index e8183f05f5d..da7b788b157 100644 --- a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference +++ b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.reference @@ -1,3 +1,18 @@ 1 1 1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql index 6725fa04114..ebbc6ce97e0 100644 --- a/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql +++ b/tests/queries/0_stateless/02151_replace_regexp_all_empty_match_alternative.sql @@ -1,3 +1,21 @@ -select replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') x; -select replaceRegexpAll(',,1', '^[,]*|[,]*$', '') x; -select replaceRegexpAll('1,,', '^[,]*|[,]*$', '') x; +SELECT replaceRegexpAll(',,1,,', '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll(',,1', '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll('1,,', '^[,]*|[,]*$', ''); + +SELECT replaceRegexpAll(materialize(',,1,,'), '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll(materialize(',,1'), '^[,]*|[,]*$', ''); +SELECT replaceRegexpAll(materialize('1,,'), '^[,]*|[,]*$', ''); + +SELECT replaceRegexpAll('a', 'z*', '') == 'a'; +SELECT replaceRegexpAll('aa', 'z*', '') == 'aa'; +SELECT replaceRegexpAll('aaq', 'z*', '') == 'aaq'; +SELECT replaceRegexpAll('aazq', 'z*', '') == 'aaq'; +SELECT replaceRegexpAll('aazzq', 'z*', '') == 'aaq'; 
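+SELECT replaceRegexpAll('aazzqa', 'z*', '') == 'aaqa';

These scalar cases pin down zero-width match handling: patterns such as 'z*' and the alternation '^[,]*|[,]*$' match the empty string at almost every position, and replaceRegexpAll must step over each empty match exactly once while still removing the non-empty ones. A sketch of the expected behaviour, restating the first assertion of this file:

-- Both alternation branches can match empty; only the comma runs are removed
-- and the scan still terminates.
SELECT replaceRegexpAll(',,1,,', '^[,]*|[,]*$', '') AS x; -- returns '1'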
+ +SELECT replaceRegexpAll(materialize('a'), 'z*', '') == 'a'; +SELECT replaceRegexpAll(materialize('aa'), 'z*', '') == 'aa'; +SELECT replaceRegexpAll(materialize('aaq'), 'z*', '') == 'aaq'; +SELECT replaceRegexpAll(materialize('aazq'), 'z*', '') == 'aaq'; +SELECT replaceRegexpAll(materialize('aazzq'), 'z*', '') == 'aaq'; +SELECT replaceRegexpAll(materialize('aazzqa'), 'z*', '') == 'aaqa'; diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.config.xml b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.config.xml new file mode 100644 index 00000000000..891fb45e4ba --- /dev/null +++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.config.xml @@ -0,0 +1,24 @@ +<clickhouse> + <logger> + <level>trace</level> + <console>true</console> + </logger> + + <tcp_port>9000</tcp_port> + <allow_plaintext_password>0</allow_plaintext_password> + <allow_no_password>0</allow_no_password> + <path>.</path> + <mark_cache_size>0</mark_cache_size> + + <user_directories> + <users_xml> + <path>users.xml</path> + </users_xml> + <local_directory> + <path>./</path> + </local_directory> + </user_directories> +</clickhouse> diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.reference b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh new file mode 100755 index 00000000000..693f1d817e3 --- /dev/null +++ b/tests/queries/0_stateless/02207_allow_plaintext_and_no_password.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# Tags: no-tsan, no-asan, no-ubsan, no-msan, no-parallel, no-fasttest +# Tag no-tsan: requires jemalloc to track small allocations +# Tag no-asan: requires jemalloc to track small allocations +# Tag no-ubsan: requires jemalloc to track small allocations +# Tag no-msan: requires jemalloc to track small allocations + + + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +cp /etc/clickhouse-server/users.xml "$CURDIR"/users.xml +sed -i 's/<password><\/password>/<password_sha256_hex>c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab<\/password_sha256_hex>/g' "$CURDIR"/users.xml +sed -i 's/<!-- <access_management>1<\/access_management> -->/<access_management>1<\/access_management>/g' "$CURDIR"/users.xml + +server_opts=( + "--config-file=$CURDIR/$(basename "${BASH_SOURCE[0]}" .sh).config.xml" + "--" + # to avoid multiple listen sockets (which would complicate port discovery) + "--listen_host=127.1" + # we will discover the real port later. + "--tcp_port=0" + "--shutdown_wait_unfinished=0" +) + +CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" &> clickhouse-server.stderr & +server_pid=$! + +server_port= +i=0 retries=300 +# wait until the server starts listening (max 30 seconds) +while [[ -z $server_port ]] && [[ $i -lt $retries ]]; do + server_port=$(lsof -n -a -P -i tcp -s tcp:LISTEN -p $server_pid 2>/dev/null | awk -F'[ :]' '/LISTEN/ { print $(NF-1) }') + ((++i)) + sleep 0.1 + if ! kill -0 $server_pid >& /dev/null; then + echo "No server (pid $server_pid)" + break + fi +done +if [[ -z $server_port ]]; then + echo "Cannot wait for LISTEN socket" >&2 + exit 1 +fi + +# wait for the server to start accepting tcp connections (max 30 seconds) +i=0 retries=300 +while ! $CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" --format Null -q 'select 1' 2>/dev/null && [[ $i -lt $retries ]]; do + ((++i)) + sleep 0.1 + if ! kill -0 $server_pid >& /dev/null; then + echo "No server (pid $server_pid)" + break + fi +done + +
+if ! $CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" --format Null -q 'select 1'; then + echo "Cannot wait until the server starts accepting connections" >&2 + exit 1 +fi + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q " DROP USER IF EXISTS u_02207, u1_02207"; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u_02207 IDENTIFIED WITH double_sha1_hash BY '8DCDD69CE7D121DE8013062AEAEB2A148910D50E' +" + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q " CREATE USER u1_02207 IDENTIFIED BY 'qwe123'"; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u2_02207 HOST IP '127.1' IDENTIFIED WITH plaintext_password BY 'qwerty' " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u3_02207 HOST IP '127.1' IDENTIFIED WITH no_password " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER u4_02207 HOST IP '127.1' NOT IDENTIFIED " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q "CREATE USER IF NOT EXISTS u5_02207 " " -- { serverError 516 } --" &> /dev/null ; + +$CLICKHOUSE_CLIENT_BINARY -u default --password='1w2swhb1' --host 127.1 --port "$server_port" -q " DROP USER u_02207, u1_02207"; + + +# no sleep needed, since writes to stderr are not buffered. + grep 'User is not allowed to Create users' clickhouse-server.stderr + + +# send TERM and save the error code to ensure that it is 0 (EXIT_SUCCESS) +kill $server_pid +wait $server_pid +return_code=$?
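For reference, the SQL surface this script drives (a sketch of the semantics under allow_plaintext_password=0 and allow_no_password=0 as set in the config above; user names here are illustrative):

-- Hashed credentials are still accepted:
CREATE USER u_hashed IDENTIFIED WITH double_sha1_hash BY '8DCDD69CE7D121DE8013062AEAEB2A148910D50E';
-- Plaintext and password-less variants are rejected with error 516, as asserted above:
CREATE USER u_plain IDENTIFIED WITH plaintext_password BY 'qwerty'; -- { serverError 516 }
CREATE USER u_none IDENTIFIED WITH no_password; -- { serverError 516 }
CREATE USER u_open NOT IDENTIFIED; -- { serverError 516 }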
+ +rm -f clickhouse-server.stderr +rm -f "$CURDIR"/users.xml + +exit $return_code diff --git a/tests/queries/0_stateless/02226_s3_with_cache.reference b/tests/queries/0_stateless/02226_s3_with_cache.reference new file mode 100644 index 00000000000..214addac2d6 --- /dev/null +++ b/tests/queries/0_stateless/02226_s3_with_cache.reference @@ -0,0 +1,2 @@ +SELECT 1, * FROM test LIMIT 10 FORMAT Null; 1 0 1 +SELECT 2, * FROM test LIMIT 10 FORMAT Null; 0 1 0 diff --git a/tests/queries/0_stateless/02226_s3_with_cache.sql b/tests/queries/0_stateless/02226_s3_with_cache.sql new file mode 100644 index 00000000000..b3126a419df --- /dev/null +++ b/tests/queries/0_stateless/02226_s3_with_cache.sql @@ -0,0 +1,44 @@ +-- Tags: no-parallel, no-fasttest, long + +SET max_memory_usage='20G'; + +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache'; +INSERT INTO test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000; + +SET remote_filesystem_read_method='threadpool'; + +SELECT 1, * FROM test LIMIT 10 FORMAT Null; + +SYSTEM FLUSH LOGS; +SELECT query, + ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, + ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download +FROM system.query_log +WHERE query LIKE 'SELECT 1, * FROM test LIMIT%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; + +SET remote_filesystem_read_method='read'; + +SELECT 2, * FROM test LIMIT 10 FORMAT Null; + +SYSTEM FLUSH LOGS; +SELECT query, + ProfileEvents['RemoteFSReadBytes'] > 0 as remote_fs_read, + ProfileEvents['RemoteFSCacheReadBytes'] > 0 as remote_fs_cache_read, + ProfileEvents['RemoteFSCacheDownloadBytes'] > 0 as remote_fs_read_and_download +FROM system.query_log +WHERE query LIKE 'SELECT 2, * FROM test LIMIT%' +AND type = 'QueryFinish' +AND current_database = currentDatabase() +ORDER BY query_start_time DESC +LIMIT 1; + +SET remote_filesystem_read_method='threadpool'; + +SELECT * FROM test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10 FORMAT Null; + +DROP TABLE test; diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.reference b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql new file mode 100644 index 00000000000..a53b7f50e51 --- /dev/null +++ b/tests/queries/0_stateless/02231_buffer_aggregate_states_leak.sql @@ -0,0 +1,36 @@ +-- Tags: long + +drop table if exists buffer_02231; +drop table if exists out_02231; +drop table if exists in_02231; +drop table if exists mv_02231; + +-- To reproduce the leak of memory tracking of aggregate states, +-- a background flush is required.
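The Buffer arguments in the CREATE TABLE that follows are positional; a sketch of their shape, mirroring the inline comments in the test ('dst_sketch' is illustrative):

-- Buffer(database, table, num_layers,
--        min_time, max_time,   -- seconds a block may wait before flushing
--        min_rows, max_rows,
--        min_bytes, max_bytes
--        [, flush_time])       -- background flush period, in seconds
CREATE TABLE buf_sketch (x UInt64)
ENGINE = Buffer(currentDatabase(), 'dst_sketch', 1, 86400, 86400, 1e9, 1e9, 1e12, 1e12, 1);

Setting the min/max thresholds this high while flush_time is 1 forces all data to leave the buffer through the background flush path, which is exactly the code path the leak lived in.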
+create table buffer_02231 +( + key Int, + v1 AggregateFunction(groupArray, String) +) engine=Buffer(currentDatabase(), 'out_02231', + /* layers= */1, + /* min/max time */ 86400, 86400, + /* min/max rows */ 1e9, 1e9, + /* min/max bytes */ 1e12, 1e12, + /* flush time */ 1 +); +create table out_02231 as buffer_02231 engine=Null(); +create table in_02231 (number Int) engine=Null(); + +-- Create lots of INSERT blocks with MV +create materialized view mv_02231 to buffer_02231 as select + number as key, + groupArrayState(toString(number)) as v1 +from in_02231 +group by key; + +insert into in_02231 select * from numbers(10e6) settings max_memory_usage='300Mi'; + +drop table buffer_02231; +drop table out_02231; +drop table in_02231; +drop table mv_02231; diff --git a/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.reference b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.reference new file mode 100644 index 00000000000..f18e41e497e --- /dev/null +++ b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.reference @@ -0,0 +1,8 @@ +cnt +2 +t0 t0 +100 100 +0 0 +hit +1 +0 diff --git a/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.sql b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.sql new file mode 100644 index 00000000000..89383ed4ba3 --- /dev/null +++ b/tests/queries/0_stateless/02232_functions_to_subcolumns_alias.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_functions_to_subcolumns_alias; + +CREATE TABLE t_functions_to_subcolumns_alias (id UInt64, t Tuple(UInt64, String), m Map(String, UInt64)) ENGINE = Memory; +INSERT INTO t_functions_to_subcolumns_alias VALUES (1, (100, 'abc'), map('foo', 1, 'bar', 2)) (2, NULL, map()); + +SELECT count(id) AS cnt FROM t_functions_to_subcolumns_alias FORMAT TSVWithNames; +SELECT tupleElement(t, 1) as t0, t0 FROM t_functions_to_subcolumns_alias FORMAT TSVWithNames; +SELECT mapContains(m, 'foo') AS hit FROM t_functions_to_subcolumns_alias FORMAT TSVWithNames; + +DROP TABLE t_functions_to_subcolumns_alias; diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference new file mode 100644 index 00000000000..5c383cb3035 --- /dev/null +++ b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.reference @@ -0,0 +1,6 @@ +Parquet +123 1 +456 2 +ORC +123 1 +456 2 diff --git a/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh new file mode 100755 index 00000000000..b946addd01c --- /dev/null +++ b/tests/queries/0_stateless/02233_setting_input_format_use_lowercase_column_name.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-ubsan, no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "Parquet" +DATA_FILE=$CUR_DIR/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load (id String, score Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO parquet_load FORMAT Parquet SETTINGS input_format_use_lowercase_column_name=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load" +${CLICKHOUSE_CLIENT} --query="drop table parquet_load" + +echo "ORC" +DATA_FILE=$CUR_DIR/data_orc/test_setting_input_format_use_lowercase_column_name.orc +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS orc_load" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE orc_load (id String, score Int32) ENGINE = Memory" +cat "$DATA_FILE" | ${CLICKHOUSE_CLIENT} -q "INSERT INTO orc_load FORMAT ORC SETTINGS input_format_use_lowercase_column_name=true" +${CLICKHOUSE_CLIENT} --query="SELECT * FROM orc_load" +${CLICKHOUSE_CLIENT} --query="drop table orc_load" diff --git a/tests/queries/0_stateless/02233_with_total_empty_chunk.reference b/tests/queries/0_stateless/02233_with_total_empty_chunk.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02233_with_total_empty_chunk.sql b/tests/queries/0_stateless/02233_with_total_empty_chunk.sql new file mode 100644 index 00000000000..bf9ce85b6ed --- /dev/null +++ b/tests/queries/0_stateless/02233_with_total_empty_chunk.sql @@ -0,0 +1 @@ +SELECT (NULL, NULL, NULL, NULL, NULL, NULL, NULL) FROM numbers(0) GROUP BY number WITH TOTALS HAVING sum(number) <= arrayJoin([]); diff --git a/tests/queries/0_stateless/02234_column_function_short_circuit.reference b/tests/queries/0_stateless/02234_column_function_short_circuit.reference new file mode 100644 index 00000000000..2c08a29620e --- /dev/null +++ b/tests/queries/0_stateless/02234_column_function_short_circuit.reference @@ -0,0 +1,2 @@ +2.3 +4.3 diff --git a/tests/queries/0_stateless/02234_column_function_short_circuit.sql b/tests/queries/0_stateless/02234_column_function_short_circuit.sql new file mode 100644 index 00000000000..a6a36841073 --- /dev/null +++ b/tests/queries/0_stateless/02234_column_function_short_circuit.sql @@ -0,0 +1,43 @@ +DROP TABLE IF EXISTS dict_table; +DROP TABLE IF EXISTS data_table; +DROP DICTIONARY IF EXISTS dict; + +create table dict_table +( + `strField` String, + `dateField` Date, + `float64Field` Float64 +) Engine Log(); + +insert into dict_table values ('SomeStr', toDate('2021-01-01'), 1.1), ('SomeStr2', toDate('2021-01-02'), 2.2); + +create dictionary dict +( + `strField` String, + `dateField` Date, + `float64Field` Float64 +) +PRIMARY KEY strField, dateField +SOURCE (CLICKHOUSE(TABLE 'dict_table')) +LIFETIME(MIN 300 MAX 360) +LAYOUT (COMPLEX_KEY_HASHED()); + +create table data_table +( + `float64Field1` Float64, + `float64Field2` Float64, + `strField1` String, + `strField2` String +) Engine Log(); + +insert into data_table values (1.1, 1.2, 'SomeStr', 'SomeStr'), (2.1, 2.2, 'SomeStr2', 'SomeStr2'); + +select round( + float64Field1 * if(strField1 != '', 1.0, dictGetFloat64('dict', 'float64Field', (strField1, toDate('2021-01-01')))) + + if(strField2 != '', 1.0, dictGetFloat64('dict', 'float64Field', (strField2, toDate('2021-01-01')))) * if(isFinite(float64Field2), float64Field2, 0), + 2) +from data_table; + +DROP DICTIONARY dict; +DROP TABLE dict_table; +DROP TABLE data_table; diff --git 
a/tests/queries/0_stateless/02234_position_case_insensitive_utf8.reference b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.reference new file mode 100644 index 00000000000..aa47d0d46d4 --- /dev/null +++ b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/02234_position_case_insensitive_utf8.sql b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.sql new file mode 100644 index 00000000000..d77b13e7f97 --- /dev/null +++ b/tests/queries/0_stateless/02234_position_case_insensitive_utf8.sql @@ -0,0 +1,2 @@ +SELECT positionCaseInsensitiveUTF8('Hello', materialize('%\xF0%')); +SELECT DISTINCT positionCaseInsensitiveUTF8(materialize('Hello'), '%\xF0%') FROM numbers(1000); diff --git a/tests/queries/0_stateless/data_orc/test_setting_input_format_use_lowercase_column_name.orc b/tests/queries/0_stateless/data_orc/test_setting_input_format_use_lowercase_column_name.orc new file mode 100644 index 00000000000..136f9980064 Binary files /dev/null and b/tests/queries/0_stateless/data_orc/test_setting_input_format_use_lowercase_column_name.orc differ diff --git a/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet b/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet new file mode 100644 index 00000000000..922def77caf Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/test_setting_input_format_use_lowercase_column_name.parquet differ diff --git a/tests/queries/1_stateful/00011_sorting.sql b/tests/queries/1_stateful/00011_sorting.sql index 381be7b7dd4..3e451360e1b 100644 --- a/tests/queries/1_stateful/00011_sorting.sql +++ b/tests/queries/1_stateful/00011_sorting.sql @@ -1 +1 @@ -SELECT EventTime::DateTime('Europe/Moscow') FROM test.hits ORDER BY EventTime DESC LIMIT 10 +SELECT EventTime::DateTime('Asia/Dubai') FROM test.hits ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00012_sorting_distributed.sql b/tests/queries/1_stateful/00012_sorting_distributed.sql index c71f643045d..2f852af1dba 100644 --- a/tests/queries/1_stateful/00012_sorting_distributed.sql +++ b/tests/queries/1_stateful/00012_sorting_distributed.sql @@ -1,3 +1,3 @@ -- Tags: distributed -SELECT EventTime::DateTime('Europe/Moscow') FROM remote('127.0.0.{1,2}', test, hits) ORDER BY EventTime DESC LIMIT 10 +SELECT EventTime::DateTime('Asia/Dubai') FROM remote('127.0.0.{1,2}', test, hits) ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql index 3e34d9d1348..63a833af114 100644 --- a/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql +++ b/tests/queries/1_stateful/00066_sorting_distributed_many_replicas.sql @@ -1,4 +1,4 @@ -- Tags: replica, distributed SET max_parallel_replicas = 2; -SELECT EventTime::DateTime('Europe/Moscow') FROM remote('127.0.0.{1|2}', test, hits) ORDER BY EventTime DESC LIMIT 10 +SELECT EventTime::DateTime('Asia/Dubai') FROM remote('127.0.0.{1|2}', test, hits) ORDER BY EventTime DESC LIMIT 10 diff --git a/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql b/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql index 241f0f9b13b..16c0097bf21 100644 --- a/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql +++ b/tests/queries/1_stateful/00071_merge_tree_optimize_aio.sql @@ -1,6 +1,6 @@ DROP TABLE IF EXISTS 
test.hits_snippet; -CREATE TABLE test.hits_snippet(EventTime DateTime('Europe/Moscow'), EventDate Date, CounterID UInt32, UserID UInt64, URL String, Referer String) ENGINE = MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192); +CREATE TABLE test.hits_snippet(EventTime DateTime('Asia/Dubai'), EventDate Date, CounterID UInt32, UserID UInt64, URL String, Referer String) ENGINE = MergeTree(EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192); SET min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0; SET max_block_size = 4096; diff --git a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql index af5d932fecb..d652b1bc559 100644 --- a/tests/queries/1_stateful/00072_compare_date_and_string_index.sql +++ b/tests/queries/1_stateful/00072_compare_date_and_string_index.sql @@ -15,7 +15,7 @@ SELECT count() FROM test.hits WHERE EventDate IN (toDate('2014-03-18'), toDate(' SELECT count() FROM test.hits WHERE EventDate = concat('2014-0', '3-18'); DROP TABLE IF EXISTS test.hits_indexed_by_time; -CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Europe/Moscow')) ENGINE = MergeTree ORDER BY (EventDate, EventTime); +CREATE TABLE test.hits_indexed_by_time (EventDate Date, EventTime DateTime('Asia/Dubai')) ENGINE = MergeTree ORDER BY (EventDate, EventTime); INSERT INTO test.hits_indexed_by_time SELECT EventDate, EventTime FROM test.hits; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = '2014-03-18 01:02:03'; @@ -25,12 +25,12 @@ SELECT count() FROM test.hits_indexed_by_time WHERE EventTime <= '2014-03-18 01: SELECT count() FROM test.hits_indexed_by_time WHERE EventTime >= '2014-03-18 01:02:03'; SELECT count() FROM test.hits_indexed_by_time WHERE EventTime IN ('2014-03-18 01:02:03', '2014-03-19 04:05:06'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime < toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime > toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime <= toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime >= toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'); -SELECT count() FROM test.hits_indexed_by_time WHERE EventTime IN (toDateTime('2014-03-18 01:02:03', 'Europe/Moscow'), toDateTime('2014-03-19 04:05:06', 'Europe/Moscow')); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime < toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime > toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime <= toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime >= toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'); +SELECT count() FROM test.hits_indexed_by_time WHERE EventTime IN (toDateTime('2014-03-18 01:02:03', 'Asia/Dubai'), toDateTime('2014-03-19 04:05:06', 'Asia/Dubai')); SELECT count() FROM test.hits_indexed_by_time WHERE EventTime = concat('2014-03-18 ', '01:02:03'); diff --git 
a/tests/queries/1_stateful/00075_left_array_join.sql b/tests/queries/1_stateful/00075_left_array_join.sql index 52a48462b9d..1fd045a26bf 100644 --- a/tests/queries/1_stateful/00075_left_array_join.sql +++ b/tests/queries/1_stateful/00075_left_array_join.sql @@ -1,2 +1,2 @@ -SELECT UserID, EventTime::DateTime('Europe/Moscow'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; -SELECT UserID, EventTime::DateTime('Europe/Moscow'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits LEFT ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; +SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; +SELECT UserID, EventTime::DateTime('Asia/Dubai'), pp.Key1, pp.Key2, ParsedParams.Key1 FROM test.hits LEFT ARRAY JOIN ParsedParams AS pp WHERE CounterID = 1704509 ORDER BY UserID, EventTime, pp.Key1, pp.Key2 LIMIT 100; diff --git a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql index c5952be83b6..1e476d3a27d 100644 --- a/tests/queries/1_stateful/00091_prewhere_two_conditions.sql +++ b/tests/queries/1_stateful/00091_prewhere_two_conditions.sql @@ -2,12 +2,12 @@ SET max_bytes_to_read = 600000000; SET optimize_move_to_prewhere = 1; -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -SELECT uniq(*) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; -WITH toTimeZone(EventTime, 'Europe/Moscow') AS xyz SELECT uniq(*) FROM test.hits WHERE xyz >= '2014-03-20 00:00:00' AND xyz < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; +SELECT uniq(*) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; +WITH toTimeZone(EventTime, 'Asia/Dubai') AS xyz SELECT uniq(*) FROM test.hits WHERE xyz >= '2014-03-20 00:00:00' AND xyz < '2014-03-21 00:00:00' AND EventDate = '2014-03-21'; SET optimize_move_to_prewhere = 0; -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -- { serverError 307 } -SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Europe/Moscow') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Europe/Moscow') < '2014-03-21 00:00:00'; -- { serverError 307 } +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND toTimeZone(EventTime, 
'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } +SELECT uniq(URL) FROM test.hits WHERE toTimeZone(EventTime, 'Asia/Dubai') >= '2014-03-20 00:00:00' AND URL != '' AND toTimeZone(EventTime, 'Asia/Dubai') < '2014-03-21 00:00:00'; -- { serverError 307 } diff --git a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference index 04107d74341..3b7b346e7e8 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.reference @@ -1,8 +1,8 @@ CSV, false -ea1c740f03f5dcc43a3044528ad0a98f - +6929aaeac016d22c20464e3be38c64cd - CSV, true -ea1c740f03f5dcc43a3044528ad0a98f - +6929aaeac016d22c20464e3be38c64cd - CSVWithNames, false -e986f353467c87b07e7143d7bff2daff - +1610d7eac24fb923cd973c99ab7e3a8d - CSVWithNames, true -e986f353467c87b07e7143d7bff2daff - +1610d7eac24fb923cd973c99ab7e3a8d - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh index a6b5620812d..1476d2892bf 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_csv_and_friends.sh @@ -10,10 +10,10 @@ for format in "${FORMATS[@]}" do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum done diff --git a/tests/queries/1_stateful/00159_parallel_formatting_http.reference b/tests/queries/1_stateful/00159_parallel_formatting_http.reference index 8eabf5d4f03..34ecd115748 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_http.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_http.reference @@ -1,12 +1,12 @@ TSV, false -6e4ce4996dd0e036d27cb0d2166c8e59 - +9e0a1b1db4d1e56b4b571a8824dde35b - TSV, true -6e4ce4996dd0e036d27cb0d2166c8e59 - +9e0a1b1db4d1e56b4b571a8824dde35b - CSV, false -ab6b3616f31e8a952c802ca92562e418 - +c9c6f633a59d349f9f8a14ee2f1cb1b3 - CSV, true -ab6b3616f31e8a952c802ca92562e418 - +c9c6f633a59d349f9f8a14ee2f1cb1b3 - JSONCompactEachRow, false -1651b540b43bd6c62446f4c340bf13c7 - +826e244bd6c547b52955dd69df61ea22 - JSONCompactEachRow, true -1651b540b43bd6c62446f4c340bf13c7 - +826e244bd6c547b52955dd69df61ea22 - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_http.sh b/tests/queries/1_stateful/00159_parallel_formatting_http.sh index 1dcae50812e..ea4a4d12867 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_http.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_http.sh @@ -10,8 +10,8 @@ FORMATS=('TSV' 'CSV' 'JSONCompactEachRow') for format in "${FORMATS[@]}" do echo "$format, false"; - ${CLICKHOUSE_CURL} -sS 
"${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Europe/Moscow')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=false" -d' ' | md5sum + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Asia/Dubai')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=false" -d' ' | md5sum echo "$format, true"; - ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Europe/Moscow')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=true" -d' ' | md5sum + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=SELECT+ClientEventTime::DateTime('Asia/Dubai')+as+a,MobilePhoneModel+as+b,ClientIP6+as+c+FROM+test.hits+ORDER+BY+a,b,c+LIMIT+1000000+Format+$format&output_format_parallel_formatting=true" -d' ' | md5sum done diff --git a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference index 7ad5359a30e..42e69ea3a0d 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.reference @@ -1,28 +1,28 @@ JSONEachRow, false -e0a3c9978a92a277f2fff4664f3c1749 - +c6b89185cc5b3dff5d3779e2e1551b81 - JSONEachRow, true -e0a3c9978a92a277f2fff4664f3c1749 - +c6b89185cc5b3dff5d3779e2e1551b81 - JSONCompactEachRow, false -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactEachRow, true -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactStringsEachRow, false -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactStringsEachRow, true -0c1efbbc25a5bd90a2ecea559d283667 - +5c838a00e22d943fa429c45106b7ff4d - JSONCompactEachRowWithNames, false -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactEachRowWithNames, true -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactStringsEachRowWithNames, false -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactStringsEachRowWithNames, true -b9e4f8ecadbb650245d1762f4187ee0a - +e3231b1c8187de4da6752d692b2ddba9 - JSONCompactEachRowWithNamesAndTypes, false -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - JSONCompactEachRowWithNamesAndTypes, true -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - JSONCompactStringsEachRowWithNamesAndTypes, false -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - JSONCompactStringsEachRowWithNamesAndTypes, true -8b41f7375999b53d4c9607398456fe5b - +21302d11da0bf8d37ab599e28a51bac2 - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh index f6c87eabfde..a96ed0c9b96 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_json_and_friends.sh @@ -12,9 +12,9 @@ for format in "${FORMATS[@]}" do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format 
$format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c LIMIT 3000000 Format $format" | md5sum done diff --git a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference index 04d6db3b4af..91e3af03db8 100644 --- a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference +++ b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.reference @@ -1,12 +1,12 @@ TSV, false -8a984bbbfb127c430f67173f5371c6cb - +194d5061de4cae59489d989373f8effe - TSV, true -8a984bbbfb127c430f67173f5371c6cb - +194d5061de4cae59489d989373f8effe - TSVWithNames, false -ead321ed96754ff1aa39d112bc28c43d - +a6d327a3611288b3f973d00e6116f16e - TSVWithNames, true -ead321ed96754ff1aa39d112bc28c43d - +a6d327a3611288b3f973d00e6116f16e - TSKV, false -1735308ecea5c269846f36a55d5b335f - +c2e32a21c08aacf60bda21248ce4f73f - TSKV, true -1735308ecea5c269846f36a55d5b335f - +c2e32a21c08aacf60bda21248ce4f73f - diff --git a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh index 02d083c0498..9d48774dd2d 100755 --- a/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh +++ b/tests/queries/1_stateful/00159_parallel_formatting_tsv_and_friends.sh @@ -11,9 +11,9 @@ for format in "${FORMATS[@]}" do echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=true -q \ - "SELECT ClientEventTime::DateTime('Europe/Moscow') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum + "SELECT ClientEventTime::DateTime('Asia/Dubai') as a, MobilePhoneModel as b, ClientIP6 as c FROM test.hits ORDER BY a, b, c Format $format" | md5sum done diff --git a/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference b/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference index fb0ba75c148..cd8c2e21b09 100644 --- a/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference +++ b/tests/queries/1_stateful/00161_parallel_parsing_with_names.reference @@ -1,8 +1,8 @@ TSVWithNames, false -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - TSVWithNames, true -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - CSVWithNames, false -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - CSVWithNames, true -29caf86494f169d6339f6c5610b20731 - +0c6d493d47ff0aa1c6111c40c2b6cfcf - diff --git a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh 
b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh index 777d95fa0af..a1136a47319 100755 --- a/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh +++ b/tests/queries/1_stateful/00161_parallel_parsing_with_names.sh @@ -10,21 +10,21 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" for format in "${FORMATS[@]}" do # Columns are permuted - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 50000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum diff --git a/tests/queries/1_stateful/00163_column_oriented_formats.reference b/tests/queries/1_stateful/00163_column_oriented_formats.reference index cb20aca4392..cf29a217fe4 100644 --- a/tests/queries/1_stateful/00163_column_oriented_formats.reference +++ b/tests/queries/1_stateful/00163_column_oriented_formats.reference @@ -1,12 +1,12 @@ Parquet -6b397d4643bc1f920f3eb8aa87ee180c - +093d0270733e505af52436f9df4a779f - 7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - d8746733270cbeff7ab3550c9b944fb6 - Arrow -6b397d4643bc1f920f3eb8aa87ee180c - +093d0270733e505af52436f9df4a779f - 7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - d8746733270cbeff7ab3550c9b944fb6 - ORC -6b397d4643bc1f920f3eb8aa87ee180c - +093d0270733e505af52436f9df4a779f - 7fe6d8c57ddc5fe37bbdcb7f73c5fa78 - d8746733270cbeff7ab3550c9b944fb6 - diff --git a/tests/queries/1_stateful/00163_column_oriented_formats.sh b/tests/queries/1_stateful/00163_column_oriented_formats.sh index 50ad20cbe92..803474c4fa7 100755 --- a/tests/queries/1_stateful/00163_column_oriented_formats.sh +++ b/tests/queries/1_stateful/00163_column_oriented_formats.sh @@ -11,7 +11,7 @@ for format in "${FORMATS[@]}" do echo $format $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS 00163_column_oriented SYNC" - $CLICKHOUSE_CLIENT -q "CREATE TABLE 
00163_column_oriented(ClientEventTime DateTime('Europe/Moscow'), MobilePhoneModel String, ClientIP6 FixedString(16)) ENGINE=File($format)" + $CLICKHOUSE_CLIENT -q "CREATE TABLE 00163_column_oriented(ClientEventTime DateTime('Asia/Dubai'), MobilePhoneModel String, ClientIP6 FixedString(16)) ENGINE=File($format)" $CLICKHOUSE_CLIENT -q "INSERT INTO 00163_column_oriented SELECT ClientEventTime, MobilePhoneModel, ClientIP6 FROM test.hits ORDER BY ClientEventTime, MobilePhoneModel, ClientIP6 LIMIT 100" $CLICKHOUSE_CLIENT -q "SELECT ClientEventTime from 00163_column_oriented" | md5sum $CLICKHOUSE_CLIENT -q "SELECT MobilePhoneModel from 00163_column_oriented" | md5sum diff --git a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference index 0c0367694b2..a2c69c24fa2 100644 --- a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference +++ b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.reference @@ -1,20 +1,20 @@ TSVWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - TSVWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - CSVWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - CSVWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONStringsEachRow, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONStringsEachRow, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactEachRowWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactEachRowWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactStringsEachRowWithNamesAndTypes, false -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - JSONCompactStringsEachRowWithNamesAndTypes, true -7c1feeaae418e502d66fcc8e31946f2e - +0bd9fe2bc50147cd260bb58457329385 - diff --git a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh index 9fdca20d097..33562918f67 100755 --- a/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh +++ b/tests/queries/1_stateful/00167_parallel_parsing_with_names_and_types.sh @@ -10,21 +10,21 @@ $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" for format in "${FORMATS[@]}" do # Columns are permuted - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, false"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=false -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" $CLICKHOUSE_CLIENT -q "SELECT * FROM 
parsing_with_names;" | md5sum $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_with_names" - $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Europe/Moscow'), b String) ENGINE=Memory()" + $CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_with_names(c FixedString(16), a DateTime('Asia/Dubai'), b String) ENGINE=Memory()" echo "$format, true"; $CLICKHOUSE_CLIENT --output_format_parallel_formatting=false -q \ - "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Europe/Moscow') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ + "SELECT URLRegions as d, toTimeZone(ClientEventTime, 'Asia/Dubai') as a, MobilePhoneModel as b, ParamPrice as e, ClientIP6 as c FROM test.hits LIMIT 5000 Format $format" | \ $CLICKHOUSE_CLIENT --input_format_skip_unknown_fields=1 --input_format_parallel_parsing=true -q "INSERT INTO parsing_with_names FORMAT $format SETTINGS input_format_null_as_default=0" $CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_with_names;" | md5sum diff --git a/tests/queries/1_stateful/00170_s3_cache.reference b/tests/queries/1_stateful/00170_s3_cache.reference new file mode 100644 index 00000000000..9c9c3bc537f --- /dev/null +++ b/tests/queries/1_stateful/00170_s3_cache.reference @@ -0,0 +1,270 @@ +-- { echo } +SET max_memory_usage='20G'; +SELECT count() FROM test.hits_s3; +8873898 +SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; +30641 +SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; +329039 8873898 1400.8565027454677 +SELECT sum(UserID) FROM test.hits_s3 ; +15358948234638402412 +SELECT uniq(UserID) FROM test.hits_s3 ; +120665 +SELECT uniq(SearchPhrase) FROM test.hits_s3 ; +132591 +SELECT min(EventDate), max(EventDate) FROM test.hits_s3 ; +2014-03-17 2014-03-23 +SELECT AdvEngineID, count() FROM test.hits_s3 WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY AdvEngineID DESC; +62 7 +61 12 +58 83 +55 281 +52 454 +51 74 +50 353 +49 7 +48 224 +42 72 +41 76 +40 91 +35 2751 +32 141 +30 1832 +24 9 +22 3 +18 3 +16 1019 +12 1 +10 3 +4 10 +3 22948 +2 187 +SELECT RegionID, uniq(UserID) AS u FROM test.hits_s3 GROUP BY RegionID ORDER BY u DESC LIMIT 10; +196 9275 +8363 4624 +15887 4585 +241 4488 +207 3596 +3 3319 +12504 1594 +183 1592 +57 1251 +225 1177 +SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM test.hits_s3 GROUP BY RegionID ORDER BY c DESC LIMIT 10; +196 32570 1311992 1437.5239170665675 9275 +3 11425 428577 1424.2968801405582 3319 +241 8291 320659 1149.9956152797831 4488 +207 7360 285615 1264.5680093832607 3596 +15887 27514 197463 1392.8657064867848 4585 +8363 26522 197154 1361.9469247390364 4624 +183 13054 186914 1470.3840054784553 1592 +225 1817 164048 1404.8909831268898 1177 +40 1883 107154 1407.6735912798401 808 +57 2146 99424 1200.338721033151 1251 +SELECT MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +S820_ROW 7616 +iPhone 2 6111 +LG Optimus 4134 +Samsung Galaxy 813 +iPad HD 7 604 +Sams 558 +Samsung Galaxy Note 501 +iPad 2 434 +iPhone S720 393 +iPad 10 FHD 306 +SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +1 S820_ROW 7613 +7 iPhone 2 5993 +1 LG Optimus 4098 +5 Samsung Galaxy Note 499 +5 Sams 346 +5 Samsung Galaxy 273 +7 iPad HD 7 240 +5 iPad 213 +4 Sams 210 +7 Samsung Galaxy 189 +SELECT uniq(SearchPhrase), count() AS 
c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +1 3567 +1 2402 +1 2166 +1 1848 +1 1659 +1 1549 +1 1480 +1 1247 +1 1112 +1 1091 +SELECT uniq(SearchPhrase), uniq(UserID) AS u FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +1 786 +1 479 +1 320 +1 188 +1 181 +1 174 +1 173 +1 162 +1 159 +1 141 +SELECT SearchEngineID, uniq(SearchPhrase), count() AS c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +3 1 3490 +3 1 2166 +3 1 1599 +3 1 1549 +3 1 1530 +3 1 1442 +3 1 1247 +3 1 1112 +3 1 1091 +3 1 1064 +SELECT UserID, count() FROM test.hits_s3 GROUP BY UserID ORDER BY count() DESC LIMIT 10; +1205491256153864188 31519 +3228040076666004453 20688 +2543118835429830843 16329 +1961021224905272484 13484 +4322253409885123546 11186 +2034549784946942048 10970 +397859646441652491 8229 +8032089779962875762 8149 +1839265440135330496 7816 +5548175707459682622 7806 +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c DESC LIMIT 10; +2961521519262 1 56 +87878526839192 1 414 +87878526839192 1 15 +87878526839192 1 6 +87878526839192 1 6 +87878526839192 1 5 +87878526839192 1 5 +87878526839192 1 5 +87878526839192 1 4 +87878526839192 1 3 +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c LIMIT 10; +2961521519262 1 56 +87878526839192 1 1 +87878526839192 1 1 +87878526839192 1 1 +87878526839192 1 2 +87878526839192 1 3 +87878526839192 1 4 +87878526839192 1 5 +87878526839192 1 5 +87878526839192 1 5 +SELECT UserID, toMinute(EventTime) AS m, uniq(SearchPhrase) as u, count() as c FROM test.hits_s3 GROUP BY UserID, m, SearchPhrase ORDER BY UserID DESC LIMIT 10 FORMAT Null; +SELECT UserID FROM test.hits_s3 WHERE UserID = 12345678901234567890; +SELECT count() FROM test.hits_s3 WHERE URL LIKE '%metrika%'; +2348 +SELECT uniq(SearchPhrase) as u, max(URL) as m, count() AS c FROM test.hits_s3 WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u, m, c DESC LIMIT 10; +1 goal://delive/812metrika.com/kizi-bulochkomna 4 +1 goal://delive/812metrika.com/kizi-bulochkomna 2 +1 goal://delive/812metrika.com/kizi-bulochkomna 2 +1 goal://delive/812metrika.com/kizi-bulochkomna 2 +1 goal://mail.yandex.ru/yrs/ekonometrika/kermosure-batakte 2 +1 http:%2F%2F%2F2014/03/18/cid=54&metrika.com 1 +1 http:%2F%2Ffiles&order=0&metrikancy-podar 1 +1 http:%2F%2Fiteme.metrika 1 +1 http:%2F%2Fproduct/shop.rbc.ru/rostometrikatuvali-k-pensadabuga/nauka_30_m_610_730641%2F01%2Fannovsk/dom-drugie_zhalujsta-s-social 1 +1 http:%2F%2Fwww.kirovanny/donnel_mart]=creative=0&metrika.ru/socialog 1 +SELECT uniq(SearchPhrase), max(URL), max(Title), count() AS c, uniq(UserID) FROM test.hits_s3 WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +1 http://korer.ru/categories.ru/?vkb Яндекс: нашлось 184 тыс изображений програница 27 тыс. ответов в России - 1245 1 +1 http://korer.ru/categories.ru/?vkb Яндекс.Картинках, поиск на AVITO.ru • Знакомства вакансии на дом электриса 710 1 +1 http://yandsearch[run][min]=200 одного подаров в Краснодателя » Страница 2 - современно в Яндекс: нашлось 8 мартфонарнажатие и последник Красность рисунки на AVITO.ru. 
Часы VU+ Uno 696 310 +1 http://korer.ru/categories.ru/?vkb Яндекс: нашем качествует о тебя не следников PRAJNA Cerator.org.com / Shopogody - Read izle, Diva.BY 668 1 +1 http://yandex.ru/chechristana.ru/clck/jsredircnt=1377554 Яндекс.Новости в Санкт-Петербурге: 228-135 тыс. ответов цифр трудников на Весная 572 1 +1 https://dns-state=AiuY0DBWFJ4ePaEs статися водят? - Испании туре за неделки игрушенко — Ирина домашних услуг Россия) - Яндекс: нашлось 236 тыс изображений 546 54 +1 http://korer.ru/categories.ru/?vkb Яндекс.Новоришь всё о купить модели Виннис, ЧП. Соболєв і 457 1 +1 https://my.mail.ru/appliancePotr 芒果 | ТЕЛЕГРАФ - Яндекс.Почта Mail.Ru: Из-за смотреть 439 221 +1 http://korer.ru/categories.ru/?vkb Продажа плании онлайн бесплатно в Яндекс.Маркетинг - новости менеджера, 61 438 1 +1 http://korer.ru/categories.ru/?vkb Яндекс: нашем качестве: почалась 396 Hp) 5-dr 200000 для зимние восписок тили 395 1 +SELECT * FROM test.hits_s3 WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10 format Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10 FORMAT Null; +SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM test.hits_s3 WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25; +25703952 185.35847185332617 147211 +732797 145.03929351646454 475142 +792887 123.97688315087015 252197 +3807842 78.46108053235935 196033 +1704509 60.11621475966243 523264 +598875 20.267298451681793 337140 +SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, max(Referer) FROM test.hits_s3 WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25; +vk.com.ua 670.6812170535467 205447 https://vk.com.ua/health.mail.yandsearch?lr=213&msid=87&redircnt=1310461&with_photorcycle/users/424246b7dcbba51/offers +avito.ru 89.56139198679928 243623 https://avito.ru/стих по биатлона +vk.com 88.93009846053418 680171 https://vk.com/video +yandex.ru 85.79982623523495 554773 https://yandex.ru/yandsearch + 81.39774471008556 2237229 httpvmkNCAErJlhPSHlqdmtsWFc4MXZtLUR1Q3Y9tM8jq5BkkHRyeFVKWTEJ6dE9iQnYCex9 +m.auto.ru 58.542011573622986 118027 https://m.auto.ru/yoshka-sokaklari-60.html#/battle-ru11 +SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 
41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM test.hits_s3; +12431057718 12439931616 12448805514 12457679412 12466553310 12475427208 12484301106 12493175004 12502048902 12510922800 12519796698 12528670596 12537544494 12546418392 12555292290 12564166188 12573040086 12581913984 12590787882 12599661780 12608535678 12617409576 12626283474 12635157372 12644031270 12652905168 12661779066 12670652964 12679526862 12688400760 12697274658 12706148556 12715022454 12723896352 12732770250 12741644148 12750518046 12759391944 12768265842 12777139740 12786013638 12794887536 12803761434 12812635332 12821509230 12830383128 12839257026 12848130924 12857004822 12865878720 12874752618 12883626516 12892500414 12901374312 12910248210 12919122108 12927996006 12936869904 12945743802 12954617700 12963491598 12972365496 12981239394 12990113292 12998987190 13007861088 13016734986 13025608884 13034482782 13043356680 13052230578 13061104476 13069978374 13078852272 13087726170 13096600068 13105473966 13114347864 13123221762 13132095660 13140969558 13149843456 13158717354 13167591252 13176465150 13185339048 13194212946 13203086844 13211960742 13220834640 +SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +3 1660732911 2564 21 1339 +3 1795610432 1808 49 1622 +3 442614592 1801 63 1622 +3 280750947 1722 92 1339 +3 1794713726 1565 143 1297 +3 2122160434 1449 29 1846 +3 2120191779 1431 117 1339 +3 3726560380 1338 37 1339 +3 1382059522 1212 25 1386 +3 2454020642 1108 25 1339 +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c, WatchID DESC LIMIT 10; +9223343978848462524 807160513 1 0 1339 +9223311592760478486 622798371 1 0 1622 +9223290551912005343 1399751135 1 0 1386 +9223283743622263900 4248624768 1 0 1339 +9223277679551805964 2079360072 1 0 1639 +9223250576755718785 471654323 1 0 1622 +9223247301332594153 2030669591 1 0 1297 +9223246228500137980 2156909056 1 0 467 +9223227691645120897 91683468 1 0 1846 +9223220893120643152 1357136342 1 0 1297 +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 GROUP BY WatchID, 
ClientIP ORDER BY c, WatchID DESC LIMIT 10; +9223371678237104442 1510763633 1 0 1622 +9223371583739401906 1316647510 1 0 1587 +9223369973176670469 1581144184 1 0 1297 +9223369447059354172 1759910327 1 0 1339 +9223368297061364285 1900808651 1 0 1339 +9223367627527921417 1250879542 1 0 1587 +9223367120605710467 818965311 1 0 1622 +9223365068732217887 287613368 1 0 1386 +9223364444623921469 697478885 1 0 1622 +9223363407092000972 76513606 1 0 1297 +SELECT URL, count() AS c FROM test.hits_s3 GROUP BY URL ORDER BY c DESC LIMIT 10; +http://public_search 311119 +http://auto.ru/chatay-barana.ru/traction.html#maybettaya 189442 +http://korer.ru/categories.ru/?vkb 142669 +http://main=hurriyet.com/iframe/frm_index.ru/photofunki-sayesilcipo-showthredir?from=&seatsTo=&purchynet.com/galaxy-nosti.ru/preso.tv/Archi.shtml?002 122598 +http://korablitz.ru/L_1OFFERS_CRD 45069 +http://bravoslava-230v 32907 +http://images.yandex.ru 22100 +http://doc/00003713844324&education.html?logi-38-rasstreferer_id 21145 +http://rutube.ru/patianu 19064 +http://search?win=11&pos=22&img_url=http:%2F%2Fcs411276 19060 +SELECT 1, URL, count() AS c FROM test.hits_s3 GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +1 http://public_search 311119 +1 http://auto.ru/chatay-barana.ru/traction.html#maybettaya 189442 +1 http://korer.ru/categories.ru/?vkb 142669 +1 http://main=hurriyet.com/iframe/frm_index.ru/photofunki-sayesilcipo-showthredir?from=&seatsTo=&purchynet.com/galaxy-nosti.ru/preso.tv/Archi.shtml?002 122598 +1 http://korablitz.ru/L_1OFFERS_CRD 45069 +1 http://bravoslava-230v 32907 +1 http://images.yandex.ru 22100 +1 http://doc/00003713844324&education.html?logi-38-rasstreferer_id 21145 +1 http://rutube.ru/patianu 19064 +1 http://search?win=11&pos=22&img_url=http:%2F%2Fcs411276 19060 +SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM test.hits_s3 GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10; +2950145570 2950145569 2950145568 2950145567 8149 +2408492821 2408492820 2408492819 2408492818 7770 +2494028488 2494028487 2494028486 2494028485 7696 +1688720600 1688720599 1688720598 1688720597 7681 +356903718 356903717 356903716 356903715 6817 +908127740 908127739 908127738 908127737 6624 +45907785 45907784 45907783 45907782 6556 +1567954933 1567954932 1567954931 1567954930 6203 +406416527 406416526 406416525 406416524 6015 +1410634230 1410634229 1410634228 1410634227 5742 +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews, Title DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? 
Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews, TraficSourceID DESC LIMIT 1000; +SELECT URLHash, EventDate, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100; +SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; +SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute; diff --git a/tests/queries/1_stateful/00170_s3_cache.sql b/tests/queries/1_stateful/00170_s3_cache.sql new file mode 100644 index 00000000000..af3fd402596 --- /dev/null +++ b/tests/queries/1_stateful/00170_s3_cache.sql @@ -0,0 +1,45 @@ +-- { echo } +SET max_memory_usage='20G'; +SELECT count() FROM test.hits_s3; +SELECT count() FROM test.hits_s3 WHERE AdvEngineID != 0; +SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM test.hits_s3 ; +SELECT sum(UserID) FROM test.hits_s3 ; +SELECT uniq(UserID) FROM test.hits_s3 ; +SELECT uniq(SearchPhrase) FROM test.hits_s3 ; +SELECT min(EventDate), max(EventDate) FROM test.hits_s3 ; +SELECT AdvEngineID, count() FROM test.hits_s3 WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY AdvEngineID DESC; +SELECT RegionID, uniq(UserID) AS u FROM test.hits_s3 GROUP BY RegionID ORDER BY u DESC LIMIT 10; +SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM test.hits_s3 GROUP BY RegionID ORDER BY c DESC LIMIT 10; +SELECT MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM test.hits_s3 WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; +SELECT uniq(SearchPhrase), count() AS c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT uniq(SearchPhrase), uniq(UserID) AS u FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; +SELECT SearchEngineID, uniq(SearchPhrase), count() AS c FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT UserID, count() FROM test.hits_s3 GROUP BY UserID ORDER BY count() DESC LIMIT 10; +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c DESC LIMIT 10; +SELECT UserID, uniq(SearchPhrase) as m, count() as c FROM test.hits_s3 GROUP BY UserID, SearchPhrase ORDER BY UserID, m, c LIMIT 10; +SELECT UserID, toMinute(EventTime) AS m, uniq(SearchPhrase) as u, count() as c FROM test.hits_s3 GROUP BY UserID, m, SearchPhrase ORDER BY UserID DESC LIMIT 10 FORMAT Null; +SELECT UserID FROM test.hits_s3 WHERE UserID = 12345678901234567890; +SELECT count() FROM test.hits_s3 WHERE URL 
LIKE '%metrika%'; +SELECT uniq(SearchPhrase) as u, max(URL) as m, count() AS c FROM test.hits_s3 WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u, m, c DESC LIMIT 10; +SELECT uniq(SearchPhrase), max(URL), max(Title), count() AS c, uniq(UserID) FROM test.hits_s3 WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; +SELECT * FROM test.hits_s3 WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10 format Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10 FORMAT Null; +SELECT SearchPhrase FROM test.hits_s3 WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10 FORMAT Null; +SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM test.hits_s3 WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25; +SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, max(Referer) FROM test.hits_s3 WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25; +SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), 
sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM test.hits_s3; +SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c, WatchID DESC LIMIT 10; +SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM test.hits_s3 GROUP BY WatchID, ClientIP ORDER BY c, WatchID DESC LIMIT 10; +SELECT URL, count() AS c FROM test.hits_s3 GROUP BY URL ORDER BY c DESC LIMIT 10; +SELECT 1, URL, count() AS c FROM test.hits_s3 GROUP BY 1, URL ORDER BY c DESC LIMIT 10; +SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM test.hits_s3 GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10; +SELECT Title, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews, Title DESC LIMIT 10; +SELECT URL, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; +SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews, TraficSourceID DESC LIMIT 1000; +SELECT URLHash, EventDate, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100; +SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; +SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM test.hits_s3 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute; diff --git a/tests/queries/bugs/position_case_insensitive_utf8.sql b/tests/queries/bugs/position_case_insensitive_utf8.sql deleted file mode 100644 index 00ddd1b498d..00000000000 --- a/tests/queries/bugs/position_case_insensitive_utf8.sql +++ /dev/null @@ -1,2 +0,0 @@ -SELECT positionCaseInsensitiveUTF8('Hello', materialize('%\xF0%')); -SELECT positionCaseInsensitiveUTF8(materialize('Hello'), '%\xF0%') FROM numbers(1000); diff --git a/tests/testflows/aes_encryption/configs/clickhouse/common.xml b/tests/testflows/aes_encryption/configs/clickhouse/common.xml deleted file mode 100644 index 31fa972199f..00000000000 --- 
a/tests/testflows/aes_encryption/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>0.0.0.0</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/aes_encryption/configs/clickhouse/config.xml b/tests/testflows/aes_encryption/configs/clickhouse/config.xml
deleted file mode 100644
index 9854f9f990e..00000000000
--- a/tests/testflows/aes_encryption/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,436 +0,0 @@
[436 deleted lines: stock test-suite config.xml — trace logger under /var/log/clickhouse-server/, ports 8123/9000/9009, openSSL server and client sections, paths under /var/lib/clickhouse/, users.xml, default profile and database, localhost test clusters, system tables query_log/trace_log/query_thread_log/part_log partitioned by toYYYYMM(event_date) with a 7500 ms flush, *_dictionary.xml, /clickhouse/task_queue/ddl, click_cost/max graphite rollup with retention 86400/7200, /var/lib/clickhouse/format_schemas/]
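Every deleted config.xml carried the same set of system-log table sections; only the values survive in the stripped text above (database system, tables query_log, trace_log, query_thread_log, part_log, partition key toYYYYMM(event_date), 7500 ms flush). A sketch of one such section in the stock ClickHouse layout, with the standard element names assumed rather than recovered verbatim:

<query_log>
    <!-- element names assumed from the stock config.xml; values recovered from the deleted text -->
    <database>system</database>
    <table>query_log</table>
    <partition_by>toYYYYMM(event_date)</partition_by>
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>

The trace_log, query_thread_log and part_log sections repeat this shape with only the table name changed.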
diff --git a/tests/testflows/aes_encryption/configs/clickhouse/users.xml b/tests/testflows/aes_encryption/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/aes_encryption/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml — default profile (max_memory_usage 10000000000, use_uncompressed_cache 0, load_balancing random), readonly profile, default user open to ::/0 with access_management 1, default quota with a single 3600 s tracking interval]
diff --git a/tests/testflows/datetime64_extended_range/configs/clickhouse/common.xml b/tests/testflows/datetime64_extended_range/configs/clickhouse/common.xml
deleted file mode 100644
index 31fa972199f..00000000000
--- a/tests/testflows/datetime64_extended_range/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>0.0.0.0</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/datetime64_extended_range/configs/clickhouse/config.xml b/tests/testflows/datetime64_extended_range/configs/clickhouse/config.xml
deleted file mode 100644
index a9a37875273..00000000000
--- a/tests/testflows/datetime64_extended_range/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,436 +0,0 @@
[436 deleted lines: stock test-suite config.xml, as above; graphite retention 86400/60]
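The click_cost / max value runs in each stripped config are the remains of the graphite rollup section; the retention pairs are recoverable (0/3600 then 86400/60 here, 86400/7200 in the aes_encryption variant, and 0/60, 3600/300, 86400/3600 for the default), while the element names below are the stock ones and assumed:

<graphite_rollup_example>
    <!-- element names assumed from the stock config.xml; ages and precisions recovered from the deleted text -->
    <pattern>
        <regexp>click_cost</regexp>
        <function>any</function>
        <retention>
            <age>0</age>
            <precision>3600</precision>
        </retention>
        <retention>
            <age>86400</age>
            <precision>60</precision>
        </retention>
    </pattern>
    <default>
        <function>max</function>
        <retention>
            <age>0</age>
            <precision>60</precision>
        </retention>
        <retention>
            <age>3600</age>
            <precision>300</precision>
        </retention>
        <retention>
            <age>86400</age>
            <precision>3600</precision>
        </retention>
    </default>
</graphite_rollup_example>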
diff --git a/tests/testflows/datetime64_extended_range/configs/clickhouse/users.xml b/tests/testflows/datetime64_extended_range/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/datetime64_extended_range/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/example/configs/clickhouse/common.xml b/tests/testflows/example/configs/clickhouse/common.xml
deleted file mode 100644
index 31fa972199f..00000000000
--- a/tests/testflows/example/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>0.0.0.0</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/example/configs/clickhouse/config.xml b/tests/testflows/example/configs/clickhouse/config.xml
deleted file mode 100644
index 9854f9f990e..00000000000
--- a/tests/testflows/example/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,436 +0,0 @@
[436 deleted lines: stock test-suite config.xml, as above; graphite retention 86400/7200]
diff --git a/tests/testflows/example/configs/clickhouse/users.xml b/tests/testflows/example/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/example/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/extended_precision_data_types/configs/clickhouse/common.xml b/tests/testflows/extended_precision_data_types/configs/clickhouse/common.xml
deleted file mode 100644
index 0ba01589b90..00000000000
--- a/tests/testflows/extended_precision_data_types/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>::</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/extended_precision_data_types/configs/clickhouse/config.xml b/tests/testflows/extended_precision_data_types/configs/clickhouse/config.xml
deleted file mode 100644
index 842a0573d49..00000000000
--- a/tests/testflows/extended_precision_data_types/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,448 +0,0 @@
[448 deleted lines: stock test-suite config.xml with access control path and a user_directories block (users.xml plus /var/lib/clickhouse/access/); graphite retention 86400/60]
diff --git a/tests/testflows/extended_precision_data_types/configs/clickhouse/users.xml b/tests/testflows/extended_precision_data_types/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/extended_precision_data_types/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/kerberos/configs/clickhouse/common.xml b/tests/testflows/kerberos/configs/clickhouse/common.xml
deleted file mode 100644
index 31fa972199f..00000000000
--- a/tests/testflows/kerberos/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>0.0.0.0</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/kerberos/configs/clickhouse/config.xml b/tests/testflows/kerberos/configs/clickhouse/config.xml
deleted file mode 100644
index 0d2904eed48..00000000000
--- a/tests/testflows/kerberos/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,440 +0,0 @@
[440 deleted lines: stock test-suite config.xml with a user_directories block; graphite retention 86400/60]
diff --git a/tests/testflows/kerberos/configs/clickhouse/users.xml b/tests/testflows/kerberos/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/kerberos/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/ldap/authentication/configs/clickhouse/common.xml b/tests/testflows/ldap/authentication/configs/clickhouse/common.xml
deleted file mode 100644
index 31fa972199f..00000000000
--- a/tests/testflows/ldap/authentication/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>0.0.0.0</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/ldap/authentication/configs/clickhouse/config.xml b/tests/testflows/ldap/authentication/configs/clickhouse/config.xml
deleted file mode 100644
index 53ffa10384e..00000000000
--- a/tests/testflows/ldap/authentication/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,442 +0,0 @@
[442 deleted lines: stock test-suite config.xml with a user_directories block; graphite retention 86400/7200]
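The repeated localhost/9000 and localhost/9440/1 runs in each stripped config are the remains of the remote_servers test clusters. A sketch of their stock shape, with cluster and element names assumed and only the hosts, ports and secure flag recovered from the deleted text:

<remote_servers>
    <!-- cluster and element names assumed from the stock config.xml; hosts/ports recovered from the deleted text -->
    <test_shard_localhost>
        <shard>
            <replica>
                <host>localhost</host>
                <port>9000</port>
            </replica>
        </shard>
    </test_shard_localhost>
    <test_shard_localhost_secure>
        <shard>
            <replica>
                <host>localhost</host>
                <port>9440</port>
                <secure>1</secure>
            </replica>
        </shard>
    </test_shard_localhost_secure>
</remote_servers>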
diff --git a/tests/testflows/ldap/authentication/configs/clickhouse/users.xml b/tests/testflows/ldap/authentication/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/ldap/authentication/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/ldap/external_user_directory/configs/clickhouse/common.xml b/tests/testflows/ldap/external_user_directory/configs/clickhouse/common.xml
deleted file mode 100644
index 31fa972199f..00000000000
--- a/tests/testflows/ldap/external_user_directory/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>0.0.0.0</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/ldap/external_user_directory/configs/clickhouse/config.xml b/tests/testflows/ldap/external_user_directory/configs/clickhouse/config.xml
deleted file mode 100644
index 53ffa10384e..00000000000
--- a/tests/testflows/ldap/external_user_directory/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,442 +0,0 @@
[442 deleted lines: stock test-suite config.xml with a user_directories block; graphite retention 86400/7200]
diff --git a/tests/testflows/ldap/external_user_directory/configs/clickhouse/users.xml b/tests/testflows/ldap/external_user_directory/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/ldap/external_user_directory/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml
deleted file mode 100644
index 31fa972199f..00000000000
--- a/tests/testflows/ldap/role_mapping/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>0.0.0.0</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml
deleted file mode 100644
index 53ffa10384e..00000000000
--- a/tests/testflows/ldap/role_mapping/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,442 +0,0 @@
[442 deleted lines: stock test-suite config.xml with a user_directories block; graphite retention 86400/7200]
diff --git a/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml b/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/ldap/role_mapping/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/map_type/configs/clickhouse/config.xml b/tests/testflows/map_type/configs/clickhouse/config.xml
deleted file mode 100644
index 842a0573d49..00000000000
--- a/tests/testflows/map_type/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,448 +0,0 @@
[448 deleted lines: stock test-suite config.xml with access control path and a user_directories block; graphite retention 86400/60]
diff --git a/tests/testflows/map_type/configs/clickhouse/users.xml b/tests/testflows/map_type/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/map_type/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/rbac/configs/clickhouse/common.xml b/tests/testflows/rbac/configs/clickhouse/common.xml
deleted file mode 100644
index 0ba01589b90..00000000000
--- a/tests/testflows/rbac/configs/clickhouse/common.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<yandex>
-    <timezone>Europe/Moscow</timezone>
-    <listen_host>::</listen_host>
-    <path>/var/lib/clickhouse/</path>
-    <tmp_path>/var/lib/clickhouse/tmp/</tmp_path>
-</yandex>
diff --git a/tests/testflows/rbac/configs/clickhouse/config.xml b/tests/testflows/rbac/configs/clickhouse/config.xml
deleted file mode 100644
index f71f14f4733..00000000000
--- a/tests/testflows/rbac/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,456 +0,0 @@
[456 deleted lines: stock test-suite config.xml with access control path and a user_directories block, plus a system query_views_log table alongside the usual four; graphite retention 86400/7200]
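The eleven users.xml files deleted in this patch all share blob c7d0ecae693, i.e. one and the same 133-line file. Its surviving values (10000000000, 0, random, 1, ::/0, default, 3600 and the zeroed quota limits) fit the stock layout sketched below; the element names are assumed, not recovered verbatim:

<yandex>
    <!-- element names assumed from the stock users.xml; values recovered from the deleted text -->
    <profiles>
        <default>
            <max_memory_usage>10000000000</max_memory_usage>
            <use_uncompressed_cache>0</use_uncompressed_cache>
            <load_balancing>random</load_balancing>
        </default>
        <readonly>
            <readonly>1</readonly>
        </readonly>
    </profiles>
    <users>
        <default>
            <password></password>
            <networks>
                <ip>::/0</ip>
            </networks>
            <profile>default</profile>
            <quota>default</quota>
            <access_management>1</access_management>
        </default>
    </users>
    <quotas>
        <default>
            <interval>
                <duration>3600</duration>
                <queries>0</queries>
                <errors>0</errors>
                <result_rows>0</result_rows>
                <read_rows>0</read_rows>
                <execution_time>0</execution_time>
            </interval>
        </default>
    </quotas>
</yandex>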
diff --git a/tests/testflows/rbac/configs/clickhouse/users.xml b/tests/testflows/rbac/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/rbac/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/tests/testflows/window_functions/configs/clickhouse/config.xml b/tests/testflows/window_functions/configs/clickhouse/config.xml
deleted file mode 100644
index 842a0573d49..00000000000
--- a/tests/testflows/window_functions/configs/clickhouse/config.xml
+++ /dev/null
@@ -1,448 +0,0 @@
[448 deleted lines: stock test-suite config.xml with access control path and a user_directories block; graphite retention 86400/60]
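The users.xml / /var/lib/clickhouse/access/ pair that recurs in the larger config variants is the remains of the user_directories block. A sketch of its stock shape, with the two paths recovered from the deleted text and the element names assumed:

<user_directories>
    <!-- element names assumed from the stock config.xml; paths recovered from the deleted text -->
    <users_xml>
        <path>users.xml</path>
    </users_xml>
    <local_directory>
        <path>/var/lib/clickhouse/access/</path>
    </local_directory>
</user_directories>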
diff --git a/tests/testflows/window_functions/configs/clickhouse/users.xml b/tests/testflows/window_functions/configs/clickhouse/users.xml
deleted file mode 100644
index c7d0ecae693..00000000000
--- a/tests/testflows/window_functions/configs/clickhouse/users.xml
+++ /dev/null
@@ -1,133 +0,0 @@
[133 deleted lines: stock test-suite users.xml, as above]
diff --git a/utils/check-marks/main.cpp b/utils/check-marks/main.cpp
index 36b81509046..df6f6e5267e 100644
--- a/utils/check-marks/main.cpp
+++ b/utils/check-marks/main.cpp
@@ -10,7 +10,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
diff --git a/utils/github-hook/hook.py b/utils/github-hook/hook.py
deleted file mode 100644
index 1ea65f3c3ab..00000000000
--- a/utils/github-hook/hook.py
+++ /dev/null
@@ -1,320 +0,0 @@
-# -*- coding: utf-8 -*-
-import json
-import requests
-import time
-import os
-
-DB = 'gh-data'
-RETRIES = 5
-
-API_URL = 'https://api.github.com/repos/ClickHouse/ClickHouse/'
-
-
-def _reverse_dict_with_list(source):
-    result = {}
-    for key, value in list(source.items()):
-        for elem in value:
-            result[elem] = key
-    return result
-
-
-MARKER_TO_LABEL = {
-    '- New Feature': ['pr-feature'],
-    '- Bug Fix': ['pr-bugfix'],
-    '- Improvement': ['pr-improvement'],
-    '- Performance Improvement': ['pr-performance'],
-    '- Backward Incompatible Change': ['pr-backward-incompatible'],
-    '- Build/Testing/Packaging Improvement': ['pr-build'],
-    '- Documentation': ['pr-documentation', 'pr-doc-fix'],
-    '- Other': ['pr-other'],
-    '- Not for changelog': ['pr-not-for-changelog']
-}
-
-LABEL_TO_MARKER = _reverse_dict_with_list(MARKER_TO_LABEL)
-
-DOC_ALERT_LABELS = {
-    'pr-feature'
-}
-
-
-def set_labels_for_pr(pull_request_number, labels, headers):
-    data = {
-        "labels": list(labels)
-    }
-
-    for i in range(RETRIES):
-        try:
-            response = requests.put(API_URL + 'issues/' + str(pull_request_number) + '/labels', json=data, headers=headers)
-            response.raise_for_status()
-            break
-        except Exception as ex:
-            print(("Exception", ex))
-            time.sleep(0.2)
-
-
-def get_required_labels_from_desc(description, current_labels):
-    result = set([])
-    # find first matching category
-    for marker, labels in list(MARKER_TO_LABEL.items()):
-        if marker in description:
-            if not any(label in current_labels for label in labels):
-                result.add(labels[0])
-            break
-
-    # if no category than leave as is
-    if not result:
-        return current_labels
-
-    # save all old labels except category label
-    for label in current_labels:
-        if label not in result and label not in LABEL_TO_MARKER:
-            result.add(label)
-
-    # if some of labels require doc alert
-    if any(label in result for label in DOC_ALERT_LABELS):
-        result.add('doc-alert')
-
-    return result
-
-
-def label_pull_request_event(response):
-    pull_request = response['pull_request']
-    current_labels = set([label['name'] for label in pull_request['labels']])
-    pr_description = pull_request['body'] if pull_request['body'] else ''
-    required_labels = get_required_labels_from_desc(pr_description, current_labels)
-    if not required_labels.issubset(current_labels):
-        token = os.getenv('GITHUB_TOKEN')
-        auth = {'Authorization': 'token ' + token}
-        set_labels_for_pr(pull_request['number'], required_labels, auth)
-
-
-def process_issue_event(response):
-    issue = response['issue']
-    return dict(
-        action=response['action'],
-        sender=response['sender']['login'],
-        updated_at=issue['updated_at'],
-        url=issue['url'],
-        number=issue['number'],
-        author=issue['user']['login'],
-        labels=[label['name'] for label in issue['labels']],
-        state=issue['state'],
-        assignees=[assignee['login'] for assignee in issue['assignees']],
-        created_at=issue['created_at'],
-        body=issue['body'] if issue['body'] else '',
-        title=issue['title'],
-        comments=issue['comments'],
-        raw_json=json.dumps(response),)
-
-
-def process_issue_comment_event(response):
-    issue = response['issue']
-    comment = response['comment']
-
-    return dict(
-        action='comment_' + response['action'],
-        sender=response['sender']['login'],
-        updated_at=issue['updated_at'],
-        url=issue['url'],
-        number=issue['number'],
-        author=issue['user']['login'],
-        labels=[label['name'] for label in issue['labels']],
-        state=issue['state'],
-        assignees=[assignee['login'] for assignee in issue['assignees']],
-        created_at=issue['created_at'],
-        body=issue['body'] if issue['body'] else '',
-        title=issue['title'],
-        comments=issue['comments'],
-        comment_body=comment['body'],
-        comment_author=comment['user']['login'],
-        comment_url=comment['url'],
-        comment_created_at=comment['created_at'],
-        comment_updated_at=comment['updated_at'],
-        raw_json=json.dumps(response),)
-
-
-def process_pull_request_event(response):
-    pull_request = response['pull_request']
-    result = dict(
-        updated_at=pull_request['updated_at'],
-        number=pull_request['number'],
-        action=response['action'],
-        sender=response['sender']['login'],
-        url=pull_request['url'],
-        author=pull_request['user']['login'],
-        labels=[label['name'] for label in pull_request['labels']],
-        state=pull_request['state'],
-        body=pull_request['body'] if pull_request['body'] else '',
-        title=pull_request['title'],
-        created_at=pull_request['created_at'],
-        assignees=[assignee['login'] for assignee in pull_request['assignees']],
-        requested_reviewers=[reviewer['login'] for reviewer in pull_request['requested_reviewers']],
-        head_repo=pull_request['head']['repo']['full_name'],
-        head_ref=pull_request['head']['ref'],
-        head_clone_url=pull_request['head']['repo']['clone_url'],
-        head_ssh_url=pull_request['head']['repo']['ssh_url'],
-        base_repo=pull_request['base']['repo']['full_name'],
-        base_ref=pull_request['base']['ref'],
-        base_clone_url=pull_request['base']['repo']['clone_url'],
-        base_ssh_url=pull_request['base']['repo']['ssh_url'],
-        raw_json=json.dumps(response),
-    )
-
-    if 'mergeable' in pull_request and pull_request['mergeable'] is not None:
-        result['mergeable'] = 1 if pull_request['mergeable'] else 0
-
-    if 'merged_by' in pull_request and pull_request['merged_by'] is not None:
-        result['merged_by'] = pull_request['merged_by']['login']
-
-    if 'merged_at' in pull_request and pull_request['merged_at'] is not None:
-        result['merged_at'] = pull_request['merged_at']
-
-    if 'closed_at' in pull_request and pull_request['closed_at'] is not None:
-        result['closed_at'] = pull_request['closed_at']
-
-    if 'merge_commit_sha' in pull_request and pull_request['merge_commit_sha'] is not None:
-        result['merge_commit_sha'] = pull_request['merge_commit_sha']
-
-    if 'draft' in pull_request:
-        result['draft'] = 1 if pull_request['draft'] else 0
-
-    for field in ['comments', 'review_comments', 'commits', 'additions', 'deletions', 'changed_files']:
-        if field in pull_request:
-            result[field] = pull_request[field]
-
-    return result
-
-
-def process_pull_request_review(response):
-    result = process_pull_request_event(response)
-    review = response['review']
-    result['action'] = 'review_' + result['action']
-    result['review_body'] = review['body'] if review['body'] is not None else ''
-    result['review_id'] = review['id']
-    result['review_author'] = review['user']['login']
-    result['review_commit_sha'] = review['commit_id']
-    result['review_submitted_at'] = review['submitted_at']
-    result['review_state'] = review['state']
-    return result
-
-
-def process_pull_request_review_comment(response):
-    result = process_pull_request_event(response)
-    comment = response['comment']
-    result['action'] = 'review_comment_' + result['action']
-    result['review_id'] = comment['pull_request_review_id']
-    result['review_comment_path'] = comment['path']
-    result['review_commit_sha'] = comment['commit_id']
-    result['review_comment_body'] = comment['body']
-    result['review_comment_author'] = comment['user']['login']
-    result['review_comment_created_at'] = comment['created_at']
-    result['review_comment_updated_at'] = comment['updated_at']
-    return result
-
-
-def process_push(response):
-    common_part = dict(
-        before_sha=response['before'],
-        after_sha=response['after'],
-        full_ref=response['ref'],
-        ref=response['ref'].split('/')[-1],
-        repo=response['repository']['full_name'],
-        pusher=response['pusher']['name'],
-        sender=response['sender']['login'],
-        pushed_at=response['repository']['pushed_at'],
-        raw_json=json.dumps(response),
-    )
-    commits = response['commits']
-    result = []
-    for commit in commits:
-        commit_dict = common_part.copy()
-        commit_dict['sha'] = commit['id']
-        commit_dict['tree_sha'] = commit['tree_id']
-        commit_dict['author'] = commit['author']['name']
-        commit_dict['committer'] = commit['committer']['name']
-        commit_dict['message'] = commit['message']
-        commit_dict['commited_at'] = commit['timestamp']
-        result.append(commit_dict)
-    return result
-
-
-def event_processor_dispatcher(headers, body, inserter):
-    if 'X-Github-Event' in headers:
-        if headers['X-Github-Event'] == 'issues':
-            result = process_issue_event(body)
-            inserter.insert_event_into(DB, 'issues', result)
-        elif headers['X-Github-Event'] == 'issue_comment':
-            result = process_issue_comment_event(body)
-            inserter.insert_event_into(DB, 'issues', result)
-        elif headers['X-Github-Event'] == 'pull_request':
-            result = process_pull_request_event(body)
-            inserter.insert_event_into(DB, 'pull_requests', result)
-            label_pull_request_event(body)
-        elif headers['X-Github-Event'] == 'pull_request_review':
-            result = process_pull_request_review(body)
-            inserter.insert_event_into(DB, 'pull_requests', result)
-        elif headers['X-Github-Event'] == 'pull_request_review_comment':
-            result = process_pull_request_review_comment(body)
-            inserter.insert_event_into(DB, 'pull_requests', result)
-        elif headers['X-Github-Event'] == 'push':
-            result = process_push(body)
-            inserter.insert_events_into(DB, 'commits', result)
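Before the deleted file continues with its HTTP inserter class, a quick illustration of the category-labeling helpers defined above. This is my own hypothetical snippet (the description text and the 'can be tested' label are invented); it assumes only get_required_labels_from_desc and the MARKER_TO_LABEL table as shown earlier, and Python 3.7+ dict ordering:

# Hypothetical check of the category-detection logic defined above.
description = (
    "Changelog category:\n"
    "- Bug Fix\n"
    "\n"
    "Fixes a crash in window functions.\n"
)
current_labels = {'can be tested'}          # unrelated label already on the PR

required = get_required_labels_from_desc(description, current_labels)

# '- Bug Fix' is the first marker found in the text, so 'pr-bugfix' is added,
# while existing labels that are not category labels are carried over untouched.
assert required == {'pr-bugfix', 'can be tested'}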
-
-
-class ClickHouseInserter(object):
-    def __init__(self, url, user, password):
-        self.url = url
-        self.auth = {
-            'X-ClickHouse-User': user,
-            'X-ClickHouse-Key': password
-        }
-
-    def _insert_json_str_info(self, db, table, json_str):
-        params = {
-            'database': db,
-            'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table),
-            'date_time_input_format': 'best_effort'
-        }
-        for i in range(RETRIES):
-            response = None
-            try:
-                response = requests.post(self.url, params=params, data=json_str, headers=self.auth, verify=False)
-                response.raise_for_status()
-                break
-            except Exception as ex:
-                print("Cannot insert with exception %s" % str(ex))
-                if response:
-                    print("Response text %s" % response.text)
-                time.sleep(0.1)
-        else:
-            raise Exception("Cannot insert data into clickhouse")
-
-    def insert_event_into(self, db, table, event):
-        event_str = json.dumps(event)
-        self._insert_json_str_info(db, table, event_str)
-
-    def insert_events_into(self, db, table, events):
-        jsons = []
-        for event in events:
-            jsons.append(json.dumps(event))
-
-        self._insert_json_str_info(db, table, ','.join(jsons))
-
-
-def test(event, context):
-    inserter = ClickHouseInserter(
-        os.getenv('CLICKHOUSE_URL'),
-        os.getenv('CLICKHOUSE_USER'),
-        os.getenv('CLICKHOUSE_PASSWORD'))
-
-    body = json.loads(event['body'], strict=False)
-    headers = event['headers']
-    event_processor_dispatcher(headers, body, inserter)
-
-    return {
-        'statusCode': 200,
-        'headers': {
-            'Content-Type': 'text/plain'
-        },
-        'isBase64Encoded': False,
-    }
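For reference, the deleted inserter drives ClickHouse entirely over its HTTP interface: one POST per batch, rows encoded as JSONEachRow, credentials passed via the X-ClickHouse-User/X-ClickHouse-Key headers. A minimal stand-alone sketch of the same round trip, relying on the documented query URL parameter; the endpoint, database, table and credentials are placeholders, not values taken from this commit:

# Minimal sketch of a JSONEachRow insert over the ClickHouse HTTP interface.
# URL, database, table and credentials below are illustrative placeholders.
import json
import requests

row = {'number': 1, 'action': 'opened', 'title': 'Example PR'}

response = requests.post(
    'http://localhost:8123',                       # assumed local server
    params={
        'database': 'gh-data',
        'query': 'INSERT INTO pull_requests FORMAT JSONEachRow',
        'date_time_input_format': 'best_effort',   # tolerant timestamp parsing
    },
    headers={'X-ClickHouse-User': 'default', 'X-ClickHouse-Key': ''},
    data=json.dumps(row),
)
response.raise_for_status()                        # non-2xx means the insert failed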
diff --git a/utils/graphite-rollup/CMakeLists.txt b/utils/graphite-rollup/CMakeLists.txt
index 3cc0d3e756f..d5dca3db32e 100644
--- a/utils/graphite-rollup/CMakeLists.txt
+++ b/utils/graphite-rollup/CMakeLists.txt
@@ -17,7 +17,7 @@ target_include_directories(
     ${ClickHouse_SOURCE_DIR}/contrib/double-conversion
     ${ClickHouse_SOURCE_DIR}/contrib/dragonbox/include
     ${ClickHouse_SOURCE_DIR}/contrib/fmtlib/include
     ${ClickHouse_SOURCE_DIR}/contrib/cityhash102/include
-    ${RE2_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/re2_st
+    ${RE2_INCLUDE_DIR} ${CMAKE_BINARY_DIR}/contrib/re2-cmake
 )
 target_compile_definitions(graphite-rollup-bench PRIVATE RULES_DIR="${CMAKE_CURRENT_SOURCE_DIR}")
diff --git a/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md b/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md
deleted file mode 100644
index 2c0463687b4..00000000000
--- a/website/blog/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md
+++ /dev/null
@@ -1,8 +0,0 @@
----
-title: 'ClickHouse Meetup in Moscow, November 21, 2016'
-image: 'https://blog-images.clickhouse.com/ru/2016/clickhouse-meetup-v-moskve-21-noyabrya-2016/main.jpg'
-date: '2016-11-22'
-tags: ['events', 'meetup', 'Moscow']
----
-
-[Watch the video](https://events.yandex.ru/lib/talks/4351/)
diff --git a/website/blog/ru/2016/clickhouse-na-highload-2016.md b/website/blog/ru/2016/clickhouse-na-highload-2016.md
deleted file mode 100644
index 7dacbde140a..00000000000
--- a/website/blog/ru/2016/clickhouse-na-highload-2016.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-title: 'ClickHouse at HighLoad++ 2016'
-image: 'https://blog-images.clickhouse.com/ru/2016/clickhouse-na-highload-2016/main.jpg'
-date: '2016-12-10'
-tags: ['events', 'conferences', 'Moscow', 'HighLoad++']
----
-
-![iframe](https://www.youtube.com/embed/TAiCXHgZn50)
-
-[Talk transcript](https://habrahabr.ru/post/322724/)
-
-![iframe](https://www.youtube.com/embed/tf38TPvwjJ4)
-
-[Talk transcript](https://habrahabr.ru/post/322620/)
diff --git a/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md b/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md
deleted file mode 100644
index d90a7b9c4bb..00000000000
--- a/website/blog/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: 'ClickHouse at the meetup on data storage and processing infrastructure at Yandex'
-image: 'https://blog-images.clickhouse.com/ru/2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse/main.jpg'
-date: '2016-10-16'
-tags: ['events', 'infrastructure']
----
-
-![iframe](https://www.youtube.com/embed/Ho4_dQk7dAg)
-
-[Page of the event "Yandex under the hood: data storage and processing infrastructure"](https://events.yandex.ru/events/meetings/15-oct-2016/), held on October 15, 2016.
diff --git a/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md b/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md
deleted file mode 100644
index e7216f47408..00000000000
--- a/website/blog/ru/2016/yandeks-otkryvaet-clickhouse.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: 'Yandex open-sources ClickHouse'
-image: 'https://blog-images.clickhouse.com/ru/2016/yandeks-otkryvaet-clickhouse/main.jpg'
-date: '2016-06-15'
-tags: ['announcement', 'GitHub', 'license']
----
-
-Today an internal Yandex development, the [analytical DBMS ClickHouse](https://clickhouse.com/), becomes available to everyone. The source code is published on [GitHub](https://github.com/ClickHouse/ClickHouse) under the Apache 2.0 license.
-
-ClickHouse lets you run interactive analytical queries over data that is updated in real time. The system scales to tens of trillions of records and petabytes of stored data. Using ClickHouse opens up possibilities that used to be hard even to imagine: you can store the entire raw data stream without pre-aggregation and quickly get reports in any dimensions. ClickHouse was developed at Yandex for [Yandex.Metrica](https://metrika.yandex.ru/), the second largest web analytics system in the world.
diff --git a/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md b/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md
deleted file mode 100644
index adab2fd7676..00000000000
--- a/website/blog/ru/2017/clickhouse-meetup-edet-v-minsk.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-title: 'ClickHouse Meetup is coming to Minsk!'
-image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-edet-v-minsk/main.jpg'
-date: '2017-06-13'
-tags: ['events', 'meetup', 'Minsk', 'Belarus', 'announcement']
----
-
-On June 29, for the first time in Minsk, the creators of the ClickHouse DBMS and the people who use it daily for analytical tasks will give talks. The speakers will cover recent changes and upcoming updates to the DBMS, as well as the finer points of working with it.
-
-The meetup will be of interest to ClickHouse administrators and to those who are still just taking a look at the system. We also invite Belarusian users to share their experience with ClickHouse and give lightning talks at the meetup: you will be offered that opportunity during registration!
-
-Participation is free, but advance registration is required: the number of seats is limited.
-
-The program and the application form are available on the [event page](https://events.yandex.ru/events/meetings/29-june-2017).
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md
deleted file mode 100644
index b7441b7ac30..00000000000
--- a/website/blog/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md
+++ /dev/null
@@ -1,8 +0,0 @@
----
-title: 'ClickHouse Meetup in Yekaterinburg, May 16, 2017'
-image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017/main.jpg'
-date: '2017-05-17'
-tags: ['events', 'meetup', 'Yekaterinburg']
----
-
-[View the presentation](https://presentations.clickhouse.com/meetup6/)
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md b/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md
deleted file mode 100644
index 8cd3375abe9..00000000000
--- a/website/blog/ru/2017/clickhouse-meetup-v-minske-itogi.md
+++ /dev/null
@@ -1,16 +0,0 @@
----
-title: 'ClickHouse Meetup in Minsk: recap'
-image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-minske-itogi/main.jpg'
-date: '2017-06-19'
-tags: ['events', 'meetup', 'Minsk', 'Belarus']
----
-
-Recently in Minsk we met with ClickHouse users and with engineers who are just getting acquainted with the DBMS.
-
-We are sharing the speakers' presentations with you and will be glad to answer questions in the [ClickHouse Telegram chat](https://t.me/clickhouse_ru).
-
-[The history of ClickHouse, news and development plans](https://presentations.clickhouse.com/meetup7/), Alexey Milovidov
-
-[Using ClickHouse for network connectivity monitoring](https://presentations.clickhouse.com/meetup7/netmon.pdf), Dmitry Lipin
-
-[Understanding the internals of ClickHouse](https://presentations.clickhouse.com/meetup7/internals.pdf), Vitaliy Ludvichenko
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md
deleted file mode 100644
index e8bbf23c2c4..00000000000
--- a/website/blog/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: 'ClickHouse Meetup in Novosibirsk, April 3, 2017'
-image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017/main.jpg'
-date: '2017-04-04'
-tags: ['events', 'meetup', 'Novosibirsk']
----
-
-[Presentation by Alexey Milovidov](https://presentations.clickhouse.com/meetup4/)
-
-[Presentation by Maria Mansurova](https://presentations.clickhouse.com/meetup4/clickhouse_for_analysts.pdf)
diff --git a/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md b/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md
deleted file mode 100644
index 16bf2822746..00000000000
--- a/website/blog/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md
+++ /dev/null
@@ -1,8 +0,0 @@
----
-title: 'ClickHouse Meetup in Saint Petersburg, February 28, 2017'
-image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017/main.jpg'
-date: '2017-03-01'
-tags: ['events', 'meetup', 'Saint Petersburg']
----
-
-![iframe](https://www.youtube.com/embed/CVrwp4Zoex4)
diff --git a/website/blog/ru/2017/clickhouse-na-uwdc-2017.md b/website/blog/ru/2017/clickhouse-na-uwdc-2017.md
deleted file mode 100644
index 1806f5fb6ba..00000000000
--- a/website/blog/ru/2017/clickhouse-na-uwdc-2017.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: 'ClickHouse at UWDC 2017'
-image: 'https://blog-images.clickhouse.com/ru/2017/clickhouse-na-uwdc-2017/main.jpg'
-date: '2017-05-20'
-tags: ['events', 'conferences', 'Chelyabinsk']
----
-
-![iframe](https://www.youtube.com/embed/isYA4e5zg1M?t=2h8m15s)
-
-[View the presentation](https://presentations.clickhouse.com/uwdc/)
diff --git a/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md
deleted file mode 100644
index a4dbff081ff..00000000000
--- a/website/blog/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019.md
+++ /dev/null
@@ -1,38 +0,0 @@
----
-title: 'ClickHouse Meetup in Limassol, May 7, 2019'
-image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/main.jpg'
-date: '2019-05-14'
-tags: ['events', 'meetup', 'Limassol', 'Cyprus', 'Europe']
----
-
-The first open-air ClickHouse Meetup took place in the heart of Limassol, the second largest city of Cyprus, on a rooftop kindly provided by Exness Group. The views from the roof were stunning, but the speakers did a great job competing with them for the audience's attention. More than a hundred people joined the event, which once again confirms the high interest in ClickHouse around the globe. The meetup content is also available as a [video recording](https://www.youtube.com/watch?v=_rpU-TvSfZ8).
-
-[Kirill Shvakov](https://github.com/kshvakov) played the key role in making this event possible: he established communication with the ClickHouse community in Cyprus and found a great venue and speakers. Most ClickHouse meetups around the world happen thanks to active community members like Kirill. If you want to help us organize a ClickHouse meetup in your region, please contact the ClickHouse team at Yandex via [this form](https://clickhouse.com/#meet) or in any other convenient way.
-
-![Kirill Shvakov](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/1.jpg)
-
-Kirill is widely known for his excellent [ClickHouse Go Driver](https://github.com/clickhouse/clickhouse-go), which works over the native protocol, and his opening talk covered his experience optimizing ClickHouse queries and solving real-world applied tasks at Integros and Wisebits. [Slides](https://presentations.clickhouse.com/meetup22/strategies.pdf). [Full texts of the queries](https://github.com/kshvakov/ClickHouse-Meetup-Exness).
-
-The event started in the early evening...
-![Evening in Limassol](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/2.jpg)
-
-...but it took nature only about an hour to switch on the "night mode". On the upside, the projected slides became noticeably easier to read.
-![Night in Limassol](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/3.jpg)
-
-Sergey Tomilov and his colleagues from the Exness Platform Team shared details on the evolution of their systems for analyzing logs and metrics, and on how they ended up using ClickHouse for long-term data storage and analysis ([slides](https://presentations.clickhouse.com/meetup22/exness.pdf)):
-![Sergey Tomilov](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/4.jpg)
-
-Alexey Milovidov from the ClickHouse team at Yandex demonstrated features from recent ClickHouse releases and talked about what to expect in the near future ([slides](https://presentations.clickhouse.com/meetup22/new_features/)):
-![Alexey Milovidov](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/5.jpg)
-
-Alexander Zaitsev, CTO of Altinity, gave an overview of how ClickHouse can be integrated into environments running on Kubernetes ([slides](https://presentations.clickhouse.com/meetup22/kubernetes.pdf)):
-![Alexander Zaitsev](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/6.jpg)
-
-Vladimir Goncharov, a backend developer from Aloha Browser, closed the ClickHouse Limassol Meetup with a demonstration of several projects that integrate other open-source log analysis products with ClickHouse ([slides](https://presentations.clickhouse.com/meetup22/aloha.pdf)):
-![Vladimir Goncharov](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/7.jpg)
-
-Unfortunately, midnight was approaching, and only the most "frost-resistant" ClickHouse fans stayed through the whole event, as it was getting noticeably colder.
-
-![Limassol](https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-limassole-7-maya-2019/8.jpg)
-
-More photos from the event are available in the [short afterword by Exness](https://www.facebook.com/events/386638262181785/permalink/402167077295570/).
diff --git a/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md
deleted file mode 100644
index 7e82fd653d7..00000000000
--- a/website/blog/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: 'ClickHouse Meetup in Moscow, September 5, 2019'
-image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019/main.jpg'
-date: '2019-09-06'
-tags: ['events', 'meetup', 'Moscow']
----
-
-![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3gYSwohnKFUozYy9QdUpcT_)
-
-[Slides are published on GitHub](https://github.com/clickhouse/clickhouse-presentations/tree/master/meetup28).
diff --git a/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md
deleted file mode 100644
index a90efdca645..00000000000
--- a/website/blog/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md
+++ /dev/null
@@ -1,12 +0,0 @@
----
-title: 'ClickHouse Meetup in Novosibirsk, June 26, 2019'
-image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019/main.jpg'
-date: '2019-06-05'
-tags: ['events', 'meetup', 'Novosibirsk']
----
-
-The highlight of the second ClickHouse meetup in Novosibirsk was a pair of low-level talks diving deep into the internals of ClickHouse, while the rest of the content was very applied, with concrete usage scenarios. The hundred-seat hall kindly provided by S7 stayed full until the very end of the last talk, somewhere close to midnight.
-
-![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3ionSVt-NYC9Vu_83xxhb4J)
-
-As usual, [all slides are published on GitHub](https://presentations.clickhouse.com/meetup25).
diff --git a/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md b/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md
deleted file mode 100644
index bef157ade4e..00000000000
--- a/website/blog/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md
+++ /dev/null
@@ -1,10 +0,0 @@
----
-title: 'ClickHouse Meetup in Saint Petersburg, July 27, 2019'
-image: 'https://blog-images.clickhouse.com/ru/2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019/main.jpg'
-date: '2019-08-01'
-tags: ['events', 'meetup', 'Saint Petersburg']
----
-
-![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3j3X7TWrKmnEPcfEG901W-T)
-
-[Slides are published on GitHub](https://github.com/ClickHouse/clickhouse-presentations/tree/master/meetup27).
diff --git a/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md b/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md
deleted file mode 100644
index e6897f17156..00000000000
--- a/website/blog/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019.md
+++ /dev/null
@@ -1,12 +0,0 @@
----
-title: 'ClickHouse Meetup in Minsk, July 11, 2019'
-image: 'https://blog-images.clickhouse.com/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019/main.jpg'
-date: '2019-07-12'
-tags: ['events', 'meetup', 'Minsk', 'Belarus']
----
-
-![iframe](https://www.youtube.com/embed/videoseries?list=PL0Z2YDlm0b3hLz6dmyu6gM_X871FG9eCc)
-
-[All slides are published on GitHub](https://github.com/ClickHouse/clickhouse-presentations/tree/master/meetup26).
-
-![Minsk](https://blog-images.clickhouse.com/ru/2019/clickrouse-meetup-v-minske-11-iyulya-2019/1.jpg)
diff --git a/website/blog/ru/index.md b/website/blog/ru/index.md
deleted file mode 100644
index 227a69408dc..00000000000
--- a/website/blog/ru/index.md
+++ /dev/null
@@ -1,3 +0,0 @@
----
-is_index: true
----
diff --git a/website/blog/ru/redirects.txt b/website/blog/ru/redirects.txt
deleted file mode 100644
index 4e34d53af3d..00000000000
--- a/website/blog/ru/redirects.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-yandeks-otkryvaet-clickhouse.md 2016/yandeks-otkryvaet-clickhouse.md
-clickhouse-meetup-v-moskve-21-noyabrya-2016.md 2016/clickhouse-meetup-v-moskve-21-noyabrya-2016.md
-clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md 2016/clickhouse-na-vstreche-pro-infrastrukturu-khraneniya-i-obrabotki-dannykh-v-yandekse.md
-clickhouse-na-highload-2016.md 2016/clickhouse-na-highload-2016.md
-clickhouse-meetup-v-novosibirske-3-aprelya-2017.md 2017/clickhouse-meetup-v-novosibirske-3-aprelya-2017.md
-clickhouse-meetup-v-minske-itogi.md 2017/clickhouse-meetup-v-minske-itogi.md
-clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md 2017/clickhouse-meetup-v-sankt-peterburge-28-fevralya-2017.md
-clickhouse-meetup-v-ekaterinburge-16-maya-2017.md 2017/clickhouse-meetup-v-ekaterinburge-16-maya-2017.md
-clickhouse-na-uwdc-2017.md 2017/clickhouse-na-uwdc-2017.md
-clickhouse-meetup-edet-v-minsk.md 2017/clickhouse-meetup-edet-v-minsk.md
-clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md 2019/clickhouse-meetup-v-sankt-peterburge-27-iyulya-2019.md
-clickhouse-meetup-v-moskve-5-sentyabrya-2019.md 2019/clickhouse-meetup-v-moskve-5-sentyabrya-2019.md
-clickhouse-meetup-v-novosibirske-26-iyunya-2019.md 2019/clickhouse-meetup-v-novosibirske-26-iyunya-2019.md
-clickrouse-meetup-v-minske-11-iyulya-2019.md 2019/clickrouse-meetup-v-minske-11-iyulya-2019.md
-clickhouse-meetup-v-limassole-7-maya-2019.md 2019/clickhouse-meetup-v-limassole-7-maya-2019.md
diff --git a/website/js/base.js b/website/js/base.js
index d953d5f6a1f..6704231c69d 100644
--- a/website/js/base.js
+++ b/website/js/base.js
@@ -85,6 +85,9 @@
         $(element).append(
             ''
         );
+        $(element).append(
+            ''
+        );
     });
 }
 });