From aa8f282ad95d92b5e89838e228afbcb67cf2865c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 19 Feb 2022 18:43:02 +0100 Subject: [PATCH 001/669] Switch to clang/llvm 14 --- docker/packager/binary/Dockerfile | 2 +- docker/packager/deb/Dockerfile | 2 +- docker/packager/packager | 6 ++-- docker/test/base/Dockerfile | 2 +- docker/test/codebrowser/Dockerfile | 2 +- docker/test/fasttest/Dockerfile | 2 +- docker/test/fuzzer/run-fuzzer.sh | 4 +-- docker/test/keeper-jepsen/run.sh | 2 +- docs/_includes/cmake_in_clickhouse_header.md | 4 +-- docs/en/development/build-cross-arm.md | 4 +-- docs/en/development/build-cross-osx.md | 6 ++-- docs/en/development/build-cross-riscv.md | 2 +- docs/en/development/build.md | 2 +- docs/en/development/developer-instruction.md | 2 +- docs/ru/development/developer-instruction.md | 8 ----- tests/ci/ci_config.json | 12 ++++---- tests/ci/ci_config.py | 32 ++++++++++---------- 17 files changed, 42 insertions(+), 52 deletions(-) diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index e3e2e689b17..71dfca9fa4e 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -6,7 +6,7 @@ FROM ubuntu:20.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14 RUN apt-get update \ && apt-get install \ diff --git a/docker/packager/deb/Dockerfile b/docker/packager/deb/Dockerfile index 76a5f1d91c0..e150f37b6c9 100644 --- a/docker/packager/deb/Dockerfile +++ b/docker/packager/deb/Dockerfile @@ -6,7 +6,7 @@ FROM ubuntu:20.04 ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14 RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ diff --git a/docker/packager/packager b/docker/packager/packager index 05b2e02df96..4c80411951e 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -196,10 +196,8 @@ if __name__ == "__main__": parser.add_argument("--clickhouse-repo-path", default=os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir)) parser.add_argument("--output-dir", required=True) parser.add_argument("--build-type", choices=("debug", ""), default="") - parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64", - "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64", - "clang-13", "clang-13-darwin", "clang-13-darwin-aarch64", "clang-13-aarch64", "clang-13-ppc64le", - "clang-11-freebsd", "clang-12-freebsd", "clang-13-freebsd", "gcc-11"), default="clang-13") + parser.add_argument("--compiler", choices=("clang-14", "clang-14-darwin", "clang-14-darwin-aarch64", "clang-14-aarch64", "clang-14-ppc64le", + "clang-14-freebsd", "gcc-11"), default="clang-14") parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="") parser.add_argument("--split-binary", action="store_true") parser.add_argument("--clang-tidy", action="store_true") diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 6beab2e5bb7..ca44354620f 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -7,7 +7,7 @@ FROM clickhouse/test-util:$FROM_TAG ARG 
apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14 RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index 97f3f54ad98..102c2d4c697 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -8,7 +8,7 @@ FROM clickhouse/binary-builder:$FROM_TAG ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-13 libllvm13 libclang-13-dev libmlir-13-dev +RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-14 libllvm14 libclang-14-dev libmlir-14-dev # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index 46b74d89e13..03a79b45a10 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -7,7 +7,7 @@ FROM clickhouse/test-util:$FROM_TAG ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=13 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14 RUN apt-get update \ && apt-get install ca-certificates lsb-release wget gnupg apt-transport-https \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index e18c07bf2c1..d8cb417f18b 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -12,8 +12,8 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-13_debug_none_bundled_unsplitted_disable_False_binary"} -BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_bundled_unsplitted_disable_False_binary"} +BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function clone { diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index d7534270e2c..53ef63f33b4 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-13_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md index 02019f13964..de4b1ef7af5 100644 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ b/docs/_includes/cmake_in_clickhouse_header.md @@ -6,8 +6,8 @@ Minimal ClickHouse build 
example: ```bash cmake .. \ - -DCMAKE_C_COMPILER=$(which clang-13) \ - -DCMAKE_CXX_COMPILER=$(which clang++-13) \ + -DCMAKE_C_COMPILER=$(which clang-14) \ + -DCMAKE_CXX_COMPILER=$(which clang++-14) \ -DCMAKE_BUILD_TYPE=Debug \ -DENABLE_CLICKHOUSE_ALL=OFF \ -DENABLE_CLICKHOUSE_SERVER=ON \ diff --git a/docs/en/development/build-cross-arm.md b/docs/en/development/build-cross-arm.md index eb99105a857..bf378cb7712 100644 --- a/docs/en/development/build-cross-arm.md +++ b/docs/en/development/build-cross-arm.md @@ -9,7 +9,7 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for AARCH64 is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-13 +## Install Clang-14 or newer Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup or do ``` @@ -30,7 +30,7 @@ tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build-aarch64/cma ``` bash cd ClickHouse mkdir build-arm64 -CC=clang-13 CXX=clang++-13 cmake . -Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake +CC=clang-14 CXX=clang++-14 cmake . -Bbuild-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-aarch64.cmake ninja -C build-arm64 ``` diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index c7e40013113..e786843552d 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -9,14 +9,14 @@ This is for the case when you have Linux machine and want to use it to build `cl The cross-build for Mac OS X is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-13 +## Install Clang-14 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup. For example the commands for Bionic are like: ``` bash -sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-13 main" >> /etc/apt/sources.list -sudo apt-get install clang-13 +sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-14 main" >> /etc/apt/sources.list +sudo apt-get install clang-14 ``` ## Install Cross-Compilation Toolset {#install-cross-compilation-toolset} diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md index 5cdce710b41..6fab85314f8 100644 --- a/docs/en/development/build-cross-riscv.md +++ b/docs/en/development/build-cross-riscv.md @@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` bash cd ClickHouse mkdir build-riscv64 -CC=clang-13 CXX=clang++-13 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_ORC=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF +CC=clang-14 CXX=clang++-14 cmake . 
-Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_ORC=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF ninja -C build-riscv64 ``` diff --git a/docs/en/development/build.md b/docs/en/development/build.md index aaa3bdfd043..19401ba3766 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -76,7 +76,7 @@ The build requires the following components: - Git (is used only to checkout the sources, it’s not needed for the build) - CMake 3.10 or newer - Ninja -- C++ compiler: clang-13 or newer +- C++ compiler: clang-14 or newer - Linker: lld If all the components are installed, you may build in the same way as the steps above. diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md index f7d7100d181..a23617365a5 100644 --- a/docs/en/development/developer-instruction.md +++ b/docs/en/development/developer-instruction.md @@ -154,7 +154,7 @@ While inside the `build` directory, configure your build by running CMake. Befor export CC=clang CXX=clang++ cmake .. -If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-13 CXX=clang++-13`. The clang version will be in the script output. +If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-14 CXX=clang++-14`. The clang version will be in the script output. The `CC` variable specifies the compiler for C (short for C Compiler), and `CXX` variable instructs which C++ compiler is to be used for building. diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 964d39163d8..92e80578c49 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -200,14 +200,6 @@ cmake -DUSE_DEBUG_HELPERS=1 -DUSE_STATIC_LIBRARIES=0 -DSPLIT_SHARED_LIBRARIES=1 В процессе сборки могут появится сообщения `libprotobuf WARNING` про protobuf файлы в библиотеке libhdfs2. Это не имеет значения. -В случае получения ошибок вида `error: variable 'y' set but not used [-Werror,-Wunused-but-set-variable]` ножно попробовать использовать другую версию компилятора сlang. Например, на момент написания данного текста описанная выше команда по установке clang для Ubuntu 20.04 по-умолчанию устанавливает clang-13, с которым возникает эта ошибка. Для решения проблемы можно установить clang-12 с помощью команд: -```bash -wget https://apt.llvm.org/llvm.sh -chmod +x llvm.sh -sudo ./llvm.sh 12 -``` -И далее использовать именно его, указав соответствующую версию при установке переменных окружения CC и CXX перед вызовом cmake. 
- При успешной сборке, вы получите готовый исполняемый файл `ClickHouse/build/programs/clickhouse`: ls -l programs/clickhouse diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index 19afdd172d5..81ed2fff3d2 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -1,7 +1,7 @@ { "build_config": [ { - "compiler": "clang-13", + "compiler": "clang-14", "build-type": "", "sanitizer": "", "package-type": "deb", @@ -12,7 +12,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-14", "build-type": "", "sanitizer": "", "package-type": "performance", @@ -32,7 +32,7 @@ "with_coverage": false }, { - "compiler": "clang-13", + "compiler": "clang-14", "build-type": "", "sanitizer": "", "package-type": "binary", @@ -45,7 +45,7 @@ "tests_config": { "Testflows check": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-14", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -57,7 +57,7 @@ }, "Release": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-14", "package_type": "deb", "build_type": "relwithdebuginfo", "sanitizer": "none", @@ -69,7 +69,7 @@ }, "ClickHouse Keeper Jepsen": { "required_build_properties": { - "compiler": "clang-13", + "compiler": "clang-14", "package_type": "binary", "build_type": "relwithdebuginfo", "sanitizer": "none", diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 000d3d9a000..9afb5e981f6 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -8,7 +8,7 @@ BuildConfig = Dict[str, ConfValue] CI_CONFIG = { "build_config": { "package_release": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "", "package_type": "deb", @@ -19,7 +19,7 @@ CI_CONFIG = { "with_coverage": False, }, "performance": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "", "package_type": "performance", @@ -39,7 +39,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_aarch64": { - "compiler": "clang-13-aarch64", + "compiler": "clang-14-aarch64", "build_type": "", "sanitizer": "", "package_type": "deb", @@ -50,7 +50,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_asan": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "address", "package_type": "deb", @@ -60,7 +60,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_ubsan": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "undefined", "package_type": "deb", @@ -70,7 +70,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_tsan": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "thread", "package_type": "deb", @@ -80,7 +80,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_msan": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "memory", "package_type": "deb", @@ -90,7 +90,7 @@ CI_CONFIG = { "with_coverage": False, }, "package_debug": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "debug", "sanitizer": "", "package_type": "deb", @@ -100,7 +100,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_release": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -111,7 +111,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_tidy": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "debug", "sanitizer": "", "package_type": "binary", @@ -122,7 +122,7 @@ CI_CONFIG = { 
"with_coverage": False, }, "binary_splitted": { - "compiler": "clang-13", + "compiler": "clang-14", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -132,7 +132,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_darwin": { - "compiler": "clang-13-darwin", + "compiler": "clang-14-darwin", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -143,7 +143,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_aarch64": { - "compiler": "clang-13-aarch64", + "compiler": "clang-14-aarch64", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -154,7 +154,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_freebsd": { - "compiler": "clang-13-freebsd", + "compiler": "clang-14-freebsd", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -165,7 +165,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_darwin_aarch64": { - "compiler": "clang-13-darwin-aarch64", + "compiler": "clang-14-darwin-aarch64", "build_type": "", "sanitizer": "", "package_type": "binary", @@ -176,7 +176,7 @@ CI_CONFIG = { "with_coverage": False, }, "binary_ppc64le": { - "compiler": "clang-13-ppc64le", + "compiler": "clang-14-ppc64le", "build_type": "", "sanitizer": "", "package_type": "binary", From 26b905be65ab81acdc443dcaedc6764f0d1719ef Mon Sep 17 00:00:00 2001 From: zvonand Date: Sun, 20 Mar 2022 15:38:11 +0300 Subject: [PATCH 002/669] old work upload --- src/Functions/nonNegativeDerivative.cpp | 13 + src/Functions/nonNegativeDerivative.h | 234 ++++++++++++++++++ .../registerFunctionsMiscellaneous.cpp | 2 + 3 files changed, 249 insertions(+) create mode 100644 src/Functions/nonNegativeDerivative.cpp create mode 100644 src/Functions/nonNegativeDerivative.h diff --git a/src/Functions/nonNegativeDerivative.cpp b/src/Functions/nonNegativeDerivative.cpp new file mode 100644 index 00000000000..7a68aa5b2fe --- /dev/null +++ b/src/Functions/nonNegativeDerivative.cpp @@ -0,0 +1,13 @@ +#include +#include + + +namespace DB +{ + +void registerFunctionNonNegativeDerivative(FunctionFactory & factory) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/nonNegativeDerivative.h b/src/Functions/nonNegativeDerivative.h new file mode 100644 index 00000000000..8b58b2a232f --- /dev/null +++ b/src/Functions/nonNegativeDerivative.h @@ -0,0 +1,234 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** Calculate derivative of given value column by specified timestamp. */ +class FunctionNonNegativeDerivativeImpl : public IFunction +{ +private: + static NO_SANITIZE_UNDEFINED Int64 getResultScaling() + { + Int64 res = 1; + + return res; + } + + template + static NO_SANITIZE_UNDEFINED void process(const PaddedPODArray & metric, PaddedPODArray & result, + const PaddedPODArray & timestamp, const UInt32 ts_scale, + const std::tuple interval, const NullMap * null_map) + { + size_t size = metric.size(); + result.resize(size); + + if (size == 0) + return; + + Src prev_metric_value{}; + DateTime64 prev_ts_value{}; + + bool first_row = false; + + for (size_t i = 0; i < size; ++i) + { + if (null_map && (*null_map)[i]) + { + result[i] = Dst{}; + continue; + } + + if (!first_row) + { + auto cur = metric[i]; + /// Overflow is Ok. 
+ result[i] = static_cast(cur) - prev_metric_value; + prev_metric_value = cur; + } + else + { + result[i] = 0; + prev_metric_value = metric[i]; + first_row = false; + } + } + } + + /// Result type is same as result of subtraction of argument types. + template + using DstFieldType = typename NumberTraits::ResultOfSubtraction::Type; + + /// Call polymorphic lambda with tag argument of concrete field type of src_type. + template + void dispatchForSourceType(const IDataType & src_type, F && f) const + { + WhichDataType which(src_type); + + if (which.isUInt8()) + f(UInt8()); + else if (which.isUInt16()) + f(UInt16()); + else if (which.isUInt32()) + f(UInt32()); + else if (which.isUInt64()) + f(UInt64()); + else if (which.isInt8()) + f(Int8()); + else if (which.isInt16()) + f(Int16()); + else if (which.isInt32()) + f(Int32()); + else if (which.isInt64()) + f(Int64()); + else if (which.isFloat32()) + f(Float32()); + else if (which.isFloat64()) + f(Float64()); + else if (which.isDate()) + f(DataTypeDate::FieldType()); + else if (which.isDate32()) + f(DataTypeDate::FieldType()); + else if (which.isDateTime()) + f(DataTypeDateTime::FieldType()); + else + throw Exception("First argument for function " + getName() + " must have numeric type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + +public: + static constexpr auto name = "nonNegativeDerivative"; + + static FunctionPtr create(ContextPtr) + { + return std::make_shared(); + } + + String getName() const override + { + return name; + } + + bool isStateful() const override + { + return true; + } + + size_t getNumberOfArguments() const override + { + return 0; + } + + bool isDeterministic() const override { return false; } + bool isDeterministicInScopeOfQuery() const override + { + return true; + } + + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } + + bool useDefaultImplementationForNulls() const override { return false; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + DataTypePtr res; + dispatchForSourceType(*removeNullable(arguments[0]), [&](auto field_type_tag) + { + res = std::make_shared>>(); + }); + + if (arguments[0]->isNullable()) + res = makeNullable(res); + + return res; + } + + static std::tuple + dispatchForIntervalColumns(const ColumnWithTypeAndName & interval_column) + { + const auto * interval_type = checkAndGetDataType(interval_column.type.get()); + if (!interval_type) + throw Exception("Illegal value" + interval_column.name + "for function nonNegativeDerivative, INTERVAL expected", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); + if (!interval_column_const_int64) + throw Exception("Illegal value " + interval_column.name + "for function nonNegativeDerivative, INTERVAL expected", + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + Int64 num_units = interval_column_const_int64->getValue(); + if (num_units <= 0) + throw Exception("Value for column " + interval_column.name + "for function nonNegativeDerivative must be positive", + ErrorCodes::ARGUMENT_OUT_OF_BOUND); + + return {interval_type->getKind(), num_units}; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override + { + if (arguments.size() < 2 || arguments.size() > 3) + throw Exception("Invalid number of arguments, expected 2 or 3: nonNegativeDerivative(metric, timestamp[, INTERVAL x 
SECOND])", + DB::ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + const auto & metric = arguments.at(0); + const auto & timestamp = arguments.at(1); + + const auto timestamp_scale = assert_cast(*arguments[0].type).getScale(); + + // Default interval value: INTERVAL 1 SECOND + const auto interval_params = arguments.size() == 3 ? dispatchForIntervalColumns(arguments.at(2)) : std::tuple(IntervalKind::Second, 1); + + /// When column is constant, its derivative is 0. + if (isColumnConst(*metric.column)) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + + auto res_column = removeNullable(result_type)->createColumn(); + + const auto * metric_column = metric.column.get(); + const auto * timestamp_column = timestamp.column.get(); + + ColumnPtr null_map_column = nullptr; + const NullMap * null_map = nullptr; + if (const auto * nullable_column = checkAndGetColumn(metric_column)) + { + metric_column = &nullable_column->getNestedColumn(); + null_map_column = nullable_column->getNullMapColumnPtr(); + null_map = &nullable_column->getNullMapData(); + } + + dispatchForSourceType(*removeNullable(metric.type), [&](auto field_type_tag) + { + using MetricFieldType = decltype(field_type_tag); + + process(assert_cast &>(*metric_column).getData(), + assert_cast> &>(*res_column).getData(), + assert_cast &>(*timestamp_column).getData(), + timestamp_scale, interval_params, null_map); + }); + + if (null_map_column) + return ColumnNullable::create(std::move(res_column), null_map_column); + else + return res_column; + } +}; + +} diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 76d61ce509a..14474b715de 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -80,6 +80,7 @@ void registerFunctionInitialQueryID(FunctionFactory & factory); void registerFunctionServerUUID(FunctionFactory &); void registerFunctionZooKeeperSessionUptime(FunctionFactory &); void registerFunctionGetOSKernelVersion(FunctionFactory &); +void registerFunctionNonNegativeDerivative(FunctionFactory &); #if USE_ICU void registerFunctionConvertCharset(FunctionFactory &); @@ -166,6 +167,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionServerUUID(factory); registerFunctionZooKeeperSessionUptime(factory); registerFunctionGetOSKernelVersion(factory); + registerFunctionNonNegativeDerivative(factory); #if USE_ICU registerFunctionConvertCharset(factory); From 2edafbe7852744e84cb442cd3de5c24843bf622d Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 22 Mar 2022 16:05:58 +0300 Subject: [PATCH 003/669] update nonNegDer --- src/Functions/nonNegativeDerivative.h | 68 ++++++++++++++++----------- 1 file changed, 41 insertions(+), 27 deletions(-) diff --git a/src/Functions/nonNegativeDerivative.h b/src/Functions/nonNegativeDerivative.h index 8b58b2a232f..1be7e8368b4 100644 --- a/src/Functions/nonNegativeDerivative.h +++ b/src/Functions/nonNegativeDerivative.h @@ -27,16 +27,33 @@ namespace ErrorCodes class FunctionNonNegativeDerivativeImpl : public IFunction { private: - static NO_SANITIZE_UNDEFINED Int64 getResultScaling() + /** Get interval length in seconds **/ + static NO_SANITIZE_UNDEFINED Float64 getResultScaling(const std::tuple interval) { - Int64 res = 1; + auto interval_kind = std::get<0>(interval); + auto interval_length = std::get<1>(interval); - return res; + switch (interval_kind) + { + case IntervalKind::Week: + return interval_length * 604800; + case 
IntervalKind::Day: + return interval_length * 86400; + case IntervalKind::Hour: + return interval_length * 3600; + case IntervalKind::Minute: + return interval_length * 60; + case IntervalKind::Second: + return interval_length; + default: + throw Exception(fmt::format("Interval kind {}: interval length is variadic, only precise intervals accepted", + IntervalKind(interval_kind).toKeyword()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } } - template + template static NO_SANITIZE_UNDEFINED void process(const PaddedPODArray & metric, PaddedPODArray & result, - const PaddedPODArray & timestamp, const UInt32 ts_scale, + const PaddedPODArray & timestamp, const UInt32 ts_scale, const std::tuple interval, const NullMap * null_map) { size_t size = metric.size(); @@ -46,9 +63,11 @@ private: return; Src prev_metric_value{}; - DateTime64 prev_ts_value{}; + Ts prev_ts{}; - bool first_row = false; + bool first_row = true; + auto interval_length = getResultScaling(interval); + auto ts_scale_multiplier = common::exp10_i64(ts_scale); for (size_t i = 0; i < size; ++i) { @@ -58,25 +77,25 @@ private: continue; } - if (!first_row) - { - auto cur = metric[i]; - /// Overflow is Ok. - result[i] = static_cast(cur) - prev_metric_value; - prev_metric_value = cur; - } - else + if (first_row) { result[i] = 0; prev_metric_value = metric[i]; first_row = false; } + else + { + auto cur = metric[i]; + auto multiply = interval_length * ts_scale_multiplier / (timestamp[i].value - prev_ts.value); + result[i] = (cur - prev_metric_value) * multiply; + prev_metric_value = cur; + } } } /// Result type is same as result of subtraction of argument types. template - using DstFieldType = typename NumberTraits::ResultOfSubtraction::Type; + using DstFieldType = typename NumberTraits::ResultOfFloatingPointDivision::Type; /// Call polymorphic lambda with tag argument of concrete field type of src_type. template @@ -132,16 +151,11 @@ public: return true; } - size_t getNumberOfArguments() const override - { - return 0; - } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } bool isDeterministic() const override { return false; } - bool isDeterministicInScopeOfQuery() const override - { - return true; - } + bool isDeterministicInScopeOfQuery() const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } @@ -191,7 +205,7 @@ public: const auto & metric = arguments.at(0); const auto & timestamp = arguments.at(1); - const auto timestamp_scale = assert_cast(*arguments[0].type).getScale(); + const auto timestamp_scale = assert_cast(*arguments[1].type).getScale(); // Default interval value: INTERVAL 1 SECOND const auto interval_params = arguments.size() == 3 ? 
dispatchForIntervalColumns(arguments.at(2)) : std::tuple(IntervalKind::Second, 1); @@ -220,8 +234,8 @@ public: process(assert_cast &>(*metric_column).getData(), assert_cast> &>(*res_column).getData(), - assert_cast &>(*timestamp_column).getData(), - timestamp_scale, interval_params, null_map); + assert_cast &>(*timestamp_column).getData(), timestamp_scale, + interval_params, null_map); }); if (null_map_column) From 3e42072d95d1a07a2039cdd90b3808c527f93f08 Mon Sep 17 00:00:00 2001 From: zvonand Date: Tue, 22 Mar 2022 18:09:36 +0300 Subject: [PATCH 004/669] fix --- src/Functions/nonNegativeDerivative.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Functions/nonNegativeDerivative.h b/src/Functions/nonNegativeDerivative.h index 1be7e8368b4..71949c7c991 100644 --- a/src/Functions/nonNegativeDerivative.h +++ b/src/Functions/nonNegativeDerivative.h @@ -28,7 +28,7 @@ class FunctionNonNegativeDerivativeImpl : public IFunction { private: /** Get interval length in seconds **/ - static NO_SANITIZE_UNDEFINED Float64 getResultScaling(const std::tuple interval) + static NO_SANITIZE_UNDEFINED Float64 getIntervalLength(const std::tuple interval) { auto interval_kind = std::get<0>(interval); auto interval_length = std::get<1>(interval); @@ -66,7 +66,7 @@ private: Ts prev_ts{}; bool first_row = true; - auto interval_length = getResultScaling(interval); + auto interval_length = getIntervalLength(interval); auto ts_scale_multiplier = common::exp10_i64(ts_scale); for (size_t i = 0; i < size; ++i) @@ -81,6 +81,7 @@ private: { result[i] = 0; prev_metric_value = metric[i]; + prev_ts = timestamp[i]; first_row = false; } else @@ -89,6 +90,7 @@ private: auto multiply = interval_length * ts_scale_multiplier / (timestamp[i].value - prev_ts.value); result[i] = (cur - prev_metric_value) * multiply; prev_metric_value = cur; + prev_ts = timestamp[i]; } } } From b483888a033e87d37e9591820e0aec66b0a4a050 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 23 Mar 2022 22:21:35 +0300 Subject: [PATCH 005/669] added tests --- src/Functions/nonNegativeDerivative.h | 4 +--- .../0_stateless/02232_non_negative_derivative.sql | 11 +++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02232_non_negative_derivative.sql diff --git a/src/Functions/nonNegativeDerivative.h b/src/Functions/nonNegativeDerivative.h index 71949c7c991..b7214027fad 100644 --- a/src/Functions/nonNegativeDerivative.h +++ b/src/Functions/nonNegativeDerivative.h @@ -88,18 +88,16 @@ private: { auto cur = metric[i]; auto multiply = interval_length * ts_scale_multiplier / (timestamp[i].value - prev_ts.value); - result[i] = (cur - prev_metric_value) * multiply; + result[i] = cur >= prev_metric_value ? (cur - prev_metric_value) * multiply : 0; prev_metric_value = cur; prev_ts = timestamp[i]; } } } - /// Result type is same as result of subtraction of argument types. template using DstFieldType = typename NumberTraits::ResultOfFloatingPointDivision::Type; - /// Call polymorphic lambda with tag argument of concrete field type of src_type. 
template void dispatchForSourceType(const IDataType & src_type, F && f) const { diff --git a/tests/queries/0_stateless/02232_non_negative_derivative.sql b/tests/queries/0_stateless/02232_non_negative_derivative.sql new file mode 100644 index 00000000000..1b4c97a1982 --- /dev/null +++ b/tests/queries/0_stateless/02232_non_negative_derivative.sql @@ -0,0 +1,11 @@ +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 1 SECOND) AS nnd +FROM ( + SELECT * FROM VALUES ( + 'ts DateTime64(3, "UTC"), metric Int32', + (toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1), + (toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2), + (toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3), + (toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2), + (toDateTime('1979-12-12 21:21:22', 'UTC'), 13) + ) + ); \ No newline at end of file From e47bc1c300dc38d8d11c7ef97b5353687cfad872 Mon Sep 17 00:00:00 2001 From: zvonand Date: Wed, 23 Mar 2022 22:32:30 +0300 Subject: [PATCH 006/669] updated tests --- .../02232_non_negative_derivative.reference | 12 ++++++++++++ .../02232_non_negative_derivative.sql | 18 ++++++++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02232_non_negative_derivative.reference diff --git a/tests/queries/0_stateless/02232_non_negative_derivative.reference b/tests/queries/0_stateless/02232_non_negative_derivative.reference new file mode 100644 index 00000000000..be942129a06 --- /dev/null +++ b/tests/queries/0_stateless/02232_non_negative_derivative.reference @@ -0,0 +1,12 @@ +1979-12-13 00:21:21.123 1 0 +1979-12-13 00:21:21.124 2 1000 +1979-12-13 00:21:21.127 3 333.3333333333333 +1979-12-13 00:21:21.129 2 0 +1979-12-13 00:21:22.000 13 12.629161882893225 +1979-12-13 00:21:23.000 10 0 +1979-12-13 00:21:21.123 1.1 0 +1979-12-13 00:21:21.124 2.34 1499903999.9999998 +1979-12-13 00:21:21.127 3.7 548352000.0000001 +1979-12-13 00:21:21.129 2.1 0 +1979-12-13 00:21:22.000 1.3345 0 +1979-12-13 00:21:23.000 1.5 200188.8 diff --git a/tests/queries/0_stateless/02232_non_negative_derivative.sql b/tests/queries/0_stateless/02232_non_negative_derivative.sql index 1b4c97a1982..fd9f86be60a 100644 --- a/tests/queries/0_stateless/02232_non_negative_derivative.sql +++ b/tests/queries/0_stateless/02232_non_negative_derivative.sql @@ -6,6 +6,20 @@ FROM ( (toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2), (toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3), (toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2), - (toDateTime('1979-12-12 21:21:22', 'UTC'), 13) + (toDateTime('1979-12-12 21:21:22', 'UTC'), 13), + (toDateTime('1979-12-12 21:21:23', 'UTC'), 10) ) - ); \ No newline at end of file + ); + +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 2 WEEK) AS nnd +FROM ( + SELECT * FROM VALUES ( + 'ts DateTime64(3, "UTC"), metric Float64', + (toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1.1), + (toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2.34), + (toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3.7), + (toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2.1), + (toDateTime('1979-12-12 21:21:22', 'UTC'), 1.3345), + (toDateTime('1979-12-12 21:21:23', 'UTC'), 1.5) + ) + ); \ No newline at end of file From add44ed9142715db5f2c0017e4bb6bf218739de6 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 24 Mar 2022 14:21:42 +0300 Subject: [PATCH 007/669] updated test --- src/Functions/nonNegativeDerivative.h | 6 ++ .../02232_non_negative_derivative.reference | 62 +++++++++++++++---- .../02232_non_negative_derivative.sql | 43 
++++++------- 3 files changed, 75 insertions(+), 36 deletions(-) diff --git a/src/Functions/nonNegativeDerivative.h b/src/Functions/nonNegativeDerivative.h index b7214027fad..92dd483cdbf 100644 --- a/src/Functions/nonNegativeDerivative.h +++ b/src/Functions/nonNegativeDerivative.h @@ -45,6 +45,12 @@ private: return interval_length * 60; case IntervalKind::Second: return interval_length; + case IntervalKind::Millisecond: + return interval_length * 0.001; + case IntervalKind::Microsecond: + return interval_length * 0.000001; + case IntervalKind::Nanosecond: + return interval_length * 0.000000001; default: throw Exception(fmt::format("Interval kind {}: interval length is variadic, only precise intervals accepted", IntervalKind(interval_kind).toKeyword()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); diff --git a/tests/queries/0_stateless/02232_non_negative_derivative.reference b/tests/queries/0_stateless/02232_non_negative_derivative.reference index be942129a06..575b50f89ab 100644 --- a/tests/queries/0_stateless/02232_non_negative_derivative.reference +++ b/tests/queries/0_stateless/02232_non_negative_derivative.reference @@ -1,12 +1,50 @@ -1979-12-13 00:21:21.123 1 0 -1979-12-13 00:21:21.124 2 1000 -1979-12-13 00:21:21.127 3 333.3333333333333 -1979-12-13 00:21:21.129 2 0 -1979-12-13 00:21:22.000 13 12.629161882893225 -1979-12-13 00:21:23.000 10 0 -1979-12-13 00:21:21.123 1.1 0 -1979-12-13 00:21:21.124 2.34 1499903999.9999998 -1979-12-13 00:21:21.127 3.7 548352000.0000001 -1979-12-13 00:21:21.129 2.1 0 -1979-12-13 00:21:22.000 1.3345 0 -1979-12-13 00:21:23.000 1.5 200188.8 +- shall work for precise intervals +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 0.00000372 +1979-12-12 21:21:21.127 3.7 0.0000013600000000000005 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 6.165000000000001e-10 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 0.004959999999999999 +1979-12-12 21:21:21.127 3.7 0.0018133333333333337 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 8.22e-7 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 6.199999999999999 +1979-12-12 21:21:21.127 3.7 2.2666666666666675 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 0.0010275000000000002 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 7439.999999999998 +1979-12-12 21:21:21.127 3.7 2720.0000000000005 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 1.233 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 520799.9999999999 +1979-12-12 21:21:21.127 3.7 190400.00000000006 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 86.31 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 35711999.99999999 +1979-12-12 21:21:21.127 3.7 13056000.000000004 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 5918.400000000001 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 964223999.9999998 +1979-12-12 21:21:21.127 3.7 352512000.00000006 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 159796.80000000002 +1979-12-12 21:21:21.123 1.1 0 +1979-12-12 21:21:21.124 2.34 7499519999.999999 +1979-12-12 21:21:21.127 3.7 2741760000.0000005 +1979-12-12 21:21:21.129 2.1 0 +1979-12-12 21:21:22.000 1.3345 0 +1979-12-12 21:21:23.000 1.54 1242864 +- shall not work for month, quarter, 
year diff --git a/tests/queries/0_stateless/02232_non_negative_derivative.sql b/tests/queries/0_stateless/02232_non_negative_derivative.sql index fd9f86be60a..fe25985315b 100644 --- a/tests/queries/0_stateless/02232_non_negative_derivative.sql +++ b/tests/queries/0_stateless/02232_non_negative_derivative.sql @@ -1,25 +1,20 @@ -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 1 SECOND) AS nnd -FROM ( - SELECT * FROM VALUES ( - 'ts DateTime64(3, "UTC"), metric Int32', - (toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1), - (toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2), - (toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3), - (toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2), - (toDateTime('1979-12-12 21:21:22', 'UTC'), 13), - (toDateTime('1979-12-12 21:21:23', 'UTC'), 10) - ) - ); +DROP TABLE IF EXISTS nnd; +CREATE TABLE nnd (id Int8, ts DateTime64(3, 'UTC'), metric Float64) ENGINE=MergeTree() ORDER BY id; +INSERT INTO nnd VALUES (1, toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1.1), (2, toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2.34), (3, toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3.7), (4, toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2.1), (5, toDateTime('1979-12-12 21:21:22', 'UTC'), 1.3345), (6, toDateTime('1979-12-12 21:21:23', 'UTC'), 1.54); -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 2 WEEK) AS nnd -FROM ( - SELECT * FROM VALUES ( - 'ts DateTime64(3, "UTC"), metric Float64', - (toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1.1), - (toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2.34), - (toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3.7), - (toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2.1), - (toDateTime('1979-12-12 21:21:22', 'UTC'), 1.3345), - (toDateTime('1979-12-12 21:21:23', 'UTC'), 1.5) - ) - ); \ No newline at end of file +SELECT '- shall work for precise intervals'; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND) FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 4 MICROSECOND) FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 5 MILLISECOND) FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 6 SECOND) FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 7 MINUTE) FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 8 HOUR) FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 9 DAY) FROM nnd; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 10 WEEK) FROM nnd; + +SELECT '- shall not work for month, quarter, year'; +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 11 MONTH) FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 12 QUARTER) FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 13 YEAR) FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } + +DROP TABLE IF EXISTS nnd; From 43c4f3cf84448657f40c62600ef6c92e998fa843 Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 24 Mar 2022 15:56:41 +0300 Subject: [PATCH 008/669] updated docs --- .../functions/other-functions.md | 48 +++++++++++++++++++ .../functions/other-functions.md | 46 ++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index bce3f9144b1..dd4c29cbcd2 100644 --- 
a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -914,6 +914,54 @@ WHERE diff != 1
 
 Same as [runningDifference](./other-functions.md#other_functions-runningdifference), except that the first row returns the value of the first row itself, and each subsequent row returns the difference from the previous row.
 
+## nonNegativeDerivative(metric, timestamp[, INTERVAL]) {#other_functions-nonnegativederivative}
+
+Calculates the non-negative rate of change of the given metric values over the given timestamps, scaled to the specified interval.
+Returns 0 for the first row and the derivative for each consecutive row.
+If at some point the derivative is negative, the value is ignored and the result is 0.
+
+!!! warning "Warning"
+    The function can reach the previous row only inside the currently processed data block.
+    The result of the function depends on the affected data blocks and the order of data in the block.
+
+!!! warning "Warning"
+    The order of rows used during the calculation of `nonNegativeDerivative` can differ from the order of rows returned to the user.
+    To prevent that, you can make a subquery with [ORDER BY](../../sql-reference/statements/select/order-by.md) and call the function from outside the subquery.
+
+Example:
+
+``` sql
+SELECT
+    ts,
+    metric,
+    nonNegativeDerivative(metric, ts, toIntervalNanosecond(3)) AS nnd
+FROM
+(
+    SELECT *
+    FROM
+    values('id Int32, ts DateTime(3), metric Float64',
+        (1, toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1.1),
+        (2, toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2.34),
+        (3, toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3.7),
+        (4, toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2.1))
+)
+
+```
+
+``` text
+┌──────────────────────ts─┬─metric─┬──────────────────────nnd─┐
+│ 1979-12-13 00:21:21.123 │    1.1 │                        0 │
+│ 1979-12-13 00:21:21.124 │   2.34 │               0.00000372 │
+│ 1979-12-13 00:21:21.127 │    3.7 │ 0.0000013600000000000005 │
+│ 1979-12-13 00:21:21.129 │    2.1 │                        0 │
+└─────────────────────────┴────────┴──────────────────────────┘
+
+```
+
+!!! note "Note"
+    Like with [runningDifference](#runningdifferencex-other_functions-runningdifference), block size affects the result. With each new block, the function state is reset.
+
+
 ## runningConcurrency {#runningconcurrency}
 
 Calculates the number of concurrent events.
diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index 62f25c221c5..62776400a84 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -859,6 +859,52 @@ WHERE diff != 1
 
 То же, что и [runningDifference](./other-functions.md#other_functions-runningdifference), но в первой строке возвращается значение первой строки, а не ноль.
 
+## nonNegativeDerivative(metric, timestamp[, INTERVAL]) {#other_functions-nonnegativederivative}
+
+В каждый промежуток времени между моментами в колонке `timestamp` считает прирост метрики, приведённый к указанному интервалу.
+Для первого ряда это значение 0. Если какое-либо из полученных значений отрицательно, то результатом будет 0.
+
+!!! warning "Warning"
+    Имейте в виду, что значение производной может быть посчитано только внутри одного блока данных.
+    Результат вычисления зависит от порядка данных внутри блока.
+
+!!! warning "Warning"
+    Порядок строк, используемый при вычислении функции, может отличаться от того, который возвращается пользователю.
+    Чтобы избежать этого, используйте [ORDER BY](../../sql-reference/statements/select/order-by.md). 
+ +Пример использования: + +``` sql +SELECT + ts, + metric, + nonNegativeDerivative(metric, ts, toIntervalNanosecond(3)) AS nnd +FROM +( + SELECT * + FROM + values('id Int32, ts DateTime(3), metric Float64', + (1, toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1.1), + (2, toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2.34), + (3, toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3.7), + (4, toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2.1)) +) + +``` + +``` text +┌──────────────────────ts─┬─metric─┬──────────────────────nnd─┐ +│ 1979-12-13 00:21:21.123 │ 1.1 │ 0 │ +│ 1979-12-13 00:21:21.124 │ 2.34 │ 0.00000372 │ +│ 1979-12-13 00:21:21.127 │ 3.7 │ 0.0000013600000000000005 │ +│ 1979-12-13 00:21:21.129 │ 2.1 │ 0 │ +└─────────────────────────┴────────┴──────────────────────────┘ + +``` + +!!! note "Note" +Как и в случае с [runningDifference](#runningdifferencex-other_functions-runningdifference), размер блока влияет на результат. С каждым новым блоком состояние функции обнуляется. + ## runningConcurrency {#runningconcurrency} Подсчитывает количество одновременно идущих событий. From 38df1e945b243abd3d6a7c3b22f9709f202f8fea Mon Sep 17 00:00:00 2001 From: zvonand Date: Thu, 24 Mar 2022 16:36:11 +0300 Subject: [PATCH 009/669] fix style --- src/Functions/nonNegativeDerivative.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Functions/nonNegativeDerivative.h b/src/Functions/nonNegativeDerivative.h index 92dd483cdbf..88166345f54 100644 --- a/src/Functions/nonNegativeDerivative.h +++ b/src/Functions/nonNegativeDerivative.h @@ -21,6 +21,7 @@ namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ARGUMENT_OUT_OF_BOUND; } /** Calculate derivative of given value column by specified timestamp. 
*/

From 3c2ce079cbc430acdb730d93ba90d65ab8572e28 Mon Sep 17 00:00:00 2001
From: zvonand 
Date: Thu, 24 Mar 2022 17:35:53 +0300
Subject: [PATCH 010/669] scope fix

---
 src/Functions/nonNegativeDerivative.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/Functions/nonNegativeDerivative.h b/src/Functions/nonNegativeDerivative.h
index 88166345f54..13f1735b8ff 100644
--- a/src/Functions/nonNegativeDerivative.h
+++ b/src/Functions/nonNegativeDerivative.h
@@ -54,7 +54,7 @@ private:
                 return interval_length * 0.000000001;
             default:
                 throw Exception(fmt::format("Interval kind {}: interval length is variadic, only precise intervals accepted",
-                                            IntervalKind(interval_kind).toKeyword()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                                            IntervalKind(interval_kind).toKeyword()), DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
         }
     }
 
@@ -137,7 +137,7 @@ private:
         else if (which.isDateTime())
             f(DataTypeDateTime::FieldType());
         else
-            throw Exception("First argument for function " + getName() + " must have numeric type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            throw Exception("First argument for function " + getName() + " must have numeric type.", DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
     }
 
 public:
@@ -188,17 +188,17 @@ public:
         const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
         if (!interval_type)
             throw Exception("Illegal value" + interval_column.name + "for function nonNegativeDerivative, INTERVAL expected",
-                            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                            DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         const auto * interval_column_const_int64 = checkAndGetColumnConst<ColumnInt64>(interval_column.column.get());
         if (!interval_column_const_int64)
             throw Exception("Illegal value " + interval_column.name + "for function nonNegativeDerivative, INTERVAL expected",
-                            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+                            DB::ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
 
         Int64 num_units = interval_column_const_int64->getValue<Int64>();
         if (num_units <= 0)
             throw Exception("Value for column " + interval_column.name + "for function nonNegativeDerivative must be positive",
-                            ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+                            DB::ErrorCodes::ARGUMENT_OUT_OF_BOUND);
 
         return {interval_type->getKind(), num_units};
     }

From c01aa3e4fbe5dd8c8dcbb3d17456bf260059a293 Mon Sep 17 00:00:00 2001
From: zvonand 
Date: Thu, 24 Mar 2022 19:33:24 +0300
Subject: [PATCH 011/669] retrigger checks


From 5a40d868ea5a6ba1280c6c69f3735430ab0539e1 Mon Sep 17 00:00:00 2001
From: FArthur-cmd <613623@mail.ru>
Date: Thu, 24 Mar 2022 22:45:54 +0300
Subject: [PATCH 012/669] add compressor

---
 utils/CMakeLists.txt                  |   1 +
 utils/self-extr-exec/CMakeLists.txt   |   5 +
 utils/self-extr-exec/compressor.cpp   | 141 ++++++++++++++++++++++++++
 utils/self-extr-exec/decompressor.cpp |   7 ++
 4 files changed, 154 insertions(+)
 create mode 100644 utils/self-extr-exec/CMakeLists.txt
 create mode 100644 utils/self-extr-exec/compressor.cpp
 create mode 100644 utils/self-extr-exec/decompressor.cpp

diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt
index 51300472ed1..11334988957 100644
--- a/utils/CMakeLists.txt
+++ b/utils/CMakeLists.txt
@@ -32,6 +32,7 @@ if (NOT DEFINED ENABLE_UTILS OR ENABLE_UTILS)
     add_subdirectory (check-mysql-binlog)
     add_subdirectory (keeper-bench)
     add_subdirectory (graphite-rollup)
+    add_subdirectory (self-extr-exec)
 
     if (TARGET ch_contrib::nuraft)
         add_subdirectory (keeper-data-dumper)
diff --git a/utils/self-extr-exec/CMakeLists.txt b/utils/self-extr-exec/CMakeLists.txt
new file mode 100644
index 00000000000..0054996dd82
--- /dev/null
+++ b/utils/self-extr-exec/CMakeLists.txt
@@ 
-0,0 +1,5 @@ +add_executable (compressor compressor.cpp) +target_link_libraries(compressor PUBLIC ch_contrib::zstd) + +add_executable (decompressor decompressor.cpp) +target_link_libraries(compressor PUBLIC ch_contrib::zstd) diff --git a/utils/self-extr-exec/compressor.cpp b/utils/self-extr-exec/compressor.cpp new file mode 100644 index 00000000000..bf0b2c6c297 --- /dev/null +++ b/utils/self-extr-exec/compressor.cpp @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +/// Main compression part +int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offset, + off_t input_size, off_t output_size, ZSTD_CCtx * cctx) +{ + size_t compressed_size = ZSTD_compress2(cctx, output + out_offset, output_size, input + in_offset, input_size); + if (ZSTD_isError(compressed_size)) + { + std::cout << "Cannot compress block with ZSTD: " + std::string(ZSTD_getErrorName(compressed_size)) << std::endl; + return 1; + } + in_offset += input_size; + out_offset += output_size; + return 0; +} + +/// compress data from opened file into output file +int compress(int in_fd, int out_fd, int level=3) +{ + /// read data about input file + struct stat info_in; + fstat(in_fd, &info_in); + if (info_in.st_size == 0) { + std::cout << "Empty input file" << std::endl; + return 1; + } + + /// Read data about output file. + /// Compressed data will be added to the end of file + /// It will allow to create self extracting executable from file + struct stat info_out; + fstat(out_fd, &info_out); + + /// As experiments showed, size of compressed file is 4 times less than clickhouse executable + /// Get a little bit more memory to prevent errors with size. + /// For compression this difference will not be huge + ftruncate(out_fd, info_out.st_size + info_in.st_size / 3); + off_t in_offset = 0, out_offset = info_out.st_size; + + /// mmap files + char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ | PROT_EXEC, MAP_SHARED , in_fd, 0)); + char * output = static_cast(mmap(nullptr, info_out.st_size + info_in.st_size / 3, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, out_fd, 0)); + if (input == reinterpret_cast(-1) || output == reinterpret_cast(-1)) + { + std::cout << (input == reinterpret_cast(-1)) << " " << (output == reinterpret_cast(-1)) << std::endl; + perror(nullptr); + return 1; + } + + /// Create context + ZSTD_CCtx * cctx = ZSTD_createCCtx(); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level); + ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); + // ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, 9); + + /// Remember size of file. 
It will help to avoid using additional memory + /// during decompression + // char * file_size = reinterpret_cast(&info_in.st_size); + // for (size_t i = 0; i < sizeof(info_in.st_size)/sizeof(char); ++i) + // output[out_offset++] = *file_size; + + + /// limits for size of block to prevent high memory usage or bad compression + // off_t max_block_size = 100000000ull; + // off_t min_block_size = 10000000ull; + off_t size = 0; + + /// Compress data + while (in_offset < info_in.st_size) + { + /// take blocks of maximum size + /// optimize last block (it can be bigger, if it is not too huge) + // if (info_in.st_size - in_offset < max_block_size || info_in.st_size - in_offset < max_block_size + min_block_size) + // size = info_in.st_size - in_offset; + // else + // size = max_block_size; + size = info_in.st_size - in_offset; + + /// Compress data or exit if error happens + if (0 != doCompress(input, output, in_offset, out_offset, size, ZSTD_compressBound(size), cctx)) + { + ftruncate(out_fd, info_out.st_size); + munmap(input, info_in.st_size); + munmap(output, info_out.st_size + info_in.st_size / 3); + return 1; + } + std::cout << in_offset << " " << out_offset << std::endl; + } + + /// Shrink file size and unmap + ftruncate(out_fd, out_offset); + munmap(input, info_in.st_size); + munmap(output, info_out.st_size + info_in.st_size / 3); + return 0; +} + +int main(int argc, char* argv[]) +{ + if (argc < 3) + { + std::cout << "Not enough arguments.\ncompressor [file that should be compressed] [file name for compressed file] [OPTIONAL level of compression]" << std::endl; + return 0; + } + + int input_fd = open(argv[1], O_RDWR); + if (input_fd == -1) + { + perror(nullptr); + return 0; + } + + int output_fd = open(argv[2], O_RDWR | O_CREAT, 0777); + if (input_fd == -1) + { + perror(nullptr); + return 0; + } + + int result; + if (argc == 4) + result = compress(input_fd, output_fd, strtol(argv[3], nullptr, 10)); + else + result = compress(input_fd, output_fd); + + if (result == 0) + std::cout << "Successfully compressed" << std::endl; + else + std::cout << "An error has occurred" << std::endl; + + close(input_fd); + close(output_fd); + return 0; +} diff --git a/utils/self-extr-exec/decompressor.cpp b/utils/self-extr-exec/decompressor.cpp new file mode 100644 index 00000000000..5f0a467aef7 --- /dev/null +++ b/utils/self-extr-exec/decompressor.cpp @@ -0,0 +1,7 @@ +#include + +int main() +{ + std::cout << "Test" << std::endl; + return 0; +} From 7f0c6fb103727662e63f5a90a626f1092d090883 Mon Sep 17 00:00:00 2001 From: FArthur-cmd <613623@mail.ru> Date: Tue, 29 Mar 2022 19:30:09 +0000 Subject: [PATCH 013/669] improve self-extr-executable --- utils/self-extr-exec/CMakeLists.txt | 2 +- utils/self-extr-exec/compressor.cpp | 55 +++++++----- utils/self-extr-exec/decompressor.cpp | 106 ++++++++++++++++++++++- utils/self-extr-exec/make_compression.sh | 9 ++ 4 files changed, 145 insertions(+), 27 deletions(-) create mode 100755 utils/self-extr-exec/make_compression.sh diff --git a/utils/self-extr-exec/CMakeLists.txt b/utils/self-extr-exec/CMakeLists.txt index 0054996dd82..b804fc27e72 100644 --- a/utils/self-extr-exec/CMakeLists.txt +++ b/utils/self-extr-exec/CMakeLists.txt @@ -2,4 +2,4 @@ add_executable (compressor compressor.cpp) target_link_libraries(compressor PUBLIC ch_contrib::zstd) add_executable (decompressor decompressor.cpp) -target_link_libraries(compressor PUBLIC ch_contrib::zstd) +target_link_libraries(decompressor PUBLIC ch_contrib::zstd) diff --git a/utils/self-extr-exec/compressor.cpp 
b/utils/self-extr-exec/compressor.cpp index bf0b2c6c297..951117ec8be 100644 --- a/utils/self-extr-exec/compressor.cpp +++ b/utils/self-extr-exec/compressor.cpp @@ -18,12 +18,12 @@ int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offse return 1; } in_offset += input_size; - out_offset += output_size; + out_offset += compressed_size; return 0; } /// compress data from opened file into output file -int compress(int in_fd, int out_fd, int level=3) +int compress(int in_fd, int out_fd, int level=5) { /// read data about input file struct stat info_in; @@ -32,25 +32,33 @@ int compress(int in_fd, int out_fd, int level=3) std::cout << "Empty input file" << std::endl; return 1; } + std::cout << "In current size is " << info_in.st_size << std::endl; /// Read data about output file. /// Compressed data will be added to the end of file /// It will allow to create self extracting executable from file struct stat info_out; - fstat(out_fd, &info_out); + fstat(out_fd, &info_out); + std::cout << "Out current size is " << info_out.st_size << std::endl; + + /// NOTE: next parametrs depend on binary size + // 6402520 is size of stripped decompressor + size_t start = 6405000ull; + + // 22550744 size of decompressor + // size_t start = 22550780ull; /// As experiments showed, size of compressed file is 4 times less than clickhouse executable /// Get a little bit more memory to prevent errors with size. /// For compression this difference will not be huge - ftruncate(out_fd, info_out.st_size + info_in.st_size / 3); - off_t in_offset = 0, out_offset = info_out.st_size; + ftruncate(out_fd, start + info_in.st_size / 3); + off_t in_offset = 0, out_offset = start; /// mmap files - char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ | PROT_EXEC, MAP_SHARED , in_fd, 0)); - char * output = static_cast(mmap(nullptr, info_out.st_size + info_in.st_size / 3, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, out_fd, 0)); + char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE , in_fd, 0)); + char * output = static_cast(mmap(nullptr, start + info_in.st_size / 3, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, out_fd, 0)); if (input == reinterpret_cast(-1) || output == reinterpret_cast(-1)) { - std::cout << (input == reinterpret_cast(-1)) << " " << (output == reinterpret_cast(-1)) << std::endl; perror(nullptr); return 1; } @@ -59,37 +67,36 @@ int compress(int in_fd, int out_fd, int level=3) ZSTD_CCtx * cctx = ZSTD_createCCtx(); ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level); ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); - // ZSTD_CCtx_setParameter(cctx, ZSTD_c_strategy, 9); /// Remember size of file. 
It will help to avoid using additional memory /// during decompression - // char * file_size = reinterpret_cast(&info_in.st_size); - // for (size_t i = 0; i < sizeof(info_in.st_size)/sizeof(char); ++i) - // output[out_offset++] = *file_size; + char * file_size = reinterpret_cast(&info_in.st_size); + for (size_t i = 0; i < sizeof(info_in.st_size)/sizeof(char); ++i) + output[out_offset++] = *(file_size + i); /// limits for size of block to prevent high memory usage or bad compression - // off_t max_block_size = 100000000ull; - // off_t min_block_size = 10000000ull; + off_t max_block_size = 1ull<<27; + off_t min_block_size = 1ull<<23; off_t size = 0; + std::cout << in_offset << " " << out_offset << std::endl; /// Compress data while (in_offset < info_in.st_size) { /// take blocks of maximum size /// optimize last block (it can be bigger, if it is not too huge) - // if (info_in.st_size - in_offset < max_block_size || info_in.st_size - in_offset < max_block_size + min_block_size) - // size = info_in.st_size - in_offset; - // else - // size = max_block_size; - size = info_in.st_size - in_offset; + if (info_in.st_size - in_offset < max_block_size || info_in.st_size - in_offset < max_block_size + min_block_size) + size = info_in.st_size - in_offset; + else + size = max_block_size; /// Compress data or exit if error happens if (0 != doCompress(input, output, in_offset, out_offset, size, ZSTD_compressBound(size), cctx)) { ftruncate(out_fd, info_out.st_size); munmap(input, info_in.st_size); - munmap(output, info_out.st_size + info_in.st_size / 3); + munmap(output, start + info_in.st_size / 3); return 1; } std::cout << in_offset << " " << out_offset << std::endl; @@ -98,7 +105,7 @@ int compress(int in_fd, int out_fd, int level=3) /// Shrink file size and unmap ftruncate(out_fd, out_offset); munmap(input, info_in.st_size); - munmap(output, info_out.st_size + info_in.st_size / 3); + munmap(output, start + info_in.st_size / 3); return 0; } @@ -110,15 +117,15 @@ int main(int argc, char* argv[]) return 0; } - int input_fd = open(argv[1], O_RDWR); + int input_fd = open(argv[1], O_RDONLY); if (input_fd == -1) { perror(nullptr); return 0; } - int output_fd = open(argv[2], O_RDWR | O_CREAT, 0777); - if (input_fd == -1) + int output_fd = open(argv[2], O_RDWR | O_CREAT, 0775); + if (output_fd == -1) { perror(nullptr); return 0; diff --git a/utils/self-extr-exec/decompressor.cpp b/utils/self-extr-exec/decompressor.cpp index 5f0a467aef7..6d6b44065ee 100644 --- a/utils/self-extr-exec/decompressor.cpp +++ b/utils/self-extr-exec/decompressor.cpp @@ -1,7 +1,109 @@ -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/// decompress part +int doDecompress(char * input, char * output, off_t & in_offset, off_t & out_offset, + off_t input_size, off_t output_size, ZSTD_DCtx* dctx) +{ + size_t decompressed_size = ZSTD_decompressDCtx(dctx, output + out_offset, output_size, input + in_offset, input_size); + if (ZSTD_isError(decompressed_size)) + { + return 1; + } + in_offset += input_size; + out_offset += decompressed_size; + return 0; +} + +/// decompress data from in_fd into out_fd +int decompress(int in_fd, int out_fd) +{ + /// Read data about output file. 
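+    /// (Illustrative aside, assumed from this patch rather than stated in it:
+    /// the combined binary produced by the compressor is laid out as
+    ///     [decompressor, padded to a fixed size | 8-byte uncompressed size | zstd frames]
+    /// so the reader below can start at a hard-coded offset and trust that size.)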
+ /// Compressed data will replace data in file + struct stat info_in; + fstat(in_fd, &info_in); + + /// NOTE: next parametrs depend on binary size + // 22550780ull for full, 6405000ull for stripped; + off_t in_offset = 6405000ull /*size of decompressor*/, out_offset = 0; + + /// mmap files + char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_SHARED , in_fd, 0)); + if (input == reinterpret_cast(-1)) + { + perror(nullptr); + return 1; + } + + /// Create context + ZSTD_DCtx * dctx = ZSTD_createDCtx(); + + /// Read size of file. It will help to avoid using additional memory + /// during decompression. + size_t * file_size = reinterpret_cast(input + in_offset); + in_offset += sizeof(size_t); + + /// Prepare output file + ftruncate(out_fd, *file_size); + char * output = static_cast(mmap(nullptr, *file_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, out_fd, 0)); + if (output == reinterpret_cast(-1)) + { + perror(nullptr); + return 1; + } + + off_t size = 0; + off_t max_block_size = 1ull<<27; + + /// Compress data + while (in_offset < info_in.st_size) + { + size = ZSTD_findFrameCompressedSize(input + in_offset, max_block_size); + + /// Compress data or exit if error happens + if (0 != doDecompress(input, output, in_offset, out_offset, size, max_block_size, dctx)) + { + munmap(input, info_in.st_size); + munmap(output, *file_size); + return 1; + } + } + + /// Shrink file size and unmap + munmap(output, *file_size); + munmap(input, info_in.st_size); + return 0; +} int main() { - std::cout << "Test" << std::endl; + int input_fd = open("decompressor", O_RDONLY); + if (input_fd == -1) + { + perror(nullptr); + return 0; + } + + int output_fd = open("clickhouse_decompressed", O_RDWR | O_CREAT, 0775); + if (output_fd == -1) + { + perror(nullptr); + return 0; + } + + if (0 != decompress(input_fd, output_fd)) + { + return 1; + } + + close(input_fd); + close(output_fd); return 0; } diff --git a/utils/self-extr-exec/make_compression.sh b/utils/self-extr-exec/make_compression.sh new file mode 100755 index 00000000000..6fd8517c0db --- /dev/null +++ b/utils/self-extr-exec/make_compression.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +# Put this script near compressor and decompressor + +strip decompressor + +# TODO use env variables +./compressor ../../programs/clickhouse decompressor + \ No newline at end of file From 31e07fe078cee9410a1e13592cb473f5a5792afc Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 30 Mar 2022 16:53:25 +0300 Subject: [PATCH 014/669] Fixed crash when reporting successful login. 
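The crash came from dereferencing a user entry that may already have been dropped by the time the successful login is logged. A minimal sketch of the guarded pattern this patch moves to (the types here are simplified stand-ins, not the exact ClickHouse access-control API):

    // Hypothetical, simplified shapes; only the null-check pattern matters.
    struct User { std::string name; const std::string & getName() const { return name; } };
    using UserPtr = std::shared_ptr<const User>;

    UserPtr user = login_context.getUser();  // may be null if the user was dropped concurrently
    if (user)
        log_entry.user = user->getName();
    // else: leave the entry's user fields empty instead of dereferencing null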
--- src/Interpreters/SessionLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index a0c29c07d38..c808d7b66d7 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -212,8 +212,8 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; + if (const auto user = login_context.getUser()) { - const auto user = access->getUser(); log_entry.user = user->getName(); log_entry.user_identified_with = user->auth_data.getType(); log_entry.external_auth_server = user->auth_data.getLDAPServerName(); From dc88d8d5716c591a57741e6dc48e281aab2e7653 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Wed, 30 Mar 2022 23:37:24 +0300 Subject: [PATCH 015/669] Explicitly passing a user object to create a LogIn event --- src/Interpreters/Session.cpp | 3 ++- src/Interpreters/SessionLog.cpp | 11 ++++------- src/Interpreters/SessionLog.h | 3 ++- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 2af9a2b6bbc..7f71167834d 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -486,7 +486,8 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t session_log->addLoginSuccess( auth_id, named_session ? std::optional(named_session->key.second) : std::nullopt, - *query_context); + *query_context, + *user); notified_session_log_about_login = true; } diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index c808d7b66d7..689f44f536a 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -203,7 +203,7 @@ void SessionLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length()); } -void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context) +void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const User & login_user) { const auto access = login_context.getAccess(); const auto & settings = login_context.getSettingsRef(); @@ -212,12 +212,9 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, std::optional ses DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; - if (const auto user = login_context.getUser()) - { - log_entry.user = user->getName(); - log_entry.user_identified_with = user->auth_data.getType(); - log_entry.external_auth_server = user->auth_data.getLDAPServerName(); - } + log_entry.user = login_user.getName(); + log_entry.user_identified_with = login_user.auth_data.getType(); + log_entry.external_auth_server = login_user.auth_data.getLDAPServerName(); if (session_id) log_entry.session_id = *session_id; diff --git a/src/Interpreters/SessionLog.h b/src/Interpreters/SessionLog.h index 26f137565cb..2c9ea1c69e3 100644 --- a/src/Interpreters/SessionLog.h +++ b/src/Interpreters/SessionLog.h @@ -18,6 +18,7 @@ enum SessionLogElementType : int8_t }; class ContextAccess; +struct User; /** A struct which will be inserted as row into session_log table. 
 *
@@ -69,7 +70,7 @@ class SessionLog : public SystemLog
     using SystemLog::SystemLog;
 
 public:
-    void addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context);
+    void addLoginSuccess(const UUID & auth_id, std::optional session_id, const Context & login_context, const User & login_user);
     void addLoginFailure(const UUID & auth_id, const ClientInfo & info, const String & user, const Exception & reason);
     void addLogOut(const UUID & auth_id, const String & user, const ClientInfo & client_info);
 };

From 6c1d04710f556634cd8f53ad3bf994e51dd20965 Mon Sep 17 00:00:00 2001
From: FArthur-cmd <613623@mail.ru>
Date: Wed, 30 Mar 2022 22:01:50 +0000
Subject: [PATCH 016/669] finish self-extracting executable

---
 utils/self-extr-exec/compressor.cpp      |  4 ++--
 utils/self-extr-exec/decompressor.cpp    | 15 ++++++++++-----
 utils/self-extr-exec/make_compression.sh |  5 +++--
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/utils/self-extr-exec/compressor.cpp b/utils/self-extr-exec/compressor.cpp
index 951117ec8be..996a7080a6c 100644
--- a/utils/self-extr-exec/compressor.cpp
+++ b/utils/self-extr-exec/compressor.cpp
@@ -45,8 +45,8 @@ int compress(int in_fd, int out_fd, int level=5)
     // 6402520 is size of stripped decompressor
     size_t start = 6405000ull;
 
-    // 22550744 size of decompressor
-    // size_t start = 22550780ull;
+    // 22558008ull size of decompressor
+    // size_t start = 22558008ull;
 
     /// As experiments showed, size of compressed file is 4 times less than clickhouse executable
     /// Get a little bit more memory to prevent errors with size.
diff --git a/utils/self-extr-exec/decompressor.cpp b/utils/self-extr-exec/decompressor.cpp
index 6d6b44065ee..2c30342f28b 100644
--- a/utils/self-extr-exec/decompressor.cpp
+++ b/utils/self-extr-exec/decompressor.cpp
@@ -31,7 +31,7 @@ int decompress(int in_fd, int out_fd)
     fstat(in_fd, &info_in);
 
     /// NOTE: next parametrs depend on binary size
-    // 22550780ull for full, 6405000ull for stripped;
+    // 22558008ull for full, 6405000ull for stripped;
     off_t in_offset = 6405000ull /*size of decompressor*/, out_offset = 0;
 
@@ -82,16 +82,16 @@ int decompress(int in_fd, int out_fd)
     return 0;
 }
 
-int main()
+int main(int /*argc*/, char* argv[])
 {
-    int input_fd = open("decompressor", O_RDONLY);
+    int input_fd = open(argv[0], O_RDONLY);
     if (input_fd == -1)
     {
         perror(nullptr);
         return 0;
     }
 
-    int output_fd = open("clickhouse_decompressed", O_RDWR | O_CREAT, 0775);
+    int output_fd = open("clickhouse.decompressed", O_RDWR | O_CREAT, 0775);
     if (output_fd == -1)
     {
         perror(nullptr);
         return 0;
     }
 
@@ -103,7 +103,12 @@ int main()
         return 1;
     }
 
-    close(input_fd);
+    fsync(output_fd);
     close(output_fd);
+    close(input_fd);
+
+    /// NOTE: This command should not depend on any variables.
+    /// It should be changed if file changes.
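+    /// (Illustrative note, assuming standard execl() semantics rather than
+    /// anything specific to this patch: the argument list must end with a
+    /// NULL pointer, and execl() returns only on failure, e.g.
+    ///     execl("/bin/sh", "sh", "-c", "true", (char *) NULL);
+    ///     perror("execl");   // reached only if the exec itself failed
+    /// which is why a successful call below never returns here.)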
+ execl("/usr/bin/bash", "bash", "-c", "mv ./clickhouse.decompressed ./clickhouse", NULL); return 0; } diff --git a/utils/self-extr-exec/make_compression.sh b/utils/self-extr-exec/make_compression.sh index 6fd8517c0db..80d8ddb6519 100755 --- a/utils/self-extr-exec/make_compression.sh +++ b/utils/self-extr-exec/make_compression.sh @@ -2,8 +2,9 @@ # Put this script near compressor and decompressor -strip decompressor +cp decompressor clickhouse +strip clickhouse # TODO use env variables -./compressor ../../programs/clickhouse decompressor +./compressor ../../programs/clickhouse clickhouse \ No newline at end of file From 0df2302a6d0af46dce7d9ea542537cda02800f30 Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Thu, 31 Mar 2022 21:16:51 +0300 Subject: [PATCH 017/669] Test that tries to reproduce a race condition --- .../02242_delete_user_race.reference | 0 .../0_stateless/02242_delete_user_race.sh | 78 +++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 tests/queries/0_stateless/02242_delete_user_race.reference create mode 100755 tests/queries/0_stateless/02242_delete_user_race.sh diff --git a/tests/queries/0_stateless/02242_delete_user_race.reference b/tests/queries/0_stateless/02242_delete_user_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02242_delete_user_race.sh b/tests/queries/0_stateless/02242_delete_user_race.sh new file mode 100755 index 00000000000..7b5bd366e69 --- /dev/null +++ b/tests/queries/0_stateless/02242_delete_user_race.sh @@ -0,0 +1,78 @@ +#!/usr/bin/env bash +# Tags: race, no-fasttest, no-parallel + +# Test tries to reproduce a race between threads: +# - deletes user +# - creates user +# - uses it as session user +# - apply role to the user +# +# https://github.com/ClickHouse/ClickHouse/issues/35714 + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. 
"$CURDIR"/replication.lib + +#export PS4='\nDEBUG level:$SHLVL subshell-level: $BASH_SUBSHELL \nsource-file:${BASH_SOURCE} line#:${LINENO} function:${FUNCNAME[0]:+${FUNCNAME[0]}(): }\nstatement: ' + + +$CLICKHOUSE_CLIENT -nm -q " + DROP ROLE IF EXISTS test_role_02242; + CREATE ROLE test_role_02242; +" + +readonly REPEAT=1000 + +function delete_user() +{ + local i + for (( i = 0; i < REPEAT; ++i )) + do + $CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS test_user_02242" ||: + done +} + +function create_and_login_user() +{ + local i + for (( i = 0; i < REPEAT; ++i )) + do + $CLICKHOUSE_CLIENT -q "CREATE USER IF NOT EXISTS test_user_02242" ||: + $CLICKHOUSE_CLIENT -u "test_user_02242" -q "SELECT version()" > /dev/null ||: + done +} + +function set_role() +{ + local i + for (( i = 0; i < REPEAT; ++i )) + do + $CLICKHOUSE_CLIENT -q "SET ROLE test_role_02242 TO test_user_02242" ||: + done +} + +export -f delete_user +export -f create_and_login_user +export -f set_role + + +TIMEOUT=0.1 + +for (( i = 0 ; i < 1000; ++i )) +do + clickhouse_client_loop_timeout $TIMEOUT create_and_login_user 2> /dev/null & + clickhouse_client_loop_timeout $TIMEOUT delete_user 2> /dev/null & + clickhouse_client_loop_timeout $TIMEOUT login_user 2> /dev/null & + clickhouse_client_loop_timeout $TIMEOUT set_role 2> /dev/null & +done + +wait + +# $CLICKHOUSE_CLIENT -q "DROP ROLE IF EXISTS test_role_02242" +# $CLICKHOUSE_CLIENT -q "DROP USER IF EXISTS test_user_02242" + +# wait From 183e952ac53461d297f7a1a310fbe8d30b00195a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Apr 2022 03:52:42 +0200 Subject: [PATCH 018/669] Add clang-tidy --- cmake/analysis.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/analysis.cmake b/cmake/analysis.cmake index d1b9c86f15f..fd86e07dd6b 100644 --- a/cmake/analysis.cmake +++ b/cmake/analysis.cmake @@ -6,7 +6,7 @@ if (ENABLE_CLANG_TIDY) message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.") endif() - find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-13" "clang-tidy-12" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8") + find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-14" "clang-tidy-13") if (CLANG_TIDY_PATH) message(STATUS From d89e03e04049ce1b128136b822507e2f775bca2d Mon Sep 17 00:00:00 2001 From: Vasily Nemkov Date: Fri, 1 Apr 2022 15:32:53 +0300 Subject: [PATCH 019/669] Made test less flakky --- tests/queries/0_stateless/02242_delete_user_race.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02242_delete_user_race.sh b/tests/queries/0_stateless/02242_delete_user_race.sh index 7b5bd366e69..256775e5334 100755 --- a/tests/queries/0_stateless/02242_delete_user_race.sh +++ b/tests/queries/0_stateless/02242_delete_user_race.sh @@ -62,7 +62,7 @@ export -f set_role TIMEOUT=0.1 -for (( i = 0 ; i < 1000; ++i )) +for (( i = 0 ; i < 100; ++i )) do clickhouse_client_loop_timeout $TIMEOUT create_and_login_user 2> /dev/null & clickhouse_client_loop_timeout $TIMEOUT delete_user 2> /dev/null & From 3ef9036f52e86f251f277008a671eb303017d688 Mon Sep 17 00:00:00 2001 From: FArthur-cmd <613623@mail.ru> Date: Thu, 7 Apr 2022 12:21:14 +0000 Subject: [PATCH 020/669] improve compressor --- utils/self-extr-exec/compressor.cpp | 323 ++++++++++++++++++++------ utils/self-extr-exec/decompressor.cpp | 272 ++++++++++++++++++---- 2 files changed, 480 insertions(+), 115 deletions(-) diff --git a/utils/self-extr-exec/compressor.cpp b/utils/self-extr-exec/compressor.cpp index 
996a7080a6c..4a067f45890 100644 --- a/utils/self-extr-exec/compressor.cpp +++ b/utils/self-extr-exec/compressor.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -7,6 +8,47 @@ #include #include +/* +Overview of compression: + ______________________ + | Decompressor | + |----------------------| + | Compressed file 1 | + | Compressed file 2 | + | ... | + |----------------------| + | Info about 1 file | + | Info about 2 file | + | ... | + |----------------------| + | Metadata | + |______________________| +*/ + +/* +Metadata contains: + 1) number of files to support multiple file compression + 2) start_of_files_data to know start of files metadata + 3) end of binary to know start of compressed data + 4) uncompressed data size +*/ +struct MetaData +{ + size_t number_of_files = 0; + size_t start_of_files_data = 0; +}; + +/// Information about each file for correct extraction. +/// Each file data is followed by name of file +/// with length equals to name_length. +struct FileData +{ + size_t start = 0; + size_t end = 0; + size_t name_length = 0; + size_t uncompressed_size = 0; +}; + /// Main compression part int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offset, off_t input_size, off_t output_size, ZSTD_CCtx * cctx) @@ -23,41 +65,27 @@ int doCompress(char * input, char * output, off_t & in_offset, off_t & out_offse } /// compress data from opened file into output file -int compress(int in_fd, int out_fd, int level=5) +int compress(int in_fd, int out_fd, int level, off_t& pointer, const struct stat& info_in) { - /// read data about input file - struct stat info_in; - fstat(in_fd, &info_in); - if (info_in.st_size == 0) { - std::cout << "Empty input file" << std::endl; - return 1; - } - std::cout << "In current size is " << info_in.st_size << std::endl; - - /// Read data about output file. - /// Compressed data will be added to the end of file - /// It will allow to create self extracting executable from file - struct stat info_out; - fstat(out_fd, &info_out); - std::cout << "Out current size is " << info_out.st_size << std::endl; - - /// NOTE: next parametrs depend on binary size - // 6402520 is size of stripped decompressor - size_t start = 6405000ull; - - // 22558008ull size of decompressor - // size_t start = 22558008ull; - /// As experiments showed, size of compressed file is 4 times less than clickhouse executable /// Get a little bit more memory to prevent errors with size. 
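     /// (A worked example with hypothetical numbers, not taken from the patch:
     /// for a 2 GiB input the reservation below is pointer + 2 GiB / 3, about
     /// 683 MiB of headroom, against the ~512 MiB the 4x observation implies.)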
/// For compression this difference will not be huge - ftruncate(out_fd, start + info_in.st_size / 3); - off_t in_offset = 0, out_offset = start; + if (0 != ftruncate(out_fd, pointer + info_in.st_size / 3)) + { + perror(nullptr); + return 1; + } + off_t in_offset = 0; /// mmap files - char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE , in_fd, 0)); - char * output = static_cast(mmap(nullptr, start + info_in.st_size / 3, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, out_fd, 0)); - if (input == reinterpret_cast(-1) || output == reinterpret_cast(-1)) + char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE, in_fd, 0)); + char * output = static_cast( + mmap(nullptr, pointer + info_in.st_size / 3, + PROT_READ | PROT_WRITE, MAP_SHARED, + out_fd, + 0) + ); + if (input == MAP_FAILED || output == MAP_FAILED) { perror(nullptr); return 1; @@ -65,21 +93,36 @@ int compress(int in_fd, int out_fd, int level=5) /// Create context ZSTD_CCtx * cctx = ZSTD_createCCtx(); - ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level); - ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); + if (cctx == nullptr) + { + std::cout << "Failed to create context for compression" << std::endl; + return 1; + } - /// Remember size of file. It will help to avoid using additional memory - /// during decompression - char * file_size = reinterpret_cast(&info_in.st_size); - for (size_t i = 0; i < sizeof(info_in.st_size)/sizeof(char); ++i) - output[out_offset++] = *(file_size + i); + size_t check_result; + /// Set level and enable checksums + check_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level); + if (ZSTD_isError(check_result)) + { + std::cout << "Failed to set compression level: " + std::string(ZSTD_getErrorName(check_result)) << std::endl; + return 1; + } + check_result = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); + if (ZSTD_isError(check_result)) + { + std::cout << "Failed to set checksums: " + std::string(ZSTD_getErrorName(check_result)) << std::endl; + return 1; + } /// limits for size of block to prevent high memory usage or bad compression off_t max_block_size = 1ull<<27; off_t min_block_size = 1ull<<23; off_t size = 0; - std::cout << in_offset << " " << out_offset << std::endl; + + /// TODO: Maybe make better information instead of offsets + std::cout << "Current offset in infile is|\t Output pointer is " << std::endl; + std::cout << in_offset << "\t\t\t\t" << pointer << std::endl; /// Compress data while (in_offset < info_in.st_size) @@ -92,20 +135,152 @@ int compress(int in_fd, int out_fd, int level=5) size = max_block_size; /// Compress data or exit if error happens - if (0 != doCompress(input, output, in_offset, out_offset, size, ZSTD_compressBound(size), cctx)) + if (0 != doCompress(input, output, in_offset, pointer, size, ZSTD_compressBound(size), cctx)) { - ftruncate(out_fd, info_out.st_size); - munmap(input, info_in.st_size); - munmap(output, start + info_in.st_size / 3); + if (0 != munmap(input, info_in.st_size)) + perror(nullptr); + if (0 != munmap(output, pointer + info_in.st_size / 3)) + perror(nullptr); return 1; } - std::cout << in_offset << " " << out_offset << std::endl; + std::cout << in_offset << "\t\t\t" << pointer << std::endl; } /// Shrink file size and unmap - ftruncate(out_fd, out_offset); - munmap(input, info_in.st_size); - munmap(output, start + info_in.st_size / 3); + if (0 != ftruncate(out_fd, pointer) || 0 != munmap(input, info_in.st_size) || + 0 != munmap(output, pointer + info_in.st_size / 3)) + 
{ + perror(nullptr); + return 1; + } + return 0; +} + +/// Save Metadata at the end of file +int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& metadata, + FileData* files_data, size_t pointer, size_t sum_file_size) +{ + /// Allocate memory for metadata + if (0 != ftruncate(output_fd, pointer + count * sizeof(FileData) + sum_file_size + sizeof(MetaData))) + { + perror(nullptr); + return 1; + } + + char * output = static_cast( + mmap(nullptr, + pointer + count * sizeof(FileData) + sum_file_size + sizeof(MetaData), + PROT_READ | PROT_WRITE, MAP_SHARED, + output_fd, + 0) + ); + if (output == MAP_FAILED) + { + perror(nullptr); + return 1; + } + + /// save information about files and their names + for (int i = 0; i < count; ++i) + { + /// Save file data + memcpy(output + pointer, reinterpret_cast(files_data + i), sizeof(FileData)); + pointer += sizeof(FileData); + + /// Save file name + memcpy(output + pointer, filenames[i], files_data[i].name_length); + pointer += files_data[i].name_length; + } + + /// Save metadata + memcpy(output + pointer, reinterpret_cast(&metadata), sizeof(MetaData)); + return 0; +} + +/// Fills metadata and calls compression function for each file +int compressFiles(char* filenames[], int count, int output_fd, int level, const struct stat& info_out) +{ + /// TODO: check that compression can be done (?) + /// It is difficult to predict compressed size and + /// the upper estimate of memory (size + 1/3 sum_of_files_size) + /// is very rude and can fail even if compression can be successfully done + + MetaData metadata; + size_t sum_file_size = 0; + metadata.number_of_files = count; + off_t pointer = info_out.st_size; + + /// Store information about each file and compress it + FileData* files_data = new FileData[count]; + char * names[count]; + for (int i = 0; i < count; ++i) + { + std::cout << "Start compression for " << filenames[i] << std::endl; + + int input_fd = open(filenames[i], O_RDONLY); + if (input_fd == -1) + { + perror(nullptr); + delete [] files_data; + return 1; + } + + /// Remember information about file name + /// This should be made after the file is opened + /// because filename should be extracted from path + names[i] = strrchr(filenames[i], '/') + 1; + files_data[i].name_length = strlen(names[i]); + sum_file_size += files_data[i].name_length; + + /// read data about input file + struct stat info_in; + if (0 != fstat(input_fd, &info_in)) + { + perror(nullptr); + delete [] files_data; + return 1; + } + + if (info_in.st_size == 0) { + std::cout << "Empty input file will be skipped." << std::endl; + continue; + } + + std::cout << "Input file current size is " << info_in.st_size << std::endl; + + /// Remember information about uncompressed size of file and + /// start of it's compression version + files_data[i].uncompressed_size = info_in.st_size; + files_data[i].start = pointer; + + /// Compressed data will be added to the end of file + /// It will allow to create self extracting executable from file + if (0 != compress(input_fd, output_fd, level, pointer, info_in)) + { + perror(nullptr); + delete [] files_data; + return 1; + } + + /// This error is less important, than others. 
+ /// If file cannot be closed, in some cases it will lead to + /// error in other function that will stop compression process + if (0 != close(input_fd)) + perror(nullptr); + + files_data[i].end = pointer; + } + + /// save location of files information + metadata.start_of_files_data = pointer; + + if (0 != saveMetaData(names, count, output_fd, metadata, files_data, pointer, sum_file_size)) + { + delete [] files_data; + return 1; + } + + delete [] files_data; return 0; } @@ -113,36 +288,54 @@ int main(int argc, char* argv[]) { if (argc < 3) { - std::cout << "Not enough arguments.\ncompressor [file that should be compressed] [file name for compressed file] [OPTIONAL level of compression]" << std::endl; - return 0; - } - - int input_fd = open(argv[1], O_RDONLY); - if (input_fd == -1) - { - perror(nullptr); + std::cout << "Not enough arguments.\ncompressor [OPTIONAL --level of compression] [file name for compressed file] [files that should be compressed]" << std::endl; return 0; } - int output_fd = open(argv[2], O_RDWR | O_CREAT, 0775); + int start_of_files = 1; + + /// Set compression level + int level = 5; + if (0 == memcmp(argv[1], "--level=", 8)) + { + level = strtol(argv[argc - 1], nullptr, 10); + ++start_of_files; + } + + int output_fd = open(argv[start_of_files], O_RDWR | O_CREAT, 0775); if (output_fd == -1) { perror(nullptr); - return 0; + return 1; + } + ++start_of_files; + + struct stat info_out; + if (0 != fstat(output_fd, &info_out)) + { + perror(nullptr); + return 1; } - int result; - if (argc == 4) - result = compress(input_fd, output_fd, strtol(argv[3], nullptr, 10)); + if (0 != compressFiles(&argv[start_of_files], argc - start_of_files, output_fd, level, info_out)) + { + std::cout << "Compression was not successful." << std::endl; + + /// Cancel changes. Reset the file to its original state + if (0 != ftruncate(output_fd, info_out.st_size)) + { + perror(nullptr); + } + } else - result = compress(input_fd, output_fd); - - if (result == 0) + { std::cout << "Successfully compressed" << std::endl; - else - std::cout << "An error has occurred" << std::endl; + } - close(input_fd); - close(output_fd); + if (0 != close(output_fd)) + { + perror(nullptr); + return 1; + } return 0; } diff --git a/utils/self-extr-exec/decompressor.cpp b/utils/self-extr-exec/decompressor.cpp index 2c30342f28b..4a102ad526a 100644 --- a/utils/self-extr-exec/decompressor.cpp +++ b/utils/self-extr-exec/decompressor.cpp @@ -1,13 +1,39 @@ #include #include +#include #include #include +#include #include #include #include #include #include +/* +Metadata contains: + 1) number of files to support multiple file compression + 2) start_of_files_data to know start of files metadata + 3) end of binary to know start of compressed data + 4) uncompressed data size +*/ +struct MetaData +{ + size_t number_of_files = 0; + size_t start_of_files_data = 0; +}; + +/// Information about each file for correct extraction. +/// Each file data is followed by name of file +/// with length equals to name_length. 
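+/// (A hedged sketch, not part of this patch, of how a reader can locate the
+/// trailing metadata described above; error handling omitted:
+///     MetaData md;
+///     lseek(fd, -static_cast<off_t>(sizeof(MetaData)), SEEK_END);
+///     read(fd, &md, sizeof(md));
+///     lseek(fd, md.start_of_files_data, SEEK_SET);
+///     /// md.number_of_files FileData records follow, each trailed by its name.)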
+struct FileData
+{
+    size_t start = 0;
+    size_t end = 0;
+    size_t name_length = 0;
+    size_t uncompressed_size = 0;
+};
+
 /// decompress part
 int doDecompress(char * input, char * output, off_t & in_offset, off_t & out_offset,
     off_t input_size, off_t output_size, ZSTD_DCtx* dctx)
@@ -23,65 +49,172 @@ int doDecompress(char * input, char * output, off_t & in_offset, off_t & out_off
 }
 
 /// decompress data from in_fd into out_fd
-int decompress(int in_fd, int out_fd)
+int decompress(char * input, char * output, off_t start, off_t end)
+{
+    off_t in_pointer = start, out_pointer = 0;
+    off_t size = 0;
+    off_t max_block_size = 1ull<<27;
+
+    /// Create context
+    ZSTD_DCtx * dctx = ZSTD_createDCtx();
+    if (dctx == nullptr)
+    {
+        printf("Failed to create context for decompression");
+        return 1;
+    }
+
+    /// Decompress data frame by frame
+    while (in_pointer < end)
+    {
+        size = ZSTD_findFrameCompressedSize(input + in_pointer, max_block_size);
+
+        /// Decompress the frame or exit if an error happens
+        if (0 != doDecompress(input, output, in_pointer, out_pointer, size, max_block_size, dctx))
+            return 1;
+    }
+
+    return 0;
+}
+
+
+/// Read data about files and decompress them.
+int decompressFiles(int input_fd, char* argv[])
 {
     /// Read data about output file.
     /// Compressed data will replace data in file
     struct stat info_in;
-    fstat(in_fd, &info_in);
-
-    /// NOTE: next parametrs depend on binary size
-    // 22558008ull for full, 6405000ull for stripped;
-    off_t in_offset = 6405000ull /*size of decompressor*/, out_offset = 0;
-
-    /// mmap files
-    char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_SHARED , in_fd, 0));
-    if (input == reinterpret_cast(-1))
+    if (0 != fstat(input_fd, &info_in))
     {
         perror(nullptr);
         return 1;
     }
 
-    /// Create context
-    ZSTD_DCtx * dctx = ZSTD_createDCtx();
-
-    /// Read size of file. It will help to avoid using additional memory
-    /// during decompression.
- size_t * file_size = reinterpret_cast(input + in_offset); - in_offset += sizeof(size_t); - - /// Prepare output file - ftruncate(out_fd, *file_size); - char * output = static_cast(mmap(nullptr, *file_size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED, out_fd, 0)); - if (output == reinterpret_cast(-1)) + /// mmap input file + char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE , input_fd, 0)); + if (input == MAP_FAILED) { perror(nullptr); return 1; - } + } - off_t size = 0; - off_t max_block_size = 1ull<<27; + /// Read metadata from end of file + MetaData metadata = *reinterpret_cast(input + info_in.st_size - sizeof(MetaData)); - /// Compress data - while (in_offset < info_in.st_size) + /// Prepare to read information about files and decompress them + off_t files_pointer = metadata.start_of_files_data; + size_t decompressed_full_size = 0; + + /// Read files metadata and check if decompression is possible + off_t check_pointer = metadata.start_of_files_data; + for (size_t i = 0; i < metadata.number_of_files; ++i) { - size = ZSTD_findFrameCompressedSize(input + in_offset, max_block_size); + FileData data = *reinterpret_cast(input + check_pointer); + decompressed_full_size += data.uncompressed_size; + check_pointer += sizeof(FileData) + data.name_length; + } - /// Compress data or exit if error happens - if (0 != doDecompress(input, output, in_offset, out_offset, size, max_block_size, dctx)) + /// Check free space + struct statfs fs_info; + if (0 != fstatfs(input_fd, &fs_info)) + { + perror(nullptr); + if (0 != munmap(input, info_in.st_size)) + perror(nullptr); + return 1; + } + if (fs_info.f_blocks * info_in.st_blksize < decompressed_full_size) + { + printf("Not enough space for decompression. Have %lu, need %zu.", + fs_info.f_blocks * info_in.st_blksize, decompressed_full_size); + return 1; + } + + FileData file_info; + /// Decompress files with appropriate file names + for (size_t i = 0; i < metadata.number_of_files; ++i) + { + /// Read information about file + file_info = *reinterpret_cast(input + files_pointer); + files_pointer += sizeof(FileData); + char file_name[file_info.name_length]; + memcpy(file_name, input + files_pointer, file_info.name_length); + files_pointer += file_info.name_length; + + /// Open file for decompressed data + int output_fd; + /// Check that name differs from executable filename + if (0 == memcmp(file_name, strrchr(argv[0], '/') + 1, file_info.name_length)) { - munmap(input, info_in.st_size); - munmap(output, *file_size); + /// Add .decompressed + char new_file_name[file_info.name_length + 13]; + memcpy(new_file_name, file_name, file_info.name_length); + memcpy(new_file_name + file_info.name_length, ".decompressed", 13); + output_fd = open(new_file_name, O_RDWR | O_CREAT, 0775); + } + else + { + output_fd = open(file_name, O_RDWR | O_CREAT, 0775); + } + if (output_fd == -1) + { + perror(nullptr); + if (0 != munmap(input, info_in.st_size)) + perror(nullptr); + return 0; + } + + /// Prepare output file + if (0 != ftruncate(output_fd, file_info.uncompressed_size)) + { + perror(nullptr); + if (0 != munmap(input, info_in.st_size)) + perror(nullptr); return 1; } - } - /// Shrink file size and unmap - munmap(output, *file_size); - munmap(input, info_in.st_size); + char * output = static_cast( + mmap(nullptr, + file_info.uncompressed_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + output_fd, + 0) + ); + if (output == MAP_FAILED) + { + perror(nullptr); + if (0 != munmap(input, info_in.st_size)) + perror(nullptr); + return 1; + } + + /// 
Decompress data into file
+    if (0 != decompress(input, output, file_info.start, file_info.end))
+    {
+        if (0 != munmap(input, info_in.st_size))
+            perror(nullptr);
+        if (0 != munmap(output, file_info.uncompressed_size))
+            perror(nullptr);
+        return 1;
+    }
+
+    /// TODO: return 1?
+    if (0 != fsync(output_fd))
+        perror(nullptr);
+    if (0 != close(output_fd))
+        perror(nullptr);
+    }
+
+    if (0 != munmap(input, info_in.st_size))
+        perror(nullptr);
     return 0;
 }
 
+void fill(char * dest, char * source, size_t length, size_t& shift)
+{
+    memcpy(dest + shift, source, length);
+    shift += length;
+}
+
 int main(int /*argc*/, char* argv[])
 {
     int input_fd = open(argv[0], O_RDONLY);
@@ -90,25 +223,64 @@ int main(int /*argc*/, char* argv[])
         perror(nullptr);
         return 0;
     }
-
-    int output_fd = open("clickhouse.decompressed", O_RDWR | O_CREAT, 0775);
-    if (output_fd == -1)
-    {
-        perror(nullptr);
-        return 0;
-    }
 
-    if (0 != decompress(input_fd, output_fd))
+    /// Decompress all files
+    if (0 != decompressFiles(input_fd, argv))
     {
+        printf("Error happened");
+        if (0 != close(input_fd))
+            perror(nullptr);
         return 1;
     }
 
-    fsync(output_fd);
-    close(output_fd);
-    close(input_fd);
+    if (0 != close(input_fd))
+        perror(nullptr);
 
-    /// NOTE: This command should not depend on any variables.
-    /// It should be changed if file changes.
+    /// According to documentation `mv` will rename the file if it
+    /// doesn't move it to another directory.
+    /// Sometimes `rename` doesn't exist by default and
+    /// `rename.ul` is set instead. It will lead to errors
+    /// that can be easily avoided with the help of `mv`
+
+    // /// TODO: decompressor name can differ from executable
+    // char bash[] = "/usr/bin/bash";
+    // size_t length = 0;
+    // for (int i = 1; i < argc; ++i)
+    //     length += strlen(argv[i]);
+    // /// mv filename.decompressed filename && filename args...
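+    // (Hedged illustration with hypothetical values, not from the patch: for
+    //  argv[0] == "./clickhouse" run with the single argument "client", the
+    //  dead code below would assemble
+    //      bash -c "mv ./clickhouse.decompressed ./clickhouse && ./clickhouse client"
+    //  and hand it to execve().)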
+ // char command[8 + 3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc - 1]; + // memset(command, '\0', 8 + 3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc - 1); + + // /// fill command + // size_t shift = 0; + // char executable[] = "bash -c "; + // char mv[] = "mv "; + // char decompressed[] = ".decompressed "; + // char add_command[] = " && "; + // char space[] = " "; + // fill(command, executable, 8, shift); + // fill(command, mv, 3, shift); + // fill(command, argv[0], strlen(argv[0]), shift); + // fill(command, decompressed, 14, shift); + // fill(command, argv[0], strlen(argv[0]), shift); + // fill(command, add_command, 4, shift); + // fill(command, argv[0], strlen(argv[0]), shift); + // fill(command, space, 1, shift); + // for (int i = 1; i < argc; ++i) + // { + // fill(command, argv[i], strlen(argv[i]), shift); + // if (i != argc - 1) + // fill(command, space, 1, shift); + // } + // printf("%s", command); + // fflush(stdout); + + // char *newargv[] = { bash, command, nullptr }; + // char *newenviron[] = { nullptr }; + // execve("/usr/bin/bash", newargv, newenviron); + + /// This part of code will be reached only if error happened execl("/usr/bin/bash", "bash", "-c", "mv ./clickhouse.decompressed ./clickhouse", NULL); - return 0; + perror(nullptr); + return 1; } From c01da8b7535d591f35a97ae2f99e3e7ac8c5f16d Mon Sep 17 00:00:00 2001 From: FArthur-cmd <613623@mail.ru> Date: Thu, 7 Apr 2022 12:31:41 +0000 Subject: [PATCH 021/669] remove whitespace --- utils/self-extr-exec/decompressor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/self-extr-exec/decompressor.cpp b/utils/self-extr-exec/decompressor.cpp index 4a102ad526a..34c6e0daad5 100644 --- a/utils/self-extr-exec/decompressor.cpp +++ b/utils/self-extr-exec/decompressor.cpp @@ -90,7 +90,7 @@ int decompressFiles(int input_fd, char* argv[]) } /// mmap input file - char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE , input_fd, 0)); + char * input = static_cast(mmap(nullptr, info_in.st_size, PROT_READ, MAP_PRIVATE, input_fd, 0)); if (input == MAP_FAILED) { perror(nullptr); From 99b14084f0ae47033b6ec50b7d5b88e200722593 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 7 Apr 2022 14:38:56 +0200 Subject: [PATCH 022/669] Update contrib/sysroot --- contrib/sysroot | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/sysroot b/contrib/sysroot index bbcac834526..e9fb375d0a1 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit bbcac834526d90d1e764164b861be426891d1743 +Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8 From 5b69caf38385061d4c5f8f38cb8c26dff2130e6b Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 7 Apr 2022 15:18:26 +0200 Subject: [PATCH 023/669] Update codebrowser clang to 14 --- docker/test/codebrowser/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index 102c2d4c697..8df03dcf62b 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -15,7 +15,7 @@ RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-14 lib # https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser -RUN cd woboq_codebrowser && cmake . 
-DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-13 -DCMAKE_C_COMPILER=clang-13 && make -j +RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-14 -DCMAKE_C_COMPILER=clang-14 && make -j ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator @@ -28,7 +28,7 @@ ENV SHA=nosha ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data" CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \ - cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-13 -DCMAKE_C_COMPILER=/usr/bin/clang-13 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \ + cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-14 -DCMAKE_C_COMPILER=/usr/bin/clang-14 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \ mkdir -p $HTML_RESULT_DIRECTORY && \ $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \ cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\ From 20aee231bd736a333ad04c773985a6df9ceafaeb Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 8 Apr 2022 13:41:39 +0200 Subject: [PATCH 024/669] Improve codebrowser image, use a temporary branch as source --- docker/test/codebrowser/Dockerfile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile index 8df03dcf62b..c7aed618f6a 100644 --- a/docker/test/codebrowser/Dockerfile +++ b/docker/test/codebrowser/Dockerfile @@ -13,9 +13,13 @@ RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-14 lib # repo versions doesn't work correctly with C++17 # also we push reports to s3, so we add index.html to subfolder urls # https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b -RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser - -RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-14 -DCMAKE_C_COMPILER=clang-14 && make -j +# TODO: remove branch in a few weeks after merge, e.g. in May or June 2022 +RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser --branch llvm-14 \ + && cd woboq_codebrowser \ + && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-14 -DCMAKE_C_COMPILER=clang-14 \ + && make -j \ + && cd .. 
\ + && rm -rf woboq_codebrowser ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator From a54c01cf723d993cb24df60cf076d74fd8973fcf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 11 Apr 2022 00:44:30 +0200 Subject: [PATCH 025/669] Remove useless code in ReplicatedMergeTreeRestartingThread --- .../Impl/ParallelFormattingOutputFormat.cpp | 13 +++++++------ .../MergeTree/MergeFromLogEntryTask.cpp | 1 - .../ReplicatedMergeTreeRestartingThread.cpp | 19 ------------------- src/Storages/StorageReplicatedMergeTree.cpp | 3 +-- 4 files changed, 8 insertions(+), 28 deletions(-) diff --git a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp index 26be258a34f..dd43cb1f7f7 100644 --- a/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelFormattingOutputFormat.cpp @@ -178,40 +178,41 @@ namespace DB switch (unit.type) { - case ProcessingUnitType::START : + case ProcessingUnitType::START: { formatter->writePrefix(); break; } - case ProcessingUnitType::PLAIN : + case ProcessingUnitType::PLAIN: { formatter->consume(std::move(unit.chunk)); break; } - case ProcessingUnitType::PLAIN_FINISH : + case ProcessingUnitType::PLAIN_FINISH: { formatter->writeSuffix(); break; } - case ProcessingUnitType::TOTALS : + case ProcessingUnitType::TOTALS: { formatter->consumeTotals(std::move(unit.chunk)); break; } - case ProcessingUnitType::EXTREMES : + case ProcessingUnitType::EXTREMES: { if (are_totals_written) formatter->setTotalsAreWritten(); formatter->consumeExtremes(std::move(unit.chunk)); break; } - case ProcessingUnitType::FINALIZE : + case ProcessingUnitType::FINALIZE: { formatter->setOutsideStatistics(std::move(unit.statistics)); formatter->finalizeImpl(); break; } } + /// Flush all the data to handmade buffer. formatter->flush(); unit.actual_memory_size = out_buffer.getActualSize(); diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 9459849b90a..91be83ace16 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -181,7 +181,6 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() future_merged_part->updatePath(storage, reserved_space.get()); future_merged_part->merge_type = entry.merge_type; - if (storage_settings_ptr->allow_remote_fs_zero_copy_replication) { if (auto disk = reserved_space->getDisk(); disk->getType() == DB::DiskType::S3) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index dc52660f1f6..9b4234a99e5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -304,25 +304,6 @@ void ReplicatedMergeTreeRestartingThread::activateReplica() String is_active_path = fs::path(storage.replica_path) / "is_active"; - /** If the node is marked as active, but the mark is made in the same instance, delete it. - * This is possible only when session in ZooKeeper expires. 
- */ - String data; - Coordination::Stat stat; - bool has_is_active = zookeeper->tryGet(is_active_path, data, &stat); - if (has_is_active && data == active_node_identifier) - { - auto code = zookeeper->tryRemove(is_active_path, stat.version); - - if (code == Coordination::Error::ZBADVERSION) - throw Exception("Another instance of replica " + storage.replica_path + " was created just now." - " You shouldn't run multiple instances of same replica. You need to check configuration files.", - ErrorCodes::REPLICA_IS_ALREADY_ACTIVE); - - if (code != Coordination::Error::ZOK && code != Coordination::Error::ZNONODE) - throw Coordination::Exception(code, is_active_path); - } - /// Simultaneously declare that this replica is active, and update the host. Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(is_active_path, active_node_identifier, zkutil::CreateMode::Ephemeral)); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 66a5baf555b..9dc733739a5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -35,7 +35,6 @@ #include #include - #include #include @@ -84,6 +83,7 @@ #include #include + namespace fs = std::filesystem; namespace ProfileEvents @@ -5735,7 +5735,6 @@ void StorageReplicatedMergeTree::fetchPartition( String best_replica; { - /// List of replicas of source shard. replicas = zookeeper->getChildren(fs::path(from) / "replicas"); From b4cd8561a5e1d12621e73d21bbc3cc5cc8d31ef8 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 13 Apr 2022 16:06:19 +0800 Subject: [PATCH 026/669] wip --- src/Disks/IDiskRemote.cpp | 2 +- ...chronousReadIndirectBufferFromRemoteFS.cpp | 2 +- src/Disks/IO/ThreadPoolRemoteFSReader.cpp | 12 +- src/Disks/IO/ThreadPoolRemoteFSReader.h | 17 +- .../HDFS/AsynchronousReadBufferFromHDFS.cpp | 243 ++++++++++++++++++ .../HDFS/AsynchronousReadBufferFromHDFS.h | 49 ++++ src/Storages/HDFS/ReadBufferFromHDFS.cpp | 10 + src/Storages/HDFS/ReadBufferFromHDFS.h | 5 + 8 files changed, 324 insertions(+), 16 deletions(-) create mode 100644 src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp create mode 100644 src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h diff --git a/src/Disks/IDiskRemote.cpp b/src/Disks/IDiskRemote.cpp index ead951084ad..405c0a84fc0 100644 --- a/src/Disks/IDiskRemote.cpp +++ b/src/Disks/IDiskRemote.cpp @@ -645,7 +645,7 @@ AsynchronousReaderPtr IDiskRemote::getThreadPoolReader() { constexpr size_t pool_size = 50; constexpr size_t queue_size = 1000000; - static AsynchronousReaderPtr reader = std::make_shared(pool_size, queue_size); + static AsynchronousReaderPtr reader = std::make_shared>(pool_size, queue_size); return reader; } diff --git a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp index e693a8e9ea8..665c3c67126 100644 --- a/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp +++ b/src/Disks/IO/AsynchronousReadIndirectBufferFromRemoteFS.cpp @@ -101,7 +101,7 @@ bool AsynchronousReadIndirectBufferFromRemoteFS::hasPendingDataToRead() std::future AsynchronousReadIndirectBufferFromRemoteFS::readInto(char * data, size_t size) { IAsynchronousReader::Request request; - request.descriptor = std::make_shared(impl); + request.descriptor = std::make_shared>(impl); request.buf = data; request.size = size; request.offset = file_offset_of_buffer_end; diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp 
b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp index b1ae42d03d6..e63008b3906 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.cpp +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.cpp @@ -27,20 +27,22 @@ namespace CurrentMetrics namespace DB { - -ReadBufferFromRemoteFSGather::ReadResult ThreadPoolRemoteFSReader::RemoteFSFileDescriptor::readInto(char * data, size_t size, size_t offset, size_t ignore) +template +IAsynchronousReader::Result RemoteFSFileDescriptor::readInto(char * data, size_t size, size_t offset, size_t ignore) { return reader->readInto(data, size, offset, ignore); } -ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_) +template +ThreadPoolRemoteFSReader::ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_) : pool(pool_size, pool_size, queue_size_) { } -std::future ThreadPoolRemoteFSReader::submit(Request request) +template +std::future ThreadPoolRemoteFSReader::submit(Request request) { ThreadGroupStatusPtr running_group = CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup() ? CurrentThread::get().getThreadGroup() @@ -65,7 +67,7 @@ std::future ThreadPoolRemoteFSReader::submit(Reques setThreadName("VFSRead"); CurrentMetrics::Increment metric_increment{CurrentMetrics::Read}; - auto * remote_fs_fd = assert_cast(request.descriptor.get()); + auto * remote_fs_fd = assert_cast *>(request.descriptor.get()); Stopwatch watch(CLOCK_MONOTONIC); diff --git a/src/Disks/IO/ThreadPoolRemoteFSReader.h b/src/Disks/IO/ThreadPoolRemoteFSReader.h index b2d5f11724a..c2690deacad 100644 --- a/src/Disks/IO/ThreadPoolRemoteFSReader.h +++ b/src/Disks/IO/ThreadPoolRemoteFSReader.h @@ -10,30 +10,29 @@ namespace DB { +template class ThreadPoolRemoteFSReader : public IAsynchronousReader { - -private: - ThreadPool pool; - public: ThreadPoolRemoteFSReader(size_t pool_size, size_t queue_size_); std::future submit(Request request) override; - struct RemoteFSFileDescriptor; +private: + ThreadPool pool; }; -struct ThreadPoolRemoteFSReader::RemoteFSFileDescriptor : public IFileDescriptor +template +class RemoteFSFileDescriptor : public IAsynchronousReader::IFileDescriptor { public: - explicit RemoteFSFileDescriptor(std::shared_ptr reader_) : reader(reader_) {} + explicit RemoteFSFileDescriptor(std::shared_ptr reader_) : reader(std::move(reader_)) { } - ReadBufferFromRemoteFSGather::ReadResult readInto(char * data, size_t size, size_t offset, size_t ignore = 0); + IAsynchronousReader::Result readInto(char * data, size_t size, size_t offset, size_t ignore = 0); private: - std::shared_ptr reader; + std::shared_ptr reader; }; } diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp new file mode 100644 index 00000000000..a5d550b353c --- /dev/null +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -0,0 +1,243 @@ +#include "AsynchronousReadBufferFromHDFS.h" + +#if USE_HDFS +#include +#include +#include +#include + +namespace ProfileEvents +{ + extern const Event AsynchronousReadWaitMicroseconds; +} + +namespace CurrentMetrics +{ + extern const Metric AsynchronousReadWait; +} + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NETWORK_ERROR; + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_SEEK_THROUGH_FILE; + extern const int SEEK_POSITION_OUT_OF_BOUND; + extern const int LOGICAL_ERROR; +} + +AsynchronousReadBufferFromHDFS::~AsynchronousReadBufferFromHDFS() = default; + +class AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFSImpl : public 
BufferWithOwnMemory +{ +public: + explicit AsynchronousReadBufferFromHDFSImpl( + std::shared_ptr in_, + const std::string & hdfs_uri_, + const std::string & hdfs_file_path_, + const Poco::Util::AbstractConfiguration & config_, + size_t buf_size_, size_t read_until_position_) + : BufferWithOwnMemory(buf_size_) + , in(in_) + , hdfs_uri(hdfs_uri_) + , hdfs_file_path(hdfs_file_path_) + , builder(createHDFSBuilder(hdfs_uri_, config_)) + , read_until_position(read_until_position_) + { + fs = createHDFSFS(builder.get()); + fin = hdfsOpenFile(fs.get(), hdfs_file_path.c_str(), O_RDONLY, 0, 0, 0); + + if (fin == nullptr) + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, + "Unable to open HDFS file: {}. Error: {}", + hdfs_uri + hdfs_file_path, std::string(hdfsGetLastError())); + } + + ~AsynchronousReadBufferFromHDFSImpl() override + { + hdfsCloseFile(fs.get(), fin); + } + + std::optional getTotalSize() const + { + auto * file_info = hdfsGetPathInfo(fs.get(), hdfs_file_path.c_str()); + if (!file_info) + return std::nullopt; + return file_info->mSize; + } + + bool nextImpl() override + { + if (prefetch_future.valid()) + { + /// Read request already in flight. Wait for its completion. + size_t size = 0; + size_t offset = 0; + { + Stopwatch watch; + CurrentMetrics::Increment metric_increment{CurrentMetrics::AsynchronousReadWait}; + auto result = prefetch_future.get(); + size = result.size; + offset = result.offset; + assert(offset < size || size == 0); + ProfileEvents::increment(ProfileEvents::AsynchronousReadWaitMicroseconds, watch.elapsedMicroseconds()); + } + prefetch_future = {}; + + if (size) + { + prefetch_buffer.swap(memory); + /// Adjust the working buffer so that it ignores `offset` bytes. + setWithBytesToIgnore(memory.data(), size, offset); + return true; + } + return false; + } + else + { + /// No pending request. Do synchronous read. + auto [size, offset] = readInto(memory.data(), memory.size()).get(); + file_offset_of_buffer_end += size; + + if (size) + { + /// Adjust the working buffer so that it ignores `offset` bytes. + setWithBytesToIgnore(memory.data(), size, offset); + return true; + } + + return false; + } + } + + off_t seek(off_t file_offset_, int whence) override + { + if (whence != SEEK_SET) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Only SEEK_SET is supported"); + + file_offset_of_buffer_end = file_offset_; + int seek_status = hdfsSeek(fs.get(), fin, file_offset_of_buffer_end); + if (seek_status != 0) + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Fail to seek HDFS file: {}, error: {}", hdfs_uri, std::string(hdfsGetLastError())); + return file_offset_of_buffer_end; + } + + off_t getPosition() override + { + return file_offset_of_buffer_end; + } + + void prefetch() override + { + if (prefetch_future.valid()) + return; + + /// Will request the same amount of data that is read in nextImpl. 
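+        /// (The prefetch()/nextImpl() pair implements a simple double buffer:
+        ///     prefetch():  prefetch_future = readInto(prefetch_buffer.data(), prefetch_buffer.size());
+        ///     nextImpl():  result = prefetch_future.get(); prefetch_buffer.swap(memory);
+        /// so the wait for the remote read overlaps the caller's own work.)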
+ prefetch_buffer.resize(internal_buffer.size()); + prefetch_future = readInto(prefetch_buffer.data(), prefetch_buffer.size()); + } + + void finalize() + { + if (prefetch_future.valid()) + { + prefetch_future.wait(); + prefetch_future = {}; + } + } + + static AsynchronousReaderPtr getThreadPoolReader() + { + constexpr size_t pool_size = 50; + constexpr size_t queue_size = 1000000; + static AsynchronousReaderPtr reader = std::make_shared>(pool_size, queue_size); + return reader; + } + +private: + std::future readInto(char * data, size_t size) + { + IAsynchronousReader::Request request; + request.descriptor = std::make_shared>(in); + request.buf = data; + request.size = size; + request.offset = file_offset_of_buffer_end; + request.priority = priority; + request.ignore = 0; + return getThreadPoolReader()->submit(request); + } + + std::shared_ptr in; + Int32 priority; + Memory<> prefetch_buffer; + std::future prefetch_future; +}; + +AsynchronousReadBufferFromHDFS::AsynchronousReadBufferFromHDFS( + const String & hdfs_uri_, + const String & hdfs_file_path_, + const Poco::Util::AbstractConfiguration & config_, + size_t buf_size_, size_t read_until_position_) + : SeekableReadBufferWithSize(nullptr, 0) + , impl(std::make_unique(hdfs_uri_, hdfs_file_path_, config_, buf_size_, read_until_position_)) +{ +} + +std::optional AsynchronousReadBufferFromHDFS::getTotalSize() +{ + return impl->getTotalSize(); +} + +bool AsynchronousReadBufferFromHDFS::nextImpl() +{ + impl->position() = impl->buffer().begin() + offset(); + auto result = impl->next(); + + if (result) + BufferBase::set(impl->buffer().begin(), impl->buffer().size(), impl->offset()); /// use the buffer returned by `impl` + + return result; +} + + +off_t AsynchronousReadBufferFromHDFS::seek(off_t offset_, int whence) +{ + if (whence != SEEK_SET) + throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + + if (offset_ < 0) + throw Exception("Seek position is out of bounds. 
Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + + if (!working_buffer.empty() + && size_t(offset_) >= impl->getPosition() - working_buffer.size() + && offset_ < impl->getPosition()) + { + pos = working_buffer.end() - (impl->getPosition() - offset_); + assert(pos >= working_buffer.begin()); + assert(pos <= working_buffer.end()); + + return getPosition(); + } + + resetWorkingBuffer(); + impl->seek(offset_, whence); + return impl->getPosition(); +} + + +off_t AsynchronousReadBufferFromHDFS::getPosition() +{ + return impl->getPosition() - available(); +} + +size_t AsynchronousReadBufferFromHDFS::getFileOffsetOfBufferEnd() const +{ + return impl->getPosition(); +} + +} + +#endif diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h new file mode 100644 index 00000000000..f542d3d3d3e --- /dev/null +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.h @@ -0,0 +1,49 @@ +#pragma once + +#include + +#if USE_HDFS +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +class AsynchronousReadBufferFromHDFS : public SeekableReadBufferWithSize +{ +class AsynchronousReadBufferFromHDFSImpl; + +public: + AsynchronousReadBufferFromHDFS(const String & hdfs_uri_, const String & hdfs_file_path_, + const Poco::Util::AbstractConfiguration & config_, + size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE, + size_t read_until_position_ = 0); + + ~AsynchronousReadBufferFromHDFS() override; + + bool nextImpl() override; + + off_t seek(off_t offset_, int whence) override; + + off_t getPosition() override; + + std::optional getTotalSize() override; + + size_t getFileOffsetOfBufferEnd() const override; + +private: + std::unique_ptr impl; +}; + +} +#endif diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index af4b324cd77..36c3626b754 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -181,6 +181,16 @@ size_t ReadBufferFromHDFS::getFileOffsetOfBufferEnd() const return impl->getPosition(); } +ReadBufferFromHDFS::ReadResult ReadBufferFromHDFS::readInto(char * data, size_t size, size_t offset, size_t ignore) +{ + set(data, size); + seek(offset, SEEK_SET); + auto result = nextImpl(); + if (result) + return {working_buffer.size(), ignore}; + return {0, 0}; +} + } #endif diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.h b/src/Storages/HDFS/ReadBufferFromHDFS.h index e8cdcb27360..cff3e032e61 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.h +++ b/src/Storages/HDFS/ReadBufferFromHDFS.h @@ -5,6 +5,7 @@ #if USE_HDFS #include #include +#include #include #include #include @@ -24,6 +25,8 @@ class ReadBufferFromHDFS : public SeekableReadBufferWithSize struct ReadBufferFromHDFSImpl; public: + using ReadResult = IAsynchronousReader::Result; + ReadBufferFromHDFS(const String & hdfs_uri_, const String & hdfs_file_path_, const Poco::Util::AbstractConfiguration & config_, size_t buf_size_ = DBMS_DEFAULT_BUFFER_SIZE, @@ -41,6 +44,8 @@ public: size_t getFileOffsetOfBufferEnd() const override; + ReadResult readInto(char * data, size_t size, size_t offset, size_t ignore = 0); + private: std::unique_ptr impl; }; From 0baa4f97ec3dc81ecccf13cbc828733ed2918817 Mon Sep 17 00:00:00 2001 From: FArthur-cmd <613623@mail.ru> Date: Thu, 14 Apr 2022 11:43:40 +0000 Subject: [PATCH 027/669] add parallel decompression, forward arguments and fix compression level --- utils/self-extr-exec/compressor.cpp 
From 0baa4f97ec3dc81ecccf13cbc828733ed2918817 Mon Sep 17 00:00:00 2001
From: FArthur-cmd <613623@mail.ru>
Date: Thu, 14 Apr 2022 11:43:40 +0000
Subject: [PATCH 027/669] add parallel decompression, forward arguments and fix compression level

---
 utils/self-extr-exec/compressor.cpp   | 22 +--
 utils/self-extr-exec/decompressor.cpp | 212 +++++++++++++++++++-------
 2 files changed, 166 insertions(+), 68 deletions(-)

diff --git a/utils/self-extr-exec/compressor.cpp b/utils/self-extr-exec/compressor.cpp
index 4a067f45890..19e7f0d9848 100644
--- a/utils/self-extr-exec/compressor.cpp
+++ b/utils/self-extr-exec/compressor.cpp
@@ -25,7 +25,7 @@ Overview of compression:
 |______________________|
 */
 
-/* 
+/*
 Metadata contains:
 1) number of files to support multiple file compression
 2) start_of_files_data to know start of files metadata
@@ -147,7 +147,7 @@ int compress(int in_fd, int out_fd, int level, off_t& pointer, const struct stat
     }
 
     /// Shrink file size and unmap
-    if (0 != ftruncate(out_fd, pointer) || 0 != munmap(input, info_in.st_size) || 
+    if (0 != ftruncate(out_fd, pointer) || 0 != munmap(input, info_in.st_size) ||
         0 != munmap(output, pointer + info_in.st_size / 3))
     {
         perror(nullptr);
@@ -157,7 +157,7 @@ int compress(int in_fd, int out_fd, int level, off_t& pointer, const struct stat
 }
 
 /// Save Metadata at the end of file
-int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& metadata, 
+int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& metadata,
                  FileData* files_data, size_t pointer, size_t sum_file_size)
 {
     /// Allocate memory for metadata
@@ -168,7 +168,7 @@ int saveMetaData(char* filenames[], int count, int output_fd, const MetaData& me
     }
 
     char * output = static_cast(
-        mmap(nullptr, 
+        mmap(nullptr,
             pointer + count * sizeof(FileData) + sum_file_size + sizeof(MetaData),
             PROT_READ | PROT_WRITE, MAP_SHARED,
             output_fd,
@@ -241,7 +241,8 @@ int compressFiles(char* filenames[], int count, int output_fd, int level, const
             return 1;
         }
 
-        if (info_in.st_size == 0) {
+        if (info_in.st_size == 0)
+        {
             std::cout << "Empty input file will be skipped." << std::endl;
             continue;
         }
@@ -252,14 +253,14 @@
         /// start of its compressed version
         files_data[i].uncompressed_size = info_in.st_size;
         files_data[i].start = pointer;
-        
+
         /// Compressed data will be added to the end of file
         /// It will allow to create self extracting executable from file
         if (0 != compress(input_fd, output_fd, level, pointer, info_in))
        {
            perror(nullptr);
            delete [] files_data;
-            return 1; 
+            return 1;
        }

        /// This error is less important, than others.
@@ -291,14 +292,14 @@ int main(int argc, char* argv[])
         std::cout << "Not enough arguments.\ncompressor [OPTIONAL --level of compression] [file name for compressed file] [files that should be compressed]" << std::endl;
         return 0;
     }
-    
+
     int start_of_files = 1;
 
     /// Set compression level
     int level = 5;
     if (0 == memcmp(argv[1], "--level=", 8))
     {
-        level = strtol(argv[argc - 1], nullptr, 10);
+        level = strtol(argv[1] + 8, nullptr, 10);
         ++start_of_files;
     }
@@ -317,10 +318,11 @@
         return 1;
     }
 
+    std::cout << "Compression with level " << level << std::endl;
     if (0 != compressFiles(&argv[start_of_files], argc - start_of_files, output_fd, level, info_out))
     {
         std::cout << "Compression was not successful." << std::endl;
-        
+
         /// Cancel changes. Reset the file to its original state
         if (0 != ftruncate(output_fd, info_out.st_size))
         {
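The compression-level fix in the hunk above is easy to miss: the level used to be parsed from the last argument (`argv[argc - 1]`, i.e. one of the input files) instead of from the option itself (`argv[1] + 8`). A small self-contained sketch of the corrected parsing, with basic validation added; the function name is mine, and the 1–22 bound is an assumption based on ZSTD's usual level range:

#include <cstdio>
#include <cstdlib>
#include <cstring>

/// Editorial sketch, not part of the patch.
/// Parse "--level=N"; returns fallback when the option is absent or malformed.
int parseLevel(int argc, char * argv[], int fallback)
{
    if (argc < 2 || 0 != std::strncmp(argv[1], "--level=", 8))
        return fallback;

    char * end = nullptr;
    long level = std::strtol(argv[1] + 8, &end, 10);   /// parse the digits after '='
    if (end == argv[1] + 8 || *end != '\0' || level < 1 || level > 22)
    {
        std::fprintf(stderr, "Bad --level value, using %d\n", fallback);
        return fallback;
    }
    return static_cast<int>(level);
}

int main(int argc, char * argv[])
{
    std::printf("compression level: %d\n", parseLevel(argc, argv, 5));
}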
diff --git a/utils/self-extr-exec/decompressor.cpp b/utils/self-extr-exec/decompressor.cpp
index 34c6e0daad5..706b09d0d71 100644
--- a/utils/self-extr-exec/decompressor.cpp
+++ b/utils/self-extr-exec/decompressor.cpp
@@ -7,10 +7,11 @@
 #include
 #include
 #include
+#include
 #include
 #include
 
-/* 
+/*
 Metadata contains:
 1) number of files to support multiple file compression
 2) start_of_files_data to know start of files metadata
@@ -41,19 +42,20 @@ int doDecompress(char * input, char * output, off_t & in_offset, off_t & out_off
     size_t decompressed_size = ZSTD_decompressDCtx(dctx, output + out_offset, output_size, input + in_offset, input_size);
     if (ZSTD_isError(decompressed_size))
     {
+        printf("%s\n", ZSTD_getErrorName(decompressed_size));
         return 1;
     }
-    in_offset += input_size;
-    out_offset += decompressed_size;
     return 0;
 }
 
 /// decompress data from in_fd into out_fd
-int decompress(char * input, char * output, off_t start, off_t end)
+int decompress(char * input, char * output, off_t start, off_t end, size_t max_number_of_forks=10)
 {
     off_t in_pointer = start, out_pointer = 0;
     off_t size = 0;
     off_t max_block_size = 1ull<<27;
+    off_t decompressed_size = 0;
+    size_t number_of_forks = 0;
 
     /// Create context
     ZSTD_DCtx * dctx = ZSTD_createDCtx();
@@ -62,23 +64,89 @@ int decompress(char * input, char * output, off_t start, off_t end)
         printf("Failed to create context for compression");
         return 1;
     }
+    pid_t pid;
+    bool error_happened = false;
 
     /// Decompress data
-    while (in_pointer < end)
+    while (in_pointer < end && !error_happened)
     {
         size = ZSTD_findFrameCompressedSize(input + in_pointer, max_block_size);
+        if (ZSTD_isError(size))
+        {
+            printf("%s\n", ZSTD_getErrorName(size));
+            break;
+        }
 
-        /// Compress data or exit if error happens
-        if (0 != doDecompress(input, output, in_pointer, out_pointer, size, max_block_size, dctx))
-            return 1;
+        decompressed_size = ZSTD_getFrameContentSize(input + in_pointer, max_block_size);
+        if (ZSTD_isError(decompressed_size))
+        {
+            printf("%s\n", ZSTD_getErrorName(decompressed_size));
+            break;
+        }
+
+        pid = fork();
+        if (-1 == pid)
+        {
+            perror(nullptr);
+            /// Decompress data in main process. Exit if error happens
+            if (0 != doDecompress(input, output, in_pointer, out_pointer, size, max_block_size, dctx))
+                break;
+        }
+        else if (pid == 0)
+        {
+            /// Decompress data. Exit if error happens
+            if (0 != doDecompress(input, output, in_pointer, out_pointer, size, max_block_size, dctx))
+                exit(1);
+            exit(0);
+        }
+        else
+        {
+            ++number_of_forks;
+            while (number_of_forks >= max_number_of_forks)
+            {
+                /// Wait for any fork
+                int status;
+                waitpid(0, &status, 0);
+
+                /// If error happened, stop processing
+                if (WEXITSTATUS(status) != 0)
+                {
+                    error_happened = true;
+                    break;
+                }
+
+                --number_of_forks;
+            }
+            in_pointer += size;
+            out_pointer += decompressed_size;
+        }
     }
 
+    /// Wait for all in-flight decompression processes
+    while (number_of_forks > 0)
+    {
+        /// Wait for any fork
+        int status;
+        waitpid(0, &status, 0);
+
+        if (WEXITSTATUS(status) != 0)
+        {
+            error_happened = true;
+        }
+
+        --number_of_forks;
+    }
+
+    /// If an error happened, the processed part will not have reached the end
+    if (in_pointer < end || error_happened)
+        return 1;
+
     return 0;
 }
 
 /// Read data about files and decompress them.
-int decompressFiles(int input_fd, char* argv[])
+int decompressFiles(int input_fd, char* argv[], bool & have_compressed_analoge)
 {
     /// Read data about output file.
     /// Compressed data will replace data in file
@@ -124,7 +192,7 @@ int decompressFiles(int input_fd, char* argv[])
     }
 
     if (fs_info.f_blocks * info_in.st_blksize < decompressed_full_size)
     {
-        printf("Not enough space for decompression. Have %lu, need %zu.", 
+        printf("Not enough space for decompression. Have %lu, need %zu.",
             fs_info.f_blocks * info_in.st_blksize, decompressed_full_size);
         return 1;
     }
@@ -136,7 +204,9 @@ int decompressFiles(int input_fd, char* argv[])
         /// Read information about file
         file_info = *reinterpret_cast(input + files_pointer);
         files_pointer += sizeof(FileData);
-        char file_name[file_info.name_length];
+        char file_name[file_info.name_length + 1];
+        /// The filename must be null-terminated
+        memset(file_name, '\0', file_info.name_length + 1);
         memcpy(file_name, input + files_pointer, file_info.name_length);
         files_pointer += file_info.name_length;
 
@@ -150,6 +220,7 @@ int decompressFiles(int input_fd, char* argv[])
             memcpy(new_file_name, file_name, file_info.name_length);
             memcpy(new_file_name + file_info.name_length, ".decompressed", 13);
             output_fd = open(new_file_name, O_RDWR | O_CREAT, 0775);
+            have_compressed_analoge = true;
         }
         else
         {
@@ -173,10 +244,10 @@ int decompressFiles(int input_fd, char* argv[])
         }
 
         char * output = static_cast(
-            mmap(nullptr, 
-                file_info.uncompressed_size,
-                PROT_READ | PROT_WRITE, MAP_SHARED, 
-                output_fd, 
+            mmap(nullptr,
+                file_info.uncompressed_size,
+                PROT_READ | PROT_WRITE, MAP_SHARED,
+                output_fd,
                 0)
             );
         if (output == MAP_FAILED)
@@ -197,25 +268,56 @@ int decompressFiles(int input_fd, char* argv[])
             return 1;
         }
 
-        /// TODO: return 1?
         if (0 != fsync(output_fd))
             perror(nullptr);
         if (0 != close(output_fd))
             perror(nullptr);
     }
-    
+
     if (0 != munmap(input, info_in.st_size))
         perror(nullptr);
     return 0;
 }
 
+/// Copy particular part of command and update shift
 void fill(char * dest, char * source, size_t length, size_t& shift)
 {
     memcpy(dest + shift, source, length);
     shift += length;
 }
 
-int main(int /*argc*/, char* argv[])
+/// Set command to `mv filename.decompressed filename && filename args...`
+void fillCommand(char command[], int argc, char * argv[], size_t length)
+{
+    memset(command, '\0', 3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc);
+
+    /// position in command
+    size_t shift = 0;
+
+    /// Support variables to create command
+    char mv[] = "mv ";
+    char decompressed[] = ".decompressed ";
+    char add_command[] = " && ";
+    char space[] = " ";
+
+    fill(command, mv, 3, shift);
+    fill(command, argv[0], strlen(argv[0]), shift);
+    fill(command, decompressed, 14, shift);
+    fill(command, argv[0], strlen(argv[0]), shift);
+    fill(command, add_command, 4, shift);
+    fill(command, argv[0], strlen(argv[0]), shift);
+    fill(command, space, 1, shift);
+
+    /// Forward all arguments
+    for (int i = 1; i < argc; ++i)
+    {
+        fill(command, argv[i], strlen(argv[i]), shift);
+        if (i != argc - 1)
+            fill(command, space, 1, shift);
+    }
+}
+
+int main(int argc, char* argv[])
 {
     int input_fd = open(argv[0], O_RDONLY);
     if (input_fd == -1)
     {
@@ -224,8 +326,10 @@ int main(int /*argc*/, char* argv[])
         return 0;
     }
 
+    bool have_compressed_analoge = false;
+
     /// Decompress all files
-    if (0 != decompressFiles(input_fd, argv))
+    if (0 != decompressFiles(input_fd, argv, have_compressed_analoge))
     {
         printf("Error happened");
         if (0 != close(input_fd))
@@ -242,45 +346,37 @@
     /// `rename.ul` is set instead. It will lead to errors
     /// that can be easily avoided with the help of `mv`
 
-    // /// TODO: decompressor name can differ from executable
-    // char bash[] = "/usr/bin/bash";
-    // size_t length = 0;
-    // for (int i = 1; i < argc; ++i)
-    //     length += strlen(argv[i]);
-    // /// mv filename.decompressed filename && filename args...
-    // char command[8 + 3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc - 1];
-    // memset(command, '\0', 8 + 3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc - 1);
-
-    // /// fill command
-    // size_t shift = 0;
-    // char executable[] = "bash -c ";
-    // char mv[] = "mv ";
-    // char decompressed[] = ".decompressed ";
-    // char add_command[] = " && ";
-    // char space[] = " ";
-    // fill(command, executable, 8, shift);
-    // fill(command, mv, 3, shift);
-    // fill(command, argv[0], strlen(argv[0]), shift);
-    // fill(command, decompressed, 14, shift);
-    // fill(command, argv[0], strlen(argv[0]), shift);
-    // fill(command, add_command, 4, shift);
-    // fill(command, argv[0], strlen(argv[0]), shift);
-    // fill(command, space, 1, shift);
-    // for (int i = 1; i < argc; ++i)
-    // {
-    //     fill(command, argv[i], strlen(argv[i]), shift);
-    //     if (i != argc - 1)
-    //         fill(command, space, 1, shift);
-    // }
-    // printf("%s", command);
-    // fflush(stdout);
+    if (!have_compressed_analoge)
+    {
+        printf("Can't apply arguments to this binary");
+        /// remove file
+        char * name = strrchr(argv[0], '/') + 1;
+        execlp("rm", "rm", name, NULL);
+        perror(nullptr);
+        return 1;
+    }
+    else
+    {
+        /// Move the decompressed file in place of this binary and run the command
+        char bash[] = "/usr/bin/bash";
+        char executable[] = "-c";
 
-    // char *newargv[] = { bash, command, nullptr };
-    // char *newenviron[] = { nullptr };
-    // execve("/usr/bin/bash", newargv, newenviron);
+        /// length of forwarded args
+        size_t length = 0;
+        for (int i = 1; i < argc; ++i)
+            length += strlen(argv[i]);
 
-    /// This part of code will be reached only if error happened
-    execl("/usr/bin/bash", "bash", "-c", "mv ./clickhouse.decompressed ./clickhouse", NULL);
-    perror(nullptr);
-    return 1;
+        /// mv filename.decompressed filename && filename args...
+        char command[3 + strlen(argv[0]) + 14 + strlen(argv[0]) + 4 + strlen(argv[0]) + length + argc];
+        fillCommand(command, argc, argv, length);
+
+        /// replace file and call executable
+        char * newargv[] = { bash, executable, command, nullptr };
+        char * newenviron[] = { nullptr };
+        execve("/usr/bin/bash", newargv, newenviron);
+
+        /// This part of the code will be reached only if an error happened
+        perror(nullptr);
+        return 1;
+    }
 }
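The parallel scheme this patch introduces reduces to one observation: each ZSTD frame is independent, so a child process can decompress one frame while the parent skips ahead to the next frame header. The output has to be a MAP_SHARED mapping (as it is in decompressFiles), otherwise the children's writes would stay in their private copies after fork(). Below is a minimal compilable sketch of that core idea; the function name is mine, and the cap on concurrent children plus most of the error reporting from the patch are omitted. Link with -lzstd.

#include <sys/wait.h>
#include <unistd.h>
#include <zstd.h>

/// Editorial sketch, not part of the patch. `shared_output` must be shared
/// memory (e.g. mmap with MAP_SHARED), or the children's output is lost.
int decompressFramesInParallel(const char * input, size_t input_size, char * shared_output)
{
    size_t in_pos = 0, out_pos = 0;
    int children = 0;

    while (in_pos < input_size)
    {
        size_t frame_size = ZSTD_findFrameCompressedSize(input + in_pos, input_size - in_pos);
        unsigned long long content_size = ZSTD_getFrameContentSize(input + in_pos, input_size - in_pos);
        if (ZSTD_isError(frame_size)
            || content_size == ZSTD_CONTENTSIZE_ERROR
            || content_size == ZSTD_CONTENTSIZE_UNKNOWN)
            return 1;

        pid_t pid = fork();
        if (pid == 0)
        {
            /// Child: decompress one frame into its disjoint slice of the output.
            size_t res = ZSTD_decompress(shared_output + out_pos, content_size, input + in_pos, frame_size);
            _exit(ZSTD_isError(res) ? 1 : 0);
        }
        if (pid > 0)
            ++children;

        /// Parent: advance past this frame; slices never overlap.
        in_pos += frame_size;
        out_pos += content_size;
    }

    int rc = 0;
    while (children-- > 0)   /// reap all children; any failure fails the whole job
    {
        int status = 0;
        wait(&status);
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
            rc = 1;
    }
    return rc;
}

Because every child writes to a disjoint output slice, no locking is needed; the only synchronization point is the final wait loop.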
From 4ce0b280e623957155a6788affbaaa2ecd70cb33 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 14 Apr 2022 15:31:05 +0200
Subject: [PATCH 028/669] wait for node to disappear instead of removing

---
 src/Common/ZooKeeper/ZooKeeper.cpp            | 14 ++++++++++++++
 src/Common/ZooKeeper/ZooKeeper.h              |  4 ++++
 src/Common/randomSeed.cpp                     |  3 ++-
 src/Interpreters/DDLWorker.cpp                | 10 +---------
 .../ReplicatedMergeTreeRestartingThread.cpp   |  1 +
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp
index 0f4b141d058..17fcc303081 100644
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@@ -841,6 +841,20 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &
     return false;
 }
 
+void ZooKeeper::waitForEphemeralToDisappearIfAny(const std::string & path)
+{
+    zkutil::EventPtr eph_node_disappeared = std::make_shared();
+    String content;
+    if (!tryGet(path, content, nullptr, eph_node_disappeared))
+        return;
+
+    int32_t timeout_ms = 2 * session_timeout_ms;
+    if (!eph_node_disappeared->tryWait(timeout_ms))
+        throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR,
+            "Ephemeral node {} still exists after {}s, probably it's owned by someone else. Node data: '{}'",
+            path, timeout_ms / 1000, content);
+}
+
 ZooKeeperPtr ZooKeeper::startNewSession() const
 {
     return std::make_shared(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);
diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h
index 4d5bd039a55..3a210513e25 100644
--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@@ -240,6 +240,10 @@ public:
     /// The function returns true if waited and false if waiting was interrupted by condition.
     bool waitForDisappear(const std::string & path, const WaitCondition & condition = {});
 
+    /// Wait for the ephemeral node created in previous session to disappear.
+    /// Throws LOGICAL_ERROR if node still exists after 2x session_timeout.
+    void waitForEphemeralToDisappearIfAny(const std::string & path);
+
     /// Async interface (a small subset of operations is implemented).
     ///
     /// Usage:
diff --git a/src/Common/randomSeed.cpp b/src/Common/randomSeed.cpp
index 74088d0a4f3..15a380a1805 100644
--- a/src/Common/randomSeed.cpp
+++ b/src/Common/randomSeed.cpp
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 
 #include
 
@@ -29,6 +30,6 @@ DB::UInt64 randomSeed()
     hash.update(times.tv_nsec);
     hash.update(times.tv_sec);
     hash.update(getThreadId());
-    hash.update(&times);
+    hash.update(getFQDNOrHostName());
     return hash.get64();
 }
diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp
index 9af6b61a0c1..ff6622949a1 100644
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@@ -545,15 +545,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
     {
         /// Connection has been lost and now we are retrying,
         /// but our previous ephemeral node still exists.
-        zkutil::EventPtr eph_node_disappeared = std::make_shared();
-        String dummy;
-        if (zookeeper->tryGet(active_node_path, dummy, nullptr, eph_node_disappeared))
-        {
-            constexpr int timeout_ms = 60 * 1000;
-            if (!eph_node_disappeared->tryWait(timeout_ms))
-                throw Exception(ErrorCodes::LOGICAL_ERROR, "Ephemeral node {} still exists, "
-                    "probably it's owned by someone else", active_node_path);
-        }
+        zookeeper->waitForEphemeralToDisappearIfAny(active_node_path);
     }
 
     zookeeper->create(active_node_path, {}, zkutil::CreateMode::Ephemeral);
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
index 9b4234a99e5..7df1c0a13d8 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp
@@ -303,6 +303,7 @@ void ReplicatedMergeTreeRestartingThread::activateReplica()
     ReplicatedMergeTreeAddress address = storage.getReplicatedMergeTreeAddress();
     String is_active_path = fs::path(storage.replica_path) / "is_active";
 
+    zookeeper->waitForEphemeralToDisappearIfAny(is_active_path);
 
     /// Simultaneously declare that this replica is active, and update the host.
     Coordination::Requests ops;
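The contract of waitForEphemeralToDisappearIfAny above is worth spelling out: if a node left over from the previous session still exists, arm a watch and wait up to twice the session timeout for the server to expire it, and only after that treat it as owned by someone else. Here is a minimal model of the same control flow, with a hypothetical `Client` stub standing in for the zkutil session and a std::promise standing in for the watch event; only the shape of the logic is meant to match the patch.

#include <chrono>
#include <future>
#include <optional>
#include <stdexcept>
#include <string>

/// Editorial sketch, not part of the patch. `Client` is a made-up stand-in.
struct Client
{
    int session_timeout_ms = 30000;

    /// Returns the node's data and arms `disappeared` if the node exists.
    std::optional<std::string> tryGetWithWatch(const std::string & /*path*/, std::promise<void> & /*disappeared*/)
    {
        return std::nullopt;   /// stub: pretend the stale node is already gone
    }
};

void waitForEphemeralToDisappearIfAny(Client & zk, const std::string & path)
{
    std::promise<void> disappeared;
    auto fired = disappeared.get_future();

    auto content = zk.tryGetWithWatch(path, disappeared);
    if (!content)
        return;   /// no stale node from a previous session

    /// The previous session is at most one timeout away from expiring,
    /// so twice the session timeout is a safe upper bound for the wait.
    auto budget = std::chrono::milliseconds(2 * zk.session_timeout_ms);
    if (fired.wait_for(budget) != std::future_status::ready)
        throw std::runtime_error(
            "Ephemeral node " + path + " still exists; probably owned by a live session. Data: " + *content);
}

int main()
{
    Client zk;
    waitForEphemeralToDisappearIfAny(zk, "/some/ephemeral/node");
}

Waiting for expiry instead of deleting the node avoids a race: if another live replica really owns the node, deleting it would silently steal its registration.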
From 8daebf38b91c336227460c4f83cae3e6fbfb6a37 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 16 Apr 2022 00:48:10 +0200
Subject: [PATCH 029/669] Remove "google-readability-casting" because constructor-style cast is Ok

---
 .clang-tidy | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.clang-tidy b/.clang-tidy
index 6fd67876923..3ffc6026f0f 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -118,7 +118,6 @@ Checks: '-*,
     google-build-namespaces,
     google-default-arguments,
     google-explicit-constructor,
-    google-readability-casting,
     google-readability-avoid-underscore-in-googletest-name,
     google-runtime-int,
     google-runtime-operator,

From de7725083e6e7a3d3c1614ff2e426ff9411cd9e8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sat, 16 Apr 2022 01:15:40 +0200
Subject: [PATCH 030/669] Fix clang-tidy, part 1

---
 contrib/sysroot                                      |  2 +-
 src/Access/DiskAccessStorage.cpp                     |  3 ++-
 .../AggregateFunctionAvgWeighted.cpp                 |  4 ++--
 src/AggregateFunctions/AggregateFunctionDeltaSum.cpp |  2 +-
 .../AggregateFunctionGroupArray.cpp                  |  2 +-
 .../AggregateFunctionGroupBitmap.cpp                 |  2 +-
 .../AggregateFunctionGroupUniqArray.cpp              |  2 +-
 src/AggregateFunctions/AggregateFunctionQuantile.cpp |  4 ++--
 src/AggregateFunctions/AggregateFunctionSparkbar.cpp |  2 +-
 .../AggregateFunctionStatisticsSimple.cpp            |  2 +-
 src/AggregateFunctions/AggregateFunctionSum.cpp      |  2 +-
 src/AggregateFunctions/AggregateFunctionSumCount.cpp |  2 +-
 src/AggregateFunctions/AggregateFunctionTopK.cpp     |  2 +-
 src/Common/UTF8Helpers.cpp                           |  2 +-
 src/Dictionaries/IPAddressDictionary.cpp             | 12 ++++++------
 src/Functions/FunctionHelpers.cpp                    |  2 +-
 src/Functions/FunctionsLogical.cpp                   |  6 +++---
 src/Functions/array/arrayElement.cpp                 |  2 +-
 src/Functions/if.cpp                                 |  6 +++---
 src/Functions/toLowCardinality.cpp                   |  1 -
 src/Interpreters/JIT/compileFunction.cpp             |  2 +-
 src/Interpreters/convertFieldToType.cpp              | 12 ++++++------
 src/Parsers/Access/ASTShowAccessEntitiesQuery.cpp    |  3 ++-
 src/Storages/FileLog/StorageFileLog.cpp              |  2 +-
 .../MergeTree/MergeTreeDataSelectExecutor.cpp        |  2 +-
 src/Storages/MergeTree/MergeTreeDataWriter.cpp       |  2 +-
 src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp |  2 +-
 .../MergeTree/MergeTreeIndexGranuleBloomFilter.cpp   |  2 +-
 src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp      |  2 +-
 29 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/contrib/sysroot b/contrib/sysroot
index e9fb375d0a1..bbcac834526 160000
--- a/contrib/sysroot
+++ b/contrib/sysroot
@@ -1 +1 @@
-Subproject commit e9fb375d0a1e5ebfd74c043f088f2342552103f8
+Subproject commit bbcac834526d90d1e764164b861be426891d1743
diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp
index 7393fcd8d36..a9eb27c291c 100644
--- a/src/Access/DiskAccessStorage.cpp
+++ b/src/Access/DiskAccessStorage.cpp
@@ -327,7 +327,8 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type)
 
     /// Create the 'need_rebuild_lists.mark' file.
     /// This file will be used later to find out if writing lists is successful or not.
-    std::ofstream{getNeedRebuildListsMarkFilePath(directory_path)};
+    std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)};
+    out.close();
 
     lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
     lists_writing_thread_is_waiting = true;
diff --git a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp
index ab6fdc8fd7e..4d7901a7fac 100644
--- a/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp
+++ b/src/AggregateFunctions/AggregateFunctionAvgWeighted.cpp
@@ -39,7 +39,7 @@ bool allowTypes(const DataTypePtr& left, const DataTypePtr& right) noexcept
 }
 
 template
-static IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
+IAggregateFunction * create(const IDataType & second_type, TArgs && ... args)
 {
     const WhichDataType which(second_type);
 
@@ -51,7 +51,7 @@ static IAggregateFunction * create(const IDataType & second_type, TArgs && ... a
 
 // Not using helper functions because there are no templates for binary decimal/numeric function.
 template
-static IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
+IAggregateFunction * create(const IDataType & first_type, const IDataType & second_type, TArgs && ... args)
 {
     const WhichDataType which(first_type);
 
diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
index f1c6e7c6112..3b43d9a85f8 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.cpp
@@ -30,7 +30,7 @@ AggregateFunctionPtr createAggregateFunctionDeltaSum(
         throw Exception("Incorrect number of arguments for aggregate function " + name,
             ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-    DataTypePtr data_type = arguments[0];
+    const DataTypePtr & data_type = arguments[0];
 
     if (isInteger(data_type) || isFloat(data_type))
         return AggregateFunctionPtr(createWithNumericType(
diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
index 5a9fd778277..85075d5a4d6 100644
--- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
+++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp
@@ -20,7 +20,7 @@
 namespace
 {
 
 template