diff --git a/CMakeLists.txt b/CMakeLists.txt
index aea5e0617a4..cb9134d2ac0 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -513,8 +513,8 @@ endif ()
 macro (add_executable target)
     # invoke built-in add_executable
     # explicitly acquire and interpose malloc symbols by clickhouse_malloc
-    # if GLIBC_COMPATIBILITY is ON and not sanitizer build, provide memcpy symbol explicitly to neutrialize thinlto's libcall generation.
-    if (GLIBC_COMPATIBILITY AND NOT SANITIZE)
+    # if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on, then provide memcpy symbol explicitly to neutralize thinlto's libcall generation.
+    if (GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
         _add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:memcpy>)
     else ()
         _add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)
diff --git a/docker/images.json b/docker/images.json
index e9e91864e1e..f5b10a14313 100644
--- a/docker/images.json
+++ b/docker/images.json
@@ -9,7 +9,8 @@
         "name": "yandex/clickhouse-binary-builder",
         "dependent": [
             "docker/test/split_build_smoke_test",
-            "docker/test/pvs"
+            "docker/test/pvs",
+            "docker/test/codebrowser"
         ]
     },
     "docker/packager/unbundled": {
diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh
index 5df0392cb4d..8acbe271b1f 100755
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@@ -17,7 +17,9 @@ ccache --show-stats ||:
 ccache --zero-stats ||:
 ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
 rm -f CMakeCache.txt
-cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "$CMAKE_FLAGS" ..
+# Read cmake arguments into array (possibly empty)
+read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
+cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
 # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
 ninja $NINJA_FLAGS clickhouse-bundle
 mv ./programs/clickhouse* /output
diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile
index cb3462cad0e..e03f94a85e0 100644
--- a/docker/test/codebrowser/Dockerfile
+++ b/docker/test/codebrowser/Dockerfile
@@ -1,33 +1,15 @@
 # docker build --network=host -t yandex/clickhouse-codebrowser .
 # docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser
-FROM ubuntu:18.04
+FROM yandex/clickhouse-binary-builder
 
-RUN apt-get --allow-unauthenticated update -y \
-    && env DEBIAN_FRONTEND=noninteractive \
-        apt-get --allow-unauthenticated install --yes --no-install-recommends \
-            bash \
-            sudo \
-            wget \
-            software-properties-common \
-            ca-certificates \
-            apt-transport-https \
-            build-essential \
-            gpg-agent \
-            git
-
-RUN wget -nv -O - https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo apt-key add -
-RUN sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
-RUN sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
-
-RUN sudo apt-get --yes --allow-unauthenticated update
-# To build woboq
-RUN sudo apt-get --yes --allow-unauthenticated install cmake clang-8 libllvm8 libclang-8-dev
+RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev
 
 # repo versions doesn't work correctly with C++17
 # also we push reports to s3, so we add index.html to subfolder urls
 # https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
 RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser
-RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release && make -j
+
+RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-9 -DCMAKE_C_COMPILER=clang-9 && make -j
 
 ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator
 ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator
@@ -40,7 +22,7 @@ ENV SHA=nosha
 ENV DATA="data"
 
 CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
-    cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-8 -DCMAKE_C_COMPILER=/usr/bin/clang-8 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \
+    cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-11 -DCMAKE_C_COMPILER=/usr/bin/clang-11 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \
     mkdir -p $HTML_RESULT_DIRECTORY && \
     $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA && \
     cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index ee0373c70b4..a25befa7588 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -66,6 +66,32 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
 ```
 
+## custom_settings_prefixes {#custom_settings_prefixes}
+
+List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.
+
+**Example**
+
+```xml
+<custom_settings_prefixes>custom_</custom_settings_prefixes>
+```
+
+**See Also**
+
+- [Custom settings](../../operations/settings/index.md#custom_settings)
+
+## core_dump
+
+Configures the soft limit for the core dump file size; one gigabyte by default.
+```xml
+<core_dump>
+    <size_limit>1073741824</size_limit>
+</core_dump>
+```
+
+(The hard limit is configured via system tools.)
+
 ## default\_database {#default-database}
 
 The default database.
@@ -405,7 +431,7 @@ Limits total RAM usage by the ClickHouse server.
 Possible values:
 
 - Positive integer.
-- 0 — Unlimited.
+- 0 (auto).
 
 Default value: `0`.
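The `core_dump` setting above maps onto the standard POSIX resource-limit mechanism. A minimal sketch of how a server process could apply such a soft limit, assuming a POSIX system (this is illustrative, not ClickHouse's actual startup code):

```cpp
#include <sys/resource.h>
#include <cstdio>

int main()
{
    rlimit limit{};
    if (getrlimit(RLIMIT_CORE, &limit) != 0)
    {
        perror("getrlimit");
        return 1;
    }

    /// Soft limit: 1 GiB, the documented default. The hard limit is left
    /// untouched; it can only be changed with external/system tools.
    limit.rlim_cur = 1073741824;
    if (limit.rlim_max != RLIM_INFINITY && limit.rlim_cur > limit.rlim_max)
        limit.rlim_cur = limit.rlim_max;  /// the soft limit cannot exceed the hard limit

    if (setrlimit(RLIMIT_CORE, &limit) != 0)
    {
        perror("setrlimit");
        return 1;
    }
    return 0;
}
```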
diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md
index 59394d77b11..cd483694521 100644
--- a/docs/en/operations/settings/index.md
+++ b/docs/en/operations/settings/index.md
@@ -28,4 +28,30 @@ Ways to configure settings, in order of priority:
 
 Settings that can only be made in the server config file are not covered in this section.
 
+## Custom Settings {#custom_settings}
+
+In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
+
+A custom setting name must begin with one of the predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
+
+```xml
+<custom_settings_prefixes>custom_</custom_settings_prefixes>
+```
+
+To define a custom setting, use the `SET` command:
+
+```sql
+SET custom_a = 123;
+```
+
+To get the current value of a custom setting, use the `getSetting()` function:
+
+```sql
+SELECT getSetting('custom_a');
+```
+
+**See Also**
+
+- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md)
+
 [Original article](https://clickhouse.tech/docs/en/operations/settings/)
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index e97a418f1ed..decaf6b9029 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -1144,9 +1144,9 @@ See also:
 
 ## insert\_quorum\_timeout {#settings-insert_quorum_timeout}
 
-Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
+Write to quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
 
-Default value: 60 seconds.
+Default value: 600000 milliseconds (ten minutes).
 
 See also:
 
diff --git a/docs/en/sql-reference/functions/machine-learning-functions.md b/docs/en/sql-reference/functions/machine-learning-functions.md
index 9de3854c3e3..8627fc26bad 100644
--- a/docs/en/sql-reference/functions/machine-learning-functions.md
+++ b/docs/en/sql-reference/functions/machine-learning-functions.md
@@ -16,3 +16,82 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
 ## stochasticLogisticRegression {#stochastic-logistic-regression}
 
 The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent method for binary classification problem. Uses `evalMLMethod` to predict on new data.
+
+## bayesAB {#bayesab}
+
+Compares test groups (variants) and calculates for each group the probability to be the best one. The first group is used as a control group.
+
+**Syntax**
+
+``` sql
+bayesAB(distribution_name, higher_is_better, variant_names, x, y)
+```
+
+**Parameters**
+
+- `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
+
+    - `beta` for [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)
+    - `gamma` for [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
+
+- `higher_is_better` — Boolean flag. [Boolean](../../sql-reference/data-types/boolean.md). Possible values:
+
+    - `0` — lower values are considered to be better than higher ones
+    - `1` — higher values are considered to be better than lower ones
+
+- `variant_names` — Variant names. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+
+- `x` — Numbers of tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
+
+- `y` — Numbers of successful tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
+
+!!! note "Note"
+    All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. `y` cannot be larger than `x`.
+
+**Returned values**
+
+For each variant the function calculates:
+- `beats_control` — long-term probability to outperform the first (control) variant
+- `to_be_best` — long-term probability to outperform all other variants
+
+Type: JSON.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
+```
+
+Result:
+
+``` text
+{
+   "data":[
+      {
+         "variant_name":"Control",
+         "x":3000,
+         "y":100,
+         "beats_control":0,
+         "to_be_best":0.22619
+      },
+      {
+         "variant_name":"A",
+         "x":3000,
+         "y":90,
+         "beats_control":0.23469,
+         "to_be_best":0.04671
+      },
+      {
+         "variant_name":"B",
+         "x":3000,
+         "y":110,
+         "beats_control":0.7580899999999999,
+         "to_be_best":0.7271
+      }
+   ]
+}
+```
+
+[Original article](https://clickhouse.tech/docs/en/query_language/functions/machine-learning-functions/)
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 1c059e9f97b..3d06bbfff9e 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -1491,4 +1491,40 @@ Result:
 
 ```
 
+## getSetting {#getSetting}
+
+Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings).
+
+**Syntax**
+
+```sql
+getSetting('custom_setting');
+```
+
+**Parameter**
+
+- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- The current value of the setting.
+
+**Example**
+
+```sql
+SET custom_a = 123;
+SELECT getSetting('custom_a');
+```
+
+**Result**
+
+```
+123
+```
+
+**See Also**
+
+- [Custom Settings](../../operations/settings/index.md#custom_settings)
+
 [Original article](https://clickhouse.tech/docs/en/query_language/functions/other_functions/)
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 8ed3b073fa8..a31237ecfb5 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -487,4 +487,75 @@ Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial.
 
 The result type is UInt64.
 
+## normalizeQuery {#normalized-query}
+
+Replaces literals, sequences of literals and complex aliases with placeholders.
+
+**Syntax**
+``` sql
+normalizeQuery(x)
+```
+
+**Parameters**
+
+- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Sequence of characters with placeholders.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT normalizeQuery('[1, 2, 3, x]') AS query;
+```
+
+Result:
+
+``` text
+┌─query────┐
+│ [?.., x] │
+└──────────┘
+```
+
+## normalizedQueryHash {#normalized-query-hash}
+
+Returns identical 64-bit hash values without the values of literals for similar queries. It helps to analyze the query log.
+
+**Syntax**
+
+``` sql
+normalizedQueryHash(x)
+```
+
+**Parameters**
+
+- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
+```
+
+Result:
+
+``` text
+┌─res─┐
+│   1 │
+└─────┘
+```
+
 [Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/)
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index d4518c1f45b..29b164545fc 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -735,4 +735,45 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC')
 └──────────────────────────────────────┘
 ```
 
+## formatRow {#formatrow}
+
+Converts arbitrary expressions into a string via the given format.
+
+**Syntax**
+
+``` sql
+formatRow(format, x, y, ...)
+```
+
+**Parameters**
+
+- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
+- `x`,`y`, ... — Expressions.
+
+**Returned value**
+
+- A formatted string (for text formats it's usually terminated with the new line character).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT formatRow('CSV', number, 'good')
+FROM numbers(3)
+```
+
+Result:
+
+``` text
+┌─formatRow('CSV', number, 'good')─┐
+│ 0,"good"
+ │
+│ 1,"good"
+ │
+│ 2,"good"
+ │
+└──────────────────────────────────┘
+```
+
 [Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/)
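For readers curious how literal normalization makes queries that differ only in constants hash to the same value, here is a toy C++ sketch; the helper names are invented for illustration and this is not ClickHouse's actual implementation:

```cpp
#include <cctype>
#include <functional>
#include <iostream>
#include <string>

/// Replace each run of digits with a single '?' placeholder.
std::string normalizeLiterals(const std::string & query)
{
    std::string out;
    for (size_t i = 0; i < query.size(); )
    {
        if (std::isdigit(static_cast<unsigned char>(query[i])))
        {
            while (i < query.size() && std::isdigit(static_cast<unsigned char>(query[i])))
                ++i;
            out += '?';
        }
        else
            out += query[i++];
    }
    return out;
}

int main()
{
    std::string a = normalizeLiterals("SELECT * FROM t WHERE id = 42");
    std::string b = normalizeLiterals("SELECT * FROM t WHERE id = 7");
    /// Both normalize to "SELECT * FROM t WHERE id = ?", so the hashes match.
    std::cout << (std::hash<std::string>{}(a) == std::hash<std::string>{}(b)) << '\n';
    return 0;
}
```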
diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md
index a9e254f215b..134c096de84 100644
--- a/docs/ru/sql-reference/functions/string-functions.md
+++ b/docs/ru/sql-reference/functions/string-functions.md
@@ -479,4 +479,75 @@ SELECT trimBoth('     Hello, world!     ')
 
 The result type is UInt64.
 
+## normalizeQuery {#normalized-query}
+
+Replaces literals, sequences of literals and complex aliases with placeholders.
+
+**Syntax**
+``` sql
+normalizeQuery(x)
+```
+
+**Parameters**
+
+- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Sequence of characters with placeholders.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT normalizeQuery('[1, 2, 3, x]') AS query;
+```
+
+Result:
+
+``` text
+┌─query────┐
+│ [?.., x] │
+└──────────┘
+```
+
+## normalizedQueryHash {#normalized-query-hash}
+
+Returns identical 64-bit hash values without the values of literals for similar queries. This helps to analyze the query log.
+
+**Syntax**
+
+``` sql
+normalizedQueryHash(x)
+```
+
+**Parameters**
+
+- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- Hash value.
+
+Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
+```
+
+Result:
+
+``` text
+┌─res─┐
+│   1 │
+└─────┘
+```
+
 [Original article](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/)
diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md
index 0440d70902c..7571fcf6c0b 100644
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@@ -723,4 +723,44 @@ SELECT toLowCardinality('1')
 └───────────────────────┘
 ```
 
+## formatRow {#formatrow}
+
+Converts arbitrary expressions into a string in the given format.
+
+**Syntax**
+
+``` sql
+formatRow(format, x, y, ...)
+```
+
+**Parameters**
+
+- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
+- `x`,`y`, ... — Expressions.
+
+**Returned value**
+
+- A formatted string (in text formats, usually terminated with a newline).
+
+**Example**
+
+Query:
+
+``` sql
+SELECT formatRow('CSV', number, 'good')
+FROM numbers(3)
+```
+
+Result:
+
+``` text
+┌─formatRow('CSV', number, 'good')─┐
+│ 0,"good"
+ │
+│ 1,"good"
+ │
+│ 2,"good"
+ │
+└──────────────────────────────────┘
+```
 [Original article](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/)
diff --git a/programs/main.cpp b/programs/main.cpp
index b91bd732f21..fad2d35f3bd 100644
--- a/programs/main.cpp
+++ b/programs/main.cpp
@@ -2,10 +2,15 @@
 #include <csignal>
 #include <csetjmp>
 
+#ifdef __linux__
+#include <sys/mman.h>
+#endif
+
 #include <new>
 #include <iostream>
 #include <vector>
 #include <string>
+#include <tuple>
 #include <utility> /// pair
 
 #if !defined(ARCADIA_BUILD)
@@ -57,6 +62,7 @@ int mainEntryClickHouseStatus(int argc, char ** argv);
 int mainEntryClickHouseRestart(int argc, char ** argv);
 #endif
 
+#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
 
 namespace
 {
@@ -150,28 +156,29 @@ enum class InstructionFail
     AVX512 = 8
 };
 
-const char * instructionFailToString(InstructionFail fail)
+std::pair<const char *, size_t> instructionFailToString(InstructionFail fail)
 {
     switch (fail)
     {
+#define ret(x) return std::make_pair(x, ARRAY_SIZE(x) - 1)
         case InstructionFail::NONE:
-            return "NONE";
+            ret("NONE");
         case InstructionFail::SSE3:
-            return "SSE3";
+            ret("SSE3");
        case InstructionFail::SSSE3:
-            return "SSSE3";
+            ret("SSSE3");
        case InstructionFail::SSE4_1:
-            return "SSE4.1";
+            ret("SSE4.1");
        case InstructionFail::SSE4_2:
-            return "SSE4.2";
+            ret("SSE4.2");
        case InstructionFail::POPCNT:
-            return "POPCNT";
+            ret("POPCNT");
        case InstructionFail::AVX:
-            return "AVX";
+            ret("AVX");
        case InstructionFail::AVX2:
-            return "AVX2";
+            ret("AVX2");
        case InstructionFail::AVX512:
-            return "AVX512";
+            ret("AVX512");
     }
     __builtin_unreachable();
 }
@@ -238,7 +245,7 @@ void checkRequiredInstructionsImpl(volatile InstructionFail & fail)
 }
 
 /// This function is safe to use in static initializers.
-void writeError(const char * data, size_t size)
+void writeErrorLen(const char * data, size_t size)
 {
     while (size != 0)
     {
@@ -254,6 +261,12 @@
         }
     }
 }
+/// Macro to avoid using strlen(), since strlen() may fail if SSE is not supported.
+#define writeError(data) do \
+    { \
+        static_assert(__builtin_constant_p(data)); \
+        writeErrorLen(data, ARRAY_SIZE(data) - 1); \
+    } while (false)
 
 /// Check SSE and others instructions availability. Calls exit on fail.
 /// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions.
@@ -272,8 +285,7 @@ void checkRequiredInstructions()
         /// Typical implementation of strlen is using SSE4.2 or AVX2.
         /// But this is not the case because it's compiler builtin and is executed at compile time.
-        const char * msg = "Can not set signal handler\n";
-        writeError(msg, strlen(msg));
+        writeError("Can not set signal handler\n");
         _Exit(1);
     }
 
@@ -281,12 +293,9 @@
 
     if (sigsetjmp(jmpbuf, 1))
     {
-        const char * msg1 = "Instruction check fail. The CPU does not support ";
-        writeError(msg1, strlen(msg1));
-        const char * msg2 = instructionFailToString(fail);
-        writeError(msg2, strlen(msg2));
-        const char * msg3 = " instruction set.\n";
-        writeError(msg3, strlen(msg3));
+        writeError("Instruction check fail. The CPU does not support ");
+        std::apply(writeErrorLen, instructionFailToString(fail));
+        writeError(" instruction set.\n");
         _Exit(1);
     }
 
@@ -294,13 +303,60 @@
 
     if (sigaction(signal, &sa_old, nullptr))
     {
-        const char * msg = "Can not set signal handler\n";
-        writeError(msg, strlen(msg));
+        writeError("Can not set signal handler\n");
         _Exit(1);
     }
 }
 
-struct Checker { Checker() { checkRequiredInstructions(); } } checker;
+#ifdef __linux__
+/// clickhouse uses jemalloc as a production allocator
+/// and jemalloc relies on working MADV_DONTNEED,
+/// which doesn't work under qemu
+///
+/// but do this only for Linux, since only Linux returns zeroed pages after MADV_DONTNEED
+/// (and jemalloc assumes this too, see contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in)
+void checkRequiredMadviseFlags()
+{
+    size_t size = 1 << 16;
+    void * addr = mmap(nullptr, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if (addr == MAP_FAILED)
+    {
+        writeError("Can not mmap pages for MADV_DONTNEED check\n");
+        _Exit(1);
+    }
+    memset(addr, 'A', size);
+
+    if (!madvise(addr, size, MADV_DONTNEED))
+    {
+        /// Suboptimal, but should be simple.
+        for (size_t i = 0; i < size; ++i)
+        {
+            if (reinterpret_cast<unsigned char *>(addr)[i] != 0)
+            {
+                writeError("MADV_DONTNEED does not zero pages. jemalloc will be broken\n");
+                _Exit(1);
+            }
+        }
+    }
+
+    if (munmap(addr, size))
+    {
+        writeError("Can not munmap pages for MADV_DONTNEED check\n");
+        _Exit(1);
+    }
+}
+#endif
+
+struct Checker
+{
+    Checker()
+    {
+        checkRequiredInstructions();
+#ifdef __linux__
+        checkRequiredMadviseFlags();
+#endif
+    }
+} checker;
 
 }
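The `writeError` macro above avoids calling `strlen()` before the instruction-set check has passed, since `strlen` may itself be compiled to SIMD code. A self-contained sketch of the same trick, assuming only POSIX `write(2)` (simplified; it omits the `static_assert` guard used in the real patch):

```cpp
#include <unistd.h>

/// For an array (including a string literal), this yields its element count.
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/// Write a message of known length using only the write(2) syscall.
static void writeErrorLen(const char * data, size_t size)
{
    while (size != 0)
    {
        ssize_t res = ::write(STDERR_FILENO, data, size);
        if (res <= 0)
            break;  /// nothing safer to do on error this early in startup
        data += res;
        size -= res;
    }
}

/// For a string literal, sizeof(literal) - 1 is its length without the '\0';
/// the length is computed at compile time, no strlen() involved.
#define writeError(data) writeErrorLen(data, ARRAY_SIZE(data) - 1)

int main()
{
    writeError("CPU feature check failed\n");
    return 0;
}
```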
diff --git a/src/AggregateFunctions/QuantileTDigest.h b/src/AggregateFunctions/QuantileTDigest.h
index b90979c02b9..02d43ede66d 100644
--- a/src/AggregateFunctions/QuantileTDigest.h
+++ b/src/AggregateFunctions/QuantileTDigest.h
@@ -36,7 +36,7 @@ namespace ErrorCodes
   * uses asin, which slows down the algorithm a bit.
   */
 template <typename T>
-class QuantileTDigest
+class TDigest
 {
     using Value = Float32;
     using Count = Float32;
@@ -86,20 +86,12 @@ class QuantileTDigest
     /// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
     static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
 
-    using Summary = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
+    using Centroids = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
 
-    Summary summary;
+    Centroids centroids;
     Count count = 0;
     UInt32 unmerged = 0;
 
-    /** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
-      */
-    static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
-    {
-        double k = (x - x1) / (x2 - x1);
-        return y1 + k * (y2 - y1);
-    }
-
     struct RadixSortTraits
     {
         using Element = Centroid;
@@ -122,13 +114,14 @@
       */
     void addCentroid(const Centroid & c)
     {
-        summary.push_back(c);
+        centroids.push_back(c);
         count += c.count;
         ++unmerged;
         if (unmerged >= params.max_unmerged)
             compress();
     }
 
+public:
     /** Performs compression of accumulated centroids
       * When merging, the invariant is retained to the maximum size of each
       * centroid that does not exceed `4 q (1 - q) \ delta N`.
@@ -137,16 +130,16 @@
     {
         if (unmerged > 0)
         {
-            RadixSort<RadixSortTraits>::executeLSD(summary.data(), summary.size());
+            RadixSort<RadixSortTraits>::executeLSD(centroids.data(), centroids.size());
 
-            if (summary.size() > 3)
+            if (centroids.size() > 3)
             {
                 /// A pair of consecutive bars of the histogram.
-                auto l = summary.begin();
+                auto l = centroids.begin();
                 auto r = std::next(l);
 
                 Count sum = 0;
-                while (r != summary.end())
+                while (r != centroids.end())
                 {
                     // we use quantile which gives us the smallest error
@@ -188,14 +181,13 @@
                 }
 
                 /// At the end of the loop, all values to the right of l were "eaten".
-                summary.resize(l - summary.begin() + 1);
+                centroids.resize(l - centroids.begin() + 1);
             }
 
             unmerged = 0;
         }
     }
 
-public:
     /** Adds to the digest a change in `x` with a weight of `cnt` (default 1)
       */
     void add(T x, UInt64 cnt = 1)
@@ -203,17 +195,17 @@
         addCentroid(Centroid(Value(x), Count(cnt)));
     }
 
-    void merge(const QuantileTDigest & other)
+    void merge(const TDigest & other)
     {
-        for (const auto & c : other.summary)
+        for (const auto & c : other.centroids)
             addCentroid(c);
     }
 
     void serialize(WriteBuffer & buf)
     {
         compress();
-        writeVarUInt(summary.size(), buf);
-        buf.write(reinterpret_cast<const char *>(summary.data()), summary.size() * sizeof(summary[0]));
+        writeVarUInt(centroids.size(), buf);
+        buf.write(reinterpret_cast<const char *>(centroids.data()), centroids.size() * sizeof(centroids[0]));
     }
 
     void deserialize(ReadBuffer & buf)
     {
@@ -222,36 +214,113 @@ public:
         readVarUInt(size, buf);
 
         if (size > params.max_unmerged)
-            throw Exception("Too large t-digest summary size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
+            throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
 
-        summary.resize(size);
-        buf.read(reinterpret_cast<char *>(summary.data()), size * sizeof(summary[0]));
+        centroids.resize(size);
+        buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
 
         count = 0;
-        for (const auto & c : summary)
+        for (const auto & c : centroids)
             count += c.count;
     }
 
+    Count getCount()
+    {
+        return count;
+    }
+
+    const Centroids & getCentroids() const
+    {
+        return centroids;
+    }
+
+    void reset()
+    {
+        centroids.resize(0);
+        count = 0;
+        unmerged = 0;
+    }
+};
+
+template <typename T>
+class QuantileTDigest
+{
+    using Value = Float32;
+    using Count = Float32;
+
+    /** We store two t-digests. When the number of elements in sub_tdigest becomes greater than merge_threshold,
+      * we merge sub_tdigest into main_tdigest and reset sub_tdigest. This method is needed to decrease the number of
+      * centroids in the t-digest (experiments show that after merge_threshold the size of the t-digest grows significantly,
+      * but merging two big t-digests decreases it).
+      */
+    TDigest<T> main_tdigest;
+    TDigest<T> sub_tdigest;
+    size_t merge_threshold = 1e7;
+
+    /** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
+      */
+    static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
+    {
+        double k = (x - x1) / (x2 - x1);
+        return y1 + k * (y2 - y1);
+    }
+
+    void mergeTDigests()
+    {
+        main_tdigest.merge(sub_tdigest);
+        sub_tdigest.reset();
+    }
+
+public:
+    void add(T x, UInt64 cnt = 1)
+    {
+        if (sub_tdigest.getCount() >= merge_threshold)
+            mergeTDigests();
+        sub_tdigest.add(x, cnt);
+    }
+
+    void merge(const QuantileTDigest & other)
+    {
+        mergeTDigests();
+        main_tdigest.merge(other.main_tdigest);
+        main_tdigest.merge(other.sub_tdigest);
+    }
+
+    void serialize(WriteBuffer & buf)
+    {
+        mergeTDigests();
+        main_tdigest.serialize(buf);
+    }
+
+    void deserialize(ReadBuffer & buf)
+    {
+        sub_tdigest.reset();
+        main_tdigest.deserialize(buf);
+    }
+
     /** Calculates the quantile q [0, 1] based on the digest.
       * For an empty digest returns NaN.
       */
     template <typename ResultType>
     ResultType getImpl(Float64 level)
     {
-        if (summary.empty())
+        mergeTDigests();
+
+        auto & centroids = main_tdigest.getCentroids();
+        if (centroids.empty())
             return std::is_floating_point_v<ResultType> ? NAN : 0;
 
-        compress();
+        main_tdigest.compress();
 
-        if (summary.size() == 1)
-            return summary.front().mean;
+        if (centroids.size() == 1)
+            return centroids.front().mean;
 
-        Float64 x = level * count;
+        Float64 x = level * main_tdigest.getCount();
         Float64 prev_x = 0;
         Count sum = 0;
-        Value prev_mean = summary.front().mean;
+        Value prev_mean = centroids.front().mean;
 
-        for (const auto & c : summary)
+        for (const auto & c : centroids)
         {
             Float64 current_x = sum + c.count * 0.5;
 
@@ -263,7 +332,7 @@
             prev_x = current_x;
         }
 
-        return summary.back().mean;
+        return centroids.back().mean;
     }
 
     /** Get multiple quantiles (`size` parts).
@@ -274,29 +343,32 @@
     template <typename ResultType>
     void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result)
    {
-        if (summary.empty())
+        mergeTDigests();
+
+        auto & centroids = main_tdigest.getCentroids();
+        if (centroids.empty())
         {
             for (size_t result_num = 0; result_num < size; ++result_num)
                 result[result_num] = std::is_floating_point_v<ResultType> ? NAN : 0;
             return;
         }
 
-        compress();
+        main_tdigest.compress();
 
-        if (summary.size() == 1)
+        if (centroids.size() == 1)
         {
             for (size_t result_num = 0; result_num < size; ++result_num)
-                result[result_num] = summary.front().mean;
+                result[result_num] = centroids.front().mean;
             return;
         }
 
-        Float64 x = levels[levels_permutation[0]] * count;
+        Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount();
         Float64 prev_x = 0;
         Count sum = 0;
-        Value prev_mean = summary.front().mean;
+        Value prev_mean = centroids.front().mean;
         size_t result_num = 0;
 
-        for (const auto & c : summary)
+        for (const auto & c : centroids)
         {
             Float64 current_x = sum + c.count * 0.5;
 
@@ -308,7 +380,7 @@
                 if (result_num >= size)
                     return;
 
-                x = levels[levels_permutation[result_num]] * count;
+                x = levels[levels_permutation[result_num]] * main_tdigest.getCount();
             }
 
             sum += c.count;
@@ -316,7 +388,7 @@
             prev_x = current_x;
         }
 
-        auto rest_of_results = summary.back().mean;
+        auto rest_of_results = centroids.back().mean;
         for (; result_num < size; ++result_num)
             result[levels_permutation[result_num]] = rest_of_results;
     }
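The two-digest scheme above can be shown in isolation. In the sketch below plain vectors stand in for the real `TDigest`; all names are illustrative, and the point is only the write-buffer-plus-threshold pattern:

```cpp
#include <cstddef>
#include <iostream>
#include <vector>

class BufferedSketch
{
    std::vector<double> main_data;   /// stands in for the compressed main t-digest
    std::vector<double> sub_data;    /// small write buffer, cheap to append to
    static constexpr size_t merge_threshold = 4;

    void mergeBuffers()
    {
        main_data.insert(main_data.end(), sub_data.begin(), sub_data.end());
        sub_data.clear();            /// reset() in the real code
    }

public:
    void add(double x)
    {
        if (sub_data.size() >= merge_threshold)
            mergeBuffers();
        sub_data.push_back(x);
    }

    size_t mainSize()
    {
        mergeBuffers();              /// queries always see the merged state
        return main_data.size();
    }
};

int main()
{
    BufferedSketch sketch;
    for (int i = 0; i < 10; ++i)
        sketch.add(i);
    std::cout << sketch.mainSize() << '\n';  /// prints 10
    return 0;
}
```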
diff --git a/src/Common/Exception.h b/src/Common/Exception.h
index 763b90048bb..314c59cbf51 100644
--- a/src/Common/Exception.h
+++ b/src/Common/Exception.h
@@ -22,10 +22,14 @@ public:
     Exception() = default;
     Exception(const std::string & msg, int code);
 
+    Exception(int code, const std::string & message)
+        : Exception(message, code)
+    {}
+
     // Format message with fmt::format, like the logging functions.
-    template <typename... Fmt>
-    Exception(int code, Fmt&&... fmt)
-        : Exception(fmt::format(std::forward<Fmt>(fmt)...), code)
+    template <typename... Args>
+    Exception(int code, const std::string & fmt, Args&&... args)
+        : Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
     {}
 
     struct CreateFromPocoTag {};
@@ -40,7 +44,16 @@ public:
     const char * what() const throw() override { return message().data(); }
 
     /// Add something to the existing message.
-    void addMessage(const std::string & arg) { extendedMessage(arg); }
+    template <typename... Args>
+    void addMessage(const std::string & format, Args&&... args)
+    {
+        extendedMessage(fmt::format(format, std::forward<Args>(args)...));
+    }
+
+    void addMessage(const std::string & message)
+    {
+        extendedMessage(message);
+    }
 
     std::string getStackTraceString() const;
 
diff --git a/src/Common/filesystemHelpers.cpp b/src/Common/filesystemHelpers.cpp
index e722fbc9c0f..51a66fba3aa 100644
--- a/src/Common/filesystemHelpers.cpp
+++ b/src/Common/filesystemHelpers.cpp
@@ -5,19 +5,37 @@
 #    include <cstdio>
 #    include <mntent.h>
 #endif
+#include <cerrno>
 #include <Poco/File.h>
 #include <Poco/Path.h>
 #include <Poco/Version.h>
 
+
 namespace DB
 {
+
 namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int SYSTEM_ERROR;
     extern const int NOT_IMPLEMENTED;
+    extern const int CANNOT_STATVFS;
 }
 
+
+struct statvfs getStatVFS(const String & path)
+{
+    struct statvfs fs;
+    while (statvfs(path.c_str(), &fs) != 0)
+    {
+        if (errno == EINTR)
+            continue;
+        throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS);
+    }
+    return fs;
+}
+
+
 bool enoughSpaceInDirectory(const std::string & path [[maybe_unused]], size_t data_size [[maybe_unused]])
 {
 #if POCO_VERSION >= 0x01090000
@@ -46,7 +64,7 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
     const auto get_device_id = [](const std::filesystem::path & p)
     {
         struct stat st;
-        if (stat(p.c_str(), &st))
+        if (stat(p.c_str(), &st)) /// NOTE: man stat does not list EINTR as possible error
             throwFromErrnoWithPath("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR);
         return st.st_dev;
     };
diff --git a/src/Common/filesystemHelpers.h b/src/Common/filesystemHelpers.h
index f97f91d2647..f534b61808a 100644
--- a/src/Common/filesystemHelpers.h
+++ b/src/Common/filesystemHelpers.h
@@ -12,10 +12,6 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int CANNOT_STATVFS;
-}
 
 using TemporaryFile = Poco::TemporaryFile;
 
@@ -31,12 +27,6 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path);
 #endif
 String getFilesystemName([[maybe_unused]] const String & mount_point);
 
-inline struct statvfs getStatVFS(const String & path)
-{
-    struct statvfs fs;
-    if (statvfs(path.c_str(), &fs) != 0)
-        throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS);
-    return fs;
-}
+struct statvfs getStatVFS(const String & path);
 
 }
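The retry loop added to `getStatVFS` follows the usual EINTR convention: `statvfs(2)` can be interrupted by a signal, in which case it fails with `errno == EINTR` and should simply be called again. A standalone sketch (assuming `/tmp` exists):

```cpp
#include <sys/statvfs.h>
#include <cerrno>
#include <cstdio>

int main()
{
    struct statvfs fs;
    while (statvfs("/tmp", &fs) != 0)
    {
        if (errno == EINTR)
            continue;        /// interrupted by a signal: retry, not an error
        perror("statvfs");   /// any other errno is a real failure
        return 1;
    }
    std::printf("free bytes: %llu\n",
                static_cast<unsigned long long>(fs.f_bavail) * fs.f_frsize);
    return 0;
}
```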
diff --git a/src/Core/BaseSettings.h b/src/Core/BaseSettings.h
index 7de87b345c1..b193fdd4c93 100644
--- a/src/Core/BaseSettings.h
+++ b/src/Core/BaseSettings.h
@@ -390,13 +390,21 @@ String BaseSettings<Traits>::valueToStringUtil(const std::string_view & name, c
 template <typename Traits>
 Field BaseSettings<Traits>::stringToValueUtil(const std::string_view & name, const String & str)
 {
-    const auto & accessor = Traits::Accessor::instance();
-    if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
-        return accessor.stringToValueUtil(index, str);
-    if constexpr (Traits::allow_custom_settings)
-        return Field::restoreFromDump(str);
-    else
-        BaseSettingsHelpers::throwSettingNotFound(name);
+    try
+    {
+        const auto & accessor = Traits::Accessor::instance();
+        if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
+            return accessor.stringToValueUtil(index, str);
+        if constexpr (Traits::allow_custom_settings)
+            return Field::restoreFromDump(str);
+        else
+            BaseSettingsHelpers::throwSettingNotFound(name);
+    }
+    catch (Exception & e)
+    {
+        e.addMessage("while parsing value '{}' for setting '{}'", str, name);
+        throw;
+    }
 }
 
 template
diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h
index a370a29dac8..f383e509751 100644
--- a/src/Core/Protocol.h
+++ b/src/Core/Protocol.h
@@ -146,6 +146,7 @@ namespace Protocol
                 "Ping",
                 "TablesStatusRequest",
                 "KeepAlive",
+                "Scalar",
             };
             return packet <= MAX
                 ? data[packet]
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 6eb853828c5..3ecb79c3fce 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -469,6 +469,7 @@ class IColumn;
     M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
     M(Bool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \
     M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
+    M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \
 
 #define LIST_OF_SETTINGS(M) \
     COMMON_SETTINGS(M) \
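The `try/catch` added to `stringToValueUtil` is an instance of the wrap-and-rethrow pattern for attaching context to errors. A hedged sketch of the same idea with a stand-in exception type (not `DB::Exception`), using the fmt library the patch already relies on:

```cpp
#include <fmt/format.h>
#include <stdexcept>
#include <string>

int parseSetting(const std::string & name, const std::string & value)
{
    try
    {
        return std::stoi(value);  /// may throw std::invalid_argument
    }
    catch (std::exception & e)
    {
        /// Wrap the low-level error with the context a user actually needs:
        /// which setting, and which value, failed to parse.
        throw std::runtime_error(fmt::format(
            "{} (while parsing value '{}' for setting '{}')", e.what(), value, name));
    }
}
```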
diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 6abb72efeb0..2705040841c 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -18,7 +18,7 @@
 #include
 #include
-#include <aws/s3/model/DeleteObjectRequest.h>
+#include <aws/s3/model/DeleteObjectsRequest.h>
 #include
 #include
 
@@ -36,6 +36,32 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
 }
 
+
+/// Helper class to collect keys into chunks of maximum size (to prepare batch requests to AWS API)
+class DiskS3::AwsS3KeyKeeper : public std::list<Aws::Vector<Aws::S3::Model::ObjectIdentifier>>
+{
+public:
+    void addKey(const String & key);
+
+private:
+    /// limit for one DeleteObject request
+    /// see https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html
+    const static size_t chunk_limit = 1000;
+};
+
+void DiskS3::AwsS3KeyKeeper::addKey(const String & key)
+{
+    if (empty() || back().size() >= chunk_limit)
+    { /// add one more chunk
+        push_back(value_type());
+        back().reserve(chunk_limit);
+    }
+
+    Aws::S3::Model::ObjectIdentifier obj;
+    obj.SetKey(key);
+    back().push_back(obj);
+}
+
 namespace
 {
     String getRandomName()
@@ -634,7 +660,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
     }
 }
 
-void DiskS3::remove(const String & path)
+void DiskS3::removeMeta(const String & path, AwsS3KeyKeeper & keys)
 {
     LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Remove file by path: {}", backQuote(metadata_path + path));
 
@@ -647,14 +673,9 @@
     if (metadata.ref_count == 0)
     {
         file.remove();
+
         for (const auto & [s3_object_path, _] : metadata.s3_objects)
-        {
-            /// TODO: Make operation idempotent. Do not throw exception if key is already deleted.
-            Aws::S3::Model::DeleteObjectRequest request;
-            request.SetBucket(bucket);
-            request.SetKey(s3_root_path + s3_object_path);
-            throwIfError(client->DeleteObject(request));
-        }
+            keys.addKey(s3_root_path + s3_object_path);
     }
     else /// In other case decrement number of references, save metadata and delete file.
     {
@@ -665,25 +686,57 @@
     }
     else
         file.remove();
+
 }
 
-void DiskS3::removeRecursive(const String & path)
+void DiskS3::removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys)
 {
     checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks.
 
     Poco::File file(metadata_path + path);
     if (file.isFile())
     {
-        remove(path);
+        removeMeta(path, keys);
     }
     else
     {
         for (auto it{iterateDirectory(path)}; it->isValid(); it->next())
-            removeRecursive(it->path());
+            removeMetaRecursive(it->path(), keys);
 
         file.remove();
     }
 }
 
+void DiskS3::removeAws(const AwsS3KeyKeeper & keys)
+{
+    if (!keys.empty())
+    {
+        for (const auto & chunk : keys)
+        {
+            Aws::S3::Model::Delete delkeys;
+            delkeys.SetObjects(chunk);
+
+            /// TODO: Make operation idempotent. Do not throw exception if key is already deleted.
+            Aws::S3::Model::DeleteObjectsRequest request;
+            request.SetBucket(bucket);
+            request.SetDelete(delkeys);
+            throwIfError(client->DeleteObjects(request));
+        }
+    }
+}
+
+void DiskS3::remove(const String & path)
+{
+    AwsS3KeyKeeper keys;
+    removeMeta(path, keys);
+    removeAws(keys);
+}
+
+void DiskS3::removeRecursive(const String & path)
+{
+    AwsS3KeyKeeper keys;
+    removeMetaRecursive(path, keys);
+    removeAws(keys);
+}
 
 bool DiskS3::tryReserve(UInt64 bytes)
 {
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index 2d9c7f79865..fe8c47931b5 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -21,6 +21,8 @@ class DiskS3 : public IDisk
 public:
     friend class DiskS3Reservation;
 
+    class AwsS3KeyKeeper;
+
     DiskS3(
         String name_,
         std::shared_ptr<Aws::S3::S3Client> client_,
@@ -111,6 +113,10 @@ public:
 private:
     bool tryReserve(UInt64 bytes);
 
+    void removeMeta(const String & path, AwsS3KeyKeeper & keys);
+    void removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys);
+    void removeAws(const AwsS3KeyKeeper & keys);
+
 private:
     const String name;
     std::shared_ptr<Aws::S3::S3Client> client;
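`AwsS3KeyKeeper` exists because the S3 `DeleteObjects` API caps each request at 1000 keys (see the AWS link in the code above). The chunking idea in isolation, with `std::string` standing in for `ObjectIdentifier`:

```cpp
#include <list>
#include <string>
#include <vector>
#include <iostream>

class KeyChunks : public std::list<std::vector<std::string>>
{
    static constexpr size_t chunk_limit = 1000;  /// DeleteObjects per-request cap

public:
    void addKey(const std::string & key)
    {
        if (empty() || back().size() >= chunk_limit)
            emplace_back();        /// start a new chunk
        back().push_back(key);
    }
};

int main()
{
    KeyChunks chunks;
    for (int i = 0; i < 2500; ++i)
        chunks.addKey("key-" + std::to_string(i));
    std::cout << chunks.size() << '\n';  /// 3 chunks: 1000 + 1000 + 500
    return 0;
}
```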
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 368b760044d..065b14f86b7 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -107,6 +107,7 @@ static FormatSettings getOutputFormatSetting(const Settings & settings, const Co
     format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ?
                                      FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
+    format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
     format_settings.template_settings.resultset_format = settings.format_template_resultset;
     format_settings.template_settings.row_format = settings.format_template_row;
     format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index cd5cab8cf5a..a97bd9bf6c6 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -45,6 +45,8 @@ struct FormatSettings
         UInt64 max_value_width = 10000;
         bool color = true;
 
+        bool output_format_pretty_row_numbers = false;
+
         enum class Charset
         {
             UTF8,
diff --git a/src/Functions/FunctionDateOrDateTimeToSomething.h b/src/Functions/FunctionDateOrDateTimeToSomething.h
index f50cab25d2a..2fde07416a3 100644
--- a/src/Functions/FunctionDateOrDateTimeToSomething.h
+++ b/src/Functions/FunctionDateOrDateTimeToSomething.h
@@ -71,7 +71,9 @@ public:
         if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
         {
             std::string time_zone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0);
-            if (time_zone.empty())
+            /// Only validate the time_zone part if the number of arguments is 2. This is mainly
+            /// to accommodate functions like toStartOfDay(today()), toStartOfDay(yesterday()) etc.
+            if (arguments.size() == 2 && time_zone.empty())
                 throw Exception(
                     "Function " + getName() + " supports a 2nd argument (optional) that must be non-empty and be a valid time zone",
                     ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h
index 40498353195..a5c88cad1d6 100644
--- a/src/Functions/FunctionsJSON.h
+++ b/src/Functions/FunctionsJSON.h
@@ -279,7 +279,7 @@ public:
     String getName() const override { return Name::name; }
     bool isVariadic() const override { return true; }
     size_t getNumberOfArguments() const override { return 0; }
-    bool useDefaultImplementationForConstants() const override { return false; }
+    bool useDefaultImplementationForConstants() const override { return true; }
 
     DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
     {
diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp
index 1ebd3009ff7..eb636395fd9 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.cpp
+++ b/src/Interpreters/ClusterProxy/executeQuery.cpp
@@ -15,18 +15,24 @@ namespace DB
 
 namespace ClusterProxy
 {
 
-Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log)
+Context updateSettingsForCluster(const Cluster & cluster, const Context & context, const Settings & settings, Poco::Logger * log)
 {
     Settings new_settings = settings;
     new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
 
-    /// Does not matter on remote servers, because queries are sent under different user.
-    new_settings.max_concurrent_queries_for_user = 0;
-    new_settings.max_memory_usage_for_user = 0;
+    /// If "secret" (in remote_servers) is not in use,
+    /// the user on the shard is not the same as the user on the initiator,
+    /// hence per-user limits should not be applied.
+    if (cluster.getSecret().empty())
+    {
+        /// Does not matter on remote servers, because queries are sent under different user.
+        new_settings.max_concurrent_queries_for_user = 0;
+        new_settings.max_memory_usage_for_user = 0;
 
-    /// Set as unchanged to avoid sending to remote server.
-    new_settings.max_concurrent_queries_for_user.changed = false;
-    new_settings.max_memory_usage_for_user.changed = false;
+        /// Set as unchanged to avoid sending to remote server.
+        new_settings.max_concurrent_queries_for_user.changed = false;
+        new_settings.max_memory_usage_for_user.changed = false;
+    }
 
     if (settings.force_optimize_skip_unused_shards_nesting && settings.force_optimize_skip_unused_shards)
     {
@@ -84,7 +90,7 @@ Pipe executeQuery(
 
     const std::string query = queryToString(query_ast);
 
-    Context new_context = removeUserRestrictionsFromSettings(context, settings, log);
+    Context new_context = updateSettingsForCluster(*cluster, context, settings, log);
 
     ThrottlerPtr user_level_throttler;
     if (auto * process_list_element = context.getProcessListElement())
diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h
index f0d9539770d..3a3fca4d54c 100644
--- a/src/Interpreters/ClusterProxy/executeQuery.h
+++ b/src/Interpreters/ClusterProxy/executeQuery.h
@@ -18,9 +18,16 @@ namespace ClusterProxy
 
 class IStreamFactory;
 
-/// removes different restrictions (like max_concurrent_queries_for_user, max_memory_usage_for_user, etc.)
-/// from settings and creates new context with them
-Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log = nullptr);
+/// Update settings for Distributed query.
+///
+/// - Removes different restrictions (like max_concurrent_queries_for_user, max_memory_usage_for_user, etc.)
+///   (but only if the cluster does not have a secret, since if it has one, the user is the same)
+/// - Updates some settings depending on force_optimize_skip_unused_shards and:
+///   - force_optimize_skip_unused_shards_nesting
+///   - optimize_skip_unused_shards_nesting
+///
+/// @return new Context with adjusted settings
+Context updateSettingsForCluster(const Cluster & cluster, const Context & context, const Settings & settings, Poco::Logger * log = nullptr);
 
 /// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read.
 /// `stream_factory` object encapsulates the logic of creating streams for a different type of query
diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp
index 9a9c150fad4..b07cf83eb85 100644
--- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.cpp
@@ -33,6 +33,10 @@ void PrettyBlockOutputFormat::calculateWidths(
     WidthsPerColumn & widths, Widths & max_padded_widths, Widths & name_widths)
 {
     size_t num_rows = std::min(chunk.getNumRows(), format_settings.pretty.max_rows);
+
+    /// len(num_rows) + len(". ")
+    row_number_width = std::floor(std::log10(num_rows)) + 3;
+
     size_t num_columns = chunk.getNumColumns();
     const auto & columns = chunk.getColumns();
 
@@ -196,9 +200,20 @@ void PrettyBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind)
     std::string middle_values_separator_s = middle_values_separator.str();
     std::string bottom_separator_s = bottom_separator.str();
 
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        /// Write left blank
+        writeString(String(row_number_width, ' '), out);
+    }
     /// Output the block
     writeString(top_separator_s, out);
 
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        /// Write left blank
+        writeString(String(row_number_width, ' '), out);
+    }
+
     /// Names
     writeCString(grid_symbols.bold_bar, out);
     writeCString(" ", out);
@@ -238,12 +253,35 @@
     writeCString(grid_symbols.bold_bar, out);
     writeCString("\n", out);
 
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        /// Write left blank
+        writeString(String(row_number_width, ' '), out);
+    }
     writeString(middle_names_separator_s, out);
 
     for (size_t i = 0; i < num_rows && total_rows + i < max_rows; ++i)
     {
         if (i != 0)
+        {
+            if (format_settings.pretty.output_format_pretty_row_numbers)
+            {
+                /// Write left blank
+                writeString(String(row_number_width, ' '), out);
+            }
             writeString(middle_values_separator_s, out);
+        }
+
+        if (format_settings.pretty.output_format_pretty_row_numbers)
+        {
+            // Write row number
+            auto row_num_string = std::to_string(i + 1) + ". ";
+            for (size_t j = 0; j < row_number_width - row_num_string.size(); ++j)
+            {
+                writeCString(" ", out);
+            }
+            writeString(row_num_string, out);
+        }
 
         writeCString(grid_symbols.bar, out);
 
@@ -262,6 +300,11 @@
         writeCString("\n", out);
     }
 
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        /// Write left blank
+        writeString(String(row_number_width, ' '), out);
+    }
     writeString(bottom_separator_s, out);
 
     total_rows += num_rows;
diff --git a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h
index 59628cf322c..de79fe5ee2a 100644
--- a/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h
+++ b/src/Processors/Formats/Impl/PrettyBlockOutputFormat.h
@@ -33,6 +33,8 @@ protected:
     size_t terminal_width = 0;
     bool suffix_written = false;
 
+    size_t row_number_width = 7; // "10000. "
+
     const FormatSettings format_settings;
 
     using Widths = PODArray<size_t>;
diff --git a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp
index 254bb9381f2..e832dd83388 100644
--- a/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/PrettyCompactBlockOutputFormat.cpp
@@ -69,6 +69,12 @@ void PrettyCompactBlockOutputFormat::writeHeader(
     const Widths & max_widths,
     const Widths & name_widths)
 {
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        /// Write left blank
+        writeString(String(row_number_width, ' '), out);
+    }
+
     const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
                                        utf8_grid_symbols :
                                        ascii_grid_symbols;
@@ -117,6 +123,12 @@ void PrettyCompactBlockOutputFormat::writeHeader(
 
 void PrettyCompactBlockOutputFormat::writeBottom(const Widths & max_widths)
 {
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        /// Write left blank
+        writeString(String(row_number_width, ' '), out);
+    }
+
     const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
                                        utf8_grid_symbols :
                                        ascii_grid_symbols;
@@ -144,6 +156,17 @@ void PrettyCompactBlockOutputFormat::writeRow(
     const WidthsPerColumn & widths,
     const Widths & max_widths)
 {
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        // Write row number
+        auto row_num_string = std::to_string(row_num + 1) + ". ";
+        for (size_t i = 0; i < row_number_width - row_num_string.size(); ++i)
+        {
+            writeCString(" ", out);
+        }
+        writeString(row_num_string, out);
+    }
+
     const GridSymbols & grid_symbols = format_settings.pretty.charset == FormatSettings::Pretty::Charset::UTF8 ?
                                        utf8_grid_symbols :
                                        ascii_grid_symbols;
diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp
index 6b42ea57e1f..f1a5cbac8e0 100644
--- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp
@@ -29,6 +29,10 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind
     Widths name_widths;
     calculateWidths(header, chunk, widths, max_widths, name_widths);
 
+    if (format_settings.pretty.output_format_pretty_row_numbers)
+    {
+        writeString(String(row_number_width, ' '), out);
+    }
     /// Names
     for (size_t i = 0; i < num_columns; ++i)
     {
@@ -66,6 +70,16 @@ void PrettySpaceBlockOutputFormat::write(const Chunk & chunk, PortKind port_kind
 
     for (size_t row = 0; row < num_rows && total_rows + row < max_rows; ++row)
     {
+        if (format_settings.pretty.output_format_pretty_row_numbers)
+        {
+            // Write row number
+            auto row_num_string = std::to_string(row + 1) + ". ";
+            for (size_t i = 0; i < row_number_width - row_num_string.size(); ++i)
+            {
+                writeCString(" ", out);
+            }
+            writeString(row_num_string, out);
+        }
         for (size_t column = 0; column < num_columns; ++column)
         {
             if (column != 0)
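The arena hand-off in the `AggregatingInOrderTransform` change below keeps memory alive exactly as long as the chunk that references it. A simplified sketch of that ownership pattern, with stand-in types in place of `Chunk`, `ChunkInfo` and `Arena`:

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct Arena { std::vector<char> memory; };   /// stand-in for DB::Arena

struct ChunkInfo { std::shared_ptr<Arena> arena; };

struct Chunk
{
    std::vector<int> columns;
    std::shared_ptr<ChunkInfo> info;          /// owns whatever must outlive processing
};

Chunk produce()
{
    auto arena = std::make_shared<Arena>();
    arena->memory.resize(1024);               /// aggregation states would live here

    Chunk chunk;
    chunk.columns = {1, 2, 3};
    /// Attach the arena: it now lives exactly as long as the chunk does.
    chunk.info = std::make_shared<ChunkInfo>(ChunkInfo{std::move(arena)});
    return chunk;
}

int main()
{
    Chunk chunk = produce();
    std::cout << chunk.columns.size() << '\n';
    return 0;  /// chunk destroyed here, so the arena is freed
}
```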
diff --git a/src/Processors/Transforms/AggregatingInOrderTransform.cpp b/src/Processors/Transforms/AggregatingInOrderTransform.cpp
index 7edeff65ec8..0db95bc3b20 100644
--- a/src/Processors/Transforms/AggregatingInOrderTransform.cpp
+++ b/src/Processors/Transforms/AggregatingInOrderTransform.cpp
@@ -4,6 +4,12 @@
 namespace DB
 {
 
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+
 AggregatingInOrderTransform::AggregatingInOrderTransform(
     Block header, AggregatingTransformParamsPtr params_,
     const SortDescription & group_by_description_, size_t res_block_size_)
@@ -140,6 +146,24 @@ void AggregatingInOrderTransform::consume(Chunk chunk)
         block_end_reached = true;
         need_generate = true;
         cur_block_size = 0;
+
+        /// Arenas cannot be destroyed here, since later, in FinalizingSimpleTransform,
+        /// there will be finalizeChunk(), but even after
+        /// finalizeChunk() we cannot destroy the arena, since some memory
+        /// from the Arena is still in use, so we attach it to the Chunk to
+        /// remove it once it has been consumed.
+        if (params->final)
+        {
+            if (variants.aggregates_pools.size() != 1)
+                throw Exception("Too many arenas", ErrorCodes::LOGICAL_ERROR);
+
+            Arenas arenas(1, std::make_shared<Arena>());
+            std::swap(variants.aggregates_pools, arenas);
+            variants.aggregates_pool = variants.aggregates_pools.at(0).get();
+
+            chunk.setChunkInfo(std::make_shared<AggregatedArenasChunkInfo>(std::move(arenas)));
+        }
+
         return;
     }
 
diff --git a/src/Processors/Transforms/AggregatingTransform.h b/src/Processors/Transforms/AggregatingTransform.h
index 235d01ebc77..ab18992151a 100644
--- a/src/Processors/Transforms/AggregatingTransform.h
+++ b/src/Processors/Transforms/AggregatingTransform.h
@@ -8,6 +8,15 @@
 namespace DB
 {
 
+class AggregatedArenasChunkInfo : public ChunkInfo
+{
+public:
+    Arenas arenas;
+    AggregatedArenasChunkInfo(Arenas arenas_)
+        : arenas(std::move(arenas_))
+    {}
+};
+
 class AggregatedChunkInfo : public ChunkInfo
 {
 public:
diff --git a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp
index 10a0fae2696..3d85ffede9a 100644
--- a/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp
+++ b/src/Processors/tests/gtest_exception_on_incorrect_pipeline.cpp
@@ -49,21 +49,15 @@ TEST(Processors, PortsNotConnected)
     processors.emplace_back(std::move(source));
     processors.emplace_back(std::move(sink));
 
-    auto exec = [&]()
+    try
     {
-
-        try
-        {
-            PipelineExecutor executor(processors);
-            executor.execute(1);
-        }
-        catch (DB::Exception & e)
-        {
-            std::cout << e.displayText() << std::endl;
-            ASSERT_TRUE(e.displayText().find("pipeline") != std::string::npos);
-            throw;
-        }
-    };
-
-    ASSERT_THROW(exec(), DB::Exception);
+        PipelineExecutor executor(processors);
+        executor.execute(1);
+        ASSERT_TRUE(false) << "Should have thrown.";
+    }
+    catch (DB::Exception & e)
+    {
+        std::cout << e.displayText() << std::endl;
+        ASSERT_TRUE(e.displayText().find("pipeline") != std::string::npos) << "Expected 'pipeline', got: " << e.displayText();
+    }
 }
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 97f481eee8a..f2b26a928c1 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1330,6 +1330,44 @@ bool isSafeForPartitionKeyConversion(const IDataType * from, const IDataType * t
     return false;
 }
 
+/// Special check for alters of VersionedCollapsingMergeTree version column
+void checkVersionColumnTypesConversion(const IDataType * old_type, const IDataType * new_type, const String column_name)
+{
+    /// Check new type can be used as version
+    if (!new_type->canBeUsedAsVersion())
+        throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) +
+            " to type " + new_type->getName() +
+            " because version column must be of an integer type or of type Date or DateTime"
+            , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
+
+    auto which_new_type = WhichDataType(new_type);
+    auto which_old_type = WhichDataType(old_type);
+
+    /// Check alter to different sign or float -> int and so on
+    if ((which_old_type.isInt() && !which_new_type.isInt())
+        || (which_old_type.isUInt() && !which_new_type.isUInt())
+        || (which_old_type.isDate() && !which_new_type.isDate())
+        || (which_old_type.isDateTime() && !which_new_type.isDateTime())
+        || (which_old_type.isFloat() && !which_new_type.isFloat()))
+    {
+        throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) +
+            " from type " + old_type->getName() +
+            " to type " + new_type->getName() + " because new type will change sort order of version column." +
+            " The only possible conversion is expansion of the number of bytes of the current type."
+            , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
+    }
+
+    /// Check alter to smaller size: UInt64 -> UInt32 and so on
+    if (new_type->getSizeOfValueInMemory() < old_type->getSizeOfValueInMemory())
+    {
+        throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) +
+            " from type " + old_type->getName() +
+            " to type " + new_type->getName() + " because new type is smaller than current in the number of bytes." +
+            " The only possible conversion is expansion of the number of bytes of the current type."
+            , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN);
+    }
+}
+
 }
 
 void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) const
@@ -1416,6 +1454,18 @@
             getPartitionIDFromQuery(command.partition, global_context);
         }
 
+        /// Some type changes for the version column are allowed despite it being part of the sorting key
+        if (command.type == AlterCommand::MODIFY_COLUMN && command.column_name == merging_params.version_column)
+        {
+            const IDataType * new_type = command.data_type.get();
+            const IDataType * old_type = old_types[command.column_name];
+
+            checkVersionColumnTypesConversion(old_type, new_type, command.column_name);
+
+            /// No other checks required
+            continue;
+        }
+
         if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned)
         {
             throw Exception(
diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp
index 7dfee90588a..6a5ef58a326 100644
--- a/src/Storages/getStructureOfRemoteTable.cpp
+++ b/src/Storages/getStructureOfRemoteTable.cpp
@@ -25,43 +25,8 @@ namespace ErrorCodes
 }

-ColumnsDescription getStructureOfRemoteTable(
-    const Cluster & cluster,
-    const StorageID & table_id,
-    const Context & context,
-    const ASTPtr & table_func_ptr)
-{
-    const auto & shards_info = cluster.getShardsInfo();
-
-    std::string fail_messages;
-
-    for (const auto & shard_info : shards_info)
-    {
-        try
-        {
-            const auto & res = getStructureOfRemoteTableInShard(shard_info, table_id, context, table_func_ptr);
-
-            /// Expect at least some columns.
-            /// This is a hack to handle the empty block case returned by Connection when skip_unavailable_shards is set.
-            if (res.empty())
-                continue;
-
-            return res;
-        }
-        catch (const NetException &)
-        {
-            std::string fail_message = getCurrentExceptionMessage(false);
-            fail_messages += fail_message + '\n';
-            continue;
-        }
-    }
-
-    throw NetException(
-        "All attempts to get table structure failed. Log: \n\n" + fail_messages + "\n",
-        ErrorCodes::NO_REMOTE_SHARD_AVAILABLE);
-}
-
 ColumnsDescription getStructureOfRemoteTableInShard(
+    const Cluster & cluster,
     const Cluster::ShardInfo & shard_info,
     const StorageID & table_id,
     const Context & context,
@@ -96,7 +61,7 @@ ColumnsDescription getStructureOfRemoteTableInShard(

     ColumnsDescription res;

-    auto new_context = ClusterProxy::removeUserRestrictionsFromSettings(context, context.getSettingsRef());
+    auto new_context = ClusterProxy::updateSettingsForCluster(cluster, context, context.getSettingsRef());

     /// Expect only needed columns from the result of DESC TABLE. NOTE 'comment' column is ignored for compatibility reasons.
     Block sample_block
@@ -151,4 +116,40 @@ ColumnsDescription getStructureOfRemoteTableInShard(
     return res;
 }

+ColumnsDescription getStructureOfRemoteTable(
+    const Cluster & cluster,
+    const StorageID & table_id,
+    const Context & context,
+    const ASTPtr & table_func_ptr)
+{
+    const auto & shards_info = cluster.getShardsInfo();
+
+    std::string fail_messages;
+
+    for (const auto & shard_info : shards_info)
+    {
+        try
+        {
+            const auto & res = getStructureOfRemoteTableInShard(cluster, shard_info, table_id, context, table_func_ptr);
+
+            /// Expect at least some columns.
+            /// This is a hack to handle the empty block case returned by Connection when skip_unavailable_shards is set.
+            if (res.empty())
+                continue;
+
+            return res;
+        }
+        catch (const NetException &)
+        {
+            std::string fail_message = getCurrentExceptionMessage(false);
+            fail_messages += fail_message + '\n';
+            continue;
+        }
+    }
+
+    throw NetException(
+        "All attempts to get table structure failed. Log: \n\n" + fail_messages + "\n",
+        ErrorCodes::NO_REMOTE_SHARD_AVAILABLE);
+}
+
 }
diff --git a/src/Storages/getStructureOfRemoteTable.h b/src/Storages/getStructureOfRemoteTable.h
index fa7c80c2800..af418144cb0 100644
--- a/src/Storages/getStructureOfRemoteTable.h
+++ b/src/Storages/getStructureOfRemoteTable.h
@@ -19,10 +19,4 @@ ColumnsDescription getStructureOfRemoteTable(
     const Context & context,
     const ASTPtr & table_func_ptr = nullptr);

-ColumnsDescription getStructureOfRemoteTableInShard(
-    const Cluster::ShardInfo & shard_info,
-    const StorageID & table_id,
-    const Context & context,
-    const ASTPtr & table_func_ptr = nullptr);
-
 }
diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py
index bd9e6d111ca..b1daf2271d0 100644
--- a/tests/integration/test_distributed_inter_server_secret/test.py
+++ b/tests/integration/test_distributed_inter_server_secret/test.py
@@ -79,6 +79,20 @@ def get_query_user_info(node, query_pattern):
     type = 'QueryFinish'
     """.format(query_pattern)).strip().split('\t')

+# @return -- the value of the given setting on the shard ('' if it was not sent there)
+def get_query_setting_on_shard(node, query_pattern, setting):
+    node.query("SYSTEM FLUSH LOGS")
+    return node.query("""
+    SELECT (arrayFilter(x -> ((x.1) = '{}'), arrayZip(Settings.Names, Settings.Values))[1]).2
+    FROM system.query_log
+    WHERE
+        query LIKE '%{}%' AND
+        NOT is_initial_query AND
+        query NOT LIKE '%system.query_log%' AND
+        type = 'QueryFinish'
+    LIMIT 1
+    """.format(setting, query_pattern)).strip()
+
 def test_insecure():
     n1.query('SELECT * FROM dist_insecure')
@@ -149,4 +163,45 @@ def test_user_secure_cluster(user, password):
     assert get_query_user_info(n1, id_) == [user, user]
     assert get_query_user_info(n2, id_) == [user, user]

+@users
+def test_per_user_inline_settings_insecure_cluster(user, password):
+    id_ = 'query-ddl-settings-dist_insecure-' + user
+    query_with_id(n1, id_, """
+    SELECT * FROM dist_insecure
+    SETTINGS
+        prefer_localhost_replica=0,
+        max_memory_usage_for_user=1e9,
+        max_untracked_memory=0
+    """, user=user, password=password)
+    assert get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user') == ''
+@users
+def test_per_user_inline_settings_secure_cluster(user, password):
+    id_ = 'query-ddl-settings-dist_secure-' + user
+    query_with_id(n1, id_, """
+    SELECT * FROM dist_secure
+    SETTINGS
+        prefer_localhost_replica=0,
+        max_memory_usage_for_user=1e9,
+        max_untracked_memory=0
+    """, user=user, password=password)
+    assert int(get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user')) == int(1e9)
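The rename from `removeUserRestrictionsFromSettings` to `updateSettingsForCluster` reflects that the adjustment now depends on the cluster itself: judging by the tests above and below, per-user limits such as `max_memory_usage_for_user` reach the shards of a cluster that has an inter-server secret (where the query runs under the initial user) and are stripped otherwise. A rough sketch of that decision — `Cluster::getSecret()` and the exact set of settings cleared here are assumptions made for illustration, not the real function body:

```cpp
/// Rough sketch, not the real implementation (which lives under
/// src/Interpreters/ClusterProxy/); the cleared settings are assumed.
Context updateSettingsForCluster(const Cluster & cluster, const Context & context, const Settings & settings)
{
    Settings new_settings = settings;

    if (cluster.getSecret().empty())
    {
        /// Without an inter-server secret the shard executes the query under
        /// a different (default) user, so the initial user's restrictions
        /// would be meaningless there and are reset to "unlimited".
        new_settings.max_memory_usage_for_user = 0;
        new_settings.max_concurrent_queries_for_user = 0;
    }
    /// With a secret the shard authenticates and runs the query as the
    /// initial user, so per-user settings are forwarded untouched.

    Context new_context = context;
    new_context.setSettings(new_settings);
    return new_context;
}
```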
+@users
+def test_per_user_protocol_settings_insecure_cluster(user, password):
+    id_ = 'query-protocol-settings-dist_insecure-' + user
+    query_with_id(n1, id_, 'SELECT * FROM dist_insecure', user=user, password=password, settings={
+        'prefer_localhost_replica': 0,
+        'max_memory_usage_for_user': int(1e9),
+        'max_untracked_memory': 0,
+    })
+    assert get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user') == ''
+@users
+def test_per_user_protocol_settings_secure_cluster(user, password):
+    id_ = 'query-protocol-settings-dist_secure-' + user
+    query_with_id(n1, id_, 'SELECT * FROM dist_secure', user=user, password=password, settings={
+        'prefer_localhost_replica': 0,
+        'max_memory_usage_for_user': int(1e9),
+        'max_untracked_memory': 0,
+    })
+    assert int(get_query_setting_on_shard(n1, id_, 'max_memory_usage_for_user')) == int(1e9)
+
 # TODO: check user for INSERT
diff --git a/tests/integration/test_s3_with_https/test.py b/tests/integration/test_s3_with_https/test.py
index 17f24bba794..58e3b3c2a3b 100644
--- a/tests/integration/test_s3_with_https/test.py
+++ b/tests/integration/test_s3_with_https/test.py
@@ -10,7 +10,7 @@ logging.getLogger().addHandler(logging.StreamHandler())
 def check_proxy_logs(cluster, proxy_instance):
     logs = cluster.get_container_logs(proxy_instance)
     # Check that all possible interactions with Minio are present
-    for http_method in ["PUT", "GET", "DELETE"]:
+    for http_method in ["PUT", "GET", "POST"]:
         assert logs.find(http_method + " https://minio1") >= 0
diff --git a/tests/integration/test_s3_with_proxy/test.py b/tests/integration/test_s3_with_proxy/test.py
index 70a50ae0e15..586895fffe5 100644
--- a/tests/integration/test_s3_with_proxy/test.py
+++ b/tests/integration/test_s3_with_proxy/test.py
@@ -37,7 +37,7 @@ def cluster():
     cluster.shutdown()

-def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET", "DELETE"}):
+def check_proxy_logs(cluster, proxy_instance, http_methods={"POST", "PUT", "GET"}):
     for i in range(10):
         logs = cluster.get_container_logs(proxy_instance)
         # Check with retry that all possible interactions with Minio are present
@@ -73,4 +73,4 @@ def test_s3_with_proxy_list(cluster, policy):
     node.query("DROP TABLE IF EXISTS s3_test NO DELAY")

     for proxy in ["proxy1", "proxy2"]:
-        check_proxy_logs(cluster, proxy, ["PUT", "GET", "DELETE"])
+        check_proxy_logs(cluster, proxy, ["PUT", "GET"])
diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility.reference b/tests/queries/0_stateless/00921_datetime64_compatibility.reference
index 1a909c8c754..a42517104b9 100644
--- a/tests/queries/0_stateless/00921_datetime64_compatibility.reference
+++ b/tests/queries/0_stateless/00921_datetime64_compatibility.reference
@@ -88,38 +88,37 @@ SELECT toStartOfWeek(N)
"Date","2019-09-15"
------------------------------------------
SELECT toStartOfDay(N)
-
-Code: 43: Function toStartOfDay supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
+"DateTime","2019-09-16 00:00:00"
"DateTime('Europe/Minsk')","2019-09-16 00:00:00"
"DateTime('Europe/Minsk')","2019-09-16 00:00:00"
------------------------------------------
SELECT toStartOfHour(N)

-Code: 43: Function toStartOfHour supports a 2nd argument (optional) that must be non-empty and be a valid time zone.
+Code: 43: Illegal type Date of argument for function toStartOfHour.
"DateTime('Europe/Minsk')","2019-09-16 19:00:00" "DateTime('Europe/Minsk')","2019-09-16 19:00:00" ------------------------------------------ SELECT toStartOfMinute(N) -Code: 43: Function toStartOfMinute supports a 2nd argument (optional) that must be non-empty and be a valid time zone. +Code: 43: Illegal type Date of argument for function toStartOfMinute. "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfFiveMinute(N) -Code: 43: Function toStartOfFiveMinute supports a 2nd argument (optional) that must be non-empty and be a valid time zone. +Code: 43: Illegal type Date of argument for function toStartOfFiveMinute. "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfTenMinutes(N) -Code: 43: Function toStartOfTenMinutes supports a 2nd argument (optional) that must be non-empty and be a valid time zone. +Code: 43: Illegal type Date of argument for function toStartOfTenMinutes. "DateTime('Europe/Minsk')","2019-09-16 19:20:00" "DateTime('Europe/Minsk')","2019-09-16 19:20:00" ------------------------------------------ SELECT toStartOfFifteenMinutes(N) -Code: 43: Function toStartOfFifteenMinutes supports a 2nd argument (optional) that must be non-empty and be a valid time zone. +Code: 43: Illegal type Date of argument for function toStartOfFifteenMinutes. "DateTime('Europe/Minsk')","2019-09-16 19:15:00" "DateTime('Europe/Minsk')","2019-09-16 19:15:00" ------------------------------------------ @@ -167,7 +166,7 @@ Code: 43: Illegal type Date of argument for function date_trunc. ------------------------------------------ SELECT toTime(N) -Code: 43: Function toTime supports a 2nd argument (optional) that must be non-empty and be a valid time zone. +Code: 43: Illegal type Date of argument for function toTime. "DateTime('Europe/Minsk')","1970-01-02 19:20:11" "DateTime('Europe/Minsk')","1970-01-02 19:20:11" ------------------------------------------ @@ -233,7 +232,7 @@ SELECT toYearWeek(N) ------------------------------------------ SELECT timeSlot(N) -Code: 43: Function timeSlot supports a 2nd argument (optional) that must be non-empty and be a valid time zone. +Code: 43: Illegal type Date of argument for function timeSlot. "DateTime('Europe/Minsk')","2019-09-16 19:00:00" "DateTime('Europe/Minsk')","2019-09-16 19:00:00" ------------------------------------------ diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference index 03ed07cf1a4..bcc7aebeae8 100644 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference @@ -2,4 +2,4 @@ Instruction check fail. The CPU does not support SSSE3 instruction set. Instruction check fail. The CPU does not support SSE4.1 instruction set. Instruction check fail. The CPU does not support SSE4.2 instruction set. Instruction check fail. The CPU does not support POPCNT instruction set. -1 +MADV_DONTNEED does not zeroed page. 
jemalloc will be broken diff --git a/tests/queries/0_stateless/01472_toStartOfInterval_disallow_empty_tz_field.sql b/tests/queries/0_stateless/01472_toStartOfInterval_disallow_empty_tz_field.sql index f7d6fd2f357..c2bdac2b279 100644 --- a/tests/queries/0_stateless/01472_toStartOfInterval_disallow_empty_tz_field.sql +++ b/tests/queries/0_stateless/01472_toStartOfInterval_disallow_empty_tz_field.sql @@ -21,3 +21,7 @@ SELECT toStartOfHour(toDateTime('2017-12-31 01:59:00', 'UTC'), 'UTC'); -- succes SELECT toStartOfMinute(toDateTime('2017-12-31 00:00:00', 'UTC'), ''); -- {serverError 43} SELECT toStartOfMinute(toDateTime('2017-12-31 00:01:30', 'UTC'), 'UTC'); -- success + +-- special case - allow empty time_zone when using functions like today(), yesterday() etc. +SELECT toStartOfDay(today()) FORMAT Null; -- success +SELECT toStartOfDay(yesterday()) FORMAT Null; -- success diff --git a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.reference b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.reference new file mode 100644 index 00000000000..9010c371fdf --- /dev/null +++ b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.reference @@ -0,0 +1,212 @@ +┏━━━━━━━━┓ +┃ number ┃ +┡━━━━━━━━┩ +│ 0 │ +├────────┤ +│ 1 │ +├────────┤ +│ 2 │ +├────────┤ +│ 3 │ +├────────┤ +│ 4 │ +├────────┤ +│ 5 │ +├────────┤ +│ 6 │ +├────────┤ +│ 7 │ +├────────┤ +│ 8 │ +├────────┤ +│ 9 │ +└────────┘ +┌─number─┐ +│ 0 │ +│ 1 │ +│ 2 │ +│ 3 │ +│ 4 │ +│ 5 │ +│ 6 │ +│ 7 │ +│ 8 │ +│ 9 │ +└────────┘ +┌─number─┐ +│ 0 │ +│ 1 │ +│ 2 │ +│ 3 │ +│ 4 │ +│ 5 │ +│ 6 │ +│ 7 │ +│ 8 │ +│ 9 │ +└────────┘ +┏━━━━━━━━┓ +┃ number ┃ +┡━━━━━━━━┩ +│ 0 │ +├────────┤ +│ 1 │ +├────────┤ +│ 2 │ +├────────┤ +│ 3 │ +├────────┤ +│ 4 │ +├────────┤ +│ 5 │ +├────────┤ +│ 6 │ +├────────┤ +│ 7 │ +├────────┤ +│ 8 │ +├────────┤ +│ 9 │ +└────────┘ +┌─number─┐ +│ 0 │ +│ 1 │ +│ 2 │ +│ 3 │ +│ 4 │ +│ 5 │ +│ 6 │ +│ 7 │ +│ 8 │ +│ 9 │ +└────────┘ + number + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + number + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ + 1. │ 0 │ + ├────────┤ + 2. │ 1 │ + ├────────┤ + 3. │ 2 │ + ├────────┤ + 4. │ 3 │ + ├────────┤ + 5. │ 4 │ + ├────────┤ + 6. │ 5 │ + ├────────┤ + 7. │ 6 │ + ├────────┤ + 8. │ 7 │ + ├────────┤ + 9. │ 8 │ + ├────────┤ +10. │ 9 │ + └────────┘ + ┌─number─┐ + 1. │ 0 │ + 2. │ 1 │ + 3. │ 2 │ + 4. │ 3 │ + 5. │ 4 │ + 6. │ 5 │ + 7. │ 6 │ + 8. │ 7 │ + 9. │ 8 │ +10. │ 9 │ + └────────┘ + ┌─number─┐ + 1. │ 0 │ + 2. │ 1 │ + 3. │ 2 │ + 4. │ 3 │ + 5. │ 4 │ + 6. │ 5 │ + 7. │ 6 │ + 8. │ 7 │ + 9. │ 8 │ +10. │ 9 │ + └────────┘ + ┏━━━━━━━━┓ + ┃ number ┃ + ┡━━━━━━━━┩ + 1. │ 0 │ + ├────────┤ + 2. │ 1 │ + ├────────┤ + 3. │ 2 │ + ├────────┤ + 4. │ 3 │ + ├────────┤ + 5. │ 4 │ + ├────────┤ + 6. │ 5 │ + ├────────┤ + 7. │ 6 │ + ├────────┤ + 8. │ 7 │ + ├────────┤ + 9. │ 8 │ + ├────────┤ +10. │ 9 │ + └────────┘ + ┌─number─┐ + 1. │ 0 │ + 2. │ 1 │ + 3. │ 2 │ + 4. │ 3 │ + 5. │ 4 │ + 6. │ 5 │ + 7. │ 6 │ + 8. │ 7 │ + 9. │ 8 │ +10. │ 9 │ + └────────┘ + number + + 1. 0 + 2. 1 + 3. 2 + 4. 3 + 5. 4 + 6. 5 + 7. 6 + 8. 7 + 9. 8 +10. 9 + number + + 1. 0 + 2. 1 + 3. 2 + 4. 3 + 5. 4 + 6. 5 + 7. 6 + 8. 7 + 9. 8 +10. 
9 diff --git a/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql new file mode 100644 index 00000000000..3c0eeaf0e53 --- /dev/null +++ b/tests/queries/0_stateless/01509_output_format_pretty_row_numbers.sql @@ -0,0 +1,15 @@ +SELECT * FROM numbers(10) FORMAT Pretty; +SELECT * FROM numbers(10) FORMAT PrettyCompact; +SELECT * FROM numbers(10) FORMAT PrettyCompactMonoBlock; +SELECT * FROM numbers(10) FORMAT PrettyNoEscapes; +SELECT * FROM numbers(10) FORMAT PrettyCompactNoEscapes; +SELECT * FROM numbers(10) FORMAT PrettySpaceNoEscapes; +SELECT * FROM numbers(10) FORMAT PrettySpace; +SET output_format_pretty_row_numbers=1; +SELECT * FROM numbers(10) FORMAT Pretty; +SELECT * FROM numbers(10) FORMAT PrettyCompact; +SELECT * FROM numbers(10) FORMAT PrettyCompactMonoBlock; +SELECT * FROM numbers(10) FORMAT PrettyNoEscapes; +SELECT * FROM numbers(10) FORMAT PrettyCompactNoEscapes; +SELECT * FROM numbers(10) FORMAT PrettySpaceNoEscapes; +SELECT * FROM numbers(10) FORMAT PrettySpace; diff --git a/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree.reference b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree.reference new file mode 100644 index 00000000000..3ff3fea93e5 --- /dev/null +++ b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree.reference @@ -0,0 +1,13 @@ +1 1 1 -1 +2 2 2 -1 +CREATE TABLE default.table_with_version\n(\n `key` UInt64,\n `value` String,\n `version` UInt8,\n `sign` Int8\n)\nENGINE = VersionedCollapsingMergeTree(sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192 +1 1 1 -1 +2 2 2 -1 +CREATE TABLE default.table_with_version\n(\n `key` UInt64,\n `value` String,\n `version` UInt32,\n `sign` Int8\n)\nENGINE = VersionedCollapsingMergeTree(sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192 +1 1 2 1 +2 2 2 -1 +1 1 2 1 +2 2 2 -1 +3 3 65555 1 +1 1 2 1 +2 2 2 -1 diff --git a/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree.sql b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree.sql new file mode 100644 index 00000000000..8f0b2d12ab0 --- /dev/null +++ b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree.sql @@ -0,0 +1,46 @@ +DROP TABLE IF EXISTS table_with_version; + +CREATE TABLE table_with_version +( + key UInt64, + value String, + version UInt8, + sign Int8 +) +ENGINE VersionedCollapsingMergeTree(sign, version) +ORDER BY key; + +INSERT INTO table_with_version VALUES (1, '1', 1, -1); +INSERT INTO table_with_version VALUES (2, '2', 2, -1); + +SELECT * FROM table_with_version ORDER BY key; + +SHOW CREATE TABLE table_with_version; + +ALTER TABLE table_with_version MODIFY COLUMN version UInt32; + +SELECT * FROM table_with_version ORDER BY key; + +SHOW CREATE TABLE table_with_version; + +INSERT INTO TABLE table_with_version VALUES(1, '1', 1, 1); +INSERT INTO TABLE table_with_version VALUES(1, '1', 2, 1); + +SELECT * FROM table_with_version FINAL ORDER BY key; + +INSERT INTO TABLE table_with_version VALUES(3, '3', 65555, 1); + +SELECT * FROM table_with_version FINAL ORDER BY key; + +INSERT INTO TABLE table_with_version VALUES(3, '3', 65555, -1); + +SELECT * FROM table_with_version FINAL ORDER BY key; + +ALTER TABLE table_with_version MODIFY COLUMN version String; --{serverError 524} +ALTER TABLE table_with_version MODIFY COLUMN version Int64; --{serverError 524} +ALTER TABLE table_with_version MODIFY COLUMN version 
UInt16; --{serverError 524} +ALTER TABLE table_with_version MODIFY COLUMN version Float64; --{serverError 524} +ALTER TABLE table_with_version MODIFY COLUMN version Date; --{serverError 524} +ALTER TABLE table_with_version MODIFY COLUMN version DateTime; --{serverError 524} + +DROP TABLE IF EXISTS table_with_version; diff --git a/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree_zookeeper.reference b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree_zookeeper.reference new file mode 100644 index 00000000000..c6cd81a4aca --- /dev/null +++ b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree_zookeeper.reference @@ -0,0 +1,16 @@ +1 1 1 -1 +2 2 2 -1 +CREATE TABLE default.table_with_version_replicated_1\n(\n `key` UInt64,\n `value` String,\n `version` UInt8,\n `sign` Int8\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/test_01511/t\', \'1\', sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192 +1 1 1 -1 +2 2 2 -1 +CREATE TABLE default.table_with_version_replicated_1\n(\n `key` UInt64,\n `value` String,\n `version` UInt32,\n `sign` Int8\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/test_01511/t\', \'1\', sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192 +1 1 2 1 +2 2 2 -1 +1 1 2 1 +2 2 2 -1 +3 3 65555 1 +1 1 2 1 +2 2 2 -1 +CREATE TABLE default.table_with_version_replicated_2\n(\n `key` UInt64,\n `value` String,\n `version` UInt32,\n `sign` Int8\n)\nENGINE = ReplicatedVersionedCollapsingMergeTree(\'/clickhouse/test_01511/t\', \'2\', sign, version)\nORDER BY key\nSETTINGS index_granularity = 8192 +1 1 2 1 +2 2 2 -1 diff --git a/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree_zookeeper.sql b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree_zookeeper.sql new file mode 100644 index 00000000000..1307f055e5c --- /dev/null +++ b/tests/queries/0_stateless/01511_alter_version_versioned_collapsing_merge_tree_zookeeper.sql @@ -0,0 +1,64 @@ +DROP TABLE IF EXISTS table_with_version_replicated_1; +DROP TABLE IF EXISTS table_with_version_replicated_2; + +CREATE TABLE table_with_version_replicated_1 +( + key UInt64, + value String, + version UInt8, + sign Int8 +) +ENGINE ReplicatedVersionedCollapsingMergeTree('/clickhouse/test_01511/t', '1', sign, version) +ORDER BY key; + +CREATE TABLE table_with_version_replicated_2 +( + key UInt64, + value String, + version UInt8, + sign Int8 +) +ENGINE ReplicatedVersionedCollapsingMergeTree('/clickhouse/test_01511/t', '2', sign, version) +ORDER BY key; + +INSERT INTO table_with_version_replicated_1 VALUES (1, '1', 1, -1); +INSERT INTO table_with_version_replicated_1 VALUES (2, '2', 2, -1); + +SELECT * FROM table_with_version_replicated_1 ORDER BY key; + +SHOW CREATE TABLE table_with_version_replicated_1; + +ALTER TABLE table_with_version_replicated_1 MODIFY COLUMN version UInt32 SETTINGS replication_alter_partitions_sync=2; + +SELECT * FROM table_with_version_replicated_1 ORDER BY key; + +SHOW CREATE TABLE table_with_version_replicated_1; + +INSERT INTO TABLE table_with_version_replicated_1 VALUES(1, '1', 1, 1); +INSERT INTO TABLE table_with_version_replicated_1 VALUES(1, '1', 2, 1); + +SELECT * FROM table_with_version_replicated_1 FINAL ORDER BY key; + +INSERT INTO TABLE table_with_version_replicated_1 VALUES(3, '3', 65555, 1); + +SELECT * FROM table_with_version_replicated_1 FINAL ORDER BY key; + +INSERT INTO TABLE table_with_version_replicated_1 VALUES(3, '3', 65555, 
-1);
+
+SYSTEM SYNC REPLICA table_with_version_replicated_2;
+
+DETACH TABLE table_with_version_replicated_1;
+DETACH TABLE table_with_version_replicated_2;
+ATTACH TABLE table_with_version_replicated_2;
+ATTACH TABLE table_with_version_replicated_1;
+
+SELECT * FROM table_with_version_replicated_1 FINAL ORDER BY key;
+
+SYSTEM SYNC REPLICA table_with_version_replicated_2;
+
+SHOW CREATE TABLE table_with_version_replicated_2;
+
+SELECT * FROM table_with_version_replicated_2 FINAL ORDER BY key;
+
+DROP TABLE IF EXISTS table_with_version_replicated_1;
+DROP TABLE IF EXISTS table_with_version_replicated_2;
diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.reference b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql
new file mode 100644
index 00000000000..38920262fba
--- /dev/null
+++ b/tests/queries/0_stateless/01513_optimize_aggregation_in_order_memory.sql
@@ -0,0 +1,16 @@
+drop table if exists data_01513;
+create table data_01513 (key String) engine=MergeTree() order by key;
+-- 10e3 distinct groups, 200 rows each
+insert into data_01513 select number%10e3 from numbers(toUInt64(2e6));
+-- reduce the number of parts to 1
+optimize table data_01513 final;
+
+-- this is enough to trigger a non-reusable Chunk in the Arena.
+set max_memory_usage='500M';
+set max_threads=1;
+set max_block_size=500;
+
+select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null; -- { serverError 241; }
+select key, groupArray(repeat('a', 200)), count() from data_01513 group by key format Null settings optimize_aggregation_in_order=1;
+-- for WITH TOTALS the previous groups should be kept.
+select key, groupArray(repeat('a', 200)), count() from data_01513 group by key with totals format Null settings optimize_aggregation_in_order=1; -- { serverError 241; }
diff --git a/website/templates/index/performance.html b/website/templates/index/performance.html
index 61cd31a06ca..75e2a43f902 100644
--- a/website/templates/index/performance.html
+++ b/website/templates/index/performance.html
@@ -18,27 +18,26 @@
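The new 01513 test exercises the same arena hand-off introduced in AggregatingTransform at the top of this section: with `optimize_aggregation_in_order=1` and a small `max_block_size`, result chunks are emitted while their columns still reference arena memory, so ownership of the old arenas has to travel with each chunk. A toy check of that ownership contract, reusing the stand-in `Arena`/`Chunk` types from the first sketch above (illustrative only, not ClickHouse code):

```cpp
#include <cassert>
#include <memory>

// Reuses the stand-in types and finalizeChunk() from the first sketch above.
int main()
{
    Arenas pools(1, std::make_shared<Arena>());
    ArenaPtr old_arena = pools.at(0);

    Chunk chunk;
    finalizeChunk(chunk, pools);

    assert(pools.at(0) != old_arena);    // the transform received a fresh arena
    assert(old_arena.use_count() == 2);  // this scope + the chunk's info

    chunk.setChunkInfo(nullptr);         // dropping the chunk info ...
    assert(old_arena.use_count() == 1);  // ... is what finally releases the arena
}
```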