diff --git a/CHANGELOG.md b/CHANGELOG.md index 950bdc7e374..345ee2c6213 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,14 +22,14 @@ * Add setting `allow_non_metadata_alters` which restricts to execute `ALTER` queries which modify data on disk. Disabled be default. Closes [#11547](https://github.com/ClickHouse/ClickHouse/issues/11547). [#12635](https://github.com/ClickHouse/ClickHouse/pull/12635) ([alesapin](https://github.com/alesapin)). * A function `formatRow` is added to support turning arbitrary expressions into a string via given format. It's useful for manipulating SQL outputs and is quite versatile combined with the `columns` function. [#12574](https://github.com/ClickHouse/ClickHouse/pull/12574) ([Amos Bird](https://github.com/amosbird)). * Add `FROM_UNIXTIME` function for compatibility with MySQL, related to [12149](https://github.com/ClickHouse/ClickHouse/issues/12149). [#12484](https://github.com/ClickHouse/ClickHouse/pull/12484) ([flynn](https://github.com/ucasFL)). -* Allow Nullable types as keys in MergeTree tables if `allow_nullable_key` table setting is enabled. https://github.com/ClickHouse/ClickHouse/issues/5319. [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) ([Amos Bird](https://github.com/amosbird)). +* Allow Nullable types as keys in MergeTree tables if `allow_nullable_key` table setting is enabled. Closes [#5319](https://github.com/ClickHouse/ClickHouse/issues/5319). [#12433](https://github.com/ClickHouse/ClickHouse/pull/12433) ([Amos Bird](https://github.com/amosbird)). * Integration with [COS](https://intl.cloud.tencent.com/product/cos). [#12386](https://github.com/ClickHouse/ClickHouse/pull/12386) ([fastio](https://github.com/fastio)). * Add mapAdd and mapSubtract functions for adding/subtracting key-mapped values. [#11735](https://github.com/ClickHouse/ClickHouse/pull/11735) ([Ildus Kurbangaliev](https://github.com/ildus)). 
#### Bug Fix * Fix premature `ON CLUSTER` timeouts for queries that must be executed on a single replica. Fixes [#6704](https://github.com/ClickHouse/ClickHouse/issues/6704), [#7228](https://github.com/ClickHouse/ClickHouse/issues/7228), [#13361](https://github.com/ClickHouse/ClickHouse/issues/13361), [#11884](https://github.com/ClickHouse/ClickHouse/issues/11884). [#13450](https://github.com/ClickHouse/ClickHouse/pull/13450) ([alesapin](https://github.com/alesapin)). -* Fix crash in mark inclusion search introduced in https://github.com/ClickHouse/ClickHouse/pull/12277. [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)). +* Fix crash in mark inclusion search introduced in [#12277](https://github.com/ClickHouse/ClickHouse/pull/12277). [#14225](https://github.com/ClickHouse/ClickHouse/pull/14225) ([Amos Bird](https://github.com/amosbird)). * Fix race condition in external dictionaries with cache layout which can lead server crash. [#12566](https://github.com/ClickHouse/ClickHouse/pull/12566) ([alesapin](https://github.com/alesapin)). * Fix visible data clobbering by progress bar in client in interactive mode. This fixes [#12562](https://github.com/ClickHouse/ClickHouse/issues/12562) and [#13369](https://github.com/ClickHouse/ClickHouse/issues/13369) and [#13584](https://github.com/ClickHouse/ClickHouse/issues/13584) and fixes [#12964](https://github.com/ClickHouse/ClickHouse/issues/12964). [#13691](https://github.com/ClickHouse/ClickHouse/pull/13691) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fixed incorrect sorting order for `LowCardinality` columns when ORDER BY multiple columns is used. This fixes [#13958](https://github.com/ClickHouse/ClickHouse/issues/13958). [#14223](https://github.com/ClickHouse/ClickHouse/pull/14223) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). @@ -71,7 +71,7 @@ * Fix function if with nullable constexpr as cond that is not literal NULL. 
Fixes [#12463](https://github.com/ClickHouse/ClickHouse/issues/12463). [#13226](https://github.com/ClickHouse/ClickHouse/pull/13226) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix assert in `arrayElement` function in case of array elements are Nullable and array subscript is also Nullable. This fixes [#12172](https://github.com/ClickHouse/ClickHouse/issues/12172). [#13224](https://github.com/ClickHouse/ClickHouse/pull/13224) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix DateTime64 conversion functions with constant argument. [#13205](https://github.com/ClickHouse/ClickHouse/pull/13205) ([Azat Khuzhin](https://github.com/azat)). -* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes https://github.com/ClickHouse/ClickHouse/issues/5779, https://github.com/ClickHouse/ClickHouse/issues/12527. [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix parsing row policies from users.xml when names of databases or tables contain dots. This fixes [#5779](https://github.com/ClickHouse/ClickHouse/issues/5779), [#12527](https://github.com/ClickHouse/ClickHouse/issues/12527). [#13199](https://github.com/ClickHouse/ClickHouse/pull/13199) ([Vitaly Baranov](https://github.com/vitlibar)). * Fix access to `redis` dictionary after connection was dropped once. It may happen with `cache` and `direct` dictionary layouts. [#13082](https://github.com/ClickHouse/ClickHouse/pull/13082) ([Anton Popov](https://github.com/CurtizJ)). * Fix wrong index analysis with functions. It could lead to some data parts being skipped when reading from `MergeTree` tables. Fixes [#13060](https://github.com/ClickHouse/ClickHouse/issues/13060). Fixes [#12406](https://github.com/ClickHouse/ClickHouse/issues/12406). [#13081](https://github.com/ClickHouse/ClickHouse/pull/13081) ([Anton Popov](https://github.com/CurtizJ)). 
* Fix error `Cannot convert column because it is constant but values of constants are different in source and result` for remote queries which use deterministic functions in scope of query, but not deterministic between queries, like `now()`, `now64()`, `randConstant()`. Fixes [#11327](https://github.com/ClickHouse/ClickHouse/issues/11327). [#13075](https://github.com/ClickHouse/ClickHouse/pull/13075) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). @@ -89,7 +89,7 @@ * Fixed [#10572](https://github.com/ClickHouse/ClickHouse/issues/10572) fix bloom filter index with const expression. [#12659](https://github.com/ClickHouse/ClickHouse/pull/12659) ([Winter Zhang](https://github.com/zhang2014)). * Fix SIGSEGV in StorageKafka when broker is unavailable (and not only). [#12658](https://github.com/ClickHouse/ClickHouse/pull/12658) ([Azat Khuzhin](https://github.com/azat)). * Add support for function `if` with `Array(UUID)` arguments. This fixes [#11066](https://github.com/ClickHouse/ClickHouse/issues/11066). [#12648](https://github.com/ClickHouse/ClickHouse/pull/12648) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* CREATE USER IF NOT EXISTS now doesn't throw exception if the user exists. This fixes https://github.com/ClickHouse/ClickHouse/issues/12507. [#12646](https://github.com/ClickHouse/ClickHouse/pull/12646) ([Vitaly Baranov](https://github.com/vitlibar)). +* CREATE USER IF NOT EXISTS now doesn't throw exception if the user exists. This fixes [#12507](https://github.com/ClickHouse/ClickHouse/issues/12507). [#12646](https://github.com/ClickHouse/ClickHouse/pull/12646) ([Vitaly Baranov](https://github.com/vitlibar)). * Exception `There is no supertype...` can be thrown during `ALTER ... UPDATE` in unexpected cases (e.g. when subtracting from UInt64 column). This fixes [#7306](https://github.com/ClickHouse/ClickHouse/issues/7306). This fixes [#4165](https://github.com/ClickHouse/ClickHouse/issues/4165). 
[#12633](https://github.com/ClickHouse/ClickHouse/pull/12633) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Fix possible `Pipeline stuck` error for queries with external sorting. Fixes [#12617](https://github.com/ClickHouse/ClickHouse/issues/12617). [#12618](https://github.com/ClickHouse/ClickHouse/pull/12618) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix error `Output of TreeExecutor is not sorted` for `OPTIMIZE DEDUPLICATE`. Fixes [#11572](https://github.com/ClickHouse/ClickHouse/issues/11572). [#12613](https://github.com/ClickHouse/ClickHouse/pull/12613) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). @@ -123,7 +123,7 @@ * Fix assert in `parseDateTimeBestEffort`. This fixes [#12649](https://github.com/ClickHouse/ClickHouse/issues/12649). [#13227](https://github.com/ClickHouse/ClickHouse/pull/13227) ([alexey-milovidov](https://github.com/alexey-milovidov)). * Minor optimization in Processors/PipelineExecutor: breaking out of a loop because it makes sense to do so. [#13058](https://github.com/ClickHouse/ClickHouse/pull/13058) ([Mark Papadakis](https://github.com/markpapadakis)). * Support TRUNCATE table without TABLE keyword. [#12653](https://github.com/ClickHouse/ClickHouse/pull/12653) ([Winter Zhang](https://github.com/zhang2014)). -* Fix explain query format overwrite by default, issue https://github.com/ClickHouse/ClickHouse/issues/12432. [#12541](https://github.com/ClickHouse/ClickHouse/pull/12541) ([BohuTANG](https://github.com/BohuTANG)). +* Fix explain query format overwrite by default. This fixes [#12432](https://github.com/ClickHouse/ClickHouse/issues/12432). [#12541](https://github.com/ClickHouse/ClickHouse/pull/12541) ([BohuTANG](https://github.com/BohuTANG)). * Allow to set JOIN kind and type in more standad way: `LEFT SEMI JOIN` instead of `SEMI LEFT JOIN`. For now both are correct. [#12520](https://github.com/ClickHouse/ClickHouse/pull/12520) ([Artem Zuikov](https://github.com/4ertus2)). 
* Changes default value for `multiple_joins_rewriter_version` to 2. It enables new multiple joins rewriter that knows about column names. [#12469](https://github.com/ClickHouse/ClickHouse/pull/12469) ([Artem Zuikov](https://github.com/4ertus2)). * Add several metrics for requests to S3 storages. [#12464](https://github.com/ClickHouse/ClickHouse/pull/12464) ([ianton-ru](https://github.com/ianton-ru)). diff --git a/README.md b/README.md index 300ef4555a2..7f6a102a2dd 100644 --- a/README.md +++ b/README.md @@ -18,3 +18,4 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events * [ClickHouse Data Integration Virtual Meetup](https://www.eventbrite.com/e/clickhouse-september-virtual-meetup-data-integration-tickets-117421895049) on September 10, 2020. +* [ClickHouse talk at Ya.Subbotnik (in Russian)](https://ya.cc/t/cIBI-3yECj5JF) on September 12, 2020. diff --git a/base/mysqlxx/ResultBase.h b/base/mysqlxx/ResultBase.h index 126a5c1ecca..b72b5682122 100644 --- a/base/mysqlxx/ResultBase.h +++ b/base/mysqlxx/ResultBase.h @@ -1,9 +1,7 @@ #pragma once -#include #include - namespace mysqlxx { @@ -22,6 +20,11 @@ class ResultBase public: ResultBase(MYSQL_RES * res_, Connection * conn_, const Query * query_); + ResultBase(const ResultBase &) = delete; + ResultBase & operator=(const ResultBase &) = delete; + ResultBase(ResultBase &&) = default; + ResultBase & operator=(ResultBase &&) = default; + Connection * getConnection() { return conn; } MYSQL_FIELDS getFields() { return fields; } unsigned getNumFields() { return num_fields; } diff --git a/base/mysqlxx/Value.h b/base/mysqlxx/Value.h index 9fdb33a442d..dfa86e8aa7d 100644 --- a/base/mysqlxx/Value.h +++ b/base/mysqlxx/Value.h @@ -254,7 +254,23 @@ template <> inline std::string Value::get() cons template <> inline LocalDate Value::get() const { return getDate(); } template <> inline LocalDateTime Value::get() const { return getDateTime(); } -template inline T Value::get() const { 
return T(*this); } + +namespace details +{ +// To avoid stack overflow when converting to type with no appropriate c-tor, +// resulting in endless recursive calls from `Value::get()` to `Value::operator T()` to `Value::get()` to ... +template >> +inline T contructFromValue(const Value & val) +{ + return T(val); +} +} + +template +inline T Value::get() const +{ + return details::contructFromValue(*this); +} inline std::ostream & operator<< (std::ostream & ostr, const Value & x) diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 27586821af2..6ca3999ff7f 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -1,9 +1,9 @@ # This strings autochanged from release_lib.sh: -SET(VERSION_REVISION 54439) +SET(VERSION_REVISION 54440) SET(VERSION_MAJOR 20) -SET(VERSION_MINOR 9) +SET(VERSION_MINOR 10) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 0586f0d555f7481b394afc55bbb29738cd573a1c) -SET(VERSION_DESCRIBE v20.9.1.1-prestable) -SET(VERSION_STRING 20.9.1.1) +SET(VERSION_GITHASH 11a247d2f42010c1a17bf678c3e00a4bc89b23f8) +SET(VERSION_DESCRIBE v20.10.1.1-prestable) +SET(VERSION_STRING 20.10.1.1) # end of autochange diff --git a/debian/changelog b/debian/changelog index c7c20ccd6d0..244b2b1fde4 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (20.9.1.1) unstable; urgency=low +clickhouse (20.10.1.1) unstable; urgency=low * Modified source code - -- clickhouse-release Mon, 31 Aug 2020 23:07:38 +0300 + -- clickhouse-release Tue, 08 Sep 2020 17:04:39 +0300 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 36ca0ee107a..5ce506aafa3 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.9.1.* +ARG version=20.10.1.* RUN apt-get update \ && apt-get install --yes --no-install-recommends \ diff --git a/docker/packager/binary/build.sh 
b/docker/packager/binary/build.sh index 72adba5d762..7c3de9aaebd 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -18,7 +18,7 @@ ccache --zero-stats ||: ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||: rm -f CMakeCache.txt cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA -DCMAKE_BUILD_TYPE=$BUILD_TYPE -DSANITIZE=$SANITIZER $CMAKE_FLAGS .. -ninja $NINJA_FLAGS clickhouse-bundle +ninja -j $(($(nproc) / 2)) $NINJA_FLAGS clickhouse-bundle mv ./programs/clickhouse* /output mv ./src/unit_tests_dbms /output find . -name '*.so' -print -exec mv '{}' /output \; diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index c3950c58437..c15bd89b646 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:20.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.9.1.* +ARG version=20.10.1.* ARG gosu_ver=1.10 RUN apt-get update \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index bb09fa1de56..ae588af2459 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" -ARG version=20.9.1.* +ARG version=20.10.1.* RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/integration/runner/compose/docker_compose_redis.yml b/docker/test/integration/runner/compose/docker_compose_redis.yml index 2c9ace96d0c..72df99ec59b 100644 --- a/docker/test/integration/runner/compose/docker_compose_redis.yml +++ b/docker/test/integration/runner/compose/docker_compose_redis.yml @@ -5,4 +5,4 @@ services: restart: always ports: - 6380:6379 - command: redis-server --requirepass "clickhouse" + command: redis-server --requirepass "clickhouse" --databases 32 diff --git a/docs/en/operations/system-tables/stack_trace.md b/docs/en/operations/system-tables/stack_trace.md index 
b1714a93a20..44b13047cc3 100644 --- a/docs/en/operations/system-tables/stack_trace.md +++ b/docs/en/operations/system-tables/stack_trace.md @@ -82,8 +82,8 @@ res: /lib/x86_64-linux-gnu/libc-2.27.so - [Introspection Functions](../../sql-reference/functions/introspection.md) — Which introspection functions are available and how to use them. - [system.trace_log](../system-tables/trace_log.md) — Contains stack traces collected by the sampling query profiler. -- [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) — Description and usage example of the `arrayMap` function. -- [arrayFilter](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-filter) — Description and usage example of the `arrayFilter` function. +- [arrayMap](../../sql-reference/functions/array-functions.md#array-map) — Description and usage example of the `arrayMap` function. +- [arrayFilter](../../sql-reference/functions/array-functions.md#array-filter) — Description and usage example of the `arrayFilter` function. [Original article](https://clickhouse.tech/docs/en/operations/system-tables/stack_trace) diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index 60adb942925..e396006d957 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -7,7 +7,7 @@ toc_title: Tuple(T1, T2, ...) A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). -Tuples are used for temporary column grouping. Columns can be grouped when an IN expression is used in a query, and for specifying certain formal parameters of lambda functions. For more information, see the sections [IN operators](../../sql-reference/operators/in.md) and [Higher order functions](../../sql-reference/functions/higher-order-functions.md). +Tuples are used for temporary column grouping. 
Columns can be grouped when an IN expression is used in a query, and for specifying certain formal parameters of lambda functions. For more information, see the sections [IN operators](../../sql-reference/operators/in.md) and [Higher order functions](../../sql-reference/functions/index.md#higher-order-functions). Tuples can be the result of a query. In this case, for text formats other than JSON, values are comma-separated in brackets. In JSON formats, tuples are output as arrays (in square brackets). diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 5d89d6d335b..c4b151f59ce 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -1,5 +1,5 @@ --- -toc_priority: 35 +toc_priority: 34 toc_title: Arithmetic --- diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 91ecc963b1f..82700a109b5 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1,9 +1,9 @@ --- -toc_priority: 46 +toc_priority: 35 toc_title: Arrays --- -# Functions for Working with Arrays {#functions-for-working-with-arrays} +# Array Functions {#functions-for-working-with-arrays} ## empty {#function-empty} @@ -241,6 +241,12 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Elements set to `NULL` are handled as normal values. +## arrayCount(\[func,\] arr1, …) {#array-count} + +Returns the number of elements in the arr array for which func returns something other than 0. If ‘func’ is not specified, it returns the number of non-zero elements in the array. + +Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. 
+ ## countEqual(arr, x) {#countequalarr-x} Returns the number of elements in the array equal to x. Equivalent to arrayCount (elem -\> elem = x, arr). @@ -568,7 +574,7 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]); - `NaN` values are right before `NULL`. - `Inf` values are right before `NaN`. -Note that `arraySort` is a [higher-order function](../../sql-reference/functions/higher-order-functions.md). You can pass a lambda function to it as the first argument. In this case, sorting order is determined by the result of the lambda function applied to the elements of the array. +Note that `arraySort` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. In this case, sorting order is determined by the result of the lambda function applied to the elements of the array. Let’s consider the following example: @@ -668,7 +674,7 @@ SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res; - `NaN` values are right before `NULL`. - `-Inf` values are right before `NaN`. -Note that the `arrayReverseSort` is a [higher-order function](../../sql-reference/functions/higher-order-functions.md). You can pass a lambda function to it as the first argument. Example is shown below. +Note that the `arrayReverseSort` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. Example is shown below. ``` sql SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res; @@ -1120,7 +1126,205 @@ Result: ``` text ┌─arrayAUC([0.1, 0.4, 0.35, 0.8], [0, 0, 1, 1])─┐ │ 0.75 │ -└────────────────────────────────────────---──┘ +└───────────────────────────────────────────────┘ ``` +## arrayMap(func, arr1, …) {#array-map} + +Returns an array obtained from the original application of the `func` function to each element in the `arr` array. 
+ +Examples: + +``` sql +SELECT arrayMap(x -> (x + 2), [1, 2, 3]) as res; +``` + +``` text +┌─res─────┐ +│ [3,4,5] │ +└─────────┘ +``` + +The following example shows how to create a tuple of elements from different arrays: + +``` sql +SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res +``` + +``` text +┌─res─────────────────┐ +│ [(1,4),(2,5),(3,6)] │ +└─────────────────────┘ +``` + +Note that the `arrayMap` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arrayFilter(func, arr1, …) {#array-filter} + +Returns an array containing only the elements in `arr1` for which `func` returns something other than 0. + +Examples: + +``` sql +SELECT arrayFilter(x -> x LIKE '%World%', ['Hello', 'abc World']) AS res +``` + +``` text +┌─res───────────┐ +│ ['abc World'] │ +└───────────────┘ +``` + +``` sql +SELECT + arrayFilter( + (i, x) -> x LIKE '%World%', + arrayEnumerate(arr), + ['Hello', 'abc World'] AS arr) + AS res +``` + +``` text +┌─res─┐ +│ [2] │ +└─────┘ +``` + +Note that the `arrayFilter` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arrayFill(func, arr1, …) {#array-fill} + +Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func` returns 0. The first element of `arr1` will not be replaced. + +Examples: + +``` sql +SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res +``` + +``` text +┌─res──────────────────────────────┐ +│ [1,1,3,11,12,12,12,5,6,14,14,14] │ +└──────────────────────────────────┘ +``` + +Note that the `arrayFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). 
You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arrayReverseFill(func, arr1, …) {#array-reverse-fill} + +Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func` returns 0. The last element of `arr1` will not be replaced. + +Examples: + +``` sql +SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res +``` + +``` text +┌─res────────────────────────────────┐ +│ [1,3,3,11,12,5,5,5,6,14,NULL,NULL] │ +└────────────────────────────────────┘ +``` + +Note that the `arrayReverseFill` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arraySplit(func, arr1, …) {#array-split} + +Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. + +Examples: + +``` sql +SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res +``` + +``` text +┌─res─────────────┐ +│ [[1,2,3],[4,5]] │ +└─────────────────┘ +``` + +Note that the `arraySplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arrayReverseSplit(func, arr1, …) {#array-reverse-split} + +Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. 
+ +Examples: + +``` sql +SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res +``` + +``` text +┌─res───────────────┐ +│ [[1],[2,3,4],[5]] │ +└───────────────────┘ +``` + +Note that the `arrayReverseSplit` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} + +Returns 1 if there is at least one element in `arr` for which `func` returns something other than 0. Otherwise, it returns 0. + +Note that the `arrayExists` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. + +## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} + +Returns 1 if `func` returns something other than 0 for all the elements in `arr`. Otherwise, it returns 0. + +Note that the `arrayAll` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. + +## arrayFirst(func, arr1, …) {#array-first} + +Returns the first element in the `arr1` array for which `func` returns something other than 0. + +Note that the `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arrayFirstIndex(func, arr1, …) {#array-first-index} + +Returns the index of the first element in the `arr1` array for which `func` returns something other than 0. + +Note that the `arrayFirstIndex` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. + +## arraySum(\[func,\] arr1, …) {#array-sum} + +Returns the sum of the `func` values. 
If the function is omitted, it just returns the sum of the array elements. + +Note that the `arraySum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. + +## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} + +Returns an array of partial sums of elements in the source array (a running sum). If the `func` function is specified, then the values of the array elements are converted by this function before summing. + +Example: + +``` sql +SELECT arrayCumSum([1, 1, 1, 1]) AS res +``` + +``` text +┌─res──────────┐ +│ [1, 2, 3, 4] │ +└──────────────┘ +``` + +Note that the `arrayCumSum` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. + +## arrayCumSumNonNegative(arr) {#arraycumsumnonnegativearr} + +Same as `arrayCumSum`, returns an array of partial sums of elements in the source array (a running sum). Unlike `arrayCumSum`, when the returned value contains a value less than zero, the value is replaced with zero and the subsequent calculation is performed with zero parameters. For example: + +``` sql +SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res +``` + +``` text +┌─res───────┐ +│ [1,2,0,1] │ +└───────────┘ +``` +Note that the `arrayCumSumNonNegative` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. 
+ [Original article](https://clickhouse.tech/docs/en/query_language/functions/array_functions/) diff --git a/docs/en/sql-reference/functions/higher-order-functions.md b/docs/en/sql-reference/functions/higher-order-functions.md deleted file mode 100644 index 484bdaa12e6..00000000000 --- a/docs/en/sql-reference/functions/higher-order-functions.md +++ /dev/null @@ -1,262 +0,0 @@ ---- -toc_priority: 57 -toc_title: Higher-Order ---- - -# Higher-order Functions {#higher-order-functions} - -## `->` operator, lambda(params, expr) function {#operator-lambdaparams-expr-function} - -Allows describing a lambda function for passing to a higher-order function. The left side of the arrow has a formal parameter, which is any ID, or multiple formal parameters – any IDs in a tuple. The right side of the arrow has an expression that can use these formal parameters, as well as any table columns. - -Examples: `x -> 2 * x, str -> str != Referer.` - -Higher-order functions can only accept lambda functions as their functional argument. - -A lambda function that accepts multiple arguments can be passed to a higher-order function. In this case, the higher-order function is passed several arrays of identical length that these arguments will correspond to. - -For some functions, such as [arrayCount](#higher_order_functions-array-count) or [arraySum](#higher_order_functions-array-count), the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed. 
- -A lambda function can’t be omitted for the following functions: - -- [arrayMap](#higher_order_functions-array-map) -- [arrayFilter](#higher_order_functions-array-filter) -- [arrayFill](#higher_order_functions-array-fill) -- [arrayReverseFill](#higher_order_functions-array-reverse-fill) -- [arraySplit](#higher_order_functions-array-split) -- [arrayReverseSplit](#higher_order_functions-array-reverse-split) -- [arrayFirst](#higher_order_functions-array-first) -- [arrayFirstIndex](#higher_order_functions-array-first-index) - -### arrayMap(func, arr1, …) {#higher_order_functions-array-map} - -Returns an array obtained from the original application of the `func` function to each element in the `arr` array. - -Examples: - -``` sql -SELECT arrayMap(x -> (x + 2), [1, 2, 3]) as res; -``` - -``` text -┌─res─────┐ -│ [3,4,5] │ -└─────────┘ -``` - -The following example shows how to create a tuple of elements from different arrays: - -``` sql -SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res -``` - -``` text -┌─res─────────────────┐ -│ [(1,4),(2,5),(3,6)] │ -└─────────────────────┘ -``` - -Note that the first argument (lambda function) can’t be omitted in the `arrayMap` function. - -### arrayFilter(func, arr1, …) {#higher_order_functions-array-filter} - -Returns an array containing only the elements in `arr1` for which `func` returns something other than 0. - -Examples: - -``` sql -SELECT arrayFilter(x -> x LIKE '%World%', ['Hello', 'abc World']) AS res -``` - -``` text -┌─res───────────┐ -│ ['abc World'] │ -└───────────────┘ -``` - -``` sql -SELECT - arrayFilter( - (i, x) -> x LIKE '%World%', - arrayEnumerate(arr), - ['Hello', 'abc World'] AS arr) - AS res -``` - -``` text -┌─res─┐ -│ [2] │ -└─────┘ -``` - -Note that the first argument (lambda function) can’t be omitted in the `arrayFilter` function. 
- -### arrayFill(func, arr1, …) {#higher_order_functions-array-fill} - -Scan through `arr1` from the first element to the last element and replace `arr1[i]` by `arr1[i - 1]` if `func` returns 0. The first element of `arr1` will not be replaced. - -Examples: - -``` sql -SELECT arrayFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res -``` - -``` text -┌─res──────────────────────────────┐ -│ [1,1,3,11,12,12,12,5,6,14,14,14] │ -└──────────────────────────────────┘ -``` - -Note that the first argument (lambda function) can’t be omitted in the `arrayFill` function. - -### arrayReverseFill(func, arr1, …) {#higher_order_functions-array-reverse-fill} - -Scan through `arr1` from the last element to the first element and replace `arr1[i]` by `arr1[i + 1]` if `func` returns 0. The last element of `arr1` will not be replaced. - -Examples: - -``` sql -SELECT arrayReverseFill(x -> not isNull(x), [1, null, 3, 11, 12, null, null, 5, 6, 14, null, null]) AS res -``` - -``` text -┌─res────────────────────────────────┐ -│ [1,3,3,11,12,5,5,5,6,14,NULL,NULL] │ -└────────────────────────────────────┘ -``` - -Note that the first argument (lambda function) can’t be omitted in the `arrayReverseFill` function. - -### arraySplit(func, arr1, …) {#higher_order_functions-array-split} - -Split `arr1` into multiple arrays. When `func` returns something other than 0, the array will be split on the left hand side of the element. The array will not be split before the first element. - -Examples: - -``` sql -SELECT arraySplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res -``` - -``` text -┌─res─────────────┐ -│ [[1,2,3],[4,5]] │ -└─────────────────┘ -``` - -Note that the first argument (lambda function) can’t be omitted in the `arraySplit` function. - -### arrayReverseSplit(func, arr1, …) {#higher_order_functions-array-reverse-split} - -Split `arr1` into multiple arrays. 
When `func` returns something other than 0, the array will be split on the right hand side of the element. The array will not be split after the last element. - -Examples: - -``` sql -SELECT arrayReverseSplit((x, y) -> y, [1, 2, 3, 4, 5], [1, 0, 0, 1, 0]) AS res -``` - -``` text -┌─res───────────────┐ -│ [[1],[2,3,4],[5]] │ -└───────────────────┘ -``` - -Note that the first argument (lambda function) can’t be omitted in the `arraySplit` function. - -### arrayCount(\[func,\] arr1, …) {#higher_order_functions-array-count} - -Returns the number of elements in the arr array for which func returns something other than 0. If ‘func’ is not specified, it returns the number of non-zero elements in the array. - -### arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} - -Returns 1 if there is at least one element in ‘arr’ for which ‘func’ returns something other than 0. Otherwise, it returns 0. - -### arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} - -Returns 1 if ‘func’ returns something other than 0 for all the elements in ‘arr’. Otherwise, it returns 0. - -### arraySum(\[func,\] arr1, …) {#higher-order-functions-array-sum} - -Returns the sum of the ‘func’ values. If the function is omitted, it just returns the sum of the array elements. - -### arrayFirst(func, arr1, …) {#higher_order_functions-array-first} - -Returns the first element in the ‘arr1’ array for which ‘func’ returns something other than 0. - -Note that the first argument (lambda function) can’t be omitted in the `arrayFirst` function. - -### arrayFirstIndex(func, arr1, …) {#higher_order_functions-array-first-index} - -Returns the index of the first element in the ‘arr1’ array for which ‘func’ returns something other than 0. - -Note that the first argument (lambda function) can’t be omitted in the `arrayFirstIndex` function. - -### arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} - -Returns an array of partial sums of elements in the source array (a running sum). 
If the `func` function is specified, then the values of the array elements are converted by this function before summing. - -Example: - -``` sql -SELECT arrayCumSum([1, 1, 1, 1]) AS res -``` - -``` text -┌─res──────────┐ -│ [1, 2, 3, 4] │ -└──────────────┘ -``` - -### arrayCumSumNonNegative(arr) {#arraycumsumnonnegativearr} - -Same as `arrayCumSum`, returns an array of partial sums of elements in the source array (a running sum). Different `arrayCumSum`, when then returned value contains a value less than zero, the value is replace with zero and the subsequent calculation is performed with zero parameters. For example: - -``` sql -SELECT arrayCumSumNonNegative([1, 1, -4, 1]) AS res -``` - -``` text -┌─res───────┐ -│ [1,2,0,1] │ -└───────────┘ -``` - -### arraySort(\[func,\] arr1, …) {#arraysortfunc-arr1} - -Returns an array as result of sorting the elements of `arr1` in ascending order. If the `func` function is specified, sorting order is determined by the result of the function `func` applied to the elements of array (arrays) - -The [Schwartzian transform](https://en.wikipedia.org/wiki/Schwartzian_transform) is used to improve sorting efficiency. - -Example: - -``` sql -SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]); -``` - -``` text -┌─res────────────────┐ -│ ['world', 'hello'] │ -└────────────────────┘ -``` - -For more information about the `arraySort` method, see the [Functions for Working With Arrays](../../sql-reference/functions/array-functions.md#array_functions-sort) section. - -### arrayReverseSort(\[func,\] arr1, …) {#arrayreversesortfunc-arr1} - -Returns an array as result of sorting the elements of `arr1` in descending order. If the `func` function is specified, sorting order is determined by the result of the function `func` applied to the elements of array (arrays). 
- -Example: - -``` sql -SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; -``` - -``` text -┌─res───────────────┐ -│ ['hello','world'] │ -└───────────────────┘ -``` - -For more information about the `arrayReverseSort` method, see the [Functions for Working With Arrays](../../sql-reference/functions/array-functions.md#array_functions-reverse-sort) section. - -[Original article](https://clickhouse.tech/docs/en/query_language/functions/higher_order_functions/) diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 65514eff673..1a0b9d83b5f 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -44,6 +44,21 @@ Functions have the following behaviors: Functions can’t change the values of their arguments – any changes are returned as the result. Thus, the result of calculating separate functions does not depend on the order in which the functions are written in the query. +## Higher-order functions, `->` operator and lambda(params, expr) function {#higher-order-functions} + +Higher-order functions can only accept lambda functions as their functional argument. To pass a lambda function to a higher-order function use `->` operator. The left side of the arrow has a formal parameter, which is any ID, or multiple formal parameters – any IDs in a tuple. The right side of the arrow has an expression that can use these formal parameters, as well as any table columns. + +Examples: + +``` +x -> 2 * x +str -> str != Referer +``` + +A lambda function that accepts multiple arguments can also be passed to a higher-order function. In this case, the higher-order function is passed several arrays of identical length that these arguments will correspond to. + +For some functions the first argument (the lambda function) can be omitted. In this case, identical mapping is assumed. 
+ ## Error Handling {#error-handling} Some functions might throw an exception if the data is invalid. In this case, the query is canceled and an error text is returned to the client. For distributed processing, when an exception occurs on one of the servers, the other servers also attempt to abort the query. diff --git a/docs/en/sql-reference/functions/introspection.md b/docs/en/sql-reference/functions/introspection.md index 6848f74da1f..1fd39c704c5 100644 --- a/docs/en/sql-reference/functions/introspection.md +++ b/docs/en/sql-reference/functions/introspection.md @@ -98,7 +98,7 @@ LIMIT 1 \G ``` -The [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToLine` function. The result of this processing you see in the `trace_source_code_lines` column of output. +The [arrayMap](../../sql-reference/functions/array-functions.md#array-map) function allows to process each individual element of the `trace` array by the `addressToLine` function. The result of this processing you see in the `trace_source_code_lines` column of output. ``` text Row 1: @@ -184,7 +184,7 @@ LIMIT 1 \G ``` -The [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToSymbols` function. The result of this processing you see in the `trace_symbols` column of output. +The [arrayMap](../../sql-reference/functions/array-functions.md#array-map) function allows to process each individual element of the `trace` array by the `addressToSymbols` function. The result of this processing you see in the `trace_symbols` column of output. 
``` text Row 1: @@ -281,7 +281,7 @@ LIMIT 1 \G ``` -The [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `demangle` function. The result of this processing you see in the `trace_functions` column of output. +The [arrayMap](../../sql-reference/functions/array-functions.md#array-map) function allows to process each individual element of the `trace` array by the `demangle` function. The result of this processing you see in the `trace_functions` column of output. ``` text Row 1: diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md index f04fbae18ba..3c80fe663f1 100644 --- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md @@ -1,3 +1,8 @@ +--- +toc_priority: 30 +toc_title: MergeTree +--- + # MergeTree {#table_engines-mergetree} Движок `MergeTree`, а также другие движки этого семейства (`*MergeTree`) — это наиболее функциональные движки таблиц ClickHouse. @@ -28,8 +33,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] INDEX index_name1 expr1 TYPE type1(...) GRANULARITY value1, INDEX index_name2 expr2 TYPE type2(...) GRANULARITY value2 ) ENGINE = MergeTree() +ORDER BY expr [PARTITION BY expr] -[ORDER BY expr] [PRIMARY KEY expr] [SAMPLE BY expr] [TTL expr [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'], ...] @@ -38,27 +43,42 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Описание параметров смотрите в [описании запроса CREATE](../../../engines/table-engines/mergetree-family/mergetree.md). -!!! note "Note" +!!! note "Примечание" `INDEX` — экспериментальная возможность, смотрите [Индексы пропуска данных](#table_engine-mergetree-data_skipping-indexes). ### Секции запроса {#mergetree-query-clauses} - `ENGINE` — имя и параметры движка. `ENGINE = MergeTree()`. 
`MergeTree` не имеет параметров. -- `PARTITION BY` — [ключ партиционирования](custom-partitioning-key.md). Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../../engines/table-engines/mergetree-family/mergetree.md). В этом случае имена партиций имеют формат `"YYYYMM"`. +- `ORDER BY` — ключ сортировки. + + Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounterID, EventDate)`. -- `ORDER BY` — ключ сортировки. Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounterID, EventDate)`. + ClickHouse использует ключ сортировки в качестве первичного ключа, если первичный ключ не задан в секции `PRIMARY KEY`. -- `PRIMARY KEY` — первичный ключ, если он [отличается от ключа сортировки](#pervichnyi-kliuch-otlichnyi-ot-kliucha-sortirovki). По умолчанию первичный ключ совпадает с ключом сортировки (который задаётся секцией `ORDER BY`.) Поэтому в большинстве случаев секцию `PRIMARY KEY` отдельно указывать не нужно. + Чтобы отключить сортировку, используйте синтаксис `ORDER BY tuple()`. Смотрите [выбор первичного ключа](#vybor-pervichnogo-kliucha). -- `SAMPLE BY` — выражение для сэмплирования. Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Пример: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. +- `PARTITION BY` — [ключ партиционирования](custom-partitioning-key.md). Необязательный параметр. -- `TTL` — список правил, определяющих длительности хранения строк, а также задающих правила перемещения частей на определённые тома или диски. Выражение должно возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`. - - Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` указывает действие, которое будет выполнено с частью, удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`). 
- - Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`. - - Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl) + Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../../engines/table-engines/mergetree-family/mergetree.md). В этом случае имена партиций имеют формат `"YYYYMM"`. -- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree`: +- `PRIMARY KEY` — первичный ключ, если он [отличается от ключа сортировки](#pervichnyi-kliuch-otlichnyi-ot-kliucha-sortirovki). Необязательный параметр. + + По умолчанию первичный ключ совпадает с ключом сортировки (который задаётся секцией `ORDER BY`.) Поэтому в большинстве случаев секцию `PRIMARY KEY` отдельно указывать не нужно. + +- `SAMPLE BY` — выражение для сэмплирования. Необязательный параметр. + + Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Пример: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. + +- `TTL` — список правил, определяющих длительности хранения строк, а также задающих правила перемещения частей на определённые тома или диски. Необязательный параметр. + + Выражение должно возвращать столбец `Date` или `DateTime`. Пример: `TTL date + INTERVAL 1 DAY`. + + Тип правила `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'` указывает действие, которое будет выполнено с частью, удаление строк (прореживание), перемещение (при выполнении условия для всех строк части) на определённый диск (`TO DISK 'xxx'`) или том (`TO VOLUME 'xxx'`). Поведение по умолчанию соответствует удалению строк (`DELETE`). В списке правил может быть указано только одно выражение с поведением `DELETE`. 
+ + Дополнительные сведения смотрите в разделе [TTL для столбцов и таблиц](#table_engine-mergetree-ttl) + +- `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree` (необязательные): - `index_granularity` — максимальное количество строк данных между засечками индекса. По умолчанию — 8192. Смотрите [Хранение данных](#mergetree-data-storage). - `index_granularity_bytes` — максимальный размер гранул данных в байтах. По умолчанию — 10Mb. Чтобы ограничить размер гранул только количеством строк, установите значение 0 (не рекомендовано). Смотрите [Хранение данных](#mergetree-data-storage). @@ -180,6 +200,14 @@ ClickHouse не требует уникального первичного кл Длинный первичный ключ будет негативно влиять на производительность вставки и потребление памяти, однако на производительность ClickHouse при запросах `SELECT` лишние столбцы в первичном ключе не влияют. +Вы можете создать таблицу без первичного ключа, используя синтаксис `ORDER BY tuple()`. В этом случае ClickHouse хранит данные в порядке вставки. Если вы хотите сохранить порядок данных при вставке данных с помощью запросов `INSERT ... SELECT`, установите [max\_insert\_threads = 1](../../../operations/settings/settings.md#settings-max-insert-threads). + +Чтобы выбрать данные в первоначальном порядке, используйте +[однопоточные](../../../operations/settings/settings.md#settings-max_threads) запросы `SELECT`. + + + + ### Первичный ключ, отличный от ключа сортировки {#pervichnyi-kliuch-otlichnyi-ot-kliucha-sortirovki} Существует возможность задать первичный ключ (выражение, значения которого будут записаны в индексный файл для diff --git a/docs/ru/interfaces/third-party/gui.md b/docs/ru/interfaces/third-party/gui.md index a872e35ce0b..f7eaa5cc77f 100644 --- a/docs/ru/interfaces/third-party/gui.md +++ b/docs/ru/interfaces/third-party/gui.md @@ -93,6 +93,10 @@ [cickhouse-plantuml](https://pypi.org/project/clickhouse-plantuml/) — скрипт, генерирующий [PlantUML](https://plantuml.com/) диаграммы схем таблиц.
+### xeus-clickhouse {#xeus-clickhouse} + +[xeus-clickhouse](https://github.com/wangfenjin/xeus-clickhouse) — это ядро Jupyter для ClickHouse, которое поддерживает запрос ClickHouse-данных с использованием SQL в Jupyter. + ## Коммерческие {#kommercheskie} ### DataGrip {#datagrip} diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md index 95715cd84c4..6fa989d3d0d 100644 --- a/docs/ru/operations/system-tables/index.md +++ b/docs/ru/operations/system-tables/index.md @@ -7,10 +7,38 @@ toc_title: Системные таблицы ## Введение {#system-tables-introduction} -Системные таблицы используются для реализации части функциональности системы, а также предоставляют доступ к информации о работе системы. -Вы не можете удалить системную таблицу (хотя можете сделать DETACH). -Для системных таблиц нет файлов с данными на диске и файлов с метаданными. Сервер создаёт все системные таблицы при старте. -В системные таблицы нельзя записывать данные - можно только читать. -Системные таблицы расположены в базе данных system. +Системные таблицы содержат информацию о: + +- Состоянии сервера, процессов и окружении. +- Внутренних процессах сервера. + +Системные таблицы: + +- Находятся в базе данных `system`. +- Доступны только для чтения данных. +- Не могут быть удалены или изменены, но их можно отсоединить. + +Системные таблицы `metric_log`, `query_log`, `query_thread_log`, `trace_log` хранят данные в файловой системе. Остальные системные таблицы хранят свои данные в оперативной памяти. Сервер ClickHouse создает такие системные таблицы при запуске. + +### Источники системных показателей + +Для сбора системных показателей сервер ClickHouse использует: + +- Возможности `CAP_NET_ADMIN`. +- [procfs](https://ru.wikipedia.org/wiki/Procfs) (только Linux). + +**procfs** + +Если для сервера ClickHouse не включено `CAP_NET_ADMIN`, он пытается обратиться к `ProcfsMetricsProvider`.
`ProcfsMetricsProvider` позволяет собирать системные показатели для каждого запроса (для CPU и I/O). + +Если procfs поддерживается и включена в системе, то сервер ClickHouse собирает следующие системные показатели: + +- `OSCPUVirtualTimeMicroseconds` +- `OSCPUWaitMicroseconds` +- `OSIOWaitMicroseconds` +- `OSReadChars` +- `OSWriteChars` +- `OSReadBytes` +- `OSWriteBytes` [Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system-tables/) diff --git a/docs/ru/operations/system-tables/stack_trace.md b/docs/ru/operations/system-tables/stack_trace.md index 966a07633d8..0689e15c35c 100644 --- a/docs/ru/operations/system-tables/stack_trace.md +++ b/docs/ru/operations/system-tables/stack_trace.md @@ -82,7 +82,7 @@ res: /lib/x86_64-linux-gnu/libc-2.27.so - [Функции интроспекции](../../sql-reference/functions/introspection.md) — Что такое функции интроспекции и как их использовать. - [system.trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) — Содержит трассировки стека, собранные профилировщиком выборочных запросов. -- [arrayMap](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-map) — Описание и пример использования функции `arrayMap`. -- [arrayFilter](../../sql-reference/functions/higher-order-functions.md#higher_order_functions-array-filter) — Описание и пример использования функции `arrayFilter`. +- [arrayMap](../../sql-reference/functions/array-functions.md#array-map) — Описание и пример использования функции `arrayMap`. +- [arrayFilter](../../sql-reference/functions/array-functions.md#array-filter) — Описание и пример использования функции `arrayFilter`. 
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/system_tables/stack_trace) diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md index d36dc87e8ba..52f0412a177 100644 --- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md @@ -9,6 +9,7 @@ The following aggregate functions are supported: - [`min`](../../sql-reference/aggregate-functions/reference/min.md#agg_function-min) - [`max`](../../sql-reference/aggregate-functions/reference/max.md#agg_function-max) - [`sum`](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) +- [`sumWithOverflow`](../../sql-reference/aggregate-functions/reference/sumwithoverflow.md#sumwithoverflowx) - [`groupBitAnd`](../../sql-reference/aggregate-functions/reference/groupbitand.md#groupbitand) - [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor) - [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) diff --git a/docs/ru/sql-reference/data-types/tuple.md b/docs/ru/sql-reference/data-types/tuple.md index 566a582eb95..0a1089d1aef 100644 --- a/docs/ru/sql-reference/data-types/tuple.md +++ b/docs/ru/sql-reference/data-types/tuple.md @@ -2,7 +2,7 @@ Кортеж из элементов любого [типа](index.md#data_types). Элементы кортежа могут быть одного или разных типов. -Кортежи используются для временной группировки столбцов. Столбцы могут группироваться при использовании выражения IN в запросе, а также для указания нескольких формальных параметров лямбда-функций. Подробнее смотрите разделы [Операторы IN](../../sql-reference/data-types/tuple.md), [Функции высшего порядка](../../sql-reference/functions/higher-order-functions.md#higher-order-functions). +Кортежи используются для временной группировки столбцов. 
Столбцы могут группироваться при использовании выражения IN в запросе, а также для указания нескольких формальных параметров лямбда-функций. Подробнее смотрите разделы [Операторы IN](../../sql-reference/data-types/tuple.md), [Функции высшего порядка](../../sql-reference/functions/index.md#higher-order-functions). Кортежи могут быть результатом запроса. В этом случае, в текстовых форматах кроме JSON, значения выводятся в круглых скобках через запятую. В форматах JSON, кортежи выводятся в виде массивов (в квадратных скобках). diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index cb1d179be47..91c0443c85d 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -1,4 +1,4 @@ -# Функции по работе с массивами {#funktsii-po-rabote-s-massivami} +# Массивы {#functions-for-working-with-arrays} ## empty {#function-empty} @@ -186,6 +186,13 @@ SELECT indexOf([1, 3, NULL, NULL], NULL) Элементы, равные `NULL`, обрабатываются как обычные значения. +## arrayCount(\[func,\] arr1, …) {#array-count} + +Возвращает количество элементов массива `arr`, для которых функция `func` возвращает не 0. Если `func` не указана - возвращает количество ненулевых элементов массива. + +Функция `arrayCount` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. + + ## countEqual(arr, x) {#countequalarr-x} Возвращает количество элементов массива, равных x. Эквивалентно arrayCount(elem -\> elem = x, arr). @@ -513,7 +520,7 @@ SELECT arraySort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]); - Значения `NaN` идут перед `NULL`. - Значения `Inf` идут перед `NaN`. -Функция `arraySort` является [функцией высшего порядка](higher-order-functions.md) — в качестве первого аргумента ей можно передать лямбда-функцию. 
В этом случае порядок сортировки определяется результатом применения лямбда-функции на элементы массива. +Функция `arraySort` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. В этом случае порядок сортировки определяется результатом применения лямбда-функции на элементы массива. Рассмотрим пример: @@ -613,7 +620,7 @@ SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, -4, NULL, inf, -inf]) as res; - Значения `NaN` идут перед `NULL`. - Значения `-Inf` идут перед `NaN`. -Функция `arrayReverseSort` является [функцией высшего порядка](higher-order-functions.md). Вы можете передать ей в качестве первого аргумента лямбда-функцию. Например: +Функция `arrayReverseSort` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей можно передать лямбда-функцию. Например: ``` sql SELECT arrayReverseSort((x) -> -x, [1, 2, 3]) as res; @@ -1036,6 +1043,116 @@ SELECT arrayZip(['a', 'b', 'c'], [5, 2, 1]) └──────────────────────────────────────┘ ``` +## arrayMap(func, arr1, …) {#array-map} + +Возвращает массив, полученный на основе результатов применения функции `func` к каждому элементу массива `arr`. + +Примеры: + +``` sql +SELECT arrayMap(x -> (x + 2), [1, 2, 3]) as res; +``` + +``` text +┌─res─────┐ +│ [3,4,5] │ +└─────────┘ +``` + +Следующий пример показывает, как создать кортежи из элементов разных массивов: + +``` sql +SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res +``` + +``` text +┌─res─────────────────┐ +│ [(1,4),(2,5),(3,6)] │ +└─────────────────────┘ +``` + +Функция `arrayMap` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. 
+ +## arrayFilter(func, arr1, …) {#array-filter} + +Возвращает массив, содержащий только те элементы массива `arr1`, для которых функция `func` возвращает не 0. + +Примеры: + +``` sql +SELECT arrayFilter(x -> x LIKE '%World%', ['Hello', 'abc World']) AS res +``` + +``` text +┌─res───────────┐ +│ ['abc World'] │ +└───────────────┘ +``` + +``` sql +SELECT + arrayFilter( + (i, x) -> x LIKE '%World%', + arrayEnumerate(arr), + ['Hello', 'abc World'] AS arr) + AS res +``` + +``` text +┌─res─┐ +│ [2] │ +└─────┘ +``` + +Функция `arrayFilter` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. + +## arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} + +Возвращает 1, если существует хотя бы один элемент массива `arr`, для которого функция func возвращает не 0. Иначе возвращает 0. + +Функция `arrayExists` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. + +## arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} + +Возвращает 1, если для всех элементов массива `arr`, функция `func` возвращает не 0. Иначе возвращает 0. + +Функция `arrayAll` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. + +## arrayFirst(func, arr1, …) {#array-first} + +Возвращает первый элемент массива `arr1`, для которого функция func возвращает не 0. + +Функция `arrayFirst` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. + +## arrayFirstIndex(func, arr1, …) {#array-first-index} + +Возвращает индекс первого элемента массива `arr1`, для которого функция func возвращает не 0. 
+ +Функция `arrayFirstIndex` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) — в качестве первого аргумента ей нужно передать лямбда-функцию, и этот аргумент не может быть опущен. + +## arraySum(\[func,\] arr1, …) {#array-sum} + +Возвращает сумму значений функции `func`. Если функция не указана - просто возвращает сумму элементов массива. + +Функция `arraySum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. + +## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} + +Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. + +Функция `arrayCumSum` является [функцией высшего порядка](../../sql-reference/functions/index.md#higher-order-functions) - в качестве первого аргумента ей можно передать лямбда-функцию. + +Пример: + +``` sql +SELECT arrayCumSum([1, 1, 1, 1]) AS res +``` + +``` text +┌─res──────────┐ +│ [1, 2, 3, 4] │ +└──────────────┘ + ## arrayAUC {#arrayauc} Вычисляет площадь под кривой. diff --git a/docs/ru/sql-reference/functions/higher-order-functions.md b/docs/ru/sql-reference/functions/higher-order-functions.md deleted file mode 100644 index cd3dee5b1a7..00000000000 --- a/docs/ru/sql-reference/functions/higher-order-functions.md +++ /dev/null @@ -1,167 +0,0 @@ -# Функции высшего порядка {#higher-order-functions} - -## Оператор `->`, функция lambda(params, expr) {#operator-funktsiia-lambdaparams-expr} - -Позволяет описать лямбда-функцию для передачи в функцию высшего порядка. Слева от стрелочки стоит формальный параметр - произвольный идентификатор, или несколько формальных параметров - произвольные идентификаторы в кортеже. Справа от стрелочки стоит выражение, в котором могут использоваться эти формальные параметры, а также любые столбцы таблицы. 
- -Примеры: `x -> 2 * x, str -> str != Referer.` - -Функции высшего порядка, в качестве своего функционального аргумента могут принимать только лямбда-функции. - -В функции высшего порядка может быть передана лямбда-функция, принимающая несколько аргументов. В этом случае, в функцию высшего порядка передаётся несколько массивов одинаковых длин, которым эти аргументы будут соответствовать. - -Для некоторых функций, например [arrayCount](#higher_order_functions-array-count) или [arraySum](#higher_order_functions-array-sum), первый аргумент (лямбда-функция) может отсутствовать. В этом случае, подразумевается тождественное отображение. - -Для функций, перечисленных ниже, лямбда-функцию должна быть указана всегда: - -- [arrayMap](#higher_order_functions-array-map) -- [arrayFilter](#higher_order_functions-array-filter) -- [arrayFirst](#higher_order_functions-array-first) -- [arrayFirstIndex](#higher_order_functions-array-first-index) - -### arrayMap(func, arr1, …) {#higher_order_functions-array-map} - -Вернуть массив, полученный на основе результатов применения функции `func` к каждому элементу массива `arr`. - -Примеры: - -``` sql -SELECT arrayMap(x -> (x + 2), [1, 2, 3]) as res; -``` - -``` text -┌─res─────┐ -│ [3,4,5] │ -└─────────┘ -``` - -Следующий пример показывает, как создать кортежи из элементов разных массивов: - -``` sql -SELECT arrayMap((x, y) -> (x, y), [1, 2, 3], [4, 5, 6]) AS res -``` - -``` text -┌─res─────────────────┐ -│ [(1,4),(2,5),(3,6)] │ -└─────────────────────┘ -``` - -Обратите внимание, что у функции `arrayMap` первый аргумент (лямбда-функция) не может быть опущен. - -### arrayFilter(func, arr1, …) {#higher_order_functions-array-filter} - -Вернуть массив, содержащий только те элементы массива `arr1`, для которых функция `func` возвращает не 0. 
- -Примеры: - -``` sql -SELECT arrayFilter(x -> x LIKE '%World%', ['Hello', 'abc World']) AS res -``` - -``` text -┌─res───────────┐ -│ ['abc World'] │ -└───────────────┘ -``` - -``` sql -SELECT - arrayFilter( - (i, x) -> x LIKE '%World%', - arrayEnumerate(arr), - ['Hello', 'abc World'] AS arr) - AS res -``` - -``` text -┌─res─┐ -│ [2] │ -└─────┘ -``` - -Обратите внимание, что у функции `arrayFilter` первый аргумент (лямбда-функция) не может быть опущен. - -### arrayCount(\[func,\] arr1, …) {#higher_order_functions-array-count} - -Вернуть количество элементов массива `arr`, для которых функция func возвращает не 0. Если func не указана - вернуть количество ненулевых элементов массива. - -### arrayExists(\[func,\] arr1, …) {#arrayexistsfunc-arr1} - -Вернуть 1, если существует хотя бы один элемент массива `arr`, для которого функция func возвращает не 0. Иначе вернуть 0. - -### arrayAll(\[func,\] arr1, …) {#arrayallfunc-arr1} - -Вернуть 1, если для всех элементов массива `arr`, функция `func` возвращает не 0. Иначе вернуть 0. - -### arraySum(\[func,\] arr1, …) {#higher_order_functions-array-sum} - -Вернуть сумму значений функции `func`. Если функция не указана - просто вернуть сумму элементов массива. - -### arrayFirst(func, arr1, …) {#higher_order_functions-array-first} - -Вернуть первый элемент массива `arr1`, для которого функция func возвращает не 0. - -Обратите внимание, что у функции `arrayFirst` первый аргумент (лямбда-функция) не может быть опущен. - -### arrayFirstIndex(func, arr1, …) {#higher_order_functions-array-first-index} - -Вернуть индекс первого элемента массива `arr1`, для которого функция func возвращает не 0. - -Обратите внимание, что у функции `arrayFirstFilter` первый аргумент (лямбда-функция) не может быть опущен. - -### arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1} - -Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). 
Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием. - -Пример: - -``` sql -SELECT arrayCumSum([1, 1, 1, 1]) AS res -``` - -``` text -┌─res──────────┐ -│ [1, 2, 3, 4] │ -└──────────────┘ -``` - -### arraySort(\[func,\] arr1, …) {#arraysortfunc-arr1} - -Возвращает отсортированный в восходящем порядке массив `arr1`. Если задана функция `func`, то порядок сортировки определяется результатом применения функции `func` на элементы массива (массивов). - -Для улучшения эффективности сортировки применяется [Преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). - -Пример: - -``` sql -SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]); -``` - -``` text -┌─res────────────────┐ -│ ['world', 'hello'] │ -└────────────────────┘ -``` - -Подробная информация о методе `arraySort` приведена в разделе [Функции по работе с массивами](array-functions.md#array_functions-sort). - -### arrayReverseSort(\[func,\] arr1, …) {#arrayreversesortfunc-arr1} - -Возвращает отсортированный в нисходящем порядке массив `arr1`. Если задана функция `func`, то порядок сортировки определяется результатом применения функции `func` на элементы массива (массивов). - -Пример: - -``` sql -SELECT arrayReverseSort((x, y) -> y, ['hello', 'world'], [2, 1]) as res; -``` - -``` text -┌─res───────────────┐ -│ ['hello','world'] │ -└───────────────────┘ -``` - -Подробная информация о методе `arrayReverseSort` приведена в разделе [Функции по работе с массивами](array-functions.md#array_functions-reverse-sort). 
- -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/higher_order_functions/) diff --git a/docs/ru/sql-reference/functions/index.md b/docs/ru/sql-reference/functions/index.md index 06d3d892cf9..9c1c0c5ca9d 100644 --- a/docs/ru/sql-reference/functions/index.md +++ b/docs/ru/sql-reference/functions/index.md @@ -38,6 +38,20 @@ Функции не могут поменять значения своих аргументов - любые изменения возвращаются в качестве результата. Соответственно, от порядка записи функций в запросе, результат вычислений отдельных функций не зависит. +## Функции высшего порядка, оператор `->` и функция lambda(params, expr) {#higher-order-functions} + +Функции высшего порядка, в качестве своего функционального аргумента могут принимать только лямбда-функции. Чтобы передать лямбда-функцию в функцию высшего порядка, используйте оператор `->`. Слева от стрелочки стоит формальный параметр — произвольный идентификатор, или несколько формальных параметров — произвольные идентификаторы в кортеже. Справа от стрелочки стоит выражение, в котором могут использоваться эти формальные параметры, а также любые столбцы таблицы. + +Примеры: +``` +x -> 2 * x +str -> str != Referer +``` + +В функции высшего порядка может быть передана лямбда-функция, принимающая несколько аргументов. В этом случае в функцию высшего порядка передаётся несколько массивов одинаковой длины, которым эти аргументы будут соответствовать. + +Для некоторых функций первый аргумент (лямбда-функция) может отсутствовать. В этом случае подразумевается тождественное отображение. + ## Обработка ошибок {#obrabotka-oshibok} Некоторые функции могут кидать исключения в случае ошибочных данных. В этом случае, выполнение запроса прерывается, и текст ошибки выводится клиенту. При распределённой обработке запроса, при возникновении исключения на одном из серверов, на другие серверы пытается отправиться просьба тоже прервать выполнение запроса. 
diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index 9c6a0711ec9..655c4be8318 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -93,7 +93,7 @@ LIMIT 1 \G ``` -Функция [arrayMap](higher-order-functions.md#higher_order_functions-array-map) позволяет обрабатывать каждый отдельный элемент массива `trace` с помощью функции `addressToLine`. Результат этой обработки вы видите в виде `trace_source_code_lines` колонки выходных данных. +Функция [arrayMap](../../sql-reference/functions/array-functions.md#array-map) позволяет обрабатывать каждый отдельный элемент массива `trace` с помощью функции `addressToLine`. Результат этой обработки вы видите в виде `trace_source_code_lines` колонки выходных данных. ``` text Row 1: @@ -179,7 +179,7 @@ LIMIT 1 \G ``` -То [arrayMap](higher-order-functions.md#higher_order_functions-array-map) функция позволяет обрабатывать каждый отдельный элемент системы. `trace` массив по типу `addressToSymbols` функция. Результат этой обработки вы видите в виде `trace_symbols` колонка выходных данных. +То [arrayMap](../../sql-reference/functions/array-functions.md#array-map) функция позволяет обрабатывать каждый отдельный элемент системы. `trace` массив по типу `addressToSymbols` функция. Результат этой обработки вы видите в виде `trace_symbols` колонка выходных данных. ``` text Row 1: @@ -276,7 +276,7 @@ LIMIT 1 \G ``` -Функция [arrayMap](higher-order-functions.md#higher_order_functions-array-map) позволяет обрабатывать каждый отдельный элемент массива `trace` с помощью функции `demangle`. +Функция [arrayMap](../../sql-reference/functions/array-functions.md#array-map) позволяет обрабатывать каждый отдельный элемент массива `trace` с помощью функции `demangle`. 
``` text Row 1: diff --git a/docs/zh/getting-started/tutorial.md b/docs/zh/getting-started/tutorial.md index 38d5a586806..43c7ed0ec59 100644 --- a/docs/zh/getting-started/tutorial.md +++ b/docs/zh/getting-started/tutorial.md @@ -1,6 +1,4 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_priority: 12 toc_title: "\u6559\u7A0B" --- @@ -9,27 +7,27 @@ toc_title: "\u6559\u7A0B" ## 从本教程中可以期待什么? {#what-to-expect-from-this-tutorial} -通过本教程,您将学习如何设置一个简单的ClickHouse集群。 它会很小,但容错和可扩展。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。 +通过本教程,您将学习如何设置一个简单的ClickHouse集群。 它会很小,但却是容错和可扩展的。 然后,我们将使用其中一个示例数据集来填充数据并执行一些演示查询。 ## 单节点设置 {#single-node-setup} -为了推迟分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从安装 [黛布](install.md#install-from-deb-packages) 或 [rpm](install.md#from-rpm-packages) 包,但也有 [替代办法](install.md#from-docker-image) 对于不支持它们的操作系统。 +为了推迟分布式环境的复杂性,我们将首先在单个服务器或虚拟机上部署ClickHouse。 ClickHouse通常是从[deb](install.md#install-from-deb-packages) 或 [rpm](install.md#from-rpm-packages) 包安装,但对于不支持它们的操作系统也有 [替代方法](install.md#from-docker-image) 。 -例如,您选择了 `deb` 包和执行: +例如,您选择了从 `deb` 包安装,执行: ``` bash {% include 'install/deb.sh' %} ``` -我们在安装的软件包中有什么: +在我们安装的软件中包含这些包: -- `clickhouse-client` 包包含 [ツ环板clientョツ嘉ッツ偲](../interfaces/cli.md) 应用程序,交互式ClickHouse控制台客户端。 -- `clickhouse-common` 包包含一个ClickHouse可执行文件。 -- `clickhouse-server` 包包含要作为服务器运行ClickHouse的配置文件。 +- `clickhouse-client` 包,包含 [clickhouse-client](../interfaces/cli.md) 应用程序,它是交互式ClickHouse控制台客户端。 +- `clickhouse-common` 包,包含一个ClickHouse可执行文件。 +- `clickhouse-server` 包,包含要作为服务端运行的ClickHouse配置文件。 -服务器配置文件位于 `/etc/clickhouse-server/`. 在进一步讨论之前,请注意 `` 元素in `config.xml`. Path确定数据存储的位置,因此应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`. 如果你想调整配置,直接编辑并不方便 `config.xml` 文件,考虑到它可能会在未来的软件包更新中被重写。 复盖配置元素的推荐方法是创建 [在配置文件。d目录](../operations/configuration-files.md) 它作为 “patches” 要配置。xml +服务端配置文件位于 `/etc/clickhouse-server/`。 在进一步讨论之前,请注意 `config.xml`文件中的`` 元素. 
Path决定了数据存储的位置,因此该位置应该位于磁盘容量较大的卷上;默认值为 `/var/lib/clickhouse/`。 如果你想调整配置,考虑到它可能会在未来的软件包更新中被重写,直接编辑`config.xml` 文件并不方便。 推荐的方法是在[配置文件](../operations/configuration-files.md)目录创建文件,作为config.xml文件的“补丁”,用以复写配置元素。 -你可能已经注意到了, `clickhouse-server` 安装包后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务器的方式取决于您的init系统,通常情况下,它是: +你可能已经注意到了, `clickhouse-server` 安装后不会自动启动。 它也不会在更新后自动重新启动。 您启动服务端的方式取决于您的初始系统,通常情况下是这样: ``` bash sudo service clickhouse-server start @@ -41,13 +39,13 @@ sudo service clickhouse-server start sudo /etc/init.d/clickhouse-server start ``` -服务器日志的默认位置是 `/var/log/clickhouse-server/`. 服务器已准备好处理客户端连接一旦它记录 `Ready for connections` 消息 +服务端日志的默认位置是 `/var/log/clickhouse-server/`。当服务端在日志中记录 `Ready for connections` 消息,即表示服务端已准备好处理客户端连接。 -一旦 `clickhouse-server` 正在运行我们可以利用 `clickhouse-client` 连接到服务器并运行一些测试查询,如 `SELECT "Hello, world!";`. +一旦 `clickhouse-server` 启动并运行,我们可以利用 `clickhouse-client` 连接到服务端,并运行一些测试查询,如 `SELECT "Hello, world!";`.
-Clickhouse-客户端的快速提示 +Clickhouse-client的快速提示 交互模式: diff --git a/docs/zh/guides/apply-catboost-model.md b/docs/zh/guides/apply-catboost-model.md index be21c372307..3657a947ad2 100644 --- a/docs/zh/guides/apply-catboost-model.md +++ b/docs/zh/guides/apply-catboost-model.md @@ -15,7 +15,7 @@ toc_title: "\u5E94\u7528CatBoost\u6A21\u578B" 1. [创建表](#create-table). 2. [将数据插入到表中](#insert-data-to-table). -3. [碌莽禄into拢Integrate010-68520682\](#integrate-catboost-into-clickhouse) (可选步骤)。 +3. [将CatBoost集成到ClickHouse中](#integrate-catboost-into-clickhouse) (可选步骤)。 4. [从SQL运行模型推理](#run-model-inference). 有关训练CatBoost模型的详细信息,请参阅 [培训和应用模型](https://catboost.ai/docs/features/training.html#training). @@ -119,12 +119,12 @@ FROM amazon_train +-------+ ``` -## 3. 碌莽禄into拢Integrate010-68520682\ {#integrate-catboost-into-clickhouse} +## 3. 将CatBoost集成到ClickHouse中 {#integrate-catboost-into-clickhouse} !!! note "注" **可选步骤。** Docker映像包含运行CatBoost和ClickHouse所需的所有内容。 -碌莽禄to拢integrate010-68520682\: +CatBoost集成到ClickHouse步骤: **1.** 构建评估库。 diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 1316ff8f4c6..00ca89bd887 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -15,6 +15,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; + extern const int UNKNOWN_TYPE; } @@ -86,6 +87,8 @@ namespace case ValueType::vtUUID: assert_cast(column).insert(parse(value.convert())); break; + default: + throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE); } } diff --git a/programs/odbc-bridge/ODBCBlockOutputStream.cpp b/programs/odbc-bridge/ODBCBlockOutputStream.cpp index b5bffc58c55..82ca861ea67 100644 --- a/programs/odbc-bridge/ODBCBlockOutputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockOutputStream.cpp @@ -13,6 +13,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int UNKNOWN_TYPE; +} + namespace { using ValueType = 
ExternalResultDescription::ValueType; @@ -79,6 +84,9 @@ namespace return Poco::Dynamic::Var(std::to_string(LocalDateTime(time_t(field.get())))).convert(); case ValueType::vtUUID: return Poco::Dynamic::Var(UUID(field.get()).toUnderType().toHexString()).convert(); + default: + throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE); + } __builtin_unreachable(); } diff --git a/src/AggregateFunctions/AggregateFunctionArray.cpp b/src/AggregateFunctions/AggregateFunctionArray.cpp index 7fe4f1f448b..d0f17da5aa4 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionArray.cpp @@ -12,6 +12,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class AggregateFunctionCombinatorArray final : public IAggregateFunctionCombinator { public: @@ -45,6 +48,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorArray(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index 4d89e8fb199..8ad37f49797 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -6,12 +6,14 @@ namespace DB { - namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator { public: @@ -56,6 +58,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionForEach.cpp b/src/AggregateFunctions/AggregateFunctionForEach.cpp index 693bc6839fa..6e0365fc04b 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.cpp +++ b/src/AggregateFunctions/AggregateFunctionForEach.cpp @@ -12,6 +12,9 @@ namespace 
ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class AggregateFunctionCombinatorForEach final : public IAggregateFunctionCombinator { public: @@ -42,6 +45,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorForEach(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionMerge.cpp b/src/AggregateFunctions/AggregateFunctionMerge.cpp index 2ce3f0e11f6..17157d21bd1 100644 --- a/src/AggregateFunctions/AggregateFunctionMerge.cpp +++ b/src/AggregateFunctions/AggregateFunctionMerge.cpp @@ -13,6 +13,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class AggregateFunctionCombinatorMerge final : public IAggregateFunctionCombinator { public: @@ -55,6 +58,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionNull.cpp b/src/AggregateFunctions/AggregateFunctionNull.cpp index c88d1e7f24c..f584ae1f34c 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionNull.cpp @@ -15,6 +15,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + class AggregateFunctionCombinatorNull final : public IAggregateFunctionCombinator { public: @@ -119,6 +122,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.cpp b/src/AggregateFunctions/AggregateFunctionOrFill.cpp index ce8fc8d9ca5..af107e26ca9 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.cpp +++ b/src/AggregateFunctions/AggregateFunctionOrFill.cpp @@ -6,6 +6,8 @@ namespace DB { +namespace +{ template class AggregateFunctionCombinatorOrFill final : 
public IAggregateFunctionCombinator @@ -32,6 +34,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared>()); diff --git a/src/AggregateFunctions/AggregateFunctionResample.cpp b/src/AggregateFunctions/AggregateFunctionResample.cpp index 389c9048918..b81fb442f27 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.cpp +++ b/src/AggregateFunctions/AggregateFunctionResample.cpp @@ -13,6 +13,9 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } +namespace +{ + class AggregateFunctionCombinatorResample final : public IAggregateFunctionCombinator { public: @@ -93,6 +96,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/AggregateFunctions/AggregateFunctionResample.h b/src/AggregateFunctions/AggregateFunctionResample.h index 92fa8fbb2a5..c1528686785 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.h +++ b/src/AggregateFunctions/AggregateFunctionResample.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -60,7 +61,18 @@ public: if (end < begin) total = 0; else - total = (end - begin + step - 1) / step; + { + Key dif; + size_t sum; + if (common::subOverflow(end, begin, dif) + || common::addOverflow(static_cast(dif), step, sum)) + { + throw Exception("Overflow in internal computations in function " + getName() + + ". 
Too large arguments", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + } + + total = (sum - 1) / step; // total = (end - begin + step - 1) / step + } if (total > MAX_ELEMENTS) throw Exception("The range given in function " diff --git a/src/AggregateFunctions/AggregateFunctionState.cpp b/src/AggregateFunctions/AggregateFunctionState.cpp index 9d1c677c0ff..348d8ba44dd 100644 --- a/src/AggregateFunctions/AggregateFunctionState.cpp +++ b/src/AggregateFunctions/AggregateFunctionState.cpp @@ -13,6 +13,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } +namespace +{ + class AggregateFunctionCombinatorState final : public IAggregateFunctionCombinator { public: @@ -33,6 +36,8 @@ public: } }; +} + void registerAggregateFunctionCombinatorState(AggregateFunctionCombinatorFactory & factory) { factory.registerCombinator(std::make_shared()); diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index e3b879d6dd5..275292d2d72 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -170,7 +170,6 @@ public: size_t sizeOfValueIfFixed() const override { return getDictionary().sizeOfValueIfFixed(); } bool isNumeric() const override { return getDictionary().isNumeric(); } bool lowCardinality() const override { return true; } - bool isNullable() const override { return isColumnNullable(*dictionary.getColumnUniquePtr()); } const IColumnUnique & getDictionary() const { return dictionary.getColumnUnique(); } const ColumnPtr & getDictionaryPtr() const { return dictionary.getColumnUniquePtr(); } diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 297192e650b..85da23fb303 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -281,7 +281,7 @@ namespace ErrorCodes extern const int DICTIONARY_IS_EMPTY = 281; extern const int INCORRECT_INDEX = 282; extern const int UNKNOWN_DISTRIBUTED_PRODUCT_MODE = 283; - extern const int UNKNOWN_GLOBAL_SUBQUERIES_METHOD = 284; + extern const int 
WRONG_GLOBAL_SUBQUERY = 284; extern const int TOO_FEW_LIVE_REPLICAS = 285; extern const int UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE = 286; extern const int UNKNOWN_FORMAT_VERSION = 287; @@ -507,6 +507,7 @@ namespace ErrorCodes extern const int CANNOT_DECLARE_RABBITMQ_EXCHANGE = 540; extern const int CANNOT_CREATE_RABBITMQ_QUEUE_BINDING = 541; extern const int CANNOT_REMOVE_RABBITMQ_EXCHANGE = 542; + extern const int UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL = 543; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index 1b083c004c0..3fb45ab0948 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -72,9 +72,10 @@ bool CachedCompressedReadBuffer::nextImpl() } CachedCompressedReadBuffer::CachedCompressedReadBuffer( - const std::string & path_, std::function()> file_in_creator_, UncompressedCache * cache_) + const std::string & path_, std::function()> file_in_creator_, UncompressedCache * cache_, bool allow_different_codecs_) : ReadBuffer(nullptr, 0), file_in_creator(std::move(file_in_creator_)), cache(cache_), path(path_), file_pos(0) { + allow_different_codecs = allow_different_codecs_; } void CachedCompressedReadBuffer::seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block) diff --git a/src/Compression/CachedCompressedReadBuffer.h b/src/Compression/CachedCompressedReadBuffer.h index 88bcec8197d..c2338f6f841 100644 --- a/src/Compression/CachedCompressedReadBuffer.h +++ b/src/Compression/CachedCompressedReadBuffer.h @@ -38,7 +38,7 @@ private: clockid_t clock_type {}; public: - CachedCompressedReadBuffer(const std::string & path, std::function()> file_in_creator, UncompressedCache * cache_); + CachedCompressedReadBuffer(const std::string & path, std::function()> file_in_creator, UncompressedCache * cache_, bool allow_different_codecs_ = false); void 
seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block); diff --git a/src/Compression/CompressedReadBufferBase.cpp b/src/Compression/CompressedReadBufferBase.cpp index a05b5cd7f64..be2f697e1b3 100644 --- a/src/Compression/CompressedReadBufferBase.cpp +++ b/src/Compression/CompressedReadBufferBase.cpp @@ -105,13 +105,24 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, uint8_t method = ICompressionCodec::readMethod(own_compressed_buffer.data()); if (!codec) + { codec = CompressionCodecFactory::instance().get(method); + } else if (method != codec->getMethodByte()) - throw Exception("Data compressed with different methods, given method byte 0x" - + getHexUIntLowercase(method) - + ", previous method byte 0x" - + getHexUIntLowercase(codec->getMethodByte()), - ErrorCodes::CANNOT_DECOMPRESS); + { + if (allow_different_codecs) + { + codec = CompressionCodecFactory::instance().get(method); + } + else + { + throw Exception("Data compressed with different methods, given method byte 0x" + + getHexUIntLowercase(method) + + ", previous method byte 0x" + + getHexUIntLowercase(codec->getMethodByte()), + ErrorCodes::CANNOT_DECOMPRESS); + } + } size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(own_compressed_buffer.data()); size_decompressed = ICompressionCodec::readDecompressedBlockSize(own_compressed_buffer.data()); @@ -163,21 +174,32 @@ void CompressedReadBufferBase::decompress(char * to, size_t size_decompressed, s uint8_t method = ICompressionCodec::readMethod(compressed_buffer); if (!codec) + { codec = CompressionCodecFactory::instance().get(method); + } else if (codec->getMethodByte() != method) - throw Exception("Data compressed with different methods, given method byte " - + getHexUIntLowercase(method) - + ", previous method byte " - + getHexUIntLowercase(codec->getMethodByte()), - ErrorCodes::CANNOT_DECOMPRESS); + { + if (allow_different_codecs) + { + codec = 
CompressionCodecFactory::instance().get(method); + } + else + { + throw Exception("Data compressed with different methods, given method byte " + + getHexUIntLowercase(method) + + ", previous method byte " + + getHexUIntLowercase(codec->getMethodByte()), + ErrorCodes::CANNOT_DECOMPRESS); + } + } codec->decompress(compressed_buffer, size_compressed_without_checksum, to); } /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. -CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in) - : compressed_in(in), own_compressed_buffer(0) +CompressedReadBufferBase::CompressedReadBufferBase(ReadBuffer * in, bool allow_different_codecs_) + : compressed_in(in), own_compressed_buffer(0), allow_different_codecs(allow_different_codecs_) { } diff --git a/src/Compression/CompressedReadBufferBase.h b/src/Compression/CompressedReadBufferBase.h index f44140dcd04..71dc5274d5b 100644 --- a/src/Compression/CompressedReadBufferBase.h +++ b/src/Compression/CompressedReadBufferBase.h @@ -26,6 +26,9 @@ protected: /// Don't checksum on decompressing. bool disable_checksum = false; + /// Allow reading data, compressed by different codecs from one file. + bool allow_different_codecs; + /// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need. /// Returns number of compressed bytes read. size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum); @@ -34,7 +37,7 @@ protected: public: /// 'compressed_in' could be initialized lazily, but before first call of 'readCompressedData'. - CompressedReadBufferBase(ReadBuffer * in = nullptr); + CompressedReadBufferBase(ReadBuffer * in = nullptr, bool allow_different_codecs_ = false); ~CompressedReadBufferBase(); /** Disable checksums. 
diff --git a/src/Compression/CompressedReadBufferFromFile.cpp b/src/Compression/CompressedReadBufferFromFile.cpp index 8d6a42eacd3..f3fa2d6bc10 100644 --- a/src/Compression/CompressedReadBufferFromFile.cpp +++ b/src/Compression/CompressedReadBufferFromFile.cpp @@ -36,20 +36,22 @@ bool CompressedReadBufferFromFile::nextImpl() return true; } -CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr buf) +CompressedReadBufferFromFile::CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_) : BufferWithOwnMemory(0), p_file_in(std::move(buf)), file_in(*p_file_in) { compressed_in = &file_in; + allow_different_codecs = allow_different_codecs_; } CompressedReadBufferFromFile::CompressedReadBufferFromFile( - const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size) + const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size, bool allow_different_codecs_) : BufferWithOwnMemory(0) , p_file_in(createReadBufferFromFileBase(path, estimated_size, aio_threshold, mmap_threshold, buf_size)) , file_in(*p_file_in) { compressed_in = &file_in; + allow_different_codecs = allow_different_codecs_; } diff --git a/src/Compression/CompressedReadBufferFromFile.h b/src/Compression/CompressedReadBufferFromFile.h index 1729490f606..166b2595ef9 100644 --- a/src/Compression/CompressedReadBufferFromFile.h +++ b/src/Compression/CompressedReadBufferFromFile.h @@ -28,10 +28,11 @@ private: bool nextImpl() override; public: - CompressedReadBufferFromFile(std::unique_ptr buf); + CompressedReadBufferFromFile(std::unique_ptr buf, bool allow_different_codecs_ = false); CompressedReadBufferFromFile( - const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + const std::string & path, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold, + size_t buf_size = 
DBMS_DEFAULT_BUFFER_SIZE, bool allow_different_codecs_ = false); void seek(size_t offset_in_compressed_file, size_t offset_in_decompressed_block); diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp index 51bd19f646b..ecb7c36b205 100644 --- a/src/Compression/CompressionCodecDelta.cpp +++ b/src/Compression/CompressionCodecDelta.cpp @@ -36,6 +36,11 @@ ASTPtr CompressionCodecDelta::getCodecDesc() const return makeASTFunction("Delta", literal); } +void CompressionCodecDelta::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + namespace { diff --git a/src/Compression/CompressionCodecDelta.h b/src/Compression/CompressionCodecDelta.h index 5c3979e063e..a192fab051a 100644 --- a/src/Compression/CompressionCodecDelta.h +++ b/src/Compression/CompressionCodecDelta.h @@ -14,7 +14,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp index 157e2df1a3f..dd2e95a916d 100644 --- a/src/Compression/CompressionCodecDoubleDelta.cpp +++ b/src/Compression/CompressionCodecDoubleDelta.cpp @@ -339,6 +339,12 @@ ASTPtr CompressionCodecDoubleDelta::getCodecDesc() const return std::make_shared("DoubleDelta"); } +void CompressionCodecDoubleDelta::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); + hash.update(data_bytes_size); +} + UInt32 CompressionCodecDoubleDelta::getMaxCompressedDataSize(UInt32 uncompressed_size) const { const auto result = 2 // common header diff --git a/src/Compression/CompressionCodecDoubleDelta.h b/src/Compression/CompressionCodecDoubleDelta.h index a2690d24414..30ef086077d 100644 --- 
a/src/Compression/CompressionCodecDoubleDelta.h +++ b/src/Compression/CompressionCodecDoubleDelta.h @@ -100,7 +100,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp index 042835f4a32..3d08734fe91 100644 --- a/src/Compression/CompressionCodecGorilla.cpp +++ b/src/Compression/CompressionCodecGorilla.cpp @@ -254,6 +254,12 @@ ASTPtr CompressionCodecGorilla::getCodecDesc() const return std::make_shared("Gorilla"); } +void CompressionCodecGorilla::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); + hash.update(data_bytes_size); +} + UInt32 CompressionCodecGorilla::getMaxCompressedDataSize(UInt32 uncompressed_size) const { const auto result = 2 // common header diff --git a/src/Compression/CompressionCodecGorilla.h b/src/Compression/CompressionCodecGorilla.h index 523add0700f..df0f329dc31 100644 --- a/src/Compression/CompressionCodecGorilla.h +++ b/src/Compression/CompressionCodecGorilla.h @@ -97,7 +97,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecLZ4.cpp b/src/Compression/CompressionCodecLZ4.cpp index cf3622cd702..1370349d68d 100644 --- a/src/Compression/CompressionCodecLZ4.cpp +++ b/src/Compression/CompressionCodecLZ4.cpp @@ -35,6 +35,11 @@ ASTPtr CompressionCodecLZ4::getCodecDesc() const return std::make_shared("LZ4"); } +void 
CompressionCodecLZ4::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const { return LZ4_COMPRESSBOUND(uncompressed_size); diff --git a/src/Compression/CompressionCodecLZ4.h b/src/Compression/CompressionCodecLZ4.h index 2f19af08185..229e25481e6 100644 --- a/src/Compression/CompressionCodecLZ4.h +++ b/src/Compression/CompressionCodecLZ4.h @@ -18,6 +18,8 @@ public: UInt32 getAdditionalSizeAtTheEndOfBuffer() const override { return LZ4::ADDITIONAL_BYTES_AT_END_OF_BUFFER; } + void updateHash(SipHash & hash) const override; + protected: UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; diff --git a/src/Compression/CompressionCodecMultiple.cpp b/src/Compression/CompressionCodecMultiple.cpp index 868df90825e..77f0fc132fe 100644 --- a/src/Compression/CompressionCodecMultiple.cpp +++ b/src/Compression/CompressionCodecMultiple.cpp @@ -37,6 +37,12 @@ ASTPtr CompressionCodecMultiple::getCodecDesc() const return result; } +void CompressionCodecMultiple::updateHash(SipHash & hash) const +{ + for (const auto & codec : codecs) + codec->updateHash(hash); +} + UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const { UInt32 compressed_size = uncompressed_size; diff --git a/src/Compression/CompressionCodecMultiple.h b/src/Compression/CompressionCodecMultiple.h index cd50d3250e3..6bac189bdf7 100644 --- a/src/Compression/CompressionCodecMultiple.h +++ b/src/Compression/CompressionCodecMultiple.h @@ -19,7 +19,10 @@ public: static std::vector getCodecsBytesFromData(const char * source); + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 decompressed_size) const override; diff --git 
a/src/Compression/CompressionCodecNone.cpp b/src/Compression/CompressionCodecNone.cpp index 50c19b2b547..f727c4b4860 100644 --- a/src/Compression/CompressionCodecNone.cpp +++ b/src/Compression/CompressionCodecNone.cpp @@ -17,6 +17,11 @@ ASTPtr CompressionCodecNone::getCodecDesc() const return std::make_shared("NONE"); } +void CompressionCodecNone::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + UInt32 CompressionCodecNone::doCompressData(const char * source, UInt32 source_size, char * dest) const { memcpy(dest, source, source_size); diff --git a/src/Compression/CompressionCodecNone.h b/src/Compression/CompressionCodecNone.h index ed604063198..370ef301694 100644 --- a/src/Compression/CompressionCodecNone.h +++ b/src/Compression/CompressionCodecNone.h @@ -15,7 +15,10 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp index 16462e50ebd..30972a5fe1f 100644 --- a/src/Compression/CompressionCodecT64.cpp +++ b/src/Compression/CompressionCodecT64.cpp @@ -646,6 +646,13 @@ ASTPtr CompressionCodecT64::getCodecDesc() const return makeASTFunction("T64", literal); } +void CompressionCodecT64::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); + hash.update(type_idx); + hash.update(variant); +} + void registerCodecT64(CompressionCodecFactory & factory) { auto reg_func = [&](const ASTPtr & arguments, DataTypePtr type) -> CompressionCodecPtr diff --git a/src/Compression/CompressionCodecT64.h b/src/Compression/CompressionCodecT64.h index 11efbea0955..9671eb81ce1 100644 --- a/src/Compression/CompressionCodecT64.h +++ b/src/Compression/CompressionCodecT64.h @@ -35,6 
+35,8 @@ public: ASTPtr getCodecDesc() const override; + void updateHash(SipHash & hash) const override; + protected: UInt32 doCompressData(const char * src, UInt32 src_size, char * dst) const override; void doDecompressData(const char * src, UInt32 src_size, char * dst, UInt32 uncompressed_size) const override; diff --git a/src/Compression/CompressionCodecZSTD.cpp b/src/Compression/CompressionCodecZSTD.cpp index ab48580533e..3b317884fec 100644 --- a/src/Compression/CompressionCodecZSTD.cpp +++ b/src/Compression/CompressionCodecZSTD.cpp @@ -32,6 +32,11 @@ ASTPtr CompressionCodecZSTD::getCodecDesc() const return makeASTFunction("ZSTD", literal); } +void CompressionCodecZSTD::updateHash(SipHash & hash) const +{ + getCodecDesc()->updateTreeHash(hash); +} + UInt32 CompressionCodecZSTD::getMaxCompressedDataSize(UInt32 uncompressed_size) const { return ZSTD_compressBound(uncompressed_size); diff --git a/src/Compression/CompressionCodecZSTD.h b/src/Compression/CompressionCodecZSTD.h index 2ad893083c3..3bfb6bb1d4d 100644 --- a/src/Compression/CompressionCodecZSTD.h +++ b/src/Compression/CompressionCodecZSTD.h @@ -21,7 +21,10 @@ public: UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; + void updateHash(SipHash & hash) const override; + protected: + UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override; void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override; diff --git a/src/Compression/ICompressionCodec.cpp b/src/Compression/ICompressionCodec.cpp index 4aafc298658..5de015b2680 100644 --- a/src/Compression/ICompressionCodec.cpp +++ b/src/Compression/ICompressionCodec.cpp @@ -35,6 +35,13 @@ ASTPtr ICompressionCodec::getFullCodecDesc() const return result; } +UInt64 ICompressionCodec::getHash() const +{ + SipHash hash; + updateHash(hash); + return hash.get64(); +} + UInt32 ICompressionCodec::compress(const char * source, UInt32 source_size, char * dest) 
const { assert(source != nullptr && dest != nullptr); diff --git a/src/Compression/ICompressionCodec.h b/src/Compression/ICompressionCodec.h index fa1f73ce4dd..8f72ba55200 100644 --- a/src/Compression/ICompressionCodec.h +++ b/src/Compression/ICompressionCodec.h @@ -5,6 +5,7 @@ #include #include #include +#include namespace DB @@ -36,6 +37,10 @@ public: /// "CODEC(LZ4,LZ4HC(5))" ASTPtr getFullCodecDesc() const; + /// Hash, that depends on codec ast and optional parameters like data type + virtual void updateHash(SipHash & hash) const = 0; + UInt64 getHash() const; + /// Compressed bytes from uncompressed source to dest. Dest should preallocate memory UInt32 compress(const char * source, UInt32 source_size, char * dest) const; diff --git a/src/Core/DecimalComparison.h b/src/Core/DecimalComparison.h index 93992029634..b9ae2a1fe79 100644 --- a/src/Core/DecimalComparison.h +++ b/src/Core/DecimalComparison.h @@ -129,7 +129,7 @@ private: Shift shift; if (decimal0 && decimal1) { - auto result_type = decimalResultType(*decimal0, *decimal1, false, false); + auto result_type = decimalResultType(*decimal0, *decimal1); shift.a = static_cast(result_type.scaleFactorFor(*decimal0, false).value); shift.b = static_cast(result_type.scaleFactorFor(*decimal1, false).value); } diff --git a/src/Core/ExternalResultDescription.cpp b/src/Core/ExternalResultDescription.cpp index 5ed34764909..941ee003c94 100644 --- a/src/Core/ExternalResultDescription.cpp +++ b/src/Core/ExternalResultDescription.cpp @@ -1,9 +1,11 @@ #include "ExternalResultDescription.h" #include #include +#include #include #include #include +#include #include #include #include @@ -64,6 +66,14 @@ void ExternalResultDescription::init(const Block & sample_block_) types.emplace_back(ValueType::vtString, is_nullable); else if (typeid_cast(type)) types.emplace_back(ValueType::vtString, is_nullable); + else if (typeid_cast(type)) + types.emplace_back(ValueType::vtDateTime64, is_nullable); + else if (typeid_cast *>(type)) + 
types.emplace_back(ValueType::vtDecimal32, is_nullable); + else if (typeid_cast *>(type)) + types.emplace_back(ValueType::vtDecimal64, is_nullable); + else if (typeid_cast *>(type)) + types.emplace_back(ValueType::vtDecimal128, is_nullable); else throw Exception{"Unsupported type " + type->getName(), ErrorCodes::UNKNOWN_TYPE}; } diff --git a/src/Core/ExternalResultDescription.h b/src/Core/ExternalResultDescription.h index 0bd77afa628..29294fcf2c8 100644 --- a/src/Core/ExternalResultDescription.h +++ b/src/Core/ExternalResultDescription.h @@ -26,6 +26,10 @@ struct ExternalResultDescription vtDate, vtDateTime, vtUUID, + vtDateTime64, + vtDecimal32, + vtDecimal64, + vtDecimal128 }; Block sample_block; diff --git a/src/Core/MultiEnum.h b/src/Core/MultiEnum.h new file mode 100644 index 00000000000..748550a8779 --- /dev/null +++ b/src/Core/MultiEnum.h @@ -0,0 +1,99 @@ +#pragma once + +#include +#include + +// Wrapper around enum that can have multiple values (or none) set at once. +template +struct MultiEnum +{ + using StorageType = StorageTypeT; + using EnumType = EnumTypeT; + + MultiEnum() = default; + + template ...>>> + explicit MultiEnum(EnumValues ... v) + : MultiEnum((toBitFlag(v) | ... 
| 0u)) + {} + + template >> + explicit MultiEnum(ValueType v) + : bitset(v) + { + static_assert(std::is_unsigned_v); + static_assert(std::is_unsigned_v && std::is_integral_v); + } + + MultiEnum(const MultiEnum & other) = default; + MultiEnum & operator=(const MultiEnum & other) = default; + + bool isSet(EnumType value) const + { + return bitset & toBitFlag(value); + } + + void set(EnumType value) + { + bitset |= toBitFlag(value); + } + + void unSet(EnumType value) + { + bitset &= ~(toBitFlag(value)); + } + + void reset() + { + bitset = 0; + } + + StorageType getValue() const + { + return bitset; + } + + template >> + void setValue(ValueType new_value) + { + // Can't set value from any enum avoid confusion + static_assert(!std::is_enum_v); + bitset = new_value; + } + + bool operator==(const MultiEnum & other) const + { + return bitset == other.bitset; + } + + template >> + bool operator==(ValueType other) const + { + // Shouldn't be comparable with any enum to avoid confusion + static_assert(!std::is_enum_v); + return bitset == other; + } + + template + bool operator!=(U && other) const + { + return !(*this == other); + } + + template >> + friend bool operator==(ValueType left, MultiEnum right) + { + return right == left; + } + + template + friend bool operator!=(L left, MultiEnum right) + { + return !(right == left); + } + +private: + StorageType bitset = 0; + + static StorageType toBitFlag(EnumType v) { return StorageType{1} << static_cast(v); } +}; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d44b85e0fa8..7d0f54fe725 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -383,6 +383,7 @@ class IColumn; M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. 
Currently works only for FREEZE and ATTACH commands.", 0) \ M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \ M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \ + M(MySQLDataTypesSupport, mysql_datatypes_support_level, 0, "Which MySQL types should be converted to corresponding ClickHouse types (rather than being represented as String). Can be empty or any combination of 'decimal' or 'datetime64'. When empty MySQL's DECIMAL and DATETIME/TIMESTAMP with non-zero precison are seen as String on ClickHouse's side.", 0) \ \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ @@ -440,6 +441,7 @@ class IColumn; M(String, output_format_avro_codec, "", "Compression codec used for output. Possible values: 'null', 'deflate', 'snappy'.", 0) \ M(UInt64, output_format_avro_sync_interval, 16 * 1024, "Sync interval in bytes.", 0) \ M(Bool, output_format_tsv_crlf_end_of_line, false, "If it is set true, end of line in TSV format will be \\r\\n instead of \\n.", 0) \ + M(String, output_format_tsv_null_representation, "\\N", "Custom NULL representation in TSV format", 0) \ \ M(UInt64, input_format_allow_errors_num, 0, "Maximum absolute amount of errors while reading text formats (like CSV, TSV). In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \ M(Float, input_format_allow_errors_ratio, 0, "Maximum relative amount of errors while reading text formats (like CSV, TSV). 
In case of error, if at least absolute or relative amount of errors is lower than corresponding value, will skip until next line and continue.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 1a03f5f4578..c0d2906e2fc 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -11,6 +11,7 @@ namespace ErrorCodes extern const int UNKNOWN_DISTRIBUTED_PRODUCT_MODE; extern const int UNKNOWN_JOIN; extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL; } @@ -91,4 +92,8 @@ IMPLEMENT_SETTING_ENUM_WITH_RENAME(DefaultDatabaseEngine, ErrorCodes::BAD_ARGUME {{"Ordinary", DefaultDatabaseEngine::Ordinary}, {"Atomic", DefaultDatabaseEngine::Atomic}}) +IMPLEMENT_SETTING_MULTI_ENUM(MySQLDataTypesSupport, ErrorCodes::UNKNOWN_MYSQL_DATATYPES_SUPPORT_LEVEL, + {{"decimal", MySQLDataTypesSupport::DECIMAL}, + {"datetime64", MySQLDataTypesSupport::DATETIME64}}) + } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 16ebef87e01..7ed5ffb0c35 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -126,4 +126,15 @@ enum class DefaultDatabaseEngine }; DECLARE_SETTING_ENUM(DefaultDatabaseEngine) + + +enum class MySQLDataTypesSupport +{ + DECIMAL, // convert MySQL's decimal and number to ClickHouse Decimal when applicable + DATETIME64, // convert MySQL's DATETIME and TIMESTAMP and ClickHouse DateTime64 if precision is > 0 or range is greater that for DateTime. 
+ // ENUM +}; + +DECLARE_SETTING_MULTI_ENUM(MySQLDataTypesSupport) + } diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index ca774336f88..270d0c7c7d0 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -4,9 +4,11 @@ #include #include #include +#include #include #include #include +#include namespace DB @@ -328,6 +330,113 @@ void SettingFieldEnum::readBinary(ReadBuffer & in) throw Exception(msg, ERROR_CODE_FOR_UNEXPECTED_NAME); \ } +// Mostly like SettingFieldEnum, but can have multiple enum values (or none) set at once. +template +struct SettingFieldMultiEnum +{ + using EnumType = Enum; + using ValueType = MultiEnum; + using StorageType = typename ValueType::StorageType; + + ValueType value; + bool changed = false; + + explicit SettingFieldMultiEnum(ValueType v = ValueType{}) : value{v} {} + explicit SettingFieldMultiEnum(EnumType e) : value{e} {} + explicit SettingFieldMultiEnum(StorageType s) : value(s) {} + explicit SettingFieldMultiEnum(const Field & f) : value(parseValueFromString(f.safeGet())) {} + + operator ValueType() const { return value; } + explicit operator StorageType() const { return value.getValue(); } + explicit operator Field() const { return toString(); } + + SettingFieldMultiEnum & operator= (StorageType x) { changed = x != value.getValue(); value.setValue(x); return *this; } + SettingFieldMultiEnum & operator= (ValueType x) { changed = !(x == value); value = x; return *this; } + SettingFieldMultiEnum & operator= (const Field & x) { parseFromString(x.safeGet()); return *this; } + + String toString() const + { + static const String separator = ","; + String result; + for (StorageType i = 0; i < Traits::getEnumSize(); ++i) + { + const auto v = static_cast(i); + if (value.isSet(v)) + { + result += Traits::toString(v); + result += separator; + } + } + + if (result.size() > 0) + result.erase(result.size() - separator.size()); + + return result; + } + void parseFromString(const String & str) { *this = 
parseValueFromString(str); } + + void writeBinary(WriteBuffer & out) const; + void readBinary(ReadBuffer & in); + +private: + static ValueType parseValueFromString(const std::string_view str) + { + static const String separators=", "; + + ValueType result; + + //to avoid allocating memory on substr() + const std::string_view str_view{str}; + + auto value_start = str_view.find_first_not_of(separators); + while (value_start != std::string::npos) + { + auto value_end = str_view.find_first_of(separators, value_start + 1); + if (value_end == std::string::npos) + value_end = str_view.size(); + + result.set(Traits::fromString(str_view.substr(value_start, value_end - value_start))); + value_start = str_view.find_first_not_of(separators, value_end); + } + + return result; + } +}; + +template +void SettingFieldMultiEnum::writeBinary(WriteBuffer & out) const +{ + SettingFieldEnumHelpers::writeBinary(toString(), out); +} + +template +void SettingFieldMultiEnum::readBinary(ReadBuffer & in) +{ + parseFromString(SettingFieldEnumHelpers::readBinary(in)); +} + +#define DECLARE_SETTING_MULTI_ENUM(ENUM_TYPE) \ + DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ENUM_TYPE) + +#define DECLARE_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, NEW_NAME) \ + struct SettingField##NEW_NAME##Traits \ + { \ + using EnumType = ENUM_TYPE; \ + static size_t getEnumSize(); \ + static const String & toString(EnumType value); \ + static EnumType fromString(const std::string_view & str); \ + }; \ + \ + using SettingField##NEW_NAME = SettingFieldMultiEnum; + +#define IMPLEMENT_SETTING_MULTI_ENUM(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) \ + IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(ENUM_TYPE, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__) + +#define IMPLEMENT_SETTING_MULTI_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, ...) 
\ + IMPLEMENT_SETTING_ENUM_WITH_RENAME(NEW_NAME, ERROR_CODE_FOR_UNEXPECTED_NAME, __VA_ARGS__)\ + size_t SettingField##NEW_NAME##Traits::getEnumSize() {\ + return std::initializer_list> __VA_ARGS__ .size();\ + } /// Can keep a value of any type. Used for user-defined settings. struct SettingFieldCustom diff --git a/src/Core/tests/gtest_multienum.cpp b/src/Core/tests/gtest_multienum.cpp new file mode 100644 index 00000000000..70c7699aa5c --- /dev/null +++ b/src/Core/tests/gtest_multienum.cpp @@ -0,0 +1,158 @@ +#include + +#include +#include +#include + +namespace +{ + +using namespace DB; +enum class TestEnum : UInt8 +{ + // name represents which bit is going to be set + ZERO, + ONE, + TWO, + THREE, + FOUR, + FIVE +}; +} + +GTEST_TEST(MultiEnum, WithDefault) +{ + MultiEnum multi_enum; + ASSERT_EQ(0, multi_enum.getValue()); + ASSERT_EQ(0, multi_enum); + + ASSERT_FALSE(multi_enum.isSet(TestEnum::ZERO)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::ONE)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::TWO)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::THREE)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::FOUR)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::FIVE)); +} + +GTEST_TEST(MultiEnum, WitheEnum) +{ + MultiEnum multi_enum(TestEnum::FOUR); + ASSERT_EQ(16, multi_enum.getValue()); + ASSERT_EQ(16, multi_enum); + + ASSERT_FALSE(multi_enum.isSet(TestEnum::ZERO)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::ONE)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::TWO)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::THREE)); + ASSERT_TRUE(multi_enum.isSet(TestEnum::FOUR)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::FIVE)); +} + +GTEST_TEST(MultiEnum, WithValue) +{ + const MultiEnum multi_enum(13u); // (1 | (1 << 2 | 1 << 3) + + ASSERT_TRUE(multi_enum.isSet(TestEnum::ZERO)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::ONE)); + ASSERT_TRUE(multi_enum.isSet(TestEnum::TWO)); + ASSERT_TRUE(multi_enum.isSet(TestEnum::THREE)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::FOUR)); + 
ASSERT_FALSE(multi_enum.isSet(TestEnum::FIVE)); +} + +GTEST_TEST(MultiEnum, WithMany) +{ + MultiEnum multi_enum{TestEnum::ONE, TestEnum::FIVE}; + ASSERT_EQ(1 << 1 | 1 << 5, multi_enum.getValue()); + ASSERT_EQ(1 << 1 | 1 << 5, multi_enum); + + ASSERT_FALSE(multi_enum.isSet(TestEnum::ZERO)); + ASSERT_TRUE(multi_enum.isSet(TestEnum::ONE)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::TWO)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::THREE)); + ASSERT_FALSE(multi_enum.isSet(TestEnum::FOUR)); + ASSERT_TRUE(multi_enum.isSet(TestEnum::FIVE)); +} + +GTEST_TEST(MultiEnum, WithCopyConstructor) +{ + const MultiEnum multi_enum_source{TestEnum::ONE, TestEnum::FIVE}; + MultiEnum multi_enum{multi_enum_source}; + + ASSERT_EQ(1 << 1 | 1 << 5, multi_enum.getValue()); +} + +GTEST_TEST(MultiEnum, SetAndUnSet) +{ + MultiEnum multi_enum; + multi_enum.set(TestEnum::ONE); + ASSERT_EQ(1 << 1, multi_enum); + + multi_enum.set(TestEnum::TWO); + ASSERT_EQ(1 << 1| (1 << 2), multi_enum); + + multi_enum.unSet(TestEnum::ONE); + ASSERT_EQ(1 << 2, multi_enum); +} + +GTEST_TEST(MultiEnum, SetValueOnDifferentTypes) +{ + MultiEnum multi_enum; + + multi_enum.setValue(static_cast(1)); + ASSERT_EQ(1, multi_enum); + + multi_enum.setValue(static_cast(2)); + ASSERT_EQ(2, multi_enum); + + multi_enum.setValue(static_cast(3)); + ASSERT_EQ(3, multi_enum); + + multi_enum.setValue(static_cast(4)); + ASSERT_EQ(4, multi_enum); +} + +// shouldn't compile +//GTEST_TEST(MultiEnum, WithOtherEnumType) +//{ +// MultiEnum multi_enum; + +// enum FOO {BAR, FOOBAR}; +// MultiEnum multi_enum2(BAR); +// MultiEnum multi_enum3(BAR, FOOBAR); +// multi_enum.setValue(FOO::BAR); +// multi_enum == FOO::BAR; +// FOO::BAR == multi_enum; +//} + +GTEST_TEST(MultiEnum, SetSameValueMultipleTimes) +{ + // Setting same value is idempotent. 
+ MultiEnum multi_enum; + multi_enum.set(TestEnum::ONE); + ASSERT_EQ(1 << 1, multi_enum); + + multi_enum.set(TestEnum::ONE); + ASSERT_EQ(1 << 1, multi_enum); +} + +GTEST_TEST(MultiEnum, UnSetValuesThatWerentSet) +{ + // Unsetting values that weren't set shouldn't change other flags nor aggregate value. + MultiEnum multi_enum{TestEnum::ONE, TestEnum::THREE}; + multi_enum.unSet(TestEnum::TWO); + ASSERT_EQ(1 << 1 | 1 << 3, multi_enum); + + multi_enum.unSet(TestEnum::FOUR); + ASSERT_EQ(1 << 1 | 1 << 3, multi_enum); + + multi_enum.unSet(TestEnum::FIVE); + ASSERT_EQ(1 << 1 | 1 << 3, multi_enum); +} + +GTEST_TEST(MultiEnum, Reset) +{ + MultiEnum multi_enum{TestEnum::ONE, TestEnum::THREE}; + multi_enum.reset(); + ASSERT_EQ(0, multi_enum); +} diff --git a/src/Core/tests/gtest_settings.cpp b/src/Core/tests/gtest_settings.cpp new file mode 100644 index 00000000000..8833d86c397 --- /dev/null +++ b/src/Core/tests/gtest_settings.cpp @@ -0,0 +1,146 @@ +#include + +#include +#include +#include + +namespace +{ +using namespace DB; +using SettingMySQLDataTypesSupport = SettingFieldMultiEnum; +} + +namespace DB +{ + +template +bool operator== (const SettingFieldMultiEnum & setting, const Field & f) +{ + return Field(setting) == f; +} + +template +bool operator== (const Field & f, const SettingFieldMultiEnum & setting) +{ + return f == Field(setting); +} + +} + +GTEST_TEST(MySQLDataTypesSupport, WithDefault) +{ + // Setting can be default-initialized and that means all values are unset. + const SettingMySQLDataTypesSupport setting; + ASSERT_EQ(0, setting.value.getValue()); + ASSERT_EQ("", setting.toString()); + ASSERT_EQ(setting, Field("")); + + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); +} + +GTEST_TEST(SettingMySQLDataTypesSupport, WithDECIMAL) +{ + // Setting can be initialized with MySQLDataTypesSupport::DECIMAL + // and this value can be obtained in varios forms with getters. 
+ const SettingMySQLDataTypesSupport setting(MySQLDataTypesSupport::DECIMAL); + ASSERT_EQ(1, setting.value.getValue()); + + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + + ASSERT_EQ("decimal", setting.toString()); + ASSERT_EQ(Field("decimal"), setting); +} + +GTEST_TEST(SettingMySQLDataTypesSupport, With1) +{ + // Setting can be initialized with int value corresponding to DECIMAL + // and rest of the test is the same as for that value. + const SettingMySQLDataTypesSupport setting(1u); + ASSERT_EQ(1, setting.value.getValue()); + + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + + ASSERT_EQ("decimal", setting.toString()); + ASSERT_EQ(Field("decimal"), setting); +} + +GTEST_TEST(SettingMySQLDataTypesSupport, WithMultipleValues) +{ + // Setting can be initialized with int value corresponding to (DECIMAL | DATETIME64) + const SettingMySQLDataTypesSupport setting(3u); + ASSERT_EQ(3, setting.value.getValue()); + + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + + ASSERT_EQ("decimal,datetime64", setting.toString()); + ASSERT_EQ(Field("decimal,datetime64"), setting); +} + +GTEST_TEST(SettingMySQLDataTypesSupport, SetString) +{ + SettingMySQLDataTypesSupport setting; + setting = String("decimal"); + ASSERT_TRUE(setting.changed); + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + ASSERT_EQ("decimal", setting.toString()); + ASSERT_EQ(Field("decimal"), setting); + + setting = "datetime64,decimal"; + ASSERT_TRUE(setting.changed); + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + ASSERT_EQ("decimal,datetime64", 
setting.toString()); + ASSERT_EQ(Field("decimal,datetime64"), setting); + + // comma with spaces + setting = " datetime64 , decimal "; + ASSERT_FALSE(setting.changed); // false since value is the same as previous one. + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + ASSERT_EQ("decimal,datetime64", setting.toString()); + ASSERT_EQ(Field("decimal,datetime64"), setting); + + setting = String(",,,,,,,, ,decimal"); + ASSERT_TRUE(setting.changed); + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + ASSERT_EQ("decimal", setting.toString()); + ASSERT_EQ(Field("decimal"), setting); + + setting = String(",decimal,decimal,decimal,decimal,decimal,decimal,decimal,decimal,decimal,"); + ASSERT_FALSE(setting.changed); //since previous value was DECIMAL + ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + ASSERT_EQ("decimal", setting.toString()); + ASSERT_EQ(Field("decimal"), setting); + + setting = String(""); + ASSERT_TRUE(setting.changed); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL)); + ASSERT_FALSE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64)); + ASSERT_EQ("", setting.toString()); + ASSERT_EQ(Field(""), setting); +} + +GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString) +{ + // Setting can be initialized with int value corresponding to (DECIMAL | DATETIME64) + SettingMySQLDataTypesSupport setting; + EXPECT_THROW(setting = String("FOOBAR"), Exception); + ASSERT_FALSE(setting.changed); + ASSERT_EQ(0, setting.value.getValue()); + + EXPECT_THROW(setting = String("decimal,datetime64,123"), Exception); + ASSERT_FALSE(setting.changed); + ASSERT_EQ(0, setting.value.getValue()); + + EXPECT_NO_THROW(setting = String(", ")); + ASSERT_FALSE(setting.changed); + ASSERT_EQ(0, 
setting.value.getValue()); +} + diff --git a/src/DataStreams/ExpressionBlockInputStream.cpp b/src/DataStreams/ExpressionBlockInputStream.cpp index 9673395a21a..4840a6263f6 100644 --- a/src/DataStreams/ExpressionBlockInputStream.cpp +++ b/src/DataStreams/ExpressionBlockInputStream.cpp @@ -18,7 +18,7 @@ String ExpressionBlockInputStream::getName() const { return "Expression"; } Block ExpressionBlockInputStream::getTotals() { totals = children.back()->getTotals(); - expression->executeOnTotals(totals); + expression->execute(totals); return totals; } @@ -30,14 +30,6 @@ Block ExpressionBlockInputStream::getHeader() const Block ExpressionBlockInputStream::readImpl() { - if (!initialized) - { - if (expression->resultIsAlwaysEmpty()) - return {}; - - initialized = true; - } - Block res = children.back()->read(); if (res) expression->execute(res); diff --git a/src/DataStreams/ExpressionBlockInputStream.h b/src/DataStreams/ExpressionBlockInputStream.h index 62141a060af..fae54fbcfbf 100644 --- a/src/DataStreams/ExpressionBlockInputStream.h +++ b/src/DataStreams/ExpressionBlockInputStream.h @@ -25,7 +25,6 @@ public: Block getHeader() const override; protected: - bool initialized = false; ExpressionActionsPtr expression; Block readImpl() override; diff --git a/src/DataStreams/FilterBlockInputStream.cpp b/src/DataStreams/FilterBlockInputStream.cpp index b4b00083d7f..83b36c97db7 100644 --- a/src/DataStreams/FilterBlockInputStream.cpp +++ b/src/DataStreams/FilterBlockInputStream.cpp @@ -54,7 +54,7 @@ String FilterBlockInputStream::getName() const { return "Filter"; } Block FilterBlockInputStream::getTotals() { totals = children.back()->getTotals(); - expression->executeOnTotals(totals); + expression->execute(totals); return totals; } diff --git a/src/DataStreams/MongoDBBlockInputStream.cpp b/src/DataStreams/MongoDBBlockInputStream.cpp index 7865f854547..25abdd909c4 100644 --- a/src/DataStreams/MongoDBBlockInputStream.cpp +++ b/src/DataStreams/MongoDBBlockInputStream.cpp @@ -37,6 
+37,7 @@ namespace ErrorCodes extern const int TYPE_MISMATCH; extern const int MONGODB_CANNOT_AUTHENTICATE; extern const int NOT_FOUND_COLUMN_IN_BLOCK; + extern const int UNKNOWN_TYPE; } @@ -298,6 +299,8 @@ namespace ErrorCodes::TYPE_MISMATCH}; break; } + default: + throw Exception("Value of unsupported type:" + column.getName(), ErrorCodes::UNKNOWN_TYPE); } } diff --git a/src/DataTypes/DataTypeDecimalBase.h b/src/DataTypes/DataTypeDecimalBase.h index 265d58d69e1..c5669ab735a 100644 --- a/src/DataTypes/DataTypeDecimalBase.h +++ b/src/DataTypes/DataTypeDecimalBase.h @@ -156,38 +156,31 @@ protected: }; -template typename DecimalType> -typename std::enable_if_t<(sizeof(T) >= sizeof(U)), DecimalType> -inline decimalResultType(const DecimalType & tx, const DecimalType & ty, bool is_multiply, bool is_divide) +template typename DecimalType> +inline auto decimalResultType(const DecimalType & tx, const DecimalType & ty) { - UInt32 scale = (tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale()); - if (is_multiply) + UInt32 scale{}; + if constexpr (is_multiply) scale = tx.getScale() + ty.getScale(); - else if (is_divide) + else if constexpr (is_division) scale = tx.getScale(); - return DecimalType(DecimalUtils::maxPrecision(), scale); + else + scale = (tx.getScale() > ty.getScale() ? tx.getScale() : ty.getScale()); + + if constexpr (sizeof(T) < sizeof(U)) + return DecimalType(DecimalUtils::maxPrecision(), scale); + else + return DecimalType(DecimalUtils::maxPrecision(), scale); } -template typename DecimalType> -typename std::enable_if_t<(sizeof(T) < sizeof(U)), const DecimalType> -inline decimalResultType(const DecimalType & tx, const DecimalType & ty, bool is_multiply, bool is_divide) -{ - UInt32 scale = (tx.getScale() > ty.getScale() ? 
tx.getScale() : ty.getScale()); - if (is_multiply) - scale = tx.getScale() * ty.getScale(); - else if (is_divide) - scale = tx.getScale(); - return DecimalType(DecimalUtils::maxPrecision(), scale); -} - -template typename DecimalType> -inline const DecimalType decimalResultType(const DecimalType & tx, const DataTypeNumber &, bool, bool) +template typename DecimalType> +inline const DecimalType decimalResultType(const DecimalType & tx, const DataTypeNumber &) { return DecimalType(DecimalUtils::maxPrecision(), tx.getScale()); } -template typename DecimalType> -inline const DecimalType decimalResultType(const DataTypeNumber &, const DecimalType & ty, bool, bool) +template typename DecimalType> +inline const DecimalType decimalResultType(const DataTypeNumber &, const DecimalType & ty) { return DecimalType(DecimalUtils::maxPrecision(), ty.getScale()); } diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index 847047850fd..67acf89ef42 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -217,7 +217,7 @@ void DataTypeNullable::serializeTextEscaped(const IColumn & column, size_t row_n const ColumnNullable & col = assert_cast(column); if (col.isNullAt(row_num)) - writeCString("\\N", ostr); + writeString(settings.tsv.null_representation, ostr); else nested_data_type->serializeAsTextEscaped(col.getNestedColumn(), row_num, ostr, settings); } diff --git a/src/DataTypes/convertMySQLDataType.cpp b/src/DataTypes/convertMySQLDataType.cpp index 054dc412915..23899ea197a 100644 --- a/src/DataTypes/convertMySQLDataType.cpp +++ b/src/DataTypes/convertMySQLDataType.cpp @@ -2,11 +2,16 @@ #include #include +#include +#include #include #include #include #include "DataTypeDate.h" #include "DataTypeDateTime.h" +#include "DataTypeDateTime64.h" +#include "DataTypeEnum.h" +#include "DataTypesDecimal.h" #include "DataTypeFixedString.h" #include "DataTypeNullable.h" #include "DataTypeString.h" @@ -25,52 +30,88 @@ ASTPtr 
dataTypeConvertToQuery(const DataTypePtr & data_type) return makeASTFunction("Nullable", dataTypeConvertToQuery(typeid_cast(data_type.get())->getNestedType())); } -DataTypePtr convertMySQLDataType(const std::string & mysql_data_type, bool is_nullable, bool is_unsigned, size_t length) +DataTypePtr convertMySQLDataType(MultiEnum type_support, + const std::string & mysql_data_type, + bool is_nullable, + bool is_unsigned, + size_t length, + size_t precision, + size_t scale) { - DataTypePtr res; - if (mysql_data_type == "tinyint") - { - if (is_unsigned) - res = std::make_shared(); - else - res = std::make_shared(); - } - else if (mysql_data_type == "smallint") - { - if (is_unsigned) - res = std::make_shared(); - else - res = std::make_shared(); - } - else if (mysql_data_type == "int" || mysql_data_type == "mediumint") - { - if (is_unsigned) - res = std::make_shared(); - else - res = std::make_shared(); - } - else if (mysql_data_type == "bigint") - { - if (is_unsigned) - res = std::make_shared(); - else - res = std::make_shared(); - } - else if (mysql_data_type == "float") - res = std::make_shared(); - else if (mysql_data_type == "double") - res = std::make_shared(); - else if (mysql_data_type == "date") - res = std::make_shared(); - else if (mysql_data_type == "datetime" || mysql_data_type == "timestamp") - res = std::make_shared(); - else if (mysql_data_type == "binary") - res = std::make_shared(length); - else + // we expect mysql_data_type to be either "basic_type" or "type_with_params(param1, param2, ...)" + auto data_type = std::string_view(mysql_data_type); + const auto param_start_pos = data_type.find("("); + const auto type_name = data_type.substr(0, param_start_pos); + + DataTypePtr res = [&]() -> DataTypePtr { + if (type_name == "tinyint") + { + if (is_unsigned) + return std::make_shared(); + else + return std::make_shared(); + } + if (type_name == "smallint") + { + if (is_unsigned) + return std::make_shared(); + else + return std::make_shared(); + } + if 
(type_name == "int" || type_name == "mediumint") + { + if (is_unsigned) + return std::make_shared(); + else + return std::make_shared(); + } + if (type_name == "bigint") + { + if (is_unsigned) + return std::make_shared(); + else + return std::make_shared(); + } + if (type_name == "float") + return std::make_shared(); + if (type_name == "double") + return std::make_shared(); + if (type_name == "date") + return std::make_shared(); + if (type_name == "binary") + return std::make_shared(length); + if (type_name == "datetime" || type_name == "timestamp") + { + if (!type_support.isSet(MySQLDataTypesSupport::DATETIME64)) + return std::make_shared(); + + if (type_name == "timestamp" && scale == 0) + { + return std::make_shared(); + } + else if (type_name == "datetime" || type_name == "timestamp") + { + return std::make_shared(scale); + } + } + + if (type_support.isSet(MySQLDataTypesSupport::DECIMAL) && (type_name == "numeric" || type_name == "decimal")) + { + if (precision <= DecimalUtils::maxPrecision()) + return std::make_shared>(precision, scale); + else if (precision <= DecimalUtils::maxPrecision()) + return std::make_shared>(precision, scale); + else if (precision <= DecimalUtils::maxPrecision()) + return std::make_shared>(precision, scale); + } + /// Also String is fallback for all unknown types. - res = std::make_shared(); + return std::make_shared(); + }(); + if (is_nullable) res = std::make_shared(res); + return res; } diff --git a/src/DataTypes/convertMySQLDataType.h b/src/DataTypes/convertMySQLDataType.h index 54477afb385..f1c4a73d6f7 100644 --- a/src/DataTypes/convertMySQLDataType.h +++ b/src/DataTypes/convertMySQLDataType.h @@ -1,17 +1,20 @@ #pragma once #include +#include #include #include "IDataType.h" namespace DB { +enum class MySQLDataTypesSupport; + /// Convert data type to query. 
for example /// DataTypeUInt8 -> ASTIdentifier(UInt8) /// DataTypeNullable(DataTypeUInt8) -> ASTFunction(ASTIdentifier(UInt8)) ASTPtr dataTypeConvertToQuery(const DataTypePtr & data_type); /// Convert MySQL type to ClickHouse data type. -DataTypePtr convertMySQLDataType(const std::string & mysql_data_type, bool is_nullable, bool is_unsigned, size_t length); +DataTypePtr convertMySQLDataType(MultiEnum type_support, const std::string & mysql_data_type, bool is_nullable, bool is_unsigned, size_t length, size_t precision, size_t scale); } diff --git a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp new file mode 100644 index 00000000000..48e2f0d80a0 --- /dev/null +++ b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp @@ -0,0 +1,101 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma GCC diagnostic ignored "-Wmissing-declarations" +#include + +#include +#include + +#include + +namespace std +{ + +template +inline std::ostream& operator<<(std::ostream & ostr, const std::vector & v) +{ + ostr << "["; + for (const auto & i : v) + { + ostr << i << ", "; + } + return ostr << "] (" << v.size() << ") items"; +} + +} + +using namespace DB; + +struct ParseDataTypeTestCase +{ + const char * type_name; + std::vector values; + FieldVector expected_values; +}; + +std::ostream & operator<<(std::ostream & ostr, const ParseDataTypeTestCase & test_case) +{ + return ostr << "ParseDataTypeTestCase{\"" << test_case.type_name << "\", " << test_case.values << "}"; +} + + +class ParseDataTypeTest : public ::testing::TestWithParam +{ +public: + void SetUp() override + { + const auto & p = GetParam(); + + data_type = DataTypeFactory::instance().get(p.type_name); + } + + DataTypePtr data_type; +}; + +TEST_P(ParseDataTypeTest, parseStringValue) +{ + const auto & p = GetParam(); + + auto col = data_type->createColumn(); + for (const auto & value : p.values) + { + ReadBuffer 
buffer(const_cast(value.data()), value.size(), 0); + data_type->deserializeAsWholeText(*col, buffer, FormatSettings{}); + } + + ASSERT_EQ(p.expected_values.size(), col->size()) << "Actual items: " << *col; + for (size_t i = 0; i < col->size(); ++i) + { + ASSERT_EQ(p.expected_values[i], (*col)[i]); + } +} + + +INSTANTIATE_TEST_SUITE_P(ParseDecimal, + ParseDataTypeTest, + ::testing::ValuesIn( + std::initializer_list{ + { + "Decimal(8, 0)", + {"0", "5", "8", "-5", "-8", "12345678", "-12345678"}, + + std::initializer_list{ + DecimalField(0, 0), + DecimalField(5, 0), + DecimalField(8, 0), + DecimalField(-5, 0), + DecimalField(-8, 0), + DecimalField(12345678, 0), + DecimalField(-12345678, 0) + } + } + } + ) +); diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp index 0d944e215a0..9c94014bf23 100644 --- a/src/Databases/MySQL/DatabaseConnectionMySQL.cpp +++ b/src/Databases/MySQL/DatabaseConnectionMySQL.cpp @@ -10,6 +10,7 @@ # include # include # include +# include # include # include # include @@ -43,31 +44,14 @@ constexpr static const auto suffix = ".remove_flag"; static constexpr const std::chrono::seconds cleaner_sleep_time{30}; static const std::chrono::seconds lock_acquire_timeout{10}; -static String toQueryStringWithQuote(const std::vector & quote_list) -{ - WriteBufferFromOwnString quote_list_query; - quote_list_query << "("; - - for (size_t index = 0; index < quote_list.size(); ++index) - { - if (index) - quote_list_query << ","; - - quote_list_query << quote << quote_list[index]; - } - - quote_list_query << ")"; - return quote_list_query.str(); -} - -DatabaseConnectionMySQL::DatabaseConnectionMySQL( - const Context & global_context_, const String & database_name_, const String & metadata_path_, +DatabaseConnectionMySQL::DatabaseConnectionMySQL(const Context & context, const String & database_name_, const String & metadata_path_, const ASTStorage * database_engine_define_, const String & 
database_name_in_mysql_, mysqlxx::Pool && pool) : IDatabase(database_name_) - , global_context(global_context_.getGlobalContext()) + , global_context(context.getGlobalContext()) , metadata_path(metadata_path_) , database_engine_define(database_engine_define_->clone()) , database_name_in_mysql(database_name_in_mysql_) + , mysql_datatypes_support_level(context.getQueryContext().getSettingsRef().mysql_datatypes_support_level) , mysql_pool(std::move(pool)) { empty(); /// test database is works fine. @@ -78,7 +62,7 @@ bool DatabaseConnectionMySQL::empty() const { std::lock_guard lock(mutex); - fetchTablesIntoLocalCache(); + fetchTablesIntoLocalCache(global_context); if (local_tables_cache.empty()) return true; @@ -90,12 +74,12 @@ bool DatabaseConnectionMySQL::empty() const return true; } -DatabaseTablesIteratorPtr DatabaseConnectionMySQL::getTablesIterator(const Context &, const FilterByNameFunction & filter_by_table_name) +DatabaseTablesIteratorPtr DatabaseConnectionMySQL::getTablesIterator(const Context & context, const FilterByNameFunction & filter_by_table_name) { Tables tables; std::lock_guard lock(mutex); - fetchTablesIntoLocalCache(); + fetchTablesIntoLocalCache(context); for (const auto & [table_name, modify_time_and_storage] : local_tables_cache) if (!remove_or_detach_tables.count(table_name) && (!filter_by_table_name || filter_by_table_name(table_name))) @@ -109,11 +93,11 @@ bool DatabaseConnectionMySQL::isTableExist(const String & name, const Context & return bool(tryGetTable(name, context)); } -StoragePtr DatabaseConnectionMySQL::tryGetTable(const String & mysql_table_name, const Context &) const +StoragePtr DatabaseConnectionMySQL::tryGetTable(const String & mysql_table_name, const Context & context) const { std::lock_guard lock(mutex); - fetchTablesIntoLocalCache(); + fetchTablesIntoLocalCache(context); if (!remove_or_detach_tables.count(mysql_table_name) && local_tables_cache.find(mysql_table_name) != local_tables_cache.end()) return 
local_tables_cache[mysql_table_name].second; @@ -157,11 +141,11 @@ static ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr return create_table_query; } -ASTPtr DatabaseConnectionMySQL::getCreateTableQueryImpl(const String & table_name, const Context &, bool throw_on_error) const +ASTPtr DatabaseConnectionMySQL::getCreateTableQueryImpl(const String & table_name, const Context & context, bool throw_on_error) const { std::lock_guard lock(mutex); - fetchTablesIntoLocalCache(); + fetchTablesIntoLocalCache(context); if (local_tables_cache.find(table_name) == local_tables_cache.end()) { @@ -178,7 +162,7 @@ time_t DatabaseConnectionMySQL::getObjectMetadataModificationTime(const String & { std::lock_guard lock(mutex); - fetchTablesIntoLocalCache(); + fetchTablesIntoLocalCache(global_context); if (local_tables_cache.find(table_name) == local_tables_cache.end()) throw Exception("MySQL table " + database_name_in_mysql + "." + table_name + " doesn't exist.", ErrorCodes::UNKNOWN_TABLE); @@ -194,12 +178,12 @@ ASTPtr DatabaseConnectionMySQL::getCreateDatabaseQuery() const return create_query; } -void DatabaseConnectionMySQL::fetchTablesIntoLocalCache() const +void DatabaseConnectionMySQL::fetchTablesIntoLocalCache(const Context & context) const { const auto & tables_with_modification_time = fetchTablesWithModificationTime(); destroyLocalCacheExtraTables(tables_with_modification_time); - fetchLatestTablesStructureIntoCache(tables_with_modification_time); + fetchLatestTablesStructureIntoCache(tables_with_modification_time, context); } void DatabaseConnectionMySQL::destroyLocalCacheExtraTables(const std::map & tables_with_modification_time) const @@ -216,7 +200,7 @@ void DatabaseConnectionMySQL::destroyLocalCacheExtraTables(const std::map &tables_modification_time) const +void DatabaseConnectionMySQL::fetchLatestTablesStructureIntoCache(const std::map &tables_modification_time, const Context & context) const { std::vector wait_update_tables_name; for (const 
auto & table_modification_time : tables_modification_time) @@ -228,7 +212,7 @@ void DatabaseConnectionMySQL::fetchLatestTablesStructureIntoCache(const std::map wait_update_tables_name.emplace_back(table_modification_time.first); } - std::map tables_and_columns = fetchTablesColumnsList(wait_update_tables_name); + std::map tables_and_columns = fetchTablesColumnsList(wait_update_tables_name, context); for (const auto & table_and_columns : tables_and_columns) { @@ -280,53 +264,16 @@ std::map DatabaseConnectionMySQL::fetchTablesWithModificationTim return tables_with_modification_time; } -std::map DatabaseConnectionMySQL::fetchTablesColumnsList(const std::vector & tables_name) const +std::map DatabaseConnectionMySQL::fetchTablesColumnsList(const std::vector & tables_name, const Context & context) const { - std::map tables_and_columns; + const auto & settings = context.getSettingsRef(); - if (tables_name.empty()) - return tables_and_columns; - - Block tables_columns_sample_block - { - { std::make_shared(), "table_name" }, - { std::make_shared(), "column_name" }, - { std::make_shared(), "column_type" }, - { std::make_shared(), "is_nullable" }, - { std::make_shared(), "is_unsigned" }, - { std::make_shared(), "length" }, - }; - - WriteBufferFromOwnString query; - query << "SELECT " - " TABLE_NAME AS table_name," - " COLUMN_NAME AS column_name," - " DATA_TYPE AS column_type," - " IS_NULLABLE = 'YES' AS is_nullable," - " COLUMN_TYPE LIKE '%unsigned' AS is_unsigned," - " CHARACTER_MAXIMUM_LENGTH AS length" - " FROM INFORMATION_SCHEMA.COLUMNS" - " WHERE TABLE_SCHEMA = " << quote << database_name_in_mysql - << " AND TABLE_NAME IN " << toQueryStringWithQuote(tables_name) << " ORDER BY ORDINAL_POSITION"; - - const auto & external_table_functions_use_nulls = global_context.getSettings().external_table_functions_use_nulls; - MySQLBlockInputStream result(mysql_pool.get(), query.str(), tables_columns_sample_block, DEFAULT_BLOCK_SIZE); - while (Block block = result.read()) - { - size_t 
rows = block.rows(); - for (size_t i = 0; i < rows; ++i) - { - String table_name = (*block.getByPosition(0).column)[i].safeGet(); - tables_and_columns[table_name].emplace_back((*block.getByPosition(1).column)[i].safeGet(), - convertMySQLDataType( - (*block.getByPosition(2).column)[i].safeGet(), - (*block.getByPosition(3).column)[i].safeGet() && - external_table_functions_use_nulls, - (*block.getByPosition(4).column)[i].safeGet(), - (*block.getByPosition(5).column)[i].safeGet())); - } - } - return tables_and_columns; + return DB::fetchTablesColumnsList( + mysql_pool, + database_name_in_mysql, + tables_name, + settings.external_table_functions_use_nulls, + mysql_datatypes_support_level); } void DatabaseConnectionMySQL::shutdown() diff --git a/src/Databases/MySQL/DatabaseConnectionMySQL.h b/src/Databases/MySQL/DatabaseConnectionMySQL.h index c4fb3d5f90c..e9f72adc013 100644 --- a/src/Databases/MySQL/DatabaseConnectionMySQL.h +++ b/src/Databases/MySQL/DatabaseConnectionMySQL.h @@ -4,17 +4,27 @@ #if USE_MYSQL #include -#include -#include -#include -#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include namespace DB { class Context; +enum class MySQLDataTypesSupport; + /** Real-time access to table list and table structure from remote MySQL * It doesn't make any manipulations with filesystem. 
* All tables are created by calling code after real-time pull-out structure from remote MySQL @@ -25,7 +35,7 @@ public: ~DatabaseConnectionMySQL() override; DatabaseConnectionMySQL( - const Context & global_context, const String & database_name, const String & metadata_path, + const Context & context, const String & database_name, const String & metadata_path, const ASTStorage * database_engine_define, const String & database_name_in_mysql, mysqlxx::Pool && pool); String getEngineName() const override { return "MySQL"; } @@ -66,6 +76,9 @@ private: String metadata_path; ASTPtr database_engine_define; String database_name_in_mysql; + // Cache setting for later from query context upon creation, + // so column types depend on the settings set at query-level. + MultiEnum mysql_datatypes_support_level; std::atomic quit{false}; std::condition_variable cond; @@ -81,15 +94,15 @@ private: void cleanOutdatedTables(); - void fetchTablesIntoLocalCache() const; + void fetchTablesIntoLocalCache(const Context & context) const; std::map fetchTablesWithModificationTime() const; - std::map fetchTablesColumnsList(const std::vector & tables_name) const; + std::map fetchTablesColumnsList(const std::vector & tables_name, const Context & context) const; void destroyLocalCacheExtraTables(const std::map & tables_with_modification_time) const; - void fetchLatestTablesStructureIntoCache(const std::map & tables_modification_time) const; + void fetchLatestTablesStructureIntoCache(const std::map & tables_modification_time, const Context & context) const; ThreadFromGlobalPool thread; }; diff --git a/src/Databases/MySQL/FetchTablesColumnsList.cpp b/src/Databases/MySQL/FetchTablesColumnsList.cpp new file mode 100644 index 00000000000..3e25c703a1d --- /dev/null +++ b/src/Databases/MySQL/FetchTablesColumnsList.cpp @@ -0,0 +1,114 @@ +#if !defined(ARCADIA_BUILD) +# include "config_core.h" +#endif + +#if USE_MYSQL +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+ +#include + +namespace +{ +using namespace DB; + +String toQueryStringWithQuote(const std::vector & quote_list) +{ + WriteBufferFromOwnString quote_list_query; + quote_list_query << "("; + + for (size_t index = 0; index < quote_list.size(); ++index) + { + if (index) + quote_list_query << ","; + + quote_list_query << quote << quote_list[index]; + } + + quote_list_query << ")"; + return quote_list_query.str(); +} +} + +namespace DB +{ + +std::map fetchTablesColumnsList( + mysqlxx::Pool & pool, + const String & database_name, + const std::vector & tables_name, + bool external_table_functions_use_nulls, + MultiEnum type_support) +{ + std::map tables_and_columns; + + if (tables_name.empty()) + return tables_and_columns; + + Block tables_columns_sample_block + { + { std::make_shared(), "table_name" }, + { std::make_shared(), "column_name" }, + { std::make_shared(), "column_type" }, + { std::make_shared(), "is_nullable" }, + { std::make_shared(), "is_unsigned" }, + { std::make_shared(), "length" }, + { std::make_shared(), "precision" }, + { std::make_shared(), "scale" }, + }; + + WriteBufferFromOwnString query; + query << "SELECT " + " TABLE_NAME AS table_name," + " COLUMN_NAME AS column_name," + " COLUMN_TYPE AS column_type," + " IS_NULLABLE = 'YES' AS is_nullable," + " COLUMN_TYPE LIKE '%unsigned' AS is_unsigned," + " CHARACTER_MAXIMUM_LENGTH AS length," + " NUMERIC_PRECISION as ''," + " IF(ISNULL(NUMERIC_SCALE), DATETIME_PRECISION, NUMERIC_SCALE) AS scale" // we know DATETIME_PRECISION as a scale in CH + " FROM INFORMATION_SCHEMA.COLUMNS" + " WHERE TABLE_SCHEMA = " << quote << database_name + << " AND TABLE_NAME IN " << toQueryStringWithQuote(tables_name) << " ORDER BY ORDINAL_POSITION"; + + MySQLBlockInputStream result(pool.get(), query.str(), tables_columns_sample_block, DEFAULT_BLOCK_SIZE); + while (Block block = result.read()) + { + const auto & table_name_col = *block.getByPosition(0).column; + const auto & column_name_col = *block.getByPosition(1).column; + 
const auto & column_type_col = *block.getByPosition(2).column; + const auto & is_nullable_col = *block.getByPosition(3).column; + const auto & is_unsigned_col = *block.getByPosition(4).column; + const auto & char_max_length_col = *block.getByPosition(5).column; + const auto & precision_col = *block.getByPosition(6).column; + const auto & scale_col = *block.getByPosition(7).column; + + size_t rows = block.rows(); + for (size_t i = 0; i < rows; ++i) + { + String table_name = table_name_col[i].safeGet(); + tables_and_columns[table_name].emplace_back( + column_name_col[i].safeGet(), + convertMySQLDataType( + type_support, + column_type_col[i].safeGet(), + external_table_functions_use_nulls && is_nullable_col[i].safeGet(), + is_unsigned_col[i].safeGet(), + char_max_length_col[i].safeGet(), + precision_col[i].safeGet(), + scale_col[i].safeGet())); + } + } + return tables_and_columns; +} + +} + +#endif diff --git a/src/Databases/MySQL/FetchTablesColumnsList.h b/src/Databases/MySQL/FetchTablesColumnsList.h new file mode 100644 index 00000000000..52191c2ecb8 --- /dev/null +++ b/src/Databases/MySQL/FetchTablesColumnsList.h @@ -0,0 +1,28 @@ +#pragma once + +#include "config_core.h" +#if USE_MYSQL + +#include + +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +std::map fetchTablesColumnsList( + mysqlxx::Pool & pool, + const String & database_name, + const std::vector & tables_name, + bool external_table_functions_use_nulls, + MultiEnum type_support); + +} + +#endif diff --git a/src/Databases/ya.make b/src/Databases/ya.make index 50b58cf3e71..726127bfe52 100644 --- a/src/Databases/ya.make +++ b/src/Databases/ya.make @@ -19,6 +19,7 @@ SRCS( DatabaseWithDictionaries.cpp MySQL/DatabaseConnectionMySQL.cpp MySQL/DatabaseMaterializeMySQL.cpp + MySQL/FetchTablesColumnsList.cpp MySQL/MaterializeMetadata.cpp MySQL/MaterializeMySQLSettings.cpp MySQL/MaterializeMySQLSyncThread.cpp diff --git a/src/Dictionaries/CassandraBlockInputStream.cpp 
b/src/Dictionaries/CassandraBlockInputStream.cpp index 4f6a62a0eea..721cb44a82e 100644 --- a/src/Dictionaries/CassandraBlockInputStream.cpp +++ b/src/Dictionaries/CassandraBlockInputStream.cpp @@ -19,6 +19,7 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; + extern const int UNKNOWN_TYPE; } CassandraBlockInputStream::CassandraBlockInputStream( @@ -140,6 +141,8 @@ void CassandraBlockInputStream::insertValue(IColumn & column, ValueType type, co assert_cast(column).insert(parse(uuid_str.data(), uuid_str.size())); break; } + default: + throw Exception("Unknown type : " + std::to_string(static_cast(type)), ErrorCodes::UNKNOWN_TYPE); } } @@ -252,6 +255,8 @@ void CassandraBlockInputStream::assertTypes(const CassResultPtr & result) expected = CASS_VALUE_TYPE_UUID; expected_text = "uuid"; break; + default: + throw Exception("Unknown type : " + std::to_string(static_cast(description.types[i].first)), ErrorCodes::UNKNOWN_TYPE); } CassValueType got = cass_result_column_type(result, i); diff --git a/src/Dictionaries/RedisBlockInputStream.cpp b/src/Dictionaries/RedisBlockInputStream.cpp index a3ee86ae1d6..a5514d14155 100644 --- a/src/Dictionaries/RedisBlockInputStream.cpp +++ b/src/Dictionaries/RedisBlockInputStream.cpp @@ -26,6 +26,7 @@ namespace DB extern const int LOGICAL_ERROR; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int INTERNAL_REDIS_ERROR; + extern const int UNKNOWN_TYPE; } @@ -103,6 +104,8 @@ namespace DB case ValueType::vtUUID: assert_cast(column).insertValue(parse(string_value)); break; + default: + throw Exception("Value of unsupported type:" + column.getName(), ErrorCodes::UNKNOWN_TYPE); } } } diff --git a/src/Disks/S3/registerDiskS3.cpp b/src/Disks/S3/registerDiskS3.cpp index 341ada59631..fbd19ce1cd9 100644 --- a/src/Disks/S3/registerDiskS3.cpp +++ b/src/Disks/S3/registerDiskS3.cpp @@ -145,9 +145,12 @@ void registerDiskS3(DiskFactory & factory) config.getUInt64(config_prefix + ".min_bytes_for_seek", 1024 * 1024)); /// This 
code is used only to check access to the corresponding disk. - checkWriteAccess(*s3disk); - checkReadAccess(name, *s3disk); - checkRemoveAccess(*s3disk); + if (!config.getBool(config_prefix + ".skip_access_check", false)) + { + checkWriteAccess(*s3disk); + checkReadAccess(name, *s3disk); + checkRemoveAccess(*s3disk); + } bool cache_enabled = config.getBool(config_prefix + ".cache_enabled", true); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 935d31d6541..a1065b2c452 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -111,6 +111,7 @@ static FormatSettings getOutputFormatSetting(const Settings & settings, const Co format_settings.template_settings.row_format = settings.format_template_row; format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter; format_settings.tsv.crlf_end_of_line = settings.output_format_tsv_crlf_end_of_line; + format_settings.tsv.null_representation = settings.output_format_tsv_null_representation; format_settings.write_statistics = settings.output_format_write_statistics; format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size; format_settings.schema.format_schema = settings.format_schema; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 299ec353f03..70173bc847d 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -78,6 +78,7 @@ struct FormatSettings { bool empty_as_default = false; bool crlf_end_of_line = false; + String null_representation = "\\N"; }; TSV tsv; diff --git a/src/Formats/MySQLBlockInputStream.cpp b/src/Formats/MySQLBlockInputStream.cpp index 17c09cdc14d..f85680c0031 100644 --- a/src/Formats/MySQLBlockInputStream.cpp +++ b/src/Formats/MySQLBlockInputStream.cpp @@ -7,13 +7,15 @@ # include # include # include +# include +# include +# include # include # include # include # include # include "MySQLBlockInputStream.h" - namespace DB { 
namespace ErrorCodes @@ -39,7 +41,7 @@ namespace { using ValueType = ExternalResultDescription::ValueType; - void insertValue(IColumn & column, const ValueType type, const mysqlxx::Value & value) + void insertValue(const IDataType & data_type, IColumn & column, const ValueType type, const mysqlxx::Value & value) { switch (type) { @@ -85,6 +87,15 @@ namespace case ValueType::vtUUID: assert_cast(column).insert(parse(value.data(), value.size())); break; + case ValueType::vtDateTime64:[[fallthrough]]; + case ValueType::vtDecimal32: [[fallthrough]]; + case ValueType::vtDecimal64: [[fallthrough]]; + case ValueType::vtDecimal128: + { + ReadBuffer buffer(const_cast(value.data()), value.size(), 0); + data_type.deserializeAsWholeText(column, buffer, FormatSettings{}); + break; + } } } @@ -112,19 +123,21 @@ Block MySQLBlockInputStream::readImpl() for (const auto idx : ext::range(0, row.size())) { const auto value = row[idx]; + const auto & sample = description.sample_block.getByPosition(idx); if (!value.isNull()) { if (description.types[idx].second) { ColumnNullable & column_nullable = assert_cast(*columns[idx]); - insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value); + const auto & data_type = assert_cast(*sample.type); + insertValue(*data_type.getNestedType(), column_nullable.getNestedColumn(), description.types[idx].first, value); column_nullable.getNullMapData().emplace_back(0); } else - insertValue(*columns[idx], description.types[idx].first, value); + insertValue(*sample.type, *columns[idx], description.types[idx].first, value); } else - insertDefaultValue(*columns[idx], *description.sample_block.getByPosition(idx).column); + insertDefaultValue(*columns[idx], *sample.column); } ++num_rows; diff --git a/src/Functions/CRC.cpp b/src/Functions/CRC.cpp index 96edf9a0d8e..6083e5ef16f 100644 --- a/src/Functions/CRC.cpp +++ b/src/Functions/CRC.cpp @@ -72,6 +72,9 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; } +namespace +{ + 
template struct CRCFunctionWrapper { @@ -127,6 +130,8 @@ using FunctionCRC32IEEE = FunctionCRC; // Uses CRC-64-ECMA polynomial using FunctionCRC64ECMA = FunctionCRC; +} + template void registerFunctionCRCImpl(FunctionFactory & factory) { diff --git a/src/Functions/FunctionBinaryArithmetic.h b/src/Functions/FunctionBinaryArithmetic.h index 2a467451684..15b6ea6ca5d 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -22,6 +22,7 @@ #include #include "IFunctionImpl.h" #include "FunctionHelpers.h" +#include "IsOperation.h" #include "DivisionUtils.h" #include "castTypeToEither.h" #include "FunctionFactory.h" @@ -167,17 +168,6 @@ struct BinaryOperationImpl : BinaryOperationImplBase }; -template struct PlusImpl; -template struct MinusImpl; -template struct MultiplyImpl; -template struct DivideFloatingImpl; -template struct DivideIntegralImpl; -template struct DivideIntegralOrZeroImpl; -template struct LeastBaseImpl; -template struct GreatestBaseImpl; -template struct ModuloImpl; - - /// Binary operations for Decimals need scale args /// +|- scale one of args (which scale factor is not 1). ScaleR = oneof(Scale1, Scale2); /// * no agrs scale. 
ScaleR = Scale1 + Scale2; @@ -185,15 +175,15 @@ template struct ModuloImpl; template typename Operation, typename ResultType_, bool _check_overflow = true> struct DecimalBinaryOperation { - static constexpr bool is_plus_minus = std::is_same_v, PlusImpl> || - std::is_same_v, MinusImpl>; - static constexpr bool is_multiply = std::is_same_v, MultiplyImpl>; - static constexpr bool is_float_division = std::is_same_v, DivideFloatingImpl>; - static constexpr bool is_int_division = std::is_same_v, DivideIntegralImpl> || - std::is_same_v, DivideIntegralOrZeroImpl>; + static constexpr bool is_plus_minus = IsOperation::plus || + IsOperation::minus; + static constexpr bool is_multiply = IsOperation::multiply; + static constexpr bool is_float_division = IsOperation::div_floating; + static constexpr bool is_int_division = IsOperation::div_int || + IsOperation::div_int_or_zero; static constexpr bool is_division = is_float_division || is_int_division; - static constexpr bool is_compare = std::is_same_v, LeastBaseImpl> || - std::is_same_v, GreatestBaseImpl>; + static constexpr bool is_compare = IsOperation::least || + IsOperation::greatest; static constexpr bool is_plus_minus_compare = is_plus_minus || is_compare; static constexpr bool can_overflow = is_plus_minus || is_multiply; @@ -529,15 +519,7 @@ private: /// it's not correct for Decimal using Op = Operation; public: - static constexpr bool allow_decimal = - std::is_same_v, PlusImpl> || - std::is_same_v, MinusImpl> || - std::is_same_v, MultiplyImpl> || - std::is_same_v, DivideFloatingImpl> || - std::is_same_v, DivideIntegralImpl> || - std::is_same_v, DivideIntegralOrZeroImpl> || - std::is_same_v, LeastBaseImpl> || - std::is_same_v, GreatestBaseImpl>; + static constexpr bool allow_decimal = IsOperation::allow_decimal; /// Appropriate result type for binary operator on numeric types. "Date" can also mean /// DateTime, but if both operands are Dates, their type must be the same (e.g. Date - DateTime is invalid). 
@@ -556,21 +538,21 @@ public: DataTypeFromFieldType>, /// Date + Integral -> Date /// Integral + Date -> Date - Case>, Switch< + Case::plus, Switch< Case, LeftDataType>, Case, RightDataType>>>, /// Date - Date -> Int32 /// Date - Integral -> Date - Case>, Switch< + Case::minus, Switch< Case, DataTypeInt32>, Case && IsIntegral, LeftDataType>>>, /// least(Date, Date) -> Date /// greatest(Date, Date) -> Date - Case && (std::is_same_v> || std::is_same_v>), + Case && (IsOperation::least || IsOperation::greatest), LeftDataType>, /// Date % Int32 -> Int32 /// Date % Float -> Float64 - Case>, Switch< + Case::modulo, Switch< Case && IsIntegral, RightDataType>, Case && IsFloatingPoint, DataTypeFloat64>>>>; }; @@ -579,6 +561,9 @@ public: template