Merge branch 'master' into ldap-any-user-authentication

* master: (189 commits)
  Fix ugly links in docs
  Update Internals.cpp
  Whitespace
  Fix ugliness in docs
  better
  better backticks
  Try fix tests.
  Fetch helper image before run
  Fetch helper image before run
  Fetch helper image before run
  Update version_date.tsv after release 20.7.4.11
  Update version_date.tsv after release 20.8.4.11
  Update 01515_with_global_and_with_propagation.sql
  fix compilation error
  Supress pvs
  Update version_date.tsv after release 20.9.3.45
  Add missed file
  fix thread restart for parallel quorum inserts
  Update openssl
  Fix race in openssl
  ...
This commit is contained in:
Denis Glazachev 2020-10-14 16:46:37 +04:00
commit 6eec447a64
286 changed files with 4244 additions and 1367 deletions

.gitmodules

@ -158,7 +158,7 @@
url = https://github.com/openldap/openldap.git
[submodule "contrib/AMQP-CPP"]
path = contrib/AMQP-CPP
url = https://github.com/CopernicaMarketingSoftware/AMQP-CPP.git
url = https://github.com/ClickHouse-Extras/AMQP-CPP.git
[submodule "contrib/cassandra"]
path = contrib/cassandra
url = https://github.com/ClickHouse-Extras/cpp-driver.git


@ -300,6 +300,11 @@ if (COMPILER_CLANG)
option(ENABLE_THINLTO "Clang-specific link time optimization" ON)
endif()
# Set the new experimental pass manager; it's a performance, build-time and binary-size win.
# Can be removed after https://reviews.llvm.org/D66490 merged and released to at least two versions of clang.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fexperimental-new-pass-manager")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fexperimental-new-pass-manager")
# We cannot afford to use LTO when compiling unit tests, and it's not enough
# to only supply -fno-lto at the final linking stage. So we disable it
# completely.
@ -513,8 +518,8 @@ endif ()
macro (add_executable target)
# invoke built-in add_executable
# explicitly acquire and interpose malloc symbols by clickhouse_malloc
# if GLIBC_COMPATIBILITY is ON and not sanitizer build, provide memcpy symbol explicitly to neutrialize thinlto's libcall generation.
if (GLIBC_COMPATIBILITY AND NOT SANITIZE)
# if GLIBC_COMPATIBILITY is ON and ENABLE_THINLTO is on, then provide memcpy symbol explicitly to neutralize thinlto's libcall generation.
if (GLIBC_COMPATIBILITY AND ENABLE_THINLTO)
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc> $<TARGET_OBJECTS:clickhouse_memcpy>)
else ()
_add_executable (${ARGV} $<TARGET_OBJECTS:clickhouse_malloc>)


@ -21,8 +21,8 @@ void Pool::Entry::incrementRefCount()
{
if (!data)
return;
++data->ref_count;
if (data->ref_count == 1)
/// First reference, initialize thread
if (data->ref_count.fetch_add(1) == 0)
mysql_thread_init();
}
@ -30,12 +30,10 @@ void Pool::Entry::decrementRefCount()
{
if (!data)
return;
if (data->ref_count > 0)
{
--data->ref_count;
if (data->ref_count == 0)
mysql_thread_end();
}
/// We were the last user of this thread, deinitialize it
if (data->ref_count.fetch_sub(1) == 1)
mysql_thread_end();
}


@ -3,6 +3,7 @@
#include <list>
#include <memory>
#include <mutex>
#include <atomic>
#include <Poco/Exception.h>
#include <mysqlxx/Connection.h>
@ -35,7 +36,9 @@ protected:
struct Connection
{
mysqlxx::Connection conn;
int ref_count = 0;
/// Ref count is modified in the constructor/destructor of Entry,
/// but is also read in the pool code.
std::atomic<int> ref_count = 0;
};
public:


@ -31,6 +31,7 @@ if (COMPILER_CLANG)
add_warning(pedantic)
no_warning(vla-extension)
no_warning(zero-length-array)
no_warning(c11-extensions)
add_warning(comma)
add_warning(conditional-uninitialized)

contrib/AMQP-CPP

@ -1 +1 @@
Subproject commit 1c08399ab0ab9e4042ef8e2bbe9e208e5dcbc13b
Subproject commit d63e1f016582e9faaaf279aa24513087a07bc6e7


@ -16,6 +16,7 @@ set (SRCS
${LIBRARY_DIR}/src/flags.cpp
${LIBRARY_DIR}/src/linux_tcp/openssl.cpp
${LIBRARY_DIR}/src/linux_tcp/tcpconnection.cpp
${LIBRARY_DIR}/src/inbuffer.cpp
${LIBRARY_DIR}/src/receivedframe.cpp
${LIBRARY_DIR}/src/table.cpp
${LIBRARY_DIR}/src/watchable.cpp

contrib/libhdfs3

@ -1 +1 @@
Subproject commit 1b666578c85094306b061352078022f6350bfab8
Subproject commit 30552ac527f2c14070d834e171493b2e7f662375

@ -1 +1 @@
Subproject commit 3f512fedf0ba0f769a1b4852b4bac542d92c5b20
Subproject commit f5638e954a79f50bac7c7a5deaa5a241e0ce8b5f

contrib/openssl

@ -1 +1 @@
Subproject commit 07e9623064508d15dd61367f960ebe7fc9aecd77
Subproject commit 237260dd6a4bca5cb5a321d366a8a9c807957455


@ -9,7 +9,8 @@
"name": "yandex/clickhouse-binary-builder",
"dependent": [
"docker/test/split_build_smoke_test",
"docker/test/pvs"
"docker/test/pvs",
"docker/test/codebrowser"
]
},
"docker/packager/unbundled": {


@ -17,7 +17,9 @@ ccache --show-stats ||:
ccache --zero-stats ||:
ln -s /usr/lib/x86_64-linux-gnu/libOpenCL.so.1.0.0 /usr/lib/libOpenCL.so ||:
rm -f CMakeCache.txt
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "$CMAKE_FLAGS" ..
# Read cmake arguments into array (possibly empty)
read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
ninja $NINJA_FLAGS clickhouse-bundle
mv ./programs/clickhouse* /output


@ -1,33 +1,15 @@
# docker build --network=host -t yandex/clickhouse-codebrowser .
# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output yandex/clickhouse-codebrowser
FROM ubuntu:18.04
FROM yandex/clickhouse-binary-builder
RUN apt-get --allow-unauthenticated update -y \
&& env DEBIAN_FRONTEND=noninteractive \
apt-get --allow-unauthenticated install --yes --no-install-recommends \
bash \
sudo \
wget \
software-properties-common \
ca-certificates \
apt-transport-https \
build-essential \
gpg-agent \
git
RUN wget -nv -O - https://apt.kitware.com/keys/kitware-archive-latest.asc | sudo apt-key add -
RUN sudo apt-add-repository 'deb https://apt.kitware.com/ubuntu/ bionic main'
RUN sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-8 main" >> /etc/apt/sources.list
RUN sudo apt-get --yes --allow-unauthenticated update
# To build woboq
RUN sudo apt-get --yes --allow-unauthenticated install cmake clang-8 libllvm8 libclang-8-dev
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libllvm9 libclang-9-dev
# repo versions don't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser
RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release && make -j
RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-9 -DCMAKE_C_COMPILER=clang-9 && make -j
ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator
ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator
@ -40,7 +22,7 @@ ENV SHA=nosha
ENV DATA="data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-8 -DCMAKE_C_COMPILER=/usr/bin/clang-8 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-11 -DCMAKE_C_COMPILER=/usr/bin/clang-11 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON && \
mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\


@ -11,7 +11,7 @@ RUN apt-get update \
&& echo "${LLVM_PUBKEY_HASH} /tmp/llvm-snapshot.gpg.key" | sha384sum -c \
&& apt-key add /tmp/llvm-snapshot.gpg.key \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] http://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list
# initial packages


@ -20,6 +20,7 @@ toc_title: Client Libraries
- [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client)
- [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client)
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [one-ck](https://github.com/lizhichao/one-ck)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)


@ -66,6 +66,32 @@ If no conditions met for a data part, ClickHouse uses the `lz4` compression.
</compression>
```
## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.
**Example**
```xml
<custom_settings_prefixes>custom_</custom_settings_prefixes>
```
**See Also**
- [Custom settings](../../operations/settings/index.md#custom_settings)
## core_dump
Configures the soft limit for the core dump file size; one gigabyte by default.
```xml
<core_dump>
<size_limit>1073741824</size_limit>
</core_dump>
```
(Hard limit is configured via system tools)
## default\_database {#default-database}
The default database.
@ -405,7 +431,7 @@ Limits total RAM usage by the ClickHouse server.
Possible values:
- Positive integer.
- 0 — Unlimited.
- 0 (auto).
Default value: `0`.


@ -28,4 +28,30 @@ Ways to configure settings, in order of priority:
Settings that can only be made in the server config file are not covered in this section.
## Custom Settings {#custom_settings}
In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
A custom setting name must begin with one of the predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
```xml
<custom_settings_prefixes>custom_</custom_settings_prefixes>
```
To define a custom setting, use the `SET` command:
```sql
SET custom_a = 123;
```
To get the current value of a custom setting, use the `getSetting()` function:
```sql
SELECT getSetting('custom_a');
```
**See Also**
- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md)
[Original article](https://clickhouse.tech/docs/en/operations/settings/) <!--hide-->


@ -70,6 +70,35 @@ Works with tables in the MergeTree family.
If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
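For illustration, a hedged sketch (the `hits` table and its `key`/`value` columns are hypothetical, not part of the original docs):

```sql
-- Assumes a MergeTree table ordered by `key`.
SELECT count() FROM hits WHERE key = 42 SETTINGS force_primary_key = 1;    -- Ok: the primary key restricts the range.
SELECT count() FROM hits WHERE value = 42 SETTINGS force_primary_key = 1;  -- Throws: no usable primary key condition.
```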
## force\_data\_skipping\_indices {#settings-force_data_skipping_indices}
Disables query execution if the passed data skipping indices weren't used.
Consider the following example:
```sql
CREATE TABLE data_01515
(
key Int,
d1 Int,
d1_null Nullable(Int),
INDEX d1_idx d1 TYPE minmax GRANULARITY 1,
INDEX d1_null_idx assumeNotNull(d1_null) TYPE minmax GRANULARITY 1
)
Engine=MergeTree()
ORDER BY key;
SELECT * FROM data_01515;
SELECT * FROM data_01515 SETTINGS force_data_skipping_indices=''; -- query will produce CANNOT_PARSE_TEXT error.
SELECT * FROM data_01515 SETTINGS force_data_skipping_indices='d1_idx'; -- query will produce INDEX_NOT_USED error.
SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_idx'; -- Ok.
SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_idx`'; -- Ok (example of full featured parser).
SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- query will produce INDEX_NOT_USED error, since d1_null_idx is not used.
SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok.
```
Works with tables in the MergeTree family.
## format\_schema {#format-schema}
This parameter is useful when you are using formats that require a schema definition, such as [Capn Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format.
@ -1144,9 +1173,9 @@ See also:
## insert\_quorum\_timeout {#settings-insert_quorum_timeout}
Write to quorum timeout in seconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
Write to quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
Default value: 60 seconds.
Default value: 600000 milliseconds (ten minutes).
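As a minimal sketch, the timeout can also be set per session; note that the value is now interpreted as milliseconds:

```sql
SET insert_quorum_timeout = 600000; -- 10 minutes, in milliseconds
```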
See also:
@ -1565,7 +1594,7 @@ See also:
## allow\_introspection\_functions {#settings-allow_introspection_functions}
Enables of disables [introspections functions](../../sql-reference/functions/introspection.md) for query profiling.
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
Possible values:
@ -2027,3 +2056,14 @@ Result:
```
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
## allow_experimental_bigint_types {#allow_experimental_bigint_types}
Enables or disables integer values exceeding the range that is supported by the int data type.
Possible values:
- 1 — The bigint data type is enabled.
- 0 — The bigint data type is disabled.
Default value: `0`.
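A minimal sketch of using the setting (the table name is hypothetical):

```sql
SET allow_experimental_bigint_types = 1;
CREATE TABLE big_numbers (x Int256, y UInt256) ENGINE = Memory; -- allowed only while the setting is 1
```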


@ -3,7 +3,7 @@ toc_priority: 42
toc_title: Decimal
---
# Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) {#decimalp-s-decimal32s-decimal64s-decimal128s}
# Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S) {#decimal}
Signed fixed-point numbers that keep precision during add, subtract and multiply operations. For division least significant digits are discarded (not rounded).
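A small sketch of the truncating division described above:

``` sql
SELECT toDecimal32(2, 4) AS x, x / 3 AS q; -- q = 0.6666: extra digits are discarded, not rounded
```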
@ -107,4 +107,8 @@ SELECT toDecimal32(1, 8) < 100
DB::Exception: Can't compare.
```
**See also**
- [isDecimalOverflow](../../sql-reference/functions/other-functions.md#is-decimal-overflow)
- [countDigits](../../sql-reference/functions/other-functions.md#count-digits)
[Original article](https://clickhouse.tech/docs/en/data_types/decimal/) <!--hide-->


@ -3,7 +3,7 @@ toc_priority: 40
toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256
---
# UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-int8-int16-int32-int64}
# UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 {#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256}
Fixed-length integers, with or without a sign.
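A quick sketch of constructing values of these types with the usual conversion functions:

``` sql
SELECT toInt8(-128) AS min_int8, toUInt8(255) AS max_uint8, toTypeName(toUInt64(1)) AS t;
```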


@ -16,3 +16,82 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
## stochasticLogisticRegression {#stochastic-logistic-regression}
The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements the stochastic gradient descent method for the binary classification problem. It uses `evalMLMethod` to predict on new data.
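A hedged sketch of the usual train/predict flow; the tables `train_data`, `test_data` and the columns `target`, `param1`, `param2` are hypothetical:

``` sql
-- Train: store the aggregated model state in a table.
CREATE TABLE your_model ENGINE = Memory AS
SELECT stochasticLogisticRegressionState(0.1, 0.0, 10, 'SGD')(target, param1, param2) AS state
FROM train_data;

-- Predict: pass the stored state to evalMLMethod together with the feature columns.
WITH (SELECT state FROM your_model) AS model
SELECT evalMLMethod(model, param1, param2) AS prediction FROM test_data;
```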
## bayesAB {#bayesab}
Compares test groups (variants) and calculates, for each group, the probability that it is the best one. The first group is used as a control group.
**Syntax**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Parameters**
- `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
- `beta` for [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)
- `gamma` for [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
- `higher_is_better` — Boolean flag. [Boolean](../../sql-reference/data-types/boolean.md). Possible values:
- `0` - lower values are considered to be better than higher
- `1` - higher values are considered to be better than lower
- `variant_names` - Variant names. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` - Numbers of tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` - Numbers of successful tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Note"
All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. `y` cannot be larger than `x`.
**Returned values**
For each variant the function calculates:
- `beats_control` - long-term probability to outperform the first (control) variant
- `to_be_best` - long-term probability to outperform all other variants
Type: JSON.
**Example**
Query:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Result:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/machine-learning-functions/) <!--hide-->


@ -1491,4 +1491,115 @@ Result:
```
## getSetting {#getSetting}
Returns the current value of a [custom setting](../../operations/settings/index.md#custom_settings).
**Syntax**
```sql
getSetting('custom_setting');
```
**Parameter**
- `custom_setting` — The setting name. [String](../../sql-reference/data-types/string.md).
**Returned value**
- The setting's current value.
**Example**
```sql
SET custom_a = 123;
SELECT getSetting('custom_a');
```
**Result**
```
123
```
**See Also**
- [Custom Settings](../../operations/settings/index.md#custom_settings)
## isDecimalOverflow {#is-decimal-overflow}
Checks whether the [Decimal](../../sql-reference/data-types/decimal.md) value is out of its (or specified) precision.
**Syntax**
``` sql
isDecimalOverflow(d, [p])
```
**Parameters**
- `d` — value. [Decimal](../../sql-reference/data-types/decimal.md).
- `p` — precision. Optional. If omitted, the initial precision of the first argument is used. Using this parameter could be helpful when extracting data to another DBMS or file. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Returned values**
- `1` — Decimal value has more digits than its precision allows,
- `0` — Decimal value satisfies the specified precision.
**Example**
Query:
``` sql
SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9),
isDecimalOverflow(toDecimal32(1000000000, 0)),
isDecimalOverflow(toDecimal32(-1000000000, 0), 9),
isDecimalOverflow(toDecimal32(-1000000000, 0));
```
Result:
``` text
1 1 1 1
```
## countDigits {#count-digits}
Returns the number of decimal digits needed to represent the value.
**Syntax**
``` sql
countDigits(x)
```
**Parameters**
- `x` — [Int](../../sql-reference/data-types/int-uint.md) or [Decimal](../../sql-reference/data-types/decimal.md) value.
**Returned value**
Number of digits.
Type: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
!!! note "Note"
For `Decimal` values it takes their scale into account: the result is calculated over the underlying integer type, which is `(value * scale)`. For example: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. That is, you may check decimal overflow for `Decimal64` with `countDigits(x) > 18`. It's a slow variant of [isDecimalOverflow](#is-decimal-overflow).
**Example**
Query:
``` sql
SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)),
countDigits(toDecimal64(1, 18)), countDigits(toDecimal64(-1, 18)),
countDigits(toDecimal128(1, 38)), countDigits(toDecimal128(-1, 38));
```
Result:
``` text
10 10 19 19 39 39
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/other_functions/) <!--hide-->


@ -487,4 +487,75 @@ Returns the CRC64 checksum of a string, using CRC-64-ECMA polynomial.
The result type is UInt64.
## normalizeQuery {#normalized-query}
Replaces literals, sequences of literals and complex aliases with placeholders.
**Syntax**
``` sql
normalizeQuery(x)
```
**Parameters**
- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Sequence of characters with placeholders.
Type: [String](../../sql-reference/data-types/string.md).
**Example**
Query:
``` sql
SELECT normalizeQuery('[1, 2, 3, x]') AS query;
```
Result:
``` text
┌─query────┐
│ [?.., x] │
└──────────┘
```
## normalizedQueryHash {#normalized-query-hash}
Returns identical 64-bit hash values, with the values of literals removed, for similar queries. It helps to analyze the query log.
**Syntax**
``` sql
normalizedQueryHash(x)
```
**Parameters**
- `x` — Sequence of characters. [String](../../sql-reference/data-types/string.md).
**Returned value**
- Hash value.
Type: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Example**
Query:
``` sql
SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
```
Result:
``` text
┌─res─┐
│ 1 │
└─────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) <!--hide-->


@ -360,6 +360,89 @@ Extracts a fragment of a string using a regular expression. If haystack do
Extracts all the fragments of a string using a regular expression. If haystack doesn't match the pattern regex, an empty string is returned. Returns an array of strings consisting of all matches to the regex. In general, the behavior is the same as the extract function (it takes the first subpattern, or the entire expression if there isn't a subpattern).
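For comparison with the grouped variants below, a brief sketch of plain `extract`/`extractAll`:

``` sql
SELECT
    extract('abc=111, def=222', '\\d+')    AS first_match, -- '111'
    extractAll('abc=111, def=222', '\\d+') AS all_matches; -- ['111','222']
```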
## extractAllGroupsHorizontal {#extractallgroups-horizontal}
Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where the first array includes all fragments matching the first group, the second array - matching the second group, etc.
!!! note "Note"
`extractAllGroupsHorizontal` function is slower than [extractAllGroupsVertical](#extractallgroups-vertical).
**Syntax**
``` sql
extractAllGroupsHorizontal(haystack, pattern)
```
**Parameters**
- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
**Returned value**
- Type: [Array](../../sql-reference/data-types/array.md).
If `haystack` doesn't match the `pattern` regex, an array of empty arrays is returned.
**Example**
Query:
``` sql
SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')
```
Result:
``` text
┌─extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','def','ghi'],['111','222','333']] │
└──────────────────────────────────────────────────────────────────────────────────────────┘
```
**See also**
- [extractAllGroupsVertical](#extractallgroups-vertical)
## extractAllGroupsVertical {#extractallgroups-vertical}
Matches all groups of the `haystack` string using the `pattern` regular expression. Returns an array of arrays, where each array includes matching fragments from every group. Fragments are grouped in order of appearance in the `haystack`.
**Syntax**
``` sql
extractAllGroupsVertical(haystack, pattern)
```
**Parameters**
- `haystack` — Input string. Type: [String](../../sql-reference/data-types/string.md).
- `pattern` — Regular expression with [re2 syntax](https://github.com/google/re2/wiki/Syntax). Must contain groups, each group enclosed in parentheses. If `pattern` contains no groups, an exception is thrown. Type: [String](../../sql-reference/data-types/string.md).
**Returned value**
- Type: [Array](../../sql-reference/data-types/array.md).
If `haystack` doesn't match the `pattern` regex, an empty array is returned.
**Example**
Query:
``` sql
SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')
```
Result:
``` text
┌─extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','111'],['def','222'],['ghi','333']] │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
**See also**
- [extractAllGroupsHorizontal](#extractallgroups-horizontal)
## like(haystack, pattern), haystack LIKE pattern operator {#function-like}
Checks whether a string matches a simple regular expression.


@ -11,7 +11,7 @@ When you convert a value from one to another data type, you should remember that
ClickHouse has the [same behavior as C++ programs](https://en.cppreference.com/w/cpp/language/implicit_conversion).
## toInt(8\|16\|32\|64\|128\|256) {#toint8163264}
## toInt(8\|16\|32\|64\|128\|256) {#toint8163264128256}
Converts an input value to the [Int](../../sql-reference/data-types/int-uint.md) data type. This function family includes:
@ -62,7 +62,7 @@ select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
└─────────────────────────┴───────────────────────────┘
```
## toInt(8\|16\|32\|64\|128\|256)OrNull {#toint8163264ornull}
## toInt(8\|16\|32\|64\|128\|256)OrNull {#toint8163264128256ornull}
It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 32 \| 64 \| 128 \| 256). If parsing fails, it returns NULL.
@ -78,7 +78,7 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
└─────────────────────────┴───────────────────────────┘
```
## toUInt(8\|16\|32\|64\|256) {#touint8163264}
## toUInt(8\|16\|32\|64\|256) {#touint8163264256}
Converts an input value to the [UInt](../../sql-reference/data-types/int-uint.md) data type. This function family includes:
@ -112,9 +112,9 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
└─────────────────────┴───────────────┴────────────────┴──────────────┘
```
## toUInt(8\|16\|32\|64\|256)OrZero {#touint8163264orzero}
## toUInt(8\|16\|32\|64\|256)OrZero {#touint8163264256orzero}
## toUInt(8\|16\|32\|64\|256)OrNull {#touint8163264ornull}
## toUInt(8\|16\|32\|64\|256)OrNull {#touint8163264256ornull}
## toFloat(32\|64) {#tofloat3264}
@ -134,7 +134,7 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
## toDateTimeOrNull {#todatetimeornull}
## toDecimal(32\|64\|128\|256) {#todecimal3264128}
## toDecimal(32\|64\|128\|256) {#todecimal3264128256}
Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) data type with precision of `S`. The `value` can be a number or a string. The `S` (scale) parameter specifies the number of decimal places.
@ -143,7 +143,7 @@ Converts `value` to the [Decimal](../../sql-reference/data-types/decimal.md) dat
- `toDecimal128(value, S)`
- `toDecimal256(value, S)`
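A minimal sketch of the conversion (the result type shown assumes the `toDecimal32` variant):

``` sql
SELECT toDecimal32(1.111, 2) AS d, toTypeName(d); -- 1.11, Decimal(9, 2)
```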
## toDecimal(32\|64\|128\|256)OrNull {#todecimal3264128ornull}
## toDecimal(32\|64\|128\|256)OrNull {#todecimal3264128256ornull}
Converts an input string to a [Nullable(Decimal(P,S))](../../sql-reference/data-types/decimal.md) data type value. This family of functions includes:
@ -188,7 +188,7 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
└──────┴────────────────────────────────────────────────────┘
```
## toDecimal(32\|64\|128\|256)OrZero {#todecimal3264128orzero}
## toDecimal(32\|64\|128\|256)OrZero {#todecimal3264128256orzero}
Converts an input value to the [Decimal(P,S)](../../sql-reference/data-types/decimal.md) data type. This family of functions includes:
@ -735,4 +735,45 @@ SELECT fromUnixTimestamp64Milli(i64, 'UTC')
└──────────────────────────────────────┘
```
## formatRow {#formatrow}
Converts arbitrary expressions into a string via the given format.
**Syntax**
``` sql
formatRow(format, x, y, ...)
```
**Parameters**
- `format` — Text format. For example, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `x`,`y`, ... — Expressions.
**Returned value**
- A formatted string (for text formats it's usually terminated with the new line character).
**Example**
Query:
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3)
```
Result:
``` text
┌─formatRow('CSV', number, 'good')─┐
│ 0,"good"
│ 1,"good"
│ 2,"good"
└──────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/type_conversion_functions/) <!--hide-->


@ -102,4 +102,9 @@ SELECT toDecimal32(1, 8) < 100
DB::Exception: Can't compare.
```
**Смотрите также**
- [isDecimalOverflow](../../sql-reference/functions/other-functions.md#is-decimal-overflow)
- [countDigits](../../sql-reference/functions/other-functions.md#count-digits)
[Оригинальная статья](https://clickhouse.tech/docs/ru/data_types/decimal/) <!--hide-->


@ -1431,5 +1431,80 @@ SELECT randomStringUTF8(13)
```
## isDecimalOverflow {#is-decimal-overflow}
Проверяет, находится ли число [Decimal](../../sql-reference/data-types/decimal.md#decimalp-s-decimal32s-decimal64s-decimal128s) вне собственной (или заданной) области значений.
**Синтаксис**
``` sql
isDecimalOverflow(d, [p])
```
**Параметры**
- `d` — число. [Decimal](../../sql-reference/data-types/decimal.md#decimalp-s-decimal32s-decimal64s-decimal128s).
- `p` — точность. Необязательный параметр. Если опущен, используется исходная точность первого аргумента. Использование этого параметра может быть полезно для извлечения данных в другую СУБД или файл. [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Возвращаемое значение**
- `1` — число имеет больше цифр, чем позволяет точность.
- `0` — число удовлетворяет заданной точности.
**Пример**
Запрос:
``` sql
SELECT isDecimalOverflow(toDecimal32(1000000000, 0), 9),
isDecimalOverflow(toDecimal32(1000000000, 0)),
isDecimalOverflow(toDecimal32(-1000000000, 0), 9),
isDecimalOverflow(toDecimal32(-1000000000, 0));
```
Результат:
``` text
1 1 1 1
```
## countDigits {#count-digits}
Возвращает количество десятичных цифр, необходимых для представления значения.
**Синтаксис**
``` sql
countDigits(x)
```
**Параметры**
- `x` — [целое](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64) или [дробное](../../sql-reference/data-types/decimal.md#decimalp-s-decimal32s-decimal64s-decimal128s) число.
**Возвращаемое значение**
Количество цифр.
Тип: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges).
!!! note "Примечание"
Для `Decimal` значений учитывается их масштаб: вычисляется результат по базовому целочисленному типу, полученному как `(value * scale)`. Например: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. То есть вы можете проверить десятичное переполнение для `Decimal64` с помощью `countDecimal(x) > 18`. Это медленный вариант [isDecimalOverflow](#is-decimal-overflow).
**Пример**
Запрос:
``` sql
SELECT countDigits(toDecimal32(1, 9)), countDigits(toDecimal32(-1, 9)),
countDigits(toDecimal64(1, 18)), countDigits(toDecimal64(-1, 18)),
countDigits(toDecimal128(1, 38)), countDigits(toDecimal128(-1, 38));
```
Результат:
``` text
10 10 19 19 39 39
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/other_functions/) <!--hide-->


@ -479,4 +479,75 @@ SELECT trimBoth(' Hello, world! ')
Тип результата — UInt64.
## normalizeQuery {#normalized-query}
Заменяет литералы, последовательности литералов и сложные псевдонимы заполнителями.
**Синтаксис**
``` sql
normalizeQuery(x)
```
**Параметры**
- `x` — Последовательность символов. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Последовательность символов с заполнителями.
Тип: [String](../../sql-reference/data-types/string.md).
**Пример**
Запрос:
``` sql
SELECT normalizeQuery('[1, 2, 3, x]') AS query;
```
Результат:
``` text
┌─query────┐
│ [?.., x] │
└──────────┘
```
## normalizedQueryHash {#normalized-query-hash}
Возвращает идентичные 64-битные хэш-суммы без значений литералов для аналогичных запросов. Это помогает анализировать журнал запросов.
**Синтаксис**
``` sql
normalizedQueryHash(x)
```
**Параметры**
- `x` — Последовательность символов. [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Хэш-сумма.
Тип: [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges).
**Пример**
Запрос:
``` sql
SELECT normalizedQueryHash('SELECT 1 AS `xyz`') != normalizedQueryHash('SELECT 1 AS `abc`') AS res;
```
Результат:
``` text
┌─res─┐
│ 1 │
└─────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) <!--hide-->


@ -341,6 +341,89 @@ Result:
Извлечение всех фрагментов строки по регулярному выражению. Если haystack не соответствует регулярному выражению pattern, то возвращается пустая строка. Возвращается массив строк, состоящий из всех соответствий регулярному выражению. В остальном, поведение аналогично функции extract (по прежнему, вынимается первый subpattern, или всё выражение, если subpattern-а нет).
## extractAllGroupsHorizontal {#extractallgroups-horizontal}
Разбирает строку `haystack` на фрагменты, соответствующие группам регулярного выражения `pattern`. Возвращает массив массивов, где первый массив содержит все фрагменты, соответствующие первой группе регулярного выражения, второй массив - соответствующие второй группе, и т.д.
!!! note "Замечание"
Функция `extractAllGroupsHorizontal` работает медленнее, чем функция [extractAllGroupsVertical](#extractallgroups-vertical).
**Синтаксис**
``` sql
extractAllGroupsHorizontal(haystack, pattern)
```
**Параметры**
- `haystack` — строка для разбора. Тип: [String](../../sql-reference/data-types/string.md).
- `pattern` — регулярное выражение, построенное по синтаксическим правилам [re2](https://github.com/google/re2/wiki/Syntax). Выражение должно содержать группы, заключенные в круглые скобки. Если выражение не содержит групп, генерируется исключение. Тип: [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Тип: [Array](../../sql-reference/data-types/array.md).
Если в строке `haystack` нет групп, соответствующих регулярному выражению `pattern`, возвращается массив пустых массивов.
**Пример**
Запрос:
``` sql
SELECT extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')
```
Результат:
``` text
┌─extractAllGroupsHorizontal('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','def','ghi'],['111','222','333']] │
└──────────────────────────────────────────────────────────────────────────────────────────┘
```
**См. также**
- функция [extractAllGroupsVertical](#extractallgroups-vertical)
## extractAllGroupsVertical {#extractallgroups-vertical}
Разбирает строку `haystack` на фрагменты, соответствующие группам регулярного выражения `pattern`. Возвращает массив массивов, где каждый массив содержит по одному фрагменту, соответствующему каждой группе регулярного выражения. Фрагменты группируются в массивы в соответствии с порядком появления в исходной строке.
**Синтаксис**
``` sql
extractAllGroupsVertical(haystack, pattern)
```
**Параметры**
- `haystack` — строка для разбора. Тип: [String](../../sql-reference/data-types/string.md).
- `pattern` — регулярное выражение, построенное по синтаксическим правилам [re2](https://github.com/google/re2/wiki/Syntax). Выражение должно содержать группы, заключенные в круглые скобки. Если выражение не содержит групп, генерируется исключение. Тип: [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
- Тип: [Array](../../sql-reference/data-types/array.md).
Если в строке `haystack` нет групп, соответствующих регулярному выражению `pattern`, возвращается пустой массив.
**Пример**
Запрос:
``` sql
SELECT extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')
```
Результат:
``` text
┌─extractAllGroupsVertical('abc=111, def=222, ghi=333', '("[^"]+"|\\w+)=("[^"]+"|\\w+)')─┐
│ [['abc','111'],['def','222'],['ghi','333']] │
└────────────────────────────────────────────────────────────────────────────────────────┘
```
**См. также**
- функция [extractAllGroupsHorizontal](#extractallgroups-horizontal)
## like(haystack, pattern), оператор haystack LIKE pattern {#function-like}
Проверка строки на соответствие простому регулярному выражению.


@ -723,4 +723,44 @@ SELECT toLowCardinality('1')
└───────────────────────┘
```
## formatRow {#formatrow}
Преобразует произвольные выражения в строку заданного формата.
**Синтаксис**
``` sql
formatRow(format, x, y, ...)
```
**Параметры**
- `format` — Текстовый формат. Например, [CSV](../../interfaces/formats.md#csv), [TSV](../../interfaces/formats.md#tabseparated).
- `x`,`y`, ... — Выражения.
**Возвращаемое значение**
- Отформатированная строка (в текстовых форматах обычно с завершающим переводом строки).
**Пример**
Запрос:
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3)
```
Ответ:
``` text
┌─formatRow('CSV', number, 'good')─┐
│ 0,"good"
│ 1,"good"
│ 2,"good"
└──────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/type_conversion_functions/) <!--hide-->


@ -24,14 +24,14 @@ toc_title: "\u0412\u0432\u0435\u0434\u0435\u043D\u0438\u0435"
| Функция | Описание |
|-----------------------|---------------------------------------------------------------------------------------------------------------------------------------|
| [file](file.md) | Создаёт таблицу с движком [File](../../sql-reference/table-functions/index.md). |
| [merge](merge.md) | Создаёт таблицу с движком [Merge](../../sql-reference/table-functions/index.md). |
| [file](file.md) | Создаёт таблицу с движком [File](../../engines/table-engines/special/file.md). |
| [merge](merge.md) | Создаёт таблицу с движком [Merge](../../engines/table-engines/special/merge.md). |
| [numbers](numbers.md) | Создаёт таблицу с единственным столбцом, заполненным целыми числами. |
| [remote](remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../sql-reference/table-functions/index.md). |
| [url](url.md) | Создаёт таблицу с движком [Url](../../sql-reference/table-functions/index.md). |
| [mysql](mysql.md) | Создаёт таблицу с движком [MySQL](../../sql-reference/table-functions/index.md). |
| [jdbc](jdbc.md) | Создаёт таблицу с дижком [JDBC](../../sql-reference/table-functions/index.md). |
| [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../sql-reference/table-functions/index.md). |
| [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../sql-reference/table-functions/index.md). |
| [remote](remote.md) | Предоставляет доступ к удалённым серверам, не создавая таблицу с движком [Distributed](../../engines/table-engines/special/distributed.md). |
| [url](url.md) | Создаёт таблицу с движком [Url](../../engines/table-engines/special/url.md). |
| [mysql](mysql.md) | Создаёт таблицу с движком [MySQL](../../engines/table-engines/integrations/mysql.md). |
| [jdbc](jdbc.md) | Создаёт таблицу с движком [JDBC](../../engines/table-engines/integrations/jdbc.md). |
| [odbc](odbc.md) | Создаёт таблицу с движком [ODBC](../../engines/table-engines/integrations/odbc.md). |
| [hdfs](hdfs.md) | Создаёт таблицу с движком [HDFS](../../engines/table-engines/integrations/hdfs.md). |
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/) <!--hide-->


@ -468,7 +468,7 @@ clickhouse-client --query "INSERT INTO tutorial.hits_v1 FORMAT TSV" --max_insert
clickhouse-client --query "INSERT INTO tutorial.visits_v1 FORMAT TSV" --max_insert_block_size=100000 < visits_v1.tsv
```
ClickHouse有很多 [要调整的设置](../operations/settings/index.md) 在控制台客户端中指定它们的一种方法是通过参数,我们可以看到 `--max_insert_block_size`. 找出可用的设置,它们意味着什么以及默认值的最简单方法是查询 `system.settings` 表:
ClickHouse有很多 [要调整的设置](../operations/settings/index.md) 在控制台客户端中指定它们的一种方法是通过参数,就像我们看到上面语句中的 `--max_insert_block_size`。找出可用的设置、含义及其默认值的最简单方法是查询 `system.settings` 表:
``` sql
SELECT name, value, changed, description
@ -479,7 +479,7 @@ FORMAT TSV
max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion."
```
您也可以 [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) 导入后的表。 使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储或至少检查是否有意义。 这些查询强制表引擎立即进行存储优化,而不是稍后进行一段时间:
您也可以 [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) 导入后的表。 使用MergeTree-family引擎配置的表总是在后台合并数据部分以优化数据存储或至少检查是否有意义。 这些查询强制表引擎立即进行存储优化,而不是稍后一段时间执行:
``` bash
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
@ -521,14 +521,14 @@ WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartU
ClickHouse集群是一个同质集群。 设置步骤:
1. 在群集的所有计算机上安装ClickHouse服务器
1. 在群集的所有机器上安装ClickHouse服务端
2. 在配置文件中设置群集配置
3. 在每个实例上创建本地表
4. 创建一个 [分布式表](../engines/table-engines/special/distributed.md)
[分布式表](../engines/table-engines/special/distributed.md) 实际上是一种 “view” 到ClickHouse集群的本地表。 从分布式表中选择查询使用集群所有分片的资源执行。 您可以为多个集群指定configs并创建多个分布式表为不同的集群提供视图。
[分布式表](../engines/table-engines/special/distributed.md) 实际上是一种 “视图”映射到ClickHouse集群的本地表。 从分布式表中执行 **SELECT** 查询会使用集群所有分片的资源。 您可以为多个集群指定configs并创建多个分布式表为不同的集群提供视图。
具有三个分片的集群的示例配置,每个分片一个副本:
具有三个分片,每个分片一个副本的集群的示例配置:
``` xml
<remote_servers>
@ -555,7 +555,7 @@ ClickHouse集群是一个同质集群。 设置步骤:
</remote_servers>
```
为了进一步演示,让我们创建一个新的本地表 `CREATE TABLE` 我们用于查询 `hits_v1`,但不同的表名:
为了进一步演示,让我们使用和创建 `hits_v1` 表相同的 `CREATE TABLE` 语句创建一个新的本地表,但表名不同:
``` sql
CREATE TABLE tutorial.hits_local (...) ENGINE = MergeTree() ...
@ -570,14 +570,14 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
常见的做法是在集群的所有计算机上创建类似的分布式表。 它允许在群集的任何计算机上运行分布式查询。 还有一个替代选项可以使用以下方法为给定的SELECT查询创建临时分布式表 [远程](../sql-reference/table-functions/remote.md) 表功能。
我们走吧 [INSERT SELECT](../sql-reference/statements/insert-into.md) 将该表传播到多个服务器。
让我们运行 [INSERT SELECT](../sql-reference/statements/insert-into.md) 将该表传播到多个服务器。
``` sql
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
```
!!! warning "碌莽禄Notice:"
这种方法不适合大型表的分片。 有一个单独的工具 [ツ环板-ョツ嘉ッツ偲](../operations/utilities/clickhouse-copier.md) 这可以重新分片任意大表。
!!! warning "注意:"
这种方法不适合大型表的分片。 有一个单独的工具 [clickhouse-copier](../operations/utilities/clickhouse-copier.md) 这可以重新分片任意大表。
正如您所期望的那样如果计算量大的查询使用3台服务器而不是一个则运行速度快N倍。
@ -609,10 +609,10 @@ INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
</remote_servers>
```
启用本机复制 [动物园管理员](http://zookeeper.apache.org/) 是必需的。 ClickHouse负责所有副本的数据一致性并在失败后自动运行恢复过程。 建议将ZooKeeper集群部署在单独的服务器上其中没有其他进程包括ClickHouse正在运行)。
启用本机复制 [Zookeeper](http://zookeeper.apache.org/) 是必需的。 ClickHouse负责所有副本的数据一致性并在失败后自动运行恢复过程。 建议将ZooKeeper集群部署在单独的服务器上其中没有其他进程包括运行的ClickHouse
!!! note "注"
ZooKeeper不是一个严格的requirement:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是 **不** 建议在这种情况下ClickHouse将无法保证所有副本上的数据一致性。 因此,它成为您的应用程序的责任
ZooKeeper不是一个严格的要求:在某些简单的情况下,您可以通过将数据写入应用程序代码中的所有副本来复制数据。 这种方法是 **不** 建议在这种情况下ClickHouse将无法保证所有副本上的数据一致性。 因此需要由您的应用来保证这一点
ZooKeeper位置在配置文件中指定:


@ -168,6 +168,26 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast)
throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS);
}
/// Wraps only identifiers with backticks.
std::string wrapIdentifiersWithBackticks(const ASTPtr & root)
{
if (auto identifier = std::dynamic_pointer_cast<ASTIdentifier>(root))
return backQuote(identifier->name);
if (auto function = std::dynamic_pointer_cast<ASTFunction>(root))
return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')';
if (auto expression_list = std::dynamic_pointer_cast<ASTExpressionList>(root))
{
Names function_arguments(expression_list->children.size());
for (size_t i = 0; i < expression_list->children.size(); ++i)
function_arguments[i] = wrapIdentifiersWithBackticks(expression_list->children[i]);
return boost::algorithm::join(function_arguments, ", ");
}
throw Exception("Primary key could be represented only as columns or functions from columns.", ErrorCodes::BAD_ARGUMENTS);
}
Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
{
@ -189,13 +209,14 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
ErrorCodes::BAD_ARGUMENTS);
Names primary_key_columns;
Names sorting_key_columns;
NameSet primary_key_columns_set;
for (size_t i = 0; i < sorting_key_size; ++i)
{
/// Column name could be represented as a f_1(f_2(...f_n(column_name))).
/// Each f_i could take one or more parameters.
/// We will wrap identifiers with backticks to allow non-standard identifier names.
String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName();
sorting_key_columns.push_back(sorting_key_column);
if (i < primary_key_size)
{
@ -208,7 +229,7 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast)
if (!primary_key_columns_set.emplace(pk_column).second)
throw Exception("Primary key contains duplicate columns", ErrorCodes::BAD_ARGUMENTS);
primary_key_columns.push_back(pk_column);
primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i]));
}
}


@ -6,6 +6,9 @@
#include <Core/Defines.h>
#include <ext/map.h>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
@ -269,7 +272,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
ParserStorage parser_storage;
engine_push_ast = parseQuery(parser_storage, engine_push_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
engine_push_partition_key_ast = extractPartitionKey(engine_push_ast);
primary_key_comma_separated = Nested::createCommaSeparatedStringFrom(extractPrimaryKeyColumnNames(engine_push_ast));
primary_key_comma_separated = boost::algorithm::join(extractPrimaryKeyColumnNames(engine_push_ast), ", ");
is_replicated_table = isReplicatedTableEngine(engine_push_ast);
}


@ -2,10 +2,15 @@
#include <setjmp.h>
#include <unistd.h>
#ifdef __linux__
#include <sys/mman.h>
#endif
#include <new>
#include <iostream>
#include <vector>
#include <string>
#include <tuple>
#include <utility> /// pair
#if !defined(ARCADIA_BUILD)
@ -57,6 +62,7 @@ int mainEntryClickHouseStatus(int argc, char ** argv);
int mainEntryClickHouseRestart(int argc, char ** argv);
#endif
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
namespace
{
@ -150,28 +156,29 @@ enum class InstructionFail
AVX512 = 8
};
const char * instructionFailToString(InstructionFail fail)
std::pair<const char *, size_t> instructionFailToString(InstructionFail fail)
{
switch (fail)
{
#define ret(x) return std::make_pair(x, ARRAY_SIZE(x) - 1)
case InstructionFail::NONE:
return "NONE";
ret("NONE");
case InstructionFail::SSE3:
return "SSE3";
ret("SSE3");
case InstructionFail::SSSE3:
return "SSSE3";
ret("SSSE3");
case InstructionFail::SSE4_1:
return "SSE4.1";
ret("SSE4.1");
case InstructionFail::SSE4_2:
return "SSE4.2";
ret("SSE4.2");
case InstructionFail::POPCNT:
return "POPCNT";
ret("POPCNT");
case InstructionFail::AVX:
return "AVX";
ret("AVX");
case InstructionFail::AVX2:
return "AVX2";
ret("AVX2");
case InstructionFail::AVX512:
return "AVX512";
ret("AVX512");
}
__builtin_unreachable();
}
@ -238,7 +245,7 @@ void checkRequiredInstructionsImpl(volatile InstructionFail & fail)
}
/// This function is safe to use in static initializers.
void writeError(const char * data, size_t size)
void writeErrorLen(const char * data, size_t size)
{
while (size != 0)
{
@ -254,6 +261,12 @@ void writeError(const char * data, size_t size)
}
}
}
/// Macros to avoid using strlen(), since it may fail if SSE is not supported.
#define writeError(data) do \
{ \
static_assert(__builtin_constant_p(data)); \
writeErrorLen(data, ARRAY_SIZE(data) - 1); \
} while (false)
/// Check SSE and others instructions availability. Calls exit on fail.
/// This function must be called as early as possible, even before main, because static initializers may use unavailable instructions.
@ -272,8 +285,7 @@ void checkRequiredInstructions()
/// Typical implementation of strlen is using SSE4.2 or AVX2.
/// But this is not the case because it's compiler builtin and is executed at compile time.
const char * msg = "Can not set signal handler\n";
writeError(msg, strlen(msg));
writeError("Can not set signal handler\n");
_Exit(1);
}
@ -281,12 +293,9 @@ void checkRequiredInstructions()
if (sigsetjmp(jmpbuf, 1))
{
const char * msg1 = "Instruction check fail. The CPU does not support ";
writeError(msg1, strlen(msg1));
const char * msg2 = instructionFailToString(fail);
writeError(msg2, strlen(msg2));
const char * msg3 = " instruction set.\n";
writeError(msg3, strlen(msg3));
writeError("Instruction check fail. The CPU does not support ");
std::apply(writeErrorLen, instructionFailToString(fail));
writeError(" instruction set.\n");
_Exit(1);
}
@ -294,13 +303,60 @@ void checkRequiredInstructions()
if (sigaction(signal, &sa_old, nullptr))
{
const char * msg = "Can not set signal handler\n";
writeError(msg, strlen(msg));
writeError("Can not set signal handler\n");
_Exit(1);
}
}
struct Checker { Checker() { checkRequiredInstructions(); } } checker;
#ifdef __linux__
/// clickhouse uses jemalloc as a production allocator
/// and jemalloc relies on working MADV_DONTNEED,
/// which doesn't work under qemu
///
/// but do this only for Linux, since only Linux returns zeroed pages after MADV_DONTNEED
/// (and jemalloc assumes this too, see contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in)
void checkRequiredMadviseFlags()
{
size_t size = 1 << 16;
void * addr = mmap(nullptr, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED)
{
writeError("Can not mmap pages for MADV_DONTNEED check\n");
_Exit(1);
}
memset(addr, 'A', size);
if (!madvise(addr, size, MADV_DONTNEED))
{
/// Suboptimal, but should be simple.
for (size_t i = 0; i < size; ++i)
{
if (reinterpret_cast<unsigned char *>(addr)[i] != 0)
{
writeError("MADV_DONTNEED does not zeroed page. jemalloc will be broken\n");
_Exit(1);
}
}
}
if (munmap(addr, size))
{
writeError("Can not munmap pages for MADV_DONTNEED check\n");
_Exit(1);
}
}
#endif
struct Checker
{
Checker()
{
checkRequiredInstructions();
#ifdef __linux__
checkRequiredMadviseFlags();
#endif
}
} checker;
}


@ -267,12 +267,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
registerDictionaries();
registerDisks();
#if !defined(ARCADIA_BUILD)
#if USE_OPENCL
BitonicSort::getInstance().configure();
#endif
#endif
CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision());
CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger());


@ -143,13 +143,13 @@ void LinearModelData::updateState()
void LinearModelData::predict(
ColumnVector<Float64>::Container & container,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
const Context & context) const
{
gradient_computer->predict(container, block, offset, limit, arguments, weights, bias, context);
gradient_computer->predict(container, columns, offset, limit, arguments, weights, bias, context);
}
void LinearModelData::returnWeights(IColumn & to) const
@ -449,7 +449,7 @@ void IWeightsUpdater::addToBatch(
void LogisticRegression::predict(
ColumnVector<Float64>::Container & container,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
@ -457,7 +457,7 @@ void LogisticRegression::predict(
Float64 bias,
const Context & /*context*/) const
{
size_t rows_num = block.rows();
size_t rows_num = columns[arguments.front()].column->size();
if (offset > rows_num || offset + limit > rows_num)
throw Exception("Invalid offset and limit for LogisticRegression::predict. "
@ -468,7 +468,7 @@ void LogisticRegression::predict(
for (size_t i = 1; i < arguments.size(); ++i)
{
const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]);
const ColumnWithTypeAndName & cur_col = columns[arguments[i]];
if (!isNativeNumber(cur_col.type))
throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);
@ -518,7 +518,7 @@ void LogisticRegression::compute(
void LinearRegression::predict(
ColumnVector<Float64>::Container & container,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
@ -531,7 +531,7 @@ void LinearRegression::predict(
throw Exception("In predict function number of arguments differs from the size of weights vector", ErrorCodes::LOGICAL_ERROR);
}
size_t rows_num = block.rows();
size_t rows_num = columns[arguments.front()].column->size();
if (offset > rows_num || offset + limit > rows_num)
throw Exception("Invalid offset and limit for LogisticRegression::predict. "
@ -542,7 +542,7 @@ void LinearRegression::predict(
for (size_t i = 1; i < arguments.size(); ++i)
{
const ColumnWithTypeAndName & cur_col = block.getByPosition(arguments[i]);
const ColumnWithTypeAndName & cur_col = columns[arguments[i]];
if (!isNativeNumber(cur_col.type))
throw Exception("Prediction arguments must have numeric type", ErrorCodes::BAD_ARGUMENTS);


@ -39,7 +39,7 @@ public:
virtual void predict(
ColumnVector<Float64>::Container & container,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
@ -65,7 +65,7 @@ public:
void predict(
ColumnVector<Float64>::Container & container,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
@ -91,7 +91,7 @@ public:
void predict(
ColumnVector<Float64>::Container & container,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
@ -264,7 +264,7 @@ public:
void predict(
ColumnVector<Float64>::Container & container,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
@ -364,7 +364,7 @@ public:
void predictValues(
ConstAggregateDataPtr place,
IColumn & to,
Block & block,
ColumnsWithTypeAndName & columns,
size_t offset,
size_t limit,
const ColumnNumbers & arguments,
@ -382,7 +382,7 @@ public:
throw Exception("Cast of column of predictions is incorrect. getReturnTypeToPredict must return same value as it is casted to",
ErrorCodes::LOGICAL_ERROR);
this->data(place).predict(column->getData(), block, offset, limit, arguments, context);
this->data(place).predict(column->getData(), columns, offset, limit, arguments, context);
}
/** This function is called if aggregate function without State modifier is selected in a query.


@ -114,7 +114,7 @@ public:
virtual void predictValues(
ConstAggregateDataPtr /* place */,
IColumn & /*to*/,
Block & /*block*/,
ColumnsWithTypeAndName & /*block*/,
size_t /*offset*/,
size_t /*limit*/,
const ColumnNumbers & /*arguments*/,


@ -36,7 +36,7 @@ namespace ErrorCodes
* uses asin, which slows down the algorithm a bit.
*/
template <typename T>
class QuantileTDigest
class TDigest
{
using Value = Float32;
using Count = Float32;
@ -86,20 +86,12 @@ class QuantileTDigest
/// The memory will be allocated to several elements at once, so that the state occupies 64 bytes.
static constexpr size_t bytes_in_arena = 128 - sizeof(PODArray<Centroid>) - sizeof(Count) - sizeof(UInt32);
using Summary = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
using Centroids = PODArrayWithStackMemory<Centroid, bytes_in_arena>;
Summary summary;
Centroids centroids;
Count count = 0;
UInt32 unmerged = 0;
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
*/
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
{
double k = (x - x1) / (x2 - x1);
return y1 + k * (y2 - y1);
}
struct RadixSortTraits
{
using Element = Centroid;
@ -122,13 +114,14 @@ class QuantileTDigest
*/
void addCentroid(const Centroid & c)
{
summary.push_back(c);
centroids.push_back(c);
count += c.count;
++unmerged;
if (unmerged >= params.max_unmerged)
compress();
}
public:
/** Performs compression of accumulated centroids
* When merging, the invariant is preserved that the size of each
* centroid does not exceed `4 q (1 - q) \delta N`.
@ -137,16 +130,16 @@ class QuantileTDigest
{
if (unmerged > 0)
{
RadixSort<RadixSortTraits>::executeLSD(summary.data(), summary.size());
RadixSort<RadixSortTraits>::executeLSD(centroids.data(), centroids.size());
if (summary.size() > 3)
if (centroids.size() > 3)
{
/// A pair of consecutive bars of the histogram.
auto l = summary.begin();
auto l = centroids.begin();
auto r = std::next(l);
Count sum = 0;
while (r != summary.end())
while (r != centroids.end())
{
// we use quantile which gives us the smallest error
@ -188,14 +181,13 @@ class QuantileTDigest
}
/// At the end of the loop, all values to the right of l were "eaten".
summary.resize(l - summary.begin() + 1);
centroids.resize(l - centroids.begin() + 1);
}
unmerged = 0;
}
}
public:
/** Adds to the digest a change in `x` with a weight of `cnt` (default 1)
*/
void add(T x, UInt64 cnt = 1)
@ -203,17 +195,17 @@ public:
addCentroid(Centroid(Value(x), Count(cnt)));
}
void merge(const QuantileTDigest & other)
void merge(const TDigest & other)
{
for (const auto & c : other.summary)
for (const auto & c : other.centroids)
addCentroid(c);
}
void serialize(WriteBuffer & buf)
{
compress();
writeVarUInt(summary.size(), buf);
buf.write(reinterpret_cast<const char *>(summary.data()), summary.size() * sizeof(summary[0]));
writeVarUInt(centroids.size(), buf);
buf.write(reinterpret_cast<const char *>(centroids.data()), centroids.size() * sizeof(centroids[0]));
}
void deserialize(ReadBuffer & buf)
@ -222,36 +214,113 @@ public:
readVarUInt(size, buf);
if (size > params.max_unmerged)
throw Exception("Too large t-digest summary size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
throw Exception("Too large t-digest centroids size", ErrorCodes::TOO_LARGE_ARRAY_SIZE);
summary.resize(size);
buf.read(reinterpret_cast<char *>(summary.data()), size * sizeof(summary[0]));
centroids.resize(size);
buf.read(reinterpret_cast<char *>(centroids.data()), size * sizeof(centroids[0]));
count = 0;
for (const auto & c : summary)
for (const auto & c : centroids)
count += c.count;
}
Count getCount()
{
return count;
}
const Centroids & getCentroids() const
{
return centroids;
}
void reset()
{
centroids.resize(0);
count = 0;
unmerged = 0;
}
};
template <typename T>
class QuantileTDigest
{
using Value = Float32;
using Count = Float32;
/** We store two t-digests. When the number of elements in sub_tdigest exceeds merge_threshold,
* we merge sub_tdigest into main_tdigest and reset sub_tdigest. This is needed to keep the number of
* centroids in the t-digest down (experiments show that past merge_threshold the size of a t-digest grows
* significantly, while merging two large t-digests shrinks it again).
*/
TDigest<T> main_tdigest;
TDigest<T> sub_tdigest;
size_t merge_threshold = 1e7;
/** Linear interpolation at the point x on the line (x1, y1)..(x2, y2)
*/
static Value interpolate(Value x, Value x1, Value y1, Value x2, Value y2)
{
double k = (x - x1) / (x2 - x1);
return y1 + k * (y2 - y1);
}
void mergeTDigests()
{
main_tdigest.merge(sub_tdigest);
sub_tdigest.reset();
}
public:
void add(T x, UInt64 cnt = 1)
{
if (sub_tdigest.getCount() >= merge_threshold)
mergeTDigests();
sub_tdigest.add(x, cnt);
}
void merge(const QuantileTDigest & other)
{
mergeTDigests();
main_tdigest.merge(other.main_tdigest);
main_tdigest.merge(other.sub_tdigest);
}
void serialize(WriteBuffer & buf)
{
mergeTDigests();
main_tdigest.serialize(buf);
}
void deserialize(ReadBuffer & buf)
{
sub_tdigest.reset();
main_tdigest.deserialize(buf);
}
/** Calculates the quantile q [0, 1] based on the digest.
* For an empty digest returns NaN.
*/
template <typename ResultType>
ResultType getImpl(Float64 level)
{
if (summary.empty())
mergeTDigests();
auto & centroids = main_tdigest.getCentroids();
if (centroids.empty())
return std::is_floating_point_v<ResultType> ? NAN : 0;
compress();
main_tdigest.compress();
if (summary.size() == 1)
return summary.front().mean;
if (centroids.size() == 1)
return centroids.front().mean;
Float64 x = level * count;
Float64 x = level * main_tdigest.getCount();
Float64 prev_x = 0;
Count sum = 0;
Value prev_mean = summary.front().mean;
Value prev_mean = centroids.front().mean;
for (const auto & c : summary)
for (const auto & c : centroids)
{
Float64 current_x = sum + c.count * 0.5;
@ -263,7 +332,7 @@ public:
prev_x = current_x;
}
return summary.back().mean;
return centroids.back().mean;
}
/** Get multiple quantiles (`size` parts).
@ -274,29 +343,32 @@ public:
template <typename ResultType>
void getManyImpl(const Float64 * levels, const size_t * levels_permutation, size_t size, ResultType * result)
{
if (summary.empty())
mergeTDigests();
auto & centroids = main_tdigest.getCentroids();
if (centroids.empty())
{
for (size_t result_num = 0; result_num < size; ++result_num)
result[result_num] = std::is_floating_point_v<ResultType> ? NAN : 0;
return;
}
compress();
main_tdigest.compress();
if (summary.size() == 1)
if (centroids.size() == 1)
{
for (size_t result_num = 0; result_num < size; ++result_num)
result[result_num] = summary.front().mean;
result[result_num] = centroids.front().mean;
return;
}
Float64 x = levels[levels_permutation[0]] * count;
Float64 x = levels[levels_permutation[0]] * main_tdigest.getCount();
Float64 prev_x = 0;
Count sum = 0;
Value prev_mean = summary.front().mean;
Value prev_mean = centroids.front().mean;
size_t result_num = 0;
for (const auto & c : summary)
for (const auto & c : centroids)
{
Float64 current_x = sum + c.count * 0.5;
@ -308,7 +380,7 @@ public:
if (result_num >= size)
return;
x = levels[levels_permutation[result_num]] * count;
x = levels[levels_permutation[result_num]] * main_tdigest.getCount();
}
sum += c.count;
@ -316,7 +388,7 @@ public:
prev_x = current_x;
}
auto rest_of_results = summary.back().mean;
auto rest_of_results = centroids.back().mean;
for (; result_num < size; ++result_num)
result[levels_permutation[result_num]] = rest_of_results;
}
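The buffering scheme behind the QuantileTDigest change above can be summarized with a small self-contained sketch. Names and the threshold are illustrative; Digest stands for any type with the add/merge/reset/getCount interface of the TDigest class in the hunk above.
#include <cstddef>
/// Minimal sketch of the two-digest buffering idea: cheap inserts go into a small
/// buffer digest; once it grows past a threshold it is folded into the main digest,
/// which keeps the total number of centroids bounded.
template <typename Digest>
class BufferedDigest
{
    Digest main_digest;
    Digest sub_digest;
    size_t merge_threshold = 10'000'000;
public:
    void add(double x, size_t cnt = 1)
    {
        if (sub_digest.getCount() >= merge_threshold)
        {
            /// Folding a large buffer into the main digest shrinks the centroid count again.
            main_digest.merge(sub_digest);
            sub_digest.reset();
        }
        sub_digest.add(x, cnt);
    }
};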

View File

@ -161,7 +161,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum
return res;
}
MutableColumnPtr ColumnAggregateFunction::predictValues(Block & block, const ColumnNumbers & arguments, const Context & context) const
MutableColumnPtr ColumnAggregateFunction::predictValues(ColumnsWithTypeAndName & block, const ColumnNumbers & arguments, const Context & context) const
{
MutableColumnPtr res = func->getReturnTypeToPredict()->createColumn();
res->reserve(data.size());
@ -172,7 +172,7 @@ MutableColumnPtr ColumnAggregateFunction::predictValues(Block & block, const Col
if (data.size() == 1)
{
/// Case for const column. Predict using single model.
machine_learning_function->predictValues(data[0], *res, block, 0, block.rows(), arguments, context);
machine_learning_function->predictValues(data[0], *res, block, 0, block[arguments.front()].column->size(), arguments, context);
}
else
{

View File

@ -119,7 +119,7 @@ public:
const char * getFamilyName() const override { return "AggregateFunction"; }
TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; }
MutableColumnPtr predictValues(Block & block, const ColumnNumbers & arguments, const Context & context) const;
MutableColumnPtr predictValues(ColumnsWithTypeAndName & block, const ColumnNumbers & arguments, const Context & context) const;
size_t size() const override
{

View File

@ -187,16 +187,16 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
throw Exception("Cannot call function " + function->getName() + " because is has " + toString(args) +
"arguments but " + toString(captured) + " columns were captured.", ErrorCodes::LOGICAL_ERROR);
Block block(captured_columns);
block.insert({nullptr, function->getReturnType(), ""});
auto columns = captured_columns;
columns.emplace_back(ColumnWithTypeAndName {nullptr, function->getReturnType(), ""});
ColumnNumbers arguments(captured_columns.size());
for (size_t i = 0; i < captured_columns.size(); ++i)
arguments[i] = i;
function->execute(block, arguments, captured_columns.size(), size_);
function->execute(columns, arguments, captured_columns.size(), size_);
return block.getByPosition(captured_columns.size());
return columns[captured_columns.size()];
}
}

View File

@ -22,10 +22,14 @@ public:
Exception() = default;
Exception(const std::string & msg, int code);
Exception(int code, const std::string & message)
: Exception(message, code)
{}
// Format message with fmt::format, like the logging functions.
template <typename ...Fmt>
Exception(int code, Fmt&&... fmt)
: Exception(fmt::format(std::forward<Fmt>(fmt)...), code)
template <typename ...Args>
Exception(int code, const std::string & fmt, Args&&... args)
: Exception(fmt::format(fmt, std::forward<Args>(args)...), code)
{}
struct CreateFromPocoTag {};
@ -40,7 +44,16 @@ public:
const char * what() const throw() override { return message().data(); }
/// Add something to the existing message.
void addMessage(const std::string & arg) { extendedMessage(arg); }
template <typename ...Args>
void addMessage(const std::string& format, Args&&... args)
{
extendedMessage(fmt::format(format, std::forward<Args>(args)...));
}
void addMessage(const std::string& message)
{
extendedMessage(message);
}
std::string getStackTraceString() const;
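A usage sketch for the format-string constructor and addMessage overloads introduced above; the error code and values are illustrative, and the pattern matches the BaseSettings call site further down in this diff.
try
{
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected value {} for argument '{}'", 42, "level");
}
catch (Exception & e)
{
    /// Append context to the existing message without losing the original text or stack trace.
    e.addMessage("while validating parameter '{}'", "level");
    throw;
}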

View File

@ -82,13 +82,6 @@ inline UInt64 getCurrentTimeNanoseconds(clockid_t clock_type = CLOCK_MONOTONIC)
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
inline UInt64 getCurrentTimeMicroseconds()
{
struct timeval tv;
gettimeofday(&tv, nullptr);
return (tv.tv_sec) * 1000000U + (tv.tv_usec);
}
struct RUsageCounters
{
/// In nanoseconds
@ -115,13 +108,6 @@ struct RUsageCounters
hard_page_faults = static_cast<UInt64>(rusage.ru_majflt);
}
static RUsageCounters zeros(UInt64 real_time_ = getCurrentTimeNanoseconds())
{
RUsageCounters res;
res.real_time = real_time_;
return res;
}
static RUsageCounters current(UInt64 real_time_ = getCurrentTimeNanoseconds())
{
::rusage rusage {};

View File

@ -5,19 +5,37 @@
# include <cstdio>
# include <mntent.h>
#endif
#include <cerrno>
#include <Poco/File.h>
#include <Poco/Path.h>
#include <Poco/Version.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SYSTEM_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_STATVFS;
}
struct statvfs getStatVFS(const String & path)
{
struct statvfs fs;
while (statvfs(path.c_str(), &fs) != 0)
{
if (errno == EINTR)
continue;
throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS);
}
return fs;
}
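The retry-on-EINTR idiom used in getStatVFS above can be expressed as a small generic helper; this is a sketch under the assumption that the wrapped call returns 0 on success and sets errno on failure.
#include <cerrno>
/// Repeat a syscall-like callable while it is interrupted by a signal (EINTR);
/// any other failure is returned to the caller unchanged.
template <typename Call>
int retryOnEINTR(Call && call)
{
    int res;
    do
        res = call();
    while (res != 0 && errno == EINTR);
    return res;
}
/// Illustrative usage: if (retryOnEINTR([&] { return statvfs(path.c_str(), &fs); }) != 0) ...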
bool enoughSpaceInDirectory(const std::string & path [[maybe_unused]], size_t data_size [[maybe_unused]])
{
#if POCO_VERSION >= 0x01090000
@ -46,7 +64,7 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
const auto get_device_id = [](const std::filesystem::path & p)
{
struct stat st;
if (stat(p.c_str(), &st))
if (stat(p.c_str(), &st)) /// NOTE: man stat does not list EINTR as a possible error
throwFromErrnoWithPath("Cannot stat " + p.string(), p.string(), ErrorCodes::SYSTEM_ERROR);
return st.st_dev;
};

View File

@ -12,10 +12,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_STATVFS;
}
using TemporaryFile = Poco::TemporaryFile;
@ -31,12 +27,6 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path);
#endif
String getFilesystemName([[maybe_unused]] const String & mount_point);
inline struct statvfs getStatVFS(const String & path)
{
struct statvfs fs;
if (statvfs(path.c_str(), &fs) != 0)
throwFromErrnoWithPath("Could not calculate available disk space (statvfs)", path, ErrorCodes::CANNOT_STATVFS);
return fs;
}
struct statvfs getStatVFS(const String & path);
}

View File

@ -390,13 +390,21 @@ String BaseSettings<Traits_>::valueToStringUtil(const std::string_view & name, c
template <typename Traits_>
Field BaseSettings<Traits_>::stringToValueUtil(const std::string_view & name, const String & str)
{
const auto & accessor = Traits::Accessor::instance();
if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
return accessor.stringToValueUtil(index, str);
if constexpr (Traits::allow_custom_settings)
return Field::restoreFromDump(str);
else
BaseSettingsHelpers::throwSettingNotFound(name);
try
{
const auto & accessor = Traits::Accessor::instance();
if (size_t index = accessor.find(name); index != static_cast<size_t>(-1))
return accessor.stringToValueUtil(index, str);
if constexpr (Traits::allow_custom_settings)
return Field::restoreFromDump(str);
else
BaseSettingsHelpers::throwSettingNotFound(name);
}
catch (Exception & e)
{
e.addMessage("while parsing value '{}' for setting '{}'", str, name);
throw;
}
}
template <typename Traits_>

View File

@ -152,6 +152,12 @@ public:
private:
void eraseImpl(size_t position);
void initializeIndexByName();
/// This is needed to allow function execution over data.
/// It is safe because functions do not change column names, so the index is unaffected.
/// It is temporary.
friend struct ExpressionAction;
friend class ActionsDAG;
};
using Blocks = std::vector<Block>;

View File

@ -60,14 +60,14 @@ public:
using ArrayA = typename ColVecA::Container;
using ArrayB = typename ColVecB::Container;
DecimalComparison(Block & block, size_t result, const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right)
DecimalComparison(ColumnsWithTypeAndName & data, size_t result, const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right)
{
if (!apply(block, result, col_left, col_right))
if (!apply(data, result, col_left, col_right))
throw Exception("Wrong decimal comparison with " + col_left.type->getName() + " and " + col_right.type->getName(),
ErrorCodes::LOGICAL_ERROR);
}
static bool apply(Block & block, size_t result [[maybe_unused]],
static bool apply(ColumnsWithTypeAndName & data, size_t result [[maybe_unused]],
const ColumnWithTypeAndName & col_left, const ColumnWithTypeAndName & col_right)
{
if constexpr (_actual)
@ -77,7 +77,7 @@ public:
c_res = applyWithScale(col_left.column, col_right.column, shift);
if (c_res)
block.getByPosition(result).column = std::move(c_res);
data[result].column = std::move(c_res);
return true;
}
return false;

View File

@ -152,12 +152,14 @@ inline typename DecimalType::NativeType getFractionalPartWithScaleMultiplier(
{
using T = typename DecimalType::NativeType;
T result = decimal.value;
/// There is UB when negating the minimum integer value here, but it does not matter for Decimals because they do not use the full integer range.
/// In any case we take the modulo before the comparison, so the scale_multiplier > 1 case is unaffected.
T result = decimal.value % scale_multiplier;
if constexpr (!keep_sign)
if (result < T(0))
result = -result;
return result % scale_multiplier;
return result;
}
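A quick check of the comment above using plain C++ integer arithmetic; the values are illustrative, and truncated division makes the remainder carry the dividend's sign.
#include <cstdint>
static_assert(12345 % 100 == 45);      /// 123.45 stored as 12345 with scale_multiplier = 100
static_assert(-12345 % 100 == -45);    /// negated afterwards when keep_sign == false
static_assert(INT32_MIN % 100 == -48); /// small magnitude, so the later negation cannot overflow,
                                       /// unlike negating INT32_MIN itself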
/** Get fractional part from decimal

View File

@ -146,6 +146,7 @@ namespace Protocol
"Ping",
"TablesStatusRequest",
"KeepAlive",
"Scalar",
};
return packet <= MAX
? data[packet]

View File

@ -135,6 +135,7 @@ class IColumn;
\
M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \
M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
\
M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \
M(Float, max_streams_multiplier_for_merge_tables, 5, "Ask more streams when reading from Merge table. Streams will be spread across tables that Merge table will use. This allows more even distribution of work across threads and especially helpful when merged tables differ in size.", 0) \
@ -158,6 +159,7 @@ class IColumn;
\
M(UInt64, insert_quorum, 0, "For INSERT queries in the replicated table, wait writing for the specified number of replicas and linearize the addition of the data. 0 - disabled.", 0) \
M(Milliseconds, insert_quorum_timeout, 600000, "", 0) \
M(Bool, insert_quorum_parallel, false, "For quorum INSERT queries - enable to make parallel inserts without linearizability", 0) \
M(UInt64, select_sequential_consistency, 0, "For SELECT queries from the replicated table, throw an exception if the replica does not have a chunk written with the quorum; do not read the parts that have not yet been written with the quorum.", 0) \
M(UInt64, table_function_remote_max_addresses, 1000, "The maximum number of different shards and the maximum number of replicas of one shard in the `remote` function.", 0) \
M(Milliseconds, read_backoff_min_latency_ms, 1000, "Setting to reduce the number of threads in case of slow reads. Pay attention only to reads that took at least that much time.", 0) \
@ -389,14 +391,6 @@ class IColumn;
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
M(Bool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13", 0) \
M(Bool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13", 0) \
M(UInt64, min_count_to_compile, 0, "Obsolete setting, does nothing. Will be removed after 2020-03-13", 0) \
M(Bool, allow_experimental_multiple_joins_emulation, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(Bool, allow_experimental_cross_to_join_conversion, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(Bool, allow_experimental_data_skipping_indices, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(Bool, merge_tree_uniform_read_distribution, true, "Obsolete setting, does nothing. Will be removed after 2020-05-20", 0) \
M(UInt64, mark_cache_min_lifetime, 0, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(Bool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \
M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \
@ -469,6 +463,8 @@ class IColumn;
M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \
M(Bool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
#define LIST_OF_SETTINGS(M) \
COMMON_SETTINGS(M) \

View File

@ -70,17 +70,6 @@ std::pair<std::string, std::string> splitName(const std::string & name)
return {{ begin, first_end }, { second_begin, end }};
}
std::string createCommaSeparatedStringFrom(const Names & names)
{
std::ostringstream ss;
if (!names.empty())
{
std::copy(names.begin(), std::prev(names.end()), std::ostream_iterator<std::string>(ss, ", "));
ss << names.back();
}
return ss.str();
}
std::string extractTableName(const std::string & nested_name)
{

View File

@ -13,8 +13,6 @@ namespace Nested
std::pair<std::string, std::string> splitName(const std::string & name);
std::string createCommaSeparatedStringFrom(const Names & names);
/// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot.
std::string extractTableName(const std::string & nested_name);

View File

@ -173,7 +173,6 @@ namespace
std::function<void(WriteBufferFromFile &)> send_data;
ThreadFromGlobalPool thread;
};
}

View File

@ -194,6 +194,7 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
auto src_buffer = cache_disk->readFile(path, buf_size, estimated_size, aio_threshold, 0);
auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode, estimated_size, aio_threshold);
copyData(*src_buffer, *dst_buffer);
dst_buffer->finalize();
},
buf_size);
}

View File

@ -27,6 +27,7 @@ void copyFile(IDisk & from_disk, const String & from_path, IDisk & to_disk, cons
auto in = from_disk.readFile(from_path);
auto out = to_disk.writeFile(to_path);
copyData(*in, *out);
out->finalize();
}

View File

@ -18,7 +18,7 @@
#include <Common/thread_local_rng.h>
#include <aws/s3/model/CopyObjectRequest.h>
#include <aws/s3/model/DeleteObjectRequest.h>
#include <aws/s3/model/DeleteObjectsRequest.h>
#include <aws/s3/model/GetObjectRequest.h>
#include <boost/algorithm/string.hpp>
@ -36,6 +36,32 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
/// Helper class to collect keys into chunks of maximum size (to prepare batch requests to AWS API)
class DiskS3::AwsS3KeyKeeper : public std::list<Aws::Vector<Aws::S3::Model::ObjectIdentifier>>
{
public:
void addKey(const String & key);
private:
/// Limit for one DeleteObjects request
/// see https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html
const static size_t chunk_limit = 1000;
};
void DiskS3::AwsS3KeyKeeper::addKey(const String & key)
{
if (empty() || back().size() >= chunk_limit)
{ /// add one more chunk
push_back(value_type());
back().reserve(chunk_limit);
}
Aws::S3::Model::ObjectIdentifier obj;
obj.SetKey(key);
back().push_back(obj);
}
namespace
{
String getRandomName()
@ -634,7 +660,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
}
}
void DiskS3::remove(const String & path)
void DiskS3::removeMeta(const String & path, AwsS3KeyKeeper & keys)
{
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Remove file by path: {}", backQuote(metadata_path + path));
@ -647,14 +673,9 @@ void DiskS3::remove(const String & path)
if (metadata.ref_count == 0)
{
file.remove();
for (const auto & [s3_object_path, _] : metadata.s3_objects)
{
/// TODO: Make operation idempotent. Do not throw exception if key is already deleted.
Aws::S3::Model::DeleteObjectRequest request;
request.SetBucket(bucket);
request.SetKey(s3_root_path + s3_object_path);
throwIfError(client->DeleteObject(request));
}
keys.addKey(s3_root_path + s3_object_path);
}
else /// In other case decrement number of references, save metadata and delete file.
{
@ -665,25 +686,57 @@ void DiskS3::remove(const String & path)
}
else
file.remove();
}
void DiskS3::removeRecursive(const String & path)
void DiskS3::removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys)
{
checkStackSize(); /// This is needed to prevent stack overflow in case of cyclic symlinks.
Poco::File file(metadata_path + path);
if (file.isFile())
{
remove(path);
removeMeta(path, keys);
}
else
{
for (auto it{iterateDirectory(path)}; it->isValid(); it->next())
removeRecursive(it->path());
removeMetaRecursive(it->path(), keys);
file.remove();
}
}
void DiskS3::removeAws(const AwsS3KeyKeeper & keys)
{
if (!keys.empty())
{
for (const auto & chunk : keys)
{
Aws::S3::Model::Delete delkeys;
delkeys.SetObjects(chunk);
/// TODO: Make operation idempotent. Do not throw exception if key is already deleted.
Aws::S3::Model::DeleteObjectsRequest request;
request.SetBucket(bucket);
request.SetDelete(delkeys);
throwIfError(client->DeleteObjects(request));
}
}
}
void DiskS3::remove(const String & path)
{
AwsS3KeyKeeper keys;
removeMeta(path, keys);
removeAws(keys);
}
void DiskS3::removeRecursive(const String & path)
{
AwsS3KeyKeeper keys;
removeMetaRecursive(path, keys);
removeAws(keys);
}
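The batching that AwsS3KeyKeeper and removeAws perform above can be pictured with a plain-STL sketch; the types are illustrative, while the real code stores Aws::S3::Model::ObjectIdentifier values and sends each chunk in one DeleteObjects request.
#include <cstddef>
#include <list>
#include <string>
#include <vector>
/// Split keys into chunks of at most chunk_limit entries, mirroring AwsS3KeyKeeper::addKey.
std::list<std::vector<std::string>> splitIntoChunks(const std::vector<std::string> & keys, size_t chunk_limit = 1000)
{
    std::list<std::vector<std::string>> chunks;
    for (const auto & key : keys)
    {
        if (chunks.empty() || chunks.back().size() >= chunk_limit)
            chunks.emplace_back();      /// start a new chunk
        chunks.back().push_back(key);
    }
    return chunks;                      /// e.g. 2500 keys -> chunks of 1000, 1000 and 500
}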
bool DiskS3::tryReserve(UInt64 bytes)
{

View File

@ -21,6 +21,8 @@ class DiskS3 : public IDisk
public:
friend class DiskS3Reservation;
class AwsS3KeyKeeper;
DiskS3(
String name_,
std::shared_ptr<Aws::S3::S3Client> client_,
@ -111,6 +113,10 @@ public:
private:
bool tryReserve(UInt64 bytes);
void removeMeta(const String & path, AwsS3KeyKeeper & keys);
void removeMetaRecursive(const String & path, AwsS3KeyKeeper & keys);
void removeAws(const AwsS3KeyKeeper & keys);
private:
const String name;
std::shared_ptr<Aws::S3::S3Client> client;

View File

@ -1,3 +1,4 @@
#include <aws/core/client/DefaultRetryStrategy.h>
#include <IO/ReadHelpers.h>
#include <IO/S3Common.h>
#include <IO/WriteHelpers.h>
@ -123,6 +124,9 @@ void registerDiskS3(DiskFactory & factory)
if (proxy_config)
cfg.perRequestConfiguration = [proxy_config](const auto & request) { return proxy_config->getConfiguration(request); };
cfg.retryStrategy = std::make_shared<Aws::Client::DefaultRetryStrategy>(
config.getUInt(config_prefix + ".retry_attempts", 10));
auto client = S3::ClientFactory::instance().create(
cfg,
uri.is_virtual_hosted_style,

View File

@ -107,6 +107,7 @@ static FormatSettings getOutputFormatSetting(const Settings & settings, const Co
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ?
FormatSettings::Pretty::Charset::ASCII :
FormatSettings::Pretty::Charset::UTF8;
format_settings.pretty.output_format_pretty_row_numbers = settings.output_format_pretty_row_numbers;
format_settings.template_settings.resultset_format = settings.format_template_resultset;
format_settings.template_settings.row_format = settings.format_template_row;
format_settings.template_settings.row_between_delimiter = settings.format_template_rows_between_delimiter;

View File

@ -45,6 +45,8 @@ struct FormatSettings
UInt64 max_value_width = 10000;
bool color = true;
bool output_format_pretty_row_numbers = false;
enum class Charset
{
UTF8,

View File

@ -6,6 +6,7 @@
#include <Core/DecimalFunctions.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <Functions/IFunctionImpl.h>
#include <Common/Exception.h>
#include <common/DateLUTImpl.h>
@ -115,7 +116,7 @@ template <typename FromDataType, typename ToDataType>
struct CustomWeekTransformImpl
{
template <typename Transform>
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/, Transform transform = {})
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/, Transform transform = {})
{
const auto op = Transformer<typename FromDataType::FieldType, typename ToDataType::FieldType, Transform>{std::move(transform)};
@ -126,7 +127,7 @@ struct CustomWeekTransformImpl
week_mode = week_mode_column->getValue<UInt8>();
}
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block.data, arguments, 2, 0);
const ColumnPtr source_col = block.getByPosition(arguments[0]).column;
if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))
{

View File

@ -6,6 +6,7 @@
#include <Columns/ColumnVector.h>
#include <Columns/ColumnDecimal.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunctionImpl.h>
#include <Functions/extractTimeZoneFromFunctionArguments.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
@ -682,11 +683,11 @@ struct Transformer
template <typename FromDataType, typename ToDataType, typename Transform>
struct DateTimeTransformImpl
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/, const Transform & transform = {})
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/, const Transform & transform = {})
{
using Op = Transformer<typename FromDataType::FieldType, typename ToDataType::FieldType, Transform>;
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block, arguments, 1, 0);
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block.data, arguments, 1, 0);
const ColumnPtr source_col = block.getByPosition(arguments[0]).column;
if (const auto * sources = checkAndGetColumn<typename FromDataType::ColumnType>(source_col.get()))

View File

@ -729,7 +729,7 @@ class FunctionBinaryArithmetic : public IFunction
{new_block.getByPosition(new_arguments[0]), new_block.getByPosition(new_arguments[1])};
auto function = function_builder->build(new_arguments_with_type_and_name);
function->execute(new_block, new_arguments, result, input_rows_count);
function->execute(new_block.data, new_arguments, result, input_rows_count);
block.getByPosition(result).column = new_block.getByPosition(result).column;
}
@ -872,7 +872,7 @@ public:
col_right->getChars().data(),
out_chars.data(),
out_chars.size());
block.getByPosition(result).column = ColumnConst::create(std::move(col_res), block.rows());
block.getByPosition(result).column = ColumnConst::create(std::move(col_res), col_left_raw->size());
return true;
}
}
@ -988,7 +988,7 @@ public:
col_res = ColVecResult::create(0, type.getScale());
auto & vec_res = col_res->getData();
vec_res.resize(block.rows());
vec_res.resize(col_left_raw->size());
if (col_left && col_right)
{
@ -1032,7 +1032,7 @@ public:
col_res = ColVecResult::create();
auto & vec_res = col_res->getData();
vec_res.resize(block.rows());
vec_res.resize(col_left_raw->size());
if (col_left && col_right)
{
@ -1171,6 +1171,7 @@ class FunctionBinaryArithmeticWithConstants : public FunctionBinaryArithmetic<Op
public:
using Base = FunctionBinaryArithmetic<Op, Name, valid_on_default_arguments>;
using Monotonicity = typename Base::Monotonicity;
using Block = typename Base::Block;
static FunctionPtr create(
const ColumnWithTypeAndName & left_,
@ -1194,21 +1195,25 @@ public:
{
if (left.column && isColumnConst(*left.column) && arguments.size() == 1)
{
Block block_with_constant
ColumnsWithTypeAndName block_with_constant
= {{left.column->cloneResized(input_rows_count), left.type, left.name},
block.getByPosition(arguments[0]),
block.getByPosition(result)};
Base::executeImpl(block_with_constant, {0, 1}, 2, input_rows_count);
block.getByPosition(result) = block_with_constant.getByPosition(2);
FunctionArguments args(block_with_constant);
Base::executeImpl(args, {0, 1}, 2, input_rows_count);
block.getByPosition(result) = block_with_constant[2];
}
else if (right.column && isColumnConst(*right.column) && arguments.size() == 1)
{
Block block_with_constant
ColumnsWithTypeAndName block_with_constant
= {block.getByPosition(arguments[0]),
{right.column->cloneResized(input_rows_count), right.type, right.name},
block.getByPosition(result)};
Base::executeImpl(block_with_constant, {0, 1}, 2, input_rows_count);
block.getByPosition(result) = block_with_constant.getByPosition(2);
FunctionArguments args(block_with_constant);
Base::executeImpl(args, {0, 1}, 2, input_rows_count);
block.getByPosition(result) = block_with_constant[2];
}
else
Base::executeImpl(block, arguments, result, input_rows_count);
@ -1242,13 +1247,15 @@ public:
{
auto transform = [&](const Field & point)
{
Block block_with_constant
ColumnsWithTypeAndName block_with_constant
= {{left.column->cloneResized(1), left.type, left.name},
{right.type->createColumnConst(1, point), right.type, right.name},
{nullptr, return_type, ""}};
Base::executeImpl(block_with_constant, {0, 1}, 2, 1);
FunctionArguments args(block_with_constant);
Base::executeImpl(args, {0, 1}, 2, 1);
Field point_transformed;
block_with_constant.getByPosition(2).column->get(0, point_transformed);
block_with_constant[2].column->get(0, point_transformed);
return point_transformed;
};
transform(left_point);
@ -1277,13 +1284,15 @@ public:
{
auto transform = [&](const Field & point)
{
Block block_with_constant
ColumnsWithTypeAndName block_with_constant
= {{left.type->createColumnConst(1, point), left.type, left.name},
{right.column->cloneResized(1), right.type, right.name},
{nullptr, return_type, ""}};
Base::executeImpl(block_with_constant, {0, 1}, 2, 1);
FunctionArguments args(block_with_constant);
Base::executeImpl(args, {0, 1}, 2, 1);
Field point_transformed;
block_with_constant.getByPosition(2).column->get(0, point_transformed);
block_with_constant[2].column->get(0, point_transformed);
return point_transformed;
};

View File

@ -304,7 +304,7 @@ private:
template <typename FromDataType, typename ToDataType, typename Transform>
struct DateTimeAddIntervalImpl
{
static void execute(Transform transform, Block & block, const ColumnNumbers & arguments, size_t result)
static void execute(Transform transform, FunctionArguments & block, const ColumnNumbers & arguments, size_t result)
{
using FromValueType = typename FromDataType::FieldType;
using FromColumnType = typename FromDataType::ColumnType;
@ -312,7 +312,7 @@ struct DateTimeAddIntervalImpl
auto op = Adder<Transform>{std::move(transform)};
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block, arguments, 2, 0);
const DateLUTImpl & time_zone = extractTimeZoneFromFunctionArguments(block.data, arguments, 2, 0);
const ColumnPtr source_col = block.getByPosition(arguments[0]).column;

View File

@ -71,7 +71,9 @@ public:
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
{
std::string time_zone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0);
if (time_zone.empty())
/// only validate the time_zone part if the number of arguments is 2. This is mainly
/// to accommodate functions like toStartOfDay(today()), toStartOfDay(yesterday()) etc.
if (arguments.size() == 2 && time_zone.empty())
throw Exception(
"Function " + getName() + " supports a 2nd argument (optional) that must be non-empty and be a valid time zone",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

View File

@ -2,6 +2,7 @@
#include <Functions/IFunctionAdaptors.h>
#include <Common/IFactoryWithAliases.h>
#include <Interpreters/Context.h>
#include <functional>
#include <memory>

View File

@ -51,14 +51,14 @@ Columns convertConstTupleToConstantElements(const ColumnConst & column)
}
static Block createBlockWithNestedColumnsImpl(const Block & block, const std::unordered_set<size_t> & args)
static ColumnsWithTypeAndName createBlockWithNestedColumnsImpl(const ColumnsWithTypeAndName & columns, const std::unordered_set<size_t> & args)
{
Block res;
size_t columns = block.columns();
ColumnsWithTypeAndName res;
size_t num_columns = columns.size();
for (size_t i = 0; i < columns; ++i)
for (size_t i = 0; i < num_columns; ++i)
{
const auto & col = block.getByPosition(i);
const auto & col = columns[i];
if (args.count(i) && col.type->isNullable())
{
@ -66,40 +66,40 @@ static Block createBlockWithNestedColumnsImpl(const Block & block, const std::un
if (!col.column)
{
res.insert({nullptr, nested_type, col.name});
res.emplace_back(ColumnWithTypeAndName{nullptr, nested_type, col.name});
}
else if (const auto * nullable = checkAndGetColumn<ColumnNullable>(*col.column))
{
const auto & nested_col = nullable->getNestedColumnPtr();
res.insert({nested_col, nested_type, col.name});
res.emplace_back(ColumnWithTypeAndName{nested_col, nested_type, col.name});
}
else if (const auto * const_column = checkAndGetColumn<ColumnConst>(*col.column))
{
const auto & nested_col = checkAndGetColumn<ColumnNullable>(const_column->getDataColumn())->getNestedColumnPtr();
res.insert({ ColumnConst::create(nested_col, col.column->size()), nested_type, col.name});
res.emplace_back(ColumnWithTypeAndName{ ColumnConst::create(nested_col, col.column->size()), nested_type, col.name});
}
else
throw Exception("Illegal column for DataTypeNullable", ErrorCodes::ILLEGAL_COLUMN);
}
else
res.insert(col);
res.emplace_back(col);
}
return res;
}
Block createBlockWithNestedColumns(const Block & block, const ColumnNumbers & args)
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns, const ColumnNumbers & args)
{
std::unordered_set<size_t> args_set(args.begin(), args.end());
return createBlockWithNestedColumnsImpl(block, args_set);
return createBlockWithNestedColumnsImpl(columns, args_set);
}
Block createBlockWithNestedColumns(const Block & block, const ColumnNumbers & args, size_t result)
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result)
{
std::unordered_set<size_t> args_set(args.begin(), args.end());
args_set.insert(result);
return createBlockWithNestedColumnsImpl(block, args_set);
return createBlockWithNestedColumnsImpl(columns, args_set);
}
void validateArgumentType(const IFunction & func, const DataTypes & arguments,

View File

@ -85,10 +85,10 @@ Columns convertConstTupleToConstantElements(const ColumnConst & column);
/// Returns the copy of a given block in which each column specified in
/// the "arguments" parameter is replaced with its respective nested
/// column if it is nullable.
Block createBlockWithNestedColumns(const Block & block, const ColumnNumbers & args);
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns, const ColumnNumbers & args);
/// Similar function as above. Additionally transform the result type if needed.
Block createBlockWithNestedColumns(const Block & block, const ColumnNumbers & args, size_t result);
ColumnsWithTypeAndName createBlockWithNestedColumns(const ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result);
/// Checks argument type at specified index with predicate.
/// throws if there is no argument at specified index or if predicate returns false.

View File

@ -19,11 +19,11 @@ namespace ErrorCodes
template <bool or_null>
void ExecutableFunctionJoinGet<or_null>::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t)
{
Block keys;
ColumnsWithTypeAndName keys;
for (size_t i = 2; i < arguments.size(); ++i)
{
auto key = block.getByPosition(arguments[i]);
keys.insert(std::move(key));
keys.emplace_back(std::move(key));
}
block.getByPosition(result) = join->joinGet(keys, result_block);
}
@ -31,7 +31,7 @@ void ExecutableFunctionJoinGet<or_null>::execute(Block & block, const ColumnNumb
template <bool or_null>
ExecutableFunctionImplPtr FunctionJoinGet<or_null>::prepare(const Block &, const ColumnNumbers &, size_t) const
{
return std::make_unique<ExecutableFunctionJoinGet<or_null>>(join, Block{{return_type->createColumn(), return_type, attr_name}});
return std::make_unique<ExecutableFunctionJoinGet<or_null>>(join, DB::Block{{return_type->createColumn(), return_type, attr_name}});
}
static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & context)

View File

@ -1,6 +1,7 @@
#include <Functions/IFunctionImpl.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <Core/Block.h>
namespace DB
{
@ -13,7 +14,7 @@ template <bool or_null>
class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl
{
public:
ExecutableFunctionJoinGet(HashJoinPtr join_, const Block & result_block_)
ExecutableFunctionJoinGet(HashJoinPtr join_, const DB::Block & result_block_)
: join(std::move(join_)), result_block(result_block_) {}
static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet";
@ -28,7 +29,7 @@ public:
private:
HashJoinPtr join;
Block result_block;
DB::Block result_block;
};
template <bool or_null>

View File

@ -158,7 +158,7 @@ public:
#endif
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
void executeImpl(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
{
selector.selectAndExecute(block, arguments, result, input_rows_count);
}

View File

@ -35,7 +35,7 @@ public:
return 1;
}
bool isInjective(const Block &) const override
bool isInjective(const ColumnsWithTypeAndName &) const override
{
return is_injective;
}

View File

@ -117,7 +117,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return is_injective; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return is_injective; }
bool useDefaultImplementationForConstants() const override { return true; }

View File

@ -72,7 +72,7 @@ public:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -326,7 +326,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return mask_tail_octets == 0; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return mask_tail_octets == 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -447,7 +447,7 @@ public:
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -546,7 +546,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -739,7 +739,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -837,7 +837,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -941,7 +941,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -1224,7 +1224,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
@ -1313,7 +1313,7 @@ public:
}
bool isVariadic() const override { return true; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
@ -1408,7 +1408,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{

View File

@ -688,9 +688,9 @@ private:
using RightDataType = typename Types::RightType;
if (check_decimal_overflow)
DecimalComparison<LeftDataType, RightDataType, Op, true>(block, result, col_left, col_right);
DecimalComparison<LeftDataType, RightDataType, Op, true>(block.data, result, col_left, col_right);
else
DecimalComparison<LeftDataType, RightDataType, Op, false>(block, result, col_left, col_right);
DecimalComparison<LeftDataType, RightDataType, Op, false>(block.data, result, col_left, col_right);
return true;
};
@ -852,13 +852,14 @@ private:
{
auto column_converted = type_to_compare->createColumnConst(input_rows_count, converted);
Block tmp_block
ColumnsWithTypeAndName tmp_block_columns
{
{ left_const ? column_converted : col_left_untyped->getPtr(), type_to_compare, "" },
{ !left_const ? column_converted : col_right_untyped->getPtr(), type_to_compare, "" },
block.getByPosition(result)
};
FunctionArguments tmp_block(tmp_block_columns);
executeImpl(tmp_block, {0, 1}, 2, input_rows_count);
block.getByPosition(result).column = std::move(tmp_block.getByPosition(2).column);
@ -949,24 +950,24 @@ private:
ColumnsWithTypeAndName convolution_types(tuple_size);
Block tmp_block;
ColumnsWithTypeAndName tmp_block;
for (size_t i = 0; i < tuple_size; ++i)
{
tmp_block.insert(x[i]);
tmp_block.insert(y[i]);
tmp_block.emplace_back(x[i]);
tmp_block.emplace_back(y[i]);
auto impl = func_compare->build({x[i], y[i]});
convolution_types[i].type = impl->getReturnType();
/// Comparison of the elements.
tmp_block.insert({ nullptr, impl->getReturnType(), "" });
tmp_block.emplace_back(ColumnWithTypeAndName{ nullptr, impl->getReturnType(), "" });
impl->execute(tmp_block, {i * 3, i * 3 + 1}, i * 3 + 2, input_rows_count);
}
if (tuple_size == 1)
{
/// Do not call AND for single-element tuple.
block.getByPosition(result).column = tmp_block.getByPosition(2).column;
block.getByPosition(result).column = tmp_block[2].column;
return;
}
@ -977,10 +978,10 @@ private:
convolution_args[i] = i * 3 + 2;
auto impl = func_convolution->build(convolution_types);
tmp_block.insert({ nullptr, impl->getReturnType(), "" });
tmp_block.emplace_back(ColumnWithTypeAndName{ nullptr, impl->getReturnType(), "" });
impl->execute(tmp_block, convolution_args, tuple_size * 3, input_rows_count);
block.getByPosition(result).column = tmp_block.getByPosition(tuple_size * 3).column;
block.getByPosition(result).column = tmp_block[tuple_size * 3].column;
}
void executeTupleLessGreaterImpl(
@ -996,34 +997,34 @@ private:
size_t tuple_size,
size_t input_rows_count) const
{
Block tmp_block;
ColumnsWithTypeAndName tmp_block;
/// Pairwise comparison of the inequality of all elements; on the equality of all elements except the last.
/// (x[i], y[i], x[i] < y[i], x[i] == y[i])
for (size_t i = 0; i < tuple_size; ++i)
{
tmp_block.insert(x[i]);
tmp_block.insert(y[i]);
tmp_block.emplace_back(x[i]);
tmp_block.emplace_back(y[i]);
tmp_block.insert(ColumnWithTypeAndName()); // pos == i * 4 + 2
tmp_block.emplace_back(ColumnWithTypeAndName()); // pos == i * 4 + 2
if (i + 1 != tuple_size)
{
auto impl_head = func_compare_head->build({x[i], y[i]});
tmp_block.getByPosition(i * 4 + 2).type = impl_head->getReturnType();
tmp_block[i * 4 + 2].type = impl_head->getReturnType();
impl_head->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count);
tmp_block.insert(ColumnWithTypeAndName()); // i * 4 + 3
tmp_block.emplace_back(ColumnWithTypeAndName()); // i * 4 + 3
auto impl_equals = func_equals->build({x[i], y[i]});
tmp_block.getByPosition(i * 4 + 3).type = impl_equals->getReturnType();
tmp_block[i * 4 + 3].type = impl_equals->getReturnType();
impl_equals->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 3, input_rows_count);
}
else
{
auto impl_tail = func_compare_tail->build({x[i], y[i]});
tmp_block.getByPosition(i * 4 + 2).type = impl_tail->getReturnType();
tmp_block[i * 4 + 2].type = impl_tail->getReturnType();
impl_tail->execute(tmp_block, {i * 4, i * 4 + 1}, i * 4 + 2, input_rows_count);
}
}
@ -1039,31 +1040,31 @@ private:
{
--i;
size_t and_lhs_pos = tmp_block.columns() - 1; // res
size_t and_lhs_pos = tmp_block.size() - 1; // res
size_t and_rhs_pos = i * 4 + 3; // `x == y`[i]
tmp_block.insert(ColumnWithTypeAndName());
tmp_block.emplace_back(ColumnWithTypeAndName());
ColumnsWithTypeAndName and_args = {{ nullptr, tmp_block.getByPosition(and_lhs_pos).type, "" },
{ nullptr, tmp_block.getByPosition(and_rhs_pos).type, "" }};
ColumnsWithTypeAndName and_args = {{ nullptr, tmp_block[and_lhs_pos].type, "" },
{ nullptr, tmp_block[and_rhs_pos].type, "" }};
auto func_and_adaptor = func_and->build(and_args);
tmp_block.getByPosition(tmp_block.columns() - 1).type = func_and_adaptor->getReturnType();
func_and_adaptor->execute(tmp_block, {and_lhs_pos, and_rhs_pos}, tmp_block.columns() - 1, input_rows_count);
tmp_block[tmp_block.size() - 1].type = func_and_adaptor->getReturnType();
func_and_adaptor->execute(tmp_block, {and_lhs_pos, and_rhs_pos}, tmp_block.size() - 1, input_rows_count);
size_t or_lhs_pos = tmp_block.columns() - 1; // (res && `x == y`[i])
size_t or_lhs_pos = tmp_block.size() - 1; // (res && `x == y`[i])
size_t or_rhs_pos = i * 4 + 2; // `x < y`[i]
tmp_block.insert(ColumnWithTypeAndName());
tmp_block.emplace_back(ColumnWithTypeAndName());
ColumnsWithTypeAndName or_args = {{ nullptr, tmp_block.getByPosition(or_lhs_pos).type, "" },
{ nullptr, tmp_block.getByPosition(or_rhs_pos).type, "" }};
ColumnsWithTypeAndName or_args = {{ nullptr, tmp_block[or_lhs_pos].type, "" },
{ nullptr, tmp_block[or_rhs_pos].type, "" }};
auto func_or_adaptor = func_or->build(or_args);
tmp_block.getByPosition(tmp_block.columns() - 1).type = func_or_adaptor->getReturnType();
func_or_adaptor->execute(tmp_block, {or_lhs_pos, or_rhs_pos}, tmp_block.columns() - 1, input_rows_count);
tmp_block[tmp_block.size() - 1].type = func_or_adaptor->getReturnType();
func_or_adaptor->execute(tmp_block, {or_lhs_pos, or_rhs_pos}, tmp_block.size() - 1, input_rows_count);
}
block.getByPosition(result).column = tmp_block.getByPosition(tmp_block.columns() - 1).column;
block.getByPosition(result).column = tmp_block[tmp_block.size() - 1].column;
}
void executeGenericIdenticalTypes(Block & block, size_t result, const IColumn * c0, const IColumn * c1) const

View File

@ -101,7 +101,7 @@ struct ConvertImpl
using ToFieldType = typename ToDataType::FieldType;
template <typename Additions = void *>
static void NO_SANITIZE_UNDEFINED execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/,
static void NO_SANITIZE_UNDEFINED execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/,
Additions additions [[maybe_unused]] = Additions())
{
const ColumnWithTypeAndName & named_from = block.getByPosition(arguments[0]);
@ -441,7 +441,7 @@ struct FormatImpl<DataTypeDecimal<FieldType>>
template <typename FieldType, typename Name>
struct ConvertImpl<DataTypeEnum<FieldType>, DataTypeNumber<FieldType>, Name>
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
block.getByPosition(result).column = block.getByPosition(arguments[0]).column;
}
@ -454,7 +454,7 @@ struct ConvertImpl<FromDataType, std::enable_if_t<!std::is_same_v<FromDataType,
using FromFieldType = typename FromDataType::FieldType;
using ColVecType = std::conditional_t<IsDecimalNumber<FromFieldType>, ColumnDecimal<FromFieldType>, ColumnVector<FromFieldType>>;
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
const auto & col_with_type_and_name = block.getByPosition(arguments[0]);
const auto & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
@ -463,7 +463,7 @@ struct ConvertImpl<FromDataType, std::enable_if_t<!std::is_same_v<FromDataType,
/// For argument of DateTime type, second argument with time zone could be specified.
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime> || std::is_same_v<FromDataType, DataTypeDateTime64>)
time_zone = &extractTimeZoneFromFunctionArguments(block, arguments, 1, 0);
time_zone = &extractTimeZoneFromFunctionArguments(block.data, arguments, 1, 0);
if (const auto col_from = checkAndGetColumn<ColVecType>(col_with_type_and_name.column.get()))
{
@ -508,7 +508,7 @@ struct ConvertImpl<FromDataType, std::enable_if_t<!std::is_same_v<FromDataType,
/// Generic conversion of any type to String.
struct ConvertImplGenericToString
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result)
{
const auto & col_with_type_and_name = block.getByPosition(arguments[0]);
const IDataType & type = *col_with_type_and_name.type;
@ -605,7 +605,7 @@ inline bool tryParseImpl<DataTypeDateTime>(DataTypeDateTime::FieldType & x, Read
/** Throw exception with verbose message when string value is not parsed completely.
*/
[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, Block & block, size_t result)
[[noreturn]] inline void throwExceptionForIncompletelyParsedValue(ReadBuffer & read_buffer, FunctionArguments & block, size_t result)
{
const IDataType & to_type = *block.getByPosition(result).type;
@ -670,7 +670,7 @@ struct ConvertThroughParsing
}
template <typename Additions = void *>
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count,
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count,
Additions additions [[maybe_unused]] = Additions())
{
using ColVecTo = typename ToDataType::ColumnType;
@ -687,7 +687,7 @@ struct ConvertThroughParsing
local_time_zone = &dt_col->getTimeZone();
else
{
local_time_zone = &extractTimeZoneFromFunctionArguments(block, arguments, 1, 0);
local_time_zone = &extractTimeZoneFromFunctionArguments(block.data, arguments, 1, 0);
}
if constexpr (parsing_mode == ConvertFromStringParsingMode::BestEffort || parsing_mode == ConvertFromStringParsingMode::BestEffortUS)
@ -865,7 +865,7 @@ struct ConvertImpl<std::enable_if_t<!std::is_same_v<ToDataType, DataTypeFixedStr
/// Generic conversion of any type from String. Used for complex types: Array and Tuple.
struct ConvertImplGenericFromString
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result)
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result)
{
const IColumn & col_from = *block.getByPosition(arguments[0]).column;
size_t size = col_from.size();
@ -920,7 +920,7 @@ struct ConvertImpl<DataTypeString, DataTypeUInt32, NameToUnixTimestamp>
template <typename T, typename Name>
struct ConvertImpl<std::enable_if_t<!T::is_parametric, T>, T, Name>
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
block.getByPosition(result).column = block.getByPosition(arguments[0]).column;
}
@ -933,7 +933,7 @@ struct ConvertImpl<std::enable_if_t<!T::is_parametric, T>, T, Name>
template <typename Name>
struct ConvertImpl<DataTypeFixedString, DataTypeString, Name>
{
static void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
static void execute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
if (const ColumnFixedString * col_from = checkAndGetColumn<ColumnFixedString>(block.getByPosition(arguments[0]).column.get()))
{
@ -1048,7 +1048,7 @@ public:
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool isInjective(const Block &) const override { return std::is_same_v<Name, NameToString>; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return std::is_same_v<Name, NameToString>; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
@ -1952,7 +1952,7 @@ private:
return [function_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
{
function_adaptor->execute(block, arguments, result, input_rows_count);
function_adaptor->execute(block.data, arguments, result, input_rows_count);
};
}
@ -1966,7 +1966,7 @@ private:
return [function_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
{
function_adaptor->execute(block, arguments, result, input_rows_count);
function_adaptor->execute(block.data, arguments, result, input_rows_count);
};
}
@ -1994,7 +1994,7 @@ private:
return [function_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
{
function_adaptor->execute(block, arguments, result, input_rows_count);
function_adaptor->execute(block.data, arguments, result, input_rows_count);
};
}
@ -2093,14 +2093,15 @@ private:
if (const ColumnArray * col_array = checkAndGetColumn<ColumnArray>(array_arg.column.get()))
{
/// create block for converting nested column containing original and result columns
Block nested_block
ColumnsWithTypeAndName nested_block_columns
{
{ col_array->getDataPtr(), from_nested_type, "" },
{ nullptr, to_nested_type, "" }
};
Block nested_block(nested_block_columns);
/// convert nested column
nested_function(nested_block, {0}, 1, nested_block.rows());
nested_function(nested_block, {0}, 1, nested_block_columns.front().column->size());
/// set converted nested column to result
block.getByPosition(result).column = ColumnArray::create(nested_block.getByPosition(1).column, col_array->getOffsetsPtr());
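To make the interleaved old/new lines above easier to follow: the Array cast wrapper now builds a plain ColumnsWithTypeAndName for the nested conversion and only wraps it in the Block adapter (an alias for the FunctionArguments class introduced later in this diff) for the call itself. A minimal sketch of the pattern (not itself part of the diff), assuming col_array, from_nested_type, to_nested_type and nested_function are in scope as in the hunk above:
/// Illustrative sketch only; names taken from the hunk above.
ColumnsWithTypeAndName nested_columns
{
    { col_array->getDataPtr(), from_nested_type, "" },   /// source nested column
    { nullptr, to_nested_type, "" }                      /// empty slot for the converted column
};
Block nested_block(nested_columns);   /// here Block refers to the FunctionArguments adapter
nested_function(nested_block, {0}, 1, nested_columns.front().column->size());
/// The converted nested column is now available as nested_block.getByPosition(1).column.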
@ -2145,21 +2146,23 @@ private:
const auto col = block.getByPosition(arguments.front()).column.get();
/// copy tuple elements to a separate block
Block element_block;
ColumnsWithTypeAndName element_block_columns;
size_t tuple_size = from_element_types.size();
const ColumnTuple & column_tuple = typeid_cast<const ColumnTuple &>(*col);
/// create columns for source elements
for (size_t i = 0; i < tuple_size; ++i)
element_block.insert({ column_tuple.getColumns()[i], from_element_types[i], "" });
element_block_columns.emplace_back(ColumnWithTypeAndName{ column_tuple.getColumns()[i], from_element_types[i], "" });
/// create columns for converted elements
for (const auto & to_element_type : to_element_types)
element_block.insert({ nullptr, to_element_type, "" });
element_block_columns.emplace_back(ColumnWithTypeAndName{ nullptr, to_element_type, "" });
/// insert column for converted tuple
element_block.insert({ nullptr, std::make_shared<DataTypeTuple>(to_element_types), "" });
element_block_columns.emplace_back(ColumnWithTypeAndName{ nullptr, std::make_shared<DataTypeTuple>(to_element_types), "" });
FunctionArguments element_block(element_block_columns);
/// invoke conversion for each element
for (const auto idx_element_wrapper : ext::enumerate(element_wrappers))
@ -2197,7 +2200,7 @@ private:
return [func_or_adaptor] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
{
func_or_adaptor->execute(block, arguments, result, input_rows_count);
func_or_adaptor->execute(block.data, arguments, result, input_rows_count);
};
}
else
@ -2422,22 +2425,24 @@ private:
const auto & nullable_type = static_cast<const DataTypeNullable &>(*ret_type);
const auto & nested_type = nullable_type.getNestedType();
Block tmp_block;
ColumnsWithTypeAndName tmp_block_columns;
if (source_is_nullable)
tmp_block = createBlockWithNestedColumns(block, arguments);
tmp_block_columns = createBlockWithNestedColumns(block.data, arguments);
else
tmp_block = block;
tmp_block_columns = block.data;
size_t tmp_res_index = block.columns();
tmp_block.insert({nullptr, nested_type, ""});
tmp_block_columns.emplace_back(ColumnWithTypeAndName {nullptr, nested_type, ""});
/// Add original ColumnNullable for createStringToEnumWrapper()
if (source_is_nullable)
{
if (arguments.size() != 1)
throw Exception("Invalid number of arguments", ErrorCodes::LOGICAL_ERROR);
tmp_block.insert(block.getByPosition(arguments.front()));
tmp_block_columns.emplace_back(block.getByPosition(arguments.front()));
}
FunctionArguments tmp_block(tmp_block_columns);
/// Perform the requested conversion.
wrapper(tmp_block, arguments, tmp_res_index, input_rows_count);
@ -2448,7 +2453,7 @@ private:
throw Exception("Couldn't convert " + block.getByPosition(arguments[0]).type->getName() + " to "
+ nested_type->getName() + " in " + " prepareRemoveNullable wrapper.", ErrorCodes::LOGICAL_ERROR);
res.column = wrapInNullable(tmp_res.column, Block({block.getByPosition(arguments[0]), tmp_res}), {0}, 1, input_rows_count);
res.column = wrapInNullable(tmp_res.column, {block.getByPosition(arguments[0]), tmp_res}, {0}, 1, input_rows_count);
};
}
else if (source_is_nullable)
@ -2457,7 +2462,7 @@ private:
return [wrapper, skip_not_null_check] (Block & block, const ColumnNumbers & arguments, const size_t result, size_t input_rows_count)
{
Block tmp_block = createBlockWithNestedColumns(block, arguments, result);
auto tmp_block_columns = createBlockWithNestedColumns(block.data, arguments, result);
/// Check that all values are not-NULL.
/// Check can be skipped in case if LowCardinality dictionary is transformed.
@ -2473,6 +2478,7 @@ private:
ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN};
}
FunctionArguments tmp_block(tmp_block_columns);
wrapper(tmp_block, arguments, result, input_rows_count);
block.getByPosition(result).column = tmp_block.getByPosition(result).column;
};


@ -593,7 +593,7 @@ public:
/// For the purpose of query optimization, we assume this function to be injective
/// even in the face of the fact that there are many different cities named Moscow.
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{


@ -283,7 +283,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) const override
bool isInjective(const ColumnsWithTypeAndName & sample_block) const override
{
return helper.isDictGetFunctionInjective(sample_block);
}
@ -813,7 +813,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) const override
bool isInjective(const ColumnsWithTypeAndName & sample_block) const override
{
return helper.isDictGetFunctionInjective(sample_block);
}
@ -1385,7 +1385,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) const override
bool isInjective(const ColumnsWithTypeAndName & sample_block) const override
{
return helper.isDictGetFunctionInjective(sample_block);
}
@ -1533,7 +1533,7 @@ private:
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0, 1}; }
bool isInjective(const Block & sample_block) const override
bool isInjective(const ColumnsWithTypeAndName & sample_block) const override
{
return helper.isDictGetFunctionInjective(sample_block);
}
@ -1672,7 +1672,7 @@ public:
private:
size_t getNumberOfArguments() const override { return 2; }
bool isInjective(const Block & /*sample_block*/) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName & /*sample_block*/) const override { return true; }
bool useDefaultImplementationForConstants() const final { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const final { return {0}; }


@ -5,6 +5,7 @@ namespace DB
{
class ExternalModelsLoader;
class Context;
/// Evaluate external model.
/// First argument - model name, the others - model arguments.


@ -42,7 +42,7 @@ public:
}
size_t getNumberOfArguments() const override { return 1; }
bool isInjective(const Block &) const override { return true; }
bool isInjective(const ColumnsWithTypeAndName &) const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{


@ -689,7 +689,7 @@ public:
#endif
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
void executeImpl(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
{
selector.selectAndExecute(block, arguments, result, input_rows_count);
}
@ -1086,7 +1086,7 @@ public:
#endif
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
void executeImpl(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
{
selector.selectAndExecute(block, arguments, result, input_rows_count);
}


@ -10,7 +10,7 @@ namespace ErrorCodes
}
std::vector<FunctionJSONHelpers::Move> FunctionJSONHelpers::prepareMoves(const char * function_name, Block & block, const ColumnNumbers & arguments, size_t first_index_argument, size_t num_index_arguments)
std::vector<FunctionJSONHelpers::Move> FunctionJSONHelpers::prepareMoves(const char * function_name, FunctionArguments & block, const ColumnNumbers & arguments, size_t first_index_argument, size_t num_index_arguments)
{
std::vector<Move> moves;
moves.reserve(num_index_arguments);


@ -55,7 +55,7 @@ public:
class Executor
{
public:
static void run(Block & block, const ColumnNumbers & arguments, size_t result_pos, size_t input_rows_count)
static void run(FunctionArguments & block, const ColumnNumbers & arguments, size_t result_pos, size_t input_rows_count)
{
MutableColumnPtr to{block.getByPosition(result_pos).type->createColumn()};
to->reserve(input_rows_count);
@ -94,7 +94,7 @@ public:
Impl<JSONParser> impl;
/// prepare() does Impl-specific preparation before handling each row.
if constexpr (has_member_function_prepare<void (Impl<JSONParser>::*)(const char *, const Block &, const ColumnNumbers &, size_t)>::value)
if constexpr (has_member_function_prepare<void (Impl<JSONParser>::*)(const char *, const FunctionArguments &, const ColumnNumbers &, size_t)>::value)
impl.prepare(Name::name, block, arguments, result_pos);
using Element = typename JSONParser::Element;
@ -166,11 +166,11 @@ private:
String key;
};
static std::vector<Move> prepareMoves(const char * function_name, Block & block, const ColumnNumbers & arguments, size_t first_index_argument, size_t num_index_arguments);
static std::vector<Move> prepareMoves(const char * function_name, FunctionArguments & block, const ColumnNumbers & arguments, size_t first_index_argument, size_t num_index_arguments);
/// Performs moves of types MoveType::Index and MoveType::ConstIndex.
template <typename JSONParser>
static bool performMoves(const Block & block, const ColumnNumbers & arguments, size_t row,
static bool performMoves(const FunctionArguments & block, const ColumnNumbers & arguments, size_t row,
const typename JSONParser::Element & document, const std::vector<Move> & moves,
typename JSONParser::Element & element, std::string_view & last_key)
{
@ -279,7 +279,7 @@ public:
String getName() const override { return Name::name; }
bool isVariadic() const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
bool useDefaultImplementationForConstants() const override { return false; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
@ -334,7 +334,7 @@ public:
static DataTypePtr getReturnType(const char *, const ColumnsWithTypeAndName &) { return std::make_shared<DataTypeUInt8>(); }
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element &, const std::string_view &)
{
@ -362,7 +362,7 @@ public:
return std::make_shared<DataTypeUInt8>();
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers &) { return 0; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers &) { return 0; }
static bool insertResultToColumn(IColumn & dest, const Element &, const std::string_view &)
{
@ -386,7 +386,7 @@ public:
return std::make_shared<DataTypeUInt64>();
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{
@ -416,7 +416,7 @@ public:
return std::make_shared<DataTypeString>();
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element &, const std::string_view & last_key)
{
@ -450,7 +450,7 @@ public:
return std::make_shared<DataTypeEnum<Int8>>(values);
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{
@ -492,7 +492,7 @@ public:
return std::make_shared<DataTypeNumber<NumberType>>();
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{
@ -557,7 +557,7 @@ public:
return std::make_shared<DataTypeUInt8>();
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{
@ -582,7 +582,7 @@ public:
return std::make_shared<DataTypeString>();
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{
@ -909,9 +909,9 @@ public:
return DataTypeFactory::instance().get(col_type_const->getValue<String>());
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 2; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 2; }
void prepare(const char * function_name, const Block & block, const ColumnNumbers &, size_t result_pos)
void prepare(const char * function_name, const FunctionArguments & block, const ColumnNumbers &, size_t result_pos)
{
extract_tree = JSONExtractTree<JSONParser>::build(function_name, block.getByPosition(result_pos).type);
}
@ -950,9 +950,9 @@ public:
return std::make_unique<DataTypeArray>(tuple_type);
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 2; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 2; }
void prepare(const char * function_name, const Block & block, const ColumnNumbers &, size_t result_pos)
void prepare(const char * function_name, const FunctionArguments & block, const ColumnNumbers &, size_t result_pos)
{
const auto & result_type = block.getByPosition(result_pos).type;
const auto tuple_type = typeid_cast<const DataTypeArray *>(result_type.get())->getNestedType();
@ -1002,7 +1002,7 @@ public:
return std::make_shared<DataTypeString>();
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{
@ -1106,7 +1106,7 @@ public:
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{
@ -1138,7 +1138,7 @@ public:
return std::make_unique<DataTypeArray>(tuple_type);
}
static size_t getNumberOfIndexArguments(const Block &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
static size_t getNumberOfIndexArguments(const FunctionArguments &, const ColumnNumbers & arguments) { return arguments.size() - 1; }
bool insertResultToColumn(IColumn & dest, const Element & element, const std::string_view &)
{


@ -554,7 +554,7 @@ DataTypePtr FunctionUnaryLogical<Impl, Name>::getReturnTypeImpl(const DataTypes
}
template <template <typename> class Impl, typename T>
bool functionUnaryExecuteType(Block & block, const ColumnNumbers & arguments, size_t result)
bool functionUnaryExecuteType(FunctionArguments & block, const ColumnNumbers & arguments, size_t result)
{
if (auto col = checkAndGetColumn<ColumnVector<T>>(block.getByPosition(arguments[0]).column.get()))
{


@ -37,7 +37,7 @@ public:
void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/) override
{
Block expr_block;
DB::Block expr_block;
for (size_t i = 0; i < arguments.size(); ++i)
{
const auto & argument = block.getByPosition(arguments[i]);


@ -102,7 +102,7 @@ public:
#endif
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
void executeImpl(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const override
{
selector.selectAndExecute(block, arguments, result, input_rows_count);
}


@ -458,7 +458,7 @@ class Dispatcher
FloatRoundingImpl<T, rounding_mode, scale_mode>,
IntegerRoundingImpl<T, rounding_mode, scale_mode, tie_breaking_mode>>;
static void apply(Block & block, const ColumnVector<T> * col, Int64 scale_arg, size_t result)
static void apply(FunctionArguments & block, const ColumnVector<T> * col, Int64 scale_arg, size_t result)
{
auto col_res = ColumnVector<T>::create();
@ -487,7 +487,7 @@ class Dispatcher
block.getByPosition(result).column = std::move(col_res);
}
static void apply(Block & block, const ColumnDecimal<T> * col, Int64 scale_arg, size_t result)
static void apply(FunctionArguments & block, const ColumnDecimal<T> * col, Int64 scale_arg, size_t result)
{
const typename ColumnDecimal<T>::Container & vec_src = col->getData();
@ -501,7 +501,7 @@ class Dispatcher
}
public:
static void apply(Block & block, const IColumn * column, Int64 scale_arg, size_t result)
static void apply(FunctionArguments & block, const IColumn * column, Int64 scale_arg, size_t result)
{
if constexpr (IsNumber<T>)
apply(block, checkAndGetColumn<ColumnVector<T>>(column), scale_arg, result);


@ -75,7 +75,7 @@ public:
}
/// Initialize by the function arguments.
void init(Block & /*block*/, const ColumnNumbers & /*arguments*/) {}
void init(FunctionArguments & /*block*/, const ColumnNumbers & /*arguments*/) {}
/// Called for each next string.
void set(Pos pos_, Pos end_)
@ -136,7 +136,7 @@ public:
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void init(Block & block, const ColumnNumbers & arguments)
void init(FunctionArguments & block, const ColumnNumbers & arguments)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(block.getByPosition(arguments[0]).column.get());
@ -204,7 +204,7 @@ public:
SplitByCharImpl::checkArguments(arguments);
}
void init(Block & block, const ColumnNumbers & arguments)
void init(FunctionArguments & block, const ColumnNumbers & arguments)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(block.getByPosition(arguments[0]).column.get());
@ -284,7 +284,7 @@ public:
}
/// Initialize by the function arguments.
void init(Block & block, const ColumnNumbers & arguments)
void init(FunctionArguments & block, const ColumnNumbers & arguments)
{
const ColumnConst * col = checkAndGetColumnConstStringOrFixedString(block.getByPosition(arguments[1]).column.get());


@ -5,12 +5,10 @@
#include <Common/LRUCache.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/Native.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Functions/FunctionHelpers.h>
@ -106,7 +104,7 @@ void ExecutableFunctionAdaptor::createLowCardinalityResultCache(size_t cache_siz
}
ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count)
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count)
{
ColumnPtr result_null_map_column;
@ -123,14 +121,14 @@ ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const Colum
for (const auto & arg : args)
{
const ColumnWithTypeAndName & elem = block.getByPosition(arg);
const ColumnWithTypeAndName & elem = columns[arg];
if (!elem.type->isNullable())
continue;
/// Const Nullable that are NULL.
if (elem.column->onlyNull())
{
auto result_type = block.getByPosition(result).type;
auto result_type = columns[result].type;
assert(result_type->isNullable());
return result_type->createColumnConstWithDefaultValue(input_rows_count);
}
@ -177,13 +175,13 @@ struct NullPresence
bool has_null_constant = false;
};
NullPresence getNullPresense(const Block & block, const ColumnNumbers & args)
NullPresence getNullPresense(const ColumnsWithTypeAndName & columns, const ColumnNumbers & args)
{
NullPresence res;
for (const auto & arg : args)
{
const auto & elem = block.getByPosition(arg);
const auto & elem = columns[arg];
if (!res.has_nullable)
res.has_nullable = elem.type->isNullable();
@ -209,44 +207,44 @@ NullPresence getNullPresense(const ColumnsWithTypeAndName & args)
return res;
}
bool allArgumentsAreConstants(const Block & block, const ColumnNumbers & args)
bool allArgumentsAreConstants(const ColumnsWithTypeAndName & columns, const ColumnNumbers & args)
{
for (auto arg : args)
if (!isColumnConst(*block.getByPosition(arg).column))
if (!isColumnConst(*columns[arg].column))
return false;
return true;
}
}
bool ExecutableFunctionAdaptor::defaultImplementationForConstantArguments(
Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run)
ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run)
{
ColumnNumbers arguments_to_remain_constants = impl->getArgumentsThatAreAlwaysConstant();
/// Check that these arguments are really constant.
for (auto arg_num : arguments_to_remain_constants)
if (arg_num < args.size() && !isColumnConst(*block.getByPosition(args[arg_num]).column))
if (arg_num < args.size() && !isColumnConst(*columns[args[arg_num]].column))
throw Exception("Argument at index " + toString(arg_num) + " for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN);
if (args.empty() || !impl->useDefaultImplementationForConstants() || !allArgumentsAreConstants(block, args))
if (args.empty() || !impl->useDefaultImplementationForConstants() || !allArgumentsAreConstants(columns, args))
return false;
Block temporary_block;
ColumnsWithTypeAndName temporary_columns;
bool have_converted_columns = false;
size_t arguments_size = args.size();
for (size_t arg_num = 0; arg_num < arguments_size; ++arg_num)
{
const ColumnWithTypeAndName & column = block.getByPosition(args[arg_num]);
const ColumnWithTypeAndName & column = columns[args[arg_num]];
if (arguments_to_remain_constants.end() != std::find(arguments_to_remain_constants.begin(), arguments_to_remain_constants.end(), arg_num))
{
temporary_block.insert({column.column->cloneResized(1), column.type, column.name});
temporary_columns.emplace_back(ColumnWithTypeAndName{column.column->cloneResized(1), column.type, column.name});
}
else
{
have_converted_columns = true;
temporary_block.insert({ assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name });
temporary_columns.emplace_back(ColumnWithTypeAndName{ assert_cast<const ColumnConst *>(column.column.get())->getDataColumnPtr(), column.type, column.name });
}
}
@ -257,39 +255,39 @@ bool ExecutableFunctionAdaptor::defaultImplementationForConstantArguments(
throw Exception("Number of arguments for function " + getName() + " doesn't match: the function requires more arguments",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
temporary_block.insert(block.getByPosition(result));
temporary_columns.emplace_back(columns[result]);
ColumnNumbers temporary_argument_numbers(arguments_size);
for (size_t i = 0; i < arguments_size; ++i)
temporary_argument_numbers[i] = i;
executeWithoutLowCardinalityColumns(temporary_block, temporary_argument_numbers, arguments_size, temporary_block.rows(), dry_run);
executeWithoutLowCardinalityColumns(temporary_columns, temporary_argument_numbers, arguments_size, 1, dry_run);
ColumnPtr result_column;
/// Extremely rare case: the function has completely const arguments,
/// but some of them were produced by a non-deterministic function.
if (temporary_block.getByPosition(arguments_size).column->size() > 1)
result_column = temporary_block.getByPosition(arguments_size).column->cloneResized(1);
if (temporary_columns[arguments_size].column->size() > 1)
result_column = temporary_columns[arguments_size].column->cloneResized(1);
else
result_column = temporary_block.getByPosition(arguments_size).column;
result_column = temporary_columns[arguments_size].column;
block.getByPosition(result).column = ColumnConst::create(result_column, input_rows_count);
columns[result].column = ColumnConst::create(result_column, input_rows_count);
return true;
}
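The hunk above is the default constant-folding path: when every argument is constant, the constants are unwrapped to their single-row data columns, the function is executed once on that one row, and the result is re-wrapped into a ColumnConst of the original row count (so a function over a million rows of constant input still runs only once). A hedged restatement of the final fold, using only names visible above:
/// Illustrative restatement of the fold above, not additional code.
ColumnPtr result_column = temporary_columns[arguments_size].column->size() > 1
    ? temporary_columns[arguments_size].column->cloneResized(1)   /// rare case: a non-deterministic function produced several rows
    : temporary_columns[arguments_size].column;
columns[result].column = ColumnConst::create(result_column, input_rows_count);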
bool ExecutableFunctionAdaptor::defaultImplementationForNulls(
Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run)
ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run)
{
if (args.empty() || !impl->useDefaultImplementationForNulls())
return false;
NullPresence null_presence = getNullPresense(block, args);
NullPresence null_presence = getNullPresense(columns, args);
if (null_presence.has_null_constant)
{
auto & result_column = block.getByPosition(result).column;
auto result_type = block.getByPosition(result).type;
auto & result_column = columns[result].column;
auto result_type = columns[result].type;
// Default implementation for nulls returns null result for null arguments,
// so the result type must be nullable.
assert(result_type->isNullable());
@ -300,10 +298,9 @@ bool ExecutableFunctionAdaptor::defaultImplementationForNulls(
if (null_presence.has_nullable)
{
Block temporary_block = createBlockWithNestedColumns(block, args, result);
executeWithoutLowCardinalityColumns(temporary_block, args, result, temporary_block.rows(), dry_run);
block.getByPosition(result).column = wrapInNullable(temporary_block.getByPosition(result).column, block, args,
result, input_rows_count);
ColumnsWithTypeAndName temporary_columns = createBlockWithNestedColumns(columns, args, result);
executeWithoutLowCardinalityColumns(temporary_columns, args, result, input_rows_count, dry_run);
columns[result].column = wrapInNullable(temporary_columns[result].column, columns, args, result, input_rows_count);
return true;
}
@ -311,27 +308,29 @@ bool ExecutableFunctionAdaptor::defaultImplementationForNulls(
}
void ExecutableFunctionAdaptor::executeWithoutLowCardinalityColumns(
Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run)
ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run)
{
if (defaultImplementationForConstantArguments(block, args, result, input_rows_count, dry_run))
if (defaultImplementationForConstantArguments(columns, args, result, input_rows_count, dry_run))
return;
if (defaultImplementationForNulls(block, args, result, input_rows_count, dry_run))
if (defaultImplementationForNulls(columns, args, result, input_rows_count, dry_run))
return;
FunctionArguments arguments(columns);
if (dry_run)
impl->executeDryRun(block, args, result, input_rows_count);
impl->executeDryRun(arguments, args, result, input_rows_count);
else
impl->execute(block, args, result, input_rows_count);
impl->execute(arguments, args, result, input_rows_count);
}
static const ColumnLowCardinality * findLowCardinalityArgument(const Block & block, const ColumnNumbers & args)
static const ColumnLowCardinality * findLowCardinalityArgument(const ColumnsWithTypeAndName & columns, const ColumnNumbers & args)
{
const ColumnLowCardinality * result_column = nullptr;
for (auto arg : args)
{
const ColumnWithTypeAndName & column = block.getByPosition(arg);
const ColumnWithTypeAndName & column = columns[arg];
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
if (result_column)
@ -345,7 +344,7 @@ static const ColumnLowCardinality * findLowCardinalityArgument(const Block & blo
}
static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
Block & block, const ColumnNumbers & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
ColumnsWithTypeAndName & columns, const ColumnNumbers & args, bool can_be_executed_on_default_arguments, size_t input_rows_count)
{
size_t num_rows = input_rows_count;
ColumnPtr indexes;
@ -353,7 +352,7 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
/// Find the first LowCardinality column and replace it with its nested dictionary.
for (auto arg : args)
{
ColumnWithTypeAndName & column = block.getByPosition(arg);
ColumnWithTypeAndName & column = columns[arg];
if (const auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
/// Single LowCardinality column is supported now.
@ -389,7 +388,7 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
/// Change size of constants.
for (auto arg : args)
{
ColumnWithTypeAndName & column = block.getByPosition(arg);
ColumnWithTypeAndName & column = columns[arg];
if (const auto * column_const = checkAndGetColumn<ColumnConst>(column.column.get()))
{
column.column = column_const->removeLowCardinality()->cloneResized(num_rows);
@ -397,37 +396,44 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
}
}
#ifndef NDEBUG
block.checkNumberOfRows(true);
#endif
return indexes;
}
static void convertLowCardinalityColumnsToFull(Block & block, const ColumnNumbers & args)
static void convertLowCardinalityColumnsToFull(ColumnsWithTypeAndName & columns, const ColumnNumbers & args)
{
for (auto arg : args)
{
ColumnWithTypeAndName & column = block.getByPosition(arg);
ColumnWithTypeAndName & column = columns[arg];
column.column = recursiveRemoveLowCardinality(column.column);
column.type = recursiveRemoveLowCardinality(column.type);
}
}
void ExecutableFunctionAdaptor::execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run)
static ColumnsWithTypeAndName cloneWithEmptyColumns(const ColumnsWithTypeAndName & columns)
{
ColumnsWithTypeAndName res;
size_t num_columns = columns.size();
for (size_t i = 0; i < num_columns; ++i)
res.emplace_back(ColumnWithTypeAndName{ nullptr, columns[i].type, columns[i].name });
return res;
}
void ExecutableFunctionAdaptor::execute(ColumnsWithTypeAndName & columns, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run)
{
if (impl->useDefaultImplementationForLowCardinalityColumns())
{
auto & res = block.safeGetByPosition(result);
Block block_without_low_cardinality = block.cloneWithoutColumns();
auto & res = columns[result];
ColumnsWithTypeAndName columns_without_low_cardinality = cloneWithEmptyColumns(columns);
for (auto arg : arguments)
block_without_low_cardinality.safeGetByPosition(arg).column = block.safeGetByPosition(arg).column;
columns_without_low_cardinality[arg].column = columns[arg].column;
if (const auto * res_low_cardinality_type = typeid_cast<const DataTypeLowCardinality *>(res.type.get()))
{
const auto * low_cardinality_column = findLowCardinalityArgument(block, arguments);
const auto * low_cardinality_column = findLowCardinalityArgument(columns, arguments);
bool can_be_executed_on_default_arguments = impl->canBeExecutedOnDefaultArguments();
bool use_cache = low_cardinality_result_cache && can_be_executed_on_default_arguments
&& low_cardinality_column && low_cardinality_column->isSharedDictionary();
@ -447,13 +453,17 @@ void ExecutableFunctionAdaptor::execute(Block & block, const ColumnNumbers & arg
}
}
block_without_low_cardinality.safeGetByPosition(result).type = res_low_cardinality_type->getDictionaryType();
columns_without_low_cardinality[result].type = res_low_cardinality_type->getDictionaryType();
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
block_without_low_cardinality, arguments, can_be_executed_on_default_arguments, input_rows_count);
columns_without_low_cardinality, arguments, can_be_executed_on_default_arguments, input_rows_count);
executeWithoutLowCardinalityColumns(block_without_low_cardinality, arguments, result, block_without_low_cardinality.rows(), dry_run);
size_t new_input_rows_count = arguments.empty()
? input_rows_count
: columns_without_low_cardinality[arguments.front()].column->size();
auto keys = block_without_low_cardinality.safeGetByPosition(result).column->convertToFullColumnIfConst();
executeWithoutLowCardinalityColumns(columns_without_low_cardinality, arguments, result, new_input_rows_count, dry_run);
auto keys = columns_without_low_cardinality[result].column->convertToFullColumnIfConst();
auto res_mut_dictionary = DataTypeLowCardinality::createColumnUnique(*res_low_cardinality_type->getDictionaryType());
ColumnPtr res_indexes = res_mut_dictionary->uniqueInsertRangeFrom(*keys, 0, keys->size());
@ -482,13 +492,13 @@ void ExecutableFunctionAdaptor::execute(Block & block, const ColumnNumbers & arg
}
else
{
convertLowCardinalityColumnsToFull(block_without_low_cardinality, arguments);
executeWithoutLowCardinalityColumns(block_without_low_cardinality, arguments, result, input_rows_count, dry_run);
res.column = block_without_low_cardinality.safeGetByPosition(result).column;
convertLowCardinalityColumnsToFull(columns_without_low_cardinality, arguments);
executeWithoutLowCardinalityColumns(columns_without_low_cardinality, arguments, result, input_rows_count, dry_run);
res.column = columns_without_low_cardinality[result].column;
}
}
else
executeWithoutLowCardinalityColumns(block, arguments, result, input_rows_count, dry_run);
executeWithoutLowCardinalityColumns(columns, arguments, result, input_rows_count, dry_run);
}
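For the LowCardinality branch above (again interleaved old/new lines): the single LowCardinality argument is replaced by its dictionary plus an index column, the function is executed only on the dictionary rows, and the saved indexes are then used to assemble the LowCardinality result. A compressed, hedged paraphrase of that flow built from the helpers defined above (result cache and error handling omitted):
/// Illustrative paraphrase of the execute() path above, not additional code.
ColumnsWithTypeAndName work = cloneWithEmptyColumns(columns);   /// keep types and names, drop the columns
for (auto arg : arguments)
    work[arg].column = columns[arg].column;                     /// copy only the argument columns
work[result].type = res_low_cardinality_type->getDictionaryType();
ColumnPtr indexes = replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
    work, arguments, can_be_executed_on_default_arguments, input_rows_count);
size_t dict_rows = arguments.empty() ? input_rows_count : work[arguments.front()].column->size();
executeWithoutLowCardinalityColumns(work, arguments, result, dict_rows, dry_run);
/// work[result].column now holds one value per dictionary key; `indexes` maps those values back to the original rows.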
void FunctionOverloadResolverAdaptor::checkNumberOfArguments(size_t number_of_arguments) const
@ -519,7 +529,7 @@ DataTypePtr FunctionOverloadResolverAdaptor::getReturnTypeWithoutLowCardinality(
if (null_presence.has_nullable)
{
Block nested_block = createBlockWithNestedColumns(
Block(arguments),
arguments,
ext::collection_cast<ColumnNumbers>(ext::range(0, arguments.size())));
auto return_type = impl->getReturnType(ColumnsWithTypeAndName(nested_block.begin(), nested_block.end()));
return makeNullable(return_type);


@ -3,7 +3,7 @@
#include <memory>
#include <Core/Names.h>
#include <Core/Block.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/ColumnNumbers.h>
#include <DataTypes/IDataType.h>
@ -40,12 +40,13 @@ class Field;
class IExecutableFunction
{
public:
virtual ~IExecutableFunction() = default;
/// Get the main function name.
virtual String getName() const = 0;
virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) = 0;
virtual void execute(ColumnsWithTypeAndName & columns, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) = 0;
virtual void createLowCardinalityResultCache(size_t cache_size) = 0;
};
@ -70,12 +71,12 @@ public:
/// Do preparations and return executable.
/// sample_block should contain data types of arguments and values of constants, if relevant.
virtual ExecutableFunctionPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const = 0;
virtual ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & sample_block, const ColumnNumbers & arguments, size_t result) const = 0;
/// TODO: make const
virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run = false)
virtual void execute(ColumnsWithTypeAndName & columns, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run = false)
{
return prepare(block, arguments, result)->execute(block, arguments, result, input_rows_count, dry_run);
return prepare(columns, arguments, result)->execute(columns, arguments, result, input_rows_count, dry_run);
}
#if USE_EMBEDDED_COMPILER
@ -110,7 +111,7 @@ public:
* There is no need to implement function if it has zero arguments.
* Must return ColumnConst with single row or nullptr.
*/
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
/** Function is called "injective" if it returns different result for different values of arguments.
* Example: hex, negate, tuple...
@ -138,7 +139,7 @@ public:
* ignore it anyway, and creating arguments just to check whether
* the function is injective or not is overkill).
*/
virtual bool isInjective(const Block & /*sample_block*/) const { return false; }
virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_block*/) const { return false; }
/** Function is called "deterministic", if it returns same result for same values of arguments.
* Most of functions are deterministic. Notable counterexample is rand().
@ -193,7 +194,7 @@ public:
/// See the comment for the same method in IFunctionBase
virtual bool isDeterministic() const = 0;
virtual bool isDeterministicInScopeOfQuery() const = 0;
virtual bool isInjective(const Block &) const = 0;
virtual bool isInjective(const ColumnsWithTypeAndName &) const = 0;
/// Override and return true if function needs to depend on the state of the data.
virtual bool isStateful() const = 0;
@ -228,6 +229,6 @@ using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
/** Return ColumnNullable of src, with null map as OR-ed null maps of args columns in blocks.
* Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL.
*/
ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count);
ColumnPtr wrapInNullable(const ColumnPtr & src, const ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count);
}
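Since wrapInNullable() now takes a plain ColumnsWithTypeAndName rather than a Block, call sites can pass a brace-initialized list containing just the columns whose null maps matter, as the cast code earlier in this diff already does. A hedged usage sketch mirroring that call site (tmp_res holds the not-yet-nullable result):
/// Illustrative only: the result's null map is the OR of the null maps of the columns passed in.
res.column = wrapInNullable(tmp_res.column,
                            {block.getByPosition(arguments[0]), tmp_res},
                            /* args = */ {0}, /* result = */ 1, input_rows_count);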


@ -14,7 +14,7 @@ public:
String getName() const final { return impl->getName(); }
void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) final;
void execute(ColumnsWithTypeAndName & columns, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) final;
void createLowCardinalityResultCache(size_t cache_size) override;
@ -25,13 +25,13 @@ private:
ExecutableFunctionLowCardinalityResultCachePtr low_cardinality_result_cache;
bool defaultImplementationForConstantArguments(
Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run);
ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run);
bool defaultImplementationForNulls(
Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run);
ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run);
void executeWithoutLowCardinalityColumns(
Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run);
ColumnsWithTypeAndName & columns, const ColumnNumbers & args, size_t result, size_t input_rows_count, bool dry_run);
};
class FunctionBaseAdaptor final : public IFunctionBase
@ -44,9 +44,10 @@ public:
const DataTypes & getArgumentTypes() const final { return impl->getArgumentTypes(); }
const DataTypePtr & getReturnType() const final { return impl->getReturnType(); }
ExecutableFunctionPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const final
ExecutableFunctionPtr prepare(const ColumnsWithTypeAndName & sample_block, const ColumnNumbers & arguments, size_t result) const final
{
return std::make_shared<ExecutableFunctionAdaptor>(impl->prepare(sample_block, arguments, result));
FunctionArguments columns(const_cast<ColumnsWithTypeAndName &>(sample_block));
return std::make_shared<ExecutableFunctionAdaptor>(impl->prepare(columns, arguments, result));
}
#if USE_EMBEDDED_COMPILER
@ -63,12 +64,12 @@ public:
bool isStateful() const final { return impl->isStateful(); }
bool isSuitableForConstantFolding() const final { return impl->isSuitableForConstantFolding(); }
ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments) const final
ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & columns, const ColumnNumbers & arguments) const final
{
return impl->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments);
return impl->getResultIfAlwaysReturnsConstantAndHasArguments(columns, arguments);
}
bool isInjective(const Block & sample_block) const final { return impl->isInjective(sample_block); }
bool isInjective(const ColumnsWithTypeAndName & sample_block) const final { return impl->isInjective(sample_block); }
bool isDeterministic() const final { return impl->isDeterministic(); }
bool isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); }
bool hasInformationAboutMonotonicity() const final { return impl->hasInformationAboutMonotonicity(); }
@ -96,7 +97,7 @@ public:
bool isDeterministicInScopeOfQuery() const final { return impl->isDeterministicInScopeOfQuery(); }
bool isInjective(const Block & block) const final { return impl->isInjective(block); }
bool isInjective(const ColumnsWithTypeAndName & columns) const final { return impl->isInjective(columns); }
bool isStateful() const final { return impl->isStateful(); }
@ -190,14 +191,14 @@ public:
}
bool isSuitableForConstantFolding() const override { return function->isSuitableForConstantFolding(); }
ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & block, const ColumnNumbers & arguments_) const override
ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & columns, const ColumnNumbers & arguments_) const override
{
return function->getResultIfAlwaysReturnsConstantAndHasArguments(block, arguments_);
return function->getResultIfAlwaysReturnsConstantAndHasArguments(columns, arguments_);
}
bool isStateful() const override { return function->isStateful(); }
bool isInjective(const Block & sample_block) const override { return function->isInjective(sample_block); }
bool isInjective(const ColumnsWithTypeAndName & sample_block) const override { return function->isInjective(sample_block); }
bool isDeterministic() const override { return function->isDeterministic(); }
@ -223,7 +224,7 @@ public:
bool isDeterministic() const override { return function->isDeterministic(); }
bool isDeterministicInScopeOfQuery() const override { return function->isDeterministicInScopeOfQuery(); }
bool isInjective(const Block &block) const override { return function->isInjective(block); }
bool isInjective(const ColumnsWithTypeAndName & columns) const override { return function->isInjective(columns); }
String getName() const override { return function->getName(); }
bool isStateful() const override { return function->isStateful(); }


@ -22,8 +22,46 @@ namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
extern const int POSITION_OUT_OF_BOUND;
}
class FunctionArguments
{
public:
explicit FunctionArguments(ColumnsWithTypeAndName & arguments) : data(arguments) {}
const ColumnWithTypeAndName & getByPosition(size_t position) const { return data[position]; }
ColumnWithTypeAndName & getByPosition(size_t position) { return data[position]; }
ColumnWithTypeAndName & safeGetByPosition(size_t position)
{
checkPosition(position);
return data[position];
}
const ColumnWithTypeAndName & safeGetByPosition(size_t position) const
{
checkPosition(position);
return data[position];
}
size_t columns() const { return data.size(); }
const ColumnsWithTypeAndName & getColumnsWithTypeAndName() const { return data; }
ColumnsWithTypeAndName & data;
private:
void checkPosition(size_t position) const
{
if (data.empty())
throw Exception("Arguments are empty", ErrorCodes::POSITION_OUT_OF_BOUND);
if (position >= data.size())
throw Exception("Position " + std::to_string(position)
+ " is out of bound in FunctionArguments::safeGetByPosition(), max position = "
+ std::to_string(data.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND);
}
};
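FunctionArguments is a thin, non-owning adapter: it stores a reference to the caller's ColumnsWithTypeAndName and re-exposes the Block-style accessors that existing function implementations rely on, so any write made through it is visible in the underlying vector. A minimal usage sketch with illustrative names, assuming ClickHouse's headers:
/// Illustrative only; the caller is assumed to have filled `columns` with argument columns plus an empty result slot.
ColumnsWithTypeAndName columns;
FunctionArguments args(columns);                  /// args.data is a reference to `columns`, nothing is copied
const auto & arg0 = args.getByPosition(0);        /// the same element as columns[0]
args.safeGetByPosition(1).column = arg0.column;   /// bounds-checked; the write lands in columns[1]
size_t width = args.columns();                    /// == columns.size()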
/// Cache for functions result if it was executed on low cardinality column.
class ExecutableFunctionLowCardinalityResultCache;
using ExecutableFunctionLowCardinalityResultCachePtr = std::shared_ptr<ExecutableFunctionLowCardinalityResultCache>;
@ -31,6 +69,8 @@ using ExecutableFunctionLowCardinalityResultCachePtr = std::shared_ptr<Executabl
class IExecutableFunctionImpl
{
public:
using Block = FunctionArguments;
virtual ~IExecutableFunctionImpl() = default;
virtual String getName() const = 0;
@ -82,6 +122,8 @@ using ExecutableFunctionImplPtr = std::unique_ptr<IExecutableFunctionImpl>;
class IFunctionBaseImpl
{
public:
using Block = FunctionArguments;
virtual ~IFunctionBaseImpl() = default;
virtual String getName() const = 0;
@ -105,9 +147,9 @@ public:
virtual bool isStateful() const { return false; }
virtual bool isSuitableForConstantFolding() const { return true; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & /*columns*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
virtual bool isInjective(const Block & /*sample_block*/) const { return false; }
virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_block*/) const { return false; }
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool hasInformationAboutMonotonicity() const { return false; }
@ -125,6 +167,8 @@ using FunctionBaseImplPtr = std::unique_ptr<IFunctionBaseImpl>;
class IFunctionOverloadResolverImpl
{
public:
using Block = FunctionArguments;
virtual ~IFunctionOverloadResolverImpl() = default;
virtual String getName() const = 0;
@ -152,7 +196,7 @@ public:
/// Properties from IFunctionOverloadResolver. See comments in IFunction.h
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool isInjective(const Block &) const { return false; }
virtual bool isInjective(const ColumnsWithTypeAndName &) const { return false; }
virtual bool isStateful() const { return false; }
virtual bool isVariadic() const { return false; }
@ -191,6 +235,8 @@ using FunctionOverloadResolverImplPtr = std::unique_ptr<IFunctionOverloadResolve
class IFunction
{
public:
using Block = FunctionArguments;
virtual ~IFunction() = default;
virtual String getName() const = 0;
@ -250,8 +296,8 @@ public:
/// Properties from IFunctionBase (see IFunction.h)
virtual bool isSuitableForConstantFolding() const { return true; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
virtual bool isInjective(const Block & /*sample_block*/) const { return false; }
virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const ColumnsWithTypeAndName & /*columns*/, const ColumnNumbers & /*arguments*/) const { return nullptr; }
virtual bool isInjective(const ColumnsWithTypeAndName & /*sample_block*/) const { return false; }
virtual bool isDeterministic() const { return true; }
virtual bool isDeterministicInScopeOfQuery() const { return true; }
virtual bool isStateful() const { return false; }


@ -141,10 +141,7 @@ struct MatchImpl
{
size_t size = offsets.size();
constexpr int flags = case_insensitive ?
Regexps::Regexp::RE_CASELESS : 0;
auto regexp = Regexps::get<like, true>(pattern, flags);
auto regexp = Regexps::get<like, true, case_insensitive>(pattern);
std::string required_substring;
bool is_trivial;


@ -204,7 +204,7 @@ public:
* If FunctionInterface is IFunction, then "executeImpl" method of the implementation will be called
* and "execute" otherwise.
*/
void selectAndExecute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const
void selectAndExecute(FunctionArguments & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) const
{
if (implementations.empty())
throw Exception("There are no available implementations for function " "TODO(dakovalkov): add name",


@ -58,21 +58,24 @@ namespace Regexps
* You must hold the ownership while using the object.
* In destructor, it returns the object back to the Pool for further reuse.
*/
template <bool like, bool no_capture>
inline Pool::Pointer get(const std::string & pattern, int flags = 0)
template <bool like, bool no_capture, bool case_insensitive = false>
inline Pool::Pointer get(const std::string & pattern)
{
/// C++11 has thread-safe function-local statics on most modern compilers.
static Pool known_regexps; /// Different variables for different pattern parameters.
return known_regexps.get(pattern, [flags, &pattern]
return known_regexps.get(pattern, [&pattern]
{
int flags_final = flags | OptimizedRegularExpression::RE_DOT_NL;
int flags = OptimizedRegularExpression::RE_DOT_NL;
if (no_capture)
flags_final |= OptimizedRegularExpression::RE_NO_CAPTURE;
flags |= OptimizedRegularExpression::RE_NO_CAPTURE;
if (case_insensitive)
flags |= Regexps::Regexp::RE_CASELESS;
ProfileEvents::increment(ProfileEvents::RegexpCreated);
return new Regexp{createRegexp<like>(pattern, flags_final)};
return new Regexp{createRegexp<like>(pattern, flags)};
});
}
}
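With this change the case-insensitive flag becomes a template parameter of Regexps::get, so each (like, no_capture, case_insensitive) combination instantiates its own function-local pool and patterns compiled with different flags can no longer collide in the cache. A hedged usage sketch matching the MatchImpl call site earlier in this diff:
/// Illustrative only: one cached pool per template-parameter combination.
auto regexp = Regexps::get</* like = */ false, /* no_capture = */ true, /* case_insensitive = */ true>(pattern);
/// `regexp` is a Pool::Pointer; the compiled object goes back to the pool when it is destroyed.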


@ -29,7 +29,7 @@ public:
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void init(Block & /*block*/, const ColumnNumbers & /*arguments*/) {}
void init(FunctionArguments & /*block*/, const ColumnNumbers & /*arguments*/) {}
/// Returns the position of the argument that is the column of rows
static size_t getStringsArgumentPosition()


@ -28,7 +28,7 @@ public:
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void init(Block & /*block*/, const ColumnNumbers & /*arguments*/) {}
void init(FunctionArguments & /*block*/, const ColumnNumbers & /*arguments*/) {}
/// Returns the position of the argument that is the column of rows
static size_t getStringsArgumentPosition()

Some files were not shown because too many files have changed in this diff.