Merge branch 'master' into i_object_storage

This commit is contained in:
alesapin 2022-05-22 12:16:10 +02:00
commit c8d92b87c8
69 changed files with 1221 additions and 180 deletions

4
.gitmodules vendored
View File

@ -268,3 +268,7 @@
[submodule "contrib/eigen"]
path = contrib/eigen
url = https://github.com/eigen-mirror/eigen
[submodule "contrib/hashidsxx"]
path = contrib/hashidsxx
url = https://github.com/schoentoon/hashidsxx.git

View File

@ -32,7 +32,8 @@ elseif (ARCH_AARCH64)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=armv8-a+crc")
elseif (ARCH_PPC64LE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -DNO_WARN_X86_INTRINSICS")
# Note that gcc and clang have support for x86 SSE2 intrinsics when building for PowerPC
set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -mcpu=power8 -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS")
elseif (ARCH_AMD64)
set (TEST_FLAG "-mssse3")

View File

@ -27,7 +27,7 @@ macro(clickhouse_strip_binary)
)
install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}/${STRIP_TARGET}.debug COMPONENT clickhouse)
install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR} COMPONENT clickhouse)
endmacro()

View File

@ -140,6 +140,7 @@ add_contrib (libpq-cmake libpq)
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (hashidsxx-cmake hashidsxx)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP)

View File

@ -2,22 +2,15 @@ set(EIGEN_LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/eigen")
add_library (_eigen INTERFACE)
option (ENABLE_MKL "Build Eigen with Intel MKL" OFF)
if (ENABLE_MKL)
set(MKL_THREADING sequential)
set(MKL_INTERFACE lp64)
find_package(MKL REQUIRED)
if (MKL_FOUND)
message("MKL INCLUDE: ${MKL_INCLUDE}")
message("MKL LIBRARIES: ${MKL_LIBRARIES}")
target_compile_definitions(_eigen INTERFACE EIGEN_USE_MKL_ALL)
target_include_directories(_eigen INTERFACE ${MKL_INCLUDE})
target_link_libraries(_eigen INTERFACE ${MKL_LIBRARIES})
endif()
endif()
# Only include MPL2 code from Eigen library
target_compile_definitions(_eigen INTERFACE EIGEN_MPL2_ONLY)
# Clang by default mimics gcc 4.2.1 compatibility but Eigen checks __GNUC__ version to enable
# a workaround for bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 fixed in 6.3
# So we fake gcc > 6.3 when building with clang
if (COMPILER_CLANG AND ARCH_PPC64LE)
target_compile_options(_eigen INTERFACE -fgnuc-version=6.4)
endif()
target_include_directories (_eigen SYSTEM INTERFACE ${EIGEN_LIBRARY_DIR})
add_library(ch_contrib::eigen ALIAS _eigen)

1
contrib/hashidsxx vendored Submodule

@ -0,0 +1 @@
Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee

View File

@ -0,0 +1,14 @@
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx")

# hashidsxx is a single-translation-unit library; list the sources
# directly in add_library instead of staging them in SRCS/HDRS variables.
add_library(_hashidsxx
    "${LIBRARY_DIR}/hashids.cpp"
    "${LIBRARY_DIR}/hashids.h"
)

# SYSTEM suppresses warnings from third-party headers for consumers.
target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")

# Namespaced alias so consumers link ch_contrib::hashidsxx and typos
# fail at configure time rather than at link time.
add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)

View File

@ -178,6 +178,7 @@ function clone_submodules
contrib/replxx
contrib/wyhash
contrib/eigen
contrib/hashidsxx
)
git submodule sync

View File

@ -12,11 +12,13 @@ Columns:
- `name` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid (Atomic database).
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name (without parameters).
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Flag that indicates whether the table is temporary.
- `data_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table data in the file system.
- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Paths to the table data in the file systems.
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) - Path to the table metadata in the file system.
@ -60,6 +62,14 @@ Columns:
- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source.
- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Database loading dependencies (list of objects which should be loaded before the current object).
- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Table loading dependencies (list of objects which should be loaded before the current object).
- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading database.
- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - Dependent loading table.
The `system.tables` table is used in `SHOW TABLES` query implementation.
**Example**
@ -95,6 +105,10 @@ lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
loading_dependencies_database: []
loading_dependencies_table: []
loading_dependent_database: []
loading_dependent_table: []
Row 2:
──────
@ -122,4 +136,8 @@ lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
loading_dependencies_database: []
loading_dependencies_table: []
loading_dependent_database: []
loading_dependent_table: []
```

View File

@ -29,12 +29,14 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH no_password`
- `IDENTIFIED WITH plaintext_password BY 'qwerty'`
- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'`
- `IDENTIFIED WITH sha256_hash BY 'hash'`
- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'`
- `IDENTIFIED WITH double_sha1_password BY 'qwerty'`
- `IDENTIFIED WITH double_sha1_hash BY 'hash'`
- `IDENTIFIED WITH ldap SERVER 'server_name'`
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
For identification with `sha256_hash` using `SALT`, the hash must be calculated from the concatenation of 'password' and 'salt'.
## User Host {#user-host}
User host is a host from which a connection to ClickHouse server could be established. The host can be specified in the `HOST` query section in the following ways:

View File

@ -12,11 +12,13 @@
- `name` ([String](../../sql-reference/data-types/string.md)) — имя таблицы.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Uuid таблицы (Atomic database).
- `engine` ([String](../../sql-reference/data-types/string.md)) — движок таблицы (без параметров).
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, указывающий на то, временная это таблица или нет.
- `data_path` ([String](../../sql-reference/data-types/string.md)) — путь к данным таблицы в файловой системе.
- `data_paths` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — пути к данным таблицы в файловых системах.
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — путь к табличным метаданным в файловой системе.
@ -60,6 +62,14 @@
- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — флаг, показывающий хранит ли таблица сама какие-то данные на диске или только обращается к какому-то другому источнику.
- `loading_dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - базы данных необходимые для загрузки объекта.
- `loading_dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - таблицы необходимые для загрузки объекта.
- `loading_dependent_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - базы данных, которым объект необходим для загрузки.
- `loading_dependent_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) - таблицы, которым объект необходим для загрузки.
Таблица `system.tables` используется при выполнении запроса `SHOW TABLES`.
**Пример**
@ -95,6 +105,10 @@ lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
loading_dependencies_database: []
loading_dependencies_table: []
loading_dependent_database: []
loading_dependent_table: []
Row 2:
──────
@ -122,4 +136,8 @@ lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
loading_dependencies_database: []
loading_dependencies_table: []
loading_dependent_database: []
loading_dependent_table: []
```

View File

@ -29,12 +29,14 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
- `IDENTIFIED WITH no_password`
- `IDENTIFIED WITH plaintext_password BY 'qwerty'`
- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'`
- `IDENTIFIED WITH sha256_hash BY 'hash'`
- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'`
- `IDENTIFIED WITH double_sha1_password BY 'qwerty'`
- `IDENTIFIED WITH double_sha1_hash BY 'hash'`
- `IDENTIFIED WITH ldap SERVER 'server_name'`
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
Для идентификации с sha256_hash используя `SALT` - хэш должен быть вычислен от конкатенации 'password' и 'salt'.
## Пользовательский хост
Пользовательский хост — это хост, с которого можно установить соединение с сервером ClickHouse. Хост задается в секции `HOST` следующими способами:

View File

@ -68,12 +68,306 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
## hex {#hex}
接受`String``unsigned integer``Date`或`DateTime`类型的参数。返回包含参数的十六进制表示的字符串。使用大写字母`A-F`。不使用`0x`前缀或`h`后缀。对于字符串,所有字节都简单地编码为两个十六进制数字。数字转换为大端(«易阅读»)格式。对于数字,去除其中较旧的零,但仅限整个字节。例如,`hex1='01'`。 `Date`被编码为自Unix时间开始以来的天数。 `DateTime`编码为自Unix时间开始以来的秒数。
返回包含参数的十六进制表示的字符串。
## unhex(str) {#unhexstr}
别名为: `HEX`
接受包含任意数量的十六进制数字的字符串并返回包含相应字节的字符串。支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容则返回一些实现定义的结果不抛出异常
如果要将结果转换为数字可以使用«reverse»和«reinterpretAsType»函数。
**语法**
``` sql
hex(arg)
```
该函数使用大写字母`A-F`,不使用任何前缀(如`0x`)或后缀(如`h`
对于整数参数,它从高到低(大端或“人类可读”顺序)打印十六进制数字(“半字节”)。它从左侧第一个非零字节开始(省略前导零字节),但即使前导数字为零,也始终打印每个字节的两个数字。
类型为[Date](../../sql-reference/data-types/date.md)和[DateTime](../../sql-reference/data-types/datetime.md)的值将被格式化为相应的整数(日期为 Epoch 以来的天数DateTime 为 Unix Timestamp 的值)。
对于[String](../../sql-reference/data-types/string.md)和[FixedString](../../sql-reference/data-types/fixedstring.md),所有字节都被简单地编码为两个十六进制数字。零字节不会被省略。
类型为[Float](../../sql-reference/data-types/float.md)和[Decimal](../../sql-reference/data-types/decimal.md)的值被编码为它们在内存中的表示。由于我们支持小端架构,它们以小端编码。零前导尾随字节不会被省略。
类型为[UUID](../data-types/uuid.md)的值被编码为大端顺序字符串。
**参数**
- `arg` — 要转换为十六进制的值。类型为[String](../../sql-reference/data-types/string.md)[UInt](../../sql-reference/data-types/int-uint.md)[Float](../../sql-reference/data-types/float.md)[Decimal](../../sql-reference/data-types/decimal.md)[Date](../../sql-reference/data-types/date.md)或者[DateTime](../../sql-reference/data-types/datetime.md)。
**返回值**
- 具有参数的十六进制表示的字符串。
类型为:[String](../../sql-reference/data-types/string.md)。
**示例**
查询语句:
``` sql
SELECT hex(1);
```
结果:
``` text
01
```
查询语句:
``` sql
SELECT hex(toFloat32(number)) AS hex_presentation FROM numbers(15, 2);
```
结果:
``` text
┌─hex_presentation─┐
│ 00007041 │
│ 00008041 │
└──────────────────┘
```
查询语句:
``` sql
SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2);
```
结果:
``` text
┌─hex_presentation─┐
│ 0000000000002E40 │
│ 0000000000003040 │
└──────────────────┘
```
查询语句:
``` sql
SELECT lower(hex(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0'))) as uuid_hex
```
结果:
``` text
┌─uuid_hex─────────────────────────┐
│ 61f0c4045cb311e7907ba6006ad3dba0 │
└──────────────────────────────────┘
```
## unhex {#unhexstr}
执行[hex](#hex)函数的相反操作。它将每对十六进制数字(在参数中)解释为一个数字,并将其转换为该数字表示的字节。返回值是一个二进制字符串 (BLOB)。
如果要将结果转换为数字,可以使用 [reverse](../../sql-reference/functions/string-functions.md#reverse) 和 [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions) 函数。
:::note
如果从 `clickhouse-client` 中调用 `unhex`,二进制字符串将使用 UTF-8 显示。
:::
别名为:`UNHEX`。
**语法**
``` sql
unhex(arg)
```
**参数**
- `arg` — 包含任意数量的十六进制数字的字符串。类型为:[String](../../sql-reference/data-types/string.md)。
支持大写和小写字母A-F。十六进制数字的数量不必是偶数。如果是奇数则最后一位数被解释为00-0F字节的低位。如果参数字符串包含除十六进制数字以外的任何内容则返回一些实现定义的结果不抛出异常。对于数字参数 unhex()不执行 hex(N) 的倒数。
**返回值**
- 二进制字符串 (BLOB)。
类型为: [String](../../sql-reference/data-types/string.md)。
**示例**
查询语句:
``` sql
SELECT unhex('303132'), UNHEX('4D7953514C');
```
结果:
``` text
┌─unhex('303132')─┬─unhex('4D7953514C')─┐
│ 012 │ MySQL │
└─────────────────┴─────────────────────┘
```
查询语句:
``` sql
SELECT reinterpretAsUInt64(reverse(unhex('FFF'))) AS num;
```
结果:
``` text
┌──num─┐
│ 4095 │
└──────┘
```
## bin {#bin}
返回一个包含参数二进制表示的字符串。
**语法**
``` sql
bin(arg)
```
别名为: `BIN`
对于整数参数,它从最高有效到最低有效(大端或“人类可读”顺序)打印 bin 数字。它从最重要的非零字节开始(省略前导零字节),但如果前导数字为零,则始终打印每个字节的八位数字。
类型为[Date](../../sql-reference/data-types/date.md)和[DateTime](../../sql-reference/data-types/datetime.md)的值被格式化为相应的整数(`Date` 为 Epoch 以来的天数,`DateTime` 为 Unix Timestamp 的值)。
对于[String](../../sql-reference/data-types/string.md)和[FixedString](../../sql-reference/data-types/fixedstring.md),所有字节都被简单地编码为八个二进制数。零字节不会被省略。
类型为[Float](../../sql-reference/data-types/float.md)和[Decimal](../../sql-reference/data-types/decimal.md)的值被编码为它们在内存中的表示。由于我们支持小端架构,它们以小端编码。零前导尾随字节不会被省略。
类型为[UUID](../data-types/uuid.md)的值被编码为大端顺序字符串。
**参数**
- `arg` — 要转换为二进制的值。类型为[String](../../sql-reference/data-types/string.md)[FixedString](../../sql-reference/data-types/fixedstring.md)[UInt](../../sql-reference/data-types/int-uint.md)[Float](../../sql-reference/data-types/float.md)[Decimal](../../sql-reference/data-types/decimal.md)[Date](../../sql-reference/data-types/date.md)或者[DateTime](../../sql-reference/data-types/datetime.md)。
**返回值**
- 具有参数的二进制表示的字符串。
类型为: [String](../../sql-reference/data-types/string.md)。
**示例**
查询语句:
``` sql
SELECT bin(14);
```
结果:
``` text
┌─bin(14)──┐
│ 00001110 │
└──────────┘
```
查询语句:
``` sql
SELECT bin(toFloat32(number)) AS bin_presentation FROM numbers(15, 2);
```
结果:
``` text
┌─bin_presentation─────────────────┐
│ 00000000000000000111000001000001 │
│ 00000000000000001000000001000001 │
└──────────────────────────────────┘
```
查询语句:
``` sql
SELECT bin(toFloat64(number)) AS bin_presentation FROM numbers(15, 2);
```
结果:
``` text
┌─bin_presentation─────────────────────────────────────────────────┐
│ 0000000000000000000000000000000000000000000000000010111001000000 │
│ 0000000000000000000000000000000000000000000000000011000001000000 │
└──────────────────────────────────────────────────────────────────┘
```
查询语句:
``` sql
SELECT bin(toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0')) as bin_uuid
```
结果:
``` text
┌─bin_uuid─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 01100001111100001100010000000100010111001011001100010001111001111001000001111011101001100000000001101010110100111101101110100000 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
## unbin {#unbinstr}
将每对二进制数字(在参数中)解释为一个数字,并将其转换为该数字表示的字节。这些函数执行与 [bin](#bin) 相反的操作。
**语法**
``` sql
unbin(arg)
```
别名为: `UNBIN`
对于数字参数,`unbin()` 不会返回 `bin()` 的倒数。如果要将结果转换为数字,可以使用[reverse](../../sql-reference/functions/string-functions.md#reverse) 和 [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264) 函数。
:::note
如果从 `clickhouse-client` 中调用 `unbin`,则使用 UTF-8 显示二进制字符串。
:::
支持二进制数字`0`和`1`。二进制位数不必是八的倍数。如果参数字符串包含二进制数字以外的任何内容,则返回一些实现定义的结果(不抛出异常)。
**参数**
- `arg` — 包含任意数量的二进制数字的字符串。类型为[String](../../sql-reference/data-types/string.md)。
**返回值**
- 二进制字符串 (BLOB)。
类型为:[String](../../sql-reference/data-types/string.md)。
**示例**
查询语句:
``` sql
SELECT UNBIN('001100000011000100110010'), UNBIN('0100110101111001010100110101000101001100');
```
结果:
``` text
┌─unbin('001100000011000100110010')─┬─unbin('0100110101111001010100110101000101001100')─┐
│ 012 │ MySQL │
└───────────────────────────────────┴───────────────────────────────────────────────────┘
```
查询语句:
``` sql
SELECT reinterpretAsUInt64(reverse(unbin('1110'))) AS num;
```
结果:
``` text
┌─num─┐
│ 14 │
└─────┘
```
## UUIDStringToNum(str) {#uuidstringtonumstr}
@ -91,4 +385,55 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
接受一个整数。返回一个UInt64类型数组其中包含一组2的幂列表其列表中的所有值相加等于这个整数。数组中的数字按升序排列。
## bitPositionsToArray(num) {#bitpositionstoarraynum}
接受整数并将其转换为无符号整数。返回一个 `UInt64` 数字数组,其中包含 `arg` 中等于 `1` 的位的位置列表,按升序排列。
**语法**
```sql
bitPositionsToArray(arg)
```
**参数**
- `arg` — 整数值。类型为[Int/UInt](../../sql-reference/data-types/int-uint.md)。
**返回值**
- 包含等于 `1` 的位位置列表的数组,按升序排列。
类型为: [Array](../../sql-reference/data-types/array.md)([UInt64](../../sql-reference/data-types/int-uint.md))。
**示例**
查询语句:
``` sql
SELECT bitPositionsToArray(toInt8(1)) AS bit_positions;
```
结果:
``` text
┌─bit_positions─┐
│ [0] │
└───────────────┘
```
查询语句:
``` sql
SELECT bitPositionsToArray(toInt8(-1)) AS bit_positions;
```
结果:
``` text
┌─bit_positions─────┐
│ [0,1,2,3,4,5,6,7] │
└───────────────────┘
```
[来源文章](https://clickhouse.com/docs/en/query_language/functions/encoding_functions/) <!--hide-->

View File

@ -345,7 +345,7 @@ HedgedConnections::ReplicaLocation HedgedConnections::getReadyReplicaLocation(As
else
throw Exception("Unknown event from epoll", ErrorCodes::LOGICAL_ERROR);
}
};
}
bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLocation & location)
{

View File

@ -219,7 +219,7 @@ protected:
/// Get internal immutable ptr. Does not change internal use counter.
immutable_ptr<T> detach() && { return std::move(value); }
operator bool() const { return value != nullptr; } /// NOLINT
explicit operator bool() const { return value != nullptr; }
bool operator! () const { return value == nullptr; }
bool operator== (const chameleon_ptr & rhs) const { return value == rhs.value; }

View File

@ -169,7 +169,7 @@ struct StringHashTableLookupResult
auto & operator*() const { return *this; }
auto * operator->() { return this; }
auto * operator->() const { return this; }
operator bool() const { return mapped_ptr; } /// NOLINT
explicit operator bool() const { return mapped_ptr; }
friend bool operator==(const StringHashTableLookupResult & a, const std::nullptr_t &) { return !a.mapped_ptr; }
friend bool operator==(const std::nullptr_t &, const StringHashTableLookupResult & b) { return !b.mapped_ptr; }
friend bool operator!=(const StringHashTableLookupResult & a, const std::nullptr_t &) { return a.mapped_ptr; }

View File

@ -32,7 +32,7 @@ void assertRange(
ASSERT_EQ(range.left, expected_range.left);
ASSERT_EQ(range.right, expected_range.right);
ASSERT_EQ(file_segment->state(), expected_state);
};
}
void printRanges(const auto & segments)
{

View File

@ -22,7 +22,7 @@ extern const int CANNOT_COMPILE_REGEXP;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int INVALID_CONFIG_PARAMETER;
}
};
}
TEST(Common, SensitiveDataMasker)

View File

@ -790,7 +790,7 @@ std::vector<CodecTestSequence> generatePyramidOfSequences(const size_t sequences
}
return sequences;
};
}
// helper macro to produce human-friendly sequence name from generator
#define G(generator) generator, #generator

View File

@ -161,7 +161,7 @@ public:
task_info->deactivate();
}
operator bool() const { return task_info != nullptr; } /// NOLINT
explicit operator bool() const { return task_info != nullptr; }
BackgroundSchedulePoolTaskInfo * operator->() { return task_info.get(); }
const BackgroundSchedulePoolTaskInfo * operator->() const { return task_info.get(); }

View File

@ -108,7 +108,7 @@ public:
/// Approximate number of allocated bytes in memory - for profiling and limits.
size_t allocatedBytes() const;
operator bool() const { return !!columns(); } /// NOLINT
explicit operator bool() const { return !!columns(); }
bool operator!() const { return !this->operator bool(); } /// NOLINT
/** Get a list of column names separated by commas. */

View File

@ -587,6 +587,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
M(Bool, allow_experimental_nlp_functions, false, "Enable experimental functions for natural language processing.", 0) \
M(Bool, allow_experimental_hash_functions, false, "Enable experimental hash functions (hashid, etc)", 0) \
M(Bool, allow_experimental_object_type, false, "Allow Object and JSON data types", 0) \
M(String, insert_deduplication_token, "", "If not empty, used for duplicate detection instead of data digest", 0) \
M(Bool, count_distinct_optimization, false, "Rewrite count distinct to subquery of group by", 0) \
@ -661,7 +662,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum rows of data to read for automatic schema inference", 0) \
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Allow to skip columns with unsupported types while schema inference for format Parquet", 0) \

View File

@ -22,7 +22,7 @@ static auto typeFromString(const std::string & str)
{
auto & data_type_factory = DataTypeFactory::instance();
return data_type_factory.get(str);
};
}
static auto typesFromString(const std::string & str)
{
@ -33,7 +33,7 @@ static auto typesFromString(const std::string & str)
data_types.push_back(typeFromString(data_type));
return data_types;
};
}
struct TypesTestCase
{

View File

@ -73,11 +73,15 @@ ColumnsDescription readSchemaFromFormat(
{
std::string exception_messages;
SchemaReaderPtr schema_reader;
size_t max_rows_to_read = format_settings ? format_settings->max_rows_to_read_for_schema_inference : context->getSettingsRef().input_format_max_rows_to_read_for_schema_inference;
size_t iterations = 0;
while ((buf = read_buffer_iterator()))
{
++iterations;
if (buf->eof())
{
auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is emptyg", format_name);
auto exception_message = fmt::format("Cannot extract table structure from {} format file, file is empty", format_name);
if (!retry)
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, exception_message);
@ -89,12 +93,26 @@ ColumnsDescription readSchemaFromFormat(
try
{
schema_reader = FormatFactory::instance().getSchemaReader(format_name, *buf, context, format_settings);
schema_reader->setMaxRowsToRead(max_rows_to_read);
names_and_types = schema_reader->readSchema();
break;
}
catch (...)
{
auto exception_message = getCurrentExceptionMessage(false);
size_t rows_read = schema_reader->getNumRowsRead();
assert(rows_read <= max_rows_to_read);
max_rows_to_read -= schema_reader->getNumRowsRead();
if (rows_read != 0 && max_rows_to_read == 0)
{
exception_message += "\nTo increase the maximum number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference";
if (iterations > 1)
{
exception_messages += "\n" + exception_message;
break;
}
retry = false;
}
if (!retry || !isRetryableSchemaInferenceError(getCurrentExceptionCode()))
throw Exception(ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, "Cannot extract table structure from {} format file. Error: {}", format_name, exception_message);

View File

@ -23,6 +23,7 @@ target_link_libraries(clickhouse_functions
dbms
ch_contrib::metrohash
ch_contrib::murmurhash
ch_contrib::hashidsxx
PRIVATE
ch_contrib::zlib

View File

@ -0,0 +1,12 @@
#include "FunctionHashID.h"
#include <Functions/FunctionFactory.h>
namespace DB
{
/// Registers the `hashid` function (see FunctionHashID.h) in the factory
/// so it can be resolved by name at query analysis time.
void registerFunctionHashID(FunctionFactory & factory)
{
factory.registerFunction<FunctionHashID>();
}
}

View File

@ -0,0 +1,169 @@
#pragma once
#include <Common/config.h>
#include <hashids.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Interpreters/Context.h>
#include <functional>
#include <initializer_list>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int SUPPORT_IS_DISABLED;
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
}
// hashid(string, salt)
/// Implements hashid(number [, salt [, min_length [, alphabet]]]):
/// encodes an unsigned integer into a short, obfuscated string using the
/// hashidsxx library. Gated behind the experimental setting
/// `allow_experimental_hash_functions`.
class FunctionHashID : public IFunction
{
public:
static constexpr auto name = "hashid";
/// Factory entry point; throws if the experimental-setting gate is off.
static FunctionPtr create(ContextPtr context)
{
if (!context->getSettingsRef().allow_experimental_hash_functions)
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
"Hashing function '{}' is experimental. Set `allow_experimental_hash_functions` setting to enable it", name);
return std::make_shared<FunctionHashID>();
}
String getName() const override { return name; }
/// 0 + isVariadic() means the argument count is validated manually below.
size_t getNumberOfArguments() const override { return 0; }
bool isVariadic() const override { return true; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
/// Validates the 1..4 arguments:
///   1: unsigned integer id (any column),
///   2: constant String salt,
///   3: constant UInt8 minimum output length,
///   4: constant String custom alphabet.
/// Always returns String.
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() < 1)
throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least one argument", getName());
const auto & id_col = arguments[0];
if (!isUnsignedInteger(id_col.type))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"First argument of function {} must be unsigned integer, got {}",
getName(),
arguments[0].type->getName());
if (arguments.size() > 1)
{
const auto & hash_col = arguments[1];
/// Salt must be a constant so one Hashids encoder serves the whole column.
if (!isString(hash_col.type) || !isColumnConst(*hash_col.column.get()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Second argument of function {} must be String, got {}",
getName(),
arguments[1].type->getName());
}
if (arguments.size() > 2)
{
const auto & min_length_col = arguments[2];
if (!isUInt8(min_length_col.type) || !isColumnConst(*min_length_col.column.get()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Third argument of function {} must be UInt8, got {}",
getName(),
arguments[2].type->getName());
}
if (arguments.size() > 3)
{
const auto & alphabet_col = arguments[3];
if (!isString(alphabet_col.type) || !isColumnConst(*alphabet_col.column.get()))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Fourth argument of function {} must be String, got {}",
getName(),
arguments[3].type->getName());
}
if (arguments.size() > 4)
{
throw Exception(
ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Function {} expect no more than four arguments (integer, salt, min_length, optional_alphabet), got {}",
getName(),
arguments.size());
}
return std::make_shared<DataTypeString>();
}
/// Builds a single hashidsxx::Hashids encoder from the constant
/// salt/min_length/alphabet arguments, then encodes each id row-by-row.
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & numcolumn = arguments[0].column;
/// Accept any unsigned-integer column, plain or constant; getUInt() below
/// widens all of them to UInt64.
if (checkAndGetColumn<ColumnUInt8>(numcolumn.get()) || checkAndGetColumn<ColumnUInt16>(numcolumn.get())
|| checkAndGetColumn<ColumnUInt32>(numcolumn.get()) || checkAndGetColumn<ColumnUInt64>(numcolumn.get())
|| checkAndGetColumnConst<ColumnUInt8>(numcolumn.get()) || checkAndGetColumnConst<ColumnUInt16>(numcolumn.get())
|| checkAndGetColumnConst<ColumnUInt32>(numcolumn.get()) || checkAndGetColumnConst<ColumnUInt64>(numcolumn.get()))
{
std::string salt;
UInt8 minLength = 0;
std::string alphabet;
if (arguments.size() >= 4)
{
const auto & alphabetcolumn = arguments[3].column;
if (auto alpha_col = checkAndGetColumnConst<ColumnString>(alphabetcolumn.get()))
{
alphabet = alpha_col->getValue<String>();
/// hashidsxx treats the alphabet as a C-style string; an embedded
/// NUL would silently truncate it, so reject it up front.
if (alphabet.find('\0') != std::string::npos)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Custom alphabet must not contain null character");
}
}
else
alphabet.assign(DEFAULT_ALPHABET);
/// NOTE(review): if a 4th argument is present but is not a constant
/// ColumnString, `alphabet` stays empty here rather than falling back
/// to DEFAULT_ALPHABET — presumably unreachable after the checks in
/// getReturnTypeImpl, but worth confirming.
if (arguments.size() >= 3)
{
const auto & minlengthcolumn = arguments[2].column;
if (auto min_length_col = checkAndGetColumnConst<ColumnUInt8>(minlengthcolumn.get()))
minLength = min_length_col->getValue<UInt8>();
}
if (arguments.size() >= 2)
{
const auto & saltcolumn = arguments[1].column;
if (auto salt_col = checkAndGetColumnConst<ColumnString>(saltcolumn.get()))
salt = salt_col->getValue<String>();
}
hashidsxx::Hashids hash(salt, minLength, alphabet);
auto col_res = ColumnString::create();
for (size_t i = 0; i < input_rows_count; ++i)
{
/// encode() takes a list of numbers; we always encode exactly one.
col_res->insert(hash.encode({numcolumn->getUInt(i)}));
}
return col_res;
}
else
throw Exception(
"Illegal column " + arguments[0].column->getName() + " of first argument of function hashid", ErrorCodes::ILLEGAL_COLUMN);
}
};
}

View File

@ -2,6 +2,7 @@
#include <Common/memcmpSmall.h>
#include <Common/assert_cast.h>
#include <Common/TargetSpecific.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnConst.h>
@ -84,8 +85,8 @@ struct NumComparisonImpl
using ContainerA = PaddedPODArray<A>;
using ContainerB = PaddedPODArray<B>;
/// If you don't specify NO_INLINE, the compiler will inline this function, but we don't need this as this function contains tight loop inside.
static void NO_INLINE vectorVector(const ContainerA & a, const ContainerB & b, PaddedPODArray<UInt8> & c)
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorVectorImpl,
MULTITARGET_FH(static void), /*vectorVectorImpl*/ MULTITARGET_FB((const ContainerA & a, const ContainerB & b, PaddedPODArray<UInt8> & c) /// NOLINT
{
/** GCC 4.8.2 vectorizes a loop only if it is written in this form.
* In this case, if you loop through the array index (the code will look simpler),
@ -105,9 +106,29 @@ struct NumComparisonImpl
++b_pos;
++c_pos;
}
}))
static void NO_INLINE vectorVector(const ContainerA & a, const ContainerB & b, PaddedPODArray<UInt8> & c)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX2))
{
vectorVectorImplAVX2(a, b, c);
return;
}
else if (isArchSupported(TargetArch::SSE42))
{
vectorVectorImplSSE42(a, b, c);
return;
}
#endif
vectorVectorImpl(a, b, c);
}
static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray<UInt8> & c)
MULTITARGET_FUNCTION_WRAPPER_AVX2_SSE42(vectorConstantImpl,
MULTITARGET_FH(static void), /*vectorConstantImpl*/ MULTITARGET_FB((const ContainerA & a, B b, PaddedPODArray<UInt8> & c) /// NOLINT
{
size_t size = a.size();
const A * __restrict a_pos = a.data();
@ -120,6 +141,24 @@ struct NumComparisonImpl
++a_pos;
++c_pos;
}
}))
static void NO_INLINE vectorConstant(const ContainerA & a, B b, PaddedPODArray<UInt8> & c)
{
#if USE_MULTITARGET_CODE
if (isArchSupported(TargetArch::AVX2))
{
vectorConstantImplAVX2(a, b, c);
return;
}
else if (isArchSupported(TargetArch::SSE42))
{
vectorConstantImplSSE42(a, b, c);
return;
}
#endif
vectorConstantImpl(a, b, c);
}
static void constantVector(A a, const ContainerB & b, PaddedPODArray<UInt8> & c)

View File

@ -9,19 +9,19 @@ using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval<SubtractNa
void registerFunctionSubtractNanoseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubtractNanoseconds>();
};
}
using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval<SubtractMicrosecondsImpl>;
void registerFunctionSubtractMicroseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubtractMicroseconds>();
};
}
using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval<SubtractMillisecondsImpl>;
void registerFunctionSubtractMilliseconds(FunctionFactory & factory)
{
factory.registerFunction<FunctionSubtractMilliseconds>();
};
}
}

View File

@ -24,6 +24,7 @@ void registerFunctionsEmbeddedDictionaries(FunctionFactory &);
void registerFunctionsExternalDictionaries(FunctionFactory &);
void registerFunctionsExternalModels(FunctionFactory &);
void registerFunctionsFormatting(FunctionFactory &);
void registerFunctionHashID(FunctionFactory &);
void registerFunctionsHashing(FunctionFactory &);
void registerFunctionsHigherOrder(FunctionFactory &);
void registerFunctionsLogical(FunctionFactory &);
@ -137,6 +138,7 @@ void registerFunctions()
#endif
registerFunctionTid(factory);
registerFunctionLogTrace(factory);
registerFunctionHashID(factory);
}
}

View File

@ -160,8 +160,11 @@ static void setLazyExecutionInfo(
const ActionsDAGReverseInfo::NodeInfo & node_info = reverse_info.nodes_info[reverse_info.reverse_index.at(node)];
/// If node is used in result or it doesn't have parents, we can't enable lazy execution.
if (node_info.used_in_result || node_info.parents.empty())
if (node_info.used_in_result || node_info.parents.empty() || (node->type != ActionsDAG::ActionType::FUNCTION && node->type != ActionsDAG::ActionType::ALIAS))
{
lazy_execution_info.can_be_lazy_executed = false;
return;
}
/// To fill lazy execution info for current node we need to create it for all it's parents.
for (const auto & parent : node_info.parents)
@ -172,7 +175,7 @@ static void setLazyExecutionInfo(
{
/// Use set, because one node can be more than one argument.
/// Example: expr1 AND expr2 AND expr1.
std::set<size_t> indexes;
std::unordered_set<size_t> indexes;
for (size_t i = 0; i != parent->children.size(); ++i)
{
if (node == parent->children[i])
@ -294,6 +297,10 @@ static std::unordered_set<const ActionsDAG::Node *> processShortCircuitFunctions
short_circuit_nodes[&node] = short_circuit_settings;
}
/// If there are no short-circuit functions, no need to do anything.
if (short_circuit_nodes.empty())
return {};
auto reverse_info = getActionsDAGReverseInfo(nodes, actions_dag.getIndex());
/// For each node we fill LazyExecutionInfo.

View File

@ -1093,7 +1093,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
bool use_grouping_set_key = expressions.use_grouping_set_key;
if (query.group_by_with_grouping_sets && query.group_by_with_totals)
throw Exception("WITH TOTALS and GROUPING SETS are not supported together", ErrorCodes::NOT_IMPLEMENTED);
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "WITH TOTALS and GROUPING SETS are not supported together");
if (query.group_by_with_grouping_sets && (query.group_by_with_rollup || query.group_by_with_cube))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "GROUPING SETS are not supported together with ROLLUP and CUBE");
if (query_info.projection && query_info.projection->desc->type == ProjectionDescription::Type::Aggregate)
{

View File

@ -61,7 +61,7 @@ void fillColumnArray(const Strings & data, IColumn & column)
}
auto & offsets = array.getOffsets();
offsets.push_back(offsets.back() + size);
};
}
}

View File

@ -46,7 +46,7 @@ namespace
}
bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, bool id_mode, AuthenticationData & auth_data)
bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, AuthenticationData & auth_data)
{
return IParserBase::wrapParseImpl(pos, [&]
{
@ -120,7 +120,7 @@ namespace
return false;
value = ast->as<const ASTLiteral &>().value.safeGet<String>();
if (id_mode && expect_hash)
if (expect_hash && type == AuthenticationType::SHA256_PASSWORD)
{
if (ParserKeyword{"SALT"}.ignore(pos, expected) && ParserStringLiteral{}.parse(pos, ast, expected))
{
@ -447,7 +447,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec
if (!auth_data)
{
AuthenticationData new_auth_data;
if (parseAuthenticationData(pos, expected, attach_mode, new_auth_data))
if (parseAuthenticationData(pos, expected, new_auth_data))
{
auth_data = std::move(new_auth_data);
continue;

View File

@ -261,10 +261,18 @@ INSTANTIATE_TEST_SUITE_P(ParserCreateUserQuery, ParserTest,
"CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'",
"CREATE USER user1 IDENTIFIED WITH sha256_hash BY '[A-Za-z0-9]{64}' SALT '[A-Za-z0-9]{64}'"
},
{
"CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'",
"CREATE USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'"
},
{
"ALTER USER user1 IDENTIFIED WITH sha256_password BY 'qwe123'",
"ALTER USER user1 IDENTIFIED WITH sha256_hash BY '[A-Za-z0-9]{64}' SALT '[A-Za-z0-9]{64}'"
},
{
"ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'",
"ALTER USER user1 IDENTIFIED WITH sha256_hash BY '7A37B85C8918EAC19A9089C0FA5A2AB4DCE3F90528DCDEEC108B23DDF3607B99' SALT 'salt'"
},
{
"CREATE USER user1 IDENTIFIED WITH sha256_password BY 'qwe123' SALT 'EFFD7F6B03B3EA68B8F86C1E91614DD50E42EB31EF7160524916444D58B5E264'",
"throws Syntax error"

View File

@ -90,7 +90,7 @@ public:
bool hasRows() const { return num_rows > 0; }
bool hasColumns() const { return !columns.empty(); }
bool empty() const { return !hasRows() && !hasColumns(); }
operator bool() const { return !empty(); } /// NOLINT
explicit operator bool() const { return !empty(); }
void addColumn(ColumnPtr column);
void addColumn(size_t position, ColumnPtr column);

View File

@ -12,6 +12,7 @@ namespace ErrorCodes
extern const int TYPE_MISMATCH;
extern const int INCORRECT_DATA;
extern const int EMPTY_DATA_PASSED;
extern const int BAD_ARGUMENTS;
}
static void chooseResultType(
@ -48,16 +49,14 @@ static void chooseResultType(
}
}
static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t max_rows_to_read)
static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read)
{
if (!type)
{
if (!default_type)
throw Exception(
ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA,
"Cannot determine table structure by first {} rows of data, because some columns contain only Nulls. To increase the maximum "
"number of rows to read for structure determination, use setting input_format_max_rows_to_read_for_schema_inference",
max_rows_to_read);
"Cannot determine table structure by first {} rows of data, because some columns contain only Nulls", rows_read);
type = default_type;
}
@ -65,7 +64,7 @@ static void checkTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, c
}
IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings)
: ISchemaReader(in_), max_rows_to_read(format_settings.max_rows_to_read_for_schema_inference)
: ISchemaReader(in_)
{
if (!format_settings.column_names_for_schema_inference.empty())
{
@ -94,8 +93,14 @@ IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & form
NamesAndTypesList IRowSchemaReader::readSchema()
{
if (max_rows_to_read == 0)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot read rows to determine the schema, the maximum number of rows to read is set to 0. "
"Most likely setting input_format_max_rows_to_read_for_schema_inference is set to 0");
DataTypes data_types = readRowAndGetDataTypes();
for (size_t row = 1; row < max_rows_to_read; ++row)
for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read)
{
DataTypes new_data_types = readRowAndGetDataTypes();
if (new_data_types.empty())
@ -111,7 +116,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
if (!new_data_types[i])
continue;
chooseResultType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), row);
chooseResultType(data_types[i], new_data_types[i], common_type_checker, getDefaultType(i), std::to_string(i + 1), rows_read);
}
}
@ -136,7 +141,7 @@ NamesAndTypesList IRowSchemaReader::readSchema()
for (size_t i = 0; i != data_types.size(); ++i)
{
/// Check that we could determine the type of this column.
checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), max_rows_to_read);
checkTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), rows_read);
}
return result;
@ -151,13 +156,19 @@ DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const
return nullptr;
}
IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_)
: ISchemaReader(in_), max_rows_to_read(max_rows_to_read_), default_type(default_type_)
IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_)
: ISchemaReader(in_), default_type(default_type_)
{
}
NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
{
if (max_rows_to_read == 0)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Cannot read rows to determine the schema, the maximum number of rows to read is set to 0. "
"Most likely setting input_format_max_rows_to_read_for_schema_inference is set to 0");
bool eof = false;
auto names_and_types = readRowAndGetNamesAndDataTypes(eof);
std::unordered_map<String, DataTypePtr> names_to_types;
@ -170,7 +181,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
names_order.push_back(name);
}
for (size_t row = 1; row < max_rows_to_read; ++row)
for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read)
{
auto new_names_and_types = readRowAndGetNamesAndDataTypes(eof);
if (eof)
@ -189,7 +200,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
}
auto & type = it->second;
chooseResultType(type, new_type, common_type_checker, default_type, name, row);
chooseResultType(type, new_type, common_type_checker, default_type, name, rows_read);
}
}
@ -202,7 +213,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema()
{
auto & type = names_to_types[name];
/// Check that we could determine the type of this column.
checkTypeAndAppend(result, type, name, default_type, max_rows_to_read);
checkTypeAndAppend(result, type, name, default_type, rows_read);
}
return result;

View File

@ -26,6 +26,9 @@ public:
virtual bool needContext() const { return false; }
virtual void setContext(ContextPtr &) {}
virtual void setMaxRowsToRead(size_t) {}
virtual size_t getNumRowsRead() const { return 0; }
virtual ~ISchemaReader() = default;
protected:
@ -61,10 +64,14 @@ protected:
void setColumnNames(const std::vector<String> & names) { column_names = names; }
void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; }
size_t getNumRowsRead() const override { return rows_read; }
private:
DataTypePtr getDefaultType(size_t column) const;
size_t max_rows_to_read;
size_t rows_read = 0;
DataTypePtr default_type;
DataTypes default_types;
CommonDataTypeChecker common_type_checker;
@ -79,7 +86,7 @@ private:
class IRowWithNamesSchemaReader : public ISchemaReader
{
public:
IRowWithNamesSchemaReader(ReadBuffer & in_, size_t max_rows_to_read_, DataTypePtr default_type_ = nullptr);
IRowWithNamesSchemaReader(ReadBuffer & in_, DataTypePtr default_type_ = nullptr);
NamesAndTypesList readSchema() override;
bool hasStrictOrderOfColumns() const override { return false; }
@ -92,8 +99,12 @@ protected:
/// Set eof = true if can't read more data.
virtual NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) = 0;
void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; }
size_t getNumRowsRead() const override { return rows_read; }
private:
size_t max_rows_to_read;
size_t rows_read = 0;
DataTypePtr default_type;
CommonDataTypeChecker common_type_checker;
};

View File

@ -307,7 +307,7 @@ void JSONEachRowRowInputFormat::readSuffix()
}
JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings)
: IRowWithNamesSchemaReader(in_, format_settings.max_rows_to_read_for_schema_inference)
: IRowWithNamesSchemaReader(in_)
, json_strings(json_strings_)
{
bool allow_bools_as_numbers = format_settings.json.read_bools_as_numbers;

View File

@ -88,7 +88,7 @@ void ParallelParsingInputFormat::parserThreadFunction(ThreadGroupStatusPtr threa
// We don't know how many blocks will be. So we have to read them all
// until an empty block occurred.
Chunk chunk;
while (!parsing_finished && (chunk = parser.getChunk()) != Chunk())
while (!parsing_finished && (chunk = parser.getChunk()))
{
/// Variable chunk is moved, but it is not really used in the next iteration.
/// NOLINTNEXTLINE(bugprone-use-after-move, hicpp-invalid-access-moved)

View File

@ -214,10 +214,7 @@ void TSKVRowInputFormat::resetParser()
}
TSKVSchemaReader::TSKVSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_)
: IRowWithNamesSchemaReader(
in_,
format_settings_.max_rows_to_read_for_schema_inference,
getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped))
: IRowWithNamesSchemaReader(in_, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::Escaped))
, format_settings(format_settings_)
{
}

View File

@ -1480,7 +1480,7 @@ bool TCPHandler::receiveUnexpectedData(bool throw_exception)
maybe_compressed_in = in;
auto skip_block_in = std::make_shared<NativeReader>(*maybe_compressed_in, client_tcp_protocol_version);
bool read_ok = skip_block_in->read();
bool read_ok = !!skip_block_in->read();
if (!read_ok)
state.read_all_data = true;

View File

@ -14,7 +14,7 @@ namespace ErrorCodes
extern const int UNKNOWN_SETTING;
}
IMPLEMENT_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS);
IMPLEMENT_SETTINGS_TRAITS(ExecutableSettingsTraits, LIST_OF_EXECUTABLE_SETTINGS)
void ExecutableSettings::loadFromQuery(ASTStorage & storage_def)
{

View File

@ -19,7 +19,7 @@ namespace ErrorCodes
{
extern const int INCORRECT_QUERY;
extern const int LOGICAL_ERROR;
};
}
IndexDescription::IndexDescription(const IndexDescription & other)
: definition_ast(other.definition_ast ? other.definition_ast->clone() : nullptr)

View File

@ -84,4 +84,4 @@ ColumnsDescription MeiliSearchColumnDescriptionFetcher::fetchColumnsDescription(
return ColumnsDescription(list);
}
};
}

View File

@ -21,4 +21,4 @@ private:
MeiliSearchConnection connection;
};
};
}

View File

@ -660,7 +660,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexFullText::createIndexCondition(
const SelectQueryInfo & query, ContextPtr context) const
{
return std::make_shared<MergeTreeConditionFullText>(query, context, index.sample_block, params, token_extractor.get());
};
}
bool MergeTreeIndexFullText::mayBenefitFromIndexForIn(const ASTPtr & node) const
{

View File

@ -484,7 +484,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexSet::createIndexCondition(
const SelectQueryInfo & query, ContextPtr context) const
{
return std::make_shared<MergeTreeIndexConditionSet>(index.name, index.sample_block, max_rows, query, context);
};
}
bool MergeTreeIndexSet::mayBenefitFromIndexForIn(const ASTPtr &) const
{

View File

@ -29,7 +29,7 @@ struct MergeTreeIndexFormat
MergeTreeIndexVersion version;
const char* extension;
operator bool() const { return version != 0; } /// NOLINT
explicit operator bool() const { return version != 0; }
};
/// Stores some info about a single block of data.

View File

@ -123,14 +123,14 @@ static const ASTFunction * getAsTuple(const ASTPtr & node)
if (const auto * func = node->as<ASTFunction>(); func && func->name == "tuple")
return func;
return {};
};
}
static bool getAsTupleLiteral(const ASTPtr & node, Tuple & tuple)
{
if (const auto * value_tuple = node->as<ASTLiteral>())
return value_tuple && value_tuple->value.tryGet<Tuple>(tuple);
return false;
};
}
bool MergeTreeWhereOptimizer::tryAnalyzeTuple(Conditions & res, const ASTFunction * func, bool is_final) const
{

View File

@ -29,7 +29,7 @@ namespace ErrorCodes
extern const int ILLEGAL_PROJECTION;
extern const int NOT_IMPLEMENTED;
extern const int LOGICAL_ERROR;
};
}
bool ProjectionDescription::isPrimaryKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const
{

View File

@ -45,6 +45,10 @@ static std::unordered_set<std::string> parse_dir(const std::string & dir)
static void populateTable(const X509 * cert, MutableColumns & res_columns, const std::string & path, bool def)
{
BIO * b = BIO_new(BIO_s_mem());
SCOPE_EXIT(
{
BIO_free(b);
});
size_t col = 0;
res_columns[col++]->insert(X509_get_version(cert) + 1);
@ -53,11 +57,14 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const
char buf[1024] = {0};
const ASN1_INTEGER * sn = cert->cert_info->serialNumber;
BIGNUM * bnsn = ASN1_INTEGER_to_BN(sn, nullptr);
SCOPE_EXIT(
{
BN_free(bnsn);
});
if (BN_print(b, bnsn) > 0 && BIO_read(b, buf, sizeof(buf)) > 0)
res_columns[col]->insert(buf);
else
res_columns[col]->insertDefault();
BN_free(bnsn);
}
++col;
@ -79,8 +86,11 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const
char * issuer = X509_NAME_oneline(cert->cert_info->issuer, nullptr, 0);
if (issuer)
{
SCOPE_EXIT(
{
OPENSSL_free(issuer);
});
res_columns[col]->insert(issuer);
OPENSSL_free(issuer);
}
else
res_columns[col]->insertDefault();
@ -107,8 +117,11 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const
char * subject = X509_NAME_oneline(cert->cert_info->subject, nullptr, 0);
if (subject)
{
SCOPE_EXIT(
{
OPENSSL_free(subject);
});
res_columns[col]->insert(subject);
OPENSSL_free(subject);
}
else
res_columns[col]->insertDefault();
@ -133,8 +146,6 @@ static void populateTable(const X509 * cert, MutableColumns & res_columns, const
res_columns[col++]->insert(path);
res_columns[col++]->insert(def);
BIO_free(b);
}
static void enumCertificates(const std::string & dir, bool def, MutableColumns & res_columns)

View File

@ -22,7 +22,7 @@ struct StoragesInfo
bool need_inactive_parts = false;
MergeTreeData * data = nullptr;
operator bool() const { return storage != nullptr; } /// NOLINT
explicit operator bool() const { return storage != nullptr; }
MergeTreeData::DataPartsVector
getParts(MergeTreeData::DataPartStateVector & state, bool has_state_column, bool require_projection_parts = false) const;
};

View File

@ -47,6 +47,7 @@
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
#include <Processors/Executors/PipelineExecutor.h>
#include <Processors/Sinks/EmptySink.h>
#include <Storages/AlterCommands.h>
#include <Storages/StorageFactory.h>
#include <Common/typeid_cast.h>
#include <Common/ProfileEvents.h>
@ -74,6 +75,7 @@ namespace ErrorCodes
extern const int QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW;
extern const int SUPPORT_IS_DISABLED;
extern const int TABLE_WAS_NOT_DROPPED;
extern const int NOT_IMPLEMENTED;
}
namespace
@ -482,6 +484,65 @@ bool StorageWindowView::optimize(
return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, local_context);
}
void StorageWindowView::alter(
const AlterCommands & params,
ContextPtr local_context,
AlterLockHolder &)
{
auto table_id = getStorageID();
StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
StorageInMemoryMetadata old_metadata = getInMemoryMetadata();
params.apply(new_metadata, local_context);
const auto & new_select = new_metadata.select;
const auto & new_select_query = new_metadata.select.inner_query;
modifying_query = true;
SCOPE_EXIT({
modifying_query = false;
});
shutdown();
auto inner_query = initInnerQuery(new_select_query->as<ASTSelectQuery &>(), local_context);
dropInnerTableIfAny(true, local_context);
/// create inner table
std::exchange(has_inner_table, true);
auto create_context = Context::createCopy(local_context);
auto inner_create_query = getInnerTableCreateQuery(inner_query, inner_table_id);
InterpreterCreateQuery create_interpreter(inner_create_query, create_context);
create_interpreter.setInternal(true);
create_interpreter.execute();
DatabaseCatalog::instance().addDependency(select_table_id, table_id);
shutdown_called = false;
clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); });
fire_task = getContext()->getSchedulePool().createTask(
getStorageID().getFullTableName(), [this] { is_proctime ? threadFuncFireProc() : threadFuncFireEvent(); });
clean_cache_task->deactivate();
fire_task->deactivate();
new_metadata.setSelectQuery(new_select);
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(local_context, table_id, new_metadata);
setInMemoryMetadata(new_metadata);
startup();
}
void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const
{
for (const auto & command : commands)
{
if (!command.isCommentAlter() && command.type != AlterCommand::MODIFY_QUERY)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName());
}
}
std::pair<BlocksPtr, Block> StorageWindowView::getNewBlocks(UInt32 watermark)
{
UInt32 w_start = addTime(watermark, window_kind, -window_num_units, *time_zone);
@ -649,6 +710,7 @@ inline void StorageWindowView::fire(UInt32 watermark)
}
fire_condition.notify_all();
}
if (!target_table_id.empty())
{
StoragePtr target_table = getTargetTable();
@ -724,13 +786,12 @@ ASTPtr StorageWindowView::getSourceTableSelectQuery()
return select_with_union_query;
}
std::shared_ptr<ASTCreateQuery> StorageWindowView::getInnerTableCreateQuery(
const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name)
ASTPtr StorageWindowView::getInnerTableCreateQuery(const ASTPtr & inner_query, const StorageID & inner_table_id)
{
/// We will create a query to create an internal table.
auto inner_create_query = std::make_shared<ASTCreateQuery>();
inner_create_query->setDatabase(database_name);
inner_create_query->setTable(table_name);
inner_create_query->setDatabase(inner_table_id.getDatabaseName());
inner_create_query->setTable(inner_table_id.getTableName());
Aliases aliases;
QueryAliasesVisitor(aliases).visit(inner_query);
@ -809,29 +870,31 @@ std::shared_ptr<ASTCreateQuery> StorageWindowView::getInnerTableCreateQuery(
};
auto new_storage = std::make_shared<ASTStorage>();
/// storage != nullptr in case create window view with INNER ENGINE syntax
if (storage)
/// inner_storage_engine != nullptr in case create window view with ENGINE syntax
if (inner_table_engine)
{
if (storage->ttl_table)
auto storage = inner_table_engine->as<ASTStorage &>();
if (storage.ttl_table)
throw Exception(
ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW,
"TTL is not supported for inner table in Window View");
new_storage->set(new_storage->engine, storage->engine->clone());
new_storage->set(new_storage->engine, storage.engine->clone());
if (endsWith(storage->engine->name, "MergeTree"))
if (endsWith(storage.engine->name, "MergeTree"))
{
if (storage->partition_by)
new_storage->set(new_storage->partition_by, visit(storage->partition_by));
if (storage->primary_key)
new_storage->set(new_storage->primary_key, visit(storage->primary_key));
if (storage->order_by)
new_storage->set(new_storage->order_by, visit(storage->order_by));
if (storage->sample_by)
new_storage->set(new_storage->sample_by, visit(storage->sample_by));
if (storage.partition_by)
new_storage->set(new_storage->partition_by, visit(storage.partition_by));
if (storage.primary_key)
new_storage->set(new_storage->primary_key, visit(storage.primary_key));
if (storage.order_by)
new_storage->set(new_storage->order_by, visit(storage.order_by));
if (storage.sample_by)
new_storage->set(new_storage->sample_by, visit(storage.sample_by));
if (storage->settings)
new_storage->set(new_storage->settings, storage->settings->clone());
if (storage.settings)
new_storage->set(new_storage->settings, storage.settings->clone());
}
}
else
@ -1166,6 +1229,7 @@ StorageWindowView::StorageWindowView(
: IStorage(table_id_)
, WithContext(context_->getGlobalContext())
, log(&Poco::Logger::get(fmt::format("StorageWindowView({}.{})", table_id_.database_name, table_id_.table_name)))
, clean_interval_ms(context_->getSettingsRef().window_view_clean_interval.totalMilliseconds())
{
if (!query.select)
throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName());
@ -1189,68 +1253,29 @@ StorageWindowView::StorageWindowView(
ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW,
"UNION is not supported for {}", getName());
select_query = query.select->list_of_selects->children.at(0)->clone();
String select_database_name = getContext()->getCurrentDatabase();
String select_table_name;
auto select_query_tmp = select_query->clone();
extractDependentTable(getContext(), select_query_tmp, select_database_name, select_table_name);
/// If the table is not specified - use the table `system.one`
if (select_table_name.empty())
{
select_database_name = "system";
select_table_name = "one";
}
select_table_id = StorageID(select_database_name, select_table_name);
/// Extract all info from query; substitute Function_tumble and Function_hop with Function_windowID.
auto inner_query = innerQueryParser(select_query->as<ASTSelectQuery &>());
// Parse mergeable query
mergeable_query = inner_query->clone();
ReplaceFunctionNowData func_now_data;
ReplaceFunctionNowVisitor(func_now_data).visit(mergeable_query);
is_time_column_func_now = func_now_data.is_time_column_func_now;
if (is_time_column_func_now)
window_id_name = func_now_data.window_id_name;
// Parse final query (same as mergeable query but has tumble/hop instead of windowID)
final_query = mergeable_query->clone();
ReplaceWindowIdMatcher::Data final_query_data;
if (is_tumble)
final_query_data.window_name = "tumble";
else
final_query_data.window_name = "hop";
ReplaceWindowIdMatcher::Visitor(final_query_data).visit(final_query);
is_watermark_strictly_ascending = query.is_watermark_strictly_ascending;
is_watermark_ascending = query.is_watermark_ascending;
is_watermark_bounded = query.is_watermark_bounded;
/// Extract information about watermark, lateness.
eventTimeParser(query);
if (attach_)
{
inner_table_id = StorageID(table_id_.database_name, generateInnerTableName(table_id_));
if (inner_target_table)
target_table_id = StorageID(table_id_.database_name, generateTargetTableName(table_id_));
else
target_table_id = query.to_table_id;
}
else
{
/// create inner table
auto inner_create_query
= getInnerTableCreateQuery(inner_query, query.inner_storage, table_id_.database_name, generateInnerTableName(table_id_));
target_table_id = query.to_table_id;
auto inner_query = initInnerQuery(query.select->list_of_selects->children.at(0)->as<ASTSelectQuery &>(), context_);
if (query.inner_storage)
inner_table_engine = query.inner_storage->clone();
inner_table_id = StorageID(getStorageID().database_name, generateInnerTableName(getStorageID()));
inner_fetch_query = generateInnerFetchQuery(inner_table_id);
if (is_proctime)
next_fire_signal = getWindowUpperBound(std::time(nullptr));
std::exchange(has_inner_table, true);
if (!attach_)
{
auto inner_create_query = getInnerTableCreateQuery(inner_query, inner_table_id);
auto create_context = Context::createCopy(context_);
InterpreterCreateQuery create_interpreter(inner_create_query, create_context);
create_interpreter.setInternal(true);
create_interpreter.execute();
inner_table_id = StorageID(inner_create_query->getDatabase(), inner_create_query->getTable());
if (inner_target_table)
{
/// create inner target table
@ -1277,18 +1302,53 @@ StorageWindowView::StorageWindowView(
inner_fetch_query = generateInnerFetchQuery(inner_table_id);
clean_interval_ms = getContext()->getSettingsRef().window_view_clean_interval.totalMilliseconds();
next_fire_signal = getWindowUpperBound(std::time(nullptr));
clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); });
if (is_proctime)
fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireProc(); });
else
fire_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncFireEvent(); });
fire_task = getContext()->getSchedulePool().createTask(
getStorageID().getFullTableName(), [this] { is_proctime ? threadFuncFireProc() : threadFuncFireEvent(); });
clean_cache_task->deactivate();
fire_task->deactivate();
}
ASTPtr StorageWindowView::initInnerQuery(ASTSelectQuery query, ContextPtr context_)
{
select_query = query.clone();
input_header.clear();
output_header.clear();
String select_database_name = getContext()->getCurrentDatabase();
String select_table_name;
auto select_query_tmp = query.clone();
extractDependentTable(context_, select_query_tmp, select_database_name, select_table_name);
/// If the table is not specified - use the table `system.one`
if (select_table_name.empty())
{
select_database_name = "system";
select_table_name = "one";
}
select_table_id = StorageID(select_database_name, select_table_name);
/// Extract all info from query; substitute Function_tumble and Function_hop with Function_windowID.
auto inner_query = innerQueryParser(query);
/// Parse mergeable query
mergeable_query = inner_query->clone();
ReplaceFunctionNowData func_now_data;
ReplaceFunctionNowVisitor(func_now_data).visit(mergeable_query);
is_time_column_func_now = func_now_data.is_time_column_func_now;
if (!is_proctime && is_time_column_func_now)
throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY);
if (is_time_column_func_now)
window_id_name = func_now_data.window_id_name;
/// Parse final query (same as mergeable query but has tumble/hop instead of windowID)
final_query = mergeable_query->clone();
ReplaceWindowIdMatcher::Data final_query_data;
final_query_data.window_name = is_tumble ? "tumble" : "hop";
ReplaceWindowIdMatcher::Visitor(final_query_data).visit(final_query);
return inner_query;
}
ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query)
{
@ -1351,13 +1411,16 @@ ASTPtr StorageWindowView::innerQueryParser(const ASTSelectQuery & query)
void StorageWindowView::eventTimeParser(const ASTCreateQuery & query)
{
watermark_num_units = 0;
lateness_num_units = 0;
is_watermark_strictly_ascending = query.is_watermark_strictly_ascending;
is_watermark_ascending = query.is_watermark_ascending;
is_watermark_bounded = query.is_watermark_bounded;
if (query.is_watermark_strictly_ascending || query.is_watermark_ascending || query.is_watermark_bounded)
{
is_proctime = false;
if (is_time_column_func_now)
throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY);
if (query.is_watermark_ascending)
{
is_watermark_bounded = true;
@ -1371,6 +1434,8 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query)
"Illegal type WATERMARK function should be Interval");
}
}
else
is_proctime = true;
if (query.allowed_lateness)
{
@ -1379,11 +1444,16 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query)
query.lateness_function, lateness_kind, lateness_num_units,
"Illegal type ALLOWED_LATENESS function should be Interval");
}
else
allowed_lateness = false;
}
void StorageWindowView::writeIntoWindowView(
StorageWindowView & window_view, const Block & block, ContextPtr local_context)
{
while (window_view.modifying_query)
std::this_thread::sleep_for(std::chrono::milliseconds(100));
if (!window_view.is_proctime && window_view.max_watermark == 0 && block.rows() > 0)
{
std::lock_guard lock(window_view.fire_signal_mutex);

View File

@ -134,6 +134,10 @@ public:
const Names & deduplicate_by_columns,
ContextPtr context) override;
void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
void checkAlterIsPossible(const AlterCommands & commands, ContextPtr context) const override;
void startup() override;
void shutdown() override;
@ -187,14 +191,14 @@ private:
ASTPtr mergeable_query;
/// Used to fetch the mergeable state and generate the final result. e.g. SELECT * FROM * GROUP BY tumble(____timestamp, *)
ASTPtr final_query;
/// Used to fetch the data from inner storage.
ASTPtr inner_fetch_query;
bool is_proctime{true};
bool is_proctime;
bool is_time_column_func_now;
bool is_tumble; // false if is hop
std::atomic<bool> shutdown_called{false};
std::atomic<bool> modifying_query{false};
bool has_inner_table{true};
bool inner_target_table{false};
mutable Block input_header;
@ -204,10 +208,10 @@ private:
UInt32 max_timestamp = 0;
UInt32 max_watermark = 0; // next watermark to fire
UInt32 max_fired_watermark = 0;
bool is_watermark_strictly_ascending{false};
bool is_watermark_ascending{false};
bool is_watermark_bounded{false};
bool allowed_lateness{false};
bool is_watermark_strictly_ascending;
bool is_watermark_ascending;
bool is_watermark_bounded;
bool allowed_lateness;
UInt32 next_fire_signal;
std::deque<UInt32> fire_signal;
std::list<std::weak_ptr<WindowViewSource>> watch_streams;
@ -227,8 +231,8 @@ private:
Int64 window_num_units;
Int64 hop_num_units;
Int64 slice_num_units;
Int64 watermark_num_units = 0;
Int64 lateness_num_units = 0;
Int64 watermark_num_units;
Int64 lateness_num_units;
Int64 slide_num_units;
String window_id_name;
String window_id_alias;
@ -240,6 +244,8 @@ private:
StorageID target_table_id = StorageID::createEmpty();
StorageID inner_table_id = StorageID::createEmpty();
ASTPtr inner_table_engine;
BackgroundSchedulePool::TaskHolder clean_cache_task;
BackgroundSchedulePool::TaskHolder fire_task;
@ -248,9 +254,8 @@ private:
ASTPtr innerQueryParser(const ASTSelectQuery & query);
void eventTimeParser(const ASTCreateQuery & query);
ASTPtr initInnerQuery(ASTSelectQuery query, ContextPtr context);
std::shared_ptr<ASTCreateQuery> getInnerTableCreateQuery(
const ASTPtr & inner_query, ASTStorage * storage, const String & database_name, const String & table_name);
UInt32 getCleanupBound();
ASTPtr getCleanupQuery();
@ -267,6 +272,7 @@ private:
void updateMaxTimestamp(UInt32 timestamp);
ASTPtr getFinalQuery() const { return final_query->clone(); }
ASTPtr getInnerTableCreateQuery(const ASTPtr & inner_query, const StorageID & inner_table_id);
StoragePtr getSourceTable() const;
StoragePtr getInnerTable() const;

View File

@ -0,0 +1,9 @@
1 1 1990-01-01 12:00:05
1 2 1990-01-01 12:00:05
1 3 1990-01-01 12:00:05
----ALTER TABLE...MODIFY QUERY----
1 1 1990-01-01 12:00:05
1 2 1990-01-01 12:00:05
1 3 1990-01-01 12:00:05
1 12 1990-01-01 12:00:15
1 14 1990-01-01 12:00:15

View File

@ -0,0 +1,47 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Poll until `SELECT count(*) FROM dst` prints the expected value.
function wait_for_dst_count()
{
    until $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "$1"; do
        sleep .5
    done
}

# Set up source/destination tables and a window view, then feed some rows.
$CLICKHOUSE_CLIENT --multiquery <<EOF
SET allow_experimental_window_view = 1;
DROP TABLE IF EXISTS mt;
DROP TABLE IF EXISTS dst;
DROP TABLE IF EXISTS wv;
CREATE TABLE dst(count UInt64, market Int64, w_end DateTime) Engine=MergeTree ORDER BY tuple();
CREATE TABLE mt(a Int32, market Int64, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid, market;
INSERT INTO mt VALUES (1, 1, '1990/01/01 12:00:00');
INSERT INTO mt VALUES (1, 2, '1990/01/01 12:00:01');
INSERT INTO mt VALUES (1, 3, '1990/01/01 12:00:02');
INSERT INTO mt VALUES (1, 4, '1990/01/01 12:00:05');
INSERT INTO mt VALUES (1, 5, '1990/01/01 12:00:06');
EOF

# Wait for the first batch of aggregated rows to land in dst.
wait_for_dst_count "3"
$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY market, w_end;"

$CLICKHOUSE_CLIENT --query="SELECT '----ALTER TABLE...MODIFY QUERY----';"

# Change the view's query, then feed more rows through the modified query.
$CLICKHOUSE_CLIENT --multiquery <<EOF
ALTER TABLE wv MODIFY QUERY SELECT count(a) AS count, market * 2, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid, market;
INSERT INTO mt VALUES (1, 6, '1990/01/01 12:00:10');
INSERT INTO mt VALUES (1, 7, '1990/01/01 12:00:11');
INSERT INTO mt VALUES (1, 8, '1990/01/01 12:00:30');
EOF

# Wait for the post-ALTER results before printing the final state.
wait_for_dst_count "5"
$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY market, w_end;"

$CLICKHOUSE_CLIENT --query="DROP TABLE wv"
$CLICKHOUSE_CLIENT --query="DROP TABLE mt"
$CLICKHOUSE_CLIENT --query="DROP TABLE dst"

View File

@ -0,0 +1,9 @@
1 1 1990-01-01 12:00:05
1 2 1990-01-01 12:00:05
1 3 1990-01-01 12:00:05
----ALTER TABLE...MODIFY QUERY----
1 1 1990-01-01 12:00:05
1 2 1990-01-01 12:00:05
1 3 1990-01-01 12:00:05
1 12 1990-01-01 12:00:15
1 14 1990-01-01 12:00:15

View File

@ -0,0 +1,50 @@
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Poll until `SELECT count(*) FROM dst` prints the expected value.
function wait_for_dst_count()
{
    until $CLICKHOUSE_CLIENT --query="SELECT count(*) FROM dst" | grep -q "$1"; do
        sleep .5
    done
}

# Set up two candidate source tables, a destination table, and a window view
# initially reading from mt; feed the first batch of rows into mt.
$CLICKHOUSE_CLIENT --multiquery <<EOF
SET allow_experimental_window_view = 1;
DROP TABLE IF EXISTS mt;
DROP TABLE IF EXISTS mt2;
DROP TABLE IF EXISTS dst;
DROP TABLE IF EXISTS wv;
CREATE TABLE dst(count UInt64, market Int64, w_end DateTime) Engine=MergeTree ORDER BY tuple();
CREATE TABLE mt(a Int32, market Int64, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
CREATE TABLE mt2(a Int32, market Int64, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple();
CREATE WINDOW VIEW wv TO dst WATERMARK=ASCENDING AS SELECT count(a) AS count, market, tumbleEnd(wid) AS w_end FROM mt GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid, market;
INSERT INTO mt VALUES (1, 1, '1990/01/01 12:00:00');
INSERT INTO mt VALUES (1, 2, '1990/01/01 12:00:01');
INSERT INTO mt VALUES (1, 3, '1990/01/01 12:00:02');
INSERT INTO mt VALUES (1, 4, '1990/01/01 12:00:05');
INSERT INTO mt VALUES (1, 5, '1990/01/01 12:00:06');
EOF

# Wait for the first batch of aggregated rows to land in dst.
wait_for_dst_count "3"
$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY market, w_end;"

$CLICKHOUSE_CLIENT --query="SELECT '----ALTER TABLE...MODIFY QUERY----';"

# Repoint the view at mt2 via ALTER ... MODIFY QUERY and feed rows into mt2.
$CLICKHOUSE_CLIENT --multiquery <<EOF
ALTER TABLE wv MODIFY QUERY SELECT count(a) AS count, market * 2, tumbleEnd(wid) AS w_end FROM mt2 GROUP BY tumble(timestamp, INTERVAL '5' SECOND, 'US/Samoa') AS wid, market;
INSERT INTO mt2 VALUES (1, 6, '1990/01/01 12:00:10');
INSERT INTO mt2 VALUES (1, 7, '1990/01/01 12:00:11');
INSERT INTO mt2 VALUES (1, 8, '1990/01/01 12:00:30');
EOF

# Wait for the post-ALTER results before printing the final state.
wait_for_dst_count "5"
$CLICKHOUSE_CLIENT --query="SELECT * FROM dst ORDER BY market, w_end;"

$CLICKHOUSE_CLIENT --query="DROP TABLE wv"
$CLICKHOUSE_CLIENT --query="DROP TABLE mt"
$CLICKHOUSE_CLIENT --query="DROP TABLE mt2"
$CLICKHOUSE_CLIENT --query="DROP TABLE dst"

View File

@ -0,0 +1,95 @@
#!/usr/bin/env python3
# Tags: no-parallel

# Interactive test: a WATCH on a window view must keep working across
# ALTER TABLE ... MODIFY QUERY. client1 watches the original query,
# client2 drives inserts and the ALTER, client3 watches the modified query.
#
# Fix: the expect() patterns are regexes; `\)` and `\$` are invalid escape
# sequences in plain string literals (DeprecationWarning, an error in newer
# Pythons), so the patterns are written as raw strings.

import os
import sys

CURDIR = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, os.path.join(CURDIR, "helpers"))

from client import client, prompt, end_of_block

log = None
# uncomment the line below for debugging
# log=sys.stdout

with client(name="client1>", log=log) as client1, client(
    name="client2>", log=log
) as client2, client(name="client3>", log=log) as client3:
    client1.expect(prompt)
    client2.expect(prompt)
    client3.expect(prompt)

    # Window views are experimental; a short heartbeat makes WATCH responsive.
    client1.send("SET allow_experimental_window_view = 1")
    client1.expect(prompt)
    client1.send("SET window_view_heartbeat_interval = 1")
    client1.expect(prompt)
    client2.send("SET allow_experimental_window_view = 1")
    client2.expect(prompt)
    client3.send("SET allow_experimental_window_view = 1")
    client3.expect(prompt)
    client3.send("SET window_view_heartbeat_interval = 1")
    client3.expect(prompt)

    client1.send("CREATE DATABASE IF NOT EXISTS 01078_window_view_alter_query_watch")
    client1.expect(prompt)
    client1.send("DROP TABLE IF EXISTS 01078_window_view_alter_query_watch.mt NO DELAY")
    client1.expect(prompt)
    client1.send("DROP TABLE IF EXISTS 01078_window_view_alter_query_watch.wv NO DELAY")
    client1.expect(prompt)

    client1.send(
        "CREATE TABLE 01078_window_view_alter_query_watch.mt(a Int32, timestamp DateTime) ENGINE=MergeTree ORDER BY tuple()"
    )
    client1.expect(prompt)
    client1.send(
        "CREATE WINDOW VIEW 01078_window_view_alter_query_watch.wv ENGINE Memory WATERMARK=ASCENDING AS SELECT count(a) AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid"
    )
    client1.expect(prompt)

    # client1 watches the original query (count(a)).
    client1.send("WATCH 01078_window_view_alter_query_watch.wv")
    client1.expect("Query id" + end_of_block)
    client1.expect(r"Progress: 0.00 rows.*\)")

    # The second insert moves the ascending watermark past the first window,
    # so the first result (count = 1) is fired to the watcher.
    client2.send(
        "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:00', 'US/Samoa'));"
    )
    client2.expect("Ok.")
    client2.send(
        "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:05', 'US/Samoa'));"
    )
    client2.expect("Ok.")
    client1.expect("1" + end_of_block)
    client1.expect(r"Progress: 1.00 rows.*\)")

    # Modifying the view's query terminates the running WATCH on client1.
    client2.send(
        "ALTER TABLE 01078_window_view_alter_query_watch.wv MODIFY QUERY SELECT count(a) * 2 AS count, hopEnd(wid) AS w_end FROM 01078_window_view_alter_query_watch.mt GROUP BY hop(timestamp, INTERVAL '2' SECOND, INTERVAL '3' SECOND, 'US/Samoa') AS wid"
    )
    client2.expect("Ok.")
    client2.expect(prompt)
    client1.expect("1 row" + end_of_block)
    client1.expect(prompt)

    # client3 watches the modified query (count(a) * 2) and must see "2".
    client3.send("WATCH 01078_window_view_alter_query_watch.wv")
    client3.expect("Query id" + end_of_block)
    client3.expect(r"Progress: 0.00 rows.*\)")
    client2.send(
        "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:06', 'US/Samoa'));"
    )
    client2.expect("Ok.")
    client2.send(
        "INSERT INTO 01078_window_view_alter_query_watch.mt VALUES (1, toDateTime('1990/01/01 12:00:10', 'US/Samoa'));"
    )
    client2.expect("Ok.")
    client3.expect("2" + end_of_block)
    client3.expect(r"Progress: 1.00 rows.*\)")

    # send Ctrl-C to stop the WATCH; resend the command if the shell ate it.
    client3.send("\x03", eol="")
    match = client3.expect(r"(%s)|([#\$] )" % prompt)
    if match.groups()[1]:
        client3.send(client3.command)
        client3.expect(prompt)

    client3.send("DROP TABLE 01078_window_view_alter_query_watch.wv NO DELAY;")
    client3.expect(prompt)
    client3.send("DROP TABLE 01078_window_view_alter_query_watch.mt;")
    client3.expect(prompt)
    client3.send("DROP DATABASE IF EXISTS 01078_window_view_alter_query_watch;")
    client3.expect(prompt)

View File

@ -0,0 +1,11 @@
0 gY
1 jR
2 k5
3 l5
4 mO
0 pbgkmdljlpjoapne
1 akemglnjepjpodba
2 obmgndljgajpkeao
3 dldokmpjpgjgeanb
4 nkdlpgajngjnobme
YQrvD5XGvbx

View File

@ -0,0 +1,5 @@
SET allow_experimental_hash_functions = 1;

-- Default salt, minimum length and alphabet.
SELECT number, hashid(number) FROM system.numbers LIMIT 5;
-- Explicit salt, minimum length 16 and a custom 16-character alphabet.
SELECT number, hashid(number, 's3cr3t', 16, 'abcdefghijklmnop') FROM system.numbers LIMIT 5;
-- Large input with a salt only.
SELECT hashid(1234567890123456, 's3cr3t');

View File

@ -0,0 +1,25 @@
-- Tags: no-backward-compatibility-check:22.5.1

-- GROUPING SETS combined with WITH ROLLUP / WITH CUBE must be rejected
-- with NOT_IMPLEMENTED instead of producing wrong results.
SELECT number
FROM numbers(10)
GROUP BY GROUPING SETS (number, number % 2)
WITH ROLLUP; -- { serverError NOT_IMPLEMENTED }

SELECT number
FROM numbers(10)
GROUP BY GROUPING SETS (number, number % 2)
WITH CUBE; -- { serverError NOT_IMPLEMENTED }

View File

@ -0,0 +1,6 @@
2
4
6
8
x Nullable(String)
x Nullable(String)

View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
# Tags: no-fasttest

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# Three files whose column x is entirely NULL, plus a fourth that mixes
# NULL and non-NULL values (10 rows each, 40 rows total).
for i in 1 2 3; do
    $CLICKHOUSE_CLIENT -q "insert into function file(data$i.jsonl) select NULL as x from numbers(10) settings engine_file_truncate_on_insert=1"
done
$CLICKHOUSE_CLIENT -q "insert into function file(data4.jsonl) select number % 2 ? number : NULL as x from numbers(10) settings engine_file_truncate_on_insert=1"

# While the row limit keeps inference inside the NULL-only files, it must
# fail with ONLY_NULLS_WHILE_READING_SCHEMA (each grep -c prints 1).
for limit in 8 16 24 31; do
    $CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=$limit" 2>&1 | grep -c 'ONLY_NULLS_WHILE_READING_SCHEMA'
done

# Once non-NULL rows are within the limit, x is inferred as Nullable(String).
for limit in 32 100; do
    $CLICKHOUSE_CLIENT -q "desc file('data*.jsonl') settings input_format_max_rows_to_read_for_schema_inference=$limit"
done