diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index 3a33b3b9989..acc38b6fa2a 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -1,43 +1,38 @@ -# Usage: -# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes -# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") -# include (cmake/limit_jobs.cmake) +# Limit compiler/linker job concurrency to avoid OOMs on subtrees where compilation/linking is memory-intensive. +# +# Usage from CMake: +# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # megabyte +# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") # megabyte +# include (cmake/limit_jobs.cmake) +# +# (bigger values mean fewer jobs) -cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) # Not available under freebsd +cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES) -# 1 if not set -option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "") +# Set to disable the automatic job-limiting +option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" OFF) +option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" OFF) -# 1 if not set -option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "") - -if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY) +if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY) math(EXPR PARALLEL_COMPILE_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY}) if (NOT PARALLEL_COMPILE_JOBS) set (PARALLEL_COMPILE_JOBS 1) endif () - if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) - set (PARALLEL_COMPILE_JOBS_LESS TRUE) + if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") endif() endif () -if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)) - set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) - string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE}) - set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS}) -endif () - - -if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY) +if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY) math(EXPR PARALLEL_LINK_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY}) if (NOT PARALLEL_LINK_JOBS) set (PARALLEL_LINK_JOBS 1) endif () - if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) - set (PARALLEL_LINK_JOBS_LESS TRUE) + if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) + message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). 
Set PARALLEL_LINK_JOBS to override.") endif() endif () @@ -52,20 +47,16 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE set (PARALLEL_LINK_JOBS 2) endif() -if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)) +message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).") + +if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) + string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE}) + set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS}) +endif () + +if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR}) string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK}) set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS}) endif () - -if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) - message(STATUS - "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory. - Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") - if (PARALLEL_COMPILE_JOBS_LESS) - message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") - endif() - if (PARALLEL_LINK_JOBS_LESS) - message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.") - endif() -endif () diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 82dac74e647..a6ae517e401 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1,11 +1,11 @@ --- slug: /en/operations/server-configuration-parameters/settings sidebar_position: 57 -sidebar_label: Server Settings +sidebar_label: Global Server Settings description: This section contains descriptions of server settings that cannot be changed at the session or query level. --- -# Server Settings +# Global Server Settings This section contains descriptions of server settings that cannot be changed at the session or query level. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index eb1d5db5676..6863d7f3191 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -7,90 +7,16 @@ pagination_next: en/operations/settings/settings # Settings Overview -There are multiple ways to define ClickHouse settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting. +There are two main groups of ClickHouse settings: -The order of priority for defining a setting is: +- Global server settings +- Query-level settings -1. 
Settings in the `users.xml` server configuration file
+The main distinction between global server settings and query-level settings is that
+global server settings must be set in configuration files while query-level settings
+can be set in configuration files or with SQL queries.
-
-   - Set in the element `<profiles>`.
+Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level.
-2. Session settings
+Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query level.
-
-   - Send `SET setting=value` from the ClickHouse console client in interactive mode.
-   Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` HTTP parameter.
-
-3. Query settings
-
-   - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
-   - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
-   - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed.
-
-View the [Settings](./settings.md) page for a description of the ClickHouse settings.
-
-## Converting a Setting to its Default Value
-
-If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like:
-
-```sql
-SET setting_name = DEFAULT
-```
-
-For example, the default value of `max_insert_block_size` is 1048449. Suppose you change its value to 100000:
-
-```sql
-SET max_insert_block_size=100000;
-
-SELECT value FROM system.settings where name='max_insert_block_size';
-```
-
-The response is:
-
-```response
-┌─value──┐
-│ 100000 │
-└────────┘
-```
-
-The following command sets its value back to 1048449:
-
-```sql
-SET max_insert_block_size=DEFAULT;
-
-SELECT value FROM system.settings where name='max_insert_block_size';
-```
-
-The setting is now back to its default:
-
-```response
-┌─value───┐
-│ 1048449 │
-└─────────┘
-```
-
-
-## Custom Settings {#custom_settings}
-
-In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
-
-A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
-
-```xml
-<custom_settings_prefixes>custom_</custom_settings_prefixes>
-```
-
-To define a custom setting use `SET` command:
-
-```sql
-SET custom_a = 123;
-```
-
-To get the current value of a custom setting use `getSetting()` function:
-
-```sql
-SELECT getSetting('custom_a');
-```
-
-**See Also**
-
-- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md)
diff --git a/docs/en/operations/settings/settings-query-level.md b/docs/en/operations/settings/settings-query-level.md
new file mode 100644
index 00000000000..81cc2294a4c
--- /dev/null
+++ b/docs/en/operations/settings/settings-query-level.md
@@ -0,0 +1,217 @@
+---
+sidebar_label: Query-level Settings
+title: Query-level Settings
+slug: /en/operations/settings/query-level
+---
+
+There are multiple ways to set ClickHouse query-level settings.
+Settings are configured in layers, and each subsequent layer redefines the previous values of a setting.
+
+The order of priority for defining a setting is:
+
+1. Applying a setting to a user directly, or within a settings profile
+
+   - SQL (recommended)
+   - adding one or more XML or YAML files to `/etc/clickhouse-server/users.d`
+
+2. Session settings
+
+   - Send `SET setting=value` from the ClickHouse Cloud SQL console or
+     `clickhouse client` in interactive mode. Similarly, you can use ClickHouse
+     sessions in the HTTP protocol. To do this, you need to specify the
+     `session_id` HTTP parameter.
+
+3. Query settings
+
+   - When starting `clickhouse client` in non-interactive mode, set the startup
+     parameter `--setting=value`.
+   - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
+   - Define settings in the
+     [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query)
+     clause of the SELECT query. The setting value is applied only to that query
+     and is reset to the default or previous value after the query is executed.
+
+## Examples
+
+These examples all set the value of the `async_insert` setting to `1`, and
+show how to examine the settings in a running system.
+
+### Using SQL to apply a setting to a user directly
+
+This creates the user `ingester` with the setting `async_insert = 1`:
+
+```sql
+CREATE USER ingester
+IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3'
+# highlight-next-line
+SETTINGS async_insert = 1
+```
+
+#### Examine the settings profile and assignment
+
+```sql
+SHOW ACCESS
+```
+
+```response
+┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐
+│ ...                                                                                │
+# highlight-next-line
+│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS async_insert = true │
+│ ...                                                                                │
+└────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+### Using SQL to create a settings profile and assign to a user
+
+This creates the profile `log_ingest` with the setting `async_insert = 1`:
+
+```sql
+CREATE
+SETTINGS PROFILE log_ingest SETTINGS async_insert = 1
+```
+
+This creates the user `ingester` and assigns the user the settings profile `log_ingest`:
+
+```sql
+CREATE USER ingester
+IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3'
+# highlight-next-line
+SETTINGS PROFILE log_ingest
+```
+
+### Using XML to create a settings profile and user
+
+```xml title=/etc/clickhouse-server/users.d/users.xml
+<clickhouse>
+# highlight-start
+    <profiles>
+        <log_ingest>
+            <async_insert>1</async_insert>
+        </log_ingest>
+    </profiles>
+# highlight-end
+
+    <users>
+        <ingester>
+            <password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
+# highlight-start
+            <profile>log_ingest</profile>
+# highlight-end
+        </ingester>
+        <default>
+            <password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
+            <access_management>1</access_management>
+            <named_collection_control>1</named_collection_control>
+        </default>
+    </users>
+</clickhouse>
+```
+
+#### Examine the settings profile and assignment
+
+```sql
+SHOW ACCESS
+```
+
+```response
+┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐
+│ CREATE USER default IDENTIFIED WITH sha256_password                                │
+# highlight-next-line
+│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS PROFILE log_ingest   │
+│ CREATE SETTINGS PROFILE default                                                    │
+# highlight-next-line
+│ CREATE SETTINGS PROFILE log_ingest SETTINGS async_insert = true                    │
+│ CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1                             │
+│ ...                                                                                │
+└────────────────────────────────────────────────────────────────────────────────────┘
+```
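+
+You can also verify the value the `ingester` user actually receives by connecting
+as that user and reading it back from `system.settings` (the same query as in the
+session example below; the value is `1` because the `log_ingest` profile applies):
+
+```sql
+SELECT value FROM system.settings where name='async_insert';
+```
+
+```response
+┌─value─┐
+│ 1     │
+└───────┘
+```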
+
+### Assign a setting to a session
+
+```sql
+SET async_insert = 1;
+SELECT value FROM system.settings where name='async_insert';
+```
+
+```response
+┌─value──┐
+│ 1      │
+└────────┘
+```
+
+### Assign a setting during a query
+
+```sql
+INSERT INTO YourTable
+# highlight-next-line
+SETTINGS async_insert=1
+VALUES (...)
+```
+
+## Converting a Setting to its Default Value
+
+If you change a setting and would like to revert it to its default value, set the value to `DEFAULT`. The syntax looks like:
+
+```sql
+SET setting_name = DEFAULT
+```
+
+For example, the default value of `async_insert` is `0`. Suppose you change its value to `1`:
+
+```sql
+SET async_insert = 1;
+
+SELECT value FROM system.settings where name='async_insert';
+```
+
+The response is:
+
+```response
+┌─value──┐
+│ 1      │
+└────────┘
+```
+
+The following command sets its value back to `0`:
+
+```sql
+SET async_insert = DEFAULT;
+
+SELECT value FROM system.settings where name='async_insert';
+```
+
+The setting is now back to its default:
+
+```response
+┌─value───┐
+│ 0       │
+└─────────┘
+```
+
+## Custom Settings {#custom_settings}
+
+In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings.
+
+A custom setting name must begin with one of the predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file.
+
+```xml
+<custom_settings_prefixes>custom_</custom_settings_prefixes>
+```
+
+To define a custom setting, use the `SET` command:
+
+```sql
+SET custom_a = 123;
+```
+
+To get the current value of a custom setting, use the `getSetting()` function:
+
+```sql
+SELECT getSetting('custom_a');
+```
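+
+A custom setting can then be used like any other expression, for example to
+parameterize a query (a minimal sketch, assuming a hypothetical table `events`
+with a numeric `user_id` column):
+
+```sql
+SET custom_max_user = 100;
+
+SELECT count()
+FROM events
+WHERE user_id <= getSetting('custom_max_user');
+```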
+
+**See Also**
+
+- View the [Settings](./settings.md) page for a description of the ClickHouse settings.
+- [Global server settings](../../operations/server-configuration-parameters/settings.md)
diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md
index 7f2b8f3c605..44d385312d0 100644
--- a/docs/en/sql-reference/functions/array-functions.md
+++ b/docs/en/sql-reference/functions/array-functions.md
@@ -102,6 +102,8 @@ The function also works for strings.
 
 Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`.
 
+Alias: `OCTET_LENGTH`
+
 ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64
 
 ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64
@@ -142,6 +144,7 @@ range([start, ] end [, step])
 
 - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments.
 - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting.
+- Returns `NULL` if any argument has type `Nullable(Nothing)`. An exception is thrown if any argument has a `NULL` value (type `Nullable(T)`).
 
 **Examples**
 
@@ -878,7 +881,7 @@ A special function. See the section [“ArrayJoin function”](../../sql-reference/functions/array-join.md#functions_arrayjoin)
 
 ## arrayDifference
 
-Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). 
+Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`).
 
 **Syntax**
 
@@ -996,6 +999,24 @@ SELECT
 └──────────────┴───────────┘
 ```
 
+## arrayJaccardIndex
+
+Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays, i.e. the number of elements in their intersection divided by the number of elements in their union.
+
+**Example**
+
+Query:
+``` sql
+SELECT arrayJaccardIndex([1, 2], [2, 3]) AS res
+```
+
+Result:
+``` text
+┌─res────────────────┐
+│ 0.3333333333333333 │
+└────────────────────┘
+```
+
 ## arrayReduce
 
 Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`.
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 5175bbf0615..4f174a53ad6 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -90,6 +90,8 @@ Returns the length of a string in bytes (not: in characters or Unicode code poin
 
 The function also works for arrays.
 
+Alias: `OCTET_LENGTH`
+
 ## lengthUTF8
 
 Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
@@ -1253,3 +1255,15 @@ Result:
 │ A240             │
 └──────────────────┘
 ```
+
+## initcap
+
+Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
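+
+**Example**
+
+Query (the result follows from the rule above: word-initial letters are upper-cased, all other letters lower-cased):
+
+``` sql
+SELECT initcap('hello MY WORLD');
+```
+
+Result:
+
+``` text
+┌─initcap('hello MY WORLD')─┐
+│ Hello My World            │
+└───────────────────────────┘
+```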
+
+## initcapUTF8
+
+Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
+
+If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md
index c43323d68fd..439eddfd752 100644
--- a/docs/ru/sql-reference/functions/array-functions.md
+++ b/docs/ru/sql-reference/functions/array-functions.md
@@ -145,6 +145,8 @@ range([start, ] end [, step])
 
 - Если в результате запроса создаются массивы суммарной длиной больше, чем количество элементов, указанное настройкой [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block), то генерируется исключение.
 
+- Возвращает `NULL`, если любой из аргументов имеет тип `Nullable(Nothing)`. Если любой из аргументов имеет значение `NULL` (тип `Nullable(T)`), генерируется исключение.
+
 **Примеры**
 
 Запрос:
diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md
index 9638e25d488..b872200f99b 100644
--- a/docs/ru/sql-reference/functions/string-functions.md
+++ b/docs/ru/sql-reference/functions/string-functions.md
@@ -1113,3 +1113,14 @@ A text with tags .
 The content within CDATA
 Do Nothing for 2 Minutes 2:00
 ```
+
+## initcap {#initcap}
+
+Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами.
+
+## initcapUTF8 {#initcapUTF8}
+
+Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8.
+Не учитывает язык, поэтому для турецкого языка результат может быть не совсем верным.
+Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным.
+Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено.
diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h
index 935adbf2b7d..82e4f1122a8 100644
--- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h
+++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h
@@ -67,29 +67,38 @@ struct AggregateFunctionBoundingRatioData
         }
     }
 
-    void serialize(WriteBuffer & buf) const
-    {
-        writeBinary(empty, buf);
-
-        if (!empty)
-        {
-            writePODBinary(left, buf);
-            writePODBinary(right, buf);
-        }
-    }
-
-    void deserialize(ReadBuffer & buf)
-    {
-        readBinary(empty, buf);
-
-        if (!empty)
-        {
-            readPODBinary(left, buf);
-            readPODBinary(right, buf);
-        }
-    }
+    void serialize(WriteBuffer & buf) const;
+    void deserialize(ReadBuffer & buf);
 };
 
+template <std::endian endian>
+inline void transformEndianness(AggregateFunctionBoundingRatioData::Point & p)
+{
+    transformEndianness<endian>(p.x);
+    transformEndianness<endian>(p.y);
+}
+
+void AggregateFunctionBoundingRatioData::serialize(WriteBuffer & buf) const
+{
+    writeBinaryLittleEndian(empty, buf);
+
+    if (!empty)
+    {
+        writeBinaryLittleEndian(left, buf);
+        writeBinaryLittleEndian(right, buf);
+    }
+}
+
+void AggregateFunctionBoundingRatioData::deserialize(ReadBuffer & buf)
+{
+    readBinaryLittleEndian(empty, buf);
+
+    if (!empty)
+    {
+        readBinaryLittleEndian(left, buf);
+        readBinaryLittleEndian(right, buf);
+    }
+}
 
 class AggregateFunctionBoundingRatio final : public IAggregateFunctionDataHelper<AggregateFunctionBoundingRatioData, AggregateFunctionBoundingRatio>
 {
diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSum.h b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
index 199d2706d3a..d64f949825a 100644
--- a/src/AggregateFunctions/AggregateFunctionDeltaSum.h
+++ b/src/AggregateFunctions/AggregateFunctionDeltaSum.h
@@ -103,18 +103,18 @@ public:
 
     void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
     {
-        writeIntBinary(this->data(place).sum, buf);
-        writeIntBinary(this->data(place).first, buf);
-        writeIntBinary(this->data(place).last, buf);
-        writePODBinary(this->data(place).seen, buf);
+        writeBinaryLittleEndian(this->data(place).sum, buf);
+        writeBinaryLittleEndian(this->data(place).first, buf);
+        writeBinaryLittleEndian(this->data(place).last, buf);
+        writeBinaryLittleEndian(this->data(place).seen, buf);
    }
 
    void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const
override { - readIntBinary(this->data(place).sum, buf); - readIntBinary(this->data(place).first, buf); - readIntBinary(this->data(place).last, buf); - readPODBinary(this->data(place).seen, buf); + readBinaryLittleEndian(this->data(place).sum, buf); + readBinaryLittleEndian(this->data(place).first, buf); + readBinaryLittleEndian(this->data(place).last, buf); + readBinaryLittleEndian(this->data(place).seen, buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h index 5ca07bb0bdf..5eeb1425afb 100644 --- a/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h +++ b/src/AggregateFunctions/AggregateFunctionDeltaSumTimestamp.h @@ -144,22 +144,22 @@ public: void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - writeIntBinary(this->data(place).sum, buf); - writeIntBinary(this->data(place).first, buf); - writeIntBinary(this->data(place).first_ts, buf); - writeIntBinary(this->data(place).last, buf); - writeIntBinary(this->data(place).last_ts, buf); - writePODBinary(this->data(place).seen, buf); + writeBinaryLittleEndian(this->data(place).sum, buf); + writeBinaryLittleEndian(this->data(place).first, buf); + writeBinaryLittleEndian(this->data(place).first_ts, buf); + writeBinaryLittleEndian(this->data(place).last, buf); + writeBinaryLittleEndian(this->data(place).last_ts, buf); + writeBinaryLittleEndian(this->data(place).seen, buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - readIntBinary(this->data(place).sum, buf); - readIntBinary(this->data(place).first, buf); - readIntBinary(this->data(place).first_ts, buf); - readIntBinary(this->data(place).last, buf); - readIntBinary(this->data(place).last_ts, buf); - readPODBinary(this->data(place).seen, buf); + readBinaryLittleEndian(this->data(place).sum, buf); + readBinaryLittleEndian(this->data(place).first, buf); + readBinaryLittleEndian(this->data(place).first_ts, buf); + readBinaryLittleEndian(this->data(place).last, buf); + readBinaryLittleEndian(this->data(place).last_ts, buf); + readBinaryLittleEndian(this->data(place).seen, buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index 7a5e6a8cb2d..b5905105457 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -266,19 +266,20 @@ public: void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { const auto & value = this->data(place).value; - size_t size = value.size(); + const size_t size = value.size(); writeVarUInt(size, buf); - buf.write(reinterpret_cast(value.data()), size * sizeof(value[0])); + for (const auto & element : value) + writeBinaryLittleEndian(element, buf); if constexpr (Trait::last) - DB::writeIntBinary(this->data(place).total_values, buf); + writeBinaryLittleEndian(this->data(place).total_values, buf); if constexpr (Trait::sampler == Sampler::RNG) { - DB::writeIntBinary(this->data(place).total_values, buf); + writeBinaryLittleEndian(this->data(place).total_values, buf); WriteBufferFromOwnString rng_buf; rng_buf << this->data(place).rng; - 
DB::writeStringBinary(rng_buf.str(), buf); + writeStringBinary(rng_buf.str(), buf); } } @@ -297,16 +298,17 @@ public: auto & value = this->data(place).value; value.resize_exact(size, arena); - buf.readStrict(reinterpret_cast(value.data()), size * sizeof(value[0])); + for (auto & element : value) + readBinaryLittleEndian(element, buf); if constexpr (Trait::last) - DB::readIntBinary(this->data(place).total_values, buf); + readBinaryLittleEndian(this->data(place).total_values, buf); if constexpr (Trait::sampler == Sampler::RNG) { - DB::readIntBinary(this->data(place).total_values, buf); + readBinaryLittleEndian(this->data(place).total_values, buf); std::string rng_string; - DB::readStringBinary(rng_string, buf); + readStringBinary(rng_string, buf); ReadBufferFromString rng_buf(rng_string); rng_buf >> this->data(place).rng; } @@ -603,14 +605,14 @@ public: node->write(buf); if constexpr (Trait::last) - DB::writeIntBinary(data(place).total_values, buf); + writeBinaryLittleEndian(data(place).total_values, buf); if constexpr (Trait::sampler == Sampler::RNG) { - DB::writeIntBinary(data(place).total_values, buf); + writeBinaryLittleEndian(data(place).total_values, buf); WriteBufferFromOwnString rng_buf; rng_buf << data(place).rng; - DB::writeStringBinary(rng_buf.str(), buf); + writeStringBinary(rng_buf.str(), buf); } } @@ -636,13 +638,13 @@ public: value[i] = Node::read(buf, arena); if constexpr (Trait::last) - DB::readIntBinary(data(place).total_values, buf); + readBinaryLittleEndian(data(place).total_values, buf); if constexpr (Trait::sampler == Sampler::RNG) { - DB::readIntBinary(data(place).total_values, buf); + readBinaryLittleEndian(data(place).total_values, buf); std::string rng_string; - DB::readStringBinary(rng_string, buf); + readStringBinary(rng_string, buf); ReadBufferFromString rng_buf(rng_string); rng_buf >> data(place).rng; } diff --git a/src/AggregateFunctions/QuantileApprox.h b/src/AggregateFunctions/QuantileApprox.h index f58f1396fb4..6b2a6cf4398 100644 --- a/src/AggregateFunctions/QuantileApprox.h +++ b/src/AggregateFunctions/QuantileApprox.h @@ -233,35 +233,35 @@ public: void write(WriteBuffer & buf) const { - writeIntBinary(compress_threshold, buf); - writeFloatBinary(relative_error, buf); - writeIntBinary(count, buf); - writeIntBinary(sampled.size(), buf); + writeBinaryLittleEndian(compress_threshold, buf); + writeBinaryLittleEndian(relative_error, buf); + writeBinaryLittleEndian(count, buf); + writeBinaryLittleEndian(sampled.size(), buf); for (const auto & stats : sampled) { - writeFloatBinary(stats.value, buf); - writeIntBinary(stats.g, buf); - writeIntBinary(stats.delta, buf); + writeBinaryLittleEndian(stats.value, buf); + writeBinaryLittleEndian(stats.g, buf); + writeBinaryLittleEndian(stats.delta, buf); } } void read(ReadBuffer & buf) { - readIntBinary(compress_threshold, buf); - readFloatBinary(relative_error, buf); - readIntBinary(count, buf); + readBinaryLittleEndian(compress_threshold, buf); + readBinaryLittleEndian(relative_error, buf); + readBinaryLittleEndian(count, buf); size_t sampled_len = 0; - readIntBinary(sampled_len, buf); + readBinaryLittleEndian(sampled_len, buf); sampled.resize(sampled_len); for (size_t i = 0; i < sampled_len; ++i) { auto stats = sampled[i]; - readFloatBinary(stats.value, buf); - readIntBinary(stats.g, buf); - readIntBinary(stats.delta, buf); + readBinaryLittleEndian(stats.value, buf); + readBinaryLittleEndian(stats.g, buf); + readBinaryLittleEndian(stats.delta, buf); } } diff --git a/src/AggregateFunctions/ReservoirSampler.h 
b/src/AggregateFunctions/ReservoirSampler.h index 3d723d5aace..7409a3fa0dd 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -207,8 +207,8 @@ public: void read(DB::ReadBuffer & buf) { - DB::readIntBinary(sample_count, buf); - DB::readIntBinary(total_values, buf); + DB::readBinaryLittleEndian(sample_count, buf); + DB::readBinaryLittleEndian(total_values, buf); size_t size = std::min(total_values, sample_count); static constexpr size_t MAX_RESERVOIR_SIZE = 1_GiB; @@ -224,22 +224,22 @@ public: rng_buf >> rng; for (size_t i = 0; i < samples.size(); ++i) - DB::readBinary(samples[i], buf); + DB::readBinaryLittleEndian(samples[i], buf); sorted = false; } void write(DB::WriteBuffer & buf) const { - DB::writeIntBinary(sample_count, buf); - DB::writeIntBinary(total_values, buf); + DB::writeBinaryLittleEndian(sample_count, buf); + DB::writeBinaryLittleEndian(total_values, buf); DB::WriteBufferFromOwnString rng_buf; rng_buf << rng; DB::writeStringBinary(rng_buf.str(), buf); for (size_t i = 0; i < std::min(sample_count, total_values); ++i) - DB::writeBinary(samples[i], buf); + DB::writeBinaryLittleEndian(samples[i], buf); } private: diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index da8933aabaa..abf02547ccd 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6223,7 +6223,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node, const auto & insertion_table = scope_context->getInsertionTable(); if (!insertion_table.empty()) { - const auto & insert_structure = DatabaseCatalog::instance().getTable(insertion_table, scope_context)->getInMemoryMetadataPtr()->getColumns(); + const auto & insert_structure = DatabaseCatalog::instance() + .getTable(insertion_table, scope_context) + ->getInMemoryMetadataPtr() + ->getColumns() + .getInsertable(); DB::ColumnsDescription structure_hint; bool use_columns_from_insert_query = true; diff --git a/src/Common/TransformEndianness.hpp b/src/Common/TransformEndianness.hpp index 4d690d75d9e..0a9055dde15 100644 --- a/src/Common/TransformEndianness.hpp +++ b/src/Common/TransformEndianness.hpp @@ -59,4 +59,10 @@ inline void transformEndianness(std::pair & pair) transformEndianness(pair.first); transformEndianness(pair.second); } + +template +inline void transformEndianness(StrongTypedef & x) +{ + transformEndianness(x.toUnderType()); +} } diff --git a/src/Common/mysqlxx/Pool.cpp b/src/Common/mysqlxx/Pool.cpp index d10889d1f97..64a69c48e1d 100644 --- a/src/Common/mysqlxx/Pool.cpp +++ b/src/Common/mysqlxx/Pool.cpp @@ -25,8 +25,6 @@ void Pool::Entry::incrementRefCount() /// First reference, initialize thread if (data->ref_count.fetch_add(1) == 0) mysql_thread_init(); - - chassert(!data->removed_from_pool); } @@ -43,7 +41,10 @@ void Pool::Entry::decrementRefCount() /// In Pool::Entry::disconnect() we remove connection from the list of pool's connections. /// So now we must deallocate the memory. if (data->removed_from_pool) + { + data->conn.disconnect(); ::delete data; + } } } @@ -230,8 +231,6 @@ void Pool::removeConnection(Connection* connection) std::lock_guard lock(mutex); if (connection) { - if (!connection->removed_from_pool) - connection->conn.disconnect(); connections.remove(connection); connection->removed_from_pool = true; } @@ -240,6 +239,7 @@ void Pool::removeConnection(Connection* connection) void Pool::Entry::disconnect() { + // Remove the Entry from the Pool. 
Actual disconnection is delayed until refcount == 0. pool->removeConnection(data); } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index bfc4a71083d..4adafe5d212 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -410,21 +410,29 @@ inline bool isDateTime(const T & data_type) { return WhichDataType(data_type).is template inline bool isDateTime64(const T & data_type) { return WhichDataType(data_type).isDateTime64(); } -inline bool isEnum(const DataTypePtr & data_type) { return WhichDataType(data_type).isEnum(); } -inline bool isDecimal(const DataTypePtr & data_type) { return WhichDataType(data_type).isDecimal(); } -inline bool isTuple(const DataTypePtr & data_type) { return WhichDataType(data_type).isTuple(); } -inline bool isArray(const DataTypePtr & data_type) { return WhichDataType(data_type).isArray(); } -inline bool isMap(const DataTypePtr & data_type) {return WhichDataType(data_type).isMap(); } -inline bool isInterval(const DataTypePtr & data_type) {return WhichDataType(data_type).isInterval(); } -inline bool isNothing(const DataTypePtr & data_type) { return WhichDataType(data_type).isNothing(); } -inline bool isUUID(const DataTypePtr & data_type) { return WhichDataType(data_type).isUUID(); } -inline bool isIPv4(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv4(); } -inline bool isIPv6(const DataTypePtr & data_type) { return WhichDataType(data_type).isIPv6(); } +template +inline bool isEnum(const T & data_type) { return WhichDataType(data_type).isEnum(); } +template +inline bool isDecimal(const T & data_type) { return WhichDataType(data_type).isDecimal(); } +template +inline bool isTuple(const T & data_type) { return WhichDataType(data_type).isTuple(); } +template +inline bool isArray(const T & data_type) { return WhichDataType(data_type).isArray(); } +template +inline bool isMap(const T & data_type) {return WhichDataType(data_type).isMap(); } +template +inline bool isInterval(const T & data_type) {return WhichDataType(data_type).isInterval(); } +template +inline bool isNothing(const T & data_type) { return WhichDataType(data_type).isNothing(); } +template +inline bool isUUID(const T & data_type) { return WhichDataType(data_type).isUUID(); } +template +inline bool isIPv4(const T & data_type) { return WhichDataType(data_type).isIPv4(); } +template +inline bool isIPv6(const T & data_type) { return WhichDataType(data_type).isIPv6(); } template -inline bool isObject(const T & data_type) -{ - return WhichDataType(data_type).isObject(); +inline bool isObject(const T & data_type) { return WhichDataType(data_type).isObject(); } template diff --git a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp index 2c0feab6d86..b755bd109d0 100644 --- a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp +++ b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h index 00c4cfe7284..b272e88d17d 100644 --- a/src/Functions/FunctionsConversion.h +++ b/src/Functions/FunctionsConversion.h @@ -203,6 +203,21 @@ struct ConvertImpl } } + if constexpr (std::is_same_v && std::is_same_v) + { + static_assert(std::is_same_v, "UInt128 and UUID types must be same"); + if constexpr (std::endian::native == std::endian::little) + { + vec_to[i].items[1] = vec_from[i].toUnderType().items[0]; + vec_to[i].items[0] = 
vec_from[i].toUnderType().items[1]; + } + else + { + vec_to[i] = vec_from[i].toUnderType(); + } + continue; + } + if constexpr (std::is_same_v != std::is_same_v) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, diff --git a/src/Functions/LowerUpperUTF8Impl.h b/src/Functions/LowerUpperUTF8Impl.h index f6b18439fd1..460f75f9bde 100644 --- a/src/Functions/LowerUpperUTF8Impl.h +++ b/src/Functions/LowerUpperUTF8Impl.h @@ -133,8 +133,6 @@ struct LowerUpperUTF8Impl } else { - static const Poco::UTF8Encoding utf8; - size_t src_sequence_length = UTF8::seqLength(*src); /// In case partial buffer was passed (due to SSE optimization) /// we cannot convert it with current src_end, but we may have more diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index d17c223cc2f..47e865785d4 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Functions/array/arrayJaccardIndex.cpp b/src/Functions/array/arrayJaccardIndex.cpp new file mode 100644 index 00000000000..755e0f8278f --- /dev/null +++ b/src/Functions/array/arrayJaccardIndex.cpp @@ -0,0 +1,161 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int LOGICAL_ERROR; +} + +class FunctionArrayJaccardIndex : public IFunction +{ +private: + using ResultType = Float64; + + struct LeftAndRightSizes + { + size_t left_size; + size_t right_size; + }; + + template + static LeftAndRightSizes getArraySizes(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, size_t i) + { + size_t left_size; + size_t right_size; + + if constexpr (left_is_const) + left_size = left_offsets[0]; + else + left_size = left_offsets[i] - left_offsets[i - 1]; + + if constexpr (right_is_const) + right_size = right_offsets[0]; + else + right_size = right_offsets[i] - right_offsets[i - 1]; + + return {left_size, right_size}; + } + + template + static void vector(const ColumnArray::Offsets & intersect_offsets, const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + { + for (size_t i = 0; i < res.size(); ++i) + { + LeftAndRightSizes sizes = getArraySizes(left_offsets, right_offsets, i); + size_t intersect_size = intersect_offsets[i] - intersect_offsets[i - 1]; + res[i] = static_cast(intersect_size) / (sizes.left_size + sizes.right_size - intersect_size); + } + } + + template + static void vectorWithEmptyIntersect(const ColumnArray::Offsets & left_offsets, const ColumnArray::Offsets & right_offsets, PaddedPODArray & res) + { + for (size_t i = 0; i < res.size(); ++i) + { + LeftAndRightSizes sizes = getArraySizes(left_offsets, right_offsets, i); + if (sizes.left_size == 0 && sizes.right_size == 0) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregate functions cannot be performed on two empty arrays"); + res[i] = 0; + } + } + +public: + static constexpr auto name = "arrayJaccardIndex"; + String getName() const override { return name; } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + explicit FunctionArrayJaccardIndex(ContextPtr context_) : context(context_) {} + size_t getNumberOfArguments() const override { return 2; } + bool 
isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo &) const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"array_1", &isArray, nullptr, "Array"}, + {"array_2", &isArray, nullptr, "Array"}, + }; + validateFunctionArgumentTypes(*this, arguments, args); + return std::make_shared>(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + auto cast_to_array = [&](const ColumnWithTypeAndName & col) -> std::pair + { + if (const ColumnConst * col_const = typeid_cast(col.column.get())) + { + const ColumnArray * col_const_array = checkAndGetColumn(col_const->getDataColumnPtr().get()); + return {col_const_array, true}; + } + else if (const ColumnArray * col_non_const_array = checkAndGetColumn(col.column.get())) + return {col_non_const_array, false}; + else + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument for function {} must be array but it has type {}.", col.column->getName(), getName()); + }; + + const auto & [left_array, left_is_const] = cast_to_array(arguments[0]); + const auto & [right_array, right_is_const] = cast_to_array(arguments[1]); + + auto intersect_array = FunctionFactory::instance().get("arrayIntersect", context)->build(arguments); + + ColumnWithTypeAndName intersect_column; + intersect_column.type = intersect_array->getResultType(); + intersect_column.column = intersect_array->execute(arguments, intersect_column.type, input_rows_count); + + const auto * intersect_column_type = checkAndGetDataType(intersect_column.type.get()); + if (!intersect_column_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected return type for function arrayIntersect"); + + auto col_res = ColumnVector::create(); + typename ColumnVector::Container & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); + +#define EXECUTE_VECTOR(left_is_const, right_is_const) \ + if (typeid_cast(intersect_column_type->getNestedType().get())) \ + vectorWithEmptyIntersect(left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + else \ + { \ + const ColumnArray * intersect_column_array = checkAndGetColumn(intersect_column.column.get()); \ + vector(intersect_column_array->getOffsets(), left_array->getOffsets(), right_array->getOffsets(), vec_res); \ + } + + if (!left_is_const && !right_is_const) + EXECUTE_VECTOR(false, false) + else if (!left_is_const && right_is_const) + EXECUTE_VECTOR(false, true) + else if (left_is_const && !right_is_const) + EXECUTE_VECTOR(true, false) + else + EXECUTE_VECTOR(true, true) + +#undef EXECUTE_VECTOR + + return col_res; + } + +private: + ContextPtr context; +}; + +REGISTER_FUNCTION(ArrayJaccardIndex) +{ + factory.registerFunction(); +} + +} diff --git a/src/Functions/array/arrayNorm.cpp b/src/Functions/array/arrayNorm.cpp index e14133f931f..027a33d094c 100644 --- a/src/Functions/array/arrayNorm.cpp +++ b/src/Functions/array/arrayNorm.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include diff --git a/src/Functions/array/length.cpp b/src/Functions/array/length.cpp index f09ad17892b..91a5e5fdec2 100644 --- a/src/Functions/array/length.cpp +++ b/src/Functions/array/length.cpp @@ -101,6 +101,7 @@ It is ok to have ASCII NUL bytes in strings, and they will be counted as well. 
.categories{"String", "Array"} }, FunctionFactory::CaseInsensitive); + factory.registerAlias("OCTET_LENGTH", "length", FunctionFactory::CaseInsensitive); } } diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index f1f0fef8fd9..57679ccb180 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -3,9 +3,12 @@ #include #include #include +#include #include #include +#include #include +#include #include #include #include @@ -21,6 +24,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; } @@ -43,6 +47,7 @@ private: size_t getNumberOfArguments() const override { return 0; } bool isVariadic() const override { return true; } + bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } @@ -55,13 +60,18 @@ private: getName(), arguments.size()); } + if (std::find_if (arguments.cbegin(), arguments.cend(), [](const auto & arg) { return arg->onlyNull(); }) != arguments.cend()) + return makeNullable(std::make_shared()); + DataTypes arg_types; for (size_t i = 0, size = arguments.size(); i < size; ++i) { - if (i < 2 && WhichDataType(arguments[i]).isIPv4()) + DataTypePtr type_no_nullable = removeNullable(arguments[i]); + + if (i < 2 && WhichDataType(type_no_nullable).isIPv4()) arg_types.emplace_back(std::make_shared()); - else if (isInteger(arguments[i])) - arg_types.push_back(arguments[i]); + else if (isInteger(type_no_nullable)) + arg_types.push_back(type_no_nullable); else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[i]->getName(), getName()); @@ -376,6 +386,10 @@ private: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + NullPresence null_presence = getNullPresense(arguments); + if (null_presence.has_null_constant) + return result_type->createColumnConstWithDefaultValue(input_rows_count); + DataTypePtr elem_type = checkAndGetDataType(result_type.get())->getNestedType(); WhichDataType which(elem_type); @@ -386,10 +400,31 @@ private: "for unsigned/signed integers up to 64 bit", getName()); } + auto throwIfNullValue = [&](const ColumnWithTypeAndName & col) + { + if (!col.type->isNullable()) + return; + const ColumnNullable * nullable_col = checkAndGetColumn(*col.column); + if (!nullable_col) + nullable_col = checkAndGetColumnConstData(col.column.get()); + if (!nullable_col) + return; + const auto & null_map = nullable_col->getNullMapData(); + if (!memoryIsZero(null_map.data(), 0, null_map.size())) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal (null) value column {} of argument of function {}", col.column->getName(), getName()); + }; + ColumnPtr res; if (arguments.size() == 1) { + throwIfNullValue(arguments[0]); const auto * col = arguments[0].column.get(); + if (arguments[0].type->isNullable()) + { + const auto * nullable = checkAndGetColumn(*arguments[0].column); + col = nullable->getNestedColumnPtr().get(); + } + if (!((res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = 
executeInternal(col)))) @@ -404,6 +439,7 @@ private: for (size_t i = 0; i < arguments.size(); ++i) { + throwIfNullValue(arguments[i]); if (i == 1) columns_holder[i] = castColumn(arguments[i], elem_type)->convertToFullColumnIfConst(); else diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 8fefc2d5b8a..8288d872f18 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include diff --git a/src/Functions/ifNotFinite.cpp b/src/Functions/ifNotFinite.cpp index 5ce5d0ede70..d7af10eec44 100644 --- a/src/Functions/ifNotFinite.cpp +++ b/src/Functions/ifNotFinite.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include diff --git a/src/Functions/initcap.cpp b/src/Functions/initcap.cpp new file mode 100644 index 00000000000..5460ee06792 --- /dev/null +++ b/src/Functions/initcap.cpp @@ -0,0 +1,66 @@ +#include +#include +#include + +namespace DB +{ +namespace +{ + +struct InitcapImpl +{ + static void vector(const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (data.empty()) + return; + res_data.resize(data.size()); + res_offsets.assign(offsets); + array(data.data(), data.data() + data.size(), res_data.data()); + } + + static void vectorFixed(const ColumnString::Chars & data, size_t /*n*/, ColumnString::Chars & res_data) + { + res_data.resize(data.size()); + array(data.data(), data.data() + data.size(), res_data.data()); + } + +private: + static void array(const UInt8 * src, const UInt8 * src_end, UInt8 * dst) + { + bool prev_alphanum = false; + + for (; src < src_end; ++src, ++dst) + { + char c = *src; + bool alphanum = isAlphaNumericASCII(c); + if (alphanum && !prev_alphanum) + if (isAlphaASCII(c)) + *dst = toUpperIfAlphaASCII(c); + else + *dst = c; + else if (isAlphaASCII(c)) + *dst = toLowerIfAlphaASCII(c); + else + *dst = c; + prev_alphanum = alphanum; + } + } +}; + +struct NameInitcap +{ + static constexpr auto name = "initcap"; +}; +using FunctionInitcap = FunctionStringToString; + +} + +REGISTER_FUNCTION(Initcap) +{ + factory.registerFunction({}, FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/initcapUTF8.cpp b/src/Functions/initcapUTF8.cpp new file mode 100644 index 00000000000..076dcff6622 --- /dev/null +++ b/src/Functions/initcapUTF8.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + +struct InitcapUTF8Impl +{ + static void vector( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + ColumnString::Chars & res_data, + ColumnString::Offsets & res_offsets) + { + if (data.empty()) + return; + res_data.resize(data.size()); + res_offsets.assign(offsets); + array(data.data(), data.data() + data.size(), offsets, res_data.data()); + } + + [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function initcapUTF8 cannot work with FixedString argument"); + } + + static void processCodePoint(const UInt8 *& src, const UInt8 * src_end, UInt8 *& dst, bool& prev_alphanum) + { + size_t src_sequence_length = UTF8::seqLength(*src); + auto src_code_point = UTF8::convertUTF8ToCodePoint(src, src_end - src); + + if (src_code_point) + { + bool alpha = Poco::Unicode::isAlpha(*src_code_point); + bool alphanum = alpha || 
Poco::Unicode::isDigit(*src_code_point); + + int dst_code_point = *src_code_point; + if (alphanum && !prev_alphanum) + { + if (alpha) + dst_code_point = Poco::Unicode::toUpper(*src_code_point); + } + else if (alpha) + { + dst_code_point = Poco::Unicode::toLower(*src_code_point); + } + prev_alphanum = alphanum; + if (dst_code_point > 0) + { + size_t dst_sequence_length = UTF8::convertCodePointToUTF8(dst_code_point, dst, src_end - src); + assert(dst_sequence_length <= 4); + + if (dst_sequence_length == src_sequence_length) + { + src += dst_sequence_length; + dst += dst_sequence_length; + return; + } + } + } + + *dst = *src; + ++dst; + ++src; + prev_alphanum = false; + } + +private: + + static void array(const UInt8 * src, const UInt8 * src_end, const ColumnString::Offsets & offsets, UInt8 * dst) + { + const auto * offset_it = offsets.begin(); + const UInt8 * begin = src; + + /// handle remaining symbols, row by row (to avoid influence of bad UTF8 symbols from one row, to another) + while (src < src_end) + { + const UInt8 * row_end = begin + *offset_it; + chassert(row_end >= src); + bool prev_alphanum = false; + while (src < row_end) + processCodePoint(src, row_end, dst, prev_alphanum); + ++offset_it; + } + } +}; + +struct NameInitcapUTF8 +{ + static constexpr auto name = "initcapUTF8"; +}; + +using FunctionInitcapUTF8 = FunctionStringToString; + +} + +REGISTER_FUNCTION(InitcapUTF8) +{ + factory.registerFunction(); +} + +} diff --git a/src/Interpreters/AsynchronousInsertLog.cpp b/src/Interpreters/AsynchronousInsertLog.cpp index eeccd9ad92e..d7c9059d9de 100644 --- a/src/Interpreters/AsynchronousInsertLog.cpp +++ b/src/Interpreters/AsynchronousInsertLog.cpp @@ -55,21 +55,10 @@ void AsynchronousInsertLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insert(event_time); columns[i++]->insert(event_time_microseconds); - const auto & insert_query = assert_cast(*query); - columns[i++]->insert(queryToString(insert_query)); - - if (insert_query.table_id) - { - columns[i++]->insert(insert_query.table_id.getDatabaseName()); - columns[i++]->insert(insert_query.table_id.getTableName()); - } - else - { - columns[i++]->insertDefault(); - columns[i++]->insertDefault(); - } - - columns[i++]->insert(insert_query.format); + columns[i++]->insert(query_for_logging); + columns[i++]->insert(database); + columns[i++]->insert(table); + columns[i++]->insert(format); columns[i++]->insert(query_id); columns[i++]->insert(bytes); columns[i++]->insert(rows); diff --git a/src/Interpreters/AsynchronousInsertLog.h b/src/Interpreters/AsynchronousInsertLog.h index 372d1cf5a1b..a76db78d3ea 100644 --- a/src/Interpreters/AsynchronousInsertLog.h +++ b/src/Interpreters/AsynchronousInsertLog.h @@ -21,8 +21,11 @@ struct AsynchronousInsertLogElement time_t event_time{}; Decimal64 event_time_microseconds{}; - ASTPtr query; String query_id; + String query_for_logging; + String database; + String table; + String format; UInt64 bytes{}; UInt64 rows{}; String exception; diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index e6417de53b4..0da762699d2 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -1,33 +1,37 @@ #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include #include -#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include +#include +#include +#include +#include +#include +#include +#include namespace CurrentMetrics @@ -202,6 +206,7 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context) query = query->clone(); const auto & settings = query_context->getSettingsRef(); auto & insert_query = query->as(); + insert_query.async_insert_flush = true; InterpreterInsertQuery interpreter(query, query_context, settings.insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); @@ -398,6 +403,12 @@ try const auto * log = &Poco::Logger::get("AsynchronousInsertQueue"); const auto & insert_query = assert_cast(*key.query); auto insert_context = Context::createCopy(global_context); + DB::CurrentThread::QueryScope query_scope_holder(insert_context); + bool internal = false; // To enable logging this query + bool async_insert = true; + + /// Disabled query spans. Could be activated by initializing this to a SpanHolder + std::shared_ptr query_span{nullptr}; /// 'resetParser' doesn't work for parallel parsing. key.settings.set("input_format_parallel_parsing", false); @@ -405,12 +416,67 @@ try insert_context->setSettings(key.settings); /// Set initial_query_id, because it's used in InterpreterInsertQuery for table lock. - insert_context->getClientInfo().query_kind = ClientInfo::QueryKind::INITIAL_QUERY; insert_context->setCurrentQueryId(""); - InterpreterInsertQuery interpreter(key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); - auto pipeline = interpreter.execute().pipeline; - assert(pipeline.pushing()); + auto insert_query_id = insert_context->getCurrentQueryId(); + auto query_start_time = std::chrono::system_clock::now(); + Stopwatch start_watch{CLOCK_MONOTONIC}; + ClientInfo & client_info = insert_context->getClientInfo(); + client_info.query_kind = ClientInfo::QueryKind::INITIAL_QUERY; + client_info.initial_query_start_time = timeInSeconds(query_start_time); + client_info.initial_query_start_time_microseconds = timeInMicroseconds(query_start_time); + client_info.current_query_id = insert_query_id; + client_info.initial_query_id = insert_query_id; + size_t log_queries_cut_to_length = insert_context->getSettingsRef().log_queries_cut_to_length; + String query_for_logging = insert_query.hasSecretParts() + ? 
insert_query.formatForLogging(log_queries_cut_to_length) + : wipeSensitiveDataAndCutToLength(serializeAST(insert_query), log_queries_cut_to_length); + + /// We add it to the process list so + /// a) it appears in system.processes + /// b) can be cancelled if we want to + /// c) has an associated process list element where runtime metrics are stored + auto process_list_entry + = insert_context->getProcessList().insert(query_for_logging, key.query.get(), insert_context, start_watch.getStart()); + auto query_status = process_list_entry->getQueryStatus(); + insert_context->setProcessListElement(std::move(query_status)); + + String query_database{}; + String query_table{}; + if (insert_query.table_id) + { + query_database = insert_query.table_id.getDatabaseName(); + query_table = insert_query.table_id.getTableName(); + insert_context->setInsertionTable(insert_query.table_id); + } + std::unique_ptr interpreter; + QueryPipeline pipeline; + QueryLogElement query_log_elem; + + try + { + interpreter = std::make_unique( + key.query, insert_context, key.settings.insert_allow_materialized_columns, false, false, true); + pipeline = interpreter->execute().pipeline; + chassert(pipeline.pushing()); + + query_log_elem = logQueryStart( + query_start_time, + insert_context, + query_for_logging, + key.query, + pipeline, + interpreter, + internal, + query_database, + query_table, + async_insert); + } + catch (...) + { + logExceptionBeforeStart(query_for_logging, insert_context, key.query, query_span, start_watch.elapsedMilliseconds()); + throw; + } auto header = pipeline.getHeader(); auto format = getInputFormatFromASTInsertQuery(key.query, false, header, insert_context, nullptr); @@ -470,7 +536,10 @@ try AsynchronousInsertLogElement elem; elem.event_time = timeInSeconds(entry->create_time); elem.event_time_microseconds = timeInMicroseconds(entry->create_time); - elem.query = key.query; + elem.query_for_logging = query_for_logging; + elem.database = query_database; + elem.table = query_table; + elem.format = insert_query.format; elem.query_id = entry->query_id; elem.bytes = bytes_size; elem.rows = num_rows; @@ -493,7 +562,6 @@ try } format->addBuffer(std::move(last_buffer)); - auto insert_query_id = insert_context->getCurrentQueryId(); ProfileEvents::increment(ProfileEvents::AsyncInsertRows, total_rows); auto finish_entries = [&] @@ -531,9 +599,14 @@ try LOG_INFO(log, "Flushed {} rows, {} bytes for query '{}'", total_rows, total_bytes, key.query_str); + + bool pulling_pipeline = false; + logQueryFinish(query_log_elem, insert_context, key.query, pipeline, pulling_pipeline, query_span, internal); } catch (...) 
{
+    bool log_error = true;
+    logQueryException(query_log_elem, insert_context, start_watch, key.query, query_span, internal, log_error);
     if (!log_elements.empty())
     {
         auto exception = getCurrentExceptionMessage(false);
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 5019933c2af..10a46fda24e 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -1524,7 +1524,11 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const
     uint64_t use_structure_from_insertion_table_in_table_functions = getSettingsRef().use_structure_from_insertion_table_in_table_functions;
     if (use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint() && hasInsertionTable())
     {
-        const auto & insert_structure = DatabaseCatalog::instance().getTable(getInsertionTable(), shared_from_this())->getInMemoryMetadataPtr()->getColumns();
+        const auto & insert_structure = DatabaseCatalog::instance()
+                                            .getTable(getInsertionTable(), shared_from_this())
+                                            ->getInMemoryMetadataPtr()
+                                            ->getColumns()
+                                            .getInsertable();
         DB::ColumnsDescription structure_hint;
 
         bool use_columns_from_insert_query = true;
diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp
index daa8d434ab6..1503e396298 100644
--- a/src/Interpreters/ProcessList.cpp
+++ b/src/Interpreters/ProcessList.cpp
@@ -37,8 +37,8 @@ static bool isUnlimitedQuery(const IAST * ast)
     if (!ast)
         return false;
 
-    /// It is KILL QUERY
-    if (ast->as<ASTKillQueryQuery>())
+    /// It is KILL QUERY or an async insert flush query
+    if (ast->as<ASTKillQueryQuery>() || ast->getQueryKind() == IAST::QueryKind::AsyncInsertFlush)
         return true;
 
     /// It is SELECT FROM system.processes
diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h
index e5a61497ff2..2eea49e1267 100644
--- a/src/Interpreters/ProcessList.h
+++ b/src/Interpreters/ProcessList.h
@@ -393,7 +393,7 @@ public:
     /** Register running query. Returns refcounted object, that will remove element from list in destructor.
       * If too many running queries - wait for not more than specified (see settings) amount of time.
       * If timeout is passed - throw an exception.
-      * Don't count KILL QUERY queries.
+      * Don't count KILL QUERY queries or async insert flush queries.
       */
     EntryPtr insert(const String & query_, const IAST * ast, ContextMutablePtr query_context, UInt64 watch_start_nanoseconds);
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 694226af6b0..4b76d20f31d 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -155,7 +155,6 @@ static void logQuery(const String & query, ContextPtr context, bool internal, QueryProcessingStage::Enum stage)
     }
 }
-
 
 /// Call this inside catch block.
static void setExceptionStackTrace(QueryLogElement & elem) { @@ -208,7 +207,332 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er LOG_INFO(&Poco::Logger::get("executeQuery"), message); } -static void onExceptionBeforeStart( +static void +addStatusInfoToQueryElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) +{ + const auto time_now = std::chrono::system_clock::now(); + UInt64 elapsed_microseconds = info.elapsed_microseconds; + element.event_time = timeInSeconds(time_now); + element.event_time_microseconds = timeInMicroseconds(time_now); + element.query_duration_ms = elapsed_microseconds / 1000; + + ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); + if (query_ast->as() || query_ast->as()) + { + ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, elapsed_microseconds); + } + else if (query_ast->as()) + { + ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); + } + else + { + ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); + } + + element.read_rows = info.read_rows; + element.read_bytes = info.read_bytes; + + element.written_rows = info.written_rows; + element.written_bytes = info.written_bytes; + + element.memory_usage = info.peak_memory_usage > 0 ? info.peak_memory_usage : 0; + + element.thread_ids = info.thread_ids; + element.profile_counters = info.profile_counters; + + /// We need to refresh the access info since dependent views might have added extra information, either during + /// creation of the view (PushingToViews chain) or while executing its internal SELECT + const auto & access_info = context_ptr->getQueryAccessInfo(); + element.query_databases.insert(access_info.databases.begin(), access_info.databases.end()); + element.query_tables.insert(access_info.tables.begin(), access_info.tables.end()); + element.query_columns.insert(access_info.columns.begin(), access_info.columns.end()); + element.query_partitions.insert(access_info.partitions.begin(), access_info.partitions.end()); + element.query_projections.insert(access_info.projections.begin(), access_info.projections.end()); + element.query_views.insert(access_info.views.begin(), access_info.views.end()); + + const auto & factories_info = context_ptr->getQueryFactoriesInfo(); + element.used_aggregate_functions = factories_info.aggregate_functions; + element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators; + element.used_database_engines = factories_info.database_engines; + element.used_data_type_families = factories_info.data_type_families; + element.used_dictionaries = factories_info.dictionaries; + element.used_formats = factories_info.formats; + element.used_functions = factories_info.functions; + element.used_storages = factories_info.storages; + element.used_table_functions = factories_info.table_functions; + + element.async_read_counters = context_ptr->getAsyncReadCounters(); +} + + +QueryLogElement logQueryStart( + const std::chrono::time_point & query_start_time, + const ContextMutablePtr & context, + const String & query_for_logging, + const ASTPtr & query_ast, + const QueryPipeline & pipeline, + const std::unique_ptr & interpreter, + bool internal, + const String & query_database, + const String & query_table, + bool async_insert) +{ + const Settings & settings = context->getSettingsRef(); + + QueryLogElement elem; + + elem.type = 
QueryLogElementType::QUERY_START; + elem.event_time = timeInSeconds(query_start_time); + elem.event_time_microseconds = timeInMicroseconds(query_start_time); + elem.query_start_time = timeInSeconds(query_start_time); + elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); + + elem.current_database = context->getCurrentDatabase(); + elem.query = query_for_logging; + if (settings.log_formatted_queries) + elem.formatted_query = queryToString(query_ast); + elem.normalized_query_hash = normalizedQueryHash(query_for_logging); + elem.query_kind = query_ast->getQueryKind(); + + elem.client_info = context->getClientInfo(); + + if (auto txn = context->getCurrentTransaction()) + elem.tid = txn->tid; + + bool log_queries = settings.log_queries && !internal; + + /// Log into system table start of query execution, if need. + if (log_queries) + { + /// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails. + if (pipeline.initialized()) + { + const auto & info = context->getQueryAccessInfo(); + elem.query_databases = info.databases; + elem.query_tables = info.tables; + elem.query_columns = info.columns; + elem.query_partitions = info.partitions; + elem.query_projections = info.projections; + elem.query_views = info.views; + } + + if (async_insert) + InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); + else if (interpreter) + interpreter->extendQueryLogElem(elem, query_ast, context, query_database, query_table); + + if (settings.log_query_settings) + elem.query_settings = std::make_shared(context->getSettingsRef()); + + elem.log_comment = settings.log_comment; + if (elem.log_comment.size() > settings.max_query_size) + elem.log_comment.resize(settings.max_query_size); + + if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + } + + return elem; +} + +void logQueryFinish( + QueryLogElement & elem, + const ContextMutablePtr & context, + const ASTPtr & query_ast, + const QueryPipeline & query_pipeline, + bool pulling_pipeline, + std::shared_ptr query_span, + bool internal) +{ + const Settings & settings = context->getSettingsRef(); + auto log_queries = settings.log_queries && !internal; + auto log_queries_min_type = settings.log_queries_min_type; + auto log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(); + auto log_processors_profiles = settings.log_processors_profiles; + + QueryStatusPtr process_list_elem = context->getProcessListElement(); + if (process_list_elem) + { + /// Update performance counters before logging to query_log + CurrentThread::finalizePerformanceCounters(); + + QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); + elem.type = QueryLogElementType::QUERY_FINISH; + + addStatusInfoToQueryElement(elem, info, query_ast, context); + + if (pulling_pipeline) + { + query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes); + } + else /// will be used only for ordinary INSERT queries + { + auto progress_out = process_list_elem->getProgressOut(); + elem.result_rows = progress_out.written_rows; + elem.result_bytes = progress_out.written_bytes; + } + + auto progress_callback = context->getProgressCallback(); + if (progress_callback) + { + Progress p; + p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}}); + progress_callback(p); + } + + if 
(elem.read_rows != 0) + { + double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; + double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; + LOG_DEBUG( + &Poco::Logger::get("executeQuery"), + "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", + elem.read_rows, + ReadableSize(elem.read_bytes), + elapsed_seconds, + rows_per_second, + ReadableSize(elem.read_bytes / elapsed_seconds)); + } + + if (log_queries && elem.type >= log_queries_min_type + && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + if (log_processors_profiles) + { + if (auto processors_profile_log = context->getProcessorsProfileLog()) + { + ProcessorProfileLogElement processor_elem; + processor_elem.event_time = elem.event_time; + processor_elem.event_time_microseconds = elem.event_time_microseconds; + processor_elem.initial_query_id = elem.client_info.initial_query_id; + processor_elem.query_id = elem.client_info.current_query_id; + + auto get_proc_id = [](const IProcessor & proc) -> UInt64 { return reinterpret_cast(&proc); }; + + for (const auto & processor : query_pipeline.getProcessors()) + { + std::vector parents; + for (const auto & port : processor->getOutputs()) + { + if (!port.isConnected()) + continue; + const IProcessor & next = port.getInputPort().getProcessor(); + parents.push_back(get_proc_id(next)); + } + + processor_elem.id = get_proc_id(*processor); + processor_elem.parent_ids = std::move(parents); + + processor_elem.plan_step = reinterpret_cast(processor->getQueryPlanStep()); + processor_elem.plan_group = processor->getQueryPlanStepGroup(); + + processor_elem.processor_name = processor->getName(); + + /// NOTE: convert this to UInt64 + processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); + processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); + processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); + + auto stats = processor->getProcessorDataStats(); + processor_elem.input_rows = stats.input_rows; + processor_elem.input_bytes = stats.input_bytes; + processor_elem.output_rows = stats.output_rows; + processor_elem.output_bytes = stats.output_bytes; + + processors_profile_log->add(processor_elem); + } + } + } + } + + if (query_span) + { + query_span->addAttribute("db.statement", elem.query); + query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); + query_span->addAttribute("clickhouse.query_status", "QueryFinish"); + query_span->addAttributeIfNotEmpty("clickhouse.tracestate", OpenTelemetry::CurrentContext().tracestate); + query_span->addAttributeIfNotZero("clickhouse.read_rows", elem.read_rows); + query_span->addAttributeIfNotZero("clickhouse.read_bytes", elem.read_bytes); + query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows); + query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes); + query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage); + query_span->finish(); + } +} + +void logQueryException( + QueryLogElement & elem, + const ContextMutablePtr & context, + const Stopwatch & start_watch, + const ASTPtr & query_ast, + std::shared_ptr query_span, + bool internal, + bool log_error) +{ + const Settings & settings = context->getSettingsRef(); + auto log_queries = settings.log_queries && !internal; + auto log_queries_min_type = settings.log_queries_min_type; + auto 
log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(); + + elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; + elem.exception_code = getCurrentExceptionCode(); + auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); + elem.exception = std::move(exception_message.text); + elem.exception_format_string = exception_message.format_string; + + QueryStatusPtr process_list_elem = context->getProcessListElement(); + + /// Update performance counters before logging to query_log + CurrentThread::finalizePerformanceCounters(); + const auto time_now = std::chrono::system_clock::now(); + elem.event_time = timeInSeconds(time_now); + elem.event_time_microseconds = timeInMicroseconds(time_now); + + if (process_list_elem) + { + QueryStatusInfo info = process_list_elem->getInfo(true, settings.log_profile_events, false); + addStatusInfoToQueryElement(elem, info, query_ast, context); + } + else + { + elem.query_duration_ms = start_watch.elapsedMilliseconds(); + } + + if (settings.calculate_text_stack_trace && log_error) + setExceptionStackTrace(elem); + logException(context, elem, log_error); + + /// In case of exception we log internal queries also + if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) + { + if (auto query_log = context->getQueryLog()) + query_log->add(elem); + } + + ProfileEvents::increment(ProfileEvents::FailedQuery); + if (query_ast->as() || query_ast->as()) + ProfileEvents::increment(ProfileEvents::FailedSelectQuery); + else if (query_ast->as()) + ProfileEvents::increment(ProfileEvents::FailedInsertQuery); + + if (query_span) + { + query_span->addAttribute("db.statement", elem.query); + query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); + query_span->addAttribute("clickhouse.exception", elem.exception); + query_span->addAttribute("clickhouse.exception_code", elem.exception_code); + query_span->finish(); + } +} + +void logExceptionBeforeStart( const String & query_for_logging, ContextPtr context, ASTPtr ast, @@ -431,7 +755,7 @@ static std::tuple executeQueryImpl( logQuery(query_for_logging, context, internal, stage); if (!internal) - onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); + logExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds()); throw; } @@ -804,132 +1128,23 @@ static std::tuple executeQueryImpl( /// Everything related to query log. { - QueryLogElement elem; - - elem.type = QueryLogElementType::QUERY_START; - - elem.event_time = timeInSeconds(query_start_time); - elem.event_time_microseconds = timeInMicroseconds(query_start_time); - elem.query_start_time = timeInSeconds(query_start_time); - elem.query_start_time_microseconds = timeInMicroseconds(query_start_time); - - elem.current_database = context->getCurrentDatabase(); - elem.query = query_for_logging; - if (settings.log_formatted_queries) - elem.formatted_query = queryToString(ast); - elem.normalized_query_hash = normalizedQueryHash(query_for_logging); - elem.query_kind = ast->getQueryKind(); - - elem.client_info = client_info; - - if (auto txn = context->getCurrentTransaction()) - elem.tid = txn->tid; - - bool log_queries = settings.log_queries && !internal; - - /// Log into system table start of query execution, if need. 
- if (log_queries) - { - /// This check is not obvious, but without it 01220_scalar_optimization_in_alter fails. - if (pipeline.initialized()) - { - const auto & info = context->getQueryAccessInfo(); - elem.query_databases = info.databases; - elem.query_tables = info.tables; - elem.query_columns = info.columns; - elem.query_partitions = info.partitions; - elem.query_projections = info.projections; - elem.query_views = info.views; - } - - if (async_insert) - InterpreterInsertQuery::extendQueryLogElemImpl(elem, context); - else if (interpreter) - interpreter->extendQueryLogElem(elem, ast, context, query_database, query_table); - - if (settings.log_query_settings) - elem.query_settings = std::make_shared(context->getSettingsRef()); - - elem.log_comment = settings.log_comment; - if (elem.log_comment.size() > settings.max_query_size) - elem.log_comment.resize(settings.max_query_size); - - if (elem.type >= settings.log_queries_min_type && !settings.log_queries_min_query_duration_ms.totalMilliseconds()) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - } - - /// Common code for finish and exception callbacks - auto status_info_to_query_log - = [](QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr) mutable - { - const auto time_now = std::chrono::system_clock::now(); - UInt64 elapsed_microseconds = info.elapsed_microseconds; - element.event_time = timeInSeconds(time_now); - element.event_time_microseconds = timeInMicroseconds(time_now); - element.query_duration_ms = elapsed_microseconds / 1000; - - ProfileEvents::increment(ProfileEvents::QueryTimeMicroseconds, elapsed_microseconds); - if (query_ast->as() || query_ast->as()) - { - ProfileEvents::increment(ProfileEvents::SelectQueryTimeMicroseconds, elapsed_microseconds); - } - else if (query_ast->as()) - { - ProfileEvents::increment(ProfileEvents::InsertQueryTimeMicroseconds, elapsed_microseconds); - } - else - { - ProfileEvents::increment(ProfileEvents::OtherQueryTimeMicroseconds, elapsed_microseconds); - } - - element.read_rows = info.read_rows; - element.read_bytes = info.read_bytes; - - element.written_rows = info.written_rows; - element.written_bytes = info.written_bytes; - - element.memory_usage = info.peak_memory_usage > 0 ? 
info.peak_memory_usage : 0; - - element.thread_ids = info.thread_ids; - element.profile_counters = info.profile_counters; - - /// We need to refresh the access info since dependent views might have added extra information, either during - /// creation of the view (PushingToViews chain) or while executing its internal SELECT - const auto & access_info = context_ptr->getQueryAccessInfo(); - element.query_databases.insert(access_info.databases.begin(), access_info.databases.end()); - element.query_tables.insert(access_info.tables.begin(), access_info.tables.end()); - element.query_columns.insert(access_info.columns.begin(), access_info.columns.end()); - element.query_partitions.insert(access_info.partitions.begin(), access_info.partitions.end()); - element.query_projections.insert(access_info.projections.begin(), access_info.projections.end()); - element.query_views.insert(access_info.views.begin(), access_info.views.end()); - - const auto & factories_info = context_ptr->getQueryFactoriesInfo(); - element.used_aggregate_functions = factories_info.aggregate_functions; - element.used_aggregate_function_combinators = factories_info.aggregate_function_combinators; - element.used_database_engines = factories_info.database_engines; - element.used_data_type_families = factories_info.data_type_families; - element.used_dictionaries = factories_info.dictionaries; - element.used_formats = factories_info.formats; - element.used_functions = factories_info.functions; - element.used_storages = factories_info.storages; - element.used_table_functions = factories_info.table_functions; - - element.async_read_counters = context_ptr->getAsyncReadCounters(); - }; - + QueryLogElement elem = logQueryStart( + query_start_time, + context, + query_for_logging, + ast, + pipeline, + interpreter, + internal, + query_database, + query_table, + async_insert); /// Also make possible for caller to log successful query finish and exception during execution. auto finish_callback = [elem, context, ast, write_into_query_cache, - log_queries, - log_queries_min_type = settings.log_queries_min_type, - log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), - log_processors_profiles = settings.log_processors_profiles, - status_info_to_query_log, + internal, implicit_txn_control, execute_implicit_tcl_query, pulling_pipeline = pipeline.pulling(), @@ -940,137 +1155,15 @@ static std::tuple executeQueryImpl( /// partial/garbage results in case of exceptions during query execution. 
query_pipeline.finalizeWriteInQueryCache(); - QueryStatusPtr process_list_elem = context->getProcessListElement(); + logQueryFinish(elem, context, ast, query_pipeline, pulling_pipeline, query_span, internal); - if (process_list_elem) - { - /// Update performance counters before logging to query_log - CurrentThread::finalizePerformanceCounters(); - - QueryStatusInfo info = process_list_elem->getInfo(true, context->getSettingsRef().log_profile_events); - elem.type = QueryLogElementType::QUERY_FINISH; - - status_info_to_query_log(elem, info, ast, context); - - if (pulling_pipeline) - { - query_pipeline.tryGetResultRowsAndBytes(elem.result_rows, elem.result_bytes); - } - else /// will be used only for ordinary INSERT queries - { - auto progress_out = process_list_elem->getProgressOut(); - elem.result_rows = progress_out.written_rows; - elem.result_bytes = progress_out.written_bytes; - } - - auto progress_callback = context->getProgressCallback(); - if (progress_callback) - { - Progress p; - p.incrementPiecewiseAtomically(Progress{ResultProgress{elem.result_rows, elem.result_bytes}}); - progress_callback(p); - } - - if (elem.read_rows != 0) - { - double elapsed_seconds = static_cast(info.elapsed_microseconds) / 1000000.0; - double rows_per_second = static_cast(elem.read_rows) / elapsed_seconds; - LOG_DEBUG( - &Poco::Logger::get("executeQuery"), - "Read {} rows, {} in {} sec., {} rows/sec., {}/sec.", - elem.read_rows, - ReadableSize(elem.read_bytes), - elapsed_seconds, - rows_per_second, - ReadableSize(elem.read_bytes / elapsed_seconds)); - } - - if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - if (log_processors_profiles) - { - if (auto processors_profile_log = context->getProcessorsProfileLog()) - { - ProcessorProfileLogElement processor_elem; - processor_elem.event_time = elem.event_time; - processor_elem.event_time_microseconds = elem.event_time_microseconds; - processor_elem.initial_query_id = elem.client_info.initial_query_id; - processor_elem.query_id = elem.client_info.current_query_id; - - auto get_proc_id = [](const IProcessor & proc) -> UInt64 - { - return reinterpret_cast(&proc); - }; - - for (const auto & processor : query_pipeline.getProcessors()) - { - std::vector parents; - for (const auto & port : processor->getOutputs()) - { - if (!port.isConnected()) - continue; - const IProcessor & next = port.getInputPort().getProcessor(); - parents.push_back(get_proc_id(next)); - } - - processor_elem.id = get_proc_id(*processor); - processor_elem.parent_ids = std::move(parents); - - processor_elem.plan_step = reinterpret_cast(processor->getQueryPlanStep()); - processor_elem.plan_group = processor->getQueryPlanStepGroup(); - - processor_elem.processor_name = processor->getName(); - - /// NOTE: convert this to UInt64 - processor_elem.elapsed_us = static_cast(processor->getElapsedUs()); - processor_elem.input_wait_elapsed_us = static_cast(processor->getInputWaitElapsedUs()); - processor_elem.output_wait_elapsed_us = static_cast(processor->getOutputWaitElapsedUs()); - - auto stats = processor->getProcessorDataStats(); - processor_elem.input_rows = stats.input_rows; - processor_elem.input_bytes = stats.input_bytes; - processor_elem.output_rows = stats.output_rows; - processor_elem.output_bytes = stats.output_bytes; - - processors_profile_log->add(processor_elem); - } - } - } - - if (*implicit_txn_control) - 
execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); - } - - if (query_span) - { - query_span->addAttribute("db.statement", elem.query); - query_span->addAttribute("clickhouse.query_id", elem.client_info.current_query_id); - query_span->addAttribute("clickhouse.query_status", "QueryFinish"); - query_span->addAttributeIfNotEmpty("clickhouse.tracestate", OpenTelemetry::CurrentContext().tracestate); - query_span->addAttributeIfNotZero("clickhouse.read_rows", elem.read_rows); - query_span->addAttributeIfNotZero("clickhouse.read_bytes", elem.read_bytes); - query_span->addAttributeIfNotZero("clickhouse.written_rows", elem.written_rows); - query_span->addAttributeIfNotZero("clickhouse.written_bytes", elem.written_bytes); - query_span->addAttributeIfNotZero("clickhouse.memory_usage", elem.memory_usage); - query_span->finish(); - } + if (*implicit_txn_control) + execute_implicit_tcl_query(context, ASTTransactionControl::COMMIT); }; - auto exception_callback = [start_watch, - elem, - context, - ast, - log_queries, - log_queries_min_type = settings.log_queries_min_type, - log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), - my_quota(quota), - status_info_to_query_log, - implicit_txn_control, - execute_implicit_tcl_query, - query_span](bool log_error) mutable + auto exception_callback = + [start_watch, elem, context, ast, internal, my_quota(quota), implicit_txn_control, execute_implicit_tcl_query, query_span]( + bool log_error) mutable { if (*implicit_txn_control) execute_implicit_tcl_query(context, ASTTransactionControl::ROLLBACK); @@ -1080,60 +1173,7 @@ static std::tuple executeQueryImpl( if (my_quota) my_quota->used(QuotaType::ERRORS, 1, /* check_exceeded = */ false); - elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; - elem.exception_code = getCurrentExceptionCode(); - auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - elem.exception = std::move(exception_message.text); - elem.exception_format_string = exception_message.format_string; - - QueryStatusPtr process_list_elem = context->getProcessListElement(); - const Settings & current_settings = context->getSettingsRef(); - - /// Update performance counters before logging to query_log - CurrentThread::finalizePerformanceCounters(); - const auto time_now = std::chrono::system_clock::now(); - elem.event_time = timeInSeconds(time_now); - elem.event_time_microseconds = timeInMicroseconds(time_now); - - if (process_list_elem) - { - QueryStatusInfo info = process_list_elem->getInfo(true, current_settings.log_profile_events, false); - status_info_to_query_log(elem, info, ast, context); - } - else - { - elem.query_duration_ms = start_watch.elapsedMilliseconds(); - } - - if (current_settings.calculate_text_stack_trace && log_error) - setExceptionStackTrace(elem); - logException(context, elem, log_error); - - /// In case of exception we log internal queries also - if (log_queries && elem.type >= log_queries_min_type && static_cast(elem.query_duration_ms) >= log_queries_min_query_duration_ms) - { - if (auto query_log = context->getQueryLog()) - query_log->add(elem); - } - - ProfileEvents::increment(ProfileEvents::FailedQuery); - if (ast->as() || ast->as()) - { - ProfileEvents::increment(ProfileEvents::FailedSelectQuery); - } - else if (ast->as()) - { - ProfileEvents::increment(ProfileEvents::FailedInsertQuery); - } - - if (query_span) - { - query_span->addAttribute("db.statement", elem.query); - query_span->addAttribute("clickhouse.query_id", 
elem.client_info.current_query_id);
-                query_span->addAttribute("clickhouse.exception", elem.exception);
-                query_span->addAttribute("clickhouse.exception_code", elem.exception_code);
-                query_span->finish();
-            }
+            logQueryException(elem, context, start_watch, ast, query_span, internal, log_error);
         };
 
         res.finish_callback = std::move(finish_callback);
@@ -1148,7 +1188,7 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
             txn->onException();
 
         if (!internal)
-            onExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds());
+            logExceptionBeforeStart(query_for_logging, context, ast, query_span, start_watch.elapsedMilliseconds());
 
         throw;
     }
diff --git a/src/Interpreters/executeQuery.h b/src/Interpreters/executeQuery.h
index 93152cc1de6..53624f8c812 100644
--- a/src/Interpreters/executeQuery.h
+++ b/src/Interpreters/executeQuery.h
@@ -1,15 +1,21 @@
 #pragma once
 
 #include
-#include
-#include
 #include
+#include
+#include
+#include
+
+#include
+#include
 
 namespace DB
 {
 
+class IInterpreter;
 class ReadBuffer;
 class WriteBuffer;
+struct QueryStatusInfo;
 
 struct QueryResultDetails
 {
@@ -66,4 +72,41 @@ BlockIO executeQuery(
 /// if built pipeline does not require any input and does not produce any output.
 void executeTrivialBlockIO(BlockIO & streams, ContextPtr context);
 
+/// Prepares a QueryLogElement and, if enabled, logs it to system.query_log
+QueryLogElement logQueryStart(
+    const std::chrono::time_point<std::chrono::system_clock> & query_start_time,
+    const ContextMutablePtr & context,
+    const String & query_for_logging,
+    const ASTPtr & query_ast,
+    const QueryPipeline & pipeline,
+    const std::unique_ptr<IInterpreter> & interpreter,
+    bool internal,
+    const String & query_database,
+    const String & query_table,
+    bool async_insert);
+
+void logQueryFinish(
+    QueryLogElement & elem,
+    const ContextMutablePtr & context,
+    const ASTPtr & query_ast,
+    const QueryPipeline & query_pipeline,
+    bool pulling_pipeline,
+    std::shared_ptr<OpenTelemetry::SpanHolder> query_span,
+    bool internal);
+
+void logQueryException(
+    QueryLogElement & elem,
+    const ContextMutablePtr & context,
+    const Stopwatch & start_watch,
+    const ASTPtr & query_ast,
+    std::shared_ptr<OpenTelemetry::SpanHolder> query_span,
+    bool internal,
+    bool log_error);
+
+void logExceptionBeforeStart(
+    const String & query_for_logging,
+    ContextPtr context,
+    ASTPtr ast,
+    const std::shared_ptr<OpenTelemetry::SpanHolder> & query_span,
+    UInt64 elapsed_milliseconds);
 }
diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h
index 43780e27114..45fd3d97950 100644
--- a/src/Parsers/ASTInsertQuery.h
+++ b/src/Parsers/ASTInsertQuery.h
@@ -35,6 +35,8 @@ public:
     /// Data from buffer to insert after inlined one - may be nullptr.
     ReadBuffer * tail = nullptr;
 
+    bool async_insert_flush = false;
+
     String getDatabase() const;
     String getTable() const;
 
@@ -66,7 +68,7 @@ public:
         return res;
     }
 
-    QueryKind getQueryKind() const override { return QueryKind::Insert; }
+    QueryKind getQueryKind() const override { return async_insert_flush ? QueryKind::AsyncInsertFlush : QueryKind::Insert; }
 
 protected:
     void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h
index aa5302a15b9..7a8ab36518d 100644
--- a/src/Parsers/IAST.h
+++ b/src/Parsers/IAST.h
@@ -305,6 +305,7 @@ public:
         Commit,
         Rollback,
         SetTransactionSnapshot,
+        AsyncInsertFlush
     };
 
     /// Return QueryKind of this AST query.
virtual QueryKind getQueryKind() const { return QueryKind::None; } diff --git a/tests/integration/ci-runner.py b/tests/integration/ci-runner.py index 9629a5821b5..31f4a7666c8 100755 --- a/tests/integration/ci-runner.py +++ b/tests/integration/ci-runner.py @@ -810,7 +810,6 @@ class ClickhouseIntegrationTestsRunner: result_state = "failure" if not should_fail: break - assert should_fail logging.info("Try is OK, all tests passed, going to clear env") clear_ip_tables_and_restart_daemons() logging.info("And going to sleep for some time") diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index 5561d63840b..bc7ac6683af 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -818,9 +818,10 @@ def test_start_stop_moves(start_cluster, name, engine): node1.query(f"SYSTEM STOP MOVES {name}") node1.query(f"SYSTEM STOP MERGES {name}") + first_part = None for i in range(5): data = [] # 5MB in total - for i in range(5): + for _ in range(5): data.append(get_random_string(1024 * 1024)) # 1MB row # jbod size is 40MB, so lets insert 5MB batch 7 times node1.query_with_retry( @@ -829,7 +830,13 @@ def test_start_stop_moves(start_cluster, name, engine): ) ) - first_part = get_oldest_part(node1, name) + # we cannot rely simply on modification time of part because it can be changed + # by different background operations so we explicitly check after the first + # part is inserted + if i == 0: + first_part = get_oldest_part(node1, name) + + assert first_part is not None used_disks = get_used_disks_for_table(node1, name) diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql index a794709caba..d05a25882a7 100644 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization.sql @@ -18,7 +18,7 @@ select distinct a from distinct_in_order settings max_block_size=10, max_threads select '-- create table with not only primary key columns'; drop table if exists distinct_in_order sync; -create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b); +create table distinct_in_order (a int, b int, c int) engine=MergeTree() order by (a, b) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select number % number, number % 5, number % 10 from numbers(1,1000000); select '-- distinct with primary key prefix only'; @@ -59,16 +59,16 @@ drop table if exists distinct_in_order sync; select '-- check that distinct in order returns the same result as ordinary distinct'; drop table if exists distinct_cardinality_low sync; -CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium); +CREATE TABLE distinct_cardinality_low (low UInt64, medium UInt64, high UInt64) ENGINE MergeTree() ORDER BY (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; INSERT INTO distinct_cardinality_low SELECT number % 1e1, number % 1e2, number % 1e3 FROM numbers_mt(1e4); drop table if exists distinct_in_order sync; drop table if exists ordinary_distinct sync; select '-- check that distinct in order WITH order by returns the same result as ordinary distinct'; -create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium); +create table distinct_in_order (low UInt64, 
medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=1; -create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium); +create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ordinary_distinct select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=0; select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct); @@ -76,9 +76,9 @@ drop table if exists distinct_in_order sync; drop table if exists ordinary_distinct sync; select '-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct'; -create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium); +create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=1; -create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium); +create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ordinary_distinct select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=0; select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct); @@ -86,9 +86,9 @@ drop table if exists distinct_in_order; drop table if exists ordinary_distinct; select '-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct'; -create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium); +create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into distinct_in_order select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=1; -create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium); +create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into ordinary_distinct select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=0; select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct); @@ -102,12 +102,12 @@ drop table if exists sorting_key_contain_function; select '-- bug 42185, distinct in order and empty sort description'; select '-- distinct in order, sorting key tuple()'; -create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple(); +create table sorting_key_empty_tuple (a int, b int) engine=MergeTree() order by tuple() SETTINGS 
index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into sorting_key_empty_tuple select number % 2, number % 5 from numbers(1,10); select distinct a from sorting_key_empty_tuple; select '-- distinct in order, sorting key contains function'; -create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)); +create table sorting_key_contain_function (datetime DateTime, a int) engine=MergeTree() order by (toDate(datetime)) SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi'; insert into sorting_key_contain_function values ('2000-01-01', 1); insert into sorting_key_contain_function values ('2000-01-01', 2); select distinct datetime from sorting_key_contain_function; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index b5c133988e6..fc00bfdadca 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -112,6 +112,7 @@ arrayFirstIndex arrayFirstOrNull arrayFlatten arrayIntersect +arrayJaccardIndex arrayJoin arrayLast arrayLastIndex @@ -363,6 +364,8 @@ in inIgnoreSet indexHint indexOf +initcap +initcapUTF8 initialQueryID initializeAggregation intDiv diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.reference b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference new file mode 100644 index 00000000000..62a51ec0ab2 --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.reference @@ -0,0 +1,23 @@ +negative tests +const arguments +[1,2] [1,2,3,4] 0.5 +[1,1.1,2.2] [2.2,3.3,444] 0.2 +[1] [1] 1 +['a'] ['a','aa','aaa'] 0.33 +[[1,2],[3,4]] [[1,2],[3,5]] 0.33 +non-const arguments +[1] [1,2] 0.5 +[1,2] [1,2] 1 +[1,2,3] [1,2] 0.67 +[1] [] 0 +[1,2] [] 0 +[1,2,3] [] 0 +[1,2] [1] 0.5 +[1,2] [1,2] 1 +[1,2] [1,2,3] 0.67 +[] [1] 0 +[] [1,2] 0 +[] [1,2,3] 0 +[1] [1] 1 +[1,2] [1,2] 1 +[1,2,3] [1,2,3] 1 diff --git a/tests/queries/0_stateless/02737_arrayJaccardIndex.sql b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql new file mode 100644 index 00000000000..499debd94b7 --- /dev/null +++ b/tests/queries/0_stateless/02737_arrayJaccardIndex.sql @@ -0,0 +1,30 @@ +SELECT 'negative tests'; + +SELECT 'a' AS arr1, 2 AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT [] AS arr1, [] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT ['1', '2'] AS arr1, [1,2] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); -- { serverError NO_COMMON_TYPE } + +SELECT 'const arguments'; + +SELECT [1,2] AS arr1, [1,2,3,4] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [1, 1.1, 2.2] AS arr1, [2.2, 3.3, 444] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [toUInt16(1)] AS arr1, [toUInt32(1)] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT ['a'] AS arr1, ['a', 'aa', 'aaa'] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); +SELECT [[1,2], [3,4]] AS arr1, [[1,2], [3,5]] AS arr2, round(arrayJaccardIndex(arr1, arr2), 2); + +SELECT 'non-const arguments'; + +DROP TABLE IF EXISTS array_jaccard_index; + +CREATE TABLE array_jaccard_index (arr Array(UInt8)) engine = MergeTree ORDER BY arr; +INSERT INTO array_jaccard_index values ([1,2,3]); +INSERT INTO array_jaccard_index values ([1,2]); +INSERT INTO array_jaccard_index values ([1]); + +SELECT arr, [1,2] AS other, 
round(arrayJaccardIndex(arr, other), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, [] AS other, round(arrayJaccardIndex(arr, other), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [1,2] AS other, arr, round(arrayJaccardIndex(other, arr), 2) FROM array_jaccard_index ORDER BY arr; +SELECT [] AS other, arr, round(arrayJaccardIndex(other, arr), 2) FROM array_jaccard_index ORDER BY arr; +SELECT arr, arr, round(arrayJaccardIndex(arr, arr), 2) FROM array_jaccard_index ORDER BY arr; + +DROP TABLE array_jaccard_index; diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference new file mode 100644 index 00000000000..e202a38f068 --- /dev/null +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference @@ -0,0 +1,135 @@ + +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 + +Row 2: +────── +type: QueryFinish +read_rows: 0 +read_bytes: 0 +written_rows: 4 +written_bytes: 16 +result_rows: 4 +result_bytes: 16 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing'] +columns: [] +views: [] +exception_code: 0 + +system.query_views_log + +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: [] +views: ['default.async_insert_mv'] +exception_code: 0 + +Row 2: +────── +type: QueryFinish +read_rows: 3 +read_bytes: 12 +written_rows: 6 +written_bytes: 12 +result_rows: 6 +result_bytes: 12 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: ['default.async_insert_landing.id'] +views: ['default.async_insert_mv'] +exception_code: 0 + +system.query_views_log +Row 1: +────── +view_name: default.async_insert_mv +view_type: Materialized +view_query: SELECT id + throwIf(id = 42) FROM default.async_insert_landing +view_target: default.async_insert_target +read_rows: 3 +read_bytes: 12 +written_rows: 3 +written_bytes: 0 +status: QueryFinish +exception_code: 0 + +system.query_log +Row 1: +────── +type: QueryStart +read_rows: 0 +read_bytes: 0 +written_rows: 0 +written_bytes: 0 +result_rows: 0 +result_bytes: 0 +query: INSERT INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: [] +views: ['default.async_insert_mv'] +exception_code: 0 + +Row 2: +────── +type: Exc*****onWhileProcessing +read_rows: 3 +read_bytes: 12 +written_rows: 3 +written_bytes: 12 +result_rows: 0 +result_bytes: 0 +query: INSERT 
INTO default.async_insert_landing SETTINGS wait_for_async_insert = 1, async_insert = 1 FORMAT Values +query_kind: AsyncInsertFlush +databases: ['default'] +tables: ['default.async_insert_landing','default.async_insert_target'] +columns: ['default.async_insert_landing.id'] +views: ['default.async_insert_mv'] +exception_code: 395 + +system.query_views_log +Row 1: +────── +view_name: default.async_insert_mv +view_type: Materialized +view_query: SELECT id + throwIf(id = 42) FROM default.async_insert_landing +view_target: default.async_insert_target +read_rows: 3 +read_bytes: 12 +written_rows: 0 +written_bytes: 0 +status: Exc*****onWhileProcessing +exception_code: 395 diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh new file mode 100755 index 00000000000..d1c8fe122cb --- /dev/null +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function print_flush_query_logs() +{ + ${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + echo "" + echo "system.query_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + replace(type::String, 'Exception', 'Exc*****on') as type, + read_rows, + read_bytes, + written_rows, + written_bytes, + result_rows, + result_bytes, + query, + query_kind, + databases, + tables, + columns, + views, + exception_code + FROM system.query_log + WHERE + event_date >= yesterday() + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + -- AND current_database = currentDatabase() -- Just to silence style check: this is not ok for this test since the query uses default values + ORDER BY type DESC + FORMAT Vertical" + + echo "" + echo "system.query_views_log" + ${CLICKHOUSE_CLIENT} -q " + SELECT + view_name, + view_type, + view_query, + view_target, + read_rows, + read_bytes, + written_rows, + written_bytes, + replace(status::String, 'Exception', 'Exc*****on') as status, + exception_code + FROM system.query_views_log + WHERE + event_date >= yesterday() + AND initial_query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE query_id = '$1') + FORMAT Vertical" +} + + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_insert_landing (id UInt32) ENGINE = MergeTree ORDER BY id" + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=1, async_insert=1 values (1), (2), (3), (4);" +print_flush_query_logs ${query_id} + + +${CLICKHOUSE_CLIENT} -q "CREATE TABLE async_insert_target (id UInt32) ENGINE = MergeTree ORDER BY id" +${CLICKHOUSE_CLIENT} -q "CREATE MATERIALIZED VIEW async_insert_mv TO async_insert_target AS SELECT id + throwIf(id = 42) FROM async_insert_landing" + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=1, async_insert=1 values (11), (12), (13);" +print_flush_query_logs ${query_id} + + +query_id="$(random_str 10)" +${CLICKHOUSE_CLIENT} --query_id="${query_id}" -q "INSERT INTO async_insert_landing SETTINGS wait_for_async_insert=1, async_insert=1 values (42), (12), (13)" 2>/dev/null || true +print_flush_query_logs ${query_id} diff --git a/tests/queries/0_stateless/02797_range_nullable.reference b/tests/queries/0_stateless/02797_range_nullable.reference new file mode 100644 index 
00000000000..0e01ba9984c --- /dev/null +++ b/tests/queries/0_stateless/02797_range_nullable.reference @@ -0,0 +1,8 @@ +\N +\N +\N +\N +[0] +[0,2,4,6,8] +[0,2,4,6,8] +[0,2,4,6,8] diff --git a/tests/queries/0_stateless/02797_range_nullable.sql b/tests/queries/0_stateless/02797_range_nullable.sql new file mode 100644 index 00000000000..ae35eb6fb57 --- /dev/null +++ b/tests/queries/0_stateless/02797_range_nullable.sql @@ -0,0 +1,12 @@ +SELECT range(null); +SELECT range(10, null); +SELECT range(10, 2, null); +select range('string', Null); +SELECT range(toNullable(1)); +SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); +SELECT range(0::Nullable(Int64), 10::Nullable(Int64), 2::Nullable(Int64)); +SELECT range(materialize(0), 10::Nullable(UInt64), 2::Nullable(UInt64)); +SELECT range(Null::Nullable(UInt64), 10::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } +SELECT range(0::Nullable(UInt64), Null::Nullable(UInt64), 2::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } +SELECT range(0::Nullable(UInt64), 10::Nullable(UInt64), Null::Nullable(UInt64)); -- { serverError BAD_ARGUMENTS } +SELECT range(Null::Nullable(UInt8), materialize(1)); -- { serverError BAD_ARGUMENTS } diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference new file mode 100644 index 00000000000..3b44d4ba086 --- /dev/null +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.reference @@ -0,0 +1,6 @@ +0 +329871470813054077831677335124932328170 +340282366920938463463374607431768211455 +329871470813054077831677335124932328170 +329871470813054077831677335124932328170 +329871470813054077831677335124932328170 diff --git a/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql new file mode 100644 index 00000000000..155596dd1d5 --- /dev/null +++ b/tests/queries/0_stateless/02810_convert_uuid_to_uint128.sql @@ -0,0 +1,8 @@ +SELECT toUInt128(toUUID('00000000-0000-0000-0000-000000000000')); +SELECT toUInt128(toUUID('f82aef31-279e-431f-8b00-2899ad387aea')); +SELECT toUInt128(toUUID('ffffffff-ffff-ffff-ffff-ffffffffffff')); +SELECT toUInt64(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } +SELECT toInt128(toUUID('00000000-0000-0000-0000-000000000000')); -- { serverError NOT_IMPLEMENTED } +SELECT cast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); +select accurateCast(toUUID('f82aef31-279e-431f-8b00-2899ad387aea'), 'UInt128'); +select toUUID('f82aef31-279e-431f-8b00-2899ad387aea')::UInt128; diff --git a/tests/queries/0_stateless/02810_initcap.reference b/tests/queries/0_stateless/02810_initcap.reference new file mode 100644 index 00000000000..0d24e14c445 --- /dev/null +++ b/tests/queries/0_stateless/02810_initcap.reference @@ -0,0 +1,13 @@ + +Hello +Hello +Hello World +Yeah, Well, I`M Gonna Go Build My Own Theme Park +Crc32ieee Is The Best Function +42ok + +Hello +Yeah, Well, I`M Gonna Go Build My Own Theme Park +Привет, Как Дела? +Ätsch, Bätsch +We Dont Support Cases When Lowercase And Uppercase Characters Occupy Different Number Of Bytes In Utf-8. As An Example, This Happens For ß And ẞ. 
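The reference file above encodes the word-boundary rule that both initcap and initcapUTF8 follow: the first alphanumeric character after a non-alphanumeric one is uppercased, and every other alphabetic character is lowercased, so "42oK" becomes "42ok" and "I`M" keeps its capital M. A minimal ASCII-only sketch of that rule, useful for checking the expected outputs by hand (standalone illustrative code with a hypothetical helper name, not the ClickHouse implementation):

```cpp
#include <cctype>
#include <iostream>
#include <string>

// Sketch of the rule checked by 02810_initcap.reference (ASCII only):
// a "word" starts at any alphanumeric character that follows a
// non-alphanumeric one; its first character is uppercased and all other
// alphabetic characters are lowercased. Digits pass through unchanged.
std::string initcapAscii(const std::string & s)
{
    std::string result;
    result.reserve(s.size());
    bool prev_alphanum = false;
    for (unsigned char c : s)
    {
        bool alphanum = std::isalnum(c);
        if (alphanum && !prev_alphanum)
            result += static_cast<char>(std::toupper(c));
        else
            result += static_cast<char>(std::tolower(c));
        prev_alphanum = alphanum;
    }
    return result;
}

int main()
{
    // Matches the reference: "Yeah, Well, I`M Gonna Go Build My Own Theme Park"
    std::cout << initcapAscii("yeah, well, i`m gonna go build my own theme park") << '\n';
    std::cout << initcapAscii("42oK") << '\n'; // "42ok": the digit '4' starts the word
}
```

The UTF-8 variant added earlier in this patch applies the same rule per code point via Poco::Unicode, and falls back to copying the bytes unchanged whenever recasing would change the encoded sequence length, which is why the ß/ẞ case in the last test line is documented as unsupported.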
diff --git a/tests/queries/0_stateless/02810_initcap.sql b/tests/queries/0_stateless/02810_initcap.sql
new file mode 100644
index 00000000000..1a730003604
--- /dev/null
+++ b/tests/queries/0_stateless/02810_initcap.sql
@@ -0,0 +1,14 @@
+select initcap('');
+select initcap('Hello');
+select initcap('hello');
+select initcap('hello world');
+select initcap('yeah, well, i`m gonna go build my own theme park');
+select initcap('CRC32IEEE is the best function');
+select initcap('42oK');
+
+select initcapUTF8('');
+select initcapUTF8('Hello');
+select initcapUTF8('yeah, well, i`m gonna go build my own theme park');
+select initcapUTF8('привет, как дела?');
+select initcapUTF8('ätsch, bätsch');
+select initcapUTF8('We dont support cases when lowercase and uppercase characters occupy different number of bytes in UTF-8. As an example, this happens for ß and ẞ.');
\ No newline at end of file
diff --git a/tests/queries/0_stateless/02811_insert_schema_inference.reference b/tests/queries/0_stateless/02811_insert_schema_inference.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/02811_insert_schema_inference.sql b/tests/queries/0_stateless/02811_insert_schema_inference.sql
new file mode 100644
index 00000000000..9de710047f7
--- /dev/null
+++ b/tests/queries/0_stateless/02811_insert_schema_inference.sql
@@ -0,0 +1,9 @@
+drop table if exists test;
+create table test
+(
+    n1 UInt32,
+    n2 UInt32 alias murmurHash3_32(n1),
+    n3 UInt32 materialized n2 + 1
+)engine=MergeTree order by n1;
+insert into test select * from generateRandom() limit 10;
+drop table test;
diff --git a/tests/queries/0_stateless/02815_alias_to_length.reference b/tests/queries/0_stateless/02815_alias_to_length.reference
new file mode 100644
index 00000000000..de958a364ef
--- /dev/null
+++ b/tests/queries/0_stateless/02815_alias_to_length.reference
@@ -0,0 +1,4 @@
+4
+4
+15
+4
diff --git a/tests/queries/0_stateless/02815_alias_to_length.sql b/tests/queries/0_stateless/02815_alias_to_length.sql
new file mode 100644
index 00000000000..780ac7dac6d
--- /dev/null
+++ b/tests/queries/0_stateless/02815_alias_to_length.sql
@@ -0,0 +1,6 @@
+SELECT OCTET_LENGTH('1234');
+SELECT OcTet_lenGtH('1234');
+SELECT OCTET_LENGTH('你好,世界');
+
+-- This is an implementation-specific behavior of getting the length of an array.
+SELECT OCTET_LENGTH([1,2,3,4]);
diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt
index 6ddca6db538..6fca55128b4 100644
--- a/utils/check-style/aspell-ignore/en/aspell-dict.txt
+++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt
@@ -1035,6 +1035,7 @@ arrayFirst
 arrayFirstIndex
 arrayFlatten
 arrayIntersect
+arrayJaccardIndex
 arrayJoin
 arrayLast
 arrayLastIndex
@@ -1581,6 +1582,8 @@ indexOf
 infi
 initialQueryID
 initializeAggregation
+initcap
+initcapUTF
 injective
 innogames
 inodes
@@ -1608,6 +1611,7 @@ isNull
 isValidJSON
 isValidUTF
 iteratively
+jaccard
 javaHash
 javaHashUTF
 jbod
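For reference, the arrayJaccardIndex function exercised by 02737_arrayJaccardIndex.sql computes the Jaccard index of its two array arguments treated as sets, |A ∩ B| / |A ∪ B|. A short standalone sketch of that formula, handy for verifying the values in the test reference (illustrative only; note the SQL test shows two empty array literals are rejected at the type level, while this sketch simply returns 0 for that case):

```cpp
#include <cstdio>
#include <set>
#include <vector>

// Jaccard index of two arrays treated as sets: |A ∩ B| / |A ∪ B|.
// An empty intersection gives 0, identical sets give 1, and
// [1,2] vs [1,2,3,4] gives 2/4 = 0.5, matching the test reference.
double arrayJaccardIndex(const std::vector<int> & a, const std::vector<int> & b)
{
    std::set<int> set_a(a.begin(), a.end());
    std::set<int> set_b(b.begin(), b.end());

    size_t intersection = 0;
    for (int x : set_a)
        intersection += set_b.count(x); // count() is 0 or 1 for std::set

    size_t union_size = set_a.size() + set_b.size() - intersection;
    return union_size == 0 ? 0.0 : static_cast<double>(intersection) / union_size;
}

int main()
{
    std::printf("%.2f\n", arrayJaccardIndex({1, 2}, {1, 2, 3, 4})); // 0.50
    std::printf("%.2f\n", arrayJaccardIndex({1, 2, 3}, {1, 2}));    // 0.67
    std::printf("%.2f\n", arrayJaccardIndex({1}, {}));              // 0.00
}
```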