diff --git a/CMakeLists.txt b/CMakeLists.txt index d10bc63c15e..06e6f943fd3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -442,8 +442,9 @@ elseif (OS_DARWIN) include(cmake/darwin/default_libs.cmake) elseif (OS_FREEBSD) include(cmake/freebsd/default_libs.cmake) +else() + link_libraries(global-group) endif () -link_libraries(global-group) if (NOT (OS_LINUX OR OS_DARWIN)) # Using system libs can cause a lot of warnings in includes (on macro expansion). @@ -592,7 +593,7 @@ add_subdirectory (programs) add_subdirectory (tests) add_subdirectory (utils) -include (cmake/sanitize_target_link_libraries.cmake) +include (cmake/sanitize_targets.cmake) # Build native targets if necessary get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS) diff --git a/base/glibc-compatibility/glibc-compatibility.c b/base/glibc-compatibility/glibc-compatibility.c index d10bc6ba723..bae03ad590a 100644 --- a/base/glibc-compatibility/glibc-compatibility.c +++ b/base/glibc-compatibility/glibc-compatibility.c @@ -220,13 +220,13 @@ struct statx { uint32_t stx_dev_minor; uint64_t spare[14]; }; -#endif int statx(int fd, const char *restrict path, int flag, unsigned int mask, struct statx *restrict statxbuf) { return syscall(SYS_statx, fd, path, flag, mask, statxbuf); } +#endif #include diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 1f92663a4b9..3e6e4907a71 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -23,6 +23,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) include (cmake/cxx.cmake) +link_libraries(global-group) target_link_libraries(global-group INTERFACE $ diff --git a/cmake/freebsd/default_libs.cmake b/cmake/freebsd/default_libs.cmake index 65d5f0511d9..3e1f22ef2e4 100644 --- a/cmake/freebsd/default_libs.cmake +++ b/cmake/freebsd/default_libs.cmake @@ -24,6 +24,7 @@ find_package(Threads REQUIRED) include (cmake/unwind.cmake) include (cmake/cxx.cmake) +link_libraries(global-group) target_link_libraries(global-group INTERFACE $ diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index 21bead7020c..23c5fc3e14f 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -34,6 +34,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS}) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +include (cmake/unwind.cmake) +include (cmake/cxx.cmake) + +# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies +# which are ok with static libraries but not with dynamic ones +link_libraries(global-group) + if (NOT OS_ANDROID) if (NOT USE_MUSL) # Our compatibility layer doesn't build under Android, many errors in musl. 
@@ -42,9 +49,6 @@ if (NOT OS_ANDROID) add_subdirectory(base/harmful) endif () -include (cmake/unwind.cmake) -include (cmake/cxx.cmake) - target_link_libraries(global-group INTERFACE -Wl,--start-group $ diff --git a/cmake/sanitize_target_link_libraries.cmake b/cmake/sanitize_targets.cmake similarity index 65% rename from cmake/sanitize_target_link_libraries.cmake rename to cmake/sanitize_targets.cmake index d66ea338a52..8f61da2009d 100644 --- a/cmake/sanitize_target_link_libraries.cmake +++ b/cmake/sanitize_targets.cmake @@ -1,3 +1,13 @@ +# https://stackoverflow.com/a/62311397/328260 +macro (get_all_targets_recursive targets dir) + get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach (subdir ${subdirectories}) + get_all_targets_recursive (${targets} ${subdir}) + endforeach () + get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + list (APPEND ${targets} ${current_targets}) +endmacro () + # When you will try to link target with the directory (that exists), cmake will # skip this without an error, only the following warning will be reported: # @@ -18,23 +28,12 @@ # -- but cannot be used with link_libraries() # - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize # -- this will work. - -# https://stackoverflow.com/a/62311397/328260 function (get_all_targets var) set (targets) get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}) set (${var} ${targets} PARENT_SCOPE) endfunction() -macro (get_all_targets_recursive targets dir) - get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) - foreach (subdir ${subdirectories}) - get_all_targets_recursive (${targets} ${subdir}) - endforeach () - get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) - list (APPEND ${targets} ${current_targets}) -endmacro () - -macro (sanitize_link_libraries target) +function (sanitize_link_libraries target) get_target_property(target_type ${target} TYPE) if (${target_type} STREQUAL "INTERFACE_LIBRARY") get_property(linked_libraries TARGET ${target} PROPERTY INTERFACE_LINK_LIBRARIES) @@ -48,9 +47,35 @@ macro (sanitize_link_libraries target) message(FATAL_ERROR "${target} requested to link with directory: ${linked_library}") endif() endforeach() -endmacro() - +endfunction() get_all_targets (all_targets) foreach (target ${all_targets}) sanitize_link_libraries(${target}) endforeach() + +# +# Do not allow to define -W* from contrib publically (INTERFACE/PUBLIC). +# +function (get_contrib_targets var) + set (targets) + get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}/contrib) + set (${var} ${targets} PARENT_SCOPE) +endfunction() +function (sanitize_interface_flags target) + get_target_property(target_type ${target} TYPE) + get_property(compile_definitions TARGET ${target} PROPERTY INTERFACE_COMPILE_DEFINITIONS) + get_property(compile_options TARGET ${target} PROPERTY INTERFACE_COMPILE_OPTIONS) + if (NOT "${compile_options}" STREQUAL "") + message(FATAL_ERROR "${target} set INTERFACE_COMPILE_OPTIONS to ${compile_options}. This is forbidden.") + endif() + if ("${compile_definitions}" MATCHES "-Wl,") + # linker option - OK + elseif ("${compile_definitions}" MATCHES "-W") + message(FATAL_ERROR "${target} contains ${compile_definitions} flags in INTERFACE_COMPILE_DEFINITIONS. 
This is forbidden.") + endif() +endfunction() +get_contrib_targets (contrib_targets) +foreach (contrib_target ${contrib_targets}) + sanitize_interface_flags(${contrib_target}) +endforeach() + diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index 53c6ff58f83..8dc154e9d91 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -57,7 +57,7 @@ add_library(cxx ${SRCS}) set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake") target_include_directories(cxx SYSTEM BEFORE PRIVATE $) -target_include_directories(cxx SYSTEM BEFORE PUBLIC $) +target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$:$>) target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index b9075991ddd..0afd6cc19f3 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -388,6 +388,8 @@ else rm -f /etc/clickhouse-server/config.d/storage_conf.xml ||: rm -f /etc/clickhouse-server/config.d/azure_storage_conf.xml ||: + # Turn on after 22.12 + rm -f /etc/clickhouse-server/config.d/compressed_marks_and_index.xml ||: # it uses recently introduced settings which previous versions may not have rm -f /etc/clickhouse-server/users.d/insert_keeper_retries.xml ||: @@ -451,6 +453,7 @@ else # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") + # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility echo "Check for Error messages in server log:" zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Code: 236. DB::Exception: Cancelled mutating parts" \ @@ -485,6 +488,7 @@ else -e "(ReplicatedMergeTreeAttachThread): Initialization failed. Error" \ -e "Code: 269. 
DB::Exception: Destination table is myself" \ -e "Coordination::Exception: Connection loss" \ + -e "MutateFromLogEntryTask" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 56708def497..3221b1a06fa 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -13,7 +13,7 @@ The supported formats are: | Format | Input | Output | |-------------------------------------------------------------------------------------------|------|--------| | [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | | [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | | [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | | [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | @@ -48,6 +48,7 @@ The supported formats are: | [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | | [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | | [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | +| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | | [TSKV](#tskv) | ✔ | ✔ | | [Pretty](#pretty) | ✗ | ✔ | | [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | @@ -1210,6 +1211,69 @@ SELECT * FROM json_each_row_nested - [output_format_json_array_of_rows](../operations/settings/settings.md#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. - [output_format_json_validate_utf8](../operations/settings/settings.md#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. +## BSONEachRow {#bsoneachrow} + +In this format, ClickHouse formats/parses data as a sequence of BSON documents without any separator between them. +Each row is formatted as a single document and each column is formatted as a single BSON document field with column name as a key. 
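A minimal usage sketch (illustrative only; the query, aliases, and row count are hypothetical and not taken from the documentation above):

```sql
-- Write three rows as a stream of BSON documents, one document per row,
-- with each column stored as a document field keyed by the column name.
SELECT
    number AS id,
    toString(number) AS value
FROM system.numbers
LIMIT 3
FORMAT BSONEachRow;
```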
+ +For output it uses the following correspondence between ClickHouse types and BSON types: + +| ClickHouse type | BSON Type | +|-----------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------| +| [Bool](../sql-reference/data-types/boolean.md) | `\x08` boolean | +| [Int8/UInt8](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int16UInt16](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [Int32](../sql-reference/data-types/int-uint.md) | `\x10` int32 | +| [UInt32](../sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Int64/UInt64](../sql-reference/data-types/int-uint.md) | `\x12` int64 | +| [Float32/Float64](../sql-reference/data-types/float.md) | `\x01` double | +| [Date](../sql-reference/data-types/date.md)/[Date32](../sql-reference/data-types/date32.md) | `\x10` int32 | +| [DateTime](../sql-reference/data-types/datetime.md) | `\x12` int64 | +| [DateTime64](../sql-reference/data-types/datetime64.md) | `\x09` datetime | +| [Decimal32](../sql-reference/data-types/decimal.md) | `\x10` int32 | +| [Decimal64](../sql-reference/data-types/decimal.md) | `\x12` int64 | +| [Decimal128](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Decimal256](../sql-reference/data-types/decimal.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [Int128/UInt128](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 16 | +| [Int256/UInt256](../sql-reference/data-types/int-uint.md) | `\x05` binary, `\x00` binary subtype, size = 32 | +| [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | `\x05` binary, `\x00` binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled | +| [UUID](../sql-reference/data-types/uuid.md) | `\x05` binary, `\x04` uuid subtype, size = 16 | +| [Array](../sql-reference/data-types/array.md) | `\x04` array | +| [Tuple](../sql-reference/data-types/tuple.md) | `\x04` array | +| [Named Tuple](../sql-reference/data-types/tuple.md) | `\x03` document | +| [Map](../sql-reference/data-types/map.md) (with String keys) | `\x03` document | + +For input it uses the following correspondence between BSON types and ClickHouse types: + +| BSON Type | ClickHouse Type | +|------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `\x01` double | [Float32/Float64](../sql-reference/data-types/float.md) | +| `\x02` string | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x03` document | [Map](../sql-reference/data-types/map.md)/[Named Tuple](../sql-reference/data-types/tuple.md) | +| `\x04` array | [Array](../sql-reference/data-types/array.md)/[Tuple](../sql-reference/data-types/tuple.md) | +| `\x05` binary, `\x00` binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x02` old binary subtype | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) | +| `\x05` binary, `\x03` old uuid subtype | [UUID](../sql-reference/data-types/uuid.md) | +| `\x05` binary, `\x04` uuid subtype | 
[UUID](../sql-reference/data-types/uuid.md) |
+| `\x07` ObjectId | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) |
+| `\x08` boolean | [Bool](../sql-reference/data-types/boolean.md) |
+| `\x09` datetime | [DateTime64](../sql-reference/data-types/datetime64.md) |
+| `\x0A` null value | [NULL](../sql-reference/data-types/nullable.md) |
+| `\x0D` JavaScript code | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) |
+| `\x0E` symbol | [String](../sql-reference/data-types/string.md)/[FixedString](../sql-reference/data-types/fixedstring.md) |
+| `\x10` int32 | [Int32/UInt32](../sql-reference/data-types/int-uint.md)/[Decimal32](../sql-reference/data-types/decimal.md) |
+| `\x12` int64 | [Int64/UInt64](../sql-reference/data-types/int-uint.md)/[Decimal64](../sql-reference/data-types/decimal.md)/[DateTime64](../sql-reference/data-types/datetime64.md) |
+
+Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert a BSON int32 value into ClickHouse UInt8).
+Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from a BSON Binary value with the `\x00` binary subtype. In this case, this format validates that the size of the binary data equals the size of the expected value.
+
+Note: this format doesn't work properly on Big-Endian platforms.
+
+### BSON format settings {#bson-format-settings}
+
+- [output_format_bson_string_as_string](../operations/settings/settings.md#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`.
+- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](../operations/settings/settings.md#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types during schema inference for the BSONEachRow format. Default value - `false`.
+
 ## Native {#native}

The most efficient format. Data is written and read by blocks in binary format. For each block, the number of rows, number of columns, column names and types, and parts of columns in this block are recorded one after another. In other words, this format is “columnar” – it does not convert columns to rows. This is the format used in the native interface for interaction between servers, for using the command-line client, and for C++ clients.
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index a15a6e9bf4a..7494f3db71a 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -4784,7 +4784,7 @@ Possible values:
Default value: 1.
-## SQLInsert format settings {$sqlinsert-format-settings}
+## SQLInsert format settings {#sqlinsert-format-settings}
### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size}
@@ -4815,3 +4815,17 @@ Default value: `false`.
Quote column names with "`" characters
Default value: `true`.
+
+## BSONEachRow format settings {#bson-each-row-format-settings}
+
+### output_format_bson_string_as_string {#output_format_bson_string_as_string}
+
+Use BSON String type instead of Binary for String columns.
+
+Disabled by default.
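A minimal sketch of toggling this setting for a query (the query and literal are hypothetical, shown only to illustrate the setting described above):

```sql
-- By default, String columns are emitted as BSON Binary (`\x05`, subtype `\x00`).
-- With the setting enabled, they are emitted as BSON String (`\x02`) instead.
SET output_format_bson_string_as_string = 1;

SELECT 'hello' AS greeting
FORMAT BSONEachRow;
```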
+
+### input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference}
+
+Allow skipping columns with unsupported types during schema inference for the BSONEachRow format.
+
+Disabled by default.
diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md
index 3f6f493aa89..908d28d7ab1 100644
--- a/docs/en/sql-reference/statements/alter/projection.md
+++ b/docs/en/sql-reference/statements/alter/projection.md
@@ -11,6 +11,14 @@ Projections store data in a format that optimizes query execution, this feature
You can define one or more projections for a table, and during the query analysis the projection with the least data to scan will be selected by ClickHouse without modifying the query provided by the user.
+:::note Disk usage
+
+Projections internally create a new hidden table, which means that more IO and more disk space will be required.
+For example, if the projection defines a different primary key, all the data from the original table will be duplicated.
+:::
+
+You can see more technical details about how projections work internally on this [page](/docs/en/guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-multiple.md/#option-3-projections).
+
## Example filtering without using primary keys
Creating the table:
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 5833c43f55d..85741117d2a 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -60,7 +60,7 @@ If you specify `POPULATE`, the existing table data is inserted into the view whe
A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data won’t be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`.
-The execution of [ALTER](../../../sql-reference/statements/alter/view.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
+The execution of [ALTER](/docs/en/sql-reference/statements/alter/view.md) queries on materialized views has limitations, for example, you cannot update the `SELECT` query, so this might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view.
diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md
index f4a6ccb0c7d..c5995e067d1 100644
--- a/docs/en/sql-reference/statements/explain.md
+++ b/docs/en/sql-reference/statements/explain.md
@@ -47,6 +47,7 @@ Union
- `AST` — Abstract syntax tree.
- `SYNTAX` — Query text after AST-level optimizations.
+- `QUERY TREE` — Query tree after Query Tree level optimizations.
- `PLAN` — Query execution plan.
- `PIPELINE` — Query execution pipeline.
@@ -110,6 +111,32 @@ FROM
CROSS JOIN system.numbers AS c
```
+### EXPLAIN QUERY TREE
+
+Settings:
+
+- `run_passes` — Run all query tree passes before dumping the query tree. Default: `1`.
+- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`.
+- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`.
+
+Example:
+```sql
+EXPLAIN QUERY TREE SELECT id, value FROM test_table;
+```
+
+```
+QUERY id: 0
+  PROJECTION COLUMNS
+    id UInt64
+    value String
+  PROJECTION
+    LIST id: 1, nodes: 2
+      COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3
+      COLUMN id: 4, column_name: value, result_type: String, source_id: 3
+  JOIN TREE
+    TABLE id: 3, table_name: default.test_table
+```
+
### EXPLAIN PLAN
Dump query plan steps.
diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md
index ac02e9ab5a1..2df8581c447 100644
--- a/docs/en/sql-reference/statements/select/group-by.md
+++ b/docs/en/sql-reference/statements/select/group-by.md
@@ -243,6 +243,54 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all
You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../../../sql-reference/statements/select/join.md) clause (in this case, the respective total values are combined).
+## GROUP BY ALL
+
+`GROUP BY ALL` is equivalent to listing all the SELECT-ed expressions that are not aggregate functions.
+
+For example:
+
+``` sql
+SELECT
+    a * 2,
+    b,
+    count(c),
+FROM t
+GROUP BY ALL
+```
+
+is the same as
+
+``` sql
+SELECT
+    a * 2,
+    b,
+    count(c),
+FROM t
+GROUP BY a * 2, b
+```
+
+As a special case, if a function has both aggregate functions and other fields as its arguments, the `GROUP BY` keys will contain the maximum non-aggregate fields that can be extracted from it.
+
+For example:
+
+``` sql
+SELECT
+    substring(a, 4, 2),
+    substring(substring(a, 1, 2), 1, count(b))
+FROM t
+GROUP BY ALL
+```
+
+is the same as
+
+``` sql
+SELECT
+    substring(a, 4, 2),
+    substring(substring(a, 1, 2), 1, count(b))
+FROM t
+GROUP BY substring(a, 4, 2), substring(a, 1, 2)
+```
+
## Examples
Example:
diff --git a/docs/ru/operations/external-authenticators/kerberos.md b/docs/ru/operations/external-authenticators/kerberos.md
index 7b0702b2132..865ea639c89 100644
--- a/docs/ru/operations/external-authenticators/kerberos.md
+++ b/docs/ru/operations/external-authenticators/kerberos.md
@@ -98,7 +98,7 @@ ClickHouse предоставляет возможность аутентифи
:::danger "Важно"
- Если пользователь настроен для Kerberos-аутентификации, другие виды уатентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
+ Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
:::info ""
Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`.
diff --git a/docs/zh/engines/table-engines/integrations/kafka.md b/docs/zh/engines/table-engines/integrations/kafka.md index 707ee962ace..c6f11d9efce 100644 --- a/docs/zh/engines/table-engines/integrations/kafka.md +++ b/docs/zh/engines/table-engines/integrations/kafka.md @@ -74,7 +74,7 @@ Kafka 特性: 消费的消息会被自动追踪,因此每个消息在不同的消费组里只会记录一次。如果希望获得两次数据,则使用另一个组名创建副本。 -消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 http://kafka.apache.org/intro。 +消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 [http://kafka.apache.org/intro](http://kafka.apache.org/intro)。 `SELECT` 查询对于读取消息并不是很有用(调试除外),因为每条消息只能被读取一次。使用物化视图创建实时线程更实用。您可以这样做: diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index 13b4c368a96..7e847c02dcc 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -164,7 +164,7 @@ SETTINGS index_granularity = 8192, index_granularity_bytes = 0;
  • index_granularity: 显式设置为其默认值8192。这意味着对于每一组8192行,主索引将有一个索引条目,例如,如果表包含16384行,那么索引将有两个索引条目。

-  • index_granularity_bytes: 设置为0表示禁止字适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目
+  • index_granularity_bytes: 设置为0表示禁止自适应索引粒度。自适应索引粒度意味着ClickHouse自动为一组n行创建一个索引条目
    • 如果n小于8192,但n行的合并行数据大小大于或等于10MB (index_granularity_bytes的默认值)或
    • n达到8192
    • diff --git a/docs/zh/sql-reference/statements/select/group-by.md b/docs/zh/sql-reference/statements/select/group-by.md index 90b3c7660ee..31c1649bc30 100644 --- a/docs/zh/sql-reference/statements/select/group-by.md +++ b/docs/zh/sql-reference/statements/select/group-by.md @@ -77,6 +77,54 @@ sidebar_label: GROUP BY 您可以使用 `WITH TOTALS` 在子查询中,包括在子查询 [JOIN](../../../sql-reference/statements/select/join.md) 子句(在这种情况下,将各自的总值合并)。 +## GROUP BY ALL {#group-by-all} + +`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的字段进行`GROUP BY`。 + +例如 + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY ALL +``` + +效果等同于 + +``` sql +SELECT + a * 2, + b, + count(c), +FROM t +GROUP BY a * 2, b +``` + +对于一种特殊情况,如果一个 function 的参数中同时有聚合函数和其他字段,会对参数中能提取的最大非聚合字段进行`GROUP BY`。 + +例如: + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY ALL +``` + +效果等同于 + +``` sql +SELECT + substring(a, 4, 2), + substring(substring(a, 1, 2), 1, count(b)) +FROM t +GROUP BY substring(a, 4, 2), substring(a, 1, 2) +``` + ## 例子 {#examples} 示例: diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 028b4fbf8ab..1581b95213e 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -9,7 +9,10 @@ After=time-sync.target network-online.target Wants=time-sync.target [Service] -Type=simple +Type=notify + +# Switching off watchdog is very important for sd_notify to work correctly. +Environment=CLICKHOUSE_WATCHDOG_ENABLE=0 User=clickhouse Group=clickhouse Restart=always diff --git a/programs/diagnostics/go.mod b/programs/diagnostics/go.mod index fb1568ea491..d063fc9c3a9 100644 --- a/programs/diagnostics/go.mod +++ b/programs/diagnostics/go.mod @@ -80,8 +80,8 @@ require ( go.opentelemetry.io/otel v1.4.1 // indirect go.opentelemetry.io/otel/trace v1.4.1 // indirect golang.org/x/net v0.0.0-20211108170745-6635138e15ea // indirect - golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 // indirect - golang.org/x/text v0.3.7 // indirect + golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect + golang.org/x/text v0.3.8 // indirect google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect google.golang.org/grpc v1.43.0 // indirect google.golang.org/protobuf v1.27.1 // indirect diff --git a/programs/diagnostics/go.sum b/programs/diagnostics/go.sum index aa69472e9c2..e8b6558bc99 100644 --- a/programs/diagnostics/go.sum +++ b/programs/diagnostics/go.sum @@ -1078,8 +1078,8 @@ golang.org/x/sys v0.0.0-20211109184856-51b60fd695b3/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 h1:XfKQ4OlFl8okEOr5UvAqFRVj8pY/4yfcXrddB8qAbU0= -golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text 
v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -1089,8 +1089,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index a1bf324f482..8f65141b533 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -262,6 +262,7 @@ void Keeper::defineOptions(Poco::Util::OptionSet & options) } int Keeper::main(const std::vector & /*args*/) +try { Poco::Logger * log = &logger(); @@ -473,6 +474,12 @@ int Keeper::main(const std::vector & /*args*/) return Application::EXIT_OK; } +catch (...) +{ + /// Poco does not provide stacktrace. + tryLogCurrentException("Application"); + throw; +} void Keeper::logRevision() const diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index d2ad9b04993..4702985c0ae 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -99,6 +99,10 @@ #include "config_version.h" #if defined(OS_LINUX) +# include +# include +# include +# include # include # include # include @@ -273,6 +277,7 @@ namespace ErrorCodes extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; + extern const int SYSTEM_ERROR; } @@ -646,7 +651,53 @@ static void sanityChecks(Server & server) } } +#if defined(OS_LINUX) +/// Sends notification to systemd, analogous to sd_notify from libsystemd +static void systemdNotify(const std::string_view & command) +{ + const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe) + + if (path == nullptr) + return; /// not using systemd + + int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); + + if (s == -1) + throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR); + + SCOPE_EXIT({ close(s); }); + + const size_t len = strlen(path); + + struct sockaddr_un addr; + + addr.sun_family = AF_UNIX; + + if (len < 2 || len > sizeof(addr.sun_path) - 1) + throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path); + + memcpy(addr.sun_path, path, len + 1); /// write last zero as well. + + size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len; + + /// '@' meass this is Linux abstract socket, per documentation it must be sun_path[0] must be set to '\0' for it. + if (path[0] == '@') + addr.sun_path[0] = 0; + else if (path[0] == '/') + addrlen += 1; /// non-abstract-addresses should be zero terminated. 
+ else + throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path); + + const struct sockaddr *sock_addr = reinterpret_cast (&addr); + + if (sendto(s, command.data(), command.size(), 0, sock_addr, static_cast (addrlen)) != static_cast (command.size())) + throw Exception("Failed to notify systemd.", ErrorCodes::SYSTEM_ERROR); + +} +#endif + int Server::main(const std::vector & /*args*/) +try { Poco::Logger * log = &logger(); @@ -1148,6 +1199,9 @@ int Server::main(const std::vector & /*args*/) total_memory_tracker.setDescription("(total)"); total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking); + bool allow_use_jemalloc_memory = config->getBool("allow_use_jemalloc_memory", true); + total_memory_tracker.setAllowUseJemallocMemory(allow_use_jemalloc_memory); + auto * global_overcommit_tracker = global_context->getGlobalOvercommitTracker(); total_memory_tracker.setOvercommitTracker(global_overcommit_tracker); @@ -1776,6 +1830,10 @@ int Server::main(const std::vector & /*args*/) tryLogCurrentException(log, "Caught exception while starting cluster discovery"); } +#if defined(OS_LINUX) + systemdNotify("READY=1\n"); +#endif + SCOPE_EXIT_SAFE({ LOG_DEBUG(log, "Received termination signal."); @@ -1845,6 +1903,12 @@ int Server::main(const std::vector & /*args*/) return Application::EXIT_OK; } +catch (...) +{ + /// Poco does not provide stacktrace. + tryLogCurrentException("Application"); + throw; +} std::unique_ptr Server::buildProtocolStackFromConfig( const Poco::Util::AbstractConfiguration & config, diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h index 8263f50d1b0..ed87b13f01a 100644 --- a/src/Access/Common/AccessType.h +++ b/src/Access/Common/AccessType.h @@ -130,8 +130,8 @@ enum class AccessType M(SHOW_ROW_POLICIES, "SHOW POLICIES, SHOW CREATE ROW POLICY, SHOW CREATE POLICY", TABLE, SHOW_ACCESS) \ M(SHOW_QUOTAS, "SHOW CREATE QUOTA", GLOBAL, SHOW_ACCESS) \ M(SHOW_SETTINGS_PROFILES, "SHOW PROFILES, SHOW CREATE SETTINGS PROFILE, SHOW CREATE PROFILE", GLOBAL, SHOW_ACCESS) \ - M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", GLOBAL, SHOW_ACCESS) \ M(SHOW_ACCESS, "", GROUP, ACCESS_MANAGEMENT) \ + M(SHOW_NAMED_COLLECTIONS, "SHOW NAMED COLLECTIONS", GROUP, ACCESS_MANAGEMENT) \ M(ACCESS_MANAGEMENT, "", GROUP, ALL) \ \ M(SYSTEM_SHUTDOWN, "SYSTEM KILL, SHUTDOWN", GLOBAL, SYSTEM) \ diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 8da16b86f4e..e1c598f26f5 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -2,6 +2,8 @@ #include #include #include +#include + #include #include @@ -73,6 +75,7 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf const bool has_tls_ca_cert_file = config.has(ldap_server_config + ".tls_ca_cert_file"); const bool has_tls_ca_cert_dir = config.has(ldap_server_config + ".tls_ca_cert_dir"); const bool has_tls_cipher_suite = config.has(ldap_server_config + ".tls_cipher_suite"); + const bool has_search_limit = config.has(ldap_server_config + ".search_limit"); if (!has_host) throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS); @@ -91,8 +94,8 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf } else if (has_auth_dn_prefix || has_auth_dn_suffix) { - const auto auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); - const auto auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); + std::string 
auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix"); + std::string auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix"); params.bind_dn = auth_dn_prefix + "{user_name}" + auth_dn_suffix; } @@ -176,14 +179,17 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf if (has_port) { - const auto port = config.getInt64(ldap_server_config + ".port"); - if (port < 0 || port > 65535) + UInt32 port = config.getUInt(ldap_server_config + ".port"); + if (port > 65535) throw Exception("Bad value for 'port' entry", ErrorCodes::BAD_ARGUMENTS); params.port = port; } else params.port = (params.enable_tls == LDAPClient::Params::TLSEnable::YES ? 636 : 389); + + if (has_search_limit) + params.search_limit = static_cast(config.getUInt64(ldap_server_config + ".search_limit")); } void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::AbstractConfiguration & config) @@ -313,11 +319,26 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur } } +UInt128 computeParamsHash(const LDAPClient::Params & params, const LDAPClient::RoleSearchParamsList * role_search_params) +{ + SipHash hash; + params.updateHash(hash); + if (role_search_params) + { + for (const auto & params_instance : *role_search_params) + { + params_instance.updateHash(hash); + } + } + + return hash.get128(); +} + bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const BasicCredentials & credentials, const LDAPClient::RoleSearchParamsList * role_search_params, LDAPClient::SearchResultsList * role_search_results) const { std::optional params; - std::size_t params_hash = 0; + UInt128 params_hash = 0; { std::scoped_lock lock(mutex); @@ -331,14 +352,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B params->user = credentials.getUserName(); params->password = credentials.getPassword(); - params->combineCoreHash(params_hash); - if (role_search_params) - { - for (const auto & params_instance : *role_search_params) - { - params_instance.combineHash(params_hash); - } - } + params_hash = computeParamsHash(*params, role_search_params); // Check the cache, but only if the caching is enabled at all. if (params->verification_cooldown > std::chrono::seconds{0}) @@ -408,15 +422,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B new_params.user = credentials.getUserName(); new_params.password = credentials.getPassword(); - std::size_t new_params_hash = 0; - new_params.combineCoreHash(new_params_hash); - if (role_search_params) - { - for (const auto & params_instance : *role_search_params) - { - params_instance.combineHash(new_params_hash); - } - } + const UInt128 new_params_hash = computeParamsHash(new_params, role_search_params); // If the critical server params have changed while we were checking the password, we discard the current result. 
if (params_hash != new_params_hash) diff --git a/src/Access/LDAPClient.cpp b/src/Access/LDAPClient.cpp index 2affbc293ec..856ffd18a32 100644 --- a/src/Access/LDAPClient.cpp +++ b/src/Access/LDAPClient.cpp @@ -2,10 +2,10 @@ #include #include #include +#include #include #include -#include #include #include @@ -15,6 +15,22 @@ #include +namespace +{ + +template >>> +void updateHash(SipHash & hash, const T & value) +{ + hash.update(value); +} + +void updateHash(SipHash & hash, const std::string & value) +{ + hash.update(value.size()); + hash.update(value); +} + +} namespace DB { @@ -26,30 +42,30 @@ namespace ErrorCodes extern const int LDAP_ERROR; } -void LDAPClient::SearchParams::combineHash(std::size_t & seed) const +void LDAPClient::SearchParams::updateHash(SipHash & hash) const { - boost::hash_combine(seed, base_dn); - boost::hash_combine(seed, static_cast(scope)); - boost::hash_combine(seed, search_filter); - boost::hash_combine(seed, attribute); + ::updateHash(hash, base_dn); + ::updateHash(hash, static_cast(scope)); + ::updateHash(hash, search_filter); + ::updateHash(hash, attribute); } -void LDAPClient::RoleSearchParams::combineHash(std::size_t & seed) const +void LDAPClient::RoleSearchParams::updateHash(SipHash & hash) const { - SearchParams::combineHash(seed); - boost::hash_combine(seed, prefix); + SearchParams::updateHash(hash); + ::updateHash(hash, prefix); } -void LDAPClient::Params::combineCoreHash(std::size_t & seed) const +void LDAPClient::Params::updateHash(SipHash & hash) const { - boost::hash_combine(seed, host); - boost::hash_combine(seed, port); - boost::hash_combine(seed, bind_dn); - boost::hash_combine(seed, user); - boost::hash_combine(seed, password); + ::updateHash(hash, host); + ::updateHash(hash, port); + ::updateHash(hash, bind_dn); + ::updateHash(hash, user); + ::updateHash(hash, password); if (user_dn_detection) - user_dn_detection->combineHash(seed); + user_dn_detection->updateHash(hash); } LDAPClient::LDAPClient(const Params & params_) @@ -153,13 +169,13 @@ namespace } -void LDAPClient::diag(int rc, String text) +void LDAPClient::handleError(int result_code, String text) { std::scoped_lock lock(ldap_global_mutex); - if (rc != LDAP_SUCCESS) + if (result_code != LDAP_SUCCESS) { - const char * raw_err_str = ldap_err2string(rc); + const char * raw_err_str = ldap_err2string(result_code); if (raw_err_str && *raw_err_str != '\0') { if (!text.empty()) @@ -214,7 +230,7 @@ bool LDAPClient::openConnection() SCOPE_EXIT({ ldap_memfree(uri); }); - diag(ldap_initialize(&handle, uri)); + handleError(ldap_initialize(&handle, uri)); if (!handle) throw Exception("ldap_initialize() failed", ErrorCodes::LDAP_ERROR); } @@ -226,13 +242,13 @@ bool LDAPClient::openConnection() case LDAPClient::Params::ProtocolVersion::V2: value = LDAP_VERSION2; break; case LDAPClient::Params::ProtocolVersion::V3: value = LDAP_VERSION3; break; } - diag(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value)); + handleError(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value)); } - diag(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON)); + handleError(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON)); #ifdef LDAP_OPT_KEEPCONN - diag(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON)); + handleError(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON)); #endif #ifdef LDAP_OPT_TIMEOUT @@ -240,7 +256,7 @@ bool LDAPClient::openConnection() ::timeval operation_timeout; operation_timeout.tv_sec = params.operation_timeout.count(); operation_timeout.tv_usec = 0; - 
diag(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout)); + handleError(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout)); } #endif @@ -249,18 +265,18 @@ bool LDAPClient::openConnection() ::timeval network_timeout; network_timeout.tv_sec = params.network_timeout.count(); network_timeout.tv_usec = 0; - diag(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout)); + handleError(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout)); } #endif { const int search_timeout = static_cast(params.search_timeout.count()); - diag(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout)); + handleError(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout)); } { - const int size_limit = params.search_limit; - diag(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit)); + const int size_limit = static_cast(params.search_limit); + handleError(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit)); } #ifdef LDAP_OPT_X_TLS_PROTOCOL_MIN @@ -274,7 +290,7 @@ bool LDAPClient::openConnection() case LDAPClient::Params::TLSProtocolVersion::TLS1_1: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_1; break; case LDAPClient::Params::TLSProtocolVersion::TLS1_2: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_2; break; } - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value)); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value)); } #endif @@ -288,44 +304,44 @@ bool LDAPClient::openConnection() case LDAPClient::Params::TLSRequireCert::TRY: value = LDAP_OPT_X_TLS_TRY; break; case LDAPClient::Params::TLSRequireCert::DEMAND: value = LDAP_OPT_X_TLS_DEMAND; break; } - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value)); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value)); } #endif #ifdef LDAP_OPT_X_TLS_CERTFILE if (!params.tls_cert_file.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str())); #endif #ifdef LDAP_OPT_X_TLS_KEYFILE if (!params.tls_key_file.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str())); #endif #ifdef LDAP_OPT_X_TLS_CACERTFILE if (!params.tls_ca_cert_file.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str())); #endif #ifdef LDAP_OPT_X_TLS_CACERTDIR if (!params.tls_ca_cert_dir.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str())); #endif #ifdef LDAP_OPT_X_TLS_CIPHER_SUITE if (!params.tls_cipher_suite.empty()) - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str())); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str())); #endif #ifdef LDAP_OPT_X_TLS_NEWCTX { const int i_am_a_server = 0; - diag(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server)); + handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server)); } #endif if (params.enable_tls == LDAPClient::Params::TLSEnable::YES_STARTTLS) - diag(ldap_start_tls_s(handle, nullptr, nullptr)); + handleError(ldap_start_tls_s(handle, nullptr, nullptr)); final_user_name = 
escapeForDN(params.user); final_bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", final_user_name} }); @@ -346,7 +362,7 @@ bool LDAPClient::openConnection() if (rc == LDAP_INVALID_CREDENTIALS) return false; - diag(rc); + handleError(rc); } // Once bound, run the user DN search query and update the default value, if asked. @@ -425,7 +441,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) } }); - diag(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); + handleError(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs)); for ( auto * msg = ldap_first_message(handle, msgs); @@ -452,7 +468,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) ::berval bv; - diag(ldap_get_dn_ber(handle, msg, &ber, &bv)); + handleError(ldap_get_dn_ber(handle, msg, &ber, &bv)); if (bv.bv_val && bv.bv_len > 0) result.emplace(bv.bv_val, bv.bv_len); @@ -504,7 +520,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) case LDAP_RES_SEARCH_REFERENCE: { char ** referrals = nullptr; - diag(ldap_parse_reference(handle, msg, &referrals, nullptr, 0)); + handleError(ldap_parse_reference(handle, msg, &referrals, nullptr, 0)); if (referrals) { @@ -528,7 +544,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params) char * matched_msg = nullptr; char * error_msg = nullptr; - diag(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0)); + handleError(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0)); if (rc != LDAP_SUCCESS) { @@ -610,7 +626,7 @@ bool LDAPSimpleAuthClient::authenticate(const RoleSearchParamsList * role_search #else // USE_LDAP -void LDAPClient::diag(const int, String) +void LDAPClient::handleError(const int, String) { throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); } diff --git a/src/Access/LDAPClient.h b/src/Access/LDAPClient.h index f228bac5926..bf193bf6bb2 100644 --- a/src/Access/LDAPClient.h +++ b/src/Access/LDAPClient.h @@ -16,6 +16,7 @@ #include #include +class SipHash; namespace DB { @@ -38,7 +39,7 @@ public: String search_filter; String attribute = "cn"; - void combineHash(std::size_t & seed) const; + void updateHash(SipHash & hash) const; }; struct RoleSearchParams @@ -46,7 +47,7 @@ public: { String prefix; - void combineHash(std::size_t & seed) const; + void updateHash(SipHash & hash) const; }; using RoleSearchParamsList = std::vector; @@ -95,7 +96,7 @@ public: ProtocolVersion protocol_version = ProtocolVersion::V3; String host; - std::uint16_t port = 636; + UInt16 port = 636; TLSEnable enable_tls = TLSEnable::YES; TLSProtocolVersion tls_minimum_protocol_version = TLSProtocolVersion::TLS1_2; @@ -119,9 +120,9 @@ public: std::chrono::seconds operation_timeout{40}; std::chrono::seconds network_timeout{30}; std::chrono::seconds search_timeout{20}; - std::uint32_t search_limit = 100; + UInt32 search_limit = 256; /// An arbitrary number, no particular motivation for this value. 
- void combineCoreHash(std::size_t & seed) const; + void updateHash(SipHash & hash) const; }; explicit LDAPClient(const Params & params_); @@ -133,7 +134,7 @@ public: LDAPClient & operator= (LDAPClient &&) = delete; protected: - MAYBE_NORETURN void diag(int rc, String text = ""); + MAYBE_NORETURN void handleError(int result_code, String text = ""); MAYBE_NORETURN bool openConnection(); void closeConnection() noexcept; SearchResults search(const SearchParams & search_params); diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 28ef4ebaa7a..e3690f36cc3 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -228,6 +228,12 @@ namespace user->access.revokeGrantOption(AccessType::ALL); } + bool show_named_collections = config.getBool(user_config + ".show_named_collections", false); + if (!show_named_collections) + { + user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS); + } + String default_database = config.getString(user_config + ".default_database", ""); user->default_database = default_database; diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 6e20e91025f..ef812c0361e 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -30,6 +30,7 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NOT_IMPLEMENTED; extern const int TOO_LARGE_STRING_SIZE; + extern const int LOGICAL_ERROR; } /** Aggregate functions that store one of passed values. @@ -485,13 +486,15 @@ struct SingleValueDataString //-V730 private: using Self = SingleValueDataString; - Int32 size = -1; /// -1 indicates that there is no value. - Int32 capacity = 0; /// power of two or zero + /// 0 size indicates that there is no value. Empty string must has terminating '\0' and, therefore, size of empty string is 1 + UInt32 size = 0; + UInt32 capacity = 0; /// power of two or zero char * large_data; public: - static constexpr Int32 AUTOMATIC_STORAGE_SIZE = 64; - static constexpr Int32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data); + static constexpr UInt32 AUTOMATIC_STORAGE_SIZE = 64; + static constexpr UInt32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data); + static constexpr UInt32 MAX_STRING_SIZE = std::numeric_limits::max(); private: char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero. @@ -502,7 +505,7 @@ public: bool has() const { - return size >= 0; + return size; } private: @@ -536,20 +539,27 @@ public: void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const { - writeBinary(size, buf); + if (unlikely(MAX_STRING_SIZE < size)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "String size is too big ({}), it's a bug", size); + + /// For serialization we use signed Int32 (for historical reasons), -1 means "no value" + Int32 size_to_write = size ? 
size : -1; + writeBinary(size_to_write, buf); if (has()) buf.write(getData(), size); } - void allocateLargeDataIfNeeded(Int64 size_to_reserve, Arena * arena) + void allocateLargeDataIfNeeded(UInt32 size_to_reserve, Arena * arena) { if (capacity < size_to_reserve) { - capacity = static_cast(roundUpToPowerOfTwoOrZero(size_to_reserve)); - /// It might happen if the size was too big and the rounded value does not fit a size_t - if (unlikely(capacity < size_to_reserve)) + if (unlikely(MAX_STRING_SIZE < size_to_reserve)) throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", size_to_reserve); + size_t rounded_capacity = roundUpToPowerOfTwoOrZero(size_to_reserve); + chassert(rounded_capacity <= MAX_STRING_SIZE + 1); /// rounded_capacity <= 2^31 + capacity = static_cast(rounded_capacity); + /// Don't free large_data here. large_data = arena->alloc(capacity); } @@ -557,31 +567,28 @@ public: void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena) { - Int32 rhs_size; - readBinary(rhs_size, buf); + /// For serialization we use signed Int32 (for historical reasons), -1 means "no value" + Int32 rhs_size_signed; + readBinary(rhs_size_signed, buf); - if (rhs_size < 0) + if (rhs_size_signed < 0) { /// Don't free large_data here. - size = rhs_size; + size = 0; return; } + UInt32 rhs_size = rhs_size_signed; if (rhs_size <= MAX_SMALL_STRING_SIZE) { /// Don't free large_data here. - size = rhs_size; - - if (size > 0) - buf.readStrict(small_data, size); + buf.readStrict(small_data, size); } else { /// Reserve one byte more for null-character - Int64 rhs_size_to_reserve = rhs_size; - rhs_size_to_reserve += 1; /// Avoid overflow - allocateLargeDataIfNeeded(rhs_size_to_reserve, arena); + allocateLargeDataIfNeeded(rhs_size + 1, arena); size = rhs_size; buf.readStrict(large_data, size); } @@ -616,7 +623,10 @@ public: /// Assuming to.has() void changeImpl(StringRef value, Arena * arena) { - Int32 value_size = static_cast(value.size); + if (unlikely(MAX_STRING_SIZE < value.size)) + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", value.size); + + UInt32 value_size = static_cast(value.size); if (value_size <= MAX_SMALL_STRING_SIZE) { diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h index cee59fcc907..e9db1a71511 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -155,7 +155,7 @@ public: "Values for {} are expected to be Numeric, Float or Decimal, passed type {}", getName(), value_type->getName()}; - WhichDataType value_type_to_check(value_type); + WhichDataType value_type_to_check(value_type_without_nullable); /// Do not promote decimal because of implementation issues of this function design /// Currently we cannot get result column type in case of decimal we cannot get decimal scale diff --git a/src/Analyzer/HashUtils.h b/src/Analyzer/HashUtils.h new file mode 100644 index 00000000000..46222755938 --- /dev/null +++ b/src/Analyzer/HashUtils.h @@ -0,0 +1,60 @@ +#pragma once + +#include + +namespace DB +{ + +/** This structure holds query tree node ptr and its hash. It can be used as hash map key to avoid unnecessary hash + * recalculations. 
+ * + * Example of usage: + * std::unordered_map map; + */ +template +struct QueryTreeNodeWithHash +{ + QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT + : node(std::move(node_)) + , hash(node->getTreeHash().first) + {} + + QueryTreeNodePtrType node = nullptr; + size_t hash = 0; +}; + +template +inline bool operator==(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +{ + return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node); +} + +template +inline bool operator!=(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +{ + return !(lhs == rhs); +} + +using QueryTreeNodePtrWithHash = QueryTreeNodeWithHash; +using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash; +using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash; + +using QueryTreeNodePtrWithHashSet = std::unordered_set; +using QueryTreeNodeConstRawPtrWithHashSet = std::unordered_set; + +template +using QueryTreeNodePtrWithHashMap = std::unordered_map; + +template +using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map; + +} + +template +struct std::hash> +{ + size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const + { + return node_with_hash.hash; + } +}; diff --git a/src/Analyzer/Passes/FuseFunctionsPass.cpp b/src/Analyzer/Passes/FuseFunctionsPass.cpp index 6c6613e7598..a75afc7cfa6 100644 --- a/src/Analyzer/Passes/FuseFunctionsPass.cpp +++ b/src/Analyzer/Passes/FuseFunctionsPass.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -48,43 +49,24 @@ public: /// Do not apply for `count()` with without arguments or `count(*)`, only `count(x)` is supported. return; - mapping[QueryTreeNodeWithHash(argument_nodes[0])].push_back(&node); + argument_to_functions_mapping[argument_nodes[0]].push_back(&node); } - struct QueryTreeNodeWithHash - { - const QueryTreeNodePtr & node; - IQueryTreeNode::Hash hash; - - explicit QueryTreeNodeWithHash(const QueryTreeNodePtr & node_) - : node(node_) - , hash(node->getTreeHash()) - {} - - bool operator==(const QueryTreeNodeWithHash & rhs) const - { - return hash == rhs.hash && node->isEqual(*rhs.node); - } - - struct Hash - { - size_t operator() (const QueryTreeNodeWithHash & key) const { return key.hash.first ^ key.hash.second; } - }; - }; - /// argument -> list of sum/count/avg functions with this argument - std::unordered_map, QueryTreeNodeWithHash::Hash> mapping; + QueryTreeNodePtrWithHashMap> argument_to_functions_mapping; private: std::unordered_set names_to_collect; }; -QueryTreeNodePtr createResolvedFunction(ContextPtr context, const String & name, DataTypePtr result_type, QueryTreeNodes arguments) +QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String & name, const DataTypePtr & result_type, QueryTreeNodes arguments) { auto function_node = std::make_shared(name); + auto function = FunctionFactory::instance().get(name, context); function_node->resolveAsFunction(std::move(function), result_type); function_node->getArguments().getNodes() = std::move(arguments); + return function_node; } @@ -94,21 +76,20 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get(name, {argument->getResultType()}, parameters, properties); - function_node->resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType()); + function_node->getArguments().getNodes() = { argument }; - function_node->getArgumentsNode() = 
std::make_shared(QueryTreeNodes{argument}); return function_node; } -QueryTreeNodePtr createTupleElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index) +QueryTreeNodePtr createTupleElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index) { - return createResolvedFunction(context, "tupleElement", result_type, {argument, std::make_shared(index)}); + return createResolvedFunction(context, "tupleElement", result_type, {std::move(argument), std::make_shared(index)}); } -QueryTreeNodePtr createArrayElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index) +QueryTreeNodePtr createArrayElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index) { - return createResolvedFunction(context, "arrayElement", result_type, {argument, std::make_shared(index)}); + return createResolvedFunction(context, "arrayElement", result_type, {std::move(argument), std::make_shared(index)}); } void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_count_node, ContextPtr context) @@ -151,6 +132,7 @@ FunctionNodePtr createFusedQuantilesNode(const std::vector n { Array parameters; parameters.reserve(nodes.size()); + for (const auto * node : nodes) { const FunctionNode & function_node = (*node)->as(); @@ -172,6 +154,7 @@ FunctionNodePtr createFusedQuantilesNode(const std::vector n parameters.push_back(constant_value->getValue()); } + return createResolvedAggregateFunction("quantiles", argument, parameters); } @@ -181,7 +164,7 @@ void tryFuseSumCountAvg(QueryTreeNodePtr query_tree_node, ContextPtr context) FuseFunctionsVisitor visitor({"sum", "count", "avg"}); visitor.visit(query_tree_node); - for (auto & [argument, nodes] : visitor.mapping) + for (auto & [argument, nodes] : visitor.argument_to_functions_mapping) { if (nodes.size() < 2) continue; @@ -199,24 +182,22 @@ void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context) { FuseFunctionsVisitor visitor_quantile({"quantile"}); visitor_quantile.visit(query_tree_node); - for (auto & [argument, nodes] : visitor_quantile.mapping) + + for (auto & [argument, nodes] : visitor_quantile.argument_to_functions_mapping) { - if (nodes.size() < 2) + size_t nodes_size = nodes.size(); + if (nodes_size < 2) continue; auto quantiles_node = createFusedQuantilesNode(nodes, argument.node); auto result_array_type = std::dynamic_pointer_cast(quantiles_node->getResultType()); if (!result_array_type) - { throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected return type '{}' of function '{}', should be array", quantiles_node->getResultType(), quantiles_node->getFunctionName()); - } - for (size_t i = 0; i < nodes.size(); ++i) - { + for (size_t i = 0; i < nodes_size; ++i) *nodes[i] = createArrayElementFunction(context, result_array_type->getNestedType(), quantiles_node, i + 1); - } } } diff --git a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp index e4d6633b6e6..3632c41028b 100644 --- a/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp +++ b/src/Analyzer/Passes/OrderByLimitByDuplicateEliminationPass.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB { @@ -10,35 +11,6 @@ namespace DB namespace { -struct QueryTreeNodeWithHash -{ - explicit QueryTreeNodeWithHash(const IQueryTreeNode * node_) - : node(node_) - , hash(node->getTreeHash().first) - 
{} - - const IQueryTreeNode * node = nullptr; - size_t hash = 0; -}; - -struct QueryTreeNodeWithHashHash -{ - size_t operator()(const QueryTreeNodeWithHash & node_with_hash) const - { - return node_with_hash.hash; - } -}; - -struct QueryTreeNodeWithHashEqualTo -{ - bool operator()(const QueryTreeNodeWithHash & lhs_node, const QueryTreeNodeWithHash & rhs_node) const - { - return lhs_node.hash == rhs_node.hash && lhs_node.node->isEqual(*rhs_node.node); - } -}; - -using QueryTreeNodeWithHashSet = std::unordered_set; - class OrderByLimitByDuplicateEliminationVisitor : public InDepthQueryTreeVisitor { public: @@ -93,7 +65,7 @@ public: } private: - QueryTreeNodeWithHashSet unique_expressions_nodes_set; + QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set; }; } diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 138ff721f99..391a9582324 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -67,6 +67,8 @@ #include #include +#include + namespace DB { @@ -517,7 +519,7 @@ public: private: QueryTreeNodes expressions; - std::unordered_map> alias_name_to_expressions; + std::unordered_map alias_name_to_expressions; }; /** Projection names is name of query tree node that is used in projection part of query node. @@ -1100,6 +1102,10 @@ private: static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope); + static void expandGroupByAll(QueryNode & query_tree_node_typed); + + static std::pair recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into); + /// Resolve identifier functions static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context); @@ -1929,6 +1935,68 @@ void QueryAnalyzer::validateJoinTableExpressionWithoutAlias(const QueryTreeNodeP scope.scope_node->formatASTForErrorMessage()); } +std::pair QueryAnalyzer::recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into) +{ + checkStackSize(); + + if (node->as()) + { + into.push_back(node); + return {false, 1}; + } + + auto * function = node->as(); + + if (!function) + return {false, 0}; + + if (function->isAggregateFunction()) + return {true, 0}; + + UInt64 pushed_children = 0; + bool has_aggregate = false; + + for (auto & child : function->getArguments().getNodes()) + { + auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressions(child, into); + has_aggregate |= child_has_aggregate; + pushed_children += child_pushed_children; + } + + /// The current function is not aggregate function and there is no aggregate function in its arguments, + /// so use the current function to replace its arguments + if (!has_aggregate) + { + for (UInt64 i = 0; i < pushed_children; i++) + into.pop_back(); + + into.push_back(node); + pushed_children = 1; + } + + return {has_aggregate, pushed_children}; +} + +/** Expand GROUP BY ALL by extracting all the SELECT-ed expressions that are not aggregate functions. + * + * For a special case that if there is a function having both aggregate functions and other fields as its arguments, + * the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it. 
+ * + * Example: + * SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY ALL + * will expand as + * SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY substring(a, 4, 2), substring(a, 1, 2) + */ +void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed) +{ + auto & group_by_nodes = query_tree_node_typed.getGroupBy().getNodes(); + auto & projection_list = query_tree_node_typed.getProjection(); + + for (auto & node : projection_list.getNodes()) + recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes); + +} + /// Resolve identifier functions implementation @@ -2171,18 +2239,19 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier auto & alias_identifier_node = it->second->as(); auto identifier = alias_identifier_node.getIdentifier(); auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings); - if (!lookup_result.isResolved()) + if (!lookup_result.resolved_identifier) { std::unordered_set valid_identifiers; collectScopeWithParentScopesValidIdentifiersForTypoCorrection(identifier, scope, true, false, false, valid_identifiers); - auto hints = collectIdentifierTypoHints(identifier, valid_identifiers); - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {} identifier '{}' in scope {}{}", - toStringLowercase(IdentifierLookupContext::EXPRESSION), + + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {} identifier '{}'. In scope {}{}", + toStringLowercase(identifier_lookup.lookup_context), identifier.getFullName(), scope.scope_node->formatASTForErrorMessage(), getHintsErrorMessageSuffix(hints)); } + it->second = lookup_result.resolved_identifier; /** During collection of aliases if node is identifier and has alias, we cannot say if it is @@ -2193,9 +2262,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier * If we resolved identifier node as function, we must remove identifier node alias from * expression alias map. */ - if (identifier_lookup.isExpressionLookup() && it->second) + if (identifier_lookup.isExpressionLookup()) scope.alias_name_to_lambda_node.erase(identifier_bind_part); - else if (identifier_lookup.isFunctionLookup() && it->second) + else if (identifier_lookup.isFunctionLookup()) scope.alias_name_to_expression_node.erase(identifier_bind_part); scope.expressions_in_resolve_process_stack.popNode(); @@ -3203,11 +3272,9 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher( if (auto * array_join_node = table_expression->as()) { - size_t table_expressions_column_nodes_with_names_stack_size = table_expressions_column_nodes_with_names_stack.size(); - if (table_expressions_column_nodes_with_names_stack_size < 1) + if (table_expressions_column_nodes_with_names_stack.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Expected at least 1 table expressions on stack before ARRAY JOIN processing. 
Actual {}", - table_expressions_column_nodes_with_names_stack_size); + "Expected at least 1 table expressions on stack before ARRAY JOIN processing"); auto & table_expression_column_nodes_with_names = table_expressions_column_nodes_with_names_stack.back(); @@ -5388,25 +5455,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, } } - /// TODO: Special functions that can take query - /// TODO: Support qualified matchers for table function - - for (auto & argument_node : table_function_node.getArguments().getNodes()) - { - if (argument_node->getNodeType() == QueryTreeNodeType::MATCHER) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Matcher as table function argument is not supported {}. In scope {}", - join_tree_node->formatASTForErrorMessage(), - scope.scope_node->formatASTForErrorMessage()); - } - - auto * function_node = argument_node->as(); - if (function_node && table_function_factory.hasNameOrAlias(function_node->getFunctionName())) - continue; - - resolveExpressionNode(argument_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); - } + resolveExpressionNodeList(table_function_node.getArgumentsNode(), scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); auto table_function_ast = table_function_node.toAST(); table_function_ptr->parseArguments(table_function_ast, scope_context); @@ -6006,6 +6055,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier node->removeAlias(); } + if (query_node_typed.isGroupByAll()) + expandGroupByAll(query_node_typed); + /** Validate aggregates * * 1. Check that there are no aggregate functions and GROUPING function in JOIN TREE, WHERE, PREWHERE, in another aggregate functions. diff --git a/src/Analyzer/QueryNode.cpp b/src/Analyzer/QueryNode.cpp index c5bbc193544..d31a3660336 100644 --- a/src/Analyzer/QueryNode.cpp +++ b/src/Analyzer/QueryNode.cpp @@ -54,6 +54,9 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s if (is_group_by_with_totals) buffer << ", is_group_by_with_totals: " << is_group_by_with_totals; + if (is_group_by_all) + buffer << ", is_group_by_all: " << is_group_by_all; + std::string group_by_type; if (is_group_by_with_rollup) group_by_type = "rollup"; @@ -117,7 +120,7 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s getWhere()->dumpTreeImpl(buffer, format_state, indent + 4); } - if (hasGroupBy()) + if (!is_group_by_all && hasGroupBy()) { buffer << '\n' << std::string(indent + 2, ' ') << "GROUP BY\n"; getGroupBy().dumpTreeImpl(buffer, format_state, indent + 4); @@ -198,7 +201,8 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const is_group_by_with_totals == rhs_typed.is_group_by_with_totals && is_group_by_with_rollup == rhs_typed.is_group_by_with_rollup && is_group_by_with_cube == rhs_typed.is_group_by_with_cube && - is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets; + is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets && + is_group_by_all == rhs_typed.is_group_by_all; } void QueryNode::updateTreeHashImpl(HashState & state) const @@ -226,6 +230,7 @@ void QueryNode::updateTreeHashImpl(HashState & state) const state.update(is_group_by_with_rollup); state.update(is_group_by_with_cube); state.update(is_group_by_with_grouping_sets); + state.update(is_group_by_all); if (constant_value) { @@ -251,6 +256,7 @@ QueryTreeNodePtr QueryNode::cloneImpl() const result_query_node->is_group_by_with_rollup = 
is_group_by_with_rollup; result_query_node->is_group_by_with_cube = is_group_by_with_cube; result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets; + result_query_node->is_group_by_all = is_group_by_all; result_query_node->cte_name = cte_name; result_query_node->projection_columns = projection_columns; result_query_node->constant_value = constant_value; @@ -267,6 +273,7 @@ ASTPtr QueryNode::toASTImpl() const select_query->group_by_with_rollup = is_group_by_with_rollup; select_query->group_by_with_cube = is_group_by_with_cube; select_query->group_by_with_grouping_sets = is_group_by_with_grouping_sets; + select_query->group_by_all = is_group_by_all; if (hasWith()) select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST()); @@ -283,7 +290,7 @@ ASTPtr QueryNode::toASTImpl() const if (getWhere()) select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST()); - if (hasGroupBy()) + if (!is_group_by_all && hasGroupBy()) select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST()); if (hasHaving()) diff --git a/src/Analyzer/QueryNode.h b/src/Analyzer/QueryNode.h index 1bb381c95c9..5eb70f168ec 100644 --- a/src/Analyzer/QueryNode.h +++ b/src/Analyzer/QueryNode.h @@ -176,6 +176,18 @@ public: is_group_by_with_grouping_sets = is_group_by_with_grouping_sets_value; } + /// Returns true, if query node has GROUP BY ALL modifier, false otherwise + bool isGroupByAll() const + { + return is_group_by_all; + } + + /// Set query node GROUP BY ALL modifier value + void setIsGroupByAll(bool is_group_by_all_value) + { + is_group_by_all = is_group_by_all_value; + } + /// Returns true if query node WITH section is not empty, false otherwise bool hasWith() const { @@ -580,6 +592,7 @@ private: bool is_group_by_with_rollup = false; bool is_group_by_with_cube = false; bool is_group_by_with_grouping_sets = false; + bool is_group_by_all = false; std::string cte_name; NamesAndTypes projection_columns; diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index 51745d820e7..01ecd4ece30 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -215,6 +215,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q current_query_tree->setIsGroupByWithCube(select_query_typed.group_by_with_cube); current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup); current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets); + current_query_tree->setIsGroupByAll(select_query_typed.group_by_all); current_query_tree->setOriginalAST(select_query); auto select_settings = select_query_typed.settings(); diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index afd4bd2b21d..33b0049dc4d 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -110,12 +110,12 @@ void registerBackupEngineS3(BackupFactory & factory) if (params.open_mode == IBackup::OpenMode::READ) { - auto reader = std::make_shared(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context); + auto reader = std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context); return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, reader, params.context); } else { - auto writer = std::make_shared(S3::URI{Poco::URI{s3_uri}}, access_key_id, secret_access_key, params.context); + auto writer = 
std::make_shared(S3::URI{s3_uri}, access_key_id, secret_access_key, params.context); return std::make_unique(backup_name_for_logging, archive_params, params.base_backup_info, writer, params.context, params.is_internal_backup, params.backup_coordination, params.backup_uuid); } #else diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 1ed9ff58fdc..16597748f1e 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1401,6 +1401,11 @@ try QueryPipeline pipeline(std::move(pipe)); PullingAsyncPipelineExecutor executor(pipeline); + if (need_render_progress) + { + pipeline.setProgressCallback([this](const Progress & progress){ onProgress(progress); }); + } + Block block; while (executor.pull(block)) { @@ -1445,12 +1450,6 @@ catch (...) void ClientBase::sendDataFromStdin(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query) { - if (need_render_progress) - { - /// Add callback to track reading from fd. - std_in.setProgressCallback(global_context); - } - /// Send data read from stdin. try { diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 6c85d6a5f2b..2b15bfa7cac 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -171,6 +171,11 @@ protected: void initTtyBuffer(ProgressOption progress); + /// Should be one of the first, to be destroyed the last, + /// since other members can use them. + SharedContextHolder shared_context; + ContextMutablePtr global_context; + bool is_interactive = false; /// Use either interactive line editing interface or batch mode. bool is_multiquery = false; bool delayed_interactive = false; @@ -208,9 +213,6 @@ protected: /// Settings specified via command line args Settings cmd_settings; - SharedContextHolder shared_context; - ContextMutablePtr global_context; - /// thread status should be destructed before shared context because it relies on process list. 
std::optional thread_status; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index d34ae640962..63cadbb5241 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -524,11 +524,13 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng size_t nested_offset = src_concrete.offsetAt(start); size_t nested_length = src_concrete.getOffsets()[start + length - 1] - nested_offset; + Offsets & cur_offsets = getOffsets(); + /// Reserve offsets beforehand to make this more exception-safe (in case of MEMORY_LIMIT_EXCEEDED) + cur_offsets.reserve(cur_offsets.size() + length); + getData().insertRangeFrom(src_concrete.getData(), nested_offset, nested_length); - Offsets & cur_offsets = getOffsets(); const Offsets & src_offsets = src_concrete.getOffsets(); - if (start == 0 && cur_offsets.empty()) { cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length); diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 982951f05b0..f3c344e1bd7 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -124,6 +124,9 @@ void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t len size_t nested_offset = src_concrete.offsetAt(start); size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset; + /// Reserve offsets beforehand to make this more exception-safe (in case of MEMORY_LIMIT_EXCEEDED) + offsets.reserve(offsets.size() + length); + size_t old_chars_size = chars.size(); chars.resize(old_chars_size + nested_length); memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], nested_length); diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp index 6d9fd686765..6878533c2fd 100644 --- a/src/Common/CurrentMetrics.cpp +++ b/src/Common/CurrentMetrics.cpp @@ -5,6 +5,7 @@ #define APPLY_FOR_METRICS(M) \ M(Query, "Number of executing queries") \ M(Merge, "Number of executing background merges") \ + M(Move, "Number of currently executing moves") \ M(PartMutation, "Number of mutations (ALTER DELETE/UPDATE)") \ M(ReplicatedFetch, "Number of data parts being fetched from replica") \ M(ReplicatedSend, "Number of data parts being sent to replicas") \ diff --git a/src/Common/EventRateMeter.h b/src/Common/EventRateMeter.h index f70258faa9e..3a21a80ce8b 100644 --- a/src/Common/EventRateMeter.h +++ b/src/Common/EventRateMeter.h @@ -27,6 +27,14 @@ public: /// NOTE: Adding events into distant past (further than `period`) must be avoided. void add(double now, double count) { + // Remove data from the initial heating stage that can be present at the beginning of a query. + // Otherwise it leads to a wrong gradual increase of the average value, making the algorithm insufficiently reactive.
+ if (count != 0.0 && ++data_points < 5) + { + start = events.time; + events = ExponentiallySmoothedAverage(); + } + if (now - period <= start) // precise counting mode events = ExponentiallySmoothedAverage(events.value + count, now); else // exponential smoothing mode @@ -51,6 +59,7 @@ public: { start = now; events = ExponentiallySmoothedAverage(); + data_points = 0; } private: @@ -58,6 +67,7 @@ private: const double half_decay_time; double start; // Instant in past without events before it; when measurement started or reset ExponentiallySmoothedAverage events; // Estimated number of events in the last `period` + size_t data_points = 0; }; } diff --git a/src/Common/MemoryTracker.cpp b/src/Common/MemoryTracker.cpp index b530410ec63..f556b255fc2 100644 --- a/src/Common/MemoryTracker.cpp +++ b/src/Common/MemoryTracker.cpp @@ -220,7 +220,7 @@ void MemoryTracker::allocImpl(Int64 size, bool throw_if_memory_exceeded, MemoryT Int64 limit_to_check = current_hard_limit; #if USE_JEMALLOC - if (level == VariableContext::Global) + if (level == VariableContext::Global && allow_use_jemalloc_memory.load(std::memory_order_relaxed)) { /// Jemalloc arenas may keep some extra memory. /// This memory was substucted from RSS to decrease memory drift. diff --git a/src/Common/MemoryTracker.h b/src/Common/MemoryTracker.h index 2d898935dcf..f6113d31423 100644 --- a/src/Common/MemoryTracker.h +++ b/src/Common/MemoryTracker.h @@ -55,6 +55,7 @@ private: std::atomic soft_limit {0}; std::atomic hard_limit {0}; std::atomic profiler_limit {0}; + std::atomic_bool allow_use_jemalloc_memory {true}; static std::atomic free_memory_in_allocator_arenas; @@ -125,6 +126,10 @@ public: { return soft_limit.load(std::memory_order_relaxed); } + void setAllowUseJemallocMemory(bool value) + { + allow_use_jemalloc_memory.store(value, std::memory_order_relaxed); + } /** Set limit if it was not set. * Otherwise, set limit to new value, if new value is greater than previous limit. 
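To make the heating-stage handling in the EventRateMeter.h hunk above concrete, here is a minimal standalone sketch of the same idea, not the ClickHouse class itself: it uses a simplified running average instead of ExponentiallySmoothedAverage, and all names are hypothetical. The first few non-empty samples restart the measurement window, so the idle period before data starts flowing does not drag the reported rate down.

    #include <cstddef>

    // Simplified rate meter: only the "heating stage" reset from the patch above is modelled.
    struct HeatingAwareRateMeter
    {
        double window_start = 0;   // left edge of the measurement window
        double total = 0;          // events accumulated since window_start
        size_t data_points = 0;    // non-empty samples seen so far

        void add(double now, double count)
        {
            // While still heating up (fewer than 5 non-empty samples), restart the window,
            // discarding whatever was accumulated during the initial idle period.
            if (count != 0.0 && ++data_points < 5)
            {
                window_start = now;
                total = 0;
            }
            total += count;
        }

        double rate(double now) const
        {
            double elapsed = now - window_start;
            return elapsed > 0 ? total / elapsed : 0.0;
        }

        void reset(double now)
        {
            window_start = now;
            total = 0;
            data_points = 0;   // the patch likewise resets data_points in EventRateMeter::reset
        }
    };

The real class keeps its state inside ExponentiallySmoothedAverage and resets `start` to `events.time`, but the effect on the first few samples is the same.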
diff --git a/src/Common/ProgressIndication.h b/src/Common/ProgressIndication.h index 294b7c9a493..717de5debb9 100644 --- a/src/Common/ProgressIndication.h +++ b/src/Common/ProgressIndication.h @@ -90,7 +90,7 @@ private: bool write_progress_on_update = false; - EventRateMeter cpu_usage_meter{static_cast(clock_gettime_ns()), 3'000'000'000 /*ns*/}; // average cpu utilization last 3 second + EventRateMeter cpu_usage_meter{static_cast(clock_gettime_ns()), 2'000'000'000 /*ns*/}; // average cpu utilization last 2 second HostToThreadTimesMap thread_data; /// In case of all of the above: /// - clickhouse-local diff --git a/src/Common/SipHash.h b/src/Common/SipHash.h index 281a65ca36a..d931b871550 100644 --- a/src/Common/SipHash.h +++ b/src/Common/SipHash.h @@ -189,6 +189,13 @@ public: finalize(); return v0 ^ v1 ^ v2 ^ v3; } + + UInt128 get128() + { + UInt128 res; + get128(res); + return res; + } }; @@ -208,9 +215,7 @@ inline UInt128 sipHash128(const char * data, const size_t size) { SipHash hash; hash.update(data, size); - UInt128 res; - hash.get128(res); - return res; + return hash.get128(); } inline UInt64 sipHash64(const char * data, const size_t size) diff --git a/src/Common/TaskStatsInfoGetter.cpp b/src/Common/TaskStatsInfoGetter.cpp index b81da2f3fe2..0398b2b579c 100644 --- a/src/Common/TaskStatsInfoGetter.cpp +++ b/src/Common/TaskStatsInfoGetter.cpp @@ -8,6 +8,7 @@ #include "hasLinuxCapability.h" #include +#include #include #include @@ -205,6 +206,20 @@ bool checkPermissionsImpl() { TaskStatsInfoGetter(); } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::NETLINK_ERROR) + { + /// This error happens all the time when running inside Docker - consider it ok, + /// don't create noise with this error. + LOG_DEBUG(&Poco::Logger::get(__PRETTY_FUNCTION__), "{}", getCurrentExceptionMessage(false)); + } + else + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + return false; + } catch (...) 
{ tryLogCurrentException(__PRETTY_FUNCTION__); diff --git a/src/Common/ThreadProfileEvents.h b/src/Common/ThreadProfileEvents.h index 5895ef3208a..48746dd5f2a 100644 --- a/src/Common/ThreadProfileEvents.h +++ b/src/Common/ThreadProfileEvents.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -47,6 +48,8 @@ struct RUsageCounters UInt64 soft_page_faults = 0; UInt64 hard_page_faults = 0; + UInt64 thread_id = 0; + RUsageCounters() = default; RUsageCounters(const ::rusage & rusage_, UInt64 real_time_) { @@ -61,6 +64,8 @@ struct RUsageCounters soft_page_faults = static_cast(rusage.ru_minflt); hard_page_faults = static_cast(rusage.ru_majflt); + + thread_id = getThreadId(); } static RUsageCounters current() @@ -78,6 +83,12 @@ struct RUsageCounters static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events) { + chassert(prev.thread_id == curr.thread_id); + /// LONG_MAX is ~106751 days + chassert(curr.real_time - prev.real_time < LONG_MAX); + chassert(curr.user_time - prev.user_time < LONG_MAX); + chassert(curr.sys_time - prev.sys_time < LONG_MAX); + profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U); profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U); profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U); diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 67450d8c779..6ec46e3e9dc 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -179,8 +179,8 @@ protected: /// Is used to send logs from logs_queue to client in case of fatal errors. std::function fatal_error_callback; - /// It is used to avoid enabling the query profiler when you have multiple ThreadStatus in the same thread - bool query_profiler_enabled = true; + /// See setInternalThread() + bool internal_thread = false; /// Requires access to query_id. friend class MemoryTrackerThreadSwitcher; @@ -225,11 +225,21 @@ public: return global_context.lock(); } - void disableProfiling() - { - assert(!query_profiler_real && !query_profiler_cpu); - query_profiler_enabled = false; - } + /// "Internal" ThreadStatus is used for materialized views for separate + /// tracking into system.query_views_log + /// + /// You can have multiple internal threads, but only one non-internal with + /// the same thread_id. + /// + /// "Internal" thread: + /// - cannot have query profiler + /// since the running (main query) thread should already have one + /// - should not try to obtain latest counter on detach + /// because detaching of such threads will be done from a different + /// thread_id, and some counters are not available (i.e. getrusage()), + /// but anyway they are accounted correctly in the main ThreadStatus of a + /// query. 
+ void setInternalThread(); /// Starts new query and create new thread group for it, current thread becomes master thread of the query void initializeQuery(); diff --git a/src/Coordination/KeeperSnapshotManagerS3.cpp b/src/Coordination/KeeperSnapshotManagerS3.cpp index cc837f5f496..75acc7ecb8b 100644 --- a/src/Coordination/KeeperSnapshotManagerS3.cpp +++ b/src/Coordination/KeeperSnapshotManagerS3.cpp @@ -65,7 +65,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo auto auth_settings = S3::AuthSettings::loadFromConfig(config_prefix, config); auto endpoint = config.getString(config_prefix + ".endpoint"); - auto new_uri = S3::URI{Poco::URI(endpoint)}; + auto new_uri = S3::URI{endpoint}; { std::lock_guard client_lock{snapshot_s3_client_mutex}; diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index 33691e83d27..492766f8f51 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -667,9 +667,15 @@ Names Block::getDataTypeNames() const } -std::unordered_map Block::getNamesToIndexesMap() const +Block::NameMap Block::getNamesToIndexesMap() const { - return index_by_name; + NameMap res; + res.reserve(index_by_name.size()); + + for (const auto & [name, index] : index_by_name) + res[name] = index; + + return res; } diff --git a/src/Core/Block.h b/src/Core/Block.h index 5a5458cc8f7..eb9d57ea6f8 100644 --- a/src/Core/Block.h +++ b/src/Core/Block.h @@ -5,6 +5,8 @@ #include #include +#include + #include #include #include @@ -93,7 +95,10 @@ public: Names getNames() const; DataTypes getDataTypes() const; Names getDataTypeNames() const; - std::unordered_map getNamesToIndexesMap() const; + + /// Hash table match `column name -> position in the block`. + using NameMap = HashMap; + NameMap getNamesToIndexesMap() const; Serializations getSerializations() const; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index bcaea96512d..cfc7df6c853 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -851,6 +851,9 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(Bool, output_format_sql_insert_include_column_names, true, "Include column names in INSERT query", 0) \ M(Bool, output_format_sql_insert_use_replace, false, "Use REPLACE statement instead of INSERT", 0) \ M(Bool, output_format_sql_insert_quote_names, true, "Quote column names with '`' characters", 0) \ + \ + M(Bool, output_format_bson_string_as_string, false, "Use BSON String type instead of Binary for String columns.", 0) \ + M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \ // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
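As a quick illustration of the Block::NameMap introduced in Core/Block.h above (a sketch, not code from this patch; the helper function and include paths are assumptions): ClickHouse's HashMap::find returns a pointer-like result that is null when the key is absent, and the value is read through getMapped(), rather than the iterator/`->second` pattern of std::unordered_map. This is the same pattern the ColumnMapping.cpp change later in this patch switches to.

    #include <Core/Block.h>
    #include <base/StringRef.h>
    #include <iostream>
    #include <string>

    // `header` is assumed to be an already-built DB::Block.
    void printColumnPosition(const DB::Block & header, const std::string & name)
    {
        /// Block::NameMap maps column name -> position in the block.
        const DB::Block::NameMap indexes = header.getNamesToIndexesMap();

        /// find() returns nullptr when the name is unknown (no end() iterator to compare against),
        /// and the mapped value is read via getMapped().
        if (const auto * it = indexes.find(StringRef(name)))
            std::cout << name << " -> position " << it->getMapped() << '\n';
        else
            std::cout << name << " is not present in the block" << '\n';
    }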
diff --git a/src/Dictionaries/DictionaryStructure.cpp b/src/Dictionaries/DictionaryStructure.cpp index ec6a56ec2ab..104cf6d3346 100644 --- a/src/Dictionaries/DictionaryStructure.cpp +++ b/src/Dictionaries/DictionaryStructure.cpp @@ -284,7 +284,7 @@ std::vector DictionaryStructure::getAttributes( std::unordered_set attribute_names; std::vector res_attributes; - const FormatSettings format_settings; + const FormatSettings format_settings = {}; for (const auto & config_elem : config_elems) { diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 9d79ec3e702..353367b12b6 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -62,7 +62,7 @@ struct ExternalQueryBuilder private: - const FormatSettings format_settings; + const FormatSettings format_settings = {}; void composeLoadAllQuery(WriteBuffer & out) const; diff --git a/src/Dictionaries/MongoDBDictionarySource.cpp b/src/Dictionaries/MongoDBDictionarySource.cpp index 0a8d632efcb..9c751d5ce97 100644 --- a/src/Dictionaries/MongoDBDictionarySource.cpp +++ b/src/Dictionaries/MongoDBDictionarySource.cpp @@ -74,7 +74,6 @@ void registerDictionarySourceMongoDB(DictionarySourceFactory & factory) // Poco/MongoDB/BSONWriter.h:54: void writeCString(const std::string & value); // src/IO/WriteHelpers.h:146 #define writeCString(s, buf) #include -#include namespace DB diff --git a/src/Dictionaries/MongoDBDictionarySource.h b/src/Dictionaries/MongoDBDictionarySource.h index 0d4c5f6aa0b..ac5f19816d2 100644 --- a/src/Dictionaries/MongoDBDictionarySource.h +++ b/src/Dictionaries/MongoDBDictionarySource.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include "DictionaryStructure.h" diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index 98da89f81ed..b274786f162 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -42,7 +42,7 @@ std::unique_ptr createReadBufferFromFileBase( if (read_hint.has_value()) estimated_size = *read_hint; else if (file_size.has_value()) - estimated_size = file_size.has_value() ? 
*file_size : 0; + estimated_size = *file_size; if (!existing_memory && settings.local_fs_method == LocalFSReadMethod::mmap @@ -158,7 +158,15 @@ std::unique_ptr createReadBufferFromFileBase( #endif ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); - return create(settings.local_fs_buffer_size, flags); + + size_t buffer_size = settings.local_fs_buffer_size; + /// Check if the buffer can be smaller than default + if (read_hint.has_value() && *read_hint > 0 && *read_hint < buffer_size) + buffer_size = *read_hint; + if (file_size.has_value() && *file_size < buffer_size) + buffer_size = *file_size; + + return create(buffer_size, flags); } } diff --git a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp index 010fc103254..ce5171fedee 100644 --- a/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp +++ b/src/Disks/ObjectStorages/MetadataStorageFromDiskTransactionOperations.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -62,7 +63,7 @@ UnlinkFileOperation::UnlinkFileOperation(const std::string & path_, IDisk & disk void UnlinkFileOperation::execute(std::unique_lock &) { - auto buf = disk.readFile(path); + auto buf = disk.readFile(path, ReadSettings{}, std::nullopt, disk.getFileSize(path)); readStringUntilEOF(prev_data, *buf); disk.removeFile(path); } diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 11c9345d4a1..996268079e8 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -658,7 +658,7 @@ std::unique_ptr S3ObjectStorage::cloneObjectStorage( return std::make_unique( std::move(new_client), std::move(new_s3_settings), version_id, s3_capabilities, new_namespace, - S3::URI(Poco::URI(config.getString(config_prefix + ".endpoint"))).endpoint); + config.getString(config_prefix + ".endpoint")); } } diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index ca2e9d04926..ee6b798629c 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -137,7 +137,7 @@ std::unique_ptr getClient( settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler); - S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); + S3::URI uri(config.getString(config_prefix + ".endpoint")); if (uri.key.back() != '/') throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp index 76f2e5994fb..533a925aa1b 100644 --- a/src/Disks/ObjectStorages/S3/registerDiskS3.cpp +++ b/src/Disks/ObjectStorages/S3/registerDiskS3.cpp @@ -104,7 +104,7 @@ void registerDiskS3(DiskFactory & factory, bool global_skip_access_check) ContextPtr context, const DisksMap & /*map*/) -> DiskPtr { - S3::URI uri(Poco::URI(config.getString(config_prefix + ".endpoint"))); + S3::URI uri(config.getString(config_prefix + ".endpoint")); if (uri.key.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "No key in S3 uri: {}", uri.uri.toString()); diff --git a/src/Formats/BSONTypes.cpp b/src/Formats/BSONTypes.cpp new file mode 100644 index 00000000000..813c155325a --- /dev/null +++ b/src/Formats/BSONTypes.cpp @@ -0,0 +1,106 @@ +#include +#include +#include + +namespace DB 
+{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_TYPE; +} + +static std::string byteToHexString(uint8_t byte) +{ + return "0x" + getHexUIntUppercase(byte); +} + +BSONType getBSONType(uint8_t value) +{ + if ((value >= 0x01 && value <= 0x13) || value == 0xFF || value == 0x7f) + return BSONType(value); + + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown BSON type: {}", byteToHexString(value)); +} + +BSONBinarySubtype getBSONBinarySubtype(uint8_t value) +{ + if (value <= 0x07) + return BSONBinarySubtype(value); + + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unknown BSON binary subtype: {}", byteToHexString(value)); +} + +std::string getBSONTypeName(BSONType type) +{ + switch (type) + { + case BSONType::BINARY: + return "Binary"; + case BSONType::SYMBOL: + return "Symbol"; + case BSONType::ARRAY: + return "Array"; + case BSONType::DOCUMENT: + return "Document"; + case BSONType::TIMESTAMP: + return "Timestamp"; + case BSONType::INT64: + return "Int64"; + case BSONType::INT32: + return "Int32"; + case BSONType::BOOL: + return "Bool"; + case BSONType::DOUBLE: + return "Double"; + case BSONType::STRING: + return "String"; + case BSONType::DECIMAL128: + return "Decimal128"; + case BSONType::JAVA_SCRIPT_CODE_W_SCOPE: + return "JavaScript code w/ scope"; + case BSONType::JAVA_SCRIPT_CODE: + return "JavaScript code"; + case BSONType::DB_POINTER: + return "DBPointer"; + case BSONType::REGEXP: + return "Regexp"; + case BSONType::DATETIME: + return "Datetime"; + case BSONType::OBJECT_ID: + return "ObjectId"; + case BSONType::UNDEFINED: + return "Undefined"; + case BSONType::NULL_VALUE: + return "Null"; + case BSONType::MAX_KEY: + return "Max key"; + case BSONType::MIN_KEY: + return "Min key"; + } +} + +std::string getBSONBinarySubtypeName(BSONBinarySubtype subtype) +{ + switch (subtype) + { + case BSONBinarySubtype::BINARY: + return "Binary"; + case BSONBinarySubtype::FUNCTION: + return "Function"; + case BSONBinarySubtype::BINARY_OLD: + return "Binary (Old)"; + case BSONBinarySubtype::UUID_OLD: + return "UUID (Old)"; + case BSONBinarySubtype::UUID: + return "UUID"; + case BSONBinarySubtype::MD5: + return "MD5"; + case BSONBinarySubtype::ENCRYPTED_BSON_VALUE: + return "Encrypted BSON value"; + case BSONBinarySubtype::COMPRESSED_BSON_COLUMN: + return "Compressed BSON column"; + } +} + +} diff --git a/src/Formats/BSONTypes.h b/src/Formats/BSONTypes.h new file mode 100644 index 00000000000..2d20cdae698 --- /dev/null +++ b/src/Formats/BSONTypes.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +namespace DB +{ + +static const uint8_t BSON_DOCUMENT_END = 0x00; +using BSONSizeT = uint32_t; +static const BSONSizeT MAX_BSON_SIZE = std::numeric_limits::max(); + +/// See details on https://bsonspec.org/spec.html +enum class BSONType +{ + DOUBLE = 0x01, + STRING = 0x02, + DOCUMENT = 0x03, + ARRAY = 0x04, + BINARY = 0x05, + UNDEFINED = 0x06, + OBJECT_ID = 0x07, + BOOL = 0x08, + DATETIME = 0x09, + NULL_VALUE = 0x0A, + REGEXP = 0x0B, + DB_POINTER = 0x0C, + JAVA_SCRIPT_CODE = 0x0D, + SYMBOL = 0x0E, + JAVA_SCRIPT_CODE_W_SCOPE = 0x0F, + INT32 = 0x10, + TIMESTAMP = 0x11, + INT64 = 0x12, + DECIMAL128 = 0x13, + MIN_KEY = 0xFF, + MAX_KEY = 0x7F, +}; + +enum class BSONBinarySubtype +{ + BINARY = 0x00, + FUNCTION = 0x01, + BINARY_OLD = 0x02, + UUID_OLD = 0x03, + UUID = 0x04, + MD5 = 0x05, + ENCRYPTED_BSON_VALUE = 0x06, + COMPRESSED_BSON_COLUMN = 0x07, +}; + +BSONType getBSONType(uint8_t value); +std::string getBSONTypeName(BSONType type); + +BSONBinarySubtype getBSONBinarySubtype(uint8_t value); +std::string 
getBSONBinarySubtypeName(BSONBinarySubtype subtype); + +} diff --git a/src/Formats/ColumnMapping.cpp b/src/Formats/ColumnMapping.cpp index 8704619e477..b9285a3bc09 100644 --- a/src/Formats/ColumnMapping.cpp +++ b/src/Formats/ColumnMapping.cpp @@ -18,7 +18,7 @@ void ColumnMapping::setupByHeader(const Block & header) } void ColumnMapping::addColumns( - const Names & column_names, const std::unordered_map & column_indexes_by_names, const FormatSettings & settings) + const Names & column_names, const Block::NameMap & column_indexes_by_names, const FormatSettings & settings) { std::vector read_columns(column_indexes_by_names.size(), false); @@ -26,8 +26,8 @@ void ColumnMapping::addColumns( { names_of_columns.push_back(name); - const auto column_it = column_indexes_by_names.find(name); - if (column_it == column_indexes_by_names.end()) + const auto * column_it = column_indexes_by_names.find(name); + if (!column_it) { if (settings.skip_unknown_fields) { @@ -41,7 +41,7 @@ void ColumnMapping::addColumns( name, column_indexes_for_input_fields.size()); } - const auto column_index = column_it->second; + const auto column_index = column_it->getMapped(); if (read_columns[column_index]) throw Exception("Duplicate field found while parsing format header: " + name, ErrorCodes::INCORRECT_DATA); diff --git a/src/Formats/ColumnMapping.h b/src/Formats/ColumnMapping.h index c0f2d459924..c20e598580f 100644 --- a/src/Formats/ColumnMapping.h +++ b/src/Formats/ColumnMapping.h @@ -28,7 +28,7 @@ struct ColumnMapping void setupByHeader(const Block & header); void addColumns( - const Names & column_names, const std::unordered_map & column_indexes_by_names, const FormatSettings & settings); + const Names & column_names, const Block::NameMap & column_indexes_by_names, const FormatSettings & settings); void insertDefaultsForNotSeenColumns(MutableColumns & columns, std::vector & read_columns); }; diff --git a/src/Formats/EscapingRuleUtils.cpp b/src/Formats/EscapingRuleUtils.cpp index bba94e98e49..2414d8c5ba4 100644 --- a/src/Formats/EscapingRuleUtils.cpp +++ b/src/Formats/EscapingRuleUtils.cpp @@ -834,17 +834,23 @@ DataTypes getDefaultDataTypeForEscapingRules(const std::vector * numbers_parsed_from_json_strings = nullptr); void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings); +String getAdditionalFormatInfoForAllRowBasedFormats(const FormatSettings & settings); String getAdditionalFormatInfoByEscapingRule(const FormatSettings & settings, FormatSettings::EscapingRule escaping_rule); void checkSupportedDelimiterAfterField(FormatSettings::EscapingRule escaping_rule, const String & delimiter, const DataTypePtr & type); diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 868e275fcdc..9c54a3526db 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -178,6 +178,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.try_infer_integers = settings.input_format_try_infer_integers; format_settings.try_infer_dates = settings.input_format_try_infer_dates; format_settings.try_infer_datetimes = settings.input_format_try_infer_datetimes; + format_settings.bson.output_string_as_string = settings.output_format_bson_string_as_string; + format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference = settings.input_format_bson_skip_fields_with_unsupported_types_in_schema_inference; /// Validate avro_schema_registry_url with RemoteHostFilter when non-empty and in 
Server context if (format_settings.schema.is_server) diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 3ff227c5b56..c7c9bfc816c 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -303,6 +303,12 @@ struct FormatSettings bool use_replace = false; bool quote_names = true; } sql_insert; + + struct + { + bool output_string_as_string; + bool skip_fields_with_unsupported_types_in_schema_inference; + } bson; }; } diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 020a7b32403..926e3478ad8 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -231,7 +231,14 @@ namespace JSONUtils { auto type = getDataTypeFromFieldImpl(key_value_pair.second, settings, numbers_parsed_from_json_strings); if (!type) + { + /// If we couldn't infer nested type and Object type is not enabled, + /// we can't determine the type of this JSON field. + if (!settings.json.try_infer_objects) + return nullptr; + continue; + } if (settings.json.try_infer_objects && isObject(type)) return std::make_shared("json", true); diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index ba40fe442ab..285e234167b 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -19,6 +19,7 @@ void registerFileSegmentationEngineJSONCompactEachRow(FormatFactory & factory); void registerFileSegmentationEngineHiveText(FormatFactory & factory); #endif void registerFileSegmentationEngineLineAsString(FormatFactory & factory); +void registerFileSegmentationEngineBSONEachRow(FormatFactory & factory); /// Formats for both input/output. @@ -49,6 +50,8 @@ void registerInputFormatJSONColumns(FormatFactory & factory); void registerOutputFormatJSONColumns(FormatFactory & factory); void registerInputFormatJSONCompactColumns(FormatFactory & factory); void registerOutputFormatJSONCompactColumns(FormatFactory & factory); +void registerInputFormatBSONEachRow(FormatFactory & factory); +void registerOutputFormatBSONEachRow(FormatFactory & factory); void registerInputFormatJSONColumnsWithMetadata(FormatFactory & factory); void registerOutputFormatJSONColumnsWithMetadata(FormatFactory & factory); void registerInputFormatProtobuf(FormatFactory & factory); @@ -136,7 +139,7 @@ void registerTSKVSchemaReader(FormatFactory & factory); void registerValuesSchemaReader(FormatFactory & factory); void registerTemplateSchemaReader(FormatFactory & factory); void registerMySQLSchemaReader(FormatFactory & factory); - +void registerBSONEachRowSchemaReader(FormatFactory & factory); void registerFileExtensions(FormatFactory & factory); @@ -155,6 +158,7 @@ void registerFormats() registerFileSegmentationEngineHiveText(factory); #endif registerFileSegmentationEngineLineAsString(factory); + registerFileSegmentationEngineBSONEachRow(factory); registerInputFormatNative(factory); @@ -184,6 +188,8 @@ void registerFormats() registerOutputFormatJSONColumns(factory); registerInputFormatJSONCompactColumns(factory); registerOutputFormatJSONCompactColumns(factory); + registerInputFormatBSONEachRow(factory); + registerOutputFormatBSONEachRow(factory); registerInputFormatJSONColumnsWithMetadata(factory); registerOutputFormatJSONColumnsWithMetadata(factory); registerInputFormatProtobuf(factory); @@ -267,6 +273,7 @@ void registerFormats() registerValuesSchemaReader(factory); registerTemplateSchemaReader(factory); registerMySQLSchemaReader(factory); + registerBSONEachRowSchemaReader(factory); } } diff --git a/src/Functions/filesystem.cpp 
b/src/Functions/filesystem.cpp index 12813c3d852..7af1c61d3b8 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -1,31 +1,40 @@ -#include -#include +#include +#include #include +#include +#include +#include #include -#include #include namespace DB { +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNKNOWN_DISK; +} namespace { struct FilesystemAvailable { static constexpr auto name = "filesystemAvailable"; - static std::uintmax_t get(const std::filesystem::space_info & spaceinfo) { return spaceinfo.available; } + static std::uintmax_t get(const DiskPtr & disk) { return disk->getAvailableSpace(); } }; -struct FilesystemFree +struct FilesystemUnreserved { - static constexpr auto name = "filesystemFree"; - static std::uintmax_t get(const std::filesystem::space_info & spaceinfo) { return spaceinfo.free; } + static constexpr auto name = "filesystemUnreserved"; + static std::uintmax_t get(const DiskPtr & disk) { return disk->getUnreservedSpace(); } }; struct FilesystemCapacity { static constexpr auto name = "filesystemCapacity"; - static std::uintmax_t get(const std::filesystem::space_info & spaceinfo) { return spaceinfo.capacity; } + static std::uintmax_t get(const DiskPtr & disk) { return disk->getTotalSpace(); } }; template @@ -34,34 +43,72 @@ class FilesystemImpl : public IFunction public: static constexpr auto name = Impl::name; - static FunctionPtr create(ContextPtr context) - { - return std::make_shared>(std::filesystem::space(context->getPath())); - } + static FunctionPtr create(ContextPtr context_) { return std::make_shared>(context_); } + + explicit FilesystemImpl(ContextPtr context_) : context(context_) { } + + bool useDefaultImplementationForConstants() const override { return true; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } - explicit FilesystemImpl(std::filesystem::space_info spaceinfo_) : spaceinfo(spaceinfo_) { } - String getName() const override { return name; } + + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } bool isDeterministic() const override { return false; } - DataTypePtr getReturnTypeImpl(const DataTypes & /*arguments*/) const override + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { + if (arguments.size() > 1) + { + throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + } + if (arguments.size() == 1 && !isStringOrFixedString(arguments[0])) + { + throw Exception( + "Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } return std::make_shared(); } - ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - return DataTypeUInt64().createColumnConst(input_rows_count, static_cast(Impl::get(spaceinfo))); + if (arguments.empty()) + { + auto disk = context->getDisk("default"); + return DataTypeUInt64().createColumnConst(input_rows_count, Impl::get(disk)); + } + else + { + auto col = arguments[0].column; + if (const ColumnString * col_str = checkAndGetColumn(col.get())) + { + auto disk_map = context->getDisksMap(); + + auto 
col_res = ColumnVector::create(col_str->size()); + auto & data = col_res->getData(); + for (size_t i = 0; i < col_str->size(); ++i) + { + auto disk_name = col_str->getDataAt(i).toString(); + if (auto it = disk_map.find(disk_name); it != disk_map.end()) + data[i] = Impl::get(it->second); + else + throw Exception( + "Unknown disk name " + disk_name + " while execute function " + getName(), ErrorCodes::UNKNOWN_DISK); + } + return col_res; + } + throw Exception( + "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + } } private: - std::filesystem::space_info spaceinfo; + ContextPtr context; }; } @@ -70,7 +117,7 @@ REGISTER_FUNCTION(Filesystem) { factory.registerFunction>(); factory.registerFunction>(); - factory.registerFunction>(); + factory.registerFunction>(); } } diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index f3401713834..0baf64c83d9 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -1016,6 +1016,7 @@ public: size_t getNumberOfArguments() const override { return 3; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override { settings.enable_lazy_execution_for_first_argument = false; diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 6fc722e32f4..f658528a2a7 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -50,6 +50,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForNothing() const override { return false; } ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const override { diff --git a/src/Functions/runningConcurrency.cpp b/src/Functions/runningConcurrency.cpp index c759476006f..c112165fda7 100644 --- a/src/Functions/runningConcurrency.cpp +++ b/src/Functions/runningConcurrency.cpp @@ -57,7 +57,7 @@ namespace DB if (unlikely(begin > end)) { - const FormatSettings default_format; + const FormatSettings default_format{}; WriteBufferFromOwnString buf_begin, buf_end; begin_serializaion->serializeTextQuoted(*(arguments[0].column), i, buf_begin, default_format); end_serialization->serializeTextQuoted(*(arguments[1].column), i, buf_end, default_format); diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index b9e0e0507cc..127912a0b2a 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1278,6 +1278,25 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf) } } +void skipNullTerminated(ReadBuffer & buf) +{ + while (!buf.eof()) + { + char * next_pos = find_first_symbols<'\0'>(buf.position(), buf.buffer().end()); + buf.position() = next_pos; + + if (!buf.hasPendingData()) + continue; + + if (*buf.position() == '\0') + { + ++buf.position(); + return; + } + } +} + + void saveUpToPosition(ReadBuffer & in, Memory<> & memory, char * current) { assert(current >= in.position()); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index a7227811261..33783dc026a 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -1448,6 +1448,8 @@ void skipToCarriageReturnOrEOF(ReadBuffer & buf); /// Skip to next character after next unescaped \n. 
If no \n in stream, skip to end. Does not throw on invalid escape sequences. void skipToUnescapedNextLineOrEOF(ReadBuffer & buf); +/// Skip to next character after next \0. If no \0 in stream, skip to end. +void skipNullTerminated(ReadBuffer & buf); /** This function just copies the data from buffer's internal position (in.position()) * to current position (from arguments) into memory. diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 2d298ca5df2..3b7152c0e7e 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -76,7 +76,7 @@ TEST(IOTestAwsS3Client, AppendExtraSSECHeaders) DB::RemoteHostFilter remote_host_filter; unsigned int s3_max_redirects = 100; - DB::S3::URI uri(Poco::URI(http.getUrl() + "/IOTestAwsS3ClientAppendExtraHeaders/test.txt")); + DB::S3::URI uri(http.getUrl() + "/IOTestAwsS3ClientAppendExtraHeaders/test.txt"); String access_key_id = "ACCESS_KEY_ID"; String secret_access_key = "SECRET_ACCESS_KEY"; String region = "us-east-1"; diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 5af09275df4..91f575d5097 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -759,7 +759,7 @@ namespace S3 put_request_throttler); } - URI::URI(const Poco::URI & uri_) + URI::URI(const std::string & uri_) { /// Case when bucket name represented in domain name of S3 URL. /// E.g. (https://bucket-name.s3.Region.amazonaws.com/key) @@ -777,16 +777,32 @@ namespace S3 static constexpr auto OBS = "OBS"; static constexpr auto OSS = "OSS"; - uri = uri_; + uri = Poco::URI(uri_); + storage_name = S3; if (uri.getHost().empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI."); /// Extract object version ID from query string. + bool has_version_id = false; for (const auto & [query_key, query_value] : uri.getQueryParameters()) if (query_key == "versionId") + { version_id = query_value; + has_version_id = true; + } + + /// Poco::URI ignores '?' when parsing the path, but if there is a versionId in the query parameters, + /// '?' cannot be used as a wildcard, otherwise it would be ambiguous. + /// If there is no "versionId" in the query parameters, '?' can be used as a wildcard. + /// It is necessary to encode '?' so that it is not dropped while parsing the path.
+ if (!has_version_id && uri_.find('?') != String::npos) + { + String uri_with_question_mark_encode; + Poco::URI::encode(uri_, "?", uri_with_question_mark_encode); + uri = Poco::URI(uri_with_question_mark_encode); + } String name; String endpoint_authority_from_uri; diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 01a6b8d5d82..c68d76ece41 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -119,8 +119,7 @@ struct URI bool is_virtual_hosted_style; - explicit URI(const Poco::URI & uri_); - explicit URI(const std::string & uri_) : URI(Poco::URI(uri_)) {} + explicit URI(const std::string & uri_); static void validateBucket(const String & bucket, const Poco::URI & uri); }; diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 161dc81266c..c088e41f1e8 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -20,55 +20,55 @@ struct TestCase }; const TestCase TestCases[] = { - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data")), + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data"), "https://s3.us-east-2.amazonaws.com", "bucketname", "data", "", true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?firstKey=someKey&secondKey=anotherKey")), + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?firstKey=someKey&secondKey=anotherKey"), + "https://s3.us-east-2.amazonaws.com", + "bucketname", + "data?firstKey=someKey&secondKey=anotherKey", + "", + true}, + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId=testVersionId&anotherKey=someOtherKey"), + "https://s3.us-east-2.amazonaws.com", + "bucketname", + "data", + "testVersionId", + true}, + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?firstKey=someKey&versionId=testVersionId&anotherKey=someOtherKey"), + "https://s3.us-east-2.amazonaws.com", + "bucketname", + "data", + "testVersionId", + true}, + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?anotherKey=someOtherKey&versionId=testVersionId"), + "https://s3.us-east-2.amazonaws.com", + "bucketname", + "data", + "testVersionId", + true}, + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId=testVersionId"), + "https://s3.us-east-2.amazonaws.com", + "bucketname", + "data", + "testVersionId", + true}, + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId="), "https://s3.us-east-2.amazonaws.com", "bucketname", "data", "", true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId=testVersionId&anotherKey=someOtherKey")), - "https://s3.us-east-2.amazonaws.com", - "bucketname", - "data", - "testVersionId", - true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?firstKey=someKey&versionId=testVersionId&anotherKey=someOtherKey")), - "https://s3.us-east-2.amazonaws.com", - "bucketname", - "data", - "testVersionId", - true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?anotherKey=someOtherKey&versionId=testVersionId")), - "https://s3.us-east-2.amazonaws.com", - "bucketname", - "data", - "testVersionId", - true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId=testVersionId")), - "https://s3.us-east-2.amazonaws.com", - "bucketname", - "data", - "testVersionId", - true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId=")), + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId&"), "https://s3.us-east-2.amazonaws.com", 
"bucketname", "data", "", true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId&")), - "https://s3.us-east-2.amazonaws.com", - "bucketname", - "data", - "", - true}, - {S3::URI(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId")), + {S3::URI("https://bucketname.s3.us-east-2.amazonaws.com/data?versionId"), "https://s3.us-east-2.amazonaws.com", "bucketname", "data", @@ -83,7 +83,7 @@ class S3UriTest : public testing::TestWithParam TEST(S3UriTest, validPatterns) { { - S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/")); + S3::URI uri("https://jokserfn.s3.amazonaws.com/"); ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); @@ -91,7 +91,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://s3.amazonaws.com/jokserfn/")); + S3::URI uri("https://s3.amazonaws.com/jokserfn/"); ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("", uri.key); @@ -99,7 +99,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://amazonaws.com/bucket/")); + S3::URI uri("https://amazonaws.com/bucket/"); ASSERT_EQ("https://amazonaws.com", uri.endpoint); ASSERT_EQ("bucket", uri.bucket); ASSERT_EQ("", uri.key); @@ -107,7 +107,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://jokserfn.s3.amazonaws.com/data")); + S3::URI uri("https://jokserfn.s3.amazonaws.com/data"); ASSERT_EQ("https://s3.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); @@ -115,7 +115,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://storage.amazonaws.com/jokserfn/data")); + S3::URI uri("https://storage.amazonaws.com/jokserfn/data"); ASSERT_EQ("https://storage.amazonaws.com", uri.endpoint); ASSERT_EQ("jokserfn", uri.bucket); ASSERT_EQ("data", uri.key); @@ -123,7 +123,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://bucketname.cos.ap-beijing.myqcloud.com/data")); + S3::URI uri("https://bucketname.cos.ap-beijing.myqcloud.com/data"); ASSERT_EQ("https://cos.ap-beijing.myqcloud.com", uri.endpoint); ASSERT_EQ("bucketname", uri.bucket); ASSERT_EQ("data", uri.key); @@ -131,7 +131,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://bucketname.s3.us-east-2.amazonaws.com/data")); + S3::URI uri("https://bucketname.s3.us-east-2.amazonaws.com/data"); ASSERT_EQ("https://s3.us-east-2.amazonaws.com", uri.endpoint); ASSERT_EQ("bucketname", uri.bucket); ASSERT_EQ("data", uri.key); @@ -139,7 +139,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://s3.us-east-2.amazonaws.com/bucketname/data")); + S3::URI uri("https://s3.us-east-2.amazonaws.com/bucketname/data"); ASSERT_EQ("https://s3.us-east-2.amazonaws.com", uri.endpoint); ASSERT_EQ("bucketname", uri.bucket); ASSERT_EQ("data", uri.key); @@ -147,7 +147,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(false, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://bucketname.s3-us-east-2.amazonaws.com/data")); + S3::URI uri("https://bucketname.s3-us-east-2.amazonaws.com/data"); ASSERT_EQ("https://s3-us-east-2.amazonaws.com", uri.endpoint); 
ASSERT_EQ("bucketname", uri.bucket); ASSERT_EQ("data", uri.key); @@ -155,7 +155,7 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ(true, uri.is_virtual_hosted_style); } { - S3::URI uri(Poco::URI("https://s3-us-east-2.amazonaws.com/bucketname/data")); + S3::URI uri("https://s3-us-east-2.amazonaws.com/bucketname/data"); ASSERT_EQ("https://s3-us-east-2.amazonaws.com", uri.endpoint); ASSERT_EQ("bucketname", uri.bucket); ASSERT_EQ("data", uri.key); @@ -166,7 +166,7 @@ TEST(S3UriTest, validPatterns) TEST_P(S3UriTest, invalidPatterns) { - ASSERT_ANY_THROW(S3::URI(Poco::URI(GetParam()))); + ASSERT_ANY_THROW(S3::URI new_uri(GetParam())); } TEST(S3UriTest, versionIdChecks) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index b413c784159..913b0535358 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -229,6 +230,7 @@ struct ContextSharedPart : boost::noncopyable ProcessList process_list; /// Executing queries at the moment. GlobalOvercommitTracker global_overcommit_tracker; MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree) + MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree) ReplicatedFetchList replicated_fetch_list; ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections. InterserverIOHandler interserver_io_handler; /// Handler for interserver communication. @@ -637,6 +639,8 @@ const ProcessList & Context::getProcessList() const { return shared->process_lis OvercommitTracker * Context::getGlobalOvercommitTracker() const { return &shared->global_overcommit_tracker; } MergeList & Context::getMergeList() { return shared->merge_list; } const MergeList & Context::getMergeList() const { return shared->merge_list; } +MovesList & Context::getMovesList() { return shared->moves_list; } +const MovesList & Context::getMovesList() const { return shared->moves_list; } ReplicatedFetchList & Context::getReplicatedFetchList() { return shared->replicated_fetch_list; } const ReplicatedFetchList & Context::getReplicatedFetchList() const { return shared->replicated_fetch_list; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index a0b62da364e..bc89ce36edc 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -63,6 +63,7 @@ using InterserverCredentialsPtr = std::shared_ptr; class InterserverIOHandler; class BackgroundSchedulePool; class MergeList; +class MovesList; class ReplicatedFetchList; class Cluster; class Compiler; @@ -775,6 +776,9 @@ public: MergeList & getMergeList(); const MergeList & getMergeList() const; + MovesList & getMovesList(); + const MovesList & getMovesList() const; + ReplicatedFetchList & getReplicatedFetchList(); const ReplicatedFetchList & getReplicatedFetchList() const; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 7ceb0bf3a00..7c78d08fa24 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1015,7 +1015,7 @@ void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table) for (const auto & [disk_name, disk] : getContext()->getDisksMap()) { String data_path = "store/" + getPathForUUID(table.table_id.uuid); - if (!disk->exists(data_path) || disk->isReadOnly()) + if (disk->isReadOnly() || !disk->exists(data_path)) continue; LOG_INFO(log, "Removing data directory {} of dropped table {} from disk {}", data_path, 
table.table_id.getNameForLogs(), disk_name); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 2864b433e00..2341059a04f 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -165,7 +165,7 @@ struct QueryASTSettings struct QueryTreeSettings { - bool run_passes = false; + bool run_passes = true; bool dump_passes = false; bool dump_ast = false; Int64 passes = -1; diff --git a/src/Interpreters/InterpreterExternalDDLQuery.cpp b/src/Interpreters/InterpreterExternalDDLQuery.cpp index 61fbc34784f..5c06ab4b818 100644 --- a/src/Interpreters/InterpreterExternalDDLQuery.cpp +++ b/src/Interpreters/InterpreterExternalDDLQuery.cpp @@ -13,6 +13,7 @@ # include # include # include +# include #endif namespace DB @@ -44,7 +45,7 @@ BlockIO InterpreterExternalDDLQuery::execute() if (arguments.size() != 2 || !arguments[0]->as() || !arguments[1]->as()) throw Exception("MySQL External require two identifier arguments.", ErrorCodes::BAD_ARGUMENTS); - if (external_ddl_query.external_ddl->as()) + if (external_ddl_query.external_ddl->as()) return MySQLInterpreter::InterpreterMySQLDropQuery( external_ddl_query.external_ddl, getContext(), getIdentifierName(arguments[0]), getIdentifierName(arguments[1])).execute(); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 2e20cfbd964..3bb6b2e2c87 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2728,13 +2728,18 @@ void InterpreterSelectQuery::executeDistinct(QueryPlan & query_plan, bool before { const Settings & settings = context->getSettingsRef(); - auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context); UInt64 limit_for_distinct = 0; - /// If after this stage of DISTINCT ORDER BY is not executed, + /// If after this stage of DISTINCT, + /// (1) ORDER BY is not executed + /// (2) there is no LIMIT BY (todo: we can check if DISTINCT and LIMIT BY expressions are match) /// then you can get no more than limit_length + limit_offset of different rows. 
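+ /// Illustrative example: for `SELECT DISTINCT x FROM t LIMIT 10 OFFSET 5` with no pending ORDER BY and no LIMIT BY,
+ /// DISTINCT may stop after collecting 10 + 5 = 15 distinct rows.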
- if ((!query.orderBy() || !before_order) && limit_length <= std::numeric_limits::max() - limit_offset) - limit_for_distinct = limit_length + limit_offset; + if ((!query.orderBy() || !before_order) && !query.limitBy()) + { + auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context); + if (limit_length <= std::numeric_limits::max() - limit_offset) + limit_for_distinct = limit_length + limit_offset; + } SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode); diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 4ed22b34e26..6989940323c 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -543,15 +544,29 @@ void InterpreterDropImpl::validate(const InterpreterDropImpl::TQuery & /*query*/ ASTs InterpreterDropImpl::getRewrittenQueries( const InterpreterDropImpl::TQuery & drop_query, ContextPtr context, const String & mapped_to_database, const String & mysql_database) { - const auto & database_name = resolveDatabase(drop_query.getDatabase(), mysql_database, mapped_to_database, context); - - /// Skip drop database|view|dictionary - if (database_name != mapped_to_database || !drop_query.table || drop_query.is_view || drop_query.is_dictionary) + /// Skip drop database|view|dictionary|others + if (drop_query.kind != TQuery::Kind::Table) return {}; - - ASTPtr rewritten_query = drop_query.clone(); - rewritten_query->as()->setDatabase(mapped_to_database); - return ASTs{rewritten_query}; + TQuery::QualifiedNames tables = drop_query.names; + ASTs rewritten_querys; + for (const auto & table: tables) + { + const auto & database_name = resolveDatabase(table.schema, mysql_database, mapped_to_database, context); + if (database_name != mapped_to_database) + continue; + auto rewritten_query = std::make_shared(); + rewritten_query->setTable(table.shortName); + rewritten_query->setDatabase(mapped_to_database); + if (drop_query.is_truncate) + rewritten_query->kind = ASTDropQuery::Kind::Truncate; + else + rewritten_query->kind = ASTDropQuery::Kind::Drop; + rewritten_query->is_view = false; + //To avoid failure, we always set exists + rewritten_query->if_exists = true; + rewritten_querys.push_back(rewritten_query); + } + return rewritten_querys; } void InterpreterRenameImpl::validate(const InterpreterRenameImpl::TQuery & rename_query, ContextPtr /*context*/) diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.h b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.h index 1ffaacc7dcc..824024e020d 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.h +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.h @@ -2,11 +2,11 @@ #include #include -#include #include #include #include #include +#include #include #include @@ -17,7 +17,7 @@ namespace MySQLInterpreter { struct InterpreterDropImpl { - using TQuery = ASTDropQuery; + using TQuery = MySQLParser::ASTDropQuery; static void validate(const TQuery & query, ContextPtr context); diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index ee126f2da11..ad7884ade55 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -131,6 +131,12 @@ void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_) thread_state = ThreadState::AttachedToQuery; } +void 
ThreadStatus::setInternalThread() +{ + chassert(!query_profiler_real && !query_profiler_cpu); + internal_thread = true; +} + void ThreadStatus::initializeQuery() { setupState(std::make_shared()); @@ -177,41 +183,44 @@ void ThreadStatus::initPerformanceCounters() // query_start_time_nanoseconds cannot be used here since RUsageCounters expect CLOCK_MONOTONIC *last_rusage = RUsageCounters::current(); - if (auto query_context_ptr = query_context.lock()) + if (!internal_thread) { - const Settings & settings = query_context_ptr->getSettingsRef(); - if (settings.metrics_perf_events_enabled) + if (auto query_context_ptr = query_context.lock()) + { + const Settings & settings = query_context_ptr->getSettingsRef(); + if (settings.metrics_perf_events_enabled) + { + try + { + current_thread_counters.initializeProfileEvents( + settings.metrics_perf_events_list); + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + } + } + } + + if (!taskstats) { try { - current_thread_counters.initializeProfileEvents( - settings.metrics_perf_events_list); + taskstats = TasksStatsCounters::create(thread_id); } catch (...) { - tryLogCurrentException(__PRETTY_FUNCTION__); + tryLogCurrentException(log); } } + if (taskstats) + taskstats->reset(); } - - if (!taskstats) - { - try - { - taskstats = TasksStatsCounters::create(thread_id); - } - catch (...) - { - tryLogCurrentException(log); - } - } - if (taskstats) - taskstats->reset(); } void ThreadStatus::finalizePerformanceCounters() { - if (performance_counters_finalized) + if (performance_counters_finalized || internal_thread) return; performance_counters_finalized = true; @@ -270,7 +279,7 @@ void ThreadStatus::resetPerformanceCountersLastUsage() void ThreadStatus::initQueryProfiler() { - if (!query_profiler_enabled) + if (internal_thread) return; /// query profilers are useless without trace collector diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index da12dccd8d8..30fab527ac5 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1,8 +1,8 @@ #include #include + #include #include - #include #include @@ -45,10 +45,10 @@ #include #include #include -#include #include #include +#include #include @@ -784,6 +784,67 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join, } } +std::pair recursivelyCollectMaxOrdinaryExpressions(const ASTPtr & expr, ASTExpressionList & into) +{ + checkStackSize(); + + if (expr->as()) + { + into.children.push_back(expr); + return {false, 1}; + } + + auto * function = expr->as(); + + if (!function) + return {false, 0}; + + if (AggregateUtils::isAggregateFunction(*function)) + return {true, 0}; + + UInt64 pushed_children = 0; + bool has_aggregate = false; + + for (const auto & child : function->arguments->children) + { + auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressions(child, into); + has_aggregate |= child_has_aggregate; + pushed_children += child_pushed_children; + } + + /// The current function is not aggregate function and there is no aggregate function in its arguments, + /// so use the current function to replace its arguments + if (!has_aggregate) + { + for (UInt64 i = 0; i < pushed_children; i++) + into.children.pop_back(); + + into.children.push_back(expr); + pushed_children = 1; + } + + return {has_aggregate, pushed_children}; +} + +/** Expand GROUP BY ALL by extracting all the SELECT-ed expressions that are not aggregate functions. 
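+ * In the simplest case every non-aggregate expression of the SELECT list becomes a GROUP BY key (illustrative):
+ *   SELECT a, b % 10, count() FROM t GROUP BY ALL   expands to   ... GROUP BY a, b % 10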
+ * + * For a special case that if there is a function having both aggregate functions and other fields as its arguments, + * the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it. + * + * Example: + * SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY ALL + * will expand as + * SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY substring(a, 4, 2), substring(a, 1, 2) + */ +void expandGroupByAll(ASTSelectQuery * select_query) +{ + auto group_expression_list = std::make_shared(); + + for (const auto & expr : select_query->select()->children) + recursivelyCollectMaxOrdinaryExpressions(expr, *group_expression_list); + + select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list); +} std::vector getAggregates(ASTPtr & query, const ASTSelectQuery & select_query) { @@ -1276,6 +1337,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext()); + // expand GROUP BY ALL + if (select_query->group_by_all) + expandGroupByAll(select_query); + /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 76849653b4e..e0e3b1a90c1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -93,7 +93,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F where()->formatImpl(s, state, frame); } - if (groupBy()) + if (!group_by_all && groupBy()) { s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY" << (s.hilite ? hilite_none : ""); if (!group_by_with_grouping_sets) @@ -104,6 +104,9 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F } } + if (group_by_all) + s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY ALL" << (s.hilite ? hilite_none : ""); + if (group_by_with_rollup) s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH ROLLUP" << (s.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5e3af545f12..3db8524c8b6 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -82,6 +82,7 @@ public: ASTPtr clone() const override; bool distinct = false; + bool group_by_all = false; bool group_by_with_totals = false; bool group_by_with_rollup = false; bool group_by_with_cube = false; diff --git a/src/Parsers/MySQL/ASTDropQuery.cpp b/src/Parsers/MySQL/ASTDropQuery.cpp new file mode 100644 index 00000000000..fb76d93363a --- /dev/null +++ b/src/Parsers/MySQL/ASTDropQuery.cpp @@ -0,0 +1,119 @@ +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace MySQLParser +{ + +ASTPtr ASTDropQuery::clone() const +{ + auto res = std::make_shared(*this); + res->children.clear(); + res->is_truncate = is_truncate; + res->if_exists = if_exists; + return res; +} + +bool ParserDropQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & expected) +{ + ParserKeyword s_drop("DROP"); + ParserKeyword s_truncate("TRUNCATE"); + ParserKeyword s_table("TABLE"); + ParserKeyword s_database("DATABASE"); + ParserKeyword s_if_exists("IF EXISTS"); + ParserKeyword s_view("VIEW"); + ParserKeyword on("ON"); + ParserIdentifier name_p(false); + + ParserKeyword s_event("EVENT"); + ParserKeyword s_function("FUNCTION"); + ParserKeyword s_index("INDEX"); + ParserKeyword s_server("SERVER"); + ParserKeyword s_trigger("TRIGGER"); + + auto query = std::make_shared(); + node = query; + ASTDropQuery::QualifiedNames names; + bool if_exists = false; + bool is_truncate = false; + + if (s_truncate.ignore(pos, expected) && s_table.ignore(pos, expected)) + { + is_truncate = true; + query->kind = ASTDropQuery::Kind::Table; + ASTDropQuery::QualifiedName name; + if (parseDatabaseAndTableName(pos, expected, name.schema, name.shortName)) + names.push_back(name); + else + return false; + } + else if (s_drop.ignore(pos, expected)) + { + if (s_database.ignore(pos, expected)) + { + query->kind = ASTDropQuery::Kind::Database; + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + ASTPtr database; + if (!name_p.parse(pos, database, expected)) + return false; + } + else + { + if (s_view.ignore(pos, expected)) + query->kind = ASTDropQuery::Kind::View; + else if (s_table.ignore(pos, expected)) + query->kind = ASTDropQuery::Kind::Table; + else if (s_index.ignore(pos, expected)) + { + ASTPtr index; + query->kind = ASTDropQuery::Kind::Index; + if (!(name_p.parse(pos, index, expected) && on.ignore(pos, expected))) + return false; + } + else if (s_event.ignore(pos, expected) || s_function.ignore(pos, expected) || s_server.ignore(pos, expected) + || s_trigger.ignore(pos, expected)) + { + query->kind = ASTDropQuery::Kind::Other; + } + else + return false; + + if (s_if_exists.ignore(pos, expected)) + if_exists = true; + //parse name + auto parse_element = [&] + { + ASTDropQuery::QualifiedName element; + if (parseDatabaseAndTableName(pos, expected, element.schema, element.shortName)) + { + names.emplace_back(std::move(element)); + return true; + } + return false; + }; + + if (!ParserList::parseUtil(pos, expected, parse_element, false)) + return false; + } + } + else + return false; + + query->if_exists = if_exists; + query->names = names; + query->is_truncate = is_truncate; + + return true; +} + +} + +} diff --git a/src/Parsers/MySQL/ASTDropQuery.h b/src/Parsers/MySQL/ASTDropQuery.h new file mode 100644 index 00000000000..ff95277ae5e --- /dev/null +++ b/src/Parsers/MySQL/ASTDropQuery.h @@ -0,0 
+1,64 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +namespace MySQLParser +{ + +class ASTDropQuery : public IAST +{ +public: + enum Kind + { + Table, + View, + Database, + Index, + /// TRIGGER,FUNCTION,EVENT and so on, No need for support + Other, + }; + Kind kind; + struct QualifiedName + { + String schema; + String shortName; + }; + + using QualifiedNames = std::vector; + QualifiedNames names; + bool if_exists{false}; + //drop or truncate + bool is_truncate{false}; + + ASTPtr clone() const override; + String getID(char /*delim*/) const override {return "ASTDropQuery" ;} + +protected: + void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override + { + throw Exception("Method formatImpl is not supported by MySQLParser::ASTDropQuery.", ErrorCodes::NOT_IMPLEMENTED); + } +}; + +class ParserDropQuery : public IParserBase +{ +protected: + const char * getName() const override { return "DROP query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + +} + +} diff --git a/src/Parsers/ParserExternalDDLQuery.cpp b/src/Parsers/ParserExternalDDLQuery.cpp index 839838c4f54..5d6874f524d 100644 --- a/src/Parsers/ParserExternalDDLQuery.cpp +++ b/src/Parsers/ParserExternalDDLQuery.cpp @@ -11,6 +11,7 @@ #if USE_MYSQL # include # include +# include #endif namespace DB @@ -43,7 +44,7 @@ bool ParserExternalDDLQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect if (external_ddl_query->from->name == "MySQL") { #if USE_MYSQL - ParserDropQuery p_drop_query; + MySQLParser::ParserDropQuery p_drop_query; ParserRenameQuery p_rename_query; MySQLParser::ParserAlterQuery p_alter_query; MySQLParser::ParserCreateQuery p_create_query; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index cf335270734..201cd750af8 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -195,6 +195,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) select_query->group_by_with_cube = true; else if (s_grouping_sets.ignore(pos, expected)) select_query->group_by_with_grouping_sets = true; + else if (s_all.ignore(pos, expected)) + select_query->group_by_all = true; if ((select_query->group_by_with_rollup || select_query->group_by_with_cube || select_query->group_by_with_grouping_sets) && !open_bracket.ignore(pos, expected)) @@ -205,7 +207,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!grouping_sets_list.parse(pos, group_expression_list, expected)) return false; } - else + else if (!select_query->group_by_all) { if (!exp_list.parse(pos, group_expression_list, expected)) return false; diff --git a/src/Parsers/fuzzers/create_parser_fuzzer.cpp b/src/Parsers/fuzzers/create_parser_fuzzer.cpp index 032d9ca3ffe..13cb1dfd36e 100644 --- a/src/Parsers/fuzzers/create_parser_fuzzer.cpp +++ b/src/Parsers/fuzzers/create_parser_fuzzer.cpp @@ -15,6 +15,12 @@ try DB::ParserCreateQuery parser; DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000); + const UInt64 max_ast_depth = 1000; + ast->checkDepth(max_ast_depth); + + const UInt64 max_ast_elements = 50000; + ast->checkSize(max_ast_elements); + DB::WriteBufferFromOwnString wb; DB::formatAST(*ast, wb); diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 53b9cfc5d99..b59dccc92c2 100644 --- 
a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -87,8 +87,8 @@ void JoinClause::dump(WriteBuffer & buffer) const { const auto & asof_condition = asof_conditions[i]; - buffer << "key_index: " << asof_condition.key_index; - buffer << "inequality: " << toString(asof_condition.asof_inequality); + buffer << " key_index: " << asof_condition.key_index; + buffer << " inequality: " << toString(asof_condition.asof_inequality); if (i + 1 != asof_conditions_size) buffer << ','; diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index 57eaa28e072..4537065c58a 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -183,19 +183,19 @@ public: } private: - /// Valid for table, table function, query, union, array join table expression nodes + /// Valid for table, table function, array join, query, union nodes NamesAndTypesList columns; - /// Valid for table, table function, query, union, array join table expression nodes + /// Valid for table, table function, array join, query, union nodes NameSet columns_names; - /// Valid only for table table expression node + /// Valid only for table node NameSet alias_columns_names; - /// Valid for table, table function, query, union table, array join expression nodes + /// Valid for table, table function, array join, query, union nodes ColumnNameToColumnIdentifier column_name_to_column_identifier; - /// Valid for table, table function, query, union table, array join expression nodes + /// Valid for table, table function, array join, query, union nodes ColumnIdentifierToColumnName column_identifier_to_column_name; /// Is storage remote diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp new file mode 100644 index 00000000000..878860aeb25 --- /dev/null +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -0,0 +1,978 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int ILLEGAL_COLUMN; + extern const int TOO_LARGE_STRING_SIZE; + extern const int UNKNOWN_TYPE; +} + +namespace +{ + enum + { + UNKNOWN_FIELD = size_t(-1), + }; +} + +BSONEachRowRowInputFormat::BSONEachRowRowInputFormat( + ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_) + : IRowInputFormat(header_, in_, std::move(params_)) + , format_settings(format_settings_) + , name_map(header_.getNamesToIndexesMap()) + , prev_positions(header_.columns()) + , types(header_.getDataTypes()) +{ +} + +inline size_t BSONEachRowRowInputFormat::columnIndex(const StringRef & name, size_t key_index) +{ + /// Optimization by caching the order of fields (which is almost always the same) + /// and a quick check to match the next expected field, instead of searching the hash table. + + if (prev_positions.size() > key_index && prev_positions[key_index] && name == prev_positions[key_index]->getKey()) + { + return prev_positions[key_index]->getMapped(); + } + else + { + auto * it = name_map.find(name); + + if (it) + { + if (key_index < prev_positions.size()) + prev_positions[key_index] = it; + + return it->getMapped(); + } + else + return UNKNOWN_FIELD; + } +} + +/// Read the field name. 
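+/// BSON element names are null-terminated C strings; when the terminator is already in the buffer the name is
+/// returned as a StringRef into the buffer without copying, otherwise it is accumulated into key_holder.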
Resulting StringRef is valid only before next read from buf. +static StringRef readBSONKeyName(ReadBuffer & in, String & key_holder) +{ + // This is just an optimization: try to avoid copying the name into key_holder + + if (!in.eof()) + { + char * next_pos = find_first_symbols<0>(in.position(), in.buffer().end()); + + if (next_pos != in.buffer().end()) + { + StringRef res(in.position(), next_pos - in.position()); + in.position() = next_pos + 1; + return res; + } + } + + key_holder.clear(); + readNullTerminated(key_holder, in); + return key_holder; +} + +static UInt8 readBSONType(ReadBuffer & in) +{ + UInt8 type; + readBinary(type, in); + return type; +} + +static size_t readBSONSize(ReadBuffer & in) +{ + BSONSizeT size; + readBinary(size, in); + return size; +} + +template +static void readAndInsertInteger(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) +{ + /// We allow to read any integer into any integer column. + /// For example we can read BSON Int32 into ClickHouse UInt8. + + if (bson_type == BSONType::INT32) + { + UInt32 value; + readBinary(value, in); + assert_cast &>(column).insertValue(static_cast(value)); + } + else if (bson_type == BSONType::INT64) + { + UInt64 value; + readBinary(value, in); + assert_cast &>(column).insertValue(static_cast(value)); + } + else if (bson_type == BSONType::BOOL) + { + UInt8 value; + readBinary(value, in); + assert_cast &>(column).insertValue(static_cast(value)); + } + else + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + } +} + +template +static void readAndInsertDouble(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) +{ + if (bson_type != BSONType::DOUBLE) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + + Float64 value; + readBinary(value, in); + assert_cast &>(column).insertValue(static_cast(value)); +} + +template +static void readAndInsertSmallDecimal(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) +{ + if (bson_type != expected_bson_type) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + + DecimalType value; + readBinary(value, in); + assert_cast &>(column).insertValue(value); +} + +static void readAndInsertDateTime64(ReadBuffer & in, IColumn & column, BSONType bson_type) +{ + if (bson_type != BSONType::INT64 && bson_type != BSONType::DATETIME) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into DateTime64 column", getBSONTypeName(bson_type)); + + DateTime64 value; + readBinary(value, in); + assert_cast(column).insertValue(value); +} + +template +static void readAndInsertBigInteger(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) +{ + if (bson_type != BSONType::BINARY) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + + auto size = readBSONSize(in); + auto subtype = getBSONBinarySubtype(readBSONType(in)); + if (subtype != BSONBinarySubtype::BINARY) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Binary subtype {} into column with type {}", getBSONBinarySubtypeName(subtype), data_type->getName()); + + using ValueType = typename ColumnType::ValueType; + + if 
(size != sizeof(ValueType)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot parse value of type {}, size of binary data is not equal to the binary size of expected value: {} != {}", + data_type->getName(), + size, + sizeof(ValueType)); + + ValueType value; + readBinary(value, in); + assert_cast(column).insertValue(value); +} + +template +static void readAndInsertStringImpl(ReadBuffer & in, IColumn & column, size_t size) +{ + if constexpr (is_fixed_string) + { + auto & fixed_string_column = assert_cast(column); + size_t n = fixed_string_column.getN(); + if (size > n) + throw Exception("Too large string for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE); + + auto & data = fixed_string_column.getChars(); + + size_t old_size = data.size(); + data.resize_fill(old_size + n); + + try + { + in.readStrict(reinterpret_cast(data.data() + old_size), size); + } + catch (...) + { + /// Restore column state in case of any exception. + data.resize_assume_reserved(old_size); + throw; + } + } + else + { + auto & column_string = assert_cast(column); + auto & data = column_string.getChars(); + auto & offsets = column_string.getOffsets(); + + size_t old_chars_size = data.size(); + size_t offset = old_chars_size + size + 1; + offsets.push_back(offset); + + try + { + data.resize(offset); + in.readStrict(reinterpret_cast(&data[offset - size - 1]), size); + data.back() = 0; + } + catch (...) + { + /// Restore column state in case of any exception. + offsets.pop_back(); + data.resize_assume_reserved(old_chars_size); + throw; + } + } +} + +template +static void readAndInsertString(ReadBuffer & in, IColumn & column, BSONType bson_type) +{ + if (bson_type == BSONType::STRING || bson_type == BSONType::SYMBOL || bson_type == BSONType::JAVA_SCRIPT_CODE) + { + auto size = readBSONSize(in); + readAndInsertStringImpl(in, column, size - 1); + assertChar(0, in); + } + else if (bson_type == BSONType::BINARY) + { + auto size = readBSONSize(in); + auto subtype = getBSONBinarySubtype(readBSONType(in)); + if (subtype == BSONBinarySubtype::BINARY || subtype == BSONBinarySubtype::BINARY_OLD) + readAndInsertStringImpl(in, column, size); + else + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot insert BSON Binary subtype {} into String column", + getBSONBinarySubtypeName(subtype)); + } + else if (bson_type == BSONType::OBJECT_ID) + { + readAndInsertStringImpl(in, column, 12); + } + else + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into String column", getBSONTypeName(bson_type)); + } +} + +static void readAndInsertUUID(ReadBuffer & in, IColumn & column, BSONType bson_type) +{ + if (bson_type == BSONType::BINARY) + { + auto size = readBSONSize(in); + auto subtype = getBSONBinarySubtype(readBSONType(in)); + if (subtype == BSONBinarySubtype::UUID || subtype == BSONBinarySubtype::UUID_OLD) + { + if (size != sizeof(UUID)) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot parse value of type UUID, size of binary data is not equal to the binary size of UUID value: {} != {}", + size, + sizeof(UUID)); + + UUID value; + readBinary(value, in); + assert_cast(column).insertValue(value); + } + else + { + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, + "Cannot insert BSON Binary subtype {} into UUID column", + getBSONBinarySubtypeName(subtype)); + } + } + else + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into UUID column", getBSONTypeName(bson_type)); + } +} + +void BSONEachRowRowInputFormat::readArray(IColumn & column, const DataTypePtr & data_type, 
BSONType bson_type) +{ + if (bson_type != BSONType::ARRAY) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into Array column", getBSONTypeName(bson_type)); + + const auto * data_type_array = assert_cast(data_type.get()); + const auto & nested_type = data_type_array->getNestedType(); + auto & array_column = assert_cast(column); + auto & nested_column = array_column.getData(); + + size_t document_start = in->count(); + BSONSizeT document_size; + readBinary(document_size, *in); + while (in->count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size) + { + auto nested_bson_type = getBSONType(readBSONType(*in)); + readBSONKeyName(*in, current_key_name); + readField(nested_column, nested_type, nested_bson_type); + } + + assertChar(BSON_DOCUMENT_END, *in); + array_column.getOffsets().push_back(array_column.getData().size()); +} + +void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr & data_type, BSONType bson_type) +{ + if (bson_type != BSONType::ARRAY && bson_type != BSONType::DOCUMENT) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into Tuple column", getBSONTypeName(bson_type)); + + /// When BSON type is ARRAY, names in nested document are not useful + /// (most likely they are just sequential numbers). + bool use_key_names = bson_type == BSONType::DOCUMENT; + + const auto * data_type_tuple = assert_cast(data_type.get()); + auto & tuple_column = assert_cast(column); + size_t read_nested_columns = 0; + + size_t document_start = in->count(); + BSONSizeT document_size; + readBinary(document_size, *in); + while (in->count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size) + { + auto nested_bson_type = getBSONType(readBSONType(*in)); + auto name = readBSONKeyName(*in, current_key_name); + + size_t index = read_nested_columns; + if (use_key_names) + { + auto try_get_index = data_type_tuple->tryGetPositionByName(name.toString()); + if (!try_get_index) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field: tuple doesn't have element with name \"{}\"", + data_type->getName(), + name); + index = *try_get_index; + } + + if (index >= data_type_tuple->getElements().size()) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field: the number of fields BSON document exceeds the number of fields in tuple", + data_type->getName()); + + readField(tuple_column.getColumn(index), data_type_tuple->getElement(index), nested_bson_type); + ++read_nested_columns; + } + + assertChar(BSON_DOCUMENT_END, *in); + + if (read_nested_columns != data_type_tuple->getElements().size()) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field, the number of fields in tuple and BSON document doesn't match: {} != {}", + data_type->getName(), + data_type_tuple->getElements().size(), + read_nested_columns); +} + +void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & data_type, BSONType bson_type) +{ + if (bson_type != BSONType::DOCUMENT) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into Map column", getBSONTypeName(bson_type)); + + const auto * data_type_map = assert_cast(data_type.get()); + const auto & key_data_type = data_type_map->getKeyType(); + if (!isStringOrFixedString(key_data_type)) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are 
supported in BSON, got key type: {}", key_data_type->getName()); + + const auto & value_data_type = data_type_map->getValueType(); + auto & column_map = assert_cast(column); + auto & key_column = column_map.getNestedData().getColumn(0); + auto & value_column = column_map.getNestedData().getColumn(1); + auto & offsets = column_map.getNestedColumn().getOffsets(); + + size_t document_start = in->count(); + BSONSizeT document_size; + readBinary(document_size, *in); + while (in->count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size) + { + auto nested_bson_type = getBSONType(readBSONType(*in)); + auto name = readBSONKeyName(*in, current_key_name); + key_column.insertData(name.data, name.size); + readField(value_column, value_data_type, nested_bson_type); + } + + assertChar(BSON_DOCUMENT_END, *in); + offsets.push_back(key_column.size()); +} + + +bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr & data_type, BSONType bson_type) +{ + if (bson_type == BSONType::NULL_VALUE) + { + if (data_type->isNullable()) + { + column.insertDefault(); + return true; + } + + if (!format_settings.null_as_default) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Null value into non-nullable column with type {}", getBSONTypeName(bson_type), data_type->getName()); + + column.insertDefault(); + return false; + } + + switch (data_type->getTypeId()) + { + case TypeIndex::Nullable: + { + auto & nullable_column = assert_cast(column); + auto & nested_column = nullable_column.getNestedColumn(); + const auto & nested_type = assert_cast(data_type.get())->getNestedType(); + nullable_column.getNullMapColumn().insertValue(0); + return readField(nested_column, nested_type, bson_type); + } + case TypeIndex::LowCardinality: + { + auto & lc_column = assert_cast(column); + auto tmp_column = lc_column.getDictionary().getNestedColumn()->cloneEmpty(); + const auto & dict_type = assert_cast(data_type.get())->getDictionaryType(); + auto res = readField(*tmp_column, dict_type, bson_type); + lc_column.insertFromFullColumn(*tmp_column, 0); + return res; + } + case TypeIndex::Int8: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::UInt8: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Int16: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Int32: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Int64: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::UInt64: + { + readAndInsertInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Int128: + { + readAndInsertBigInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::UInt128: + { + readAndInsertBigInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Int256: + { + readAndInsertBigInteger(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::UInt256: + { + readAndInsertBigInteger(*in, column, data_type, bson_type); + return true; + } + case 
TypeIndex::Float32: + { + readAndInsertDouble(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Float64: + { + readAndInsertDouble(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Decimal32: + { + readAndInsertSmallDecimal(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Decimal64: + { + readAndInsertSmallDecimal(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Decimal128: + { + readAndInsertBigInteger>(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::Decimal256: + { + readAndInsertBigInteger>(*in, column, data_type, bson_type); + return true; + } + case TypeIndex::DateTime64: + { + readAndInsertDateTime64(*in, column, bson_type); + return true; + } + case TypeIndex::FixedString: + { + readAndInsertString(*in, column, bson_type); + return true; + } + case TypeIndex::String: + { + readAndInsertString(*in, column, bson_type); + return true; + } + case TypeIndex::UUID: + { + readAndInsertUUID(*in, column, bson_type); + return true; + } + case TypeIndex::Array: + { + readArray(column, data_type, bson_type); + return true; + } + case TypeIndex::Tuple: + { + readTuple(column, data_type, bson_type); + return true; + } + case TypeIndex::Map: + { + readMap(column, data_type, bson_type); + return true; + } + default: + { + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for output in BSON format", data_type->getName()); + } + } +} + +static void skipBSONField(ReadBuffer & in, BSONType type) +{ + switch (type) + { + case BSONType::DOUBLE: + { + in.ignore(sizeof(Float64)); + break; + } + case BSONType::BOOL: + { + in.ignore(sizeof(UInt8)); + break; + } + case BSONType::INT64: [[fallthrough]]; + case BSONType::DATETIME: [[fallthrough]]; + case BSONType::TIMESTAMP: + { + in.ignore(sizeof(UInt64)); + break; + } + case BSONType::INT32: + { + in.ignore(sizeof(Int32)); + break; + } + case BSONType::JAVA_SCRIPT_CODE: [[fallthrough]]; + case BSONType::SYMBOL: [[fallthrough]]; + case BSONType::STRING: + { + BSONSizeT size; + readBinary(size, in); + in.ignore(size); + break; + } + case BSONType::DOCUMENT: [[fallthrough]]; + case BSONType::ARRAY: + { + BSONSizeT size; + readBinary(size, in); + in.ignore(size - sizeof(size)); + break; + } + case BSONType::BINARY: + { + BSONSizeT size; + readBinary(size, in); + in.ignore(size + 1); + break; + } + case BSONType::MIN_KEY: [[fallthrough]]; + case BSONType::MAX_KEY: [[fallthrough]]; + case BSONType::UNDEFINED: [[fallthrough]]; + case BSONType::NULL_VALUE: + { + break; + } + case BSONType::OBJECT_ID: + { + in.ignore(12); + break; + } + case BSONType::REGEXP: + { + skipNullTerminated(in); + skipNullTerminated(in); + break; + } + case BSONType::DB_POINTER: + { + BSONSizeT size; + readBinary(size, in); + in.ignore(size + 12); + break; + } + case BSONType::JAVA_SCRIPT_CODE_W_SCOPE: + { + BSONSizeT size; + readBinary(size, in); + in.ignore(size - sizeof(size)); + break; + } + case BSONType::DECIMAL128: + { + in.ignore(16); + break; + } + } +} + +void BSONEachRowRowInputFormat::skipUnknownField(BSONType type, const String & key_name) +{ + if (!format_settings.skip_unknown_fields) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field found while parsing BSONEachRow format: {}", key_name); + + skipBSONField(*in, type); +} + +void BSONEachRowRowInputFormat::syncAfterError() +{ + /// Skip all remaining bytes in current document + size_t already_read_bytes = in->count() - current_document_start; + in->ignore(current_document_size - 
already_read_bytes); +} + +bool BSONEachRowRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext) +{ + size_t num_columns = columns.size(); + + read_columns.assign(num_columns, false); + seen_columns.assign(num_columns, false); + + if (in->eof()) + return false; + + size_t key_index = 0; + + current_document_start = in->count(); + readBinary(current_document_size, *in); + while (in->count() - current_document_start + sizeof(BSON_DOCUMENT_END) != current_document_size) + { + auto type = getBSONType(readBSONType(*in)); + auto name = readBSONKeyName(*in, current_key_name); + auto index = columnIndex(name, key_index); + + if (index == UNKNOWN_FIELD) + { + current_key_name.assign(name.data, name.size); + skipUnknownField(BSONType(type), current_key_name); + } + else + { + seen_columns[index] = true; + read_columns[index] = readField(*columns[index], types[index], BSONType(type)); + } + + ++key_index; + } + + assertChar(BSON_DOCUMENT_END, *in); + + const auto & header = getPort().getHeader(); + /// Fill non-visited columns with the default values. + for (size_t i = 0; i < num_columns; ++i) + if (!seen_columns[i]) + header.getByPosition(i).type->insertDefaultInto(*columns[i]); + + if (format_settings.defaults_for_omitted_fields) + ext.read_columns = read_columns; + else + ext.read_columns.assign(read_columns.size(), true); + + return true; +} + +BSONEachRowSchemaReader::BSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & settings_) + : IRowWithNamesSchemaReader(in_, settings_) +{ +} + +DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, bool allow_to_skip_unsupported_types, bool & skip) +{ + switch (type) + { + case BSONType::DOUBLE: + { + in.ignore(sizeof(Float64)); + return makeNullable(std::make_shared()); + } + case BSONType::BOOL: + { + in.ignore(sizeof(UInt8)); + return makeNullable(DataTypeFactory::instance().get("Bool")); + } + case BSONType::INT64: + { + in.ignore(sizeof(Int64)); + return makeNullable(std::make_shared()); + } + case BSONType::DATETIME: + { + in.ignore(sizeof(Int64)); + return makeNullable(std::make_shared(6, "UTC")); + } + case BSONType::INT32: + { + in.ignore(sizeof(Int32)); + return makeNullable(std::make_shared()); + } + case BSONType::SYMBOL: [[fallthrough]]; + case BSONType::JAVA_SCRIPT_CODE: [[fallthrough]]; + case BSONType::OBJECT_ID: [[fallthrough]]; + case BSONType::STRING: + { + BSONSizeT size; + readBinary(size, in); + in.ignore(size); + return makeNullable(std::make_shared()); + } + case BSONType::DOCUMENT: + { + auto nested_names_and_types = getDataTypesFromBSONDocument(false); + auto nested_types = nested_names_and_types.getTypes(); + bool types_are_equal = true; + if (nested_types.empty() || !nested_types[0]) + return nullptr; + + for (size_t i = 1; i != nested_types.size(); ++i) + { + if (!nested_types[i]) + return nullptr; + + types_are_equal &= nested_types[i]->equals(*nested_types[0]); + } + + if (types_are_equal) + return std::make_shared(std::make_shared(), nested_types[0]); + + return std::make_shared(std::move(nested_types), nested_names_and_types.getNames()); + + } + case BSONType::ARRAY: + { + auto nested_types = getDataTypesFromBSONDocument(false).getTypes(); + bool types_are_equal = true; + if (nested_types.empty() || !nested_types[0]) + return nullptr; + + for (size_t i = 1; i != nested_types.size(); ++i) + { + if (!nested_types[i]) + return nullptr; + + types_are_equal &= nested_types[i]->equals(*nested_types[0]); + } + + if (types_are_equal) + return std::make_shared(nested_types[0]); 
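+ /// Otherwise fall back to a Tuple of the per-element types. Illustrative: a BSON array [1, "a", true]
+ /// is inferred as Tuple(Nullable(Int32), Nullable(String), Nullable(Bool)).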
+ + return std::make_shared(std::move(nested_types)); + } + case BSONType::BINARY: + { + BSONSizeT size; + readBinary(size, in); + auto subtype = getBSONBinarySubtype(readBSONType(in)); + in.ignore(size); + switch (subtype) + { + case BSONBinarySubtype::BINARY_OLD: [[fallthrough]]; + case BSONBinarySubtype::BINARY: + return makeNullable(std::make_shared()); + case BSONBinarySubtype::UUID_OLD: [[fallthrough]]; + case BSONBinarySubtype::UUID: + return makeNullable(std::make_shared()); + default: + throw Exception(ErrorCodes::UNKNOWN_TYPE, "BSON binary subtype {} is not supported", getBSONBinarySubtypeName(subtype)); + } + } + case BSONType::NULL_VALUE: + { + return nullptr; + } + default: + { + if (!allow_to_skip_unsupported_types) + throw Exception(ErrorCodes::UNKNOWN_TYPE, "BSON type {} is not supported", getBSONTypeName(type)); + + skip = true; + skipBSONField(in, type); + return nullptr; + } + } +} + +NamesAndTypesList BSONEachRowSchemaReader::getDataTypesFromBSONDocument(bool allow_to_skip_unsupported_types) +{ + size_t document_start = in.count(); + BSONSizeT document_size; + readBinary(document_size, in); + NamesAndTypesList names_and_types; + while (in.count() - document_start + sizeof(BSON_DOCUMENT_END) != document_size) + { + auto bson_type = getBSONType(readBSONType(in)); + String name; + readNullTerminated(name, in); + bool skip = false; + auto type = getDataTypeFromBSONField(bson_type, allow_to_skip_unsupported_types, skip); + if (!skip) + names_and_types.emplace_back(name, type); + } + + assertChar(BSON_DOCUMENT_END, in); + + return names_and_types; +} + +NamesAndTypesList BSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof) +{ + if (in.eof()) + { + eof = true; + return {}; + } + + return getDataTypesFromBSONDocument(format_settings.bson.skip_fields_with_unsupported_types_in_schema_inference); +} + +void BSONEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) +{ + DataTypes types = {type, new_type}; + /// For example for integer conversion Int32, + auto least_supertype = tryGetLeastSupertype(types); + if (least_supertype) + type = new_type = least_supertype; +} + +static std::pair +fileSegmentationEngineBSONEachRow(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) +{ + size_t number_of_rows = 0; + + while (!in.eof() && memory.size() < min_bytes && number_of_rows < max_rows) + { + BSONSizeT document_size; + readBinary(document_size, in); + if (min_bytes != 0 && document_size > 10 * min_bytes) + throw ParsingException( + ErrorCodes::INCORRECT_DATA, + "Size of BSON document is extremely large. Expected not greater than {} bytes, but current is {} bytes per row. 
Increase " + "the value setting 'min_chunk_bytes_for_parallel_parsing' or check your data manually, most likely BSON is malformed", + min_bytes, document_size); + + size_t old_size = memory.size(); + memory.resize(old_size + document_size); + memcpy(memory.data() + old_size, reinterpret_cast(&document_size), sizeof(document_size)); + in.readStrict(memory.data() + old_size + sizeof(document_size), document_size - sizeof(document_size)); + ++number_of_rows; + } + + return {!in.eof(), number_of_rows}; +} + +void registerInputFormatBSONEachRow(FormatFactory & factory) +{ + factory.registerInputFormat( + "BSONEachRow", + [](ReadBuffer & buf, const Block & sample, IRowInputFormat::Params params, const FormatSettings & settings) + { return std::make_shared(buf, sample, std::move(params), settings); }); +} + +void registerFileSegmentationEngineBSONEachRow(FormatFactory & factory) +{ + factory.registerFileSegmentationEngine("BSONEachRow", &fileSegmentationEngineBSONEachRow); +} + +void registerBSONEachRowSchemaReader(FormatFactory & factory) +{ + factory.registerSchemaReader("BSONEachRow", [](ReadBuffer & buf, const FormatSettings & settings) + { + return std::make_unique(buf, settings); + }); + factory.registerAdditionalInfoForSchemaCacheGetter("BSONEachRow", [](const FormatSettings & settings) + { + String result = getAdditionalFormatInfoForAllRowBasedFormats(settings); + return result + fmt::format(", skip_fields_with_unsupported_types_in_schema_inference={}", + settings.bson.skip_fields_with_unsupported_types_in_schema_inference); + }); +} + +} diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h new file mode 100644 index 00000000000..d0830ca2781 --- /dev/null +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.h @@ -0,0 +1,115 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +/* + * Class for parsing data in BSON format. + * Each row is parsed as a separate BSON document. + * Each column is parsed as a single field with column name as a key. + * It uses the following correspondence between BSON types and ClickHouse types: + * + * BSON Type | ClickHouse Type + * \x01 double | Float32/Float64 + * \x02 string | String/FixedString + * \x03 document | Map/Named Tuple + * \x04 array | Array/Tuple + * \x05 binary, \x00 binary subtype | String/FixedString + * \x05 binary, \x02 old binary subtype | String/FixedString + * \x05 binary, \x03 old uuid subtype | UUID + * \x05 binary, \x04 uuid subtype | UUID + * \x07 ObjectId | String + * \x08 boolean | Bool + * \x09 datetime | DateTime64 + * \x0A null value | NULL + * \x0D JavaScript code | String + * \x0E symbol | String/FixedString + * \x10 int32 | Int32/Decimal32 + * \x12 int64 | Int64/Decimal64/DateTime64 + * \x11 uint64 | UInt64 + * + * Other BSON types are not supported. + * Also, we perform conversion between different integer types + * (for example, you can insert BSON int32 value into ClickHouse UInt8) + * Big integers and decimals Int128/UInt128/Int256/UInt256/Decimal128/Decimal256 + * can be parsed from BSON Binary value with \x00 binary subtype. In this case + * we validate that the size of binary data equals the size of expected value. + * + * Note: this format will not work on Big-Endian platforms. 
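+ *
+ * Usage sketch (illustrative; the table and file names are placeholders):
+ *   INSERT INTO t FORMAT BSONEachRow
+ *   SELECT * FROM file('data.bson', 'BSONEachRow')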
+ */ + +class ReadBuffer; +class BSONEachRowRowInputFormat final : public IRowInputFormat +{ +public: + BSONEachRowRowInputFormat( + ReadBuffer & in_, const Block & header_, Params params_, const FormatSettings & format_settings_); + + String getName() const override { return "BSONEachRowRowInputFormat"; } + void resetParser() override { } + +private: + void readPrefix() override { } + void readSuffix() override { } + + bool readRow(MutableColumns & columns, RowReadExtension & ext) override; + bool allowSyncAfterError() const override { return true; } + void syncAfterError() override; + + size_t columnIndex(const StringRef & name, size_t key_index); + + using ColumnReader = std::function; + + bool readField(IColumn & column, const DataTypePtr & data_type, BSONType bson_type); + void skipUnknownField(BSONType type, const String & key_name); + + void readTuple(IColumn & column, const DataTypePtr & data_type, BSONType bson_type); + void readArray(IColumn & column, const DataTypePtr & data_type, BSONType bson_type); + void readMap(IColumn & column, const DataTypePtr & data_type, BSONType bson_type); + + const FormatSettings format_settings; + + /// Buffer for the read from the stream field name. Used when you have to copy it. + String current_key_name; + + /// Set of columns for which the values were read. The rest will be filled with default values. + std::vector read_columns; + /// Set of columns which already met in row. Exception is thrown if there are more than one column with the same name. + std::vector seen_columns; + /// These sets may be different, because if null_as_default=1 read_columns[i] will be false and seen_columns[i] will be true + /// for row like {..., "non-nullable column name" : null, ...} + + /// Hash table match `field name -> position in the block`. + Block::NameMap name_map; + + /// Cached search results for previous row (keyed as index in JSON object) - used as a hint. 
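The hint mentioned in the comment above usually works as follows; this is a hedged reconstruction of the prev_positions technique, not the literal columnIndex() body: when the parser meets the k-th key of a row, it first checks whether the k-th key of the previous row resolved to the same column, and only falls back to the name_map hash lookup on a mismatch. A generic sketch with plain standard-library types (the real code uses Block::NameMap and StringRef instead):

#include <cstddef>
#include <string>
#include <unordered_map>
#include <vector>

/// Generic illustration of a "previous row as a hint" column lookup.
struct ColumnIndexCache
{
    std::unordered_map<std::string, size_t> name_to_index;                      /// column name -> position in the block
    std::vector<const std::pair<const std::string, size_t> *> prev_positions;   /// hit cache, keyed by key index in the row

    static constexpr size_t UNKNOWN = size_t(-1);

    size_t lookup(const std::string & name, size_t key_index)
    {
        /// Fast path: consecutive rows usually repeat the same keys in the same order.
        if (key_index < prev_positions.size() && prev_positions[key_index]
            && prev_positions[key_index]->first == name)
            return prev_positions[key_index]->second;

        /// Slow path: full hash lookup, remembered for the next row.
        /// Pointers into the map stay valid because the map is not modified after construction.
        auto it = name_to_index.find(name);
        if (it == name_to_index.end())
            return UNKNOWN;
        if (key_index >= prev_positions.size())
            prev_positions.resize(key_index + 1, nullptr);
        prev_positions[key_index] = &*it;
        return it->second;
    }
};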
+ std::vector prev_positions; + + DataTypes types; + + size_t current_document_start; + BSONSizeT current_document_size; +}; + +class BSONEachRowSchemaReader : public IRowWithNamesSchemaReader +{ +public: + BSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & settings_); + +private: + NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; + + NamesAndTypesList getDataTypesFromBSONDocument(bool skip_unsupported_types); + DataTypePtr getDataTypeFromBSONField(BSONType type, bool skip_unsupported_types, bool & skip); +}; + +} diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp new file mode 100644 index 00000000000..c296114a6e7 --- /dev/null +++ b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp @@ -0,0 +1,527 @@ +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INCORRECT_DATA; + extern const int ILLEGAL_COLUMN; + extern const int LOGICAL_ERROR; +} + +/// In BSON all names should be valid UTF8 sequences +static String toValidUTF8String(const String & name) +{ + WriteBufferFromOwnString buf; + WriteBufferValidUTF8 validating_buf(buf); + writeString(name, validating_buf); + validating_buf.finalize(); + return buf.str(); +} + +BSONEachRowRowOutputFormat::BSONEachRowRowOutputFormat( + WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_) + : IRowOutputFormat(header_, out_, params_), settings(settings_) +{ + const auto & sample = getPort(PortKind::Main).getHeader(); + fields.reserve(sample.columns()); + for (const auto & field : sample.getNamesAndTypes()) + fields.emplace_back(toValidUTF8String(field.name), field.type); +} + +static void writeBSONSize(size_t size, WriteBuffer & buf) +{ + if (size > MAX_BSON_SIZE) + throw Exception(ErrorCodes::INCORRECT_DATA, "Too large document/value size: {}. 
Maximum allowed size: {}.", size, MAX_BSON_SIZE); + + writePODBinary(BSONSizeT(size), buf); +} + +template +static void writeBSONType(Type type, WriteBuffer & buf) +{ + UInt8 value = UInt8(type); + writeBinary(value, buf); +} + +static void writeBSONTypeAndKeyName(BSONType type, const String & name, WriteBuffer & buf) +{ + writeBSONType(type, buf); + writeString(name, buf); + writeChar(0x00, buf); +} + +template +static void writeBSONNumber(BSONType type, const IColumn & column, size_t row_num, const String & name, WriteBuffer & buf) +{ + writeBSONTypeAndKeyName(type, name, buf); + writePODBinary(assert_cast(column).getElement(row_num), buf); +} + +template +static void writeBSONString(const IColumn & column, size_t row_num, const String & name, WriteBuffer & buf, bool as_bson_string) +{ + const auto & string_column = assert_cast(column); + StringRef data = string_column.getDataAt(row_num); + if (as_bson_string) + { + writeBSONTypeAndKeyName(BSONType::STRING, name, buf); + writeBSONSize(data.size + 1, buf); + writeString(data, buf); + writeChar(0x00, buf); + } + else + { + writeBSONTypeAndKeyName(BSONType::BINARY, name, buf); + writeBSONSize(data.size, buf); + writeBSONType(BSONBinarySubtype::BINARY, buf); + writeString(data, buf); + } +} + +template +static void writeBSONBigInteger(const IColumn & column, size_t row_num, const String & name, WriteBuffer & buf) +{ + writeBSONTypeAndKeyName(BSONType::BINARY, name, buf); + writeBSONSize(sizeof(typename ColumnType::ValueType), buf); + writeBSONType(BSONBinarySubtype::BINARY, buf); + auto data = assert_cast(column).getDataAt(row_num); + buf.write(data.data, data.size); +} + +size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name) +{ + size_t size = 1; // Field type + size += name.size() + 1; // Field name and \0 + switch (column.getDataType()) + { + case TypeIndex::Int8: [[fallthrough]]; + case TypeIndex::Int16: [[fallthrough]]; + case TypeIndex::UInt16: [[fallthrough]]; + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Decimal32: [[fallthrough]]; + case TypeIndex::Int32: + { + return size + sizeof(Int32); + } + case TypeIndex::UInt8: + { + if (isBool(data_type)) + return size + 1; + + return size + sizeof(Int32); + } + case TypeIndex::Float32: [[fallthrough]]; + case TypeIndex::Float64: [[fallthrough]]; + case TypeIndex::UInt32: [[fallthrough]]; + case TypeIndex::Int64: [[fallthrough]]; + case TypeIndex::UInt64: [[fallthrough]]; + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::Decimal64: [[fallthrough]]; + case TypeIndex::DateTime64: + { + return size + sizeof(UInt64); + } + case TypeIndex::Int128: [[fallthrough]]; + case TypeIndex::UInt128: [[fallthrough]]; + case TypeIndex::Decimal128: + { + return size + sizeof(BSONSizeT) + 1 + sizeof(UInt128); // Size of a binary + binary subtype + 16 bytes of value + } + case TypeIndex::Int256: [[fallthrough]]; + case TypeIndex::UInt256: [[fallthrough]]; + case TypeIndex::Decimal256: + { + return size + sizeof(BSONSizeT) + 1 + sizeof(UInt256); // Size of a binary + binary subtype + 32 bytes of value + } + case TypeIndex::String: + { + const auto & string_column = assert_cast(column); + return size + sizeof(BSONSizeT) + string_column.getDataAt(row_num).size + 1; // Size of data + data + \0 or BSON subtype (in case of BSON binary) + } + case TypeIndex::FixedString: + { + const auto & string_column = assert_cast(column); + return size + sizeof(BSONSizeT) 
+ string_column.getN() + 1; // Size of data + data + \0 or BSON subtype (in case of BSON binary) + } + case TypeIndex::UUID: + { + return size + sizeof(BSONSizeT) + 1 + sizeof(UUID); // Size of data + BSON binary subtype + 16 bytes of value + } + case TypeIndex::LowCardinality: + { + const auto & lc_column = assert_cast(column); + auto dict_type = assert_cast(data_type.get())->getDictionaryType(); + auto dict_column = lc_column.getDictionary().getNestedColumn(); + size_t index = lc_column.getIndexAt(row_num); + return countBSONFieldSize(*dict_column, dict_type, index, name); + } + case TypeIndex::Nullable: + { + auto nested_type = removeNullable(data_type); + const ColumnNullable & column_nullable = assert_cast(column); + if (column_nullable.isNullAt(row_num)) + return size; /// Null has no value, just type + return countBSONFieldSize(column_nullable.getNestedColumn(), nested_type, row_num, name); + } + case TypeIndex::Array: + { + size += sizeof(BSONSizeT); // Size of a document + + const auto & nested_type = assert_cast(data_type.get())->getNestedType(); + const ColumnArray & column_array = assert_cast(column); + const IColumn & nested_column = column_array.getData(); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + size_t offset = offsets[row_num - 1]; + size_t array_size = offsets[row_num] - offset; + + for (size_t i = 0; i < array_size; ++i) + size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i)); // Add size of each value from array + + return size + sizeof(BSON_DOCUMENT_END); // Add final \0 + } + case TypeIndex::Tuple: + { + size += sizeof(BSONSizeT); // Size of a document + + const auto * tuple_type = assert_cast(data_type.get()); + const auto & nested_types = tuple_type->getElements(); + bool have_explicit_names = tuple_type->haveExplicitNames(); + const auto & nested_names = tuple_type->getElementNames(); + const auto & tuple_column = assert_cast(column); + const auto & nested_columns = tuple_column.getColumns(); + + for (size_t i = 0; i < nested_columns.size(); ++i) + { + String key_name = have_explicit_names ? 
toValidUTF8String(nested_names[i]) : std::to_string(i); + size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name); // Add size of each value from tuple + } + + return size + sizeof(BSON_DOCUMENT_END); // Add final \0 + } + case TypeIndex::Map: + { + size += sizeof(BSONSizeT); // Size of a document + + const auto & map_type = assert_cast(*data_type); + if (!isStringOrFixedString(map_type.getKeyType())) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", map_type.getKeyType()->getName()); + const auto & value_type = map_type.getValueType(); + + const auto & map_column = assert_cast(column); + const auto & nested_column = map_column.getNestedColumn(); + const auto & key_value_columns = map_column.getNestedData().getColumns(); + const auto & key_column = key_value_columns[0]; + const auto & value_column = key_value_columns[1]; + const auto & offsets = nested_column.getOffsets(); + size_t offset = offsets[row_num - 1]; + size_t map_size = offsets[row_num] - offset; + + for (size_t i = 0; i < map_size; ++i) + { + String key = toValidUTF8String(key_column->getDataAt(offset + i).toString()); + size += countBSONFieldSize(*value_column, value_type, offset + i, key); + } + + return size + sizeof(BSON_DOCUMENT_END); // Add final \0 + } + default: + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported in BSON output format", data_type->getName()); + } +} + +void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name) +{ + switch (column.getDataType()) + { + case TypeIndex::Float32: + { + writeBSONNumber(BSONType::DOUBLE, column, row_num, name, out); + break; + } + case TypeIndex::Float64: + { + writeBSONNumber(BSONType::DOUBLE, column, row_num, name, out); + break; + } + case TypeIndex::Int8: + { + writeBSONNumber(BSONType::INT32, column, row_num, name, out); + break; + } + case TypeIndex::UInt8: + { + if (isBool(data_type)) + writeBSONNumber(BSONType::BOOL, column, row_num, name, out); + else + writeBSONNumber(BSONType::INT32, column, row_num, name, out); + break; + } + case TypeIndex::Int16: + { + writeBSONNumber(BSONType::INT32, column, row_num, name, out); + break; + } + case TypeIndex::Date: [[fallthrough]]; + case TypeIndex::UInt16: + { + writeBSONNumber(BSONType::INT32, column, row_num, name, out); + break; + } + case TypeIndex::Date32: [[fallthrough]]; + case TypeIndex::Int32: + { + writeBSONNumber(BSONType::INT32, column, row_num, name, out); + break; + } + case TypeIndex::DateTime: [[fallthrough]]; + case TypeIndex::UInt32: + { + writeBSONNumber(BSONType::INT64, column, row_num, name, out); + break; + } + case TypeIndex::Int64: + { + writeBSONNumber(BSONType::INT64, column, row_num, name, out); + break; + } + case TypeIndex::UInt64: + { + writeBSONNumber(BSONType::INT64, column, row_num, name, out); + break; + } + case TypeIndex::Int128: + { + writeBSONBigInteger(column, row_num, name, out); + break; + } + case TypeIndex::UInt128: + { + writeBSONBigInteger(column, row_num, name, out); + break; + } + case TypeIndex::Int256: + { + writeBSONBigInteger(column, row_num, name, out); + break; + } + case TypeIndex::UInt256: + { + writeBSONBigInteger(column, row_num, name, out); + break; + } + case TypeIndex::Decimal32: + { + writeBSONNumber, Decimal32>(BSONType::INT32, column, row_num, name, out); + break; + } + case TypeIndex::DateTime64: + { + writeBSONNumber, Decimal64>(BSONType::DATETIME, column, row_num, 
name, out); + break; + } + case TypeIndex::Decimal64: + { + writeBSONNumber, Decimal64>(BSONType::INT64, column, row_num, name, out); + break; + } + case TypeIndex::Decimal128: + { + writeBSONBigInteger>(column, row_num, name, out); + break; + } + case TypeIndex::Decimal256: + { + writeBSONBigInteger>(column, row_num, name, out); + break; + } + case TypeIndex::String: + { + writeBSONString(column, row_num, name, out, settings.bson.output_string_as_string); + break; + } + case TypeIndex::FixedString: + { + writeBSONString(column, row_num, name, out, settings.bson.output_string_as_string); + break; + } + case TypeIndex::UUID: + { + writeBSONTypeAndKeyName(BSONType::BINARY, name, out); + writeBSONSize(sizeof(UUID), out); + writeBSONType(BSONBinarySubtype::UUID, out); + writeBinary(assert_cast(column).getElement(row_num), out); + break; + } + case TypeIndex::LowCardinality: + { + const auto & lc_column = assert_cast(column); + auto dict_type = assert_cast(data_type.get())->getDictionaryType(); + auto dict_column = lc_column.getDictionary().getNestedColumn(); + size_t index = lc_column.getIndexAt(row_num); + serializeField(*dict_column, dict_type, index, name); + break; + } + case TypeIndex::Nullable: + { + auto nested_type = removeNullable(data_type); + const ColumnNullable & column_nullable = assert_cast(column); + if (!column_nullable.isNullAt(row_num)) + serializeField(column_nullable.getNestedColumn(), nested_type, row_num, name); + else + writeBSONTypeAndKeyName(BSONType::NULL_VALUE, name, out); + break; + } + case TypeIndex::Array: + { + const auto & nested_type = assert_cast(data_type.get())->getNestedType(); + const ColumnArray & column_array = assert_cast(column); + const IColumn & nested_column = column_array.getData(); + const ColumnArray::Offsets & offsets = column_array.getOffsets(); + size_t offset = offsets[row_num - 1]; + size_t array_size = offsets[row_num] - offset; + + writeBSONTypeAndKeyName(BSONType::ARRAY, name, out); + + size_t document_size = sizeof(BSONSizeT); + for (size_t i = 0; i < array_size; ++i) + document_size += countBSONFieldSize(nested_column, nested_type, offset + i, std::to_string(i)); // Add size of each value from array + document_size += sizeof(BSON_DOCUMENT_END); // Add final \0 + + writeBSONSize(document_size, out); + + for (size_t i = 0; i < array_size; ++i) + serializeField(nested_column, nested_type, offset + i, std::to_string(i)); + + writeChar(BSON_DOCUMENT_END, out); + break; + } + case TypeIndex::Tuple: + { + const auto * tuple_type = assert_cast(data_type.get()); + const auto & nested_types = tuple_type->getElements(); + bool have_explicit_names = tuple_type->haveExplicitNames(); + const auto & nested_names = tuple_type->getElementNames(); + const auto & tuple_column = assert_cast(column); + const auto & nested_columns = tuple_column.getColumns(); + + BSONType bson_type = have_explicit_names ? BSONType::DOCUMENT : BSONType::ARRAY; + writeBSONTypeAndKeyName(bson_type, name, out); + + size_t document_size = sizeof(BSONSizeT); + for (size_t i = 0; i < nested_columns.size(); ++i) + { + String key_name = have_explicit_names ? 
toValidUTF8String(nested_names[i]) : std::to_string(i); + document_size += countBSONFieldSize(*nested_columns[i], nested_types[i], row_num, key_name); // Add size of each value from tuple + } + document_size += sizeof(BSON_DOCUMENT_END); // Add final \0 + + writeBSONSize(document_size, out); + + for (size_t i = 0; i < nested_columns.size(); ++i) + serializeField(*nested_columns[i], nested_types[i], row_num, toValidUTF8String(nested_names[i])); + + writeChar(BSON_DOCUMENT_END, out); + break; + } + case TypeIndex::Map: + { + const auto & map_type = assert_cast(*data_type); + if (!isStringOrFixedString(map_type.getKeyType())) + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", map_type.getKeyType()->getName()); + const auto & value_type = map_type.getValueType(); + + const auto & map_column = assert_cast(column); + const auto & nested_column = map_column.getNestedColumn(); + const auto & key_value_columns = map_column.getNestedData().getColumns(); + const auto & key_column = key_value_columns[0]; + const auto & value_column = key_value_columns[1]; + const auto & offsets = nested_column.getOffsets(); + size_t offset = offsets[row_num - 1]; + size_t map_size = offsets[row_num] - offset; + + writeBSONTypeAndKeyName(BSONType::DOCUMENT, name, out); + + size_t document_size = sizeof(BSONSizeT); + for (size_t i = 0; i < map_size; ++i) + { + String key = toValidUTF8String(key_column->getDataAt(offset + i).toString()); + document_size += countBSONFieldSize(*value_column, value_type, offset + i, key); + } + document_size += sizeof(BSON_DOCUMENT_END); + + writeBSONSize(document_size, out); + + for (size_t i = 0; i < map_size; ++i) + { + String key = toValidUTF8String(key_column->getDataAt(offset + i).toString()); + serializeField(*value_column, value_type, offset + i, key); + } + + writeChar(BSON_DOCUMENT_END, out); + break; + } + default: + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported in BSON output format", data_type->getName()); + } +} + +void BSONEachRowRowOutputFormat::write(const Columns & columns, size_t row_num) +{ + /// We should calculate and write document size before its content + size_t document_size = sizeof(BSONSizeT); + for (size_t i = 0; i != columns.size(); ++i) + document_size += countBSONFieldSize(*columns[i], fields[i].type, row_num, fields[i].name); + document_size += sizeof(BSON_DOCUMENT_END); + + size_t document_start = out.count(); + writeBSONSize(document_size, out); + + for (size_t i = 0; i != columns.size(); ++i) + serializeField(*columns[i], fields[i].type, row_num, fields[i].name); + + writeChar(BSON_DOCUMENT_END, out); + + size_t actual_document_size = out.count() - document_start; + if (actual_document_size != document_size) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "The actual size of the BSON document does not match the estimated size: {} != {}", + actual_document_size, + document_size); +} + +void registerOutputFormatBSONEachRow(FormatFactory & factory) +{ + factory.registerOutputFormat( + "BSONEachRow", + [](WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, const FormatSettings & _format_settings) + { return std::make_shared(buf, sample, params, _format_settings); }); + factory.markOutputFormatSupportsParallelFormatting("BSONEachRow"); +} + +} diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h new file mode 100644 index 00000000000..f2252cabebe --- /dev/null +++ 
b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +/* + * Class for formatting data in BSON format. + * Each row is formatted as a separate BSON document. + * Each column is formatted as a single field with column name as a key. + * It uses the following correspondence between ClickHouse types and BSON types: + * + * ClickHouse type | BSON Type + * Bool | \x08 boolean + * Int8/UInt8 | \x10 int32 + * Int16UInt16 | \x10 int32 + * Int32 | \x10 int32 + * UInt32 | \x12 int64 + * Int64 | \x12 int64 + * UInt64 | \x11 uint64 + * Float32/Float64 | \x01 double + * Date/Date32 | \x10 int32 + * DateTime | \x12 int64 + * DateTime64 | \x09 datetime + * Decimal32 | \x10 int32 + * Decimal64 | \x12 int64 + * Decimal128 | \x05 binary, \x00 binary subtype, size = 16 + * Decimal256 | \x05 binary, \x00 binary subtype, size = 32 + * Int128/UInt128 | \x05 binary, \x00 binary subtype, size = 16 + * Int256/UInt256 | \x05 binary, \x00 binary subtype, size = 32 + * String/FixedString | \x05 binary, \x00 binary subtype or \x02 string if setting output_format_bson_string_as_string is enabled + * UUID | \x05 binary, \x04 uuid subtype, size = 16 + * Array | \x04 array + * Tuple | \x04 array + * Named Tuple | \x03 document + * Map (with String keys) | \x03 document + * + * Note: on Big-Endian platforms this format will not work properly. + */ + +class BSONEachRowRowOutputFormat final : public IRowOutputFormat +{ +public: + BSONEachRowRowOutputFormat( + WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, const FormatSettings & settings_); + + String getName() const override { return "BSONEachRowRowOutputFormat"; } + +private: + void write(const Columns & columns, size_t row_num) override; + void writeField(const IColumn &, const ISerialization &, size_t) override { } + + void serializeField(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name); + + /// Count field size in bytes that we will get after serialization in BSON format. + /// It's needed to calculate document size before actual serialization, + /// because in BSON format we should write the size of the document before its content. 
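To make the size pre-computation concrete: for a row with a single Int32 column named "x", the document is 4 bytes of size prefix, 1 byte of element type (\x10), 2 bytes for the key ("x" plus the terminating NUL), 4 bytes of value and a final 0x00, i.e. 12 bytes, and that 12 is exactly what write() must emit before any field data. A small sketch of the same bookkeeping for fields whose value sizes are already known (MeasuredField and countBSONDocumentSize are illustrative names, not part of the patch):

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

using BSONSizeT = uint32_t;   /// the 4-byte BSON size prefix

/// One already-measured field: its (valid UTF-8) key and the byte size of its encoded value.
struct MeasuredField
{
    std::string name;
    size_t value_size;
};

/// Document size = size prefix + sum of (type byte + key + NUL + value) + final 0x00.
size_t countBSONDocumentSize(const std::vector<MeasuredField> & fields)
{
    size_t size = sizeof(BSONSizeT);
    for (const auto & field : fields)
        size += 1 + field.name.size() + 1 + field.value_size;
    return size + 1;
}

/// countBSONDocumentSize({{"x", sizeof(int32_t)}}) == 12

Note that string values carry a second length of their own: written as \x02 string the value is a 4-byte length (counting the trailing NUL), the bytes and a NUL; written as \x05 binary it is a 4-byte length, a subtype byte and the bytes, matching the two branches of writeBSONString. Arrays, tuples and maps repeat the whole calculation recursively, which is why countBSONFieldSize mirrors serializeField case by case and why write() cross-checks the estimated size against the number of bytes actually written.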
+ size_t countBSONFieldSize(const IColumn & column, const DataTypePtr & data_type, size_t row_num, const String & name); + + NamesAndTypes fields; + FormatSettings settings; +}; + +} diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index 91bebd0daa4..047a55d3f90 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes extern const int CANNOT_SKIP_UNKNOWN_FIELD; } -BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) +BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : RowInputFormatWithNamesAndTypes( header, in_, diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index ff7cc013cee..7e600c5b3dd 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -20,7 +20,7 @@ class ReadBuffer; class BinaryRowInputFormat final : public RowInputFormatWithNamesAndTypes { public: - BinaryRowInputFormat(ReadBuffer & in_, Block header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); + BinaryRowInputFormat(ReadBuffer & in_, const Block & header, Params params_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); String getName() const override { return "BinaryRowInputFormat"; } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index 0ada15637ce..cfd68079bba 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -72,10 +72,10 @@ JSONColumnsBlockInputFormatBase::JSONColumnsBlockInputFormatBase( : IInputFormat(header_, in_) , format_settings(format_settings_) , fields(header_.getNamesAndTypes()) - , name_to_index(header_.getNamesToIndexesMap()) , serializations(header_.getSerializations()) , reader(std::move(reader_)) { + name_to_index = getPort().getHeader().getNamesToIndexesMap(); } size_t JSONColumnsBlockInputFormatBase::readColumn( @@ -125,7 +125,7 @@ Chunk JSONColumnsBlockInputFormatBase::generate() { /// Check if this name appears in header. If no, skip this column or throw /// an exception according to setting input_format_skip_unknown_fields - if (!name_to_index.contains(*column_name)) + if (!name_to_index.has(*column_name)) { if (!format_settings.skip_unknown_fields) throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column found in input data: {}", *column_name); diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index 308c8a59b92..a8311123afc 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -60,7 +60,7 @@ protected: const FormatSettings format_settings; const NamesAndTypes fields; /// Maps column names and their positions in header. 
- std::unordered_map name_to_index; + Block::NameMap name_to_index; Serializations serializations; std::unique_ptr reader; BlockMissingValues block_missing_values; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index db5a027844b..8a5ef33b73d 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -37,25 +37,25 @@ JSONEachRowRowInputFormat::JSONEachRowRowInputFormat( Params params_, const FormatSettings & format_settings_, bool yield_strings_) - : IRowInputFormat(header_, in_, std::move(params_)), format_settings(format_settings_), name_map(header_.columns()), yield_strings(yield_strings_) + : IRowInputFormat(header_, in_, std::move(params_)) + , format_settings(format_settings_) + , prev_positions(header_.columns()) + , yield_strings(yield_strings_) { - size_t num_columns = getPort().getHeader().columns(); - for (size_t i = 0; i < num_columns; ++i) + name_map = getPort().getHeader().getNamesToIndexesMap(); + if (format_settings_.import_nested_json) { - const String & column_name = columnName(i); - name_map[column_name] = i; /// NOTE You could place names more cache-locally. - if (format_settings_.import_nested_json) + for (size_t i = 0; i != header_.columns(); ++i) { - const auto split = Nested::splitName(column_name); + const StringRef column_name = header_.getByPosition(i).name; + const auto split = Nested::splitName(column_name.toView()); if (!split.second.empty()) { - const StringRef table_name(column_name.data(), split.first.size()); + const StringRef table_name(column_name.data, split.first.size()); name_map[table_name] = NESTED_FIELD; } } } - - prev_positions.resize(num_columns); } const String & JSONEachRowRowInputFormat::columnName(size_t i) const diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 80fdda687e2..4e62754bc3d 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -32,6 +32,7 @@ #include #include +#include namespace DB { @@ -552,12 +553,9 @@ void registerMsgPackSchemaReader(FormatFactory & factory) }); factory.registerAdditionalInfoForSchemaCacheGetter("MsgPack", [](const FormatSettings & settings) { - return fmt::format( - "number_of_columns={}, schema_inference_hints={}, max_rows_to_read_for_schema_inference={}", - settings.msgpack.number_of_columns, - settings.schema_inference_hints, - settings.max_rows_to_read_for_schema_inference); - }); + String result = getAdditionalFormatInfoForAllRowBasedFormats(settings); + return result + fmt::format(", number_of_columns={}", settings.msgpack.number_of_columns); + }); } } diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index beca7ad2552..faa74e234b9 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -35,9 +35,9 @@ MySQLDumpRowInputFormat::MySQLDumpRowInputFormat(ReadBuffer & in_, const Block & : IRowInputFormat(header_, in_, params_) , table_name(format_settings_.mysql_dump.table_name) , types(header_.getDataTypes()) - , column_indexes_by_names(header_.getNamesToIndexesMap()) , format_settings(format_settings_) { + column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap(); } diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h 
b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h index 6be20550e49..c28355054d7 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h @@ -22,7 +22,7 @@ private: String table_name; DataTypes types; - std::unordered_map column_indexes_by_names; + Block::NameMap column_indexes_by_names; const FormatSettings format_settings; }; diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index 0804b188c07..e0e8ea47a7b 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -30,8 +30,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( , with_names(with_names_) , with_types(with_types_) , format_reader(std::move(format_reader_)) - , column_indexes_by_names(header_.getNamesToIndexesMap()) { + column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap(); } void RowInputFormatWithNamesAndTypes::readPrefix() diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index 9d0734f4567..e7dda957b04 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -59,7 +59,7 @@ private: std::unique_ptr format_reader; protected: - std::unordered_map column_indexes_by_names; + Block::NameMap column_indexes_by_names; }; /// Base class for parsing data in input formats with -WithNames and -WithNamesAndTypes suffixes. diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index 9eef17cf40d..b8f40789e83 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -3,11 +3,8 @@ #include #include -#include #include #include -#include -#include #include #include @@ -18,7 +15,6 @@ #include #include #include -#include #include // only after poco diff --git a/src/Processors/Transforms/MongoDBSource.h b/src/Processors/Transforms/MongoDBSource.h index 322aa4f50de..d03a7a45477 100644 --- a/src/Processors/Transforms/MongoDBSource.h +++ b/src/Processors/Transforms/MongoDBSource.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 830f400faf2..cc484855e76 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -275,10 +275,8 @@ Chain buildPushingToViewsChain( SCOPE_EXIT({ current_thread = original_thread; }); std::unique_ptr view_thread_status_ptr = std::make_unique(); - /// Disable query profiler for this ThreadStatus since the running (main query) thread should already have one - /// If we didn't disable it, then we could end up with N + 1 (N = number of dependencies) profilers which means - /// N times more interruptions - view_thread_status_ptr->disableProfiling(); + /// Copy of a ThreadStatus should be internal. + view_thread_status_ptr->setInternalThread(); /// view_thread_status_ptr will be moved later (on and on), so need to capture raw pointer. 
view_thread_status_ptr->deleter = [thread_status = view_thread_status_ptr.get(), running_group] { diff --git a/src/Storages/MergeTree/BackgroundProcessList.h b/src/Storages/MergeTree/BackgroundProcessList.h index baf3e281257..c9a4887cca3 100644 --- a/src/Storages/MergeTree/BackgroundProcessList.h +++ b/src/Storages/MergeTree/BackgroundProcessList.h @@ -10,7 +10,7 @@ namespace DB { /// Common code for background processes lists, like system.merges and system.replicated_fetches -/// Look at examples in MergeList and ReplicatedFetchList +/// Look at examples in MergeList, MovesList and ReplicatedFetchList template class BackgroundProcessList; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index f38105ce1f6..81e0e8dc95a 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -2023,8 +2023,7 @@ std::optional getIndexExtensionFromFilesystem(const IDataPartStorag for (auto it = data_part_storage.iterate(); it->isValid(); it->next()) { const auto & extension = fs::path(it->name()).extension(); - if (extension == getIndexExtension(false) - || extension == getIndexExtension(true)) + if (extension == getIndexExtension(true)) return extension; } } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 22e67c17c5e..5c731a03aa8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -6648,7 +6648,7 @@ MergeTreeData::CurrentlyMovingPartsTagger::CurrentlyMovingPartsTagger(MergeTreeM MergeTreeData::CurrentlyMovingPartsTagger::~CurrentlyMovingPartsTagger() { std::lock_guard lock(data.moving_parts_mutex); - for (const auto & moving_part : parts_to_move) + for (auto & moving_part : parts_to_move) { /// Something went completely wrong if (!data.currently_moving_parts.contains(moving_part.part)) @@ -6774,6 +6774,14 @@ bool MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & moving_tagge nullptr); }; + // Register in global moves list (StorageSystemMoves) + auto moves_list_entry = getContext()->getMovesList().insert( + getStorageID(), + moving_part.part->name, + moving_part.reserved_space->getDisk()->getName(), + moving_part.reserved_space->getDisk()->getPath(), + moving_part.part->getBytesOnDisk()); + try { /// If zero-copy replication enabled than replicas shouldn't try to diff --git a/src/Storages/MergeTree/MergeTreePartsMover.h b/src/Storages/MergeTree/MergeTreePartsMover.h index 0266b2daa46..dfb4bb954d7 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.h +++ b/src/Storages/MergeTree/MergeTreePartsMover.h @@ -5,14 +5,14 @@ #include #include #include +#include #include namespace DB { -/// Active part from storage and destination reservation where -/// it have to be moved. +/// Active part from storage and destination reservation where it has to be moved struct MergeTreeMoveEntry { std::shared_ptr part; @@ -54,7 +54,7 @@ public: /// Replaces cloned part from detached directory into active data parts set. /// Replacing part changes state to DeleteOnDestroy and will be removed from disk after destructor of - ///IMergeTreeDataPart called. If replacing part doesn't exists or not active (committed) than + /// IMergeTreeDataPart called. If replacing part doesn't exists or not active (committed) than /// cloned part will be removed and log message will be reported. It may happen in case of concurrent /// merge or mutation. 
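A side note on the moves-list registration added in MergeTreeData::moveParts above: the entry returned by getMovesList().insert(...) is kept in a local variable for the duration of the move, which suggests the usual scoped-registration pattern of these background process lists, where an operation is visible in the corresponding system table (here system.moves, see StorageSystemMoves further down) only while its entry is alive. A self-contained sketch of that pattern with made-up types; SimpleProcessList and MoveEntryInfo are illustrative and not the ClickHouse classes:

#include <iterator>
#include <list>
#include <memory>
#include <mutex>
#include <string>
#include <vector>

struct MoveEntryInfo { std::string table; std::string part_name; };

/// Illustrative scoped-registration list, not the real BackgroundProcessList.
class SimpleProcessList
{
public:
    class Entry
    {
    public:
        Entry(SimpleProcessList & list_, std::list<MoveEntryInfo>::iterator it_) : list(list_), it(it_) {}
        ~Entry()
        {
            std::lock_guard lock(list.mutex);
            list.entries.erase(it);   /// unregisters automatically when the move finishes or throws
        }
    private:
        SimpleProcessList & list;
        std::list<MoveEntryInfo>::iterator it;
    };

    std::unique_ptr<Entry> insert(MoveEntryInfo info)
    {
        std::lock_guard lock(mutex);
        entries.push_back(std::move(info));
        return std::make_unique<Entry>(*this, std::prev(entries.end()));
    }

    std::vector<MoveEntryInfo> snapshot() const   /// what a system table like system.moves would render
    {
        std::lock_guard lock(mutex);
        return {entries.begin(), entries.end()};
    }

private:
    mutable std::mutex mutex;
    std::list<MoveEntryInfo> entries;
};

In the patch itself the entry is created right before the try block that performs the actual move, so an in-flight move stays observable while it runs.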
void swapClonedPart(const MergeTreeMutableDataPartPtr & cloned_parts) const; diff --git a/src/Storages/MergeTree/MovesList.cpp b/src/Storages/MergeTree/MovesList.cpp new file mode 100644 index 00000000000..730cd44a697 --- /dev/null +++ b/src/Storages/MergeTree/MovesList.cpp @@ -0,0 +1,37 @@ +#include +#include +#include + +namespace DB +{ + +MovesListElement::MovesListElement( + const StorageID & table_id_, + const std::string & part_name_, + const std::string & target_disk_name_, + const std::string & target_disk_path_, + UInt64 part_size_) + : table_id(table_id_) + , part_name(part_name_) + , target_disk_name(target_disk_name_) + , target_disk_path(target_disk_path_) + , part_size(part_size_) + , thread_id(getThreadId()) +{ +} + +MoveInfo MovesListElement::getInfo() const +{ + MoveInfo res; + res.database = table_id.database_name; + res.table = table_id.table_name; + res.part_name = part_name; + res.target_disk_name = target_disk_name; + res.target_disk_path = target_disk_path; + res.part_size = part_size; + res.elapsed = watch.elapsedSeconds(); + res.thread_id = thread_id; + return res; +} + +} diff --git a/src/Storages/MergeTree/MovesList.h b/src/Storages/MergeTree/MovesList.h new file mode 100644 index 00000000000..42f0901b41d --- /dev/null +++ b/src/Storages/MergeTree/MovesList.h @@ -0,0 +1,64 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace CurrentMetrics +{ + extern const Metric Move; +} + +namespace DB +{ + +struct MoveInfo +{ + std::string database; + std::string table; + std::string part_name; + std::string target_disk_name; + std::string target_disk_path; + UInt64 part_size; + + Float64 elapsed; + UInt64 thread_id; +}; + +struct MovesListElement : private boost::noncopyable +{ + const StorageID table_id; + const std::string part_name; + const std::string target_disk_name; + const std::string target_disk_path; + const UInt64 part_size; + + Stopwatch watch; + const UInt64 thread_id; + + MovesListElement( + const StorageID & table_id_, + const std::string & part_name_, + const std::string & target_disk_name_, + const std::string & target_disk_path_, + UInt64 part_size_); + + MoveInfo getInfo() const; +}; + + +/// List of currently processing moves +class MovesList final : public BackgroundProcessList +{ +private: + using Parent = BackgroundProcessList; + +public: + MovesList() + : Parent(CurrentMetrics::Move) + {} +}; + +} diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 65b4dce3ad2..e382e7f7bbb 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -434,7 +434,7 @@ void StorageBuffer::read( } -static void appendBlock(const Block & from, Block & to) +static void appendBlock(Poco::Logger * log, const Block & from, Block & to) { size_t rows = from.rows(); size_t old_rows = to.rows(); @@ -456,7 +456,24 @@ static void appendBlock(const Block & from, Block & to) for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) { const IColumn & col_from = *from.getByPosition(column_no).column.get(); - last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column)); + { + /// Usually IColumn::mutate() here will simply move pointers, + /// however in case of parallel reading from it via SELECT, it + /// is possible for the full IColumn::clone() here, and in this + /// case it may fail due to MEMORY_LIMIT_EXCEEDED, and this + /// breaks the rollback, since the column got lost, it is + /// neither in last_col nor in "to" block. 
+ /// + /// The safest option here, is to do a full clone every time, + /// however, it is overhead. And it looks like the only + /// exception that is possible here is MEMORY_LIMIT_EXCEEDED, + /// and it is better to simply suppress it, to avoid overhead + /// for every INSERT into Buffer (Anyway we have a + /// LOGICAL_ERROR in rollback that will bail if something else + /// will happens here). + LockMemoryExceptionInThread temporarily_ignore_any_memory_limits(VariableContext::Global); + last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column)); + } /// In case of ColumnAggregateFunction aggregate states will /// be allocated from the query context but can be destroyed from the @@ -468,7 +485,10 @@ static void appendBlock(const Block & from, Block & to) last_col->ensureOwnership(); last_col->insertRangeFrom(col_from, 0, rows); - to.getByPosition(column_no).column = std::move(last_col); + { + DENY_ALLOCATIONS_IN_SCOPE; + to.getByPosition(column_no).column = std::move(last_col); + } } CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows); CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, to.bytes() - old_bytes); @@ -481,6 +501,9 @@ static void appendBlock(const Block & from, Block & to) /// So ignore any memory limits, even global (since memory tracking has drift). LockMemoryExceptionInThread temporarily_ignore_any_memory_limits(VariableContext::Global); + /// But first log exception to get more details in case of LOGICAL_ERROR + tryLogCurrentException(log, "Caught exception while adding data to buffer, rolling back..."); + try { for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) @@ -625,7 +648,7 @@ private: size_t old_rows = buffer.data.rows(); size_t old_bytes = buffer.data.allocatedBytes(); - appendBlock(sorted_block, buffer.data); + appendBlock(storage.log, sorted_block, buffer.data); storage.total_writes.rows += (buffer.data.rows() - old_rows); storage.total_writes.bytes += (buffer.data.allocatedBytes() - old_bytes); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9eacec2351e..dd0e3023f0e 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7645,7 +7645,15 @@ void StorageReplicatedMergeTree::createTableSharedID() const return; } - auto zookeeper = getZooKeeper(); + /// We may call getTableSharedID when table is shut down. If exception happen, restarting thread will be already turned + /// off and nobody will reconnect our zookeeper connection. In this case we use zookeeper connection from + /// context. 
+ ZooKeeperPtr zookeeper; + if (shutdown_called.load()) + zookeeper = getZooKeeperIfTableShutDown(); + else + zookeeper = getZooKeeper(); + String zookeeper_table_id_path = fs::path(zookeeper_path) / "table_shared_id"; String id; if (!zookeeper->tryGet(zookeeper_table_id_path, id)) diff --git a/src/Storages/System/StorageSystemMoves.cpp b/src/Storages/System/StorageSystemMoves.cpp new file mode 100644 index 00000000000..6ecc9e7f373 --- /dev/null +++ b/src/Storages/System/StorageSystemMoves.cpp @@ -0,0 +1,47 @@ +#include +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemMoves::getNamesAndTypes() +{ + return { + {"database", std::make_shared()}, + {"table", std::make_shared()}, + {"elapsed", std::make_shared()}, + {"target_disk_name", std::make_shared()}, + {"target_disk_path", std::make_shared()}, + {"part_name", std::make_shared()}, + {"part_size", std::make_shared()}, + {"thread_id", std::make_shared()}, + }; +} + + +void StorageSystemMoves::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + const auto access = context->getAccess(); + const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_TABLES); + + for (const auto & move : context->getMovesList().get()) + { + if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, move.database, move.table)) + continue; + + size_t i = 0; + res_columns[i++]->insert(move.database); + res_columns[i++]->insert(move.table); + res_columns[i++]->insert(move.elapsed); + res_columns[i++]->insert(move.target_disk_name); + res_columns[i++]->insert(move.target_disk_path); + res_columns[i++]->insert(move.part_name); + res_columns[i++]->insert(move.part_size); + res_columns[i++]->insert(move.thread_id); + } +} + +} diff --git a/src/Storages/System/StorageSystemMoves.h b/src/Storages/System/StorageSystemMoves.h new file mode 100644 index 00000000000..2e4ceec2abd --- /dev/null +++ b/src/Storages/System/StorageSystemMoves.h @@ -0,0 +1,28 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +class Context; + + +class StorageSystemMoves final : public IStorageSystemOneBlock +{ +public: + std::string getName() const override { return "SystemMoves"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + using IStorageSystemOneBlock::IStorageSystemOneBlock; + + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index cb6265d82df..c3d2e64b303 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -73,7 +73,7 @@ private: static bool hasStateColumn(const Names & column_names, const StorageSnapshotPtr & storage_snapshot); protected: - const FormatSettings format_settings; + const FormatSettings format_settings = {}; StorageSystemPartsBase(const StorageID & table_id_, NamesAndTypesList && columns_); diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index 068f7ddce46..e82f7c9bb2b 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "processes"); attach(context, system_database, 
"metrics"); attach(context, system_database, "merges"); + attach(context, system_database, "moves"); attach(context, system_database, "mutations"); attach(context, system_database, "replicas"); attach(context, system_database, "replication_queue"); diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp index aa602e59d78..f831d4ae609 100644 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ b/src/TableFunctions/TableFunctionDeltaLake.cpp @@ -139,8 +139,7 @@ ColumnsDescription TableFunctionDeltaLake::getActualTableStructure(ContextPtr co StoragePtr TableFunctionDeltaLake::executeImpl( const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { - Poco::URI uri(configuration.url); - S3::URI s3_uri(uri); + S3::URI s3_uri(configuration.url); ColumnsDescription columns; if (configuration.structure != "auto") diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp index 996be2359a6..f39f3b515ec 100644 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ b/src/TableFunctions/TableFunctionHudi.cpp @@ -139,8 +139,7 @@ ColumnsDescription TableFunctionHudi::getActualTableStructure(ContextPtr context StoragePtr TableFunctionHudi::executeImpl( const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { - Poco::URI uri(configuration.url); - S3::URI s3_uri(uri); + S3::URI s3_uri(configuration.url); ColumnsDescription columns; if (configuration.structure != "auto") diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index 962afed8c11..b88b7fda063 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -1,3 +1,5 @@ +#include + #include #include @@ -7,7 +9,6 @@ #include #include -#include #include #include #include diff --git a/src/TableFunctions/TableFunctionMongoDB.h b/src/TableFunctions/TableFunctionMongoDB.h index 40e4802e9e6..dd62bf4b2b4 100644 --- a/src/TableFunctions/TableFunctionMongoDB.h +++ b/src/TableFunctions/TableFunctionMongoDB.h @@ -1,8 +1,8 @@ #pragma once +#include #include #include -#include namespace DB { diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index be6dc6d28c5..3b68a0766aa 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -153,8 +153,7 @@ bool TableFunctionS3::supportsReadingSubsetOfColumns() StoragePtr TableFunctionS3::executeImpl(const ASTPtr & /*ast_function*/, ContextPtr context, const std::string & table_name, ColumnsDescription /*cached_columns*/) const { - Poco::URI uri(configuration.url); - S3::URI s3_uri(uri); + S3::URI s3_uri (configuration.url); ColumnsDescription columns; if (configuration.structure != "auto") diff --git a/tests/ci/get_robot_token.py b/tests/ci/get_robot_token.py index 4fb8cb8f49f..163e1ce071e 100644 --- a/tests/ci/get_robot_token.py +++ b/tests/ci/get_robot_token.py @@ -1,4 +1,6 @@ #!/usr/bin/env python3 +import logging + import boto3 # type: ignore from github import Github # type: ignore @@ -9,14 +11,30 @@ def get_parameter_from_ssm(name, decrypt=True, client=None): return client.get_parameter(Name=name, WithDecryption=decrypt)["Parameter"]["Value"] -def get_best_robot_token(token_prefix_env_name="github_robot_token_", total_tokens=4): +def get_best_robot_token(token_prefix_env_name="github_robot_token_"): client = 
boto3.client("ssm", region_name="us-east-1") - tokens = {} - for i in range(1, total_tokens + 1): - token_name = token_prefix_env_name + str(i) - token = get_parameter_from_ssm(token_name, True, client) - gh = Github(token, per_page=100) - rest, _ = gh.rate_limiting - tokens[token] = rest + parameters = client.describe_parameters( + ParameterFilters=[ + {"Key": "Name", "Option": "BeginsWith", "Values": [token_prefix_env_name]} + ] + )["Parameters"] + assert parameters + token = {"login": "", "value": "", "rest": 0} - return max(tokens.items(), key=lambda x: x[1])[0] + for token_name in [p["Name"] for p in parameters]: + value = get_parameter_from_ssm(token_name, True, client) + gh = Github(value, per_page=100) + # Do not spend additional request to API by accessin user.login unless + # the token is chosen by the remaining requests number + user = gh.get_user() + rest, _ = gh.rate_limiting + logging.info("Get token with %s remaining requests", rest) + if token["rest"] < rest: + token = {"user": user, "value": value, "rest": rest} + + assert token["value"] + logging.info( + "User %s with %s remaining requests is used", token["user"].login, token["rest"] + ) + + return token["value"] diff --git a/tests/config/config.d/compressed_marks_and_index.xml b/tests/config/config.d/compressed_marks_and_index.xml new file mode 100644 index 00000000000..ba8bdfe9658 --- /dev/null +++ b/tests/config/config.d/compressed_marks_and_index.xml @@ -0,0 +1,6 @@ + + + true + true + + diff --git a/tests/config/install.sh b/tests/config/install.sh index 51865665f59..8963860e465 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -52,6 +52,7 @@ ln -sf $SRC_PATH/config.d/enable_zero_copy_replication.xml $DEST_SERVER_PATH/con ln -sf $SRC_PATH/config.d/nlp.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/enable_keeper_map.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/display_name.xml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/compressed_marks_and_index.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. 
if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] diff --git a/tests/config/users.d/access_management.xml b/tests/config/users.d/access_management.xml index 3399ef5915a..8f4d82805be 100644 --- a/tests/config/users.d/access_management.xml +++ b/tests/config/users.d/access_management.xml @@ -2,6 +2,7 @@ 1 + 1 diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index e4f347babf8..e2668d7d093 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -249,7 +249,7 @@ "cosh" "basename" "evalMLMethod" -"filesystemFree" +"filesystemUnreserved" "filesystemCapacity" "reinterpretAsDate" "filesystemAvailable" diff --git a/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml b/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml index 04d34327fef..c823dd02d5a 100644 --- a/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml +++ b/tests/integration/test_backward_compatibility/configs/wide_parts_only.xml @@ -1,5 +1,7 @@ 0 + 0 + 0 diff --git a/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml b/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml index 6f83a570ccc..a51009542a3 100644 --- a/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml +++ b/tests/integration/test_global_overcommit_tracker/configs/global_overcommit_tracker.xml @@ -1,3 +1,4 @@ 2000000000 + false \ No newline at end of file diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index a498320ed5b..791ae03f9f6 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -1,8 +1,14 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/43541 + +pytestmark = pytest.mark.skip + import logging import os import time -import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV diff --git a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py index bed7772a3dd..5b75b0dfc38 100644 --- a/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialized_mysql_database/materialize_with_ddl.py @@ -2184,3 +2184,44 @@ def savepoint(clickhouse_node, mysql_node, mysql_host): mysql_node.query(f"INSERT INTO {db}.t1 VALUES (2)") mysql_node.query("ROLLBACK TO savepoint_1") mysql_node.query("COMMIT") + + +def dropddl(clickhouse_node, mysql_node, mysql_host): + db = "dropddl" + clickhouse_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"DROP DATABASE IF EXISTS {db}") + mysql_node.query(f"CREATE DATABASE {db}") + mysql_node.query(f"CREATE TABLE {db}.t1 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE TABLE {db}.t2 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE TABLE {db}.t3 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE TABLE {db}.t4 (a INT PRIMARY KEY, b INT)") + mysql_node.query(f"CREATE VIEW {db}.v1 AS SELECT * FROM {db}.t1") + mysql_node.query(f"INSERT INTO {db}.t1(a, b) VALUES(1, 1)") + + clickhouse_node.query( + f"CREATE DATABASE {db} ENGINE = MaterializeMySQL('{mysql_host}:3306', '{db}', 'root', 'clickhouse')" + ) + check_query( + clickhouse_node, + f"SELECT count() FROM system.tables where database = '{db}' FORMAT TSV", + "4\n", + ) + 
check_query(clickhouse_node, f"SELECT * FROM {db}.t1 FORMAT TSV", "1\t1\n") + mysql_node.query(f"DROP EVENT IF EXISTS {db}.event_name") + mysql_node.query(f"DROP VIEW IF EXISTS {db}.view_name") + mysql_node.query(f"DROP FUNCTION IF EXISTS {db}.function_name") + mysql_node.query(f"DROP TRIGGER IF EXISTS {db}.trigger_name") + mysql_node.query(f"DROP INDEX `PRIMARY` ON {db}.t2") + mysql_node.query(f"DROP TABLE {db}.t3") + mysql_node.query(f"DROP TABLE if EXISTS {db}.t3,{db}.t4") + mysql_node.query(f"TRUNCATE TABLE {db}.t1") + mysql_node.query(f"INSERT INTO {db}.t2(a, b) VALUES(1, 1)") + check_query(clickhouse_node, f"SELECT * FROM {db}.t2 FORMAT TSV", "1\t1\n") + check_query(clickhouse_node, f"SELECT count() FROM {db}.t1 FORMAT TSV", "0\n") + check_query( + clickhouse_node, + f"SELECT name FROM system.tables where database = '{db}' FORMAT TSV", + "t1\nt2\n", + ) + mysql_node.query(f"DROP DATABASE {db}") + clickhouse_node.query(f"DROP DATABASE {db}") diff --git a/tests/integration/test_materialized_mysql_database/test.py b/tests/integration/test_materialized_mysql_database/test.py index 0e33c01a6c9..a22d73061ae 100644 --- a/tests/integration/test_materialized_mysql_database/test.py +++ b/tests/integration/test_materialized_mysql_database/test.py @@ -516,3 +516,10 @@ def test_savepoint_query( ): materialize_with_ddl.savepoint(clickhouse_node, started_mysql_8_0, "mysql80") materialize_with_ddl.savepoint(clickhouse_node, started_mysql_5_7, "mysql57") + + +def test_materialized_database_mysql_drop_ddl( + started_cluster, started_mysql_8_0, started_mysql_5_7, clickhouse_node +): + materialize_with_ddl.dropddl(clickhouse_node, started_mysql_8_0, "mysql80") + materialize_with_ddl.dropddl(clickhouse_node, started_mysql_5_7, "mysql57") diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index b2e93db2606..50b01a83cab 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -763,7 +763,7 @@ def test_cache_setting_compatibility(cluster, node_name): node.query("DROP TABLE IF EXISTS s3_test NO DELAY") node.query( - "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_r';" + "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_r', compress_marks=false, compress_primary_key=false;" ) node.query( "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 500" diff --git a/tests/integration/test_multiple_disks/test.py b/tests/integration/test_multiple_disks/test.py index d7117e2546a..8d8e65825cc 100644 --- a/tests/integration/test_multiple_disks/test.py +++ b/tests/integration/test_multiple_disks/test.py @@ -1655,7 +1655,7 @@ def test_freeze(start_cluster): ) ENGINE = MergeTree ORDER BY tuple() PARTITION BY toYYYYMM(d) - SETTINGS storage_policy='small_jbod_with_external' + SETTINGS storage_policy='small_jbod_with_external', compress_marks=false, compress_primary_key=false """ ) diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index c53bc5a9d0d..163230cbc44 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -36,7 +36,7 @@ def partition_table_simple(started_cluster): q( "CREATE TABLE test.partition_simple (date MATERIALIZED toDate(0), x UInt64, sample_key MATERIALIZED intHash64(x)) " "ENGINE=MergeTree PARTITION BY date SAMPLE BY sample_key ORDER BY 
(date,x,sample_key) " - "SETTINGS index_granularity=8192, index_granularity_bytes=0" + "SETTINGS index_granularity=8192, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false" ) q("INSERT INTO test.partition_simple ( x ) VALUES ( now() )") q("INSERT INTO test.partition_simple ( x ) VALUES ( now()+1 )") @@ -115,7 +115,7 @@ def partition_table_complex(started_cluster): q("DROP TABLE IF EXISTS test.partition_complex") q( "CREATE TABLE test.partition_complex (p Date, k Int8, v1 Int8 MATERIALIZED k + 1) " - "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0" + "ENGINE = MergeTree PARTITION BY p ORDER BY k SETTINGS index_granularity=1, index_granularity_bytes=0, compress_marks=false, compress_primary_key=false" ) q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(31), 1)") q("INSERT INTO test.partition_complex (p, k) VALUES(toDate(1), 2)") @@ -153,7 +153,7 @@ def test_partition_complex(partition_table_complex): def cannot_attach_active_part_table(started_cluster): q("DROP TABLE IF EXISTS test.attach_active") q( - "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n" + "CREATE TABLE test.attach_active (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 4) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" ) q("INSERT INTO test.attach_active SELECT number FROM system.numbers LIMIT 16") @@ -181,7 +181,7 @@ def attach_check_all_parts_table(started_cluster): q("SYSTEM STOP MERGES") q("DROP TABLE IF EXISTS test.attach_partition") q( - "CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n" + "CREATE TABLE test.attach_partition (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" ) q( "INSERT INTO test.attach_partition SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8" @@ -259,7 +259,7 @@ def drop_detached_parts_table(started_cluster): q("SYSTEM STOP MERGES") q("DROP TABLE IF EXISTS test.drop_detached") q( - "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n" + "CREATE TABLE test.drop_detached (n UInt64) ENGINE = MergeTree() PARTITION BY intDiv(n, 8) ORDER BY n SETTINGS compress_marks=false, compress_primary_key=false" ) q( "INSERT INTO test.drop_detached SELECT number FROM system.numbers WHERE number % 2 = 0 LIMIT 8" @@ -329,9 +329,15 @@ def test_drop_detached_parts(drop_detached_parts_table): def test_system_detached_parts(drop_detached_parts_table): - q("create table sdp_0 (n int, x int) engine=MergeTree order by n") - q("create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x") - q("create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x") + q( + "create table sdp_0 (n int, x int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false" + ) + q( + "create table sdp_1 (n int, x int) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false" + ) + q( + "create table sdp_2 (n int, x String) engine=MergeTree order by n partition by x SETTINGS compress_marks=false, compress_primary_key=false" + ) q( "create table sdp_3 (n int, x Enum('broken' = 0, 'all' = 1)) engine=MergeTree order by n partition by x" ) @@ -449,7 +455,9 @@ def test_system_detached_parts(drop_detached_parts_table): def test_detached_part_dir_exists(started_cluster): - q("create table 
detached_part_dir_exists (n int) engine=MergeTree order by n") + q( + "create table detached_part_dir_exists (n int) engine=MergeTree order by n SETTINGS compress_marks=false, compress_primary_key=false" + ) q("insert into detached_part_dir_exists select 1") # will create all_1_1_0 q( "alter table detached_part_dir_exists detach partition id 'all'" @@ -488,7 +496,7 @@ def test_detached_part_dir_exists(started_cluster): def test_make_clone_in_detached(started_cluster): q( - "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n" + "create table clone_in_detached (n int, m String) engine=ReplicatedMergeTree('/clone_in_detached', '1') order by n SETTINGS compress_marks=false, compress_primary_key=false" ) path = path_to_data + "data/default/clone_in_detached/" diff --git a/tests/integration/test_polymorphic_parts/test.py b/tests/integration/test_polymorphic_parts/test.py index 32b5e531fa8..361b4855747 100644 --- a/tests/integration/test_polymorphic_parts/test.py +++ b/tests/integration/test_polymorphic_parts/test.py @@ -728,7 +728,7 @@ def test_polymorphic_parts_index(start_cluster): """ CREATE TABLE test_index.index_compact(a UInt32, s String) ENGINE = MergeTree ORDER BY a - SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100""" + SETTINGS min_rows_for_wide_part = 1000, index_granularity = 128, merge_max_block_size = 100, compress_marks=false, compress_primary_key=false""" ) node1.query( diff --git a/tests/queries/0_stateless/00502_sum_map.reference b/tests/queries/0_stateless/00502_sum_map.reference index 31b067a2bc9..b1cd0303004 100644 --- a/tests/queries/0_stateless/00502_sum_map.reference +++ b/tests/queries/0_stateless/00502_sum_map.reference @@ -1,26 +1,70 @@ +-- { echoOn } +DROP TABLE IF EXISTS sum_map; +CREATE TABLE sum_map(date Date, timeslot DateTime, statusMap Nested(status UInt16, requests UInt64)) ENGINE = Log; +INSERT INTO sum_map VALUES ('2000-01-01', '2000-01-01 00:00:00', [1, 2, 3], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:00:00', [3, 4, 5], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [4, 5, 6], [10, 10, 10]), ('2000-01-01', '2000-01-01 00:01:00', [6, 7, 8], [10, 10, 10]); +SELECT * FROM sum_map ORDER BY timeslot, statusMap.status, statusMap.requests; 2000-01-01 2000-01-01 00:00:00 [1,2,3] [10,10,10] 2000-01-01 2000-01-01 00:00:00 [3,4,5] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [4,5,6] [10,10,10] 2000-01-01 2000-01-01 00:01:00 [6,7,8] [10,10,10] +SELECT sumMap(statusMap.status, statusMap.requests) FROM sum_map; ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +SELECT sumMap((statusMap.status, statusMap.requests)) FROM sum_map; ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +SELECT sumMapMerge(s) FROM (SELECT sumMapState(statusMap.status, statusMap.requests) AS s FROM sum_map); ([1,2,3,4,5,6,7,8],[10,10,20,20,20,20,10,10]) +SELECT timeslot, sumMap(statusMap.status, statusMap.requests) FROM sum_map GROUP BY timeslot ORDER BY timeslot; 2000-01-01 00:00:00 ([1,2,3,4,5],[10,10,20,10,10]) 2000-01-01 00:01:00 ([4,5,6,7,8],[10,10,20,10,10]) +SELECT timeslot, sumMap(statusMap.status, statusMap.requests).1, sumMap(statusMap.status, statusMap.requests).2 FROM sum_map GROUP BY timeslot ORDER BY timeslot; 2000-01-01 00:00:00 [1,2,3,4,5] [10,10,20,10,10] 2000-01-01 00:01:00 [4,5,6,7,8] [10,10,20,10,10] +SELECT sumMapFiltered([1])(statusMap.status, statusMap.requests) FROM sum_map; ([1],[10]) +SELECT sumMapFiltered([1, 4, 8])(statusMap.status, statusMap.requests) FROM sum_map; 
([1,4,8],[10,20,10]) +DROP TABLE sum_map; +DROP TABLE IF EXISTS sum_map_overflow; +CREATE TABLE sum_map_overflow(events Array(UInt8), counts Array(UInt8)) ENGINE = Log; +INSERT INTO sum_map_overflow VALUES ([1], [255]), ([1], [2]); +SELECT sumMap(events, counts) FROM sum_map_overflow; ([1],[257]) +SELECT sumMapWithOverflow(events, counts) FROM sum_map_overflow; ([1],[1]) +DROP TABLE sum_map_overflow; +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'UInt64') ] as val, [1] as cnt ); ([1],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Float64') ] as val, [1] as cnt ); ([1],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST('a', 'Enum16(\'a\'=1)') ] as val, [1] as cnt ); (['a'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'DateTime(\'Asia/Istanbul\')') ] as val, [1] as cnt ); (['1970-01-01 02:00:01'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1, 'Date') ] as val, [1] as cnt ); (['1970-01-02'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST('01234567-89ab-cdef-0123-456789abcdef', 'UUID') ] as val, [1] as cnt ); (['01234567-89ab-cdef-0123-456789abcdef'],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST(1.01, 'Decimal(10,2)') ] as val, [1] as cnt ); ([1.01],[1]) +select sumMap(val, cnt) from ( SELECT [ CAST('a', 'FixedString(1)'), CAST('b', 'FixedString(1)' ) ] as val, [1, 2] as cnt ); (['a','b'],[1,2]) +select sumMap(val, cnt) from ( SELECT [ CAST('abc', 'String'), CAST('ab', 'String'), CAST('a', 'String') ] as val, [1, 2, 3] as cnt ); (['a','ab','abc'],[3,2,1]) +DROP TABLE IF EXISTS sum_map_decimal; +CREATE TABLE sum_map_decimal( + statusMap Nested( + goal_id UInt16, + revenue Decimal32(5) + ) +) ENGINE = Log; +INSERT INTO sum_map_decimal VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; ([1,2,3,4,5,6,7,8],[1,2,6,8,10,12,7,8]) +SELECT sumMapWithOverflow(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; ([1,2,3,4,5,6,7,8],[1,2,6,8,10,12,7,8]) +DROP TABLE sum_map_decimal; +CREATE TABLE sum_map_decimal_nullable (`statusMap` Array(Tuple(goal_id UInt16, revenue Nullable(Decimal(9, 5))))) engine=Log; +INSERT INTO sum_map_decimal_nullable VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal_nullable; +([1,2,3,4,5,6,7,8],[1,2,6,8,10,12,7,8]) +DROP TABLE sum_map_decimal_nullable; diff --git a/tests/queries/0_stateless/00502_sum_map.sql b/tests/queries/0_stateless/00502_sum_map.sql index acc87cc5f16..30037d49784 100644 --- a/tests/queries/0_stateless/00502_sum_map.sql +++ b/tests/queries/0_stateless/00502_sum_map.sql @@ -1,5 +1,6 @@ SET send_logs_level = 'fatal'; +-- { echoOn } DROP TABLE IF EXISTS sum_map; CREATE TABLE sum_map(date Date, timeslot DateTime, statusMap Nested(status UInt16, requests UInt64)) ENGINE = Log; @@ -54,3 +55,8 @@ SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; SELECT sumMapWithOverflow(statusMap.goal_id, statusMap.revenue) FROM sum_map_decimal; DROP TABLE sum_map_decimal; + +CREATE TABLE sum_map_decimal_nullable (`statusMap` Array(Tuple(goal_id UInt16, revenue Nullable(Decimal(9, 5))))) engine=Log; +INSERT INTO sum_map_decimal_nullable VALUES ([1, 2, 3], [1.0, 2.0, 3.0]), ([3, 4, 5], [3.0, 4.0, 5.0]), ([4, 5, 6], [4.0, 5.0, 6.0]), ([6, 7, 8], [6.0, 7.0, 8.0]); +SELECT sumMap(statusMap.goal_id, statusMap.revenue) FROM 
sum_map_decimal_nullable; +DROP TABLE sum_map_decimal_nullable; diff --git a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql index e1392d299dc..bbc7bedcb4f 100644 --- a/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql +++ b/tests/queries/0_stateless/00753_system_columns_and_system_tables_long.sql @@ -12,7 +12,7 @@ CREATE TABLE check_system_tables ORDER BY name1 PARTITION BY name2 SAMPLE BY name1 - SETTINGS min_bytes_for_wide_part = 0; + SETTINGS min_bytes_for_wide_part = 0, compress_marks=false, compress_primary_key=false; SELECT name, partition_key, sorting_key, primary_key, sampling_key, storage_policy, total_rows FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase() @@ -36,7 +36,8 @@ CREATE TABLE check_system_tables sign Int8 ) ENGINE = VersionedCollapsingMergeTree(sign, version) PARTITION BY date - ORDER BY date; + ORDER BY date + SETTINGS compress_marks=false, compress_primary_key=false; SELECT name, partition_key, sorting_key, primary_key, sampling_key FROM system.tables WHERE name = 'check_system_tables' AND database = currentDatabase() diff --git a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql index 044d60aeafb..ca9bb1b177e 100644 --- a/tests/queries/0_stateless/00804_test_delta_codec_compression.sql +++ b/tests/queries/0_stateless/00804_test_delta_codec_compression.sql @@ -9,12 +9,12 @@ DROP TABLE IF EXISTS default_codec_synthetic; CREATE TABLE delta_codec_synthetic ( id UInt64 Codec(Delta, ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; CREATE TABLE default_codec_synthetic ( id UInt64 Codec(ZSTD(3)) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; INSERT INTO delta_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; INSERT INTO default_codec_synthetic SELECT number FROM system.numbers LIMIT 5000000; @@ -47,12 +47,12 @@ DROP TABLE IF EXISTS default_codec_float; CREATE TABLE delta_codec_float ( id Float64 Codec(Delta, LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; CREATE TABLE default_codec_float ( id Float64 Codec(LZ4HC) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; INSERT INTO delta_codec_float SELECT number FROM numbers(1547510400, 500000) WHERE number % 3 == 0 OR number % 5 == 0 OR number % 7 == 0 OR number % 11 == 0; INSERT INTO default_codec_float SELECT * from delta_codec_float; @@ -85,12 +85,12 @@ DROP TABLE IF EXISTS default_codec_string; CREATE TABLE delta_codec_string ( id Float64 Codec(Delta, LZ4) -) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; CREATE TABLE default_codec_string ( id Float64 Codec(LZ4) -) ENGINE MergeTree() 
ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0; +) ENGINE MergeTree() ORDER BY tuple() SETTINGS min_bytes_for_wide_part = 0, compress_marks = false, compress_primary_key=false; INSERT INTO delta_codec_string SELECT concat(toString(number), toString(number % 100)) FROM numbers(1547510400, 500000); INSERT INTO default_codec_string SELECT * from delta_codec_string; diff --git a/tests/queries/0_stateless/00824_filesystem.sql b/tests/queries/0_stateless/00824_filesystem.sql index cd4d69a703e..c8ac9179d42 100644 --- a/tests/queries/0_stateless/00824_filesystem.sql +++ b/tests/queries/0_stateless/00824_filesystem.sql @@ -1 +1 @@ -SELECT filesystemCapacity() >= filesystemFree() AND filesystemFree() >= filesystemAvailable() AND filesystemAvailable() >= 0; +SELECT filesystemCapacity() >= filesystemAvailable() AND filesystemAvailable() >= 0 AND filesystemUnreserved() >= 0; diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql index 792bf62f9b1..cf9fd3cad12 100644 --- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql +++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.sql @@ -1,7 +1,7 @@ DROP TABLE IF EXISTS test_00961; CREATE TABLE test_00961 (d Date, a String, b UInt8, x String, y Int8, z UInt32) - ENGINE = MergeTree PARTITION BY d ORDER BY (a, b) SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0; + ENGINE = MergeTree PARTITION BY d ORDER BY (a, b) SETTINGS index_granularity = 111, min_bytes_for_wide_part = 0, compress_marks=false, compress_primary_key=false; INSERT INTO test_00961 VALUES ('2000-01-01', 'Hello, world!', 123, 'xxx yyy', -123, 123456789); diff --git a/tests/queries/0_stateless/01085_max_distributed_connections.sh b/tests/queries/0_stateless/01085_max_distributed_connections.sh index 34862289d1e..c5570fea9eb 100755 --- a/tests/queries/0_stateless/01085_max_distributed_connections.sh +++ b/tests/queries/0_stateless/01085_max_distributed_connections.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: distributed +# Tags: distributed, no-random-settings CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -12,7 +12,7 @@ while [[ $i -lt $retries ]]; do opts=( --max_distributed_connections 20 --max_threads 1 - --query "SELECT sleepEachRow(1) FROM remote('127.{2..21}', system.one)" + --query "SELECT sum(sleepEachRow(1)) FROM remote('127.{2..21}', system.one)" --format Null ) # 10 less then 20 seconds (20 streams), but long enough to cover possible load peaks diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index 59c7d978493..dd9fa7abc1b 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -85,8 +85,8 @@ SHOW ROLES ['SHOW CREATE ROLE'] GLOBAL SHOW ACCESS SHOW ROW POLICIES ['SHOW POLICIES','SHOW CREATE ROW POLICY','SHOW CREATE POLICY'] TABLE SHOW ACCESS SHOW QUOTAS ['SHOW CREATE QUOTA'] GLOBAL SHOW ACCESS SHOW SETTINGS PROFILES ['SHOW PROFILES','SHOW CREATE SETTINGS PROFILE','SHOW CREATE PROFILE'] GLOBAL SHOW ACCESS -SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] GLOBAL SHOW ACCESS SHOW ACCESS [] \N ACCESS MANAGEMENT +SHOW NAMED COLLECTIONS ['SHOW NAMED COLLECTIONS'] \N ACCESS MANAGEMENT ACCESS MANAGEMENT [] \N ALL SYSTEM SHUTDOWN ['SYSTEM KILL','SHUTDOWN'] GLOBAL SYSTEM SYSTEM DROP DNS CACHE ['SYSTEM DROP 
DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/01516_drop_table_stress_long.sh b/tests/queries/0_stateless/01516_drop_table_stress_long.sh index d2149d7122c..3932d0ebbc9 100755 --- a/tests/queries/0_stateless/01516_drop_table_stress_long.sh +++ b/tests/queries/0_stateless/01516_drop_table_stress_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long +# Tags: long, no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -11,26 +11,26 @@ function drop_database() { # redirect stderr since it is racy with DROP TABLE # and tries to remove ${CURR_DATABASE}.data too. - ${CLICKHOUSE_CLIENT} -q "DROP DATABASE IF EXISTS ${CURR_DATABASE}" 2>/dev/null + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP DATABASE IF EXISTS ${CURR_DATABASE}" 2>/dev/null } trap drop_database EXIT function drop_table() { - ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data3;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data1;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data2;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data3;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data1;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "DROP TABLE IF EXISTS ${CURR_DATABASE}.data2;" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" } function create() { - ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE IF NOT EXISTS ${CURR_DATABASE};" - ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data1 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data2 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" - ${CLICKHOUSE_CLIENT} -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data3 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE DATABASE IF NOT EXISTS ${CURR_DATABASE};" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data1 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data2 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" + ${CLICKHOUSE_CLIENT} --fsync_metadata=0 -q "CREATE TABLE IF NOT EXISTS ${CURR_DATABASE}.data3 Engine=MergeTree() ORDER BY number AS SELECT * FROM numbers(1);" 2>&1 | grep -F "Code: " | grep -Fv "is currently dropped or renamed" } -for _ in {1..50}; do +for _ in {1..25}; do create drop_table & drop_database & diff --git a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql 
b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql index e77f88aa36f..1efb9cff6a4 100644 --- a/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql +++ b/tests/queries/0_stateless/02028_system_data_skipping_indices_size.sql @@ -7,7 +7,7 @@ CREATE TABLE test_table INDEX value_index value TYPE minmax GRANULARITY 1 ) Engine=MergeTree() -ORDER BY key; +ORDER BY key SETTINGS compress_marks=false; INSERT INTO test_table VALUES (0, 'Value'); SELECT * FROM system.data_skipping_indices WHERE database = currentDatabase(); diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 6432a6e6518..ce881422f63 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -286,7 +286,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW NAMED COLLECTIONS' = 87, 'SHOW ACCESS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 
'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM 
CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -371,6 +371,19 @@ CREATE TABLE system.metrics ) ENGINE = SystemMetrics COMMENT 'SYSTEM TABLE is built on the fly.' +CREATE TABLE system.moves +( + `database` String, + `table` String, + `elapsed` Float64, + `target_disk_name` String, + `target_disk_path` String, + `part_name` String, + `part_size` UInt64, + `thread_id` UInt64 +) +ENGINE = SystemMoves +COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.mutations ( `database` String, @@ -549,10 +562,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' 
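The enum renumbering in this reference reflects `SHOW NAMED COLLECTIONS` being re-parented under `ACCESS MANAGEMENT` and swapping codes 87/88 with `SHOW ACCESS`. As an illustrative aside (not part of the patch), the new layout could be spot-checked against `system.privileges`, whose columns are the same ones shown in this reference; the cast to `Int16` is only there to expose the enum code:

```sql
-- Hedged spot-check of the reordered privileges (illustration only):
-- SHOW ACCESS is expected at code 87 and SHOW NAMED COLLECTIONS at 88,
-- with the latter now parented under ACCESS MANAGEMENT.
SELECT
    privilege,
    CAST(privilege AS Int16) AS code,
    aliases,
    level,
    parent_group
FROM system.privileges
WHERE privilege IN ('SHOW ACCESS', 'SHOW NAMED COLLECTIONS')
ORDER BY code;
```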
CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW NAMED COLLECTIONS' = 87, 'SHOW ACCESS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 
'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 
'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW NAMED COLLECTIONS' = 87, 'SHOW ACCESS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 
118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER TABLE' = 41, 'ALTER DATABASE' = 42, 'ALTER VIEW REFRESH' = 43, 'ALTER VIEW MODIFY QUERY' = 44, 'ALTER VIEW' = 45, 'ALTER' = 46, 'CREATE DATABASE' = 47, 'CREATE TABLE' = 48, 'CREATE VIEW' = 49, 'CREATE DICTIONARY' = 50, 'CREATE TEMPORARY TABLE' = 51, 'CREATE FUNCTION' = 52, 'CREATE' = 53, 'DROP DATABASE' = 54, 'DROP TABLE' = 55, 'DROP VIEW' = 56, 'DROP DICTIONARY' = 57, 'DROP FUNCTION' = 58, 'DROP' = 59, 'TRUNCATE' = 60, 'OPTIMIZE' = 61, 'BACKUP' = 62, 'KILL QUERY' = 63, 'KILL TRANSACTION' = 64, 'MOVE PARTITION BETWEEN SHARDS' = 65, 'CREATE USER' = 66, 'ALTER USER' = 67, 'DROP USER' = 68, 'CREATE ROLE' = 69, 'ALTER ROLE' = 70, 'DROP ROLE' = 71, 'ROLE ADMIN' = 72, 'CREATE ROW POLICY' = 73, 'ALTER ROW POLICY' = 74, 'DROP ROW POLICY' = 75, 'CREATE QUOTA' = 76, 'ALTER QUOTA' = 77, 'DROP QUOTA' = 78, 'CREATE SETTINGS PROFILE' = 79, 'ALTER SETTINGS PROFILE' = 80, 'DROP SETTINGS PROFILE' = 81, 'SHOW USERS' = 82, 'SHOW ROLES' = 83, 'SHOW ROW POLICIES' = 84, 'SHOW QUOTAS' = 85, 'SHOW SETTINGS PROFILES' = 86, 'SHOW ACCESS' = 87, 'SHOW NAMED COLLECTIONS' = 88, 'ACCESS MANAGEMENT' = 89, 'SYSTEM SHUTDOWN' = 90, 'SYSTEM DROP DNS CACHE' = 91, 'SYSTEM DROP MARK CACHE' = 92, 'SYSTEM DROP UNCOMPRESSED CACHE' = 93, 'SYSTEM DROP MMAP CACHE' = 94, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 95, 'SYSTEM DROP FILESYSTEM CACHE' = 96, 'SYSTEM DROP SCHEMA CACHE' = 97, 'SYSTEM DROP CACHE' = 98, 'SYSTEM RELOAD CONFIG' = 99, 'SYSTEM RELOAD USERS' = 100, 'SYSTEM RELOAD SYMBOLS' = 101, 'SYSTEM RELOAD DICTIONARY' = 102, 'SYSTEM RELOAD MODEL' = 103, 'SYSTEM RELOAD FUNCTION' = 104, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 105, 'SYSTEM RELOAD' = 106, 'SYSTEM RESTART DISK' = 107, 'SYSTEM MERGES' = 108, 'SYSTEM TTL MERGES' = 109, 'SYSTEM FETCHES' = 110, 'SYSTEM MOVES' = 111, 'SYSTEM DISTRIBUTED SENDS' = 112, 
'SYSTEM REPLICATED SENDS' = 113, 'SYSTEM SENDS' = 114, 'SYSTEM REPLICATION QUEUES' = 115, 'SYSTEM DROP REPLICA' = 116, 'SYSTEM SYNC REPLICA' = 117, 'SYSTEM RESTART REPLICA' = 118, 'SYSTEM RESTORE REPLICA' = 119, 'SYSTEM SYNC DATABASE REPLICA' = 120, 'SYSTEM SYNC TRANSACTION LOG' = 121, 'SYSTEM FLUSH DISTRIBUTED' = 122, 'SYSTEM FLUSH LOGS' = 123, 'SYSTEM FLUSH' = 124, 'SYSTEM THREAD FUZZER' = 125, 'SYSTEM UNFREEZE' = 126, 'SYSTEM' = 127, 'dictGet' = 128, 'addressToLine' = 129, 'addressToLineWithInlines' = 130, 'addressToSymbol' = 131, 'demangle' = 132, 'INTROSPECTION' = 133, 'FILE' = 134, 'URL' = 135, 'REMOTE' = 136, 'MONGO' = 137, 'MEILISEARCH' = 138, 'MYSQL' = 139, 'POSTGRES' = 140, 'SQLITE' = 141, 'ODBC' = 142, 'JDBC' = 143, 'HDFS' = 144, 'S3' = 145, 'HIVE' = 146, 'SOURCES' = 147, 'CLUSTER' = 148, 'ALL' = 149, 'NONE' = 150)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' diff --git a/tests/queries/0_stateless/02117_show_create_table_system.sql b/tests/queries/0_stateless/02117_show_create_table_system.sql index 8b75ed60eec..37bf2667069 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.sql +++ b/tests/queries/0_stateless/02117_show_create_table_system.sql @@ -45,6 +45,7 @@ show create table macros format TSVRaw; show create table merge_tree_settings format TSVRaw; show create table merges format TSVRaw; show create table metrics format TSVRaw; +show create table moves format TSVRaw; show create table mutations format TSVRaw; show create table numbers format TSVRaw; show create table numbers_mt format TSVRaw; diff --git a/tests/queries/0_stateless/02124_buffer_insert_select_race.reference b/tests/queries/0_stateless/02124_buffer_insert_select_race.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02124_buffer_insert_select_race.sh b/tests/queries/0_stateless/02124_buffer_insert_select_race.sh new file mode 100755 index 00000000000..22965a274c0 --- /dev/null +++ b/tests/queries/0_stateless/02124_buffer_insert_select_race.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +# Regression test for 'Logical error: No column to rollback' in case of +# exception while commiting batch into the Buffer, see [1]. +# +# [1]: https://github.com/ClickHouse/ClickHouse/issues/42740 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_buffer_string" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_buffer_string(key String) ENGINE = Buffer('', '', 1, 1, 1, 1000000000000, 1000000000000, 1000000000000, 1000000000000)" + +# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors +# --concurrency -- we need have SELECT and INSERT in parallel to have refcount +# of the column in the Buffer block > 1, that way we will do +# full clone and moving a column may throw. 
+# +# It reproduces the problem 100% with MemoryTrackerFaultInjectorInThread in the appendBlock() +$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null < /dev/null > /dev/null - done -} - -TIMEOUT=10 - -export -f insert1 -export -f select1 - -timeout $TIMEOUT bash -c insert1 & -timeout $TIMEOUT bash -c select1 & - -wait +# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors +$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null < 0, filesystemFree() <= filesystemCapacity()" +$CLICKHOUSE_LOCAL --query "SELECT filesystemAvailable() > 0, filesystemUnreserved() <= filesystemCapacity()" diff --git a/tests/queries/0_stateless/02366_explain_query_tree.sql b/tests/queries/0_stateless/02366_explain_query_tree.sql index c942f0cac37..c38b2d819d1 100644 --- a/tests/queries/0_stateless/02366_explain_query_tree.sql +++ b/tests/queries/0_stateless/02366_explain_query_tree.sql @@ -1,6 +1,6 @@ SET allow_experimental_analyzer = 1; -EXPLAIN QUERY TREE SELECT 1; +EXPLAIN QUERY TREE run_passes = 0 SELECT 1; SELECT '--'; @@ -13,7 +13,7 @@ CREATE TABLE test_table INSERT INTO test_table VALUES (0, 'Value'); -EXPLAIN QUERY TREE SELECT id, value FROM test_table; +EXPLAIN QUERY TREE run_passes = 0 SELECT id, value FROM test_table; SELECT '--'; @@ -21,7 +21,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT id, value FROM test_table; SELECT '--'; -EXPLAIN QUERY TREE SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table; +EXPLAIN QUERY TREE run_passes = 0 SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table; SELECT '--'; @@ -29,7 +29,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT arrayMap(x -> x + 1, [1, 2, 3]) FROM te SELECT '--'; -EXPLAIN QUERY TREE WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; +EXPLAIN QUERY TREE run_passes = 0 WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table; SELECT '--'; diff --git a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql index cf5ca15adeb..0f1b4f638cb 100644 --- a/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql +++ b/tests/queries/0_stateless/02381_compress_marks_and_primary_key.sql @@ -1,7 +1,7 @@ -- Tags: no-backward-compatibility-check drop table if exists test_02381; -create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b); +create table test_02381(a UInt64, b UInt64) ENGINE = MergeTree order by (a, b) SETTINGS compress_marks=false, compress_primary_key=false; insert into test_02381 select number, number * 10 from system.numbers limit 1000000; drop table if exists test_02381_compress; diff --git a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference index 7f79a172f4b..083f0f69dc8 100644 --- a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference +++ b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.reference @@ -3,7 +3,7 @@ SET enable_filesystem_cache_on_write_operations=0; SYSTEM DROP FILESYSTEM CACHE; DROP TABLE IF EXISTS nopers; -CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; 
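Many of the `CREATE TABLE` statements touched in this patch pin `compress_marks` and `compress_primary_key` to `false` so that byte-exact expectations (cache sizes, part checksums, skipping-index sizes) are not perturbed by the newly introduced compressed marks and primary key. A minimal sketch of the setting in isolation, using a hypothetical table name rather than any table from the tests:

```sql
-- Minimal sketch (hypothetical table): keep marks and the primary key
-- uncompressed so on-disk sizes stay stable for reference-based tests.
DROP TABLE IF EXISTS t_marks_demo;
CREATE TABLE t_marks_demo (key UInt32, value String)
ENGINE = MergeTree ORDER BY key
SETTINGS compress_marks = false, compress_primary_key = false;

INSERT INTO t_marks_demo SELECT number, toString(number) FROM numbers(10);

-- The active part then carries plain (uncompressed) marks and primary index.
SELECT name, marks, marks_bytes, primary_key_bytes_in_memory
FROM system.parts
WHERE database = currentDatabase() AND table = 't_marks_demo' AND active;

DROP TABLE t_marks_demo;
```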
SYSTEM STOP MERGES nopers; INSERT INTO nopers SELECT number, toString(number) FROM numbers(10); SELECT * FROM nopers FORMAT Null; @@ -22,7 +22,7 @@ ORDER BY file, cache, size; data.bin 0 114 data.mrk3 0 80 DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; @@ -47,7 +47,7 @@ data.bin 0 746 data.mrk3 0 80 data.mrk3 0_persistent 80 DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test2; INSERT INTO test2 SELECT number, toString(number) FROM numbers(100000); SELECT * FROM test2 FORMAT Null; diff --git a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql index d7171de48ad..6486840602e 100644 --- a/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql +++ b/tests/queries/0_stateless/02382_filesystem_cache_persistent_files.sql @@ -7,7 +7,7 @@ SET enable_filesystem_cache_on_write_operations=0; SYSTEM DROP FILESYSTEM CACHE; DROP TABLE IF EXISTS nopers; -CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760; +CREATE TABLE nopers (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES nopers; INSERT INTO nopers SELECT number, toString(number) FROM numbers(10); @@ -26,7 +26,7 @@ ON data_paths.cache_path = caches.cache_path ORDER BY file, cache, size; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -49,7 +49,7 @@ ON data_paths.cache_path = caches.cache_path ORDER BY file, cache, size; DROP TABLE IF EXISTS test2; -CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760; +CREATE TABLE test2 (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_small', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test2; INSERT INTO test2 SELECT number, toString(number) FROM numbers(100000); diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference 
b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 040a8c8d317..34180020680 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -322,7 +322,7 @@ farmHash64 file filesystemAvailable filesystemCapacity -filesystemFree +filesystemUnreserved finalizeAggregation firstSignificantSubdomainCustom firstSignificantSubdomainCustomRFC diff --git a/tests/queries/0_stateless/02457_filesystem_function.reference b/tests/queries/0_stateless/02457_filesystem_function.reference new file mode 100644 index 00000000000..6ed281c757a --- /dev/null +++ b/tests/queries/0_stateless/02457_filesystem_function.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/02457_filesystem_function.sql b/tests/queries/0_stateless/02457_filesystem_function.sql new file mode 100644 index 00000000000..d8322bc65b5 --- /dev/null +++ b/tests/queries/0_stateless/02457_filesystem_function.sql @@ -0,0 +1,6 @@ +-- Tags: no-fasttest + +select filesystemCapacity('s3_disk') >= filesystemAvailable('s3_disk') and filesystemAvailable('s3_disk') >= filesystemUnreserved('s3_disk'); +select filesystemCapacity('default') >= filesystemAvailable('default') and filesystemAvailable('default') >= 0 and filesystemUnreserved('default') >= 0; + +select filesystemCapacity('__un_exists_disk'); -- { serverError UNKNOWN_DISK } diff --git a/tests/queries/0_stateless/02459_group_by_all.reference b/tests/queries/0_stateless/02459_group_by_all.reference new file mode 100644 index 00000000000..7c5ccbd8fbf --- /dev/null +++ b/tests/queries/0_stateless/02459_group_by_all.reference @@ -0,0 +1,44 @@ +abc1 1 +abc2 1 +abc3 1 +abc4 1 +abc 4 +abc ab +abc ab +abc ab +abc bc +abc bc +abc a +abc a +abc a +abc a +abc a +abc a +abc a +abc a +1 abc a +1 abc a +1 abc a +1 abc a +abc1 1 +abc2 1 +abc3 1 +abc4 1 +abc 4 +abc ab +abc ab +abc ab +abc bc +abc bc +abc a +abc a +abc a +abc a +abc a +abc a +abc a +abc a +1 abc a +1 abc a +1 abc a +1 abc a diff --git a/tests/queries/0_stateless/02459_group_by_all.sql b/tests/queries/0_stateless/02459_group_by_all.sql new file mode 100644 index 00000000000..4f08ee331a4 --- /dev/null +++ b/tests/queries/0_stateless/02459_group_by_all.sql @@ -0,0 +1,35 @@ +DROP TABLE IF EXISTS group_by_all; + +CREATE TABLE group_by_all +( + a String, + b int, + c int +) +engine = Memory; + +insert into group_by_all values ('abc1', 1, 1), ('abc2', 1, 1), ('abc3', 1, 1), ('abc4', 1, 1); + +select a, count(b) from group_by_all group by all order by a; +select substring(a, 1, 3), count(b) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all; +select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all; + +SET allow_experimental_analyzer = 1; + 
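The new `02459_group_by_all.sql` test runs the same block of queries twice, once with the default interpreter and once (below) with `allow_experimental_analyzer = 1`. `GROUP BY ALL` groups by every non-aggregate expression in the SELECT list. A minimal, self-contained sketch with a hypothetical table, separate from the test's own `group_by_all` table:

```sql
-- Minimal sketch (hypothetical table): GROUP BY ALL infers the grouping key
-- from the non-aggregate expressions in the SELECT list.
DROP TABLE IF EXISTS gba_demo;
CREATE TABLE gba_demo (a String, b Int32) ENGINE = Memory;
INSERT INTO gba_demo VALUES ('abc1', 1), ('abc1', 1), ('abc2', 1);

SELECT a, count(b) FROM gba_demo GROUP BY ALL ORDER BY a;
-- equivalent to spelling the grouping key out explicitly:
SELECT a, count(b) FROM gba_demo GROUP BY a ORDER BY a;

DROP TABLE gba_demo;
```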
+select a, count(b) from group_by_all group by all order by a; +select substring(a, 1, 3), count(b) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all; +select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all; +select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all; diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index c1b31eeecbb..6c1c32822d3 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -27,7 +27,7 @@ with client(name="client>", log=log) as client1: ) client1.expect(prompt) client1.send(f"INSERT INTO test.infile_progress FROM INFILE '{filename}'") - client1.expect("Progress: 0.00 rows, 10.00 B.*\)") + client1.expect("Progress: 5.00 rows, 30.00 B.*\)") client1.expect(prompt) # send Ctrl-C diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.reference b/tests/queries/0_stateless/02475_bson_each_row_format.reference new file mode 100644 index 00000000000..b4a841ed3eb --- /dev/null +++ b/tests/queries/0_stateless/02475_bson_each_row_format.reference @@ -0,0 +1,252 @@ +Integers +false 0 0 0 0 0 0 0 0 +true 1 1 1 1 1 1 1 1 +true 2 2 2 2 2 2 2 2 +true 3 3 3 3 3 3 3 3 +true 4 4 4 4 4 4 4 4 +bool Nullable(Bool) +int8 Nullable(Int32) +uint8 Nullable(Int32) +int16 Nullable(Int32) +uint16 Nullable(Int32) +int32 Nullable(Int32) +uint32 Nullable(Int64) +int64 Nullable(Int64) +uint64 Nullable(Int64) +false 0 0 0 0 0 0 0 0 +true 1 1 1 1 1 1 1 1 +true 2 2 2 2 2 2 2 2 +true 3 3 3 3 3 3 3 3 +true 4 4 4 4 4 4 4 4 +Integers conversion +1 4294967295 +1 -1 +1 65535 +1 -1 +1 255 +1 -1 +uint64 Nullable(Int64) +int64 Nullable(Int64) +4294967297 -4294967297 +Floats +0 0 +0.5 0.5 +0.6666667 0.6666666666666666 +0.75 0.75 +0.8 0.8 +float32 Nullable(Float64) +float64 Nullable(Float64) +0 0 +0.5 0.5 +0.6666666865348816 0.6666666666666666 +0.75 0.75 +0.800000011920929 0.8 +Big integers +0 0 0 0 +-10000000000000000000000 10000000000000000000000 -100000000000000000000000000000000000000000000 100000000000000000000000000000000000000000000 +-20000000000000000000000 20000000000000000000000 -200000000000000000000000000000000000000000000 200000000000000000000000000000000000000000000 +-30000000000000000000000 30000000000000000000000 -300000000000000000000000000000000000000000000 300000000000000000000000000000000000000000000 +-40000000000000000000000 40000000000000000000000 -400000000000000000000000000000000000000000000 400000000000000000000000000000000000000000000 +int128 Nullable(String) +uint128 Nullable(String) +int256 Nullable(String) +uint256 Nullable(String) +Dates +1970-01-01 1970-01-01 1970-01-01 00:00:00 1970-01-01 00:00:00.000000 +1970-01-02 1970-01-02 1970-01-01 00:00:01 1970-01-01 00:00:01.000000 +1970-01-03 1970-01-03 1970-01-01 00:00:02 1970-01-01 00:00:02.000000 
+1970-01-04 1970-01-04 1970-01-01 00:00:03 1970-01-01 00:00:03.000000 +1970-01-05 1970-01-05 1970-01-01 00:00:04 1970-01-01 00:00:04.000000 +date Nullable(Int32) +date32 Nullable(Int32) +datetime Nullable(Int64) +datetime64 Nullable(DateTime64(6, \'UTC\')) +0 0 0 1970-01-01 00:00:00.000000 +1 1 1 1970-01-01 00:00:01.000000 +2 2 2 1970-01-01 00:00:02.000000 +3 3 3 1970-01-01 00:00:03.000000 +4 4 4 1970-01-01 00:00:04.000000 +Decimals +0 0 0 0 +42.422 42.424242 42.424242424242 42.424242424242424242424242 +84.844 84.848484 84.848484848484 84.848484848484848484848484 +127.266 127.272726 127.272727272726 127.272727272727272727272726 +169.688 169.696968 169.696969696968 169.696969696969696969696968 +decimal32 Nullable(Int32) +decimal64 Nullable(Int64) +decimal128 Nullable(String) +decimal256 Nullable(String) +Strings + \0\0\0\0\0 +HelloWorld b\0\0\0\0 +HelloWorldHelloWorld cc\0\0\0 +HelloWorldHelloWorldHelloWorld ddd\0\0 +HelloWorldHelloWorldHelloWorldHelloWorld eeee\0 + \0\0\0\0\0 +HelloWorld b\0\0\0\0 +HelloWorldHelloWorld cc\0\0\0 +HelloWorldHelloWorldHelloWorld ddd\0\0 +HelloWorldHelloWorldHelloWorldHelloWorld eeee\0 +str Nullable(String) +fixstr Nullable(String) + \0\0\0\0\0 +HelloWorld b\0\0\0\0 +HelloWorldHelloWorld cc\0\0\0 +HelloWorldHelloWorldHelloWorld ddd\0\0 +HelloWorldHelloWorldHelloWorldHelloWorld eeee\0 +UUID +b86d5c23-4b87-4465-8f33-4a685fa1c868 +uuid Nullable(UUID) +b86d5c23-4b87-4465-8f33-4a685fa1c868 +LowCardinality +a +b +c +a +b +lc Nullable(String) +a +b +c +a +b +Nullable +0 +\N +2 +\N +4 +0 +0 +2 +0 +4 +FAIL +null Nullable(Int64) +0 +\N +2 +\N +4 +LowCardinality(Nullable) +a +\N +c +\N +b +lc Nullable(String) +a +\N +c +\N +b +Array +[] ['Hello'] +[0] ['Hello'] +[0,1] ['Hello'] +[0,1,2] ['Hello'] +[0,1,2,3] ['Hello'] +arr1 Array(Nullable(Int64)) +arr2 Array(Nullable(String)) +[] ['Hello'] +[0] ['Hello'] +[0,1] ['Hello'] +[0,1,2] ['Hello'] +[0,1,2,3] ['Hello'] +Tuple +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +('Hello',0) +('Hello',1) +('Hello',2) +('Hello',3) +('Hello',4) +OK +OK +tuple Tuple(x Nullable(Int64), s Nullable(String)) +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +OK +OK +tuple Tuple(Nullable(Int64), Nullable(String)) +(0,'Hello') +(1,'Hello') +(2,'Hello') +(3,'Hello') +(4,'Hello') +Map +OK +OK +{'a':0,'b':1} +{'a':1,'b':2} +{'a':2,'b':3} +{'a':3,'b':4} +{'a':4,'b':5} +map Map(String, Nullable(Int64)) +{'a':0,'b':1} +{'a':1,'b':2} +{'a':2,'b':3} +{'a':3,'b':4} +{'a':4,'b':5} +Nested types +[[],[0]] ((0,'Hello'),'Hello') {'a':{'a.a':0,'a.b':1},'b':{'b.a':0,'b.b':1}} +[[0],[0,1]] ((1,'Hello'),'Hello') {'a':{'a.a':1,'a.b':2},'b':{'b.a':1,'b.b':2}} +[[0,1],[0,1,2]] ((2,'Hello'),'Hello') {'a':{'a.a':2,'a.b':3},'b':{'b.a':2,'b.b':3}} +[[0,1,2],[0,1,2,3]] ((3,'Hello'),'Hello') {'a':{'a.a':3,'a.b':4},'b':{'b.a':3,'b.b':4}} +[[0,1,2,3],[0,1,2,3,4]] ((4,'Hello'),'Hello') {'a':{'a.a':4,'a.b':5},'b':{'b.a':4,'b.b':5}} +nested1 Array(Array(Nullable(Int64))) +nested2 Tuple(Tuple(x Nullable(Int64), s Nullable(String)), Nullable(String)) +nested3 Map(String, Map(String, Nullable(Int64))) +[[],[0]] ((0,'Hello'),'Hello') {'a':{'a.a':0,'a.b':1},'b':{'b.a':0,'b.b':1}} +[[0],[0,1]] ((1,'Hello'),'Hello') {'a':{'a.a':1,'a.b':2},'b':{'b.a':1,'b.b':2}} +[[0,1],[0,1,2]] ((2,'Hello'),'Hello') {'a':{'a.a':2,'a.b':3},'b':{'b.a':2,'b.b':3}} +[[0,1,2],[0,1,2,3]] ((3,'Hello'),'Hello') 
{'a':{'a.a':3,'a.b':4},'b':{'b.a':3,'b.b':4}} +[[0,1,2,3],[0,1,2,3,4]] ((4,'Hello'),'Hello') {'a':{'a.a':4,'a.b':5},'b':{'b.a':4,'b.b':5}} +[({'a':[],'b':[0]},[{'c':([],[0])},{'d':([0,1],[0,1,2])}])] +[({'a':[0],'b':[0,1]},[{'c':([0],[0,1])},{'d':([0,1,2],[0,1,2,3])}])] +[({'a':[0,1],'b':[0,1,2]},[{'c':([0,1],[0,1,2])},{'d':([0,1,2,3],[0,1,2,3,4])}])] +[({'a':[0,1,2],'b':[0,1,2,3]},[{'c':([0,1,2],[0,1,2,3])},{'d':([0,1,2,3,4],[0,1,2,3,4,5])}])] +[({'a':[0,1,2,3],'b':[0,1,2,3,4]},[{'c':([0,1,2,3],[0,1,2,3,4])},{'d':([0,1,2,3,4,5],[0,1,2,3,4,5,6])}])] +nested Array(Tuple(Map(String, Array(Nullable(Int64))), Array(Map(String, Array(Array(Nullable(Int64))))))) +[({'a':[],'b':[0]},[{'c':[[],[0]]},{'d':[[0,1],[0,1,2]]}])] +[({'a':[0],'b':[0,1]},[{'c':[[0],[0,1]]},{'d':[[0,1,2],[0,1,2,3]]}])] +[({'a':[0,1],'b':[0,1,2]},[{'c':[[0,1],[0,1,2]]},{'d':[[0,1,2,3],[0,1,2,3,4]]}])] +[({'a':[0,1,2],'b':[0,1,2,3]},[{'c':[[0,1,2],[0,1,2,3]]},{'d':[[0,1,2,3,4],[0,1,2,3,4,5]]}])] +[({'a':[0,1,2,3],'b':[0,1,2,3,4]},[{'c':[[0,1,2,3],[0,1,2,3,4]]},{'d':[[0,1,2,3,4,5],[0,1,2,3,4,5,6]]}])] +Schema inference +x Nullable(Int32) +x Nullable(Int64) +x Nullable(Int64) +FAIL +x Array(Nullable(Int32)) +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +FAIL +OK +OK +OK +OK +Sync after error +OK +0 42 [] +1 42 [0] +2 42 [0,1] +0 42 [] +1 42 [0] +2 42 [0,1] diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh new file mode 100755 index 00000000000..6de33b38183 --- /dev/null +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -0,0 +1,199 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +echo "Integers" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Bool as bool, number::Int8 as int8, number::UInt8 as uint8, number::Int16 as int16, number::UInt16 as uint16, number::Int32 as int32, number::UInt32 as uint32, number::Int64 as int64, number::UInt64 as uint64 from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'bool Bool, int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + +echo "Integers conversion" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'uint64 UInt64, int64 Int64') select 4294967297, -4294967297 settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 UInt32, int64 UInt32')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 Int32, int64 Int32')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 UInt16, int64 UInt16')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 Int16, int64 Int16')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 UInt8, int64 UInt8')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uint64 Int8, int64 Int8')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Floats" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 
'float32 Float32, float64 Float64') select number / (number + 1), number / (number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'float32 Float32, float64 Float64')"; + + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Big integers" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256') select number * -10000000000000000000000::Int128 as int128, number * 10000000000000000000000::UInt128 as uint128, number * -100000000000000000000000000000000000000000000::Int256 as int256, number * 100000000000000000000000000000000000000000000::UInt256 as uint256 from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'int128 Int128, uint128 UInt128, int256 Int256, uint256 UInt256')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" + + +echo "Dates" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'date Date, date32 Date32, datetime DateTime(\'UTC\'), datetime64 DateTime64(6, \'UTC\')') select number, number, number, number from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'date Date, date32 Date32, datetime DateTime(\'UTC\'), datetime64 DateTime64(6, \'UTC\')')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Decimals" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6), decimal128 Decimal128(12), decimal256 Decimal256(24)') select number * 42.422::Decimal32(3) as decimal32, number * 42.424242::Decimal64(6) as decimal64, number * 42.424242424242::Decimal128(12) as decimal128, number * 42.424242424242424242424242::Decimal256(24) as decimal256 from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'decimal32 Decimal32(3), decimal64 Decimal64(6), decimal128 Decimal128(12), decimal256 Decimal256(24)')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" + + +echo "Strings" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)') select repeat('HelloWorld', number), repeat(char(97 + number), number % 6) from numbers(5) settings engine_file_truncate_on_insert=1, output_format_bson_string_as_string=0" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)')" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)') select repeat('HelloWorld', number), repeat(char(97 + number), number % 6) from numbers(5) settings engine_file_truncate_on_insert=1, output_format_bson_string_as_string=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'str String, fixstr FixedString(5)')" + + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "UUID" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'uuid UUID') select 'b86d5c23-4b87-4465-8f33-4a685fa1c868'::UUID settings engine_file_truncate_on_insert=1" 
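+# The typed read back, then "desc" and an untyped "select", check that the UUID
+# round-trips and that schema inference maps the stored BSON value back to
+# Nullable(UUID), as shown in the reference file.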
+$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'uuid UUID')" + + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "LowCardinality" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'lc LowCardinality(String)') select char(97 + number % 3)::LowCardinality(String) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'lc LowCardinality(String)')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Nullable" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)') select number % 2 ? NULL : number from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null Nullable(UInt32)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'null UInt32') settings input_format_null_as_default=0" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "LowCardinality(Nullable)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'lc LowCardinality(Nullable(String))') select number % 2 ? NULL : char(97 + number % 3)::LowCardinality(String) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'lc LowCardinality(Nullable(String))')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Array" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'arr1 Array(UInt64), arr2 Array(String)') select range(number), ['Hello'] from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'arr1 Array(UInt64), arr2 Array(String)') settings engine_file_truncate_on_insert=1" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Tuple" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, s String)') select tuple(number, 'Hello') from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, s String)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(s String, x UInt64)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, b String)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64, String)') select tuple(number, 'Hello') from numbers(5) settings 
engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(x UInt64, s String)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64, String)')" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'tuple Tuple(UInt64, String, UInt64)')" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Map" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)') select map(1, number, 2, number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)') select map('a', number, 'b', number + 1) from numbers(5) settings engine_file_truncate_on_insert=1" + +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(UInt64, UInt64)')" 2>&1 | grep -q -F "ILLEGAL_COLUMN" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'map Map(String, UInt64)')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Nested types" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'nested1 Array(Array(UInt32)), nested2 Tuple(Tuple(x UInt32, s String), String), nested3 Map(String, Map(String, UInt32))') select [range(number), range(number + 1)], tuple(tuple(number, 'Hello'), 'Hello'), map('a', map('a.a', number, 'a.b', number + 1), 'b', map('b.a', number, 'b.b', number + 1)) from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'nested1 Array(Array(UInt32)), nested2 Tuple(Tuple(x UInt32, s String), String), nested3 Map(String, Map(String, UInt32))')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow, auto, 'nested Array(Tuple(Map(String, Array(UInt32)), Array(Map(String, Tuple(Array(UInt64), Array(UInt64))))))') select [(map('a', range(number), 'b', range(number + 1)), [map('c', (range(number), range(number + 1))), map('d', (range(number + 2), range(number + 3)))])] from numbers(5) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow, auto, 'nested Array(Tuple(Map(String, Array(UInt32)), Array(Map(String, Tuple(Array(UInt64), Array(UInt64))))))')" + +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "select * from file(02475_data.bsonEachRow)" + + +echo "Schema inference" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Bool as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Int32 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select 
number::UInt32 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Int64 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::UInt64 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Bool] as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Int32] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::UInt32] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Int64] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::UInt64] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [] as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "ONLY_NULLS_WHILE_READING_SCHEMA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select NULL as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "ONLY_NULLS_WHILE_READING_SCHEMA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [NULL, 1] as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "ONLY_NULLS_WHILE_READING_SCHEMA" && echo "OK" || echo "FAIL" + +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select tuple(1, 'str') as x from numbers(2) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select tuple(1) as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" + + +echo "Sync after error" +$CLICKHOUSE_CLIENT -q "insert into function file(data.bsonEachRow) select number, 42::Int128 as int, range(number) as arr from numbers(3) settings engine_file_truncate_on_insert=1" +$CLICKHOUSE_CLIENT -q " insert into function file(data.bsonEachRow) select number, 'Hello' as int, range(number) as arr from numbers(2) settings engine_file_truncate_on_insert=0" +$CLICKHOUSE_CLIENT -q "insert into function file(data.bsonEachRow) select number, 42::Int128 as int, range(number) as arr from numbers(3) settings engine_file_truncate_on_insert=0" +$CLICKHOUSE_CLIENT -q "select * from file(data.bsonEachRow, auto, 'number UInt64, int Int128, arr Array(UInt64)') settings input_format_allow_errors_num=0" 2>&1 | grep -q -F "INCORRECT_DATA" && echo "OK" || echo "FAIL" +$CLICKHOUSE_CLIENT -q "select * from 
file(data.bsonEachRow, auto, 'number UInt64, int Int128, arr Array(UInt64)') settings input_format_allow_errors_num=2" diff --git a/tests/queries/0_stateless/02477_single_value_data_string_regression.reference b/tests/queries/0_stateless/02477_single_value_data_string_regression.reference index e89b8ff7d99..9285866de08 100644 --- a/tests/queries/0_stateless/02477_single_value_data_string_regression.reference +++ b/tests/queries/0_stateless/02477_single_value_data_string_regression.reference @@ -23,3 +23,8 @@ 1M without 0 1048576 1M with 0 1048575 fuzz2 0123 4 +1 0 +2 \0 1 +3 \0\0\0\0 4 +4 abrac\0dabra\0 12 +abrac\0dabra\0 12 diff --git a/tests/queries/0_stateless/02477_single_value_data_string_regression.sql b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql index c8030733e34..0f11a06f3fc 100644 --- a/tests/queries/0_stateless/02477_single_value_data_string_regression.sql +++ b/tests/queries/0_stateless/02477_single_value_data_string_regression.sql @@ -90,11 +90,12 @@ SELECT '-1', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('ffffffff' SELECT '-2', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('fffffffe') || randomString(100500), 'AggregateFunction(max, String)') as x); SELECT '-2^31', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('00000080') || randomString(100500), 'AggregateFunction(max, String)') as x); -SELECT '2^31-2', maxMerge(x) from (select CAST(unhex('feffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE } SELECT '2^31-1', maxMerge(x) from (select CAST(unhex('ffffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE } -SELECT '2^30', maxMerge(x) from (select CAST(unhex('00000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE } -SELECT '2^30+1', maxMerge(x) from (select CAST(unhex('01000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE } +SELECT '2^31-2', maxMerge(x) from (select CAST(unhex('feffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } + +SELECT '2^30', maxMerge(x) from (select CAST(unhex('00000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } +SELECT '2^30+1', maxMerge(x) from (select CAST(unhex('01000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } SELECT '2^30-1', maxMerge(x) from (select CAST(unhex('ffffff3f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA } -- The following query works, but it's too long and consumes to much memory @@ -107,3 +108,14 @@ SELECT 'fuzz2', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '01' SELECT 'fuzz3', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } SELECT 'fuzz4', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA } SELECT 'fuzz5', finalizeAggregation(CAST(unhex('0100000000000000000FFFFFFFF0'), 'AggregateFunction(argMax, UInt64, String)')); -- { serverError CORRUPTED_DATA } + + +drop table if exists aggr; +create table aggr (n int, s 
AggregateFunction(max, String)) engine=MergeTree order by n; +insert into aggr select 1, maxState(''); +insert into aggr select 2, maxState('\0'); +insert into aggr select 3, maxState('\0\0\0\0'); +insert into aggr select 4, maxState('abrac\0dabra\0'); +select n, maxMerge(s) as x, length(x) from aggr group by n order by n; +select maxMerge(s) as x, length(x) from aggr; +drop table aggr; diff --git a/tests/queries/0_stateless/02480_s3_support_wildcard.reference b/tests/queries/0_stateless/02480_s3_support_wildcard.reference new file mode 100644 index 00000000000..c6b63f647f8 --- /dev/null +++ b/tests/queries/0_stateless/02480_s3_support_wildcard.reference @@ -0,0 +1,43 @@ +-- { echo } +drop table if exists test_02480_write; +drop table if exists test_02480_write2; +create table test_02480_write (a UInt64, b String) engine = S3(s3_conn, filename='test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write values (1, 'a'), (22, 'b'), (333, 'c'); +select a, b from s3(s3_conn, filename='test_02480_*', format=Parquet) order by a; +1 a +22 b +333 c +select a, b from s3(s3_conn, filename='test_02480_?', format=Parquet) order by a; +1 a +select a, b from s3(s3_conn, filename='test_02480_??', format=Parquet) order by a; +22 b +select a, b from s3(s3_conn, filename='test_02480_?*?', format=Parquet) order by a; +22 b +333 c +select a, b from s3(s3_conn, filename='test_02480_{1,333}', format=Parquet) order by a; +1 a +333 c +select a, b from s3(s3_conn, filename='test_02480_{1..333}', format=Parquet) order by a; +1 a +22 b +333 c +create table test_02480_write2 (a UInt64, b String) engine = S3(s3_conn, filename='prefix/test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write2 values (4, 'd'), (55, 'f'), (666, 'g'); +select a, b from s3(s3_conn, filename='*/test_02480_*', format=Parquet) order by a; +4 d +55 f +666 g +select a, b from s3(s3_conn, filename='*/test_02480_?', format=Parquet) order by a; +4 d +select a, b from s3(s3_conn, filename='prefix/test_02480_??', format=Parquet) order by a; +55 f +select a, b from s3(s3_conn, filename='prefi?/test_02480_*', format=Parquet) order by a; +4 d +55 f +666 g +select a, b from s3(s3_conn, filename='p?*/test_02480_{56..666}', format=Parquet) order by a; +666 g +drop table test_02480_write; +drop table test_02480_write2; diff --git a/tests/queries/0_stateless/02480_s3_support_wildcard.sql b/tests/queries/0_stateless/02480_s3_support_wildcard.sql new file mode 100644 index 00000000000..9da5a022dc4 --- /dev/null +++ b/tests/queries/0_stateless/02480_s3_support_wildcard.sql @@ -0,0 +1,29 @@ +-- Tags: no-parallel, no-fasttest +-- Tag no-fasttest: Depends on AWS + +-- { echo } +drop table if exists test_02480_write; +drop table if exists test_02480_write2; +create table test_02480_write (a UInt64, b String) engine = S3(s3_conn, filename='test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write values (1, 'a'), (22, 'b'), (333, 'c'); + +select a, b from s3(s3_conn, filename='test_02480_*', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_?', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_??', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_?*?', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='test_02480_{1,333}', format=Parquet) order by a; +select a, b from 
s3(s3_conn, filename='test_02480_{1..333}', format=Parquet) order by a; + +create table test_02480_write2 (a UInt64, b String) engine = S3(s3_conn, filename='prefix/test_02480_{_partition_id}', format=Parquet) partition by a; +set s3_truncate_on_insert=1; +insert into test_02480_write2 values (4, 'd'), (55, 'f'), (666, 'g'); + +select a, b from s3(s3_conn, filename='*/test_02480_*', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='*/test_02480_?', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='prefix/test_02480_??', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='prefi?/test_02480_*', format=Parquet) order by a; +select a, b from s3(s3_conn, filename='p?*/test_02480_{56..666}', format=Parquet) order by a; + +drop table test_02480_write; +drop table test_02480_write2; diff --git a/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.reference b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.reference new file mode 100644 index 00000000000..0a9ed2fb4c8 --- /dev/null +++ b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.reference @@ -0,0 +1 @@ +String Value_1 diff --git a/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.sql b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.sql new file mode 100644 index 00000000000..b0983159eaf --- /dev/null +++ b/tests/queries/0_stateless/02481_analyzer_join_alias_unknown_identifier_crash.sql @@ -0,0 +1,36 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_join_1; +CREATE TABLE test_table_join_1 +( + id UInt8, + value String +) +ENGINE = TinyLog; + +INSERT INTO test_table_join_1 VALUES (0, 'Value_0'); + +DROP TABLE IF EXISTS test_table_join_2; +CREATE TABLE test_table_join_2 +( + id UInt16, + value String +) +ENGINE = TinyLog; + +INSERT INTO test_table_join_2 VALUES (0, 'Value_1'); + +SELECT + toTypeName(t2_value), + t2.value AS t2_value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 USING (id); -- { serverError 47 }; + +SELECT + toTypeName(t2_value), + t2.value AS t2_value +FROM test_table_join_1 AS t1 +INNER JOIN test_table_join_2 AS t2 USING (id); + +DROP TABLE test_table_join_1; +DROP TABLE test_table_join_2; diff --git a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql index 7dc29c2daae..c893e49fed3 100644 --- a/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql +++ b/tests/queries/0_stateless/02481_i43247_ubsan_in_minmaxany.sql @@ -1,6 +1,6 @@ -- https://github.com/ClickHouse/ClickHouse/issues/43247 SELECT finalizeAggregation(CAST('AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)', - 'AggregateFunction(min, String)')); -- { serverError 131 } + 'AggregateFunction(min, String)')); -- { serverError CANNOT_READ_ALL_DATA } -- Value from hex(minState('0123456789012345678901234567890123456789012345678901234567890123')). 
Size 63 + 1 (64) SELECT finalizeAggregation(CAST(unhex('4000000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233'), diff --git a/tests/queries/0_stateless/02482_if_with_nothing_argument.reference b/tests/queries/0_stateless/02482_if_with_nothing_argument.reference new file mode 100644 index 00000000000..484c9fb68b5 --- /dev/null +++ b/tests/queries/0_stateless/02482_if_with_nothing_argument.reference @@ -0,0 +1,2 @@ +[] 0 +[] 0 diff --git a/tests/queries/0_stateless/02482_if_with_nothing_argument.sql b/tests/queries/0_stateless/02482_if_with_nothing_argument.sql new file mode 100644 index 00000000000..af46ef30d02 --- /dev/null +++ b/tests/queries/0_stateless/02482_if_with_nothing_argument.sql @@ -0,0 +1,3 @@ +select [] as arr, if(empty(arr), 0, arr[-1]); +select [] as arr, multiIf(empty(arr), 0, length(arr) > 1, arr[-1], 0); + diff --git a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh new file mode 100755 index 00000000000..00e7a426fd0 --- /dev/null +++ b/tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# This is the regression test for clickhouse-local, that may use already freed +# context from the suggestion thread on error. + +grep_options=( + -e ^$ + -e 'Cannot create file: /no/such/directory' + -e 'Cannot load data for command line suggestions:' + -e 'ClickHouse local version' +) + +ASAN_OPTIONS=$ASAN_OPTIONS:exitcode=3 $CLICKHOUSE_LOCAL --history_file /no/such/directory |& grep -v "${grep_options[@]}" +# on sanitizer error the code will be not 1, but 3 +echo $? 
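For reference, the regression test above leans on AddressSanitizer's exitcode option to tell a sanitizer report apart from an ordinary error. A minimal standalone sketch of that convention follows; it is not part of the patch, and the clickhouse-local invocation in it is only illustrative (any sanitizer-instrumented command would do):

#!/usr/bin/env bash
# exitcode=3 makes AddressSanitizer terminate the process with status 3 when a
# report (e.g. a use-after-free) is produced, so that outcome cannot be
# confused with a normal error exit or a clean run.
ASAN_OPTIONS="$ASAN_OPTIONS:exitcode=3" clickhouse-local --history_file /no/such/directory -q "select 1" >/dev/null 2>&1
status=$?
if [ "$status" -eq 3 ]; then
    echo "sanitizer report detected"
else
    echo "exit status: $status"
fi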
diff --git a/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.reference b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.reference new file mode 100644 index 00000000000..c9a8d73701d --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.reference @@ -0,0 +1,28 @@ +0 +-- +0 +1 +-- +1 +2 +-- +(1) 0 +-- +(0,1) 0 +-- +(1,2) 1 +(1,2) 2 +-- +(1) 0 +-- +(0,1) 0 +-- +(1,2) 1 +(1,2) 2 +-- +('1') 0 +-- +('0','1') 0 +-- +('1','2') 1 +('1','2') 2 diff --git a/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.sql b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.sql new file mode 100644 index 00000000000..bdbe65c643b --- /dev/null +++ b/tests/queries/0_stateless/02493_analyzer_table_functions_untuple.sql @@ -0,0 +1,47 @@ +SET allow_experimental_analyzer = 1; + +SELECT number FROM numbers(untuple(tuple(1))); + +SELECT '--'; + +SELECT number FROM numbers(untuple(tuple(0, 2))); + +SELECT '--'; + +SELECT number FROM numbers(untuple(tuple(1, 2))); + +SELECT '--'; + +SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(untuple(value)); + +SELECT '--'; + +SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value)); + +SELECT '--'; + +SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value)); + +SELECT '--'; + +SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(value.*); + +SELECT '--'; + +SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*); + +SELECT '--'; + +SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*); + +SELECT '--'; + +SELECT cast(tuple('1'), 'Tuple(value String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x)); + +SELECT '--'; + +SELECT cast(tuple('0', '1'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x)); + +SELECT '--'; + +SELECT cast(tuple('1', '2'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x)); diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries index ab73e97b96e..228dccfcb5b 100644 --- a/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries +++ b/tests/queries/0_stateless/filesystem_cache_queries/02240_system_filesystem_cache_table.queries @@ -3,7 +3,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100); @@ -18,7 +18,7 @@ SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy_3', min_bytes_for_wide_part = 10485760; 
+CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy_3', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); SELECT * FROM test FORMAT Null; SELECT file_segment_range_begin, file_segment_range_end, size FROM system.filesystem_cache ORDER BY file_segment_range_end, size; diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries b/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries index 76aebfcaca3..bd185942e6c 100644 --- a/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries +++ b/tests/queries/0_stateless/filesystem_cache_queries/02241_filesystem_cache_on_write_operations.queries @@ -3,7 +3,7 @@ SET enable_filesystem_cache_on_write_operations=1; DROP TABLE IF EXISTS test; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; SYSTEM DROP FILESYSTEM CACHE; SELECT file_segment_range_begin, file_segment_range_end, size, state diff --git a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries b/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries index 386a1792ea4..56a8710cc93 100644 --- a/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries +++ b/tests/queries/0_stateless/filesystem_cache_queries/02242_system_filesystem_cache_log_table.queries @@ -6,7 +6,7 @@ SET enable_filesystem_cache_on_write_operations=0; DROP TABLE IF EXISTS test; DROP TABLE IF EXISTS system.filesystem_cache_log; -CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760; +CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='_storagePolicy', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; SYSTEM STOP MERGES test; INSERT INTO test SELECT number, toString(number) FROM numbers(100000); diff --git a/tests/queries/1_stateful/00176_bson_parallel_parsing.reference b/tests/queries/1_stateful/00176_bson_parallel_parsing.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/1_stateful/00176_bson_parallel_parsing.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/1_stateful/00176_bson_parallel_parsing.sh b/tests/queries/1_stateful/00176_bson_parallel_parsing.sh new file mode 100755 index 00000000000..df1fd68b2b6 --- /dev/null +++ b/tests/queries/1_stateful/00176_bson_parallel_parsing.sh @@ -0,0 +1,35 @@ +#!/usr/bin/env bash +# Tags: disabled + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS parsing_bson" +$CLICKHOUSE_CLIENT -q "CREATE TABLE parsing_bson(WatchID UInt64, ClientIP6 FixedString(16), EventTime DateTime, Title String) ENGINE=Memory()" + + +$CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --output_format_parallel_formatting=false -q \ +"SELECT WatchID, ClientIP6, EventTime, Title FROM test.hits ORDER BY UserID LIMIT 100000 Format BSONEachRow" > 00176_data.bson + +cat 00176_data.bson | $CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --input_format_parallel_parsing=false -q "INSERT INTO parsing_bson FORMAT BSONEachRow" + +checksum1=$($CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_bson ORDER BY WatchID;" | md5sum) +$CLICKHOUSE_CLIENT -q "TRUNCATE TABLE parsing_bson;" + +cat 00176_data.bson | $CLICKHOUSE_CLIENT --max_threads=0 --max_block_size=65505 --input_format_parallel_parsing=true -q "INSERT INTO parsing_bson FORMAT BSONEachRow" + +checksum2=$($CLICKHOUSE_CLIENT -q "SELECT * FROM parsing_bson ORDER BY WatchID;" | md5sum) + + +if [[ "$checksum1" == "$checksum2" ]]; +then + echo "OK" +else + echo "FAIL" +fi + +$CLICKHOUSE_CLIENT -q "DROP TABLE parsing_bson" + +rm 00176_data.bson + diff --git a/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.reference b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.reference new file mode 100644 index 00000000000..f599e28b8ab --- /dev/null +++ b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.sql b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.sql new file mode 100644 index 00000000000..6397d7f5a28 --- /dev/null +++ b/tests/queries/1_stateful/00176_distinct_limit_by_limit_bug_43377.sql @@ -0,0 +1,11 @@ +SELECT count() +FROM +( + SELECT DISTINCT + Title, + SearchPhrase + FROM test.hits + WHERE (SearchPhrase != '') AND (NOT match(Title, '[а-яА-ЯёЁ]')) AND (NOT match(SearchPhrase, '[а-яА-ЯёЁ]')) + LIMIT 1 BY Title + LIMIT 10 +); diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 63bf2c37ee3..25781190d6a 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -9,6 +9,9 @@ AddressSanitizer AppleClang ArrowStream AvroConfluent +BSON +BSONEachRow +Bool CCTOOLS CLion CMake @@ -95,6 +98,7 @@ NEKUDOTAYIM NULLIF NVME NuRaft +ObjectId Ok OpenSUSE OpenStack @@ -190,6 +194,8 @@ bools boringssl brotli buildable +bson +bsoneachrow camelCase capn capnproto @@ -450,6 +456,7 @@ subquery subseconds substring subtree +subtype sudo symlink symlinks @@ -482,6 +489,7 @@ userspace userver utils uuid +uint variadic varint vectorized