Merge branch 'ClickHouse:master' into zvonand-globs-small-fix

Andrey Zvonov · 2023-09-14 11:31:49 +03:00 · committed by GitHub
commit ff16ddbbb3
383 changed files with 7365 additions and 5191 deletions


@ -79,7 +79,7 @@ IndentWidth: 4
IndentWrappedFunctionNames: false
MacroBlockBegin: ''
MacroBlockEnd: ''
NamespaceIndentation: Inner
NamespaceIndentation: None
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
@ -89,6 +89,7 @@ PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 60
RemoveBracesLLVM: true
SpaceAfterCStyleCast: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements

.gitmodules vendored

@ -40,9 +40,6 @@
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse/boost
[submodule "contrib/base64"]
path = contrib/base64
url = https://github.com/ClickHouse/Turbo-Base64
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/ClickHouse/arrow
@ -348,3 +345,6 @@
[submodule "contrib/robin-map"]
path = contrib/robin-map
url = https://github.com/Tessil/robin-map.git
[submodule "contrib/aklomp-base64"]
path = contrib/aklomp-base64
url = https://github.com/aklomp/base64.git
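
After this submodule swap, a fresh checkout needs the new submodule. A typical way to fetch it (standard git commands, shown for illustration):

```sh
git submodule sync
git submodule update --init contrib/aklomp-base64
```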


@ -47,7 +47,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
set (PARALLEL_LINK_JOBS 2)
endif()
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).")
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})


@ -135,7 +135,7 @@ add_contrib (aws-cmake
aws-cmake
)
add_contrib (base64-cmake base64)
add_contrib (aklomp-base64-cmake aklomp-base64)
add_contrib (simdjson-cmake simdjson)
add_contrib (rapidjson-cmake rapidjson)
add_contrib (fastops-cmake fastops)

contrib/aklomp-base64 vendored Submodule

@ -0,0 +1 @@
Subproject commit e77bd70bdd860c52c561568cffb251d88bba064c


@ -0,0 +1 @@
config.h


@ -0,0 +1,68 @@
option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BASE64)
message(STATUS "Not using base64")
return()
endif()
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aklomp-base64")
if (ARCH_AMD64)
# These defines enable/disable SIMD codecs in base64's runtime codec dispatch.
# We don't want to limit ourselves --> enable all.
set(HAVE_SSSE3 1)
set(HAVE_SSE41 1)
set(HAVE_SSE42 1)
set(HAVE_AVX 1)
set(HAVE_AVX2 1)
set(HAVE_AVX512 1)
endif ()
if (ARCH_AARCH64)
# The choice of HAVE_NEON* depends on the target machine because base64 provides
# no runtime dispatch on ARM. NEON is only mandatory with the normal build profile.
if(NOT NO_ARMV81_OR_HIGHER)
set(HAVE_NEON64 1)
set(HAVE_NEON32 0)
endif ()
endif ()
configure_file(config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
add_library(_base64
"${LIBRARY_DIR}/lib/lib.c"
"${LIBRARY_DIR}/lib/codec_choose.c"
"${LIBRARY_DIR}/lib/tables/tables.c"
"${LIBRARY_DIR}/lib/tables/table_dec_32bit.h"
"${LIBRARY_DIR}/lib/tables/table_enc_12bit.h"
"${LIBRARY_DIR}/lib/codecs.h"
"${CMAKE_CURRENT_BINARY_DIR}/config.h"
"${LIBRARY_DIR}/lib/arch/generic/codec.c"
"${LIBRARY_DIR}/lib/arch/ssse3/codec.c"
"${LIBRARY_DIR}/lib/arch/sse41/codec.c"
"${LIBRARY_DIR}/lib/arch/sse42/codec.c"
"${LIBRARY_DIR}/lib/arch/avx/codec.c"
"${LIBRARY_DIR}/lib/arch/avx2/codec.c"
"${LIBRARY_DIR}/lib/arch/avx512/codec.c"
"${LIBRARY_DIR}/lib/arch/neon32/codec.c"
"${LIBRARY_DIR}/lib/arch/neon64/codec.c"
)
if (ARCH_AMD64)
set_source_files_properties(${LIBRARY_DIR}/lib/arch/ssse3/codec.c PROPERTIES COMPILE_FLAGS "-mssse3")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse41/codec.c PROPERTIES COMPILE_FLAGS "-msse4.1")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse42/codec.c PROPERTIES COMPILE_FLAGS "-msse4.2")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx/codec.c PROPERTIES COMPILE_FLAGS "-mavx")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx2/codec.c PROPERTIES COMPILE_FLAGS "-mavx2")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx512/codec.c PROPERTIES COMPILE_FLAGS "-mavx512vl -mavx512vbmi")
endif()
target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR}/include)
target_include_directories(_base64 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
add_library(ch_contrib::base64 ALIAS _base64)
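
The namespaced alias is what consumer targets link against. A minimal sketch of how a target would consume it (the target name here is hypothetical):

```cmake
# Pull in the include directories and library via the alias defined above.
target_link_libraries(my_target PRIVATE ch_contrib::base64)
```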


@ -0,0 +1,9 @@
#cmakedefine01 HAVE_SSSE3
#cmakedefine01 HAVE_SSE41
#cmakedefine01 HAVE_SSE42
#cmakedefine01 HAVE_AVX
#cmakedefine01 HAVE_AVX2
#cmakedefine01 HAVE_AVX512
#cmakedefine01 HAVE_NEON32
#cmakedefine01 HAVE_NEON64

contrib/base64 vendored

@ -1 +0,0 @@
Subproject commit 8628e258090f9eb76d90ac3c91e1ab4690e9aa11


@ -1,60 +0,0 @@
if(ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_S390X)
option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
elseif(ENABLE_BASE64)
message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64")
endif()
if (NOT ENABLE_BASE64)
message(STATUS "Not using base64")
return()
endif()
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base64")
add_library(_base64_scalar OBJECT "${LIBRARY_DIR}/turbob64c.c" "${LIBRARY_DIR}/turbob64d.c")
add_library(_base64_ssse3 OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This file also contains code for ARM NEON
if (ARCH_AMD64)
add_library(_base64_avx OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This is not a mistake. One file is compiled twice.
add_library(_base64_avx2 OBJECT "${LIBRARY_DIR}/turbob64avx2.c")
endif ()
target_compile_options(_base64_scalar PRIVATE -falign-loops)
if (ARCH_AMD64)
target_compile_options(_base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops)
target_compile_options(_base64_avx PRIVATE -falign-loops -mavx)
target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2)
else ()
if (ARCH_PPC64LE)
target_compile_options(_base64_ssse3 PRIVATE -D__SSSE3__ -falign-loops)
else()
target_compile_options(_base64_ssse3 PRIVATE -falign-loops)
endif()
endif ()
if (ARCH_AMD64)
add_library(_base64
$<TARGET_OBJECTS:_base64_scalar>
$<TARGET_OBJECTS:_base64_ssse3>
$<TARGET_OBJECTS:_base64_avx>
$<TARGET_OBJECTS:_base64_avx2>)
else ()
add_library(_base64
$<TARGET_OBJECTS:_base64_scalar>
$<TARGET_OBJECTS:_base64_ssse3>)
endif ()
target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR})
if (XCODE OR XCODE_VERSION)
# https://gitlab.kitware.com/cmake/cmake/issues/17457
# Some native build systems may not like targets that have only object files, so consider adding at least one real source file
# This applies to Xcode.
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
endif ()
target_sources(_base64 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()
add_library(ch_contrib::base64 ALIAS _base64)

contrib/libunwind vendored

@ -1 +1 @@
Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5
Subproject commit 30cc1d3fd3655a5cfa0ab112fe320fb9fc0a8344

contrib/re2 vendored

@ -1 +1 @@
Subproject commit 03da4fc0857c285e3a26782f6bc8931c4c950df4
Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c


@ -15,8 +15,8 @@ CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}
# Pre-configured destination cluster, where to export the data
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"}
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
function __set_connection_args
@ -125,9 +125,9 @@ function setup_logs_replication
echo 'Create %_log tables'
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
# Calculate hash of its structure:
# Calculate hash of its structure. Note: 1 is the version of extra columns - increment it if extra columns are changed:
hash=$(clickhouse-client --query "
SELECT sipHash64(groupArray((name, type)))
SELECT sipHash64(1, groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
ORDER BY position)


@ -18,6 +18,7 @@ RUN apt-get update \
python3-termcolor \
unixodbc \
pv \
jq \
zstd \
--yes --no-install-recommends


@ -120,7 +120,7 @@ function clone_submodules
contrib/libxml2
contrib/libunwind
contrib/fmtlib
contrib/base64
contrib/aklomp-base64
contrib/cctz
contrib/libcpuid
contrib/libdivide


@ -40,6 +40,7 @@ RUN apt-get update -y \
cargo \
zstd \
file \
jq \
pv \
zip \
p7zip-full \


@ -1139,6 +1139,8 @@ Optional parameters:
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). A configuration sketch follows this list.
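
A minimal sketch of a disk using these parameters (the resource names are placeholders; endpoint and credentials omitted):

```xml
<disks>
    <s3>
        <type>s3</type>
        <!-- endpoint, access_key_id, secret_access_key, ... -->
        <read_resource>network_read</read_resource>
        <write_resource>network_write</write_resource>
    </s3>
</disks>
```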
### Configuring the cache
@ -1251,6 +1253,8 @@ Other parameters:
* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).


@ -58,7 +58,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
@ -288,9 +288,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
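
As a sketch, these headers can be observed with a plain `curl` call against a local server; the URL parameter enables the setting for a single query (the values above are illustrative):

``` bash
curl -v 'http://localhost:8123/?send_progress_in_http_headers=1' \
    --data-binary 'SELECT sum(number) FROM numbers(1000000000)'
```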
Possible header fields:
@ -439,7 +439,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -604,7 +604,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -644,7 +644,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -696,7 +696,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -715,7 +715,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact


@ -71,7 +71,7 @@ Possible values:
- Any positive integer.
Default value: 150.
Default value: 1000.
ClickHouse artificially executes `INSERT` longer (adds sleep) so that the background merge process can merge parts faster than they are added.


@ -3,12 +3,13 @@ slug: /en/operations/system-tables/information_schema
---
# INFORMATION_SCHEMA
`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables.
The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used.
`INFORMATION_SCHEMA` (or: `information_schema`) is a system database which provides a (somewhat) standardized, [DBMS-agnostic view](https://en.wikipedia.org/wiki/Information_schema) on metadata of database objects. The views in `INFORMATION_SCHEMA` are generally inferior to normal system tables, but tools can use them to obtain basic information in a cross-DBMS manner. The structure and content of views in `INFORMATION_SCHEMA` is supposed to evolve in a backwards-compatible way, i.e. only new functionality is added, but existing functionality is not changed or removed. In terms of internal implementation, views in `INFORMATION_SCHEMA` usually map to normal system tables like [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md).
``` sql
SHOW TABLES FROM INFORMATION_SCHEMA;
-- or:
SHOW TABLES FROM information_schema;
```
``` text
@ -17,6 +18,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
│ SCHEMATA │
│ TABLES │
│ VIEWS │
│ columns │
│ schemata │
│ tables │
│ views │
└──────────┘
```
@ -27,6 +32,8 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
- [TABLES](#tables)
- [VIEWS](#views)
Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns`, are provided for compatibility with other databases.
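
For example, a tool can read the same metadata through either spelling; both queries below are equivalent:

``` sql
SELECT table_schema, table_name FROM INFORMATION_SCHEMA.TABLES LIMIT 3;
SELECT table_schema, table_name FROM information_schema.tables LIMIT 3;
```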
## COLUMNS {#columns}
Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table, plus columns that are not supported in ClickHouse or do not make sense (always `NULL`) but must be present according to the standard.


@ -1,7 +1,7 @@
---
slug: /en/operations/system-tables/licenses
---
# licenses
Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
│ [...] │ [...] │ [...] │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```


@ -101,7 +101,8 @@ Columns:
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
- `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.


@ -0,0 +1,64 @@
---
slug: /en/operations/system-tables/scheduler
---
# scheduler
Contains information and status for [scheduling nodes](/docs/en/operations/workload-scheduling.md/#hierarchy) residing on the local server.
This table can be used for monitoring. The table contains a row for every scheduling node.
Example:
``` sql
SELECT *
FROM system.scheduler
WHERE resource = 'network_read' AND path = '/prio/fair/prod'
FORMAT Vertical
```
``` text
Row 1:
──────
resource: network_read
path: /prio/fair/prod
type: fifo
weight: 5
priority: 0
is_active: 0
active_children: 0
dequeued_requests: 67
dequeued_cost: 4692272
busy_periods: 63
vruntime: 938454.1999999989
system_vruntime: ᴺᵁᴸᴸ
queue_length: 0
queue_cost: 0
budget: -60524
is_satisfied: ᴺᵁᴸᴸ
inflight_requests: ᴺᵁᴸᴸ
inflight_cost: ᴺᵁᴸᴸ
max_requests: ᴺᵁᴸᴸ
max_cost: ᴺᵁᴸᴸ
```
Columns:
- `resource` (`String`) - Resource name
- `path` (`String`) - Path to a scheduling node within this resource scheduling hierarchy
- `type` (`String`) - Type of a scheduling node.
- `weight` (`Float64`) - Weight of a node, used by a parent node of `fair` type.
- `priority` (`Int64`) - Priority of a node, used by a parent node of `priority` type (lower value means higher priority).
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
- `active_children` (`UInt64`) - The number of children in active state.
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by the SFQ algorithm to select the next child to process in a max-min fair manner.
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
- `queue_length` (`Nullable(UInt64)`) - For `fifo` nodes only. Current number of resource requests residing in the queue.
- `queue_cost` (`Nullable(UInt64)`) - For `fifo` nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue.
- `budget` (`Nullable(Int64)`) - For `fifo` nodes only. The number of available "cost units" for new resource requests. Can appear in case of a discrepancy between estimated and real costs of resource requests (e.g. after a read/write failure).
- `is_satisfied` (`Nullable(UInt8)`) - For constraint nodes only (e.g. `inflight_limit`). Equals `1` if all the constraints of this node are satisfied.
- `inflight_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The number of resource requests dequeued from this node, that are currently in consumption state.
- `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state.
- `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation.
- `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation.
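
A monitoring sketch built on these columns, e.g. to spot queues that currently hold requests (the filter is arbitrary):

``` sql
SELECT resource, path, queue_length, queue_cost
FROM system.scheduler
WHERE type = 'fifo' AND queue_length > 0
ORDER BY queue_cost DESC
```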


@ -16,6 +16,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
- `--log-level=LEVEL` — Set log level. Default value: `information`.
- `--no-confirmation` — If set, several commands will not require confirmation. Default value: `false` in interactive mode and `true` in query mode.
- `--help` — Shows the help message.
## Example {#clickhouse-keeper-client-example}
@ -44,6 +46,7 @@ keeper foo bar
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Change the working path (default `.`)
- `exists <path>` -- Returns `1` if node exists, `0` otherwise
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
- `create <path> <value> [mode]` -- Creates new node with the set value
- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
@ -56,3 +59,5 @@ keeper foo bar
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
- `sync <path>` -- Synchronizes node between processes and leader
- `reconfig <add|remove|set> "<arg>" [version]` -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration. An invocation sketch follows this list.
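
For illustration, a `reconfig` invocation in an interactive session might look like this (server id, host and port are placeholders):

``` bash
reconfig add "server.5=example-host:9234"
```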


@ -0,0 +1,153 @@
---
slug: /en/operations/workload-scheduling
sidebar_position: 69
sidebar_label: "Workload scheduling"
title: "Workload scheduling"
---
When ClickHouse executes multiple queries simultaneously, they may use shared resources (e.g. disks). Scheduling constraints and policies can be applied to regulate how resources are utilized and shared between different workloads. For every resource a scheduling hierarchy can be configured. The hierarchy root represents a resource, while the leaves are queues that hold requests exceeding resource capacity.
:::note
Currently only remote disk IO can be scheduled using the described method. For CPU scheduling, see settings about thread pools and [`concurrent_threads_soft_limit_num`](server-configuration-parameters/settings.md#concurrent_threads_soft_limit_num). For flexible memory limits, see [Memory overcommit](settings/memory-overcommit.md).
:::
## Disk configuration {#disk-config}
To enable IO scheduling for a specific disk, specify `read_resource` and/or `write_resource` in the storage configuration. This tells ClickHouse which resource should be used for each read and write request on the given disk. The read and write resources can refer to the same resource name, which is useful for local SSDs or HDDs. Multiple different disks can also refer to the same resource, which is useful for remote disks: for example, if you want to allow fair division of network bandwidth between "production" and "development" workloads.
Example:
```xml
<clickhouse>
<storage_configuration>
...
<disks>
<s3>
<type>s3</type>
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
<read_resource>network_read</read_resource>
<write_resource>network_write</write_resource>
</s3>
</disks>
<policies>
<s3_main>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3_main>
</policies>
</storage_configuration>
</clickhouse>
```
## Workload markup {#workload_markup}
Queries can be marked with the setting `workload` to distinguish different workloads. If `workload` is not set, the value "default" is used. Note that you can specify another value using settings profiles. Settings constraints can be used to make `workload` constant if you want all queries from a user to be marked with a fixed value of the `workload` setting; a sketch of such a profile follows the example below.
Let's consider an example of a system with two different workloads: "production" and "development".
```sql
SELECT count() FROM my_table WHERE value = 42 SETTINGS workload = 'production'
SELECT count() FROM my_table WHERE value = 13 SETTINGS workload = 'development'
```
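
A sketch of pinning `workload` via a settings profile with a constraint, as mentioned above (profile name and value are illustrative; `<const/>` forbids the user from changing the setting):

```xml
<clickhouse>
    <profiles>
        <production_profile>
            <workload>production</workload>
            <constraints>
                <workload>
                    <const/>
                </workload>
            </constraints>
        </production_profile>
    </profiles>
</clickhouse>
```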
## Resource scheduling hierarchy {#hierarchy}
From the standpoint of the scheduling subsystem, a resource represents a hierarchy of scheduling nodes.
```mermaid
graph TD
subgraph network_read
nr_root(("/"))
-->|100 concurrent requests| nr_fair("fair")
-->|75% bandwidth| nr_prod["prod"]
nr_fair
-->|25% bandwidth| nr_dev["dev"]
end
subgraph network_write
nw_root(("/"))
-->|100 concurrent requests| nw_fair("fair")
-->|75% bandwidth| nw_prod["prod"]
nw_fair
-->|25% bandwidth| nw_dev["dev"]
end
```
**Possible node types:**
* `inflight_limit` (constraint) - blocks if either number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child.
* `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1).
* `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
* `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.
The following example shows how to define the IO scheduling hierarchies shown in the diagram above:
```xml
<clickhouse>
<resources>
<network_read>
<node path="/">
<type>inflight_limit</type>
<max_requests>100</max_requests>
</node>
<node path="/fair">
<type>fair</type>
</node>
<node path="/fair/prod">
<type>fifo</type>
<weight>3</weight>
</node>
<node path="/fair/dev">
<type>fifo</type>
</node>
</network_read>
<network_write>
<node path="/">
<type>inflight_limit</type>
<max_requests>100</max_requests>
</node>
<node path="/fair">
<type>fair</type>
</node>
<node path="/fair/prod">
<type>fifo</type>
<weight>3</weight>
</node>
<node path="/fair/dev">
<type>fifo</type>
</node>
</network_write>
</resources>
</clickhouse>
```
## Workload classifiers {#workload_classifiers}
Workload classifiers are used to define the mapping from the `workload` specified by a query to the leaf queues that should be used for specific resources. At the moment, workload classification is simple: only static mapping is available.
Example:
```xml
<clickhouse>
<workload_classifiers>
<production>
<network_read>/fair/prod</network_read>
<network_write>/fair/prod</network_write>
</production>
<development>
<network_read>/fair/dev</network_read>
<network_write>/fair/dev</network_write>
</development>
<default>
<network_read>/fair/dev</network_read>
<network_write>/fair/dev</network_write>
</default>
</workload_classifiers>
</clickhouse>
```
## See also
- [system.scheduler](/docs/en/operations/system-tables/scheduler.md)


@ -7,6 +7,10 @@ sidebar_position: 30
The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).
:::note
Alias:
- `STD`
- `STDDEV_POP`
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
:::
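
A quick way to compare the two variants side by side (the large offset makes the numerical instability visible):

``` sql
SELECT
    stddevPop(number + 1e12) AS unstable,
    stddevPopStable(number + 1e12) AS stable
FROM numbers(1000);
```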


@ -7,6 +7,8 @@ sidebar_position: 31
The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md).
:::note
Alias: `STDDEV_SAMP`.
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
:::


@ -9,6 +9,8 @@ Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x
In other words, dispersion for a set of values. Returns `Float64`.
:::note
Alias: `VAR_POP`.
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
:::


@ -11,6 +11,8 @@ It represents an unbiased estimate of the variance of a random variable if passe
Returns `Float64`. When `n <= 1`, returns `+∞`.
:::note
Alias: `VAR_SAMP`.
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
:::


@ -2361,6 +2361,12 @@ Result:
└────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────┘
```
#### Matching Modes
Pattern matching behavior can be modified with certain dictionary settings (see the sketch after this list):
- `regexp_dict_flag_case_insensitive`: Use case-insensitive matching (defaults to `false`). Can be overridden in individual expressions with `(?i)` and `(?-i)`.
- `regexp_dict_flag_dotall`: Allow '.' to match newline characters (defaults to `false`).
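
For instance, an individual pattern can opt into case-insensitive matching inline, regardless of the global flag; an illustrative entry in a `YAMLRegExpTree` source:

```yaml
- regexp: '(?i)bingbot'
  name: 'BingBot'
```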
### Use Regular Expression Tree Dictionary in ClickHouse Cloud
The `YAMLRegExpTree` source used above works in ClickHouse Open Source but not in ClickHouse Cloud. To use regexp tree dictionaries in ClickHouse Cloud, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause.


@ -237,6 +237,11 @@ type_samoa: DateTime('US/Samoa')
int32samoa: 1546300800
```
**See Also**
- [formatDateTime](#date_time_functions-formatDateTime) - supports non-constant timezone.
- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone.
## timeZoneOf
Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types.
@ -720,6 +725,42 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
└────────────┴───────────┴───────────┴───────────┴───────────────┘
```
## toDaysSinceYearZero
Returns, for a given date, the number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`TO_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_to-days) function.
**Syntax**
``` sql
toDaysSinceYearZero(date)
```
Aliases: `TO_DAYS`
**Arguments**
- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md) or [Date32](../../sql-reference/data-types/date32.md).
**Returned value**
The number of days passed since date 0000-01-01.
Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Example**
``` sql
SELECT toDaysSinceYearZero(toDate('2023-09-08'));
```
Result:
``` text
┌─toDaysSinceYearZero(toDate('2023-09-08'))─┐
│                                    713569 │
└───────────────────────────────────────────┘
```
## age
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
@ -942,6 +983,8 @@ Result:
Adds the time interval or date interval to the provided date or date with time.
If the addition results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -965,13 +1008,13 @@ Aliases: `dateAdd`, `DATE_ADD`.
- `year`
- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
Date or date with time obtained by adding `value`, expressed in `unit`, to `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -987,10 +1030,16 @@ Result:
└───────────────────────────────────────────────┘
```
**See Also**
- [addDate](#addDate)
## date\_sub
Subtracts the time interval or date interval from the provided date or date with time.
If the subtraction results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -1015,13 +1064,13 @@ Aliases: `dateSub`, `DATE_SUB`.
- `year`
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -1037,10 +1086,15 @@ Result:
└────────────────────────────────────────────────┘
```
**See Also**
- [subDate](#subDate)
## timestamp\_add
Adds the specified time value to the provided date or date with time value.
If the addition results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -1051,7 +1105,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
**Arguments**
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md).
- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md).
Possible values:
@ -1069,7 +1123,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`.
Date or date with time with the specified `value` expressed in `unit` added to `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -1089,6 +1143,8 @@ Result:
Subtracts the time interval from the provided date or date with time.
If the subtraction results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
@ -1112,13 +1168,13 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`.
- `year`
- `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`.
Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@ -1134,6 +1190,90 @@ Result:
└──────────────────────────────────────────────────────────────┘
```
## addDate
Adds the time interval or date interval to the provided date or date with time.
If the addition results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
addDate(date, interval)
```
**Arguments**
- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md).
**Returned value**
Date or date with time obtained by adding `interval` to `date`.
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
```sql
SELECT addDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
```
Result:
```text
┌─addDate(toDate('2018-01-01'), toIntervalYear(3))─┐
│ 2021-01-01 │
└──────────────────────────────────────────────────┘
```
Alias: `ADDDATE`
**See Also**
- [date_add](#date_add)
## subDate
Subtracts the time interval or date interval from the provided date or date with time.
If the subtraction results in a value outside the bounds of the data type, the result is undefined.
**Syntax**
``` sql
subDate(date, interval)
```
**Arguments**
- `date` — The date or date with time from which `interval` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `interval` — Interval to subtract. [Interval](../../sql-reference/data-types/special-data-types/interval.md).
**Returned value**
Date or date with time obtained by subtracting `interval` from `date`.
Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
```sql
SELECT subDate(toDate('2018-01-01'), INTERVAL 3 YEAR);
```
Result:
```text
┌─subDate(toDate('2018-01-01'), toIntervalYear(3))─┐
│ 2015-01-01 │
└──────────────────────────────────────────────────┘
```
Alias: `SUBDATE`
**See Also**
- [date_sub](#date_sub)
## now
Returns the current date and time at the moment of query analysis. The function is a constant expression.
@ -1494,6 +1634,33 @@ Result:
└─────────────────────────────────────────────────────────────────────┘
```
Additionally, the `formatDateTime` function can take a third String argument containing the name of the time zone. Example: `Asia/Istanbul`. In this case, the time is formatted according to the specified time zone.
**Example**
```sql
SELECT
now() AS ts,
time_zone,
formatDateTime(ts, '%T', time_zone) AS str_tz_time
FROM system.time_zones
WHERE time_zone LIKE 'Europe%'
LIMIT 10
┌──────────────────ts─┬─time_zone─────────┬─str_tz_time─┐
│ 2023-09-08 19:13:40 │ Europe/Amsterdam │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Andorra │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Astrakhan │ 23:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Athens │ 22:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Belfast │ 20:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Belgrade │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Berlin │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Bratislava │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Brussels │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Bucharest │ 22:13:40 │
└─────────────────────┴───────────────────┴─────────────┘
```
**See Also**
- [formatDateTimeInJodaSyntax](#formatDateTimeInJodaSyntax)
@ -1603,7 +1770,7 @@ monthName(date)
**Arguments**
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**


@ -33,6 +33,13 @@ Returns an array of selected substrings. Empty substrings may be selected when:
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
:::note
The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings` > 0 meant that `max_substrings`-many splits were performed and that the remainder of the string was returned as the final element of the list.
For example,
- in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b','c=d']`
- in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b']`
:::
**Example**
``` sql
@ -63,7 +70,6 @@ splitByString(separator, s[, max_substrings])
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible.
**Returned value(s)**
Returns an array of selected substrings. Empty substrings may be selected when:


@ -892,16 +892,29 @@ Query:
``` sql
SELECT
now() AS now_local,
toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
now() AS ts,
time_zone,
toString(ts, time_zone) AS str_tz_datetime
FROM system.time_zones
WHERE time_zone LIKE 'Europe%'
LIMIT 10
```
Result:
```response
┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
└─────────────────────┴─────────────────────┘
┌──────────────────ts─┬─time_zone─────────┬─str_tz_datetime─────┐
│ 2023-09-08 19:14:59 │ Europe/Amsterdam │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Andorra │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Astrakhan │ 2023-09-08 23:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Athens │ 2023-09-08 22:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Belfast │ 2023-09-08 20:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Belgrade │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Berlin │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Bratislava │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Brussels │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Bucharest │ 2023-09-08 22:14:59 │
└─────────────────────┴───────────────────┴─────────────────────┘
```
Also see the `toUnixTimestamp` function.


@ -638,3 +638,16 @@ Outputs the content of the [system.table_engines](../../operations/system-tables
**See Also**
- [system.table_engines](../../operations/system-tables/table_engines.md) table
## SHOW FUNCTIONS
``` sql
SHOW FUNCTIONS [LIKE | ILIKE '<pattern>']
```
Outputs the content of the [system.functions](../../operations/system-tables/functions.md) table.
If either a `LIKE` or an `ILIKE` clause is specified, the query returns a list of system functions whose names match the provided `<pattern>`.
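
For example (the pattern is illustrative):

``` sql
SHOW FUNCTIONS LIKE 'toDate%'
```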
**See Also**
- [system.functions](../../operations/system-tables/functions.md) table


@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
@ -267,9 +267,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Query execution progress can be tracked via the `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
Possible header fields:
@ -530,7 +530,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -570,7 +570,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -622,7 +622,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -641,7 +641,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact


@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
│ [...] │ [...] │ [...] │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```


@ -99,7 +99,8 @@ ClickHouse does not delete data from the table automatically
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — counters that measure various metrics. Their descriptions can be found in the [system.events](#system_tables-events) table.
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — settings that were changed when the client ran the query. To enable logging of setting changes, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — comment on the log entry. An arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size); empty if no comment was given.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query processing.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — IDs of the threads participating in query processing; these threads did not necessarily run simultaneously.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — maximum number of simultaneously running threads that took part in query processing.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — canonical names of the `aggregate functions` used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — canonical names of the `aggregate function combinators` used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — canonical names of the `database engines` used during query execution.
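
A quick way to inspect the two thread-related columns described above is to read them back from the log itself (a sketch; assumes the query log is enabled on the server):

``` sql
SELECT
    query_id,
    length(thread_ids) AS threads_total,
    peak_threads_usage
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 5;
```

`thread_ids` lists every thread that ever joined the query, while `peak_threads_usage` tracks concurrency, so the latter can be much smaller than `length(thread_ids)` for queries that spawn many short-lived threads.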

View File

@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
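
The `elapsed_ns` field now included in `X-ClickHouse-Summary` reports the server-side elapsed time of the request in nanoseconds. Comparable timing data can also be read back after the fact (a sketch; assumes the query log is enabled):

``` sql
SELECT query_id, query_duration_ms, read_rows, read_bytes
FROM system.query_log
WHERE type = 'QueryFinish'
ORDER BY event_time DESC
LIMIT 3;
```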
@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
You can receive information about query progress in the `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
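
Progress headers are sent only when the setting mentioned above is enabled for the request; over the raw HTTP interface it is usually passed as a URL parameter (a sketch):

``` sql
-- Session-level switch; over plain HTTP, append
-- ?send_progress_in_http_headers=1 to the request URL instead.
SET send_progress_in_http_headers = 1;
SELECT count() FROM system.numbers LIMIT 100000000;
```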
Possible header fields:
@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -20,21 +20,9 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
│ [...] │ [...] │ [...] │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```

View File

@ -9,11 +9,11 @@ namespace DB
bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -42,11 +42,11 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -64,11 +64,12 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
@ -93,11 +94,12 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
@ -143,10 +145,10 @@ void TouchCommand::execute(const ASTKeeperQuery * query, KeeperClient * client)
bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -156,13 +158,28 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool ExistsCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(path));
return true;
}
void ExistsCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
std::cout << client->zookeeper->exists(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -325,10 +342,10 @@ void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client)
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -340,10 +357,10 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -355,6 +372,70 @@ void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
[client, path]{ client->zookeeper->removeRecursive(path); });
}
bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
ReconfigCommand::Operation operation;
if (ParserKeyword{"ADD"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::ADD;
else if (ParserKeyword{"REMOVE"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::REMOVE;
else if (ParserKeyword{"SET"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::SET;
else
return false;
node->args.push_back(operation);
ParserToken{TokenType::Whitespace}.ignore(pos);
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
return true;
}
void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
String joining;
String leaving;
String new_members;
auto operation = query->args[0].get<ReconfigCommand::Operation>();
switch (operation)
{
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
joining = query->args[1].safeGet<DB::String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::REMOVE):
leaving = query->args[1].safeGet<DB::String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::SET):
new_members = query->args[1].safeGet<DB::String>();
break;
default:
UNREACHABLE();
}
auto response = client->zookeeper->reconfig(joining, leaving, new_members);
std::cout << response.value << '\n';
}
bool SyncCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(path));
return true;
}
void SyncCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
std::cout << client->zookeeper->sync(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
{
return true;

View File

@ -101,6 +101,17 @@ class GetCommand : public IKeeperClientCommand
String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
};
class ExistsCommand : public IKeeperClientCommand
{
String getName() const override { return "exists"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Returns `1` if node exists, `0` otherwise"; }
};
class GetStatCommand : public IKeeperClientCommand
{
String getName() const override { return "get_stat"; }
@ -177,6 +188,35 @@ class RMRCommand : public IKeeperClientCommand
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
};
class ReconfigCommand : public IKeeperClientCommand
{
enum class Operation : UInt8
{
ADD = 0,
REMOVE = 1,
SET = 2,
};
String getName() const override { return "reconfig"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <add|remove|set> \"<arg>\" [version] -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration"; }
};
class SyncCommand: public IKeeperClientCommand
{
String getName() const override { return "sync"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Synchronizes node between processes and leader"; }
};
class HelpCommand : public IKeeperClientCommand
{
String getName() const override { return "help"; }

View File

@ -84,8 +84,11 @@ std::vector<String> KeeperClient::getCompletions(const String & prefix) const
void KeeperClient::askConfirmation(const String & prompt, std::function<void()> && callback)
{
if (!ask_confirmation)
return callback();
std::cout << prompt << " Continue?\n";
need_confirmation = true;
waiting_confirmation = true;
confirmation_callback = callback;
}
@ -170,6 +173,14 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
Poco::Util::Option("log-level", "", "set log level")
.argument("<level>")
.binding("log-level"));
options.addOption(
Poco::Util::Option("no-confirmation", "", "if set, will not require a confirmation for several commands. default false for interactive and true for query")
.binding("no-confirmation"));
options.addOption(
Poco::Util::Option("tests-mode", "", "run keeper-client in a special mode for tests: all command output is separated by special symbols. default false")
.binding("tests-mode"));
}
void KeeperClient::initialize(Poco::Util::Application & /* self */)
@ -184,12 +195,15 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
std::make_shared<CreateCommand>(),
std::make_shared<TouchCommand>(),
std::make_shared<GetCommand>(),
std::make_shared<ExistsCommand>(),
std::make_shared<GetStatCommand>(),
std::make_shared<FindSuperNodes>(),
std::make_shared<DeleteStaleBackups>(),
std::make_shared<FindBigFamily>(),
std::make_shared<RMCommand>(),
std::make_shared<RMRCommand>(),
std::make_shared<ReconfigCommand>(),
std::make_shared<SyncCommand>(),
std::make_shared<HelpCommand>(),
std::make_shared<FourLetterWordCommand>(),
});
@ -229,18 +243,6 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
EventNotifier::init();
}
void KeeperClient::executeQuery(const String & query)
{
std::vector<String> queries;
boost::algorithm::split(queries, query, boost::is_any_of(";"));
for (const auto & query_text : queries)
{
if (!query_text.empty())
processQueryText(query_text);
}
}
bool KeeperClient::processQueryText(const String & text)
{
if (exit_strings.find(text) != exit_strings.end())
@ -248,29 +250,44 @@ bool KeeperClient::processQueryText(const String & text)
try
{
if (need_confirmation)
if (waiting_confirmation)
{
need_confirmation = false;
waiting_confirmation = false;
if (text.size() == 1 && (text == "y" || text == "Y"))
confirmation_callback();
return true;
}
KeeperParser parser;
String message;
const char * begin = text.data();
ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false);
const char * end = begin + text.size();
if (!res)
while (begin < end)
{
std::cerr << message << "\n";
return true;
String message;
ASTPtr res = tryParseQuery(
parser,
begin,
end,
/* out_error_message = */ message,
/* hilite = */ true,
/* description = */ "",
/* allow_multi_statements = */ true,
/* max_query_size = */ 0,
/* max_parser_depth = */ 0,
/* skip_insignificant = */ false);
if (!res)
{
std::cerr << message << "\n";
return true;
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
catch (Coordination::Exception & err)
{
@ -279,7 +296,7 @@ bool KeeperClient::processQueryText(const String & text)
return true;
}
void KeeperClient::runInteractive()
void KeeperClient::runInteractiveReplxx()
{
LineReader::Patterns query_extenders = {"\\"};
@ -299,7 +316,7 @@ void KeeperClient::runInteractive()
while (true)
{
String prompt;
if (need_confirmation)
if (waiting_confirmation)
prompt = "[y/n] ";
else
prompt = cwd.string() + " :) ";
@ -313,6 +330,26 @@ void KeeperClient::runInteractive()
}
}
void KeeperClient::runInteractiveInputStream()
{
for (String input; std::getline(std::cin, input);)
{
if (!processQueryText(input))
break;
std::cout << "\a\a\a\a" << std::endl;
std::cerr << std::flush;
}
}
void KeeperClient::runInteractive()
{
if (config().hasOption("tests-mode"))
runInteractiveInputStream();
else
runInteractiveReplxx();
}
int KeeperClient::main(const std::vector<String> & /* args */)
{
if (config().hasOption("help"))
@ -362,8 +399,13 @@ int KeeperClient::main(const std::vector<String> & /* args */)
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);
if (config().has("no-confirmation") || config().has("query"))
ask_confirmation = false;
if (config().has("query"))
executeQuery(config().getString("query"));
{
processQueryText(config().getString("query"));
}
else
runInteractive();

View File

@ -49,8 +49,10 @@ public:
protected:
void runInteractive();
void runInteractiveReplxx();
void runInteractiveInputStream();
bool processQueryText(const String & text);
void executeQuery(const String & query);
void loadCommands(std::vector<Command> && new_commands);
@ -61,7 +63,8 @@ protected:
zkutil::ZooKeeperArgs zk_args;
bool need_confirmation = false;
bool ask_confirmation = true;
bool waiting_confirmation = false;
std::vector<String> registered_commands_and_four_letter_words;
};

View File

@ -7,43 +7,34 @@ namespace DB
bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
{
expected.add(pos, getTokenName(TokenType::BareWord));
if (pos->type == TokenType::BareWord)
if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
{
result = String(pos->begin, pos->end);
++pos;
if (!parseIdentifierOrStringLiteral(pos, expected, result))
return false;
ParserToken{TokenType::Whitespace}.ignore(pos);
return true;
}
bool status = parseIdentifierOrStringLiteral(pos, expected, result);
ParserToken{TokenType::Whitespace}.ignore(pos);
return status;
}
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
expected.add(pos, "path");
if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
return parseIdentifierOrStringLiteral(pos, expected, path);
String result;
while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream)
while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon)
{
result.append(pos->begin, pos->end);
++pos;
}
ParserToken{TokenType::Whitespace}.ignore(pos);
if (result.empty())
return false;
path = result;
return true;
}
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
expected.add(pos, "path");
return parseKeeperArg(pos, expected, path);
}
bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
auto query = std::make_shared<ASTKeeperQuery>();

View File

@ -881,6 +881,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
if (options.count("database"))
config().setString("default_database", options["database"].as<std::string>());
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());

View File

@ -19,6 +19,7 @@ void registerAggregateFunctionsStatisticsSecondMoment(AggregateFunctionFactory &
factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
}
}
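
The newly registered `STD` alias is case-insensitive, so `STD`, `std`, and the canonical `stddevPop` should all resolve to the same function; a quick check (sketch):

``` sql
SELECT
    stddevPop(number) AS canonical,
    STD(number) AS alias_upper,
    std(number) AS alias_lower
FROM numbers(10);
-- all three columns are expected to be equal
```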

View File

@ -6341,9 +6341,9 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat());
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat(), scope.context);
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(scope.context);
}
if (use_columns_from_insert_query)

View File

@ -1,195 +0,0 @@
#include "UniqToCountPass.h"
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>
#include <Analyzer/ColumnNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
namespace DB
{
namespace
{
bool matchFnUniq(String func_name)
{
auto name = Poco::toLower(func_name);
return name == "uniq" || name == "uniqHLL12" || name == "uniqExact" || name == "uniqTheta" || name == "uniqCombined"
|| name == "uniqCombined64";
}
/// Extract the corresponding projection columns for group by node list.
/// For example:
/// SELECT a as aa, any(b) FROM table group by a; -> aa(ColumnNode)
NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node)
{
if (!query_node->hasGroupBy())
return {};
NamesAndTypes result;
for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren())
{
const auto & projection_columns = query_node->getProjectionColumns();
const auto & projection_nodes = query_node->getProjection().getNodes();
assert(projection_columns.size() == projection_nodes.size());
for (size_t i = 0; i < projection_columns.size(); i++)
{
if (projection_nodes[i]->isEqual(*group_by_ele))
result.push_back(projection_columns[i]);
}
}
return result;
}
/// Whether query_columns equals subquery_columns.
/// query_columns: query columns from query
/// subquery_columns: projection columns from subquery
bool nodeListEquals(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns)
{
if (query_columns.size() != subquery_columns.size())
return false;
for (const auto & query_column : query_columns)
{
auto find = std::find_if(
subquery_columns.begin(),
subquery_columns.end(),
[&](const auto & subquery_column) -> bool
{
if (auto * column_node = query_column->as<ColumnNode>())
{
return subquery_column == column_node->getColumn();
}
return false;
});
if (find == subquery_columns.end())
return false;
}
return true;
}
/// Whether subquery_columns contains all columns in subquery_columns.
/// query_columns: query columns from query
/// subquery_columns: projection columns from subquery
bool nodeListContainsAll(const QueryTreeNodes & query_columns, const NamesAndTypes & subquery_columns)
{
if (query_columns.size() > subquery_columns.size())
return false;
for (const auto & query_column : query_columns)
{
auto find = std::find_if(
subquery_columns.begin(),
subquery_columns.end(),
[&](const auto & subquery_column) -> bool
{
if (auto * column_node = query_column->as<ColumnNode>())
{
return subquery_column == column_node->getColumn();
}
return false;
});
if (find == subquery_columns.end())
return false;
}
return true;
}
}
class UniqToCountVisitor : public InDepthQueryTreeVisitorWithContext<UniqToCountVisitor>
{
public:
using Base = InDepthQueryTreeVisitorWithContext<UniqToCountVisitor>;
using Base::Base;
void enterImpl(QueryTreeNodePtr & node)
{
if (!getSettings().optimize_uniq_to_count)
return;
auto * query_node = node->as<QueryNode>();
if (!query_node)
return;
/// Check that query has only single table expression which is subquery
auto * subquery_node = query_node->getJoinTree()->as<QueryNode>();
if (!subquery_node)
return;
/// Check that query has only single node in projection
auto & projection_nodes = query_node->getProjection().getNodes();
if (projection_nodes.size() != 1)
return;
/// Check that projection_node is a function
auto & projection_node = projection_nodes[0];
auto * function_node = projection_node->as<FunctionNode>();
if (!function_node)
return;
/// Check that query single projection node is `uniq` or its variants
if (!matchFnUniq(function_node->getFunctionName()))
return;
auto & uniq_arguments_nodes = function_node->getArguments().getNodes();
/// Whether query matches 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)'
auto match_subquery_with_distinct = [&]() -> bool
{
if (!subquery_node->isDistinct())
return false;
/// uniq expression list == subquery projection columns
if (!nodeListEquals(uniq_arguments_nodes, subquery_node->getProjectionColumns()))
return false;
return true;
};
/// Whether query matches 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)'
auto match_subquery_with_group_by = [&]() -> bool
{
if (!subquery_node->hasGroupBy())
return false;
/// uniq argument node list == subquery group by node list
auto group_by_columns = extractProjectionColumnsForGroupBy(subquery_node);
if (!nodeListEquals(uniq_arguments_nodes, group_by_columns))
return false;
/// subquery projection columns must contain all columns in uniq argument node list
if (!nodeListContainsAll(uniq_arguments_nodes, subquery_node->getProjectionColumns()))
return false;
return true;
};
/// Replace uniq of initial query to count
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
}
}
};
void UniqToCountPass::run(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
UniqToCountVisitor visitor(context);
visitor.visit(query_tree_node);
}
}

View File

@ -1,30 +0,0 @@
#pragma once
#include <Analyzer/IQueryTreePass.h>
namespace DB
{
/** Optimize `uniq` and its variants(except uniqUpTo) into `count` over subquery.
* Example: 'SELECT uniq(x ...) FROM (SELECT DISTINCT x ...)' to
* Result: 'SELECT count() FROM (SELECT DISTINCT x ...)'
*
* Example: 'SELECT uniq(x ...) FROM (SELECT x ... GROUP BY x ...)' to
* Result: 'SELECT count() FROM (SELECT x ... GROUP BY x ...)'
*
* Note that we can rewrite all uniq variants except uniqUpTo.
*/
class UniqToCountPass final : public IQueryTreePass
{
public:
String getName() override { return "UniqToCount"; }
String getDescription() override
{
return "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.";
}
void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override;
};
}
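
For reference, the equivalence this pass exploited (the pass and its `optimize_uniq_to_count` setting are removed from the analyzer in this commit; `t` and `x` below are placeholders):

``` sql
-- Both queries return the same value, but the second avoids building
-- a uniq* aggregation state:
SELECT uniqExact(x) FROM (SELECT DISTINCT x FROM t);
SELECT count() FROM (SELECT DISTINCT x FROM t);
```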

View File

@ -18,7 +18,6 @@
#include <Analyzer/Utils.h>
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/Passes/CountDistinctPass.h>
#include <Analyzer/Passes/UniqToCountPass.h>
#include <Analyzer/Passes/FunctionToSubcolumnsPass.h>
#include <Analyzer/Passes/RewriteAggregateFunctionWithIfPass.h>
#include <Analyzer/Passes/SumIfToCountIfPass.h>
@ -248,7 +247,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<ConvertLogicalExpressionToCNFPass>());
manager.addPass(std::make_unique<CountDistinctPass>());
manager.addPass(std::make_unique<UniqToCountPass>());
manager.addPass(std::make_unique<RewriteAggregateFunctionWithIfPass>());
manager.addPass(std::make_unique<SumIfToCountIfPass>());
manager.addPass(std::make_unique<RewriteArrayExistsToHasPass>());

View File

@ -1071,7 +1071,9 @@ void ClientBase::receiveResult(ASTPtr parsed_query, Int32 signals_before_stop, b
}
catch (const LocalFormatError &)
{
local_format_error = std::current_exception();
/// Remember the first exception.
if (!local_format_error)
local_format_error = std::current_exception();
connection->sendCancel();
}
}

View File

@ -63,7 +63,7 @@ void interruptSignalHandler(int signum);
class InternalTextLogs;
class WriteBufferFromFileDescriptor;
class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase>
class ClientBase : public Poco::Util::Application, public IHints<2>
{
public:

View File

@ -73,7 +73,7 @@ ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & fu
}
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, size_t version_)
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, std::optional<size_t> version_)
{
func = func_;
version = version_;

View File

@ -103,7 +103,7 @@ private:
public:
~ColumnAggregateFunction() override;
void set(const AggregateFunctionPtr & func_, size_t version_);
void set(const AggregateFunctionPtr & func_, std::optional<size_t> version_ = std::nullopt);
AggregateFunctionPtr getAggregateFunction() { return func; }
AggregateFunctionPtr getAggregateFunction() const { return func; }

View File

@ -80,7 +80,7 @@ StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, cha
res.data = pos;
}
memcpy(pos, &data[n], sizeof(T));
return StringRef(pos, sizeof(T));
return res;
}
template <is_decimal T>

View File

@ -57,7 +57,7 @@ ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCou
*waiter = this;
}
// Grant single slot to allocation, returns true iff more slot(s) are required
// Grant single slot to allocation; returns true iff more slot(s) are required
bool ConcurrencyControl::Allocation::grant()
{
std::unique_lock lock{mutex};

View File

@ -97,6 +97,14 @@ ThreadGroupPtr CurrentThread::getGroup()
return current_thread->getThreadGroup();
}
ContextPtr CurrentThread::getQueryContext()
{
if (unlikely(!current_thread))
return {};
return current_thread->getQueryContext();
}
std::string_view CurrentThread::getQueryId()
{
if (unlikely(!current_thread))

View File

@ -86,6 +86,10 @@ public:
static void finalizePerformanceCounters();
/// Returns a non-empty string if the thread is attached to a query
/// Returns attached query context
static ContextPtr getQueryContext();
static std::string_view getQueryId();
/// Initializes query with current thread as master thread in constructor, and detaches it in destructor

View File

@ -393,6 +393,18 @@ struct UInt128HashCRC32
}
};
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
struct UInt128HashCRC32
{
size_t operator()(UInt128 x) const
{
UInt64 crc = -1ULL;
crc = s390x_crc32(crc, x.items[UInt128::_impl::little(0)]);
crc = s390x_crc32(crc, x.items[UInt128::_impl::little(1)]);
return crc;
}
};
#else
/// On other platforms we do not use CRC32. NOTE This can be confusing.
@ -451,6 +463,19 @@ struct UInt256HashCRC32
}
};
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
struct UInt256HashCRC32
{
size_t operator()(UInt256 x) const
{
UInt64 crc = -1ULL;
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(0)]);
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(1)]);
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(2)]);
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(3)]);
return crc;
}
};
#else
/// We do not need to use CRC32 on other platforms. NOTE This can be confusing.

View File

@ -71,6 +71,28 @@ struct StringHashTableHash
res = _mm_crc32_u64(res, key.c);
return res;
}
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{
size_t res = -1ULL;
res = s390x_crc32(res, key);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const
{
size_t res = -1ULL;
res = s390x_crc32(res, key.items[UInt128::_impl::little(0)]);
res = s390x_crc32(res, key.items[UInt128::_impl::little(1)]);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const
{
size_t res = -1ULL;
res = s390x_crc32(res, key.a);
res = s390x_crc32(res, key.b);
res = s390x_crc32(res, key.c);
return res;
}
#else
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
* template parameter is available as Value
*/
template <typename ValueType>
class IFactoryWithAliases : public IHints<2, IFactoryWithAliases<ValueType>>
class IFactoryWithAliases : public IHints<2>
{
protected:
using Value = ValueType;

View File

@ -95,7 +95,7 @@ String getHintsErrorMessageSuffix(const std::vector<String> & hints);
void appendHintsMessage(String & error_message, const std::vector<String> & hints);
template <size_t MaxNumHints, typename Self>
template <size_t MaxNumHints = 1>
class IHints
{
public:

View File

@ -107,15 +107,25 @@ public:
static ThreadGroupPtr createForBackgroundProcess(ContextPtr storage_context);
std::vector<UInt64> getInvolvedThreadIds() const;
void linkThread(UInt64 thread_it);
size_t getPeakThreadsUsage() const;
void linkThread(UInt64 thread_id);
void unlinkThread();
private:
mutable std::mutex mutex;
/// Set up at creation, no race when reading
SharedData shared_data;
SharedData shared_data TSA_GUARDED_BY(mutex);
/// Set of all thread ids which has been attached to the group
std::unordered_set<UInt64> thread_ids;
std::unordered_set<UInt64> thread_ids TSA_GUARDED_BY(mutex);
/// Count of simultaneously working threads
size_t active_thread_count TSA_GUARDED_BY(mutex) = 0;
/// Peak threads count in the group
size_t peak_threads_usage TSA_GUARDED_BY(mutex) = 0;
};
/**

View File

@ -877,6 +877,24 @@ void ZooKeeper::handleEphemeralNodeExistence(const std::string & path, const std
}
}
Coordination::ReconfigResponse ZooKeeper::reconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version)
{
auto future_result = asyncReconfig(joining, leaving, new_members, version);
if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
{
impl->finalize(fmt::format("Operation timeout on {}", Coordination::OpNum::Reconfig));
throw KeeperException(Coordination::Error::ZOPERATIONTIMEOUT);
}
return future_result.get();
}
ZooKeeperPtr ZooKeeper::startNewSession() const
{
return std::make_shared<ZooKeeper>(args, zk_log);
@ -1226,6 +1244,27 @@ std::future<Coordination::SyncResponse> ZooKeeper::asyncSync(const std::string &
return future;
}
std::future<Coordination::ReconfigResponse> ZooKeeper::asyncReconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version)
{
auto promise = std::make_shared<std::promise<Coordination::ReconfigResponse>>();
auto future = promise->get_future();
auto callback = [promise](const Coordination::ReconfigResponse & response) mutable
{
if (response.error != Coordination::Error::ZOK)
promise->set_exception(std::make_exception_ptr(KeeperException(response.error)));
else
promise->set_value(response);
};
impl->reconfig(joining, leaving, new_members, version, std::move(callback));
return future;
}
void ZooKeeper::finalize(const String & reason)
{
impl->finalize(reason);

View File

@ -449,6 +449,12 @@ public:
/// disappear automatically after 3x session_timeout.
void handleEphemeralNodeExistence(const std::string & path, const std::string & fast_delete_if_equal_value);
Coordination::ReconfigResponse reconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version = -1);
/// Async interface (a small subset of operations is implemented).
///
/// Usage:
@ -529,6 +535,13 @@ public:
const std::string & path,
Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL);
using FutureReconfig = std::future<Coordination::ReconfigResponse>;
FutureReconfig asyncReconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version = -1);
void finalize(const String & reason);
void setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_);

View File

@ -188,12 +188,17 @@ namespace Hashes
#include <nmmintrin.h>
#endif
#if defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#include <crc32-s390x.h>
#endif
struct CRC32Hash
{
size_t operator()(Key x) const
{
#ifdef __SSE4_2__
return _mm_crc32_u64(-1ULL, x);
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return s390x_crc32(-1ULL, x);
#else
/// On other platforms we do not have CRC32. NOTE This can be confusing.
return intHash64(x);

View File

@ -268,6 +268,16 @@ ReadSettings Context::getReadSettings() const
return ReadSettings{};
}
ResourceManagerPtr Context::getResourceManager() const
{
return nullptr;
}
ClassifierPtr Context::getWorkloadClassifier() const
{
return nullptr;
}
void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const
{
const auto & config_ref = getConfigRef();

View File

@ -13,6 +13,7 @@
#include <Core/BackgroundSchedulePool.h>
#include <IO/AsyncReadCounters.h>
#include <IO/IResourceManager.h>
#include <Poco/Util/Application.h>
@ -118,6 +119,10 @@ public:
ReadSettings getReadSettings() const;
/// Resource management related
ResourceManagerPtr getResourceManager() const;
ClassifierPtr getWorkloadClassifier() const;
std::shared_ptr<KeeperDispatcher> getKeeperDispatcher() const;
std::shared_ptr<KeeperDispatcher> tryGetKeeperDispatcher() const;
void initializeKeeperDispatcher(bool start_async) const;

View File

@ -778,7 +778,6 @@ class IColumn;
M(Bool, function_json_value_return_type_allow_nullable, false, "Allow function JSON_VALUE to return nullable type.", 0) \
M(Bool, function_json_value_return_type_allow_complex, false, "Allow function JSON_VALUE to return complex type, such as: struct, array, map.", 0) \
M(Bool, use_with_fill_by_sorting_prefix, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently", 0) \
M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \
\
/** Experimental functions */ \
M(Bool, allow_experimental_funnel_functions, false, "Enable experimental functions for funnel analysis.", 0) \
@ -1058,6 +1057,8 @@ class IColumn;
\
M(Bool, format_display_secrets_in_show_and_select, false, "Do not hide secrets in SHOW and SELECT queries.", IMPORTANT) \
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
M(Bool, regexp_dict_flag_case_insensitive, false, "Use case-insensitive matching for a regexp_tree dictionary. Can be overridden in individual expressions with (?i) and (?-i).", 0) \
M(Bool, regexp_dict_flag_dotall, false, "Allow '.' to match newline characters for a regexp_tree dictionary.", 0) \
\
M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
@ -1076,7 +1077,7 @@ DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS)
/** Settings of query execution.
* These settings go to users.xml.
*/
struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings>
struct Settings : public BaseSettings<SettingsTraits>, public IHints<2>
{
Settings() = default;

View File

@ -117,6 +117,33 @@ Field DataTypeAggregateFunction::getDefault() const
return field;
}
bool DataTypeAggregateFunction::strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & rhs_state_type)
{
const auto * lhs_state = typeid_cast<const DataTypeAggregateFunction *>(lhs_state_type.get());
const auto * rhs_state = typeid_cast<const DataTypeAggregateFunction *>(rhs_state_type.get());
if (!lhs_state || !rhs_state)
return false;
if (lhs_state->function->getName() != rhs_state->function->getName())
return false;
if (lhs_state->parameters.size() != rhs_state->parameters.size())
return false;
for (size_t i = 0; i < lhs_state->parameters.size(); ++i)
if (lhs_state->parameters[i] != rhs_state->parameters[i])
return false;
if (lhs_state->argument_types.size() != rhs_state->argument_types.size())
return false;
for (size_t i = 0; i < lhs_state->argument_types.size(); ++i)
if (!lhs_state->argument_types[i]->equals(*rhs_state->argument_types[i]))
return false;
return true;
}
bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
{
@ -126,34 +153,7 @@ bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
auto lhs_state_type = function->getNormalizedStateType();
auto rhs_state_type = typeid_cast<const DataTypeAggregateFunction &>(rhs).function->getNormalizedStateType();
if (typeid(lhs_state_type.get()) != typeid(rhs_state_type.get()))
return false;
if (const auto * lhs_state = typeid_cast<const DataTypeAggregateFunction *>(lhs_state_type.get()))
{
const auto & rhs_state = typeid_cast<const DataTypeAggregateFunction &>(*rhs_state_type);
if (lhs_state->function->getName() != rhs_state.function->getName())
return false;
if (lhs_state->parameters.size() != rhs_state.parameters.size())
return false;
for (size_t i = 0; i < lhs_state->parameters.size(); ++i)
if (lhs_state->parameters[i] != rhs_state.parameters[i])
return false;
if (lhs_state->argument_types.size() != rhs_state.argument_types.size())
return false;
for (size_t i = 0; i < lhs_state->argument_types.size(); ++i)
if (!lhs_state->argument_types[i]->equals(*rhs_state.argument_types[i]))
return false;
return true;
}
return lhs_state_type->equals(*rhs_state_type);
return strictEquals(lhs_state_type, rhs_state_type);
}

View File

@ -60,6 +60,7 @@ public:
Field getDefault() const override;
static bool strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & rhs_state_type);
bool equals(const IDataType & rhs) const override;
bool isParametric() const override { return true; }

View File

@ -13,7 +13,7 @@ namespace ErrorCodes
}
template <typename T>
class EnumValues : public IHints<1, EnumValues<T>>
class EnumValues : public IHints<>
{
public:
using Value = std::pair<std::string, T>;

View File

@ -136,8 +136,17 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Array & x) const
DataTypes element_types;
element_types.reserve(x.size());
bool has_signed_int = false;
bool uint64_convert_possible = true;
for (const Field & elem : x)
element_types.emplace_back(applyVisitor(*this, elem));
{
DataTypePtr type = applyVisitor(*this, elem);
element_types.emplace_back(type);
checkUInt64ToIn64Conversion(has_signed_int, uint64_convert_possible, type, elem);
}
if (has_signed_int && uint64_convert_possible)
convertUInt64ToInt64IfPossible(element_types);
return std::make_shared<DataTypeArray>(getLeastSupertype<on_error>(element_types));
}
@ -165,14 +174,28 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Map & map) const
key_types.reserve(map.size());
value_types.reserve(map.size());
bool k_has_signed_int = false;
bool k_uint64_convert_possible = true;
bool v_has_signed_int = false;
bool v_uint64_convert_possible = true;
for (const auto & elem : map)
{
const auto & tuple = elem.safeGet<const Tuple &>();
assert(tuple.size() == 2);
key_types.push_back(applyVisitor(*this, tuple[0]));
value_types.push_back(applyVisitor(*this, tuple[1]));
DataTypePtr k_type = applyVisitor(*this, tuple[0]);
key_types.push_back(k_type);
checkUInt64ToIn64Conversion(k_has_signed_int, k_uint64_convert_possible, k_type, tuple[0]);
DataTypePtr v_type = applyVisitor(*this, tuple[1]);
value_types.push_back(v_type);
checkUInt64ToIn64Conversion(v_has_signed_int, v_uint64_convert_possible, v_type, tuple[1]);
}
if (k_has_signed_int && k_uint64_convert_possible)
convertUInt64ToInt64IfPossible(key_types);
if (v_has_signed_int && v_uint64_convert_possible)
convertUInt64ToInt64IfPossible(value_types);
return std::make_shared<DataTypeMap>(
getLeastSupertype<on_error>(key_types),
getLeastSupertype<on_error>(value_types));
@ -204,6 +227,28 @@ DataTypePtr FieldToDataType<on_error>::operator()(const bool &) const
return DataTypeFactory::instance().get("Bool");
}
template <LeastSupertypeOnError on_error>
void FieldToDataType<on_error>::checkUInt64ToIn64Conversion(bool & has_signed_int, bool & uint64_convert_possible, const DataTypePtr & type, const Field & elem) const
{
if (uint64_convert_possible)
{
bool is_native_int = WhichDataType(type).isNativeInt();
if (is_native_int)
has_signed_int |= is_native_int;
else if (type->getTypeId() == TypeIndex::UInt64)
uint64_convert_possible &= (elem.template get<UInt64>() <= std::numeric_limits<Int64>::max());
}
}
template <LeastSupertypeOnError on_error>
void FieldToDataType<on_error>::convertUInt64ToInt64IfPossible(DataTypes & data_types) const
{
for (auto& type : data_types)
if (type->getTypeId() == TypeIndex::UInt64)
type = std::make_shared<DataTypeInt64>();
}
template class FieldToDataType<LeastSupertypeOnError::Throw>;
template class FieldToDataType<LeastSupertypeOnError::String>;
template class FieldToDataType<LeastSupertypeOnError::Null>;

View File

@ -45,6 +45,16 @@ public:
DataTypePtr operator() (const UInt256 & x) const;
DataTypePtr operator() (const Int256 & x) const;
DataTypePtr operator() (const bool & x) const;
private:
// The conditions for converting UInt64 to Int64 are:
// 1. at least one element has a signed integer type, and
// 2. every UInt64 value fits into Int64 (is <= Int64's maximum).
void checkUInt64ToIn64Conversion(bool& has_signed_int, bool& uint64_convert_possible, const DataTypePtr & type, const Field & elem) const;
// Convert UInt64 to Int64 so that all integer elements are signed
// and the least supertype of all of them can be obtained.
void convertUInt64ToInt64IfPossible(DataTypes & data_types) const;
};
}
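
The intended effect is easiest to see on array literals, where a bare negative number is a signed field while a large positive one only fits UInt64 (a sketch of the expected behavior):

``` sql
-- The UInt64 element fits into Int64 and a signed element is present,
-- so the element type can collapse to Int64:
SELECT toTypeName([-1, 9223372036854775807]); -- expected: Array(Int64)

-- Here the UInt64 value exceeds Int64's maximum, so the conversion does
-- not apply and the previous supertype logic is used:
SELECT toTypeName([-1, 18446744073709551615]);
```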

View File

@ -493,7 +493,10 @@ void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, co
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeTextQuoted(nested_column, istr, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(nested_column, istr, settings, nested);
else
nested->deserializeTextQuoted(nested_column, istr, settings);
}, false);
if (whole && !istr.eof())
@ -604,7 +607,10 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeTextCSV(nested_column, rb, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextCSVImpl(nested_column, rb, settings, nested);
else
nested->deserializeTextCSV(nested_column, rb, settings);
}, true);
}
else
@ -612,7 +618,10 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeTextQuoted(nested_column, rb, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(nested_column, rb, settings, nested);
else
nested->deserializeTextQuoted(nested_column, rb, settings);
}, true);
}
}

View File

@ -192,7 +192,10 @@ void SerializationMap::deserializeText(IColumn & column, ReadBuffer & istr, cons
deserializeTextImpl(column, istr,
[&settings](ReadBuffer & buf, const SerializationPtr & subcolumn_serialization, IColumn & subcolumn)
{
subcolumn_serialization->deserializeTextQuoted(subcolumn, buf, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(subcolumn, buf, settings, subcolumn_serialization);
else
subcolumn_serialization->deserializeTextQuoted(subcolumn, buf, settings);
});
if (whole && !istr.eof())

View File

@ -135,7 +135,10 @@ void SerializationTuple::deserializeText(IColumn & column, ReadBuffer & istr, co
assertChar(',', istr);
skipWhitespaceIfAny(istr);
}
elems[i]->deserializeTextQuoted(extractElementColumn(column, i), istr, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(extractElementColumn(column, i), istr, settings, elems[i]);
else
elems[i]->deserializeTextQuoted(extractElementColumn(column, i), istr, settings);
}
// Special format for one element tuple (1,)
@ -366,7 +369,10 @@ void SerializationTuple::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
assertChar(settings.csv.tuple_delimiter, istr);
skipWhitespaceIfAny(istr);
}
elems[i]->deserializeTextCSV(extractElementColumn(column, i), istr, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextCSVImpl(extractElementColumn(column, i), istr, settings, elems[i]);
else
elems[i]->deserializeTextCSV(extractElementColumn(column, i), istr, settings);
}
});
}
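
The three serialization changes above route nested values through `SerializationNullable`, so a `NULL` inside an array, map, or tuple literal can be read into a non-Nullable nested type as its default value when `null_as_default` is in effect (a sketch of the intended behavior; the table is hypothetical):

``` sql
CREATE TABLE t (arr Array(UInt32)) ENGINE = Memory;
SET input_format_null_as_default = 1;
-- The NULL element falls back to the nested type's default (0) instead of
-- failing with a type error:
INSERT INTO t FORMAT CSV "[1, NULL, 3]"
SELECT arr FROM t; -- expected: [1, 0, 3]
```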

View File

@ -206,6 +206,8 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
re2_st::RE2::Options regexp_options;
regexp_options.set_log_errors(false);
regexp_options.set_case_sensitive(!flag_case_insensitive);
regexp_options.set_dot_nl(flag_dotall);
RegexTreeNodePtr node = std::make_shared<RegexTreeNode>(id, parent_id, regex, regexp_options);
int num_captures = std::min(node->searcher.NumberOfCapturingGroups() + 1, 10);
@ -330,11 +332,20 @@ void RegExpTreeDictionary::loadData()
std::vector<unsigned int> flags;
std::vector<size_t> lengths;
// Notes:
// - Always set HS_FLAG_SINGLEMATCH because we only care about whether a pattern matches at least once
// - HS_FLAG_CASELESS is supported by hs_compile_lit_multi, so we should set it if flag_case_insensitive is set.
// - HS_FLAG_DOTALL is not supported by hs_compile_lit_multi, but the '.' wildcard can't appear in any of the simple regexps
// anyway, so even if flag_dotall is set, we only need to configure the RE2 searcher, and don't need to set any Hyperscan flags.
unsigned int flag_bits = HS_FLAG_SINGLEMATCH;
if (flag_case_insensitive)
flag_bits |= HS_FLAG_CASELESS;
for (const std::string & simple_regexp : simple_regexps)
{
patterns.push_back(simple_regexp.data());
lengths.push_back(simple_regexp.size());
flags.push_back(HS_FLAG_SINGLEMATCH);
flags.push_back(flag_bits);
}
hs_database_t * db = nullptr;
@ -380,12 +391,16 @@ RegExpTreeDictionary::RegExpTreeDictionary(
const DictionaryStructure & structure_,
DictionarySourcePtr source_ptr_,
Configuration configuration_,
bool use_vectorscan_)
bool use_vectorscan_,
bool flag_case_insensitive_,
bool flag_dotall_)
: IDictionary(id_),
structure(structure_),
source_ptr(source_ptr_),
configuration(configuration_),
use_vectorscan(use_vectorscan_),
flag_case_insensitive(flag_case_insensitive_),
flag_dotall(flag_dotall_),
logger(&Poco::Logger::get("RegExpTreeDictionary"))
{
if (auto * ch_source = typeid_cast<ClickHouseDictionarySource *>(source_ptr.get()))
@ -859,7 +874,14 @@ void registerDictionaryRegExpTree(DictionaryFactory & factory)
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
return std::make_unique<RegExpTreeDictionary>(dict_id, dict_struct, std::move(source_ptr), configuration, context->getSettings().regexp_dict_allow_hyperscan);
return std::make_unique<RegExpTreeDictionary>(
dict_id,
dict_struct,
std::move(source_ptr),
configuration,
context->getSettings().regexp_dict_allow_hyperscan,
context->getSettings().regexp_dict_flag_case_insensitive,
context->getSettings().regexp_dict_flag_dotall);
};
factory.registerLayout("regexp_tree", create_layout, true);
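
The two new flags are read from the settings in effect when the dictionary is created; a hypothetical DDL sketch (the source path and attributes are made up):

``` sql
CREATE DICTIONARY user_agent_dict
(
    regexp String,
    name String
)
PRIMARY KEY regexp
SOURCE(YAMLRegExpTree(PATH '/var/lib/clickhouse/user_files/regexp_tree.yaml'))
LAYOUT(regexp_tree)
LIFETIME(0)
SETTINGS(regexp_dict_flag_case_insensitive = 1, regexp_dict_flag_dotall = 1);
```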

View File

@ -49,7 +49,9 @@ public:
const DictionaryStructure & structure_,
DictionarySourcePtr source_ptr_,
Configuration configuration_,
bool use_vectorscan_);
bool use_vectorscan_,
bool flag_case_insensitive_,
bool flag_dotall_);
std::string getTypeName() const override { return name; }
@ -85,7 +87,8 @@ public:
std::shared_ptr<const IExternalLoadable> clone() const override
{
return std::make_shared<RegExpTreeDictionary>(getDictionaryID(), structure, source_ptr->clone(), configuration, use_vectorscan);
return std::make_shared<RegExpTreeDictionary>(
getDictionaryID(), structure, source_ptr->clone(), configuration, use_vectorscan, flag_case_insensitive, flag_dotall);
}
ColumnUInt8::Ptr hasKeys(const Columns &, const DataTypes &) const override
@ -189,6 +192,8 @@ private:
using RegexTreeNodePtr = std::shared_ptr<RegexTreeNode>;
bool use_vectorscan;
bool flag_case_insensitive;
bool flag_dotall;
std::vector<std::string> simple_regexps;
std::vector<UInt64> regexp_ids;

View File

@ -7,6 +7,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Common/CurrentThread.h>
#include <Common/createHardLink.h>
#include <Common/quoteString.h>
#include <Common/logger_useful.h>
@ -65,6 +66,8 @@ DiskObjectStorage::DiskObjectStorage(
, metadata_storage(std::move(metadata_storage_))
, object_storage(std::move(object_storage_))
, send_metadata(config.getBool(config_prefix + ".send_metadata", false))
, read_resource_name(config.getString(config_prefix + ".read_resource", ""))
, write_resource_name(config.getString(config_prefix + ".write_resource", ""))
, metadata_helper(std::make_unique<DiskObjectStorageRemoteMetadataRestoreHelper>(this, ReadSettings{}))
{}
@ -480,6 +483,32 @@ DiskObjectStoragePtr DiskObjectStorage::createDiskObjectStorage()
config_prefix);
}
template <class Settings>
static inline Settings updateResourceLink(const Settings & settings, const String & resource_name)
{
if (resource_name.empty())
return settings;
if (auto query_context = CurrentThread::getQueryContext())
{
Settings result(settings);
result.resource_link = query_context->getWorkloadClassifier()->get(resource_name);
return result;
}
return settings;
}
String DiskObjectStorage::getReadResourceName() const
{
std::unique_lock lock(resource_mutex);
return read_resource_name;
}
String DiskObjectStorage::getWriteResourceName() const
{
std::unique_lock lock(resource_mutex);
return write_resource_name;
}
std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(
const String & path,
const ReadSettings & settings,
@ -495,7 +524,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskObjectStorage::readFile(
return object_storage->readObjects(
storage_objects,
object_storage->getAdjustedSettingsFromMetadataFile(settings, path),
object_storage->getAdjustedSettingsFromMetadataFile(updateResourceLink(settings, getReadResourceName()), path),
read_hint,
file_size);
}
@ -513,7 +542,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskObjectStorage::writeFile(
path,
buf_size,
mode,
object_storage->getAdjustedSettingsFromMetadataFile(settings, path));
object_storage->getAdjustedSettingsFromMetadataFile(updateResourceLink(settings, getWriteResourceName()), path));
}
Strings DiskObjectStorage::getBlobPath(const String & path) const
@ -543,6 +572,15 @@ void DiskObjectStorage::applyNewSettings(
/// FIXME we cannot use config_prefix that was passed through arguments because the disk may be wrapped with cache and we need another name
const auto config_prefix = "storage_configuration.disks." + name;
object_storage->applyNewSettings(config, config_prefix, context_);
{
std::unique_lock lock(resource_mutex);
if (String new_read_resource_name = config.getString(config_prefix + ".read_resource", ""); new_read_resource_name != read_resource_name)
read_resource_name = new_read_resource_name;
if (String new_write_resource_name = config.getString(config_prefix + ".write_resource", ""); new_write_resource_name != write_resource_name)
write_resource_name = new_write_resource_name;
}
IDisk::applyNewSettings(config, context_, config_prefix, disk_map);
}
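The reload block above leans on C++17 if-statements with initializers, so each freshly read config value lives only inside its own comparison. A minimal standalone equivalent (loadConfigValue stands in for config.getString):

```cpp
#include <iostream>
#include <string>

std::string loadConfigValue() { return "fast_reads"; } // stand-in for config.getString(...)

int main()
{
    std::string current = "slow_reads";
    if (std::string fresh = loadConfigValue(); fresh != current)
        current = fresh;          // 'fresh' is scoped to this if-statement only
    std::cout << current << '\n'; // prints: fast_reads
}
```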

View File

@ -212,6 +212,9 @@ private:
/// execution.
DiskTransactionPtr createObjectStorageTransaction();
String getReadResourceName() const;
String getWriteResourceName() const;
const String object_storage_root_path;
Poco::Logger * log;
@ -226,6 +229,10 @@ private:
const bool send_metadata;
mutable std::mutex resource_mutex;
String read_resource_name;
String write_resource_name;
std::unique_ptr<DiskObjectStorageRemoteMetadataRestoreHelper> metadata_helper;
};

View File

@ -8,6 +8,7 @@
#include <base/defines.h>
#include <Disks/ObjectStorages/MetadataStorageFromDisk.h>
#include <boost/algorithm/string/join.hpp>
namespace DB
{
@ -157,14 +158,13 @@ struct RemoveObjectStorageOperation final : public IDiskObjectStorageOperation
struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperation
{
RemoveBatchRequest remove_paths;
bool keep_all_batch_data;
NameSet file_names_remove_metadata_only;
const RemoveBatchRequest remove_paths;
const bool keep_all_batch_data;
const NameSet file_names_remove_metadata_only;
std::vector<String> paths_removed_with_objects;
std::vector<ObjectsToRemove> objects_to_remove;
bool remove_from_cache = false;
RemoveManyObjectStorageOperation(
IObjectStorage & object_storage_,
IMetadataStorage & metadata_storage_,
@ -204,6 +204,7 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
if (unlink_outcome && !keep_all_batch_data && !file_names_remove_metadata_only.contains(fs::path(path).filename()))
{
objects_to_remove.emplace_back(ObjectsToRemove{std::move(objects), std::move(unlink_outcome)});
paths_removed_with_objects.push_back(path);
}
}
catch (const Exception & e)
@ -214,6 +215,12 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
|| e.code() == ErrorCodes::CANNOT_READ_ALL_DATA
|| e.code() == ErrorCodes::CANNOT_OPEN_FILE)
{
LOG_DEBUG(
&Poco::Logger::get("RemoveManyObjectStorageOperation"),
"Can't read metadata because of an exception. Just remove it from the filesystem. Path: {}, exception: {}",
metadata_storage.getPath() + path,
e.message());
tx->unlinkFile(path);
}
else
@ -239,16 +246,31 @@ struct RemoveManyObjectStorageOperation final : public IDiskObjectStorageOperati
/// TL;DR Don't pay any attention to 404 status code
if (!remove_from_remote.empty())
object_storage.removeObjectsIfExist(remove_from_remote);
if (!keep_all_batch_data)
{
LOG_DEBUG(
&Poco::Logger::get("RemoveManyObjectStorageOperation"),
"metadata and objects were removed for [{}], "
"only metadata were removed for [{}].",
boost::algorithm::join(paths_removed_with_objects, ", "),
boost::algorithm::join(file_names_remove_metadata_only, ", "));
}
}
};
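The new log message renders both path lists with boost::algorithm::join, which concatenates any range of strings with a separator (it accepts the NameSet above just as well as a vector). A tiny usage demo:

```cpp
#include <boost/algorithm/string/join.hpp>

#include <iostream>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> paths{"a.bin", "b.bin", "c.bin"};
    std::cout << boost::algorithm::join(paths, ", ") << '\n'; // a.bin, b.bin, c.bin
}
```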
struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOperation
{
std::string path;
/// path inside disk with metadata
const std::string path;
const bool keep_all_batch_data;
/// paths inside the 'this->path'
const NameSet file_names_remove_metadata_only;
/// map from local_path to its remote objects with hardlinks counter
/// local_path is the path inside 'this->path'
std::unordered_map<std::string, ObjectsToRemove> objects_to_remove_by_path;
bool keep_all_batch_data;
NameSet file_names_remove_metadata_only;
RemoveRecursiveObjectStorageOperation(
IObjectStorage & object_storage_,
@ -275,11 +297,16 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp
{
try
{
chassert(path_to_remove.starts_with(path));
auto rel_path = String(fs::relative(fs::path(path_to_remove), fs::path(path)));
auto objects_paths = metadata_storage.getStorageObjects(path_to_remove);
auto unlink_outcome = tx->unlinkMetadata(path_to_remove);
if (unlink_outcome)
if (unlink_outcome && !file_names_remove_metadata_only.contains(rel_path))
{
objects_to_remove_by_path[path_to_remove] = ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)};
objects_to_remove_by_path[std::move(rel_path)]
= ObjectsToRemove{std::move(objects_paths), std::move(unlink_outcome)};
}
}
catch (const Exception & e)
@ -321,25 +348,38 @@ struct RemoveRecursiveObjectStorageOperation final : public IDiskObjectStorageOp
void undo() override
{
}
void finalize() override
{
if (!keep_all_batch_data)
{
std::vector<String> total_removed_paths;
total_removed_paths.reserve(objects_to_remove_by_path.size());
StoredObjects remove_from_remote;
for (auto && [local_path, objects_to_remove] : objects_to_remove_by_path)
{
if (!file_names_remove_metadata_only.contains(fs::path(local_path).filename()))
chassert(!file_names_remove_metadata_only.contains(local_path));
if (objects_to_remove.unlink_outcome->num_hardlinks == 0)
{
if (objects_to_remove.unlink_outcome->num_hardlinks == 0)
std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote));
std::move(objects_to_remove.objects.begin(), objects_to_remove.objects.end(), std::back_inserter(remove_from_remote));
total_removed_paths.push_back(local_path);
}
}
/// Read comment inside RemoveObjectStorageOperation class
/// TL;DR Don't pay any attention to 404 status code
object_storage.removeObjectsIfExist(remove_from_remote);
LOG_DEBUG(
&Poco::Logger::get("RemoveRecursiveObjectStorageOperation"),
"Recursively remove path {}: "
"metadata and objects were removed for [{}], "
"only metadata were removed for [{}].",
path,
boost::algorithm::join(total_removed_paths, ", "),
boost::algorithm::join(file_names_remove_metadata_only, ", "));
}
}
};
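objects_to_remove_by_path is now keyed by the path relative to the removed root, via the fs::relative call above. For reference, the pure-string flavor of that computation; fs::relative itself additionally resolves the paths through the filesystem:

```cpp
#include <filesystem>
#include <iostream>

int main()
{
    namespace fs = std::filesystem;
    fs::path base = "/disks/s3/store";
    fs::path file = "/disks/s3/store/202309/data.bin";
    std::cout << file.lexically_relative(base).string() << '\n'; // 202309/data.bin
}
```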

View File

@ -54,6 +54,7 @@ void registerDiskHDFS(DiskFactory & factory, bool global_skip_access_check)
std::move(hdfs_storage),
config,
config_prefix);
disk->startup(context, skip_access_check);
return disk;

View File

@ -320,7 +320,7 @@ void S3ObjectStorage::removeObjectImpl(const StoredObject & object, bool if_exis
throwIfUnexpectedError(outcome, if_exists);
LOG_TRACE(log, "Object with path {} was removed from S3", object.remote_path);
LOG_DEBUG(log, "Object with path {} was removed from S3", object.remote_path);
}
void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_exists)
@ -368,7 +368,7 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
throwIfUnexpectedError(outcome, if_exists);
LOG_TRACE(log, "Objects with paths [{}] were removed from S3", keys);
LOG_DEBUG(log, "Objects with paths [{}] were removed from S3", keys);
}
}
}

View File

@ -54,6 +54,7 @@ void registerDiskWebServer(DiskFactory & factory, bool global_skip_access_check)
object_storage,
config,
config_prefix);
disk->startup(context, skip_access_check);
return disk;
};

View File

@ -684,10 +684,18 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na
void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name)
{
auto & target = dict[name].supports_subset_of_columns;
auto & target = dict[name].subset_of_columns_support_checker;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name);
target = true;
target = [](const FormatSettings &){ return true; };
}
void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker)
{
auto & target = dict[name].subset_of_columns_support_checker;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name);
target = std::move(subset_of_columns_support_checker);
}
void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name)
@ -698,10 +706,11 @@ void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name)
target = true;
}
bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name) const
bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const DB::String & name, const ContextPtr & context, const std::optional<FormatSettings> & format_settings_) const
{
const auto & target = getCreators(name);
return target.supports_subset_of_columns;
auto format_settings = format_settings_ ? *format_settings_ : getFormatSettings(context);
return target.subset_of_columns_support_checker && target.subset_of_columns_support_checker(format_settings);
}
void FormatFactory::registerAdditionalInfoForSchemaCacheGetter(
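The refactoring replaces the static supports_subset_of_columns flag with a predicate evaluated against the effective FormatSettings, so support can now depend on settings such as with_names_use_header. A reduced model of the before/after semantics (the types here are simplified stand-ins):

```cpp
#include <functional>
#include <iostream>

struct FormatSettings { bool with_names_use_header = true; };
using SubsetOfColumnsSupportChecker = std::function<bool(const FormatSettings &)>;

int main()
{
    // markFormatSupportsSubsetOfColumns: unconditional support becomes a constant predicate.
    SubsetOfColumnsSupportChecker always = [](const FormatSettings &) { return true; };
    // registerSubsetOfColumnsSupportChecker: support that depends on the settings.
    SubsetOfColumnsSupportChecker conditional = [](const FormatSettings & s) { return s.with_names_use_header; };

    FormatSettings settings;
    settings.with_names_use_header = false;
    std::cout << always(settings) << ' ' << conditional(settings) << '\n'; // prints: 1 0
}
```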

View File

@ -123,6 +123,10 @@ private:
/// and the name of the message.
using AdditionalInfoForSchemaCacheGetter = std::function<String(const FormatSettings & settings)>;
/// Some formats can support reading a subset of columns depending on settings.
/// The checker should return true if the format supports reading a subset of columns for the given settings.
using SubsetOfColumnsSupportChecker = std::function<bool(const FormatSettings & settings)>;
struct Creators
{
InputCreator input_creator;
@ -132,12 +136,11 @@ private:
SchemaReaderCreator schema_reader_creator;
ExternalSchemaReaderCreator external_schema_reader_creator;
bool supports_parallel_formatting{false};
bool supports_subcolumns{false};
bool supports_subset_of_columns{false};
bool prefers_large_blocks{false};
NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker;
AppendSupportChecker append_support_checker;
AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter;
SubsetOfColumnsSupportChecker subset_of_columns_support_checker;
};
using FormatsDictionary = std::unordered_map<String, Creators>;
@ -225,9 +228,10 @@ public:
void markOutputFormatSupportsParallelFormatting(const String & name);
void markOutputFormatPrefersLargeBlocks(const String & name);
void markFormatSupportsSubsetOfColumns(const String & name);
bool checkIfFormatSupportsSubsetOfColumns(const String & name) const;
void markFormatSupportsSubsetOfColumns(const String & name);
void registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker);
bool checkIfFormatSupportsSubsetOfColumns(const String & name, const ContextPtr & context, const std::optional<FormatSettings> & format_settings_ = std::nullopt) const;
bool checkIfFormatHasSchemaReader(const String & name) const;
bool checkIfFormatHasExternalSchemaReader(const String & name) const;

View File

@ -1,16 +1,96 @@
#include <Formats/insertNullAsDefaultIfNeeded.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnMap.h>
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Functions/FunctionHelpers.h>
namespace DB
{
void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values)
bool insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values)
{
if (isArray(input_column.type) && isArray(header_column.type))
{
ColumnWithTypeAndName nested_input_column;
const auto * array_input_column = checkAndGetColumn<ColumnArray>(input_column.column.get());
nested_input_column.column = array_input_column->getDataPtr();
nested_input_column.type = checkAndGetDataType<DataTypeArray>(input_column.type.get())->getNestedType();
ColumnWithTypeAndName nested_header_column;
nested_header_column.column = checkAndGetColumn<ColumnArray>(header_column.column.get())->getDataPtr();
nested_header_column.type = checkAndGetDataType<DataTypeArray>(header_column.type.get())->getNestedType();
if (!insertNullAsDefaultIfNeeded(nested_input_column, nested_header_column, 0, nullptr))
return false;
input_column.column = ColumnArray::create(nested_input_column.column, array_input_column->getOffsetsPtr());
input_column.type = std::make_shared<DataTypeArray>(std::move(nested_input_column.type));
return true;
}
if (isTuple(input_column.type) && isTuple(header_column.type))
{
const auto * tuple_input_column = checkAndGetColumn<ColumnTuple>(input_column.column.get());
const auto * tuple_input_type = checkAndGetDataType<DataTypeTuple>(input_column.type.get());
const auto * tuple_header_column = checkAndGetColumn<ColumnTuple>(header_column.column.get());
const auto * tuple_header_type = checkAndGetDataType<DataTypeTuple>(header_column.type.get());
if (tuple_input_type->getElements().size() != tuple_header_type->getElements().size())
return false;
Columns nested_input_columns;
nested_input_columns.reserve(tuple_input_type->getElements().size());
DataTypes nested_input_types;
nested_input_types.reserve(tuple_input_type->getElements().size());
bool changed = false;
for (size_t i = 0; i != tuple_input_type->getElements().size(); ++i)
{
ColumnWithTypeAndName nested_input_column;
nested_input_column.column = tuple_input_column->getColumnPtr(i);
nested_input_column.type = tuple_input_type->getElement(i);
ColumnWithTypeAndName nested_header_column;
nested_header_column.column = tuple_header_column->getColumnPtr(i);
nested_header_column.type = tuple_header_type->getElement(i);
changed |= insertNullAsDefaultIfNeeded(nested_input_column, nested_header_column, 0, nullptr);
nested_input_columns.push_back(std::move(nested_input_column.column));
nested_input_types.push_back(std::move(nested_input_column.type));
}
if (!changed)
return false;
input_column.column = ColumnTuple::create(std::move(nested_input_columns));
input_column.type = std::make_shared<DataTypeTuple>(std::move(nested_input_types));
return true;
}
if (isMap(input_column.type) && isMap(header_column.type))
{
ColumnWithTypeAndName nested_input_column;
nested_input_column.column = checkAndGetColumn<ColumnMap>(input_column.column.get())->getNestedColumnPtr();
nested_input_column.type = checkAndGetDataType<DataTypeMap>(input_column.type.get())->getNestedType();
ColumnWithTypeAndName nested_header_column;
nested_header_column.column = checkAndGetColumn<ColumnMap>(header_column.column.get())->getNestedColumnPtr();
nested_header_column.type = checkAndGetDataType<DataTypeMap>(header_column.type.get())->getNestedType();
if (!insertNullAsDefaultIfNeeded(nested_input_column, nested_header_column, 0, nullptr))
return false;
input_column.column = ColumnMap::create(std::move(nested_input_column.column));
input_column.type = std::make_shared<DataTypeMap>(std::move(nested_input_column.type));
return true;
}
if (!isNullableOrLowCardinalityNullable(input_column.type) || isNullableOrLowCardinalityNullable(header_column.type))
return;
return false;
if (block_missing_values)
{
@ -32,6 +112,8 @@ void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const Col
const auto * lc_type = assert_cast<const DataTypeLowCardinality *>(input_column.type.get());
input_column.type = std::make_shared<DataTypeLowCardinality>(removeNullable(lc_type->getDictionaryType()));
}
return true;
}
}
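One detail worth noting in the tuple branch above: changed |= insertNullAsDefaultIfNeeded(...) deliberately uses bitwise-or assignment because it does not short-circuit, so every tuple element is converted even after one of them has already reported a change. A small demonstration of the difference:

```cpp
#include <iostream>

static int calls = 0;
bool touch(bool v) { ++calls; return v; }

int main()
{
    bool changed = false;
    changed |= touch(true);
    changed |= touch(false);                  // still evaluated
    std::cout << calls << '\n';               // prints: 2

    calls = 0;
    bool sc = touch(true) || touch(false);    // || short-circuits: second call skipped
    std::cout << calls << ' ' << sc << '\n';  // prints: 1 1
}
```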

View File

@ -5,6 +5,6 @@
namespace DB
{
void insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values);
bool insertNullAsDefaultIfNeeded(ColumnWithTypeAndName & input_column, const ColumnWithTypeAndName & header_column, size_t column_i, BlockMissingValues * block_missing_values);
}

View File

@ -12,8 +12,9 @@ void registerWithNamesAndTypes(const std::string & base_format_name, RegisterWit
void markFormatWithNamesAndTypesSupportsSamplingColumns(const std::string & base_format_name, FormatFactory & factory)
{
factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNames");
factory.markFormatSupportsSubsetOfColumns(base_format_name + "WithNamesAndTypes");
auto setting_checker = [](const FormatSettings & settings){ return settings.with_names_use_header; };
factory.registerSubsetOfColumnsSupportChecker(base_format_name + "WithNames", setting_checker);
factory.registerSubsetOfColumnsSupportChecker(base_format_name + "WithNamesAndTypes", setting_checker);
}
}

File diff suppressed because it is too large

View File

@ -8,53 +8,34 @@
# include <Functions/FunctionHelpers.h>
# include <Functions/IFunction.h>
# include <Interpreters/Context_fwd.h>
# include <turbob64.h>
# include <libbase64.h>
# include <Common/MemorySanitizer.h>
# include <cstddef>
# include <span>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int INCORRECT_DATA;
}
namespace Detail
{
inline size_t base64Decode(const std::span<const UInt8> src, UInt8 * dst)
{
# if defined(__aarch64__)
return tb64sdec(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# else
return _tb64d(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# endif
}
}
struct Base64Encode
{
static constexpr auto name = "base64Encode";
static size_t getBufferSize(const size_t string_length, const size_t string_count)
static size_t getBufferSize(size_t string_length, size_t string_count)
{
return ((string_length - string_count) / 3 + string_count) * 4 + string_count;
}
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
static size_t perform(const std::span<const UInt8> src, UInt8 * dst)
{
/*
* Some bug in sse arm64 implementation?
* `base64Encode(repeat('a', 46))` returns wrong padding character
*/
# if defined(__aarch64__)
return tb64senc(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# else
return _tb64e(reinterpret_cast<const uint8_t *>(src.data()), src.size(), reinterpret_cast<uint8_t *>(dst));
# endif
size_t outlen = 0;
base64_encode(reinterpret_cast<const char *>(src.data()), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
return outlen;
}
};
@ -62,15 +43,17 @@ struct Base64Decode
{
static constexpr auto name = "base64Decode";
static size_t getBufferSize(const size_t string_length, const size_t string_count)
static size_t getBufferSize(size_t string_length, size_t string_count)
{
return ((string_length - string_count) / 4 + string_count) * 3 + string_count;
}
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
static size_t perform(const std::span<const UInt8> src, UInt8 * dst)
{
const auto outlen = Detail::base64Decode(src, dst);
if (src.size() > 0 && !outlen)
size_t outlen = 0;
int rc = base64_decode(reinterpret_cast<const char *>(src.data()), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
if (rc != 1)
throw Exception(
ErrorCodes::INCORRECT_DATA,
"Failed to {} input '{}'",
@ -85,17 +68,16 @@ struct TryBase64Decode
{
static constexpr auto name = "tryBase64Decode";
static size_t getBufferSize(const size_t string_length, const size_t string_count)
static size_t getBufferSize(size_t string_length, size_t string_count)
{
return Base64Decode::getBufferSize(string_length, string_count);
}
static size_t performCoding(const std::span<const UInt8> src, UInt8 * dst)
static size_t perform(const std::span<const UInt8> src, UInt8 * dst)
{
if (src.empty())
return 0;
size_t outlen = 0;
base64_decode(reinterpret_cast<const char *>(src.data()), src.size(), reinterpret_cast<char *>(dst), &outlen, 0);
const auto outlen = Detail::base64Decode(src, dst);
// during decoding character array can be partially polluted
// if fail, revert back and clean
if (!outlen)
@ -119,20 +101,16 @@ public:
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (arguments.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}: 1 expected.", getName());
FunctionArgumentDescriptors mandatory_arguments{
{"value", &isStringOrFixedString<IDataType>, nullptr, "String or FixedString"}
};
if (!WhichDataType(arguments[0].type).isStringOrFixedString())
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 1st argument of function {}. Must be FixedString or String.",
arguments[0].type->getName(),
getName());
validateFunctionArgumentTypes(*this, arguments, mandatory_arguments);
return std::make_shared<DataTypeString>();
}
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, const size_t input_rows_count) const override
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
const auto & input_column = arguments[0].column;
if (const auto * src_column_as_fixed_string = checkAndGetColumn<ColumnFixedString>(*input_column))
@ -148,7 +126,7 @@ public:
}
private:
static ColumnPtr execute(const ColumnString & src_column, const size_t src_row_count)
static ColumnPtr execute(const ColumnString & src_column, size_t src_row_count)
{
auto dst_column = ColumnString::create();
auto & dst_chars = dst_column->getChars();
@ -169,7 +147,7 @@ private:
for (size_t row = 0; row < src_row_count; ++row)
{
const size_t src_length = src_offsets[row] - src_offset_prev - 1;
const auto outlen = Func::performCoding({src, src_length}, dst_pos);
const auto outlen = Func::perform({src, src_length}, dst_pos);
/// The Base64 library uses AVX-512 with some shuffle operations.
/// Memory sanitizer doesn't understand that uninitialized memory in a SIMD register is harmless if it is not used in the result of the shuffle.
@ -188,7 +166,7 @@ private:
return dst_column;
}
static ColumnPtr execute(const ColumnFixedString & src_column, const size_t src_row_count)
static ColumnPtr execute(const ColumnFixedString & src_column, size_t src_row_count)
{
auto dst_column = ColumnString::create();
auto & dst_chars = dst_column->getChars();
@ -207,7 +185,7 @@ private:
for (size_t row = 0; row < src_row_count; ++row)
{
const auto outlen = Func::performCoding({src, src_n}, dst_pos);
const auto outlen = Func::perform({src, src_n}, dst_pos);
/// The Base64 library uses AVX-512 with some shuffle operations.
/// Memory sanitizer doesn't understand that uninitialized memory in a SIMD register is harmless if it is not used in the result of the shuffle.
@ -225,6 +203,7 @@ private:
return dst_column;
}
};
}
#endif
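For context, a minimal standalone use of the aklomp libbase64 API that the rewritten functions call; flags = 0 lets the library's codec_choose pick the best SIMD implementation at runtime, and the buffer sizes here are ad hoc for the demo:

```cpp
#include <libbase64.h>

#include <cstdio>
#include <cstring>

int main()
{
    const char * in = "ClickHouse";
    char encoded[64];
    size_t enc_len = 0;
    base64_encode(in, strlen(in), encoded, &enc_len, 0);
    printf("%.*s\n", (int) enc_len, encoded);                       // Q2xpY2tIb3VzZQ==

    char decoded[64];
    size_t dec_len = 0;
    int ok = base64_decode(encoded, enc_len, decoded, &dec_len, 0); // 1 on success, as checked above
    printf("%d %.*s\n", ok, (int) dec_len, decoded);                // 1 ClickHouse
}
```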

View File

@ -33,6 +33,7 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
@ -88,7 +89,6 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN;
extern const int CANNOT_PARSE_BOOL;
}
@ -990,20 +990,9 @@ struct ConvertImpl<FromDataType, DataTypeString, Name, ConvertDefaultBehaviorTag
{
ColumnUInt8::MutablePtr null_map = copyNullMap(arguments[0].column);
const auto & col_with_type_and_name = columnGetNested(arguments[0]);
const auto & col_with_type_and_name = columnGetNested(arguments[0]);
const auto & type = static_cast<const FromDataType &>(*col_with_type_and_name.type);
const DateLUTImpl * time_zone = nullptr;
if constexpr (std::is_same_v<FromDataType, DataTypeDate> || std::is_same_v<FromDataType, DataTypeDate32>)
time_zone = &DateLUT::instance();
/// For argument of Date or DateTime type, second argument with time zone could be specified.
if constexpr (std::is_same_v<FromDataType, DataTypeDateTime> || std::is_same_v<FromDataType, DataTypeDateTime64>)
{
auto non_null_args = createBlockWithNestedColumns(arguments);
time_zone = &extractTimeZoneFromFunctionArguments(non_null_args, 1, 0);
}
if (const auto col_from = checkAndGetColumn<ColVecType>(col_with_type_and_name.column.get()))
{
auto col_to = ColumnString::create();
@ -1013,17 +1002,7 @@ struct ConvertImpl<FromDataType, DataTypeString, Name, ConvertDefaultBehaviorTag
ColumnString::Offsets & offsets_to = col_to->getOffsets();
size_t size = vec_from.size();
if constexpr (std::is_same_v<FromDataType, DataTypeDate>)
data_to.resize(size * (strlen("YYYY-MM-DD") + 1));
else if constexpr (std::is_same_v<FromDataType, DataTypeDate32>)
data_to.resize(size * (strlen("YYYY-MM-DD") + 1));
else if constexpr (std::is_same_v<FromDataType, DataTypeDateTime>)
data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss") + 1));
else if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
data_to.resize(size * (strlen("YYYY-MM-DD hh:mm:ss.") + col_from->getScale() + 1));
else
data_to.resize(size * 3); /// Arbitrary
data_to.resize(size * 3);
offsets_to.resize(size);
WriteBufferFromVector<ColumnString::Chars> write_buffer(data_to);
@ -1032,7 +1011,8 @@ struct ConvertImpl<FromDataType, DataTypeString, Name, ConvertDefaultBehaviorTag
{
for (size_t i = 0; i < size; ++i)
{
bool is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, time_zone);
bool is_ok = FormatImpl<FromDataType>::template execute<bool>(vec_from[i], write_buffer, &type, nullptr);
/// We don't use timezones in this branch
null_map->getData()[i] |= !is_ok;
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
@ -1042,7 +1022,7 @@ struct ConvertImpl<FromDataType, DataTypeString, Name, ConvertDefaultBehaviorTag
{
for (size_t i = 0; i < size; ++i)
{
FormatImpl<FromDataType>::template execute<void>(vec_from[i], write_buffer, &type, time_zone);
FormatImpl<FromDataType>::template execute<void>(vec_from[i], write_buffer, &type, nullptr);
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
}
@ -1703,19 +1683,7 @@ struct ConvertImplGenericFromString
const auto & val = col_from_string->getDataAt(i);
ReadBufferFromMemory read_buffer(val.data, val.size);
try
{
serialization_from.deserializeWholeText(column_to, read_buffer, format_settings);
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && typeid_cast<ColumnNullable *>(&column_to))
{
column_to.insertDefault();
continue;
}
throw;
}
serialization_from.deserializeWholeText(column_to, read_buffer, format_settings);
if (!read_buffer.eof())
{
@ -3308,14 +3276,40 @@ private:
{
return &ConvertImplGenericFromString<ColumnString>::execute;
}
else
else if (const auto * agg_type = checkAndGetDataType<DataTypeAggregateFunction>(from_type_untyped.get()))
{
if (cast_type == CastType::accurateOrNull)
return createToNullableColumnWrapper();
else
throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported",
from_type_untyped->getName(), to_type->getName());
if (agg_type->getFunction()->haveSameStateRepresentation(*to_type->getFunction()))
{
return [function = to_type->getFunction()](
ColumnsWithTypeAndName & arguments,
const DataTypePtr & /* result_type */,
const ColumnNullable * /* nullable_source */,
size_t /*input_rows_count*/) -> ColumnPtr
{
const auto & argument_column = arguments.front();
const auto * col_agg = checkAndGetColumn<ColumnAggregateFunction>(argument_column.column.get());
if (col_agg)
{
auto new_col_agg = ColumnAggregateFunction::create(*col_agg);
new_col_agg->set(function);
return new_col_agg;
}
else
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Illegal column {} for function CAST AS AggregateFunction",
argument_column.column->getName());
}
};
}
}
if (cast_type == CastType::accurateOrNull)
return createToNullableColumnWrapper();
else
throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Conversion from {} to {} is not supported",
from_type_untyped->getName(), to_type->getName());
}
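The new branch above handles CAST between two AggregateFunction types whose states share a binary representation: it copies the column and merely swaps in the target type's function object, leaving the serialized states untouched. A toy analogue of that "same bytes, new interpreter" idea (all names invented for illustration):

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct State { long sum = 0; }; // binary state shared by both "function types"

struct Interpreter
{
    virtual ~Interpreter() = default;
    virtual long finish(const State & s) const = 0;
};
struct Sum final : Interpreter { long finish(const State & s) const override { return s.sum; } };
struct SumDoubled final : Interpreter { long finish(const State & s) const override { return 2 * s.sum; } };

int main()
{
    std::vector<State> column{{10}, {32}};
    std::shared_ptr<const Interpreter> fn = std::make_shared<Sum>();
    fn = std::make_shared<SumDoubled>();  // the "cast": same states, new interpreter
    std::cout << fn->finish(column[0]) + fn->finish(column[1]) << '\n'; // prints: 84
}
```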
WrapperType createArrayWrapper(const DataTypePtr & from_type_untyped, const DataTypeArray & to_type) const
@ -4096,7 +4090,16 @@ private:
safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName();
if (from_type->equals(*to_type) && safe_convert_custom_types)
return createIdentityWrapper(from_type);
{
/// We can only use identity conversion for DataTypeAggregateFunction when they are strictly equivalent.
if (typeid_cast<const DataTypeAggregateFunction *>(from_type.get()))
{
if (DataTypeAggregateFunction::strictEquals(from_type, to_type))
return createIdentityWrapper(from_type);
}
else
return createIdentityWrapper(from_type);
}
else if (WhichDataType(from_type).isNothing())
return createNothingWrapper(to_type.get());
@ -4174,21 +4177,15 @@ private:
{
if constexpr (std::is_same_v<ToDataType, DataTypeIPv4>)
{
ret = [cast_ipv4_ipv6_default_on_conversion_error_value,
input_format_ipv4_default_on_conversion_error_value,
requested_result_is_nullable](
ColumnsWithTypeAndName & arguments,
const DataTypePtr & result_type,
const ColumnNullable * column_nullable,
size_t) -> ColumnPtr
ret = [cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv4_default_on_conversion_error_value, requested_result_is_nullable](
ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t)
-> ColumnPtr
{
if (!WhichDataType(result_type).isIPv4())
throw Exception(ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName());
const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr;
if (requested_result_is_nullable)
return convertToIPv4<IPStringToNumExceptionMode::Null>(arguments[0].column, null_map);
else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value)
if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv4_default_on_conversion_error_value || requested_result_is_nullable)
return convertToIPv4<IPStringToNumExceptionMode::Default>(arguments[0].column, null_map);
else
return convertToIPv4<IPStringToNumExceptionMode::Throw>(arguments[0].column, null_map);
@ -4199,22 +4196,16 @@ private:
if constexpr (std::is_same_v<ToDataType, DataTypeIPv6>)
{
ret = [cast_ipv4_ipv6_default_on_conversion_error_value,
input_format_ipv6_default_on_conversion_error_value,
requested_result_is_nullable](
ColumnsWithTypeAndName & arguments,
const DataTypePtr & result_type,
const ColumnNullable * column_nullable,
size_t) -> ColumnPtr
ret = [cast_ipv4_ipv6_default_on_conversion_error_value, input_format_ipv6_default_on_conversion_error_value, requested_result_is_nullable](
ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t)
-> ColumnPtr
{
if (!WhichDataType(result_type).isIPv6())
throw Exception(
ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv6", result_type->getName());
const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr;
if (requested_result_is_nullable)
return convertToIPv6<IPStringToNumExceptionMode::Null>(arguments[0].column, null_map);
else if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value)
if (cast_ipv4_ipv6_default_on_conversion_error_value || input_format_ipv6_default_on_conversion_error_value || requested_result_is_nullable)
return convertToIPv6<IPStringToNumExceptionMode::Default>(arguments[0].column, null_map);
else
return convertToIPv6<IPStringToNumExceptionMode::Throw>(arguments[0].column, null_map);
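Both the IPv4 and IPv6 wrappers above fold the nullable-result case into the same branch as the default-on-error settings, leaving a three-way dispatch on an error-handling mode. A toy version of that dispatch (the enum and behavior are illustrative, not the actual IPStringToNumExceptionMode semantics):

```cpp
#include <iostream>
#include <optional>
#include <string>

enum class OnError { Throw, Default, Null };

std::optional<int> convert(const std::string & s, OnError mode)
{
    try { return std::stoi(s); }
    catch (...)
    {
        switch (mode)
        {
            case OnError::Default: return 0;            // substitute a default value
            case OnError::Null:    return std::nullopt; // becomes NULL
            case OnError::Throw:   throw;               // propagate the parse error
        }
    }
    return std::nullopt; // unreachable
}

int main()
{
    std::cout << *convert("42", OnError::Throw) << '\n';              // 42
    std::cout << *convert("oops", OnError::Default) << '\n';          // 0
    std::cout << convert("oops", OnError::Null).has_value() << '\n';  // 0
}
```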
@ -4225,18 +4216,7 @@ private:
if (to_type->getCustomSerialization() && to_type->getCustomName())
{
ret = [requested_result_is_nullable](
ColumnsWithTypeAndName & arguments,
const DataTypePtr & result_type,
const ColumnNullable * column_nullable,
size_t input_rows_count) -> ColumnPtr
{
auto wrapped_result_type = result_type;
if (requested_result_is_nullable)
wrapped_result_type = makeNullable(result_type);
return ConvertImplGenericFromString<typename FromDataType::ColumnType>::execute(
arguments, wrapped_result_type, column_nullable, input_rows_count);
};
ret = &ConvertImplGenericFromString<typename FromDataType::ColumnType>::execute;
return true;
}
}
@ -4251,9 +4231,7 @@ private:
ErrorCodes::TYPE_MISMATCH, "Wrong result type {}. Expected IPv4", result_type->getName());
const auto * null_map = column_nullable ? &column_nullable->getNullMapData() : nullptr;
if (requested_result_is_nullable)
return convertIPv6ToIPv4<IPStringToNumExceptionMode::Null>(arguments[0].column, null_map);
else if (cast_ipv4_ipv6_default_on_conversion_error_value)
if (cast_ipv4_ipv6_default_on_conversion_error_value || requested_result_is_nullable)
return convertIPv6ToIPv4<IPStringToNumExceptionMode::Default>(arguments[0].column, null_map);
else
return convertIPv6ToIPv4<IPStringToNumExceptionMode::Throw>(arguments[0].column, null_map);

View File

@ -0,0 +1,108 @@
#include <Functions/FunctionFactory.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDate32.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDateTime64.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <typename Op>
class FunctionOpDate : public IFunction
{
public:
static constexpr auto name = Op::name;
explicit FunctionOpDate(ContextPtr context_) : context(context_) {}
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionOpDate<Op>>(context); }
String getName() const override { return name; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
{
if (!isDateOrDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 1st argument of function {}. Should be a date or a date with time",
arguments[0].type->getName(),
getName());
if (!isInterval(arguments[1].type))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 2nd argument of function {}. Should be an interval",
arguments[1].type->getName(),
getName());
auto op = FunctionFactory::instance().get(Op::internal_name, context);
auto op_build = op->build(arguments);
return op_build->getResultType();
}
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {0, 2}; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
{
if (!isDateOrDate32(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 1st argument of function {}. Should be a date or a date with time",
arguments[0].type->getName(),
getName());
if (!isInterval(arguments[1].type))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of 2nd argument of function {}. Should be an interval",
arguments[1].type->getName(),
getName());
auto op = FunctionFactory::instance().get(Op::internal_name, context);
auto op_build = op->build(arguments);
auto res_type = op_build->getResultType();
return op_build->execute(arguments, res_type, input_rows_count);
}
private:
ContextPtr context;
};
}
struct AddDate
{
static constexpr auto name = "addDate";
static constexpr auto internal_name = "plus";
};
struct SubDate
{
static constexpr auto name = "subDate";
static constexpr auto internal_name = "minus";
};
using FunctionAddDate = FunctionOpDate<AddDate>;
using FunctionSubDate = FunctionOpDate<SubDate>;
REGISTER_FUNCTION(AddInterval)
{
factory.registerFunction<FunctionAddDate>({}, FunctionFactory::CaseInsensitive);
factory.registerFunction<FunctionSubDate>({}, FunctionFactory::CaseInsensitive);
}
}
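FunctionOpDate is a thin adapter: it validates the argument types and then delegates to an existing function looked up by name ("plus" or "minus"). A reduced model of that name-based delegation pattern (toy types, not the IFunction machinery):

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>

using Fn = std::function<int(int, int)>;

const std::map<std::string, Fn> registry = {
    {"plus",  [](int a, int b) { return a + b; }},
    {"minus", [](int a, int b) { return a - b; }},
};

// addDate/subDate analogue: a fixed internal_name selects the shared implementation.
int opDate(const std::string & internal_name, int date, int interval)
{
    return registry.at(internal_name)(date, interval); // look up, then execute
}

int main()
{
    std::cout << opDate("plus", 20230101, 1) << '\n';  // delegates to "plus"
    std::cout << opDate("minus", 20230101, 1) << '\n'; // delegates to "minus"
}
```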

View File

@ -140,18 +140,30 @@ struct NgramDistanceImpl
{
case 1:
res = 0;
memcpy(&res, pos, 1);
if constexpr (std::endian::native == std::endian::little)
memcpy(&res, pos, 1);
else
reverseMemcpy(reinterpret_cast<char*>(&res) + sizeof(CodePoint) - 1, pos, 1);
break;
case 2:
res = 0;
memcpy(&res, pos, 2);
if constexpr (std::endian::native == std::endian::little)
memcpy(&res, pos, 2);
else
reverseMemcpy(reinterpret_cast<char*>(&res) + sizeof(CodePoint) - 2, pos, 2);
break;
case 3:
res = 0;
memcpy(&res, pos, 3);
if constexpr (std::endian::native == std::endian::little)
memcpy(&res, pos, 3);
else
reverseMemcpy(reinterpret_cast<char*>(&res) + sizeof(CodePoint) - 3, pos, 3);
break;
default:
memcpy(&res, pos, 4);
if constexpr (std::endian::native == std::endian::little)
memcpy(&res, pos, 4);
else
reverseMemcpy(reinterpret_cast<char*>(&res) + sizeof(CodePoint) - 4, pos, 4);
}
/// This is not a really true case insensitive utf8. We zero the 5-th bit of every byte.
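The endian-aware branches keep the extracted code point numerically identical across byte orders: a straight memcpy on little-endian machines, a reversed copy into the high bytes everywhere else. A self-contained probe of the same trick:

```cpp
#include <bit>
#include <cstdint>
#include <cstring>
#include <iostream>

int main()
{
    const unsigned char bytes[4] = {0x11, 0x22, 0x33, 0x44};
    std::uint32_t value = 0;
    if constexpr (std::endian::native == std::endian::little)
        std::memcpy(&value, bytes, 4);
    else
        for (int i = 0; i < 4; ++i) // reverseMemcpy analogue: mirror the byte order
            std::memcpy(reinterpret_cast<char *>(&value) + 3 - i, bytes + i, 1);
    std::cout << std::hex << value << '\n'; // 44332211 on either endianness
}
```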

View File

@ -15,7 +15,7 @@ class BackupEntriesCollector;
class RestorerFromBackup;
/// Factory for SQLUserDefinedFunctions
class UserDefinedSQLFunctionFactory : public IHints<1, UserDefinedSQLFunctionFactory>
class UserDefinedSQLFunctionFactory : public IHints<>
{
public:
static UserDefinedSQLFunctionFactory & instance();
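IHints<> compiles because the template's parameters are defaulted, so the empty angle brackets pick up the same values the old spelled-out form supplied. A minimal analogue of defaulted template arguments (the parameter name and default are invented for illustration):

```cpp
#include <cstddef>
#include <iostream>

template <std::size_t MaxEdits = 1>
struct Hints
{
    static constexpr std::size_t max_edits = MaxEdits;
};

int main()
{
    std::cout << Hints<>::max_edits << '\n';  // defaults apply: prints 1
    std::cout << Hints<2>::max_edits << '\n'; // explicit override: prints 2
}
```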

View File

@ -7,10 +7,9 @@ namespace DB
{
REGISTER_FUNCTION(Base64Decode)
{
tb64ini(0, 0);
factory.registerFunction<FunctionBase64Conversion<Base64Decode>>();
/// MysQL compatibility alias.
/// MySQL compatibility alias.
factory.registerAlias("FROM_BASE64", "base64Decode", FunctionFactory::CaseInsensitive);
}
}

View File

@ -7,10 +7,9 @@ namespace DB
{
REGISTER_FUNCTION(Base64Encode)
{
tb64ini(0, 0);
factory.registerFunction<FunctionBase64Conversion<Base64Encode>>();
/// MysQL compatibility alias.
/// MySQL compatibility alias.
factory.registerAlias("TO_BASE64", "base64Encode", FunctionFactory::CaseInsensitive);
}
}

View File

@ -13,7 +13,7 @@ REGISTER_FUNCTION(ToDayOfMonth)
{
factory.registerFunction<FunctionToDayOfMonth>();
/// MysQL compatibility alias.
/// MySQL compatibility alias.
factory.registerAlias("DAY", "toDayOfMonth", FunctionFactory::CaseInsensitive);
factory.registerAlias("DAYOFMONTH", "toDayOfMonth", FunctionFactory::CaseInsensitive);
}

View File

@ -12,7 +12,7 @@ REGISTER_FUNCTION(ToDayOfWeek)
{
factory.registerFunction<FunctionToDayOfWeek>();
/// MysQL compatibility alias.
/// MySQL compatibility alias.
factory.registerAlias("DAYOFWEEK", "toDayOfWeek", FunctionFactory::CaseInsensitive);
}

Some files were not shown because too many files have changed in this diff