Merge branch 'master' into arrow_parquet_account_for_monotonically_increasing_offsets_across_batches

This commit is contained in:
Arthur Passos 2023-09-12 17:50:36 -03:00
commit da8caeffd2
496 changed files with 9587 additions and 5620 deletions

View File

@ -76,6 +76,7 @@ jobs:
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -73,6 +73,7 @@ jobs:
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -60,6 +60,7 @@ jobs:
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -53,6 +53,7 @@ jobs:
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -94,6 +94,7 @@ jobs:
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

View File

@ -52,6 +52,7 @@ jobs:
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags
- name: Download changed aarch64 images
uses: actions/download-artifact@v3
with:

6
.gitmodules vendored
View File

@ -40,9 +40,6 @@
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse/boost
[submodule "contrib/base64"]
path = contrib/base64
url = https://github.com/ClickHouse/Turbo-Base64
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/ClickHouse/arrow
@ -348,3 +345,6 @@
[submodule "contrib/robin-map"]
path = contrib/robin-map
url = https://github.com/Tessil/robin-map.git
[submodule "contrib/aklomp-base64"]
path = contrib/aklomp-base64
url = https://github.com/aklomp/base64.git

View File

@ -47,7 +47,7 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE
set (PARALLEL_LINK_JOBS 2)
endif()
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB DRAM, 'OFF' means the native core count).")
message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).")
if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -5,9 +5,9 @@ set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
set (CMAKE_SYSTEM_NAME "Linux")
set (CMAKE_SYSTEM_PROCESSOR "ppc64le")
set (CMAKE_C_COMPILER_TARGET "ppc64le-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "ppc64le-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "ppc64le-linux-gnu")
set (CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu")
set (CMAKE_CXX_COMPILER_TARGET "powerpc64le-linux-gnu")
set (CMAKE_ASM_COMPILER_TARGET "powerpc64le-linux-gnu")
# Will be changed later, but somehow needed to be set here.
set (CMAKE_AR "ar")

View File

@ -135,7 +135,7 @@ add_contrib (aws-cmake
aws-cmake
)
add_contrib (base64-cmake base64)
add_contrib (aklomp-base64-cmake aklomp-base64)
add_contrib (simdjson-cmake simdjson)
add_contrib (rapidjson-cmake rapidjson)
add_contrib (fastops-cmake fastops)

1
contrib/aklomp-base64 vendored Submodule

@ -0,0 +1 @@
Subproject commit e77bd70bdd860c52c561568cffb251d88bba064c

View File

@ -0,0 +1 @@
config.h

View File

@ -0,0 +1,68 @@
option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BASE64)
message(STATUS "Not using base64")
return()
endif()
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aklomp-base64")
if (ARCH_AMD64)
# These defines enable/disable SIMD codecs in base64's runtime codec dispatch.
# We don't want to limit ourselves --> enable all.
set(HAVE_SSSE3 1)
set(HAVE_SSE41 1)
set(HAVE_SSE42 1)
set(HAVE_AVX 1)
set(HAVE_AVX2 1)
set(HAVE_AVX512 1)
endif ()
if (ARCH_AARCH64)
# The choice of HAVE_NEON* depends on the target machine because base64 provides
# no runtime dispatch on ARM. NEON is only mandatory with the normal build profile.
if(NOT NO_ARMV81_OR_HIGHER)
set(HAVE_NEON64 1)
set(HAVE_NEON32 0)
endif ()
endif ()
configure_file(config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h)
add_library(_base64
"${LIBRARY_DIR}/lib/lib.c"
"${LIBRARY_DIR}/lib/codec_choose.c"
"${LIBRARY_DIR}/lib/tables/tables.c"
"${LIBRARY_DIR}/lib/tables/table_dec_32bit.h"
"${LIBRARY_DIR}/lib/tables/table_enc_12bit.h"
"${LIBRARY_DIR}/lib/codecs.h"
"${CMAKE_CURRENT_BINARY_DIR}/config.h"
"${LIBRARY_DIR}/lib/arch/generic/codec.c"
"${LIBRARY_DIR}/lib/arch/ssse3/codec.c"
"${LIBRARY_DIR}/lib/arch/sse41/codec.c"
"${LIBRARY_DIR}/lib/arch/sse42/codec.c"
"${LIBRARY_DIR}/lib/arch/avx/codec.c"
"${LIBRARY_DIR}/lib/arch/avx2/codec.c"
"${LIBRARY_DIR}/lib/arch/avx512/codec.c"
"${LIBRARY_DIR}/lib/arch/neon32/codec.c"
"${LIBRARY_DIR}/lib/arch/neon64/codec.c"
)
if (ARCH_AMD64)
set_source_files_properties(${LIBRARY_DIR}/lib/arch/ssse3/codec.c PROPERTIES COMPILE_FLAGS "-mssse3")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse41/codec.c PROPERTIES COMPILE_FLAGS "-msse4.1")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse42/codec.c PROPERTIES COMPILE_FLAGS "-msse4.2")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx/codec.c PROPERTIES COMPILE_FLAGS "-mavx")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx2/codec.c PROPERTIES COMPILE_FLAGS "-mavx2")
set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx512/codec.c PROPERTIES COMPILE_FLAGS "-mavx512vl -mavx512vbmi")
endif()
target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR}/include)
target_include_directories(_base64 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
add_library(ch_contrib::base64 ALIAS _base64)

View File

@ -0,0 +1,9 @@
#cmakedefine01 HAVE_SSSE3
#cmakedefine01 HAVE_SSE41
#cmakedefine01 HAVE_SSE42
#cmakedefine01 HAVE_AVX
#cmakedefine01 HAVE_AVX2
#cmakedefine01 HAVE_AVX512
#cmakedefine01 HAVE_NEON32
#cmakedefine01 HAVE_NEON64

1
contrib/base64 vendored

@ -1 +0,0 @@
Subproject commit 8628e258090f9eb76d90ac3c91e1ab4690e9aa11

View File

@ -1,60 +0,0 @@
if(ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_S390X)
option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES})
elseif(ENABLE_BASE64)
message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64")
endif()
if (NOT ENABLE_BASE64)
message(STATUS "Not using base64")
return()
endif()
SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base64")
add_library(_base64_scalar OBJECT "${LIBRARY_DIR}/turbob64c.c" "${LIBRARY_DIR}/turbob64d.c")
add_library(_base64_ssse3 OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This file also contains code for ARM NEON
if (ARCH_AMD64)
add_library(_base64_avx OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This is not a mistake. One file is compiled twice.
add_library(_base64_avx2 OBJECT "${LIBRARY_DIR}/turbob64avx2.c")
endif ()
target_compile_options(_base64_scalar PRIVATE -falign-loops)
if (ARCH_AMD64)
target_compile_options(_base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops)
target_compile_options(_base64_avx PRIVATE -falign-loops -mavx)
target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2)
else ()
if (ARCH_PPC64LE)
target_compile_options(_base64_ssse3 PRIVATE -D__SSSE3__ -falign-loops)
else()
target_compile_options(_base64_ssse3 PRIVATE -falign-loops)
endif()
endif ()
if (ARCH_AMD64)
add_library(_base64
$<TARGET_OBJECTS:_base64_scalar>
$<TARGET_OBJECTS:_base64_ssse3>
$<TARGET_OBJECTS:_base64_avx>
$<TARGET_OBJECTS:_base64_avx2>)
else ()
add_library(_base64
$<TARGET_OBJECTS:_base64_scalar>
$<TARGET_OBJECTS:_base64_ssse3>)
endif ()
target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR})
if (XCODE OR XCODE_VERSION)
# https://gitlab.kitware.com/cmake/cmake/issues/17457
# Some native build systems may not like targets that have only object files, so consider adding at least one real source file
# This applies to Xcode.
if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
endif ()
target_sources(_base64 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
endif ()
add_library(ch_contrib::base64 ALIAS _base64)

2
contrib/libunwind vendored

@ -1 +1 @@
Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5
Subproject commit 30cc1d3fd3655a5cfa0ab112fe320fb9fc0a8344

View File

@ -126,7 +126,7 @@ if(ENABLE_OPENSSL_DYNAMIC OR ENABLE_OPENSSL)
elseif(ARCH_PPC64LE)
macro(perl_generate_asm FILE_IN FILE_OUT)
add_custom_command(OUTPUT ${FILE_OUT}
COMMAND /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT})
COMMAND /usr/bin/env perl ${FILE_IN} "linux64v2" ${FILE_OUT})
endmacro()
perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-ppc.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-ppc.s)

2
contrib/re2 vendored

@ -1 +1 @@
Subproject commit 03da4fc0857c285e3a26782f6bc8931c4c950df4
Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c

View File

@ -15,8 +15,8 @@ CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci}
# Pre-configured destination cluster, where to export the data
CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"}
EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"}
EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "}
function __set_connection_args
@ -125,9 +125,9 @@ function setup_logs_replication
echo 'Create %_log tables'
clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
# Calculate hash of its structure:
# Calculate hash of its structure. Note: 1 is the version of extra columns - increment it if extra columns are changed:
hash=$(clickhouse-client --query "
SELECT sipHash64(groupArray((name, type)))
SELECT sipHash64(1, groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
ORDER BY position)

View File

@ -18,6 +18,7 @@ RUN apt-get update \
python3-termcolor \
unixodbc \
pv \
jq \
zstd \
--yes --no-install-recommends

View File

@ -120,7 +120,7 @@ function clone_submodules
contrib/libxml2
contrib/libunwind
contrib/fmtlib
contrib/base64
contrib/aklomp-base64
contrib/cctz
contrib/libcpuid
contrib/libdivide

View File

@ -40,6 +40,7 @@ RUN apt-get update -y \
cargo \
zstd \
file \
jq \
pv \
zip \
p7zip-full \

View File

@ -1139,6 +1139,8 @@ Optional parameters:
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
### Configuring the cache
@ -1251,6 +1253,8 @@ Other parameters:
* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).

View File

@ -30,7 +30,7 @@ It may lack support for new features.
## Usage {#cli_usage}
The client can be used in interactive and non-interactive (batch) mode.
The client can be used in interactive and non-interactive (batch) mode.
### Gather your connection details
<ConnectionDetails />
@ -177,8 +177,8 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--user, -u` The username. Default value: default.
- `--password` The password. Default value: empty string.
- `--ask-password` - Prompt the user to enter a password.
- `--query, -q` The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` file path with queries to execute. Cannot be used simultaneously with `--query`.
- `--query, -q` The query to process when using non-interactive mode. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`. Cannot be used simultaneously with `--query`.
- `--multiquery, -n` If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `--multiline, -m` If specified, allow multiline queries (do not send the query on Enter).
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).

View File

@ -18,6 +18,8 @@ $ curl 'http://localhost:8123/'
Ok.
```
Also see: [HTTP response codes caveats](#http_response_codes_caveats).
Sometimes, the `curl` command is not available on the user's operating system. On Ubuntu or Debian, run `sudo apt install curl`. Please refer to this [documentation](https://curl.se/download.html) to install it before running the examples.
Web UI can be accessed here: `http://localhost:8123/play`.
@ -56,7 +58,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
@ -286,9 +288,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
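For reference, a header sequence like the one above can be produced with a request along the following lines (the query and the reported values are illustrative; they differ per run and dataset):

```bash
# Ask the server to emit X-ClickHouse-Progress headers while the query runs.
curl -v 'http://localhost:8123/?send_progress_in_http_headers=1&query=SELECT+count()+FROM+numbers(100000000)'
```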
Possible header fields:
@ -323,6 +325,27 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&
Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client-side, the error can only be detected at the parsing stage.
## HTTP response codes caveats {#http_response_codes_caveats}
Because of a limitation of the HTTP protocol, an HTTP 200 response code does not guarantee that a query was successful.
Here is an example:
```
curl -v -Ss "http://localhost:8123/?max_block_size=1&query=select+sleepEachRow(0.001),throwIf(number=2)from+numbers(5)"
* Trying 127.0.0.1:8123...
...
< HTTP/1.1 200 OK
...
Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(equals(number, 2) :: 1) -> throwIf(equals(number, 2))
```
The reason for this behavior is the nature of the HTTP protocol. The HTTP header is sent first with an HTTP code of 200, followed by the HTTP body, and then the error is injected into the body as plain text.
This behavior is independent of the format used, whether it's `Native`, `TSV`, or `JSON`; the error message will always be in the middle of the response stream.
You can mitigate this problem by enabling `wait_end_of_query=1` ([Response Buffering](#response-buffering)). In this case, the sending of the HTTP header is delayed until the entire query is resolved.
However, this does not completely solve the problem because the result must still fit within the `http_response_buffer_size`, and other settings like `send_progress_in_http_headers` can interfere with the delay of the header.
The only way to catch all errors is to analyze the HTTP body before parsing it using the required format.
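As a sketch of the mitigation (the exact status code and error text depend on the server version), the same failing query with `wait_end_of_query=1` delays the header until the query finishes, so the error surfaces in the status line rather than in the middle of the body:

```bash
# Response buffering: the HTTP status is sent only after the whole query has run,
# so a failure is reported as a non-200 status instead of text inside a 200 response.
curl -v -Ss "http://localhost:8123/?max_block_size=1&wait_end_of_query=1&query=select+sleepEachRow(0.001),throwIf(number=2)from+numbers(5)"
```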
### Queries with Parameters {#cli-queries-with-parameters}
You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters).
@ -416,7 +439,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -581,7 +604,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -621,7 +644,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -673,7 +696,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -692,7 +715,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -11,6 +11,8 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p
The query profiler is automatically enabled in ClickHouse Cloud, and you can run a sample query as follows:

:::note
If you are running the following query in ClickHouse Cloud, make sure to change `FROM system.trace_log` to `FROM clusterAllReplicas(default, system.trace_log)` to select from all nodes of the cluster.
:::
``` sql
SELECT
count(),

View File

@ -71,7 +71,7 @@ Possible values:
- Any positive integer.
Default value: 150.
Default value: 1000.
ClickHouse artificially slows down `INSERT` queries (adds a sleep) so that the background merge process can merge parts faster than they are added.

View File

@ -4644,6 +4644,14 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
└─────────────────────┴──────────────────────────┘
```
## partial_result_update_duration_ms
Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting it to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET.
## max_rows_in_partial_result
Maximum number of rows to show in the partial result after every real-time update while the query runs (if the query uses OFFSET, use the partial result limit plus OFFSET as the value).
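A minimal sketch of how these settings might be combined in a long-running query (the feature is experimental; the query itself is illustrative):

```sql
-- Stream a partial top-10 to the interactive client every 100 ms while the sort runs.
SELECT number
FROM numbers(1000000000)
ORDER BY number DESC
LIMIT 10
SETTINGS max_threads = 1,
         partial_result_update_duration_ms = 100,
         max_rows_in_partial_result = 10;
```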
## validate_tcp_client_information {#validate-tcp-client-information}
Determines whether validation of client information is enabled when a query packet is received from a client using a TCP connection.

View File

@ -3,12 +3,13 @@ slug: /en/operations/system-tables/information_schema
---
# INFORMATION_SCHEMA
`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables.
The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used.
`INFORMATION_SCHEMA` (or: `information_schema`) is a system database which provides a (somewhat) standardized, [DBMS-agnostic view](https://en.wikipedia.org/wiki/Information_schema) on metadata of database objects. The views in `INFORMATION_SCHEMA` are generally inferior to normal system tables but tools can use them to obtain basic information in a cross-DBMS manner. The structure and content of views in `INFORMATION_SCHEMA` is supposed to evolve in a backwards-compatible way, i.e. only new functionality is added but existing functionality is not changed or removed. In terms of internal implementation, views in `INFORMATION_SCHEMA` usually map to normal system tables like [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md).
``` sql
SHOW TABLES FROM INFORMATION_SCHEMA;
-- or:
SHOW TABLES FROM information_schema;
```
``` text
@ -17,6 +18,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
│ SCHEMATA │
│ TABLES │
│ VIEWS │
│ columns │
│ schemata │
│ tables │
│ views │
└──────────┘
```
@ -27,6 +32,8 @@ SHOW TABLES FROM INFORMATION_SCHEMA;
- [TABLES](#tables)
- [VIEWS](#views)
Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns`, are provided for reasons of compatibility with other databases.
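For example, the following two queries are equivalent (illustrative):

```sql
-- Upper-case and lower-case views expose the same data.
SELECT table_name FROM INFORMATION_SCHEMA.TABLES WHERE table_schema = 'system' LIMIT 3;
SELECT table_name FROM information_schema.tables WHERE table_schema = 'system' LIMIT 3;
```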
## COLUMNS {#columns}
Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table and columns that are not supported in ClickHouse or do not make sense (always `NULL`), but must be present according to the standard.

View File

@ -1,7 +1,7 @@
---
slug: /en/operations/system-tables/licenses
---
# licenses
# licenses
Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources.
@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
│ [...] │ [...] │ [...] │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```

View File

@ -101,7 +101,8 @@ Columns:
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
- `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.

View File

@ -0,0 +1,64 @@
---
slug: /en/operations/system-tables/scheduler
---
# scheduler
Contains information and status for [scheduling nodes](/docs/en/operations/workload-scheduling.md/#hierarchy) residing on the local server.
This table can be used for monitoring. The table contains a row for every scheduling node.
Example:
``` sql
SELECT *
FROM system.scheduler
WHERE resource = 'network_read' AND path = '/prio/fair/prod'
FORMAT Vertical
```
``` text
Row 1:
──────
resource: network_read
path: /prio/fair/prod
type: fifo
weight: 5
priority: 0
is_active: 0
active_children: 0
dequeued_requests: 67
dequeued_cost: 4692272
busy_periods: 63
vruntime: 938454.1999999989
system_vruntime: ᴺᵁᴸᴸ
queue_length: 0
queue_cost: 0
budget: -60524
is_satisfied: ᴺᵁᴸᴸ
inflight_requests: ᴺᵁᴸᴸ
inflight_cost: ᴺᵁᴸᴸ
max_requests: ᴺᵁᴸᴸ
max_cost: ᴺᵁᴸᴸ
```
Columns:
- `resource` (`String`) - Resource name
- `path` (`String`) - Path to a scheduling node within this resource scheduling hierarchy
- `type` (`String`) - Type of a scheduling node.
- `weight` (`Float64`) - Weight of a node, used by a parent node of `fair` type.
- `priority` (`Int64`) - Priority of a node, used by a parent node of 'priority' type (Lower value means higher priority).
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
- `active_children` (`UInt64`) - The number of children in active state.
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
- `queue_length` (`Nullable(UInt64)`) - For `fifo` nodes only. Current number of resource requests residing in the queue.
- `queue_cost` (`Nullable(UInt64)`) - For `fifo` nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue.
- `budget` (`Nullable(Int64)`) - For `fifo` nodes only. The number of available "cost units" for new resource requests. Can appear in case of a discrepancy between the estimated and real costs of resource requests (e.g. after a read/write failure)
- `is_satisfied` (`Nullable(UInt8)`) - For constraint nodes only (e.g. `inflight_limit`). Equals `1` if all the constraints of this node are satisfied.
- `inflight_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The number of resource requests dequeued from this node, that are currently in consumption state.
- `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state.
- `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation.
- `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation.

View File

@ -16,6 +16,8 @@ A client application to interact with clickhouse-keeper by its native protocol.
- `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
- `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
- `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
- `--log-level=LEVEL` — Set log level. Default value: `information`.
- `--no-confirmation` — If set, several commands will not require confirmation. Default value: `false` in interactive mode and `true` in query mode.
- `--help` — Shows the help message.
## Example {#clickhouse-keeper-client-example}
@ -44,6 +46,7 @@ keeper foo bar
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
- `cd [path]` -- Change the working path (default `.`)
- `exists <path>` -- Returns `1` if node exists, `0` otherwise
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
- `create <path> <value> [mode]` -- Creates new node with the set value
- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
@ -56,3 +59,5 @@ keeper foo bar
- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
- `sync <path>` -- Synchronizes node between processes and leader
- `reconfig <add|remove|set> "<arg>" [version]` -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration
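For instance, the newly documented commands follow the syntax above; a hypothetical session might run:

```
set /my-node "new-value" 3
sync /my-node
```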

View File

@ -202,8 +202,8 @@ Arguments:
- `-S`, `--structure` — table structure for input data.
- `--input-format` — input format, `TSV` by default.
- `-f`, `--file` — path to data, `stdin` by default.
- `-q`, `--query` — queries to execute with `;` as delimiter. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` - file path with queries to execute. Cannot be used simultaneously with `--query`.
- `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` - file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`.
- `--multiquery, -n` If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `-N`, `--table` — table name where to put output data, `table` by default.
- `--format`, `--output-format` — output format, `TSV` by default.

View File

@ -0,0 +1,153 @@
---
slug: /en/operations/workload-scheduling
sidebar_position: 69
sidebar_label: "Workload scheduling"
title: "Workload scheduling"
---
When ClickHouse executes multiple queries simultaneously, they may use shared resources (e.g. disks). Scheduling constraints and policies can be applied to regulate how resources are utilized and shared between different workloads. For every resource, a scheduling hierarchy can be configured. The hierarchy root represents a resource, while the leaves are queues holding requests that exceed the resource capacity.
:::note
Currently, only remote disk IO can be scheduled using the described method. For CPU scheduling, see settings about thread pools and [`concurrent_threads_soft_limit_num`](server-configuration-parameters/settings.md#concurrent_threads_soft_limit_num). For flexible memory limits, see [Memory overcommit](settings/memory-overcommit.md)
:::
## Disk configuration {#disk-config}
To enable IO scheduling for a specific disk, you have to specify `read_resource` and/or `write_resource` in the storage configuration. This tells ClickHouse which resource should be used for every read and write request on the given disk. A read and a write resource can refer to the same resource name, which is useful for local SSDs or HDDs. Multiple different disks can also refer to the same resource, which is useful for remote disks: for example, if you want to allow a fair division of network bandwidth between "production" and "development" workloads.
Example:
```xml
<clickhouse>
<storage_configuration>
...
<disks>
<s3>
<type>s3</type>
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
<read_resource>network_read</read_resource>
<write_resource>network_write</write_resource>
</s3>
</disks>
<policies>
<s3_main>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3_main>
</policies>
</storage_configuration>
</clickhouse>
```
## Workload markup {#workload_markup}
Queries can be marked with the setting `workload` to distinguish different workloads. If `workload` is not set, the value "default" is used. Note that you are able to specify another value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from a user to be marked with a fixed value of the `workload` setting.
Let's consider an example of a system with two different workloads: "production" and "development".
```sql
SELECT count() FROM my_table WHERE value = 42 SETTINGS workload = 'production'
SELECT count() FROM my_table WHERE value = 13 SETTINGS workload = 'development'
```
## Resource scheduling hierarchy {#hierarchy}
From the standpoint of the scheduling subsystem, a resource represents a hierarchy of scheduling nodes.
```mermaid
graph TD
subgraph network_read
nr_root(("/"))
-->|100 concurrent requests| nr_fair("fair")
-->|75% bandwidth| nr_prod["prod"]
nr_fair
-->|25% bandwidth| nr_dev["dev"]
end
subgraph network_write
nw_root(("/"))
-->|100 concurrent requests| nw_fair("fair")
-->|75% bandwidth| nw_prod["prod"]
nw_fair
-->|25% bandwidth| nw_dev["dev"]
end
```
**Possible node types:**
* `inflight_limit` (constraint) - blocks if either number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child.
* `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1).
* `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
* `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.
The following example shows how to define IO scheduling hierarchies shown in the picture:
```xml
<clickhouse>
<resources>
<network_read>
<node path="/">
<type>inflight_limit</type>
<max_requests>100</max_requests>
</node>
<node path="/fair">
<type>fair</type>
</node>
<node path="/fair/prod">
<type>fifo</type>
<weight>3</weight>
</node>
<node path="/fair/dev">
<type>fifo</type>
</node>
</network_read>
<network_write>
<node path="/">
<type>inflight_limit</type>
<max_requests>100</max_requests>
</node>
<node path="/fair">
<type>fair</type>
</node>
<node path="/fair/prod">
<type>fifo</type>
<weight>3</weight>
</node>
<node path="/fair/dev">
<type>fifo</type>
</node>
</network_write>
</resources>
</clickhouse>
```
## Workload classifiers {#workload_classifiers}
Workload classifiers are used to define the mapping from the `workload` specified by a query to the leaf-queues that should be used for specific resources. At the moment, workload classification is simple: only static mapping is available.
Example:
```xml
<clickhouse>
<workload_classifiers>
<production>
<network_read>/fair/prod</network_read>
<network_write>/fair/prod</network_write>
</production>
<development>
<network_read>/fair/dev</network_read>
<network_write>/fair/dev</network_write>
</development>
<default>
<network_read>/fair/dev</network_read>
<network_write>/fair/dev</network_write>
</default>
</workload_classifiers>
</clickhouse>
```
## See also
- [system.scheduler](/docs/en/operations/system-tables/scheduler.md)

View File

@ -7,6 +7,10 @@ sidebar_position: 30
The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).
:::note
Alias:
- `STD`
- `STDDEV_POP`
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
:::
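A quick illustration that the aliases resolve to the same function (all three columns return equal values):

```sql
SELECT stddevPop(number), STDDEV_POP(number), STD(number)
FROM numbers(100);
```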

View File

@ -7,6 +7,8 @@ sidebar_position: 31
The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md).
:::note
Alias: `STDDEV_SAMP`.
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
:::

View File

@ -9,6 +9,8 @@ Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x
In other words, dispersion for a set of values. Returns `Float64`.
:::note
Alias: `VAR_POP`.
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
:::

View File

@ -11,6 +11,8 @@ It represents an unbiased estimate of the variance of a random variable if passe
Returns `Float64`. When `n <= 1`, returns `+∞`.
:::note
Alias: `VAR_SAMP`.
:::

:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
:::

View File

@ -2361,6 +2361,12 @@ Result:
└────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────┘
```
#### Matching Modes
Pattern matching behavior can be modified with certain dictionary settings:
- `regexp_dict_flag_case_insensitive`: Use case-insensitive matching (defaults to `false`). Can be overridden in individual expressions with `(?i)` and `(?-i)`.
- `regexp_dict_flag_dotall`: Allow '.' to match newline characters (defaults to `false`).
### Use Regular Expression Tree Dictionary in ClickHouse Cloud
The `YAMLRegExpTree` source used above works in ClickHouse Open Source but not in ClickHouse Cloud. To use regexp tree dictionaries in ClickHouse Cloud, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause.

View File

@ -237,6 +237,11 @@ type_samoa: DateTime('US/Samoa')
int32samoa: 1546300800
```
**See Also**
- [formatDateTime](#date_time_functions-formatDateTime) - supports non-constant timezone.
- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone.
## timeZoneOf
Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types.
@ -720,6 +725,42 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d
└────────────┴───────────┴───────────┴───────────┴───────────────┘
```
## toDaysSinceYearZero
Returns, for a given date, the number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`TO_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_to-days) function.
**Syntax**
``` sql
toDaysSinceYearZero(date)
```
Aliases: `TO_DAYS`
**Arguments**
- `date` — The date to calculate the number of days passed since year zero from. [Date](../../sql-reference/data-types/date.md) or [Date32](../../sql-reference/data-types/date32.md).
**Returned value**
The number of days passed since date 0000-01-01.
Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Example**
``` sql
SELECT toDaysSinceYearZero(toDate('2023-09-08'));
```
Result:
``` text
┌─toDaysSinceYearZero(toDate('2023-09-08'))──┐
│ 713569 │
└────────────────────────────────────────────┘
```
## age
Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
@ -1494,6 +1535,33 @@ Result:
└─────────────────────────────────────────────────────────────────────┘
```
Additionally, the `formatDateTime` function can take a third String argument containing the name of the time zone. Example: `Asia/Istanbul`. In this case, the time is formatted according to the specified time zone.
**Example**
```sql
SELECT
now() AS ts,
time_zone,
formatDateTime(ts, '%T', time_zone) AS str_tz_time
FROM system.time_zones
WHERE time_zone LIKE 'Europe%'
LIMIT 10
```

Result:

```text
┌──────────────────ts─┬─time_zone─────────┬─str_tz_time─┐
│ 2023-09-08 19:13:40 │ Europe/Amsterdam │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Andorra │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Astrakhan │ 23:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Athens │ 22:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Belfast │ 20:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Belgrade │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Berlin │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Bratislava │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Brussels │ 21:13:40 │
│ 2023-09-08 19:13:40 │ Europe/Bucharest │ 22:13:40 │
└─────────────────────┴───────────────────┴─────────────┘
```
**See Also**
- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)

View File

@ -204,7 +204,7 @@ Other possible results:
Query:
```sql
SELECT detectLanguageMixed('Je pense que je ne parviendrai jamais à parler français comme un natif. Where theres a will, theres a way.');
SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where theres a will, theres a way.');
```
Result:

View File

@ -33,6 +33,13 @@ Returns an array of selected substrings. Empty substrings may be selected when:
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
:::note
The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings` > 0 meant that `max_substring`-many splits were performed and that the remainder of the string was returned as the final element of the list.
For example,
- in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b','c=d']`
- in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2); -- ['a','b']`
:::
**Example**
``` sql
@ -63,7 +70,6 @@ splitByString(separator, s[, max_substrings]))
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible.
**Returned value(s)**
Returns an array of selected substrings. Empty substrings may be selected when:

View File

@ -892,16 +892,29 @@ Query:
``` sql
SELECT
now() AS now_local,
toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
now() AS ts,
time_zone,
toString(ts, time_zone) AS str_tz_datetime
FROM system.time_zones
WHERE time_zone LIKE 'Europe%'
LIMIT 10
```
Result:
```response
┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
└─────────────────────┴─────────────────────┘
┌──────────────────ts─┬─time_zone─────────┬─str_tz_datetime─────┐
│ 2023-09-08 19:14:59 │ Europe/Amsterdam │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Andorra │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Astrakhan │ 2023-09-08 23:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Athens │ 2023-09-08 22:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Belfast │ 2023-09-08 20:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Belgrade │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Berlin │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Bratislava │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Brussels │ 2023-09-08 21:14:59 │
│ 2023-09-08 19:14:59 │ Europe/Bucharest │ 2023-09-08 22:14:59 │
└─────────────────────┴───────────────────┴─────────────────────┘
```
Also see the `toUnixTimestamp` function.

View File

@ -57,3 +57,9 @@ Output of a removed comment:
│ │
└─────────┘
```
**Caveats**
For Replicated tables, the comment can be different on different replicas. Modifying the comment applies to a single replica.
The feature is available since version 23.9. It does not work in previous ClickHouse versions.
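A sketch of the behavior (table name is hypothetical): running the statement on one replica changes the comment only there.

```sql
-- Executed on a single replica; other replicas keep their previous comment.
ALTER TABLE replicated_events MODIFY COMMENT 'hot data, keep 30 days';

SELECT comment
FROM system.tables
WHERE database = currentDatabase() AND name = 'replicated_events';
```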

View File

@ -391,19 +391,19 @@ DEFLATE_QPL is not available in ClickHouse Cloud.
### Specialized Codecs
These codecs are designed to make compression more effective by using specific features of data. Some of these codecs do not compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation.
These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themselves, they instead preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate.
#### Delta
`Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, its 1. Delta is a data preparation codec, i.e. cannot be used stand-alone.
`Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it's 1. Delta is a data preparation codec, i.e. it cannot be used stand-alone.
#### DoubleDelta
`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, its 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). DoubleDelta is a data preparation codec, i.e. cannot be used stand-alone.
`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it's 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). DoubleDelta is a data preparation codec, i.e. it cannot be used stand-alone.
#### GCD
`GCD()` - - Calculates the greatest common denominator (GCD) of the values in the column, then divides each value by the GCD. Can be used with integer, decimal and date/time columns. A viable use case are timestamps or monetary values with high precision. GCD is a data preparation codec, i.e. cannot be used stand-alone.
`GCD()` — Calculates the greatest common divisor (GCD) of the values in the column, then divides each value by the GCD. Can be used with integer, decimal and date/time columns. The codec is well suited for columns with values that change (increase or decrease) in multiples of the GCD, e.g. 24, 28, 16, 24, 8, 24 (GCD = 4). GCD is a data preparation codec, i.e. it cannot be used stand-alone.
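A minimal sketch of declaring the codec (combined with a general-purpose codec, since GCD alone does not compress; table and column names are illustrative):

```sql
CREATE TABLE gcd_codec_example
(
    ts DateTime,
    -- Values that move in multiples of a common divisor, e.g. sizes counted in 4 KiB blocks.
    bytes_used UInt64 CODEC(GCD, LZ4)
)
ENGINE = MergeTree
ORDER BY ts;
```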
#### Gorilla

View File

@ -638,3 +638,16 @@ Outputs the content of the [system.table_engines](../../operations/system-tables
**See Also**
- [system.table_engines](../../operations/system-tables/table_engines.md) table
## SHOW FUNCTIONS
``` sql
SHOW FUNCTIONS [LIKE | ILIKE '<pattern>']
```
Outputs the content of the [system.functions](../../operations/system-tables/functions.md) table.
If either `LIKE` or `ILIKE` clause is specified, the query returns a list of system functions whose names match the provided `<pattern>`.
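For example, to list only functions whose names start with `toDate`:

```sql
SHOW FUNCTIONS LIKE 'toDate%';
```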
**See Also**
- [system.functions](../../operations/system-tables/functions.md) table

View File

@ -128,7 +128,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
- `--port` — the port to connect to, 9000 by default. Note that the HTTP interface and the native interface use different ports.
- `--user, -u` — the username, `default` by default.
- `--password` — the password, an empty string by default.
- `--query, -q` — the query to execute when using non-interactive mode.
- `--query, -q` — the query to execute when using non-interactive mode. `--query` can be specified multiple times (`--query "SELECT 1;" --query "SELECT 2;" ...`).
- `--queries-file` — path to a file with queries to execute. Only one of the options `query` or `queries-file` can be specified.
- `--database, -d` — select the current default database. By default, the value is taken from the server settings (the `default` database).
- `--multiline, -m` — if specified, allow multiline queries (do not send the query on Enter).

View File

@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
@ -267,9 +267,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Query execution progress can be tracked with the `X-ClickHouse-Progress` response headers. To do that, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of a header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
Possible header fields:
@ -530,7 +530,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -570,7 +570,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -622,7 +622,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -641,7 +641,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
│ [...] │ [...] │ [...] │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```

View File

@ -99,7 +99,8 @@ ClickHouse не удаляет данные из таблица автомати
- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) – counters that measure various metrics. Their descriptions can be found in the [system.events](#system_tables-events) table.
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) – settings that were changed when the client ran the query. To enable logging of setting changes, set the `log_query_settings` parameter to 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) – a comment for the log entry. An arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if there is no comment.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) – IDs of the threads that participated in query processing.
- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) – IDs of the threads that participated in query processing; these threads do not necessarily run simultaneously.
- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) – the maximum number of simultaneously running threads that participated in query processing.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) – canonical names of the `aggregate functions` used while running the query.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) – canonical names of the `aggregate function combinators` used while running the query.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) – canonical names of the `database engines` used while running the query.

View File

@ -116,7 +116,7 @@ $ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM
- `--port` – the port to connect to, 9000 by default. Note that the HTTP interface and the native TCP interface use different ports.
- `--user, -u` – the username. Default: `default`.
- `--password` – the password. Default: empty string.
- `--query, -q` – the query to run in non-interactive mode.
- `--query, -q` – the query to run in non-interactive mode. `--query` may be specified multiple times (`--query "SELECT 1;" --query "SELECT 2;"...`).
- `--database, -d` – the current default database. Default: the server's setting (the `default` database by default).
- `--multiline, -m` – if specified, allow multiline queries (Enter only inserts a newline and does not send the query).
- `--multiquery, -n` – if specified, allow processing multiple queries separated by `;`; only effective in non-interactive mode.

View File

@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
1
```
@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Query progress information can be received in the `X-ClickHouse-Progress` response headers. To do that, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334","peak_memory_usage":"23155600"}
```
Displayed fields:
@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -20,21 +20,9 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15
``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │
│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │
│ avro │ Apache │ /contrib/avro/LICENSE.txt │
│ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │
│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │
│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │
│ aws │ Apache │ /contrib/aws/LICENSE.txt │
│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │
│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │
│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │
│ brotli │ MIT │ /contrib/brotli/LICENSE │
│ capnproto │ MIT │ /contrib/capnproto/LICENSE │
│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │
│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │
│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │
│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │
│ [...] │ [...] │ [...] │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```

View File

@ -1189,7 +1189,7 @@ void Client::processOptions(const OptionsDescription & options_description,
void Client::processConfig()
{
if (config().has("query") && config().has("queries-file"))
if (!queries.empty() && config().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
/// Batch mode is enabled if one of the following is true:
@ -1200,9 +1200,9 @@ void Client::processConfig()
/// - --queries-file command line option is present.
/// The value of the option is used as file with query (or of multiple queries) to execute.
delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file"));
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
if (stdin_is_a_tty
&& (delayed_interactive || (!config().has("query") && queries_files.empty())))
&& (delayed_interactive || (queries.empty() && queries_files.empty())))
{
is_interactive = true;
}

View File

@ -9,11 +9,11 @@ namespace DB
bool LSCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -42,11 +42,11 @@ void LSCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool CDCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -64,11 +64,12 @@ void CDCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool SetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
@ -93,11 +94,12 @@ void SetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
bool CreateCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
@ -143,10 +145,10 @@ void TouchCommand::execute(const ASTKeeperQuery * query, KeeperClient * client)
bool GetCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -156,13 +158,28 @@ void GetCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
std::cout << client->zookeeper->get(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool ExistsCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(path));
return true;
}
void ExistsCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
std::cout << client->zookeeper->exists(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool GetStatCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return true;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -325,10 +342,10 @@ void FindBigFamily::execute(const ASTKeeperQuery * query, KeeperClient * client)
bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -340,10 +357,10 @@ void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) con
bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
{
String arg;
if (!parseKeeperPath(pos, expected, arg))
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(arg));
node->args.push_back(std::move(path));
return true;
}
@ -355,6 +372,70 @@ void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) co
[client, path]{ client->zookeeper->removeRecursive(path); });
}
bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
ReconfigCommand::Operation operation;
if (ParserKeyword{"ADD"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::ADD;
else if (ParserKeyword{"REMOVE"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::REMOVE;
else if (ParserKeyword{"SET"}.ignore(pos, expected))
operation = ReconfigCommand::Operation::SET;
else
return false;
node->args.push_back(operation);
ParserToken{TokenType::Whitespace}.ignore(pos);
String arg;
if (!parseKeeperArg(pos, expected, arg))
return false;
node->args.push_back(std::move(arg));
return true;
}
void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
String joining;
String leaving;
String new_members;
auto operation = query->args[0].get<ReconfigCommand::Operation>();
switch (operation)
{
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
joining = query->args[1].safeGet<DB::String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::REMOVE):
leaving = query->args[1].safeGet<DB::String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::SET):
new_members = query->args[1].safeGet<DB::String>();
break;
default:
UNREACHABLE();
}
auto response = client->zookeeper->reconfig(joining, leaving, new_members);
std::cout << response.value << '\n';
}
bool SyncCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
{
String path;
if (!parseKeeperPath(pos, expected, path))
return false;
node->args.push_back(std::move(path));
return true;
}
void SyncCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient * client) const
{
std::cout << client->zookeeper->sync(client->getAbsolutePath(query->args[0].safeGet<String>())) << "\n";
}
bool HelpCommand::parse(IParser::Pos & /* pos */, std::shared_ptr<ASTKeeperQuery> & /* node */, Expected & /* expected */) const
{
return true;

View File

@ -101,6 +101,17 @@ class GetCommand : public IKeeperClientCommand
String getHelpMessage() const override { return "{} <path> -- Returns the node's value"; }
};
class ExistsCommand : public IKeeperClientCommand
{
String getName() const override { return "exists"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Returns `1` if node exists, `0` otherwise"; }
};
class GetStatCommand : public IKeeperClientCommand
{
String getName() const override { return "get_stat"; }
@ -177,6 +188,35 @@ class RMRCommand : public IKeeperClientCommand
String getHelpMessage() const override { return "{} <path> -- Recursively deletes path. Confirmation required"; }
};
class ReconfigCommand : public IKeeperClientCommand
{
enum class Operation : UInt8
{
ADD = 0,
REMOVE = 1,
SET = 2,
};
String getName() const override { return "reconfig"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <add|remove|set> \"<arg>\" [version] -- Reconfigure Keeper cluster. See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration"; }
};
class SyncCommand: public IKeeperClientCommand
{
String getName() const override { return "sync"; }
bool parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const override;
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
String getHelpMessage() const override { return "{} <path> -- Synchronizes node between processes and leader"; }
};
class HelpCommand : public IKeeperClientCommand
{
String getName() const override { return "help"; }

View File

@ -84,8 +84,11 @@ std::vector<String> KeeperClient::getCompletions(const String & prefix) const
void KeeperClient::askConfirmation(const String & prompt, std::function<void()> && callback)
{
if (!ask_confirmation)
return callback();
std::cout << prompt << " Continue?\n";
need_confirmation = true;
waiting_confirmation = true;
confirmation_callback = callback;
}
@ -170,6 +173,14 @@ void KeeperClient::defineOptions(Poco::Util::OptionSet & options)
Poco::Util::Option("log-level", "", "set log level")
.argument("<level>")
.binding("log-level"));
options.addOption(
Poco::Util::Option("no-confirmation", "", "if set, will not require a confirmation on several commands. default false for interactive and true for query")
.binding("no-confirmation"));
options.addOption(
Poco::Util::Option("tests-mode", "", "run keeper-client in a special mode for tests. all commands output are separated by special symbols. default false")
.binding("tests-mode"));
}
void KeeperClient::initialize(Poco::Util::Application & /* self */)
@ -184,12 +195,15 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
std::make_shared<CreateCommand>(),
std::make_shared<TouchCommand>(),
std::make_shared<GetCommand>(),
std::make_shared<ExistsCommand>(),
std::make_shared<GetStatCommand>(),
std::make_shared<FindSuperNodes>(),
std::make_shared<DeleteStaleBackups>(),
std::make_shared<FindBigFamily>(),
std::make_shared<RMCommand>(),
std::make_shared<RMRCommand>(),
std::make_shared<ReconfigCommand>(),
std::make_shared<SyncCommand>(),
std::make_shared<HelpCommand>(),
std::make_shared<FourLetterWordCommand>(),
});
@ -229,18 +243,6 @@ void KeeperClient::initialize(Poco::Util::Application & /* self */)
EventNotifier::init();
}
void KeeperClient::executeQuery(const String & query)
{
std::vector<String> queries;
boost::algorithm::split(queries, query, boost::is_any_of(";"));
for (const auto & query_text : queries)
{
if (!query_text.empty())
processQueryText(query_text);
}
}
bool KeeperClient::processQueryText(const String & text)
{
if (exit_strings.find(text) != exit_strings.end())
@ -248,29 +250,44 @@ bool KeeperClient::processQueryText(const String & text)
try
{
if (need_confirmation)
if (waiting_confirmation)
{
need_confirmation = false;
waiting_confirmation = false;
if (text.size() == 1 && (text == "y" || text == "Y"))
confirmation_callback();
return true;
}
KeeperParser parser;
String message;
const char * begin = text.data();
ASTPtr res = tryParseQuery(parser, begin, begin + text.size(), message, true, "", false, 0, 0, false);
const char * end = begin + text.size();
if (!res)
while (begin < end)
{
std::cerr << message << "\n";
return true;
String message;
ASTPtr res = tryParseQuery(
parser,
begin,
end,
/* out_error_message = */ message,
/* hilite = */ true,
/* description = */ "",
/* allow_multi_statements = */ true,
/* max_query_size = */ 0,
/* max_parser_depth = */ 0,
/* skip_insignificant = */ false);
if (!res)
{
std::cerr << message << "\n";
return true;
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
auto * query = res->as<ASTKeeperQuery>();
auto command = KeeperClient::commands.find(query->command);
command->second->execute(query, this);
}
catch (Coordination::Exception & err)
{
@ -279,7 +296,7 @@ bool KeeperClient::processQueryText(const String & text)
return true;
}
void KeeperClient::runInteractive()
void KeeperClient::runInteractiveReplxx()
{
LineReader::Patterns query_extenders = {"\\"};
@ -299,7 +316,7 @@ void KeeperClient::runInteractive()
while (true)
{
String prompt;
if (need_confirmation)
if (waiting_confirmation)
prompt = "[y/n] ";
else
prompt = cwd.string() + " :) ";
@ -313,6 +330,26 @@ void KeeperClient::runInteractive()
}
}
void KeeperClient::runInteractiveInputStream()
{
for (String input; std::getline(std::cin, input);)
{
if (!processQueryText(input))
break;
std::cout << "\a\a\a\a" << std::endl;
std::cerr << std::flush;
}
}
void KeeperClient::runInteractive()
{
if (config().hasOption("tests-mode"))
runInteractiveInputStream();
else
runInteractiveReplxx();
}
int KeeperClient::main(const std::vector<String> & /* args */)
{
if (config().hasOption("help"))
@ -362,8 +399,13 @@ int KeeperClient::main(const std::vector<String> & /* args */)
zk_args.operation_timeout_ms = config().getInt("operation-timeout", 10) * 1000;
zookeeper = std::make_unique<zkutil::ZooKeeper>(zk_args);
if (config().has("no-confirmation") || config().has("query"))
ask_confirmation = false;
if (config().has("query"))
executeQuery(config().getString("query"));
{
processQueryText(config().getString("query"));
}
else
runInteractive();

View File

@ -49,8 +49,10 @@ public:
protected:
void runInteractive();
void runInteractiveReplxx();
void runInteractiveInputStream();
bool processQueryText(const String & text);
void executeQuery(const String & query);
void loadCommands(std::vector<Command> && new_commands);
@ -61,7 +63,8 @@ protected:
zkutil::ZooKeeperArgs zk_args;
bool need_confirmation = false;
bool ask_confirmation = true;
bool waiting_confirmation = false;
std::vector<String> registered_commands_and_four_letter_words;
};

View File

@ -7,43 +7,34 @@ namespace DB
bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
{
expected.add(pos, getTokenName(TokenType::BareWord));
if (pos->type == TokenType::BareWord)
if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
{
result = String(pos->begin, pos->end);
++pos;
if (!parseIdentifierOrStringLiteral(pos, expected, result))
return false;
ParserToken{TokenType::Whitespace}.ignore(pos);
return true;
}
bool status = parseIdentifierOrStringLiteral(pos, expected, result);
ParserToken{TokenType::Whitespace}.ignore(pos);
return status;
}
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
expected.add(pos, "path");
if (pos->type == TokenType::QuotedIdentifier || pos->type == TokenType::StringLiteral)
return parseIdentifierOrStringLiteral(pos, expected, path);
String result;
while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream)
while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon)
{
result.append(pos->begin, pos->end);
++pos;
}
ParserToken{TokenType::Whitespace}.ignore(pos);
if (result.empty())
return false;
path = result;
return true;
}
bool parseKeeperPath(IParser::Pos & pos, Expected & expected, String & path)
{
expected.add(pos, "path");
return parseKeeperArg(pos, expected, path);
}
bool KeeperParser::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
auto query = std::make_shared<ASTKeeperQuery>();

View File

@ -319,7 +319,7 @@ static bool checkIfStdinIsRegularFile()
std::string LocalServer::getInitialCreateTableQuery()
{
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || !config().has("query")))
if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || queries.empty()))
return {};
auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
@ -461,7 +461,7 @@ try
if (first_time)
{
if (queries_files.empty() && !config().has("query"))
if (queries_files.empty() && queries.empty())
{
std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl;
std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl;
@ -473,7 +473,7 @@ try
#else
is_interactive = stdin_is_a_tty
&& (config().hasOption("interactive")
|| (!config().has("query") && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
|| (queries.empty() && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
#endif
if (!is_interactive)
{
@ -569,10 +569,10 @@ void LocalServer::updateLoggerLevel(const String & logs_level)
void LocalServer::processConfig()
{
if (config().has("query") && config().has("queries-file"))
if (!queries.empty() && config().has("queries-file"))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file"));
delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
if (is_interactive && !delayed_interactive)
{
if (config().has("multiquery"))
@ -881,6 +881,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setBool("no-system-tables", true);
if (options.count("only-system-tables"))
config().setBool("only-system-tables", true);
if (options.count("database"))
config().setString("default_database", options["database"].as<std::string>());
if (options.count("input-format"))
config().setString("table-data-format", options["input-format"].as<std::string>());

View File

@ -19,6 +19,7 @@ void registerAggregateFunctionsStatisticsSecondMoment(AggregateFunctionFactory &
factory.registerAlias("VAR_POP", "varPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STDDEV_SAMP", "stddevSamp", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STDDEV_POP", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("STD", "stddevPop", AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -459,6 +459,10 @@ struct AnalysisOfVarianceMoments
void add(T value, size_t group)
{
if (group == std::numeric_limits<size_t>::max())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too many groups for analysis of variance (should be no more than {}, got {})",
MAX_GROUPS_NUMBER, group);
resizeIfNeeded(group + 1);
xs1[group] += value;
xs2[group] += value * value;

View File

@ -6341,9 +6341,9 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
{
/// For input function we should check if input format supports reading subset of columns.
if (table_function_ptr->getName() == "input")
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat());
use_columns_from_insert_query = FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(scope.context->getInsertFormat(), scope.context);
else
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns();
use_columns_from_insert_query = table_function_ptr->supportsReadingSubsetOfColumns(scope.context);
}
if (use_columns_from_insert_query)

View File

@ -1,6 +1,9 @@
#include <Backups/BackupUtils.h>
#include <Backups/DDLAdjustingForBackupVisitor.h>
#include <Access/Common/AccessRightsElement.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Common/setThreadName.h>
@ -95,4 +98,26 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements &
return required_access;
}
bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context)
{
auto adjust_before_comparison = [&](const IAST & query) -> ASTPtr
{
auto new_query = query.clone();
adjustCreateQueryForBackup(new_query, global_context, nullptr);
ASTCreateQuery & create = typeid_cast<ASTCreateQuery &>(*new_query);
create.setUUID({});
create.if_not_exists = false;
return new_query;
};
ASTPtr query1 = adjust_before_comparison(restored_table_create_query);
ASTPtr query2 = adjust_before_comparison(create_query_from_backup);
return serializeAST(*query1) == serializeAST(*query2);
}
bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context)
{
return compareRestoredTableDef(restored_database_create_query, create_query_from_backup, global_context);
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Parsers/ASTBackupQuery.h>
#include <Interpreters/Context_fwd.h>
namespace DB
@ -12,8 +13,11 @@ class DDLRenamingMap;
/// Initializes a DDLRenamingMap from a BACKUP or RESTORE query.
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements);
/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements);
/// Checks the definition of a restored table - it must correspond to the definition from the backup.
bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context);
bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context);
}

View File

@ -81,9 +81,6 @@ namespace
void visitCreateQuery(ASTCreateQuery & create, const DDLAdjustingForBackupVisitor::Data & data)
{
create.uuid = UUIDHelpers::Nil;
create.to_inner_uuid = UUIDHelpers::Nil;
if (create.storage)
visitStorage(*create.storage, data);
}

View File

@ -7,6 +7,7 @@ namespace DB
{
class Exception;
enum class UserDefinedSQLObjectType;
class ASTCreateQuery;
/// Replicas use this class to coordinate what they're reading from a backup while executing RESTORE ON CLUSTER.
/// There are two implementation of this interface: RestoreCoordinationLocal and RestoreCoordinationRemote.
@ -40,10 +41,13 @@ public:
/// The function returns false if user-defined functions at the specified zk path are already being restored by another replica.
virtual bool acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type) = 0;
/// Generates a new UUID for a table. The same UUID must be used for a replicated table on each replica,
/// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly).
virtual void generateUUIDForTable(ASTCreateQuery & create_query) = 0;
/// This function is used to check if concurrent restores are running
/// other than the restore passed to the function
virtual bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const = 0;
};
}

View File

@ -1,4 +1,5 @@
#include <Backups/RestoreCoordinationLocal.h>
#include <Parsers/formatAST.h>
#include <Common/logger_useful.h>
@ -51,6 +52,39 @@ bool RestoreCoordinationLocal::acquireReplicatedSQLObjects(const String &, UserD
return true;
}
void RestoreCoordinationLocal::generateUUIDForTable(ASTCreateQuery & create_query)
{
String query_str = serializeAST(create_query);
auto find_in_map = [&]
{
auto it = create_query_uuids.find(query_str);
if (it != create_query_uuids.end())
{
create_query.setUUID(it->second);
return true;
}
return false;
};
{
std::lock_guard lock{mutex};
if (find_in_map())
return;
}
auto new_uuids = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true);
String new_query_str = serializeAST(create_query);
{
std::lock_guard lock{mutex};
if (find_in_map())
return;
create_query_uuids[query_str] = new_uuids;
}
}
bool RestoreCoordinationLocal::hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const
{
if (num_active_restores > 1)

View File

@ -1,6 +1,7 @@
#pragma once
#include <Backups/IRestoreCoordination.h>
#include <Parsers/ASTCreateQuery.h>
#include <mutex>
#include <set>
#include <unordered_set>
@ -39,6 +40,10 @@ public:
/// The function returns false if user-defined functions at the specified zk path are already being restored by another replica.
bool acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type) override;
/// Generates a new UUID for a table. The same UUID must be used for a replicated table on each replica,
/// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly).
void generateUUIDForTable(ASTCreateQuery & create_query) override;
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
private:
@ -46,6 +51,8 @@ private:
std::set<std::pair<String /* database_zk_path */, String /* table_name */>> acquired_tables_in_replicated_databases;
std::unordered_set<String /* table_zk_path */> acquired_data_in_replicated_tables;
std::unordered_map<String, ASTCreateQuery::UUIDs> create_query_uuids;
mutable std::mutex mutex;
};

View File

@ -2,6 +2,8 @@
#include <Backups/BackupCoordinationStage.h>
#include <Backups/RestoreCoordinationRemote.h>
#include <Backups/BackupCoordinationStageSync.h>
#include <Parsers/ASTCreateQuery.h>
#include <Parsers/formatAST.h>
#include <Functions/UserDefined/UserDefinedSQLObjectType.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/escapeForFileName.h>
@ -87,6 +89,7 @@ void RestoreCoordinationRemote::createRootNodes()
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_tables_data_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_access_storages_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/repl_sql_objects_acquired", "", zkutil::CreateMode::Persistent));
ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/table_uuids", "", zkutil::CreateMode::Persistent));
zk->tryMulti(ops, responses);
});
}
@ -231,6 +234,33 @@ bool RestoreCoordinationRemote::acquireReplicatedSQLObjects(const String & loade
return result;
}
void RestoreCoordinationRemote::generateUUIDForTable(ASTCreateQuery & create_query)
{
String query_str = serializeAST(create_query);
String new_uuids_str = create_query.generateRandomUUID(/* always_generate_new_uuid= */ true).toString();
auto holder = with_retries.createRetriesControlHolder("generateUUIDForTable");
holder.retries_ctl.retryLoop(
[&, &zk = holder.faulty_zookeeper]()
{
with_retries.renewZooKeeper(zk);
String path = zookeeper_path + "/table_uuids/" + escapeForFileName(query_str);
Coordination::Error res = zk->tryCreate(path, new_uuids_str, zkutil::CreateMode::Persistent);
if (res == Coordination::Error::ZOK)
return;
if (res == Coordination::Error::ZNODEEXISTS)
{
create_query.setUUID(ASTCreateQuery::UUIDs::fromString(zk->get(path)));
return;
}
zkutil::KeeperException::fromPath(res, path);
});
}
void RestoreCoordinationRemote::removeAllNodes()
{
/// Usually this function is called by the initiator when a restore operation is complete so we don't need the coordination anymore.

View File

@ -46,6 +46,10 @@ public:
/// The function returns false if user-defined functions at the specified zk path are already being restored by another replica.
bool acquireReplicatedSQLObjects(const String & loader_zk_path, UserDefinedSQLObjectType object_type) override;
/// Generates a new UUID for a table. The same UUID must be used for a replicated table on each replica,
/// (because otherwise the macro "{uuid}" in the ZooKeeper path will not work correctly).
void generateUUIDForTable(ASTCreateQuery & create_query) override;
bool hasConcurrentRestores(const std::atomic<size_t> & num_active_restores) const override;
private:

View File

@ -571,12 +571,14 @@ void RestorerFromBackup::createDatabase(const String & database_name) const
if (database_info.is_predefined_database)
return;
auto create_database_query = database_info.create_database_query;
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
{
create_database_query = create_database_query->clone();
create_database_query->as<ASTCreateQuery &>().if_not_exists = true;
}
auto create_database_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(database_info.create_database_query->clone());
/// Generate a new UUID for a database.
/// The generated UUID will be ignored if the database does not support UUIDs.
restore_coordination->generateUUIDForTable(*create_database_query);
/// Add the clause `IF NOT EXISTS` if that is specified in the restore settings.
create_database_query->if_not_exists = (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists);
LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query));
@ -605,17 +607,17 @@ void RestorerFromBackup::checkDatabase(const String & database_name)
if (!restore_settings.allow_different_database_def && !database_info.is_predefined_database)
{
/// Check that the database's definition is the same as expected.
ASTPtr create_database_query = database->getCreateDatabaseQuery();
adjustCreateQueryForBackup(create_database_query, context->getGlobalContext(), nullptr);
ASTPtr expected_create_query = database_info.create_database_query;
if (serializeAST(*create_database_query) != serializeAST(*expected_create_query))
ASTPtr existing_database_def = database->getCreateDatabaseQuery();
ASTPtr database_def_from_backup = database_info.create_database_query;
if (!compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext()))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_DATABASE,
"The database has a different definition: {} "
"comparing to its definition in the backup: {}",
serializeAST(*create_database_query),
serializeAST(*expected_create_query));
serializeAST(*existing_database_def),
serializeAST(*database_def_from_backup));
}
}
}
@ -714,20 +716,23 @@ void RestorerFromBackup::createTable(const QualifiedTableName & table_name)
if (table_info.is_predefined_table)
return;
auto create_table_query = table_info.create_table_query;
if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists)
{
create_table_query = create_table_query->clone();
create_table_query->as<ASTCreateQuery &>().if_not_exists = true;
}
auto create_table_query = typeid_cast<std::shared_ptr<ASTCreateQuery>>(table_info.create_table_query->clone());
/// Generate a new UUID for a table (the same table on different hosts must use the same UUID, `restore_coordination` will make it so).
/// The generated UUID will be ignored if the database does not support UUIDs.
restore_coordination->generateUUIDForTable(*create_table_query);
/// Add the clause `IF NOT EXISTS` if that is specified in the restore settings.
create_table_query->if_not_exists = (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists);
LOG_TRACE(
log, "Creating {}: {}", tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*create_table_query));
try
{
DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database);
table_info.database = database;
if (!table_info.database)
table_info.database = DatabaseCatalog::instance().getDatabase(table_name.database);
DatabasePtr database = table_info.database;
/// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some
/// database-specific things).
@ -747,37 +752,33 @@ void RestorerFromBackup::createTable(const QualifiedTableName & table_name)
void RestorerFromBackup::checkTable(const QualifiedTableName & table_name)
{
auto & table_info = table_infos.at(table_name);
auto database = table_info.database;
try
{
if (!database)
{
database = DatabaseCatalog::instance().getDatabase(table_name.database);
table_info.database = database;
}
auto resolved_id = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE)
? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal)
: context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal);
if (!table_info.database)
table_info.database = DatabaseCatalog::instance().getDatabase(table_name.database);
DatabasePtr database = table_info.database;
StoragePtr storage = database->getTable(resolved_id.table_name, context);
table_info.storage = storage;
table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
if (!restore_settings.allow_different_table_def && !table_info.is_predefined_table)
{
ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context);
adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr);
ASTPtr expected_create_query = table_info.create_table_query;
if (serializeAST(*create_table_query) != serializeAST(*expected_create_query))
ASTPtr existing_table_def = database->getCreateTableQuery(resolved_id.table_name, context);
ASTPtr table_def_from_backup = table_info.create_table_query;
if (!compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext()))
{
throw Exception(
ErrorCodes::CANNOT_RESTORE_TABLE,
"The table has a different definition: {} "
"comparing to its definition in the backup: {}",
serializeAST(*create_table_query),
serializeAST(*expected_create_query));
serializeAST(*existing_table_def),
serializeAST(*table_def_from_backup));
}
}
}

View File

@ -441,7 +441,20 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (!block)
return;
processed_rows += block.rows();
if (block.rows() == 0 && partial_result_mode == PartialResultMode::Active)
{
partial_result_mode = PartialResultMode::Inactive;
if (is_interactive)
{
progress_indication.clearProgressOutput(*tty_buf);
std::cout << "Full result:" << std::endl;
progress_indication.writeProgress(*tty_buf);
}
}
if (partial_result_mode == PartialResultMode::Inactive)
processed_rows += block.rows();
/// Even if all blocks are empty, we still need to initialize the output stream to write empty resultset.
initOutputFormat(block, parsed_query);
@ -451,13 +464,20 @@ void ClientBase::onData(Block & block, ASTPtr parsed_query)
if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
return;
if (!is_interactive && partial_result_mode == PartialResultMode::Active)
return;
/// If results are written INTO OUTFILE, we can avoid clearing progress to avoid flicker.
if (need_render_progress && tty_buf && (!select_into_file || select_into_file_and_stdout))
progress_indication.clearProgressOutput(*tty_buf);
try
{
output_format->write(materializeBlock(block));
if (partial_result_mode == PartialResultMode::Active)
output_format->writePartialResult(materializeBlock(block));
else
output_format->write(materializeBlock(block));
written_first_block = true;
}
catch (const Exception &)
@ -521,6 +541,9 @@ void ClientBase::onProfileInfo(const ProfileInfo & profile_info)
void ClientBase::initOutputFormat(const Block & block, ASTPtr parsed_query)
try
{
if (partial_result_mode == PartialResultMode::NotInit)
partial_result_mode = PartialResultMode::Active;
if (!output_format)
{
/// Ignore all results when fuzzing as they can be huge.
@ -931,6 +954,14 @@ void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr pa
const auto & settings = global_context->getSettingsRef();
const Int32 signals_before_stop = settings.partial_result_on_first_cancel ? 2 : 1;
bool has_partial_result_setting = settings.partial_result_update_duration_ms.totalMilliseconds() > 0;
if (has_partial_result_setting)
{
partial_result_mode = PartialResultMode::NotInit;
if (is_interactive)
std::cout << "Partial result:" << std::endl;
}
int retries_left = 10;
while (retries_left)
@ -1736,6 +1767,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
}
processed_rows = 0;
partial_result_mode = PartialResultMode::Inactive;
written_first_block = false;
progress_indication.resetProgress();
profile_events.watch.restart();
@ -2495,23 +2527,34 @@ void ClientBase::runNonInteractive()
return;
}
String text;
if (config().has("query"))
if (!queries.empty())
{
text += config().getRawString("query"); /// Poco configuration should not process substitutions in form of ${...} inside query.
for (const auto & query : queries)
{
if (query_fuzzer_runs)
{
if (!processWithFuzzing(query))
return;
}
else
{
if (!processQueryText(query))
return;
}
}
}
else
{
/// If 'query' parameter is not set, read a query from stdin.
/// The query is read entirely into memory (streaming is disabled).
ReadBufferFromFileDescriptor in(STDIN_FILENO);
String text;
readStringUntilEOF(text, in);
if (query_fuzzer_runs)
processWithFuzzing(text);
else
processQueryText(text);
}
if (query_fuzzer_runs)
processWithFuzzing(text);
else
processQueryText(text);
}
@ -2680,8 +2723,8 @@ void ClientBase::init(int argc, char ** argv)
stderr_is_a_tty = isatty(STDERR_FILENO);
terminal_width = getTerminalWidth();
Arguments common_arguments{""}; /// 0th argument is ignored.
std::vector<Arguments> external_tables_arguments;
Arguments common_arguments = {""}; /// 0th argument is ignored.
std::vector<Arguments> hosts_and_ports_arguments;
readArguments(argc, argv, common_arguments, external_tables_arguments, hosts_and_ports_arguments);
@ -2699,7 +2742,6 @@ void ClientBase::init(int argc, char ** argv)
}
po::variables_map options;
OptionsDescription options_description;
options_description.main_description.emplace(createOptionsDescription("Main options", terminal_width));
@ -2711,9 +2753,8 @@ void ClientBase::init(int argc, char ** argv)
("config-file,C", po::value<std::string>(), "config-file path")
("query,q", po::value<std::string>(), "query")
("queries-file", po::value<std::vector<std::string>>()->multitoken(),
"file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
("query,q", po::value<std::vector<std::string>>()->multitoken(), R"(query; can be specified multiple times (--query "SELECT 1" --query "SELECT 2"...))")
("queries-file", po::value<std::vector<std::string>>()->multitoken(), "file path with queries to execute; multiple files can be specified (--queries-file file1 file2...)")
("multiquery,n", "If specified, multiple queries separated by semicolons can be listed after --query. For convenience, it is also possible to omit --query and pass the queries directly after --multiquery.")
("multiline,m", "If specified, allow multiline queries (do not send the query on Enter)")
("database,d", po::value<std::string>(), "database")
@ -2734,8 +2775,7 @@ void ClientBase::init(int argc, char ** argv)
("log-level", po::value<std::string>(), "log level")
("server_logs_file", po::value<std::string>(), "put server logs into specified file")
("suggestion_limit", po::value<int>()->default_value(10000),
"Suggestion limit for how many databases, tables and columns to fetch.")
("suggestion_limit", po::value<int>()->default_value(10000), "Suggestion limit for how many databases, tables and columns to fetch.")
("format,f", po::value<std::string>(), "default output format")
("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
@ -2773,6 +2813,7 @@ void ClientBase::init(int argc, char ** argv)
std::transform(external_options.begin(), external_options.end(), std::back_inserter(cmd_options), getter);
}
po::variables_map options;
parseAndCheckOptions(options_description, options, common_arguments);
po::notify(options);
@ -2800,7 +2841,7 @@ void ClientBase::init(int argc, char ** argv)
if (options.count("time"))
print_time_to_stderr = true;
if (options.count("query"))
config().setString("query", options["query"].as<std::string>());
queries = options["query"].as<std::vector<std::string>>();
if (options.count("query_id"))
config().setString("query_id", options["query_id"].as<std::string>());
if (options.count("database"))

View File

@ -63,7 +63,7 @@ void interruptSignalHandler(int signum);
class InternalTextLogs;
class WriteBufferFromFileDescriptor;
class ClientBase : public Poco::Util::Application, public IHints<2, ClientBase>
class ClientBase : public Poco::Util::Application, public IHints<2>
{
public:
@ -202,6 +202,7 @@ protected:
std::optional<Suggest> suggest;
bool load_suggestions = false;
std::vector<String> queries; /// Queries passed via '--query'
std::vector<String> queries_files; /// If not empty, queries will be read from these files
std::vector<String> interleave_queries_files; /// If not empty, run queries from these files before processing every file from 'queries_files'.
std::vector<String> cmd_options;
@ -271,6 +272,21 @@ protected:
size_t processed_rows = 0; /// How many rows have been read or written.
bool print_num_processed_rows = false; /// Whether to print the number of processed rows at
enum class PartialResultMode: UInt8
{
/// Query doesn't show partial result before the first block with 0 rows.
/// The first block with 0 rows initializes the output table format using its header.
NotInit,
/// Query shows partial result after the first and before the second block with 0 rows.
/// The second block with 0 rows indicates that receiving blocks with the partial result has been completed and the next blocks will contain the full result.
Active,
/// Query doesn't show partial result at all.
Inactive,
};
PartialResultMode partial_result_mode = PartialResultMode::Inactive;
bool print_stack_trace = false;
/// The last exception that was received from the server. Is used for the
/// return code in batch mode.

View File

@ -73,7 +73,7 @@ ColumnAggregateFunction::ColumnAggregateFunction(const AggregateFunctionPtr & fu
}
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, size_t version_)
void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_, std::optional<size_t> version_)
{
func = func_;
version = version_;

View File

@ -103,7 +103,7 @@ private:
public:
~ColumnAggregateFunction() override;
void set(const AggregateFunctionPtr & func_, size_t version_);
void set(const AggregateFunctionPtr & func_, std::optional<size_t> version_ = std::nullopt);
AggregateFunctionPtr getAggregateFunction() { return func; }
AggregateFunctionPtr getAggregateFunction() const { return func; }

View File

@ -57,7 +57,7 @@ ConcurrencyControl::Allocation::Allocation(ConcurrencyControl & parent_, SlotCou
*waiter = this;
}
// Grant single slot to allocation, returns true iff more slot(s) are required
// Grant single slot to allocation returns true iff more slot(s) are required
bool ConcurrencyControl::Allocation::grant()
{
std::unique_lock lock{mutex};

View File

@ -97,6 +97,14 @@ ThreadGroupPtr CurrentThread::getGroup()
return current_thread->getThreadGroup();
}
ContextPtr CurrentThread::getQueryContext()
{
if (unlikely(!current_thread))
return {};
return current_thread->getQueryContext();
}
std::string_view CurrentThread::getQueryId()
{
if (unlikely(!current_thread))

View File

@ -86,6 +86,10 @@ public:
static void finalizePerformanceCounters();
/// Returns a non-empty string if the thread is attached to a query
/// Returns attached query context
static ContextPtr getQueryContext();
static std::string_view getQueryId();
/// Initializes query with current thread as master thread in constructor, and detaches it in destructor
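
The two accessors added above follow the existing CurrentThread convention: if the calling thread is not attached to a query, they return an empty value instead of touching a missing thread state. A standalone sketch of that pattern, with a toy thread-local state rather than the real ThreadStatus machinery:

#include <iostream>
#include <memory>
#include <string>
#include <string_view>

struct QueryState { std::string query_id; };

// Thread-local "current thread" slot; empty when the thread is not attached to a query.
thread_local std::shared_ptr<QueryState> current_state;

struct CurrentThreadLike
{
    // Safe from any thread: returns an empty pointer instead of dereferencing nothing.
    static std::shared_ptr<QueryState> getQueryContext()
    {
        if (!current_state)
            return {};
        return current_state;
    }

    static std::string_view getQueryId()
    {
        if (!current_state)
            return {};
        return current_state->query_id;
    }
};

int main()
{
    std::cout << std::boolalpha;
    std::cout << "attached: " << (CurrentThreadLike::getQueryContext() != nullptr) << '\n';

    current_state = std::make_shared<QueryState>(QueryState{"query-42"});
    std::cout << "attached: " << (CurrentThreadLike::getQueryContext() != nullptr)
              << ", id: " << CurrentThreadLike::getQueryId() << '\n';
}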

View File

@ -393,6 +393,18 @@ struct UInt128HashCRC32
}
};
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
struct UInt128HashCRC32
{
size_t operator()(UInt128 x) const
{
UInt64 crc = -1ULL;
crc = s390x_crc32(crc, x.items[UInt128::_impl::little(0)]);
crc = s390x_crc32(crc, x.items[UInt128::_impl::little(1)]);
return crc;
}
};
#else
/// On other platforms we do not use CRC32. NOTE This can be confusing.
@ -451,6 +463,19 @@ struct UInt256HashCRC32
}
};
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
struct UInt256HashCRC32
{
size_t operator()(UInt256 x) const
{
UInt64 crc = -1ULL;
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(0)]);
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(1)]);
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(2)]);
crc = s390x_crc32(crc, x.items[UInt256::_impl::little(3)]);
return crc;
}
};
#else
/// We do not need to use CRC32 on other platforms. NOTE This can be confusing.
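
Both the SSE4.2 branch and the new s390x branch above hash a wide integer the same way: start from an all-ones accumulator and fold each 64-bit limb through a hardware CRC32 step. The standalone sketch below shows that folding shape; the mixer is an ordinary 64-bit hash step standing in for _mm_crc32_u64 / s390x_crc32, and UInt256Like is an illustrative type, not the real UInt256.

#include <array>
#include <cstdint>
#include <iostream>

// Stand-in for one hardware CRC32 step: fold a 64-bit limb into the accumulator.
// (The real code uses _mm_crc32_u64 on x86 and s390x_crc32 on s390x.)
uint64_t fold(uint64_t acc, uint64_t limb)
{
    acc ^= limb;
    acc *= 0xff51afd7ed558ccdULL;   // 64-bit finalizer constant from MurmurHash3
    acc ^= acc >> 33;
    return acc;
}

struct UInt256Like { std::array<uint64_t, 4> items; };

size_t hashUInt256(const UInt256Like & x)
{
    uint64_t acc = ~0ULL;           // same all-ones seed as the CRC-based versions
    for (uint64_t limb : x.items)   // fold limbs one by one, like items[little(0..3)]
        acc = fold(acc, limb);
    return static_cast<size_t>(acc);
}

int main()
{
    UInt256Like v{{1, 2, 3, 4}};
    std::cout << std::hex << hashUInt256(v) << '\n';
}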

View File

@ -71,6 +71,28 @@ struct StringHashTableHash
res = _mm_crc32_u64(res, key.c);
return res;
}
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{
size_t res = -1ULL;
res = s390x_crc32(res, key);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey16 key) const
{
size_t res = -1ULL;
res = s390x_crc32(res, key.items[UInt128::_impl::little(0)]);
res = s390x_crc32(res, key.items[UInt128::_impl::little(1)]);
return res;
}
size_t ALWAYS_INLINE operator()(StringKey24 key) const
{
size_t res = -1ULL;
res = s390x_crc32(res, key.a);
res = s390x_crc32(res, key.b);
res = s390x_crc32(res, key.c);
return res;
}
#else
size_t ALWAYS_INLINE operator()(StringKey8 key) const
{

View File

@ -20,7 +20,7 @@ namespace ErrorCodes
* template parameter is available as Value
*/
template <typename ValueType>
class IFactoryWithAliases : public IHints<2, IFactoryWithAliases<ValueType>>
class IFactoryWithAliases : public IHints<2>
{
protected:
using Value = ValueType;

View File

@ -95,7 +95,7 @@ String getHintsErrorMessageSuffix(const std::vector<String> & hints);
void appendHintsMessage(String & error_message, const std::vector<String> & hints);
template <size_t MaxNumHints, typename Self>
template <size_t MaxNumHints = 1>
class IHints
{
public:
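
With the CRTP parameter gone, a registry now derives from plain IHints<N> and only has to provide getAllRegisteredNames(). The sketch below is a standalone toy version of that idea (its own HintsBase and a naive edit distance, not the real IHints/NamePrompter), showing how a mistyped name gets turned into suggestions.

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Toy edit distance; the real prompter uses a more careful implementation.
size_t editDistance(const std::string & a, const std::string & b)
{
    std::vector<size_t> prev(b.size() + 1), cur(b.size() + 1);
    for (size_t j = 0; j <= b.size(); ++j) prev[j] = j;
    for (size_t i = 1; i <= a.size(); ++i)
    {
        cur[0] = i;
        for (size_t j = 1; j <= b.size(); ++j)
            cur[j] = std::min({prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (a[i - 1] != b[j - 1])});
        std::swap(prev, cur);
    }
    return prev[b.size()];
}

template <size_t MaxNumHints = 1>
class HintsBase
{
public:
    virtual std::vector<std::string> getAllRegisteredNames() const = 0;
    virtual ~HintsBase() = default;

    // Return up to MaxNumHints registered names that are closest to the mistyped one.
    std::vector<std::string> getHints(const std::string & name) const
    {
        std::vector<std::string> names = getAllRegisteredNames();
        std::sort(names.begin(), names.end(), [&](const auto & l, const auto & r)
                  { return editDistance(l, name) < editDistance(r, name); });
        if (names.size() > MaxNumHints)
            names.resize(MaxNumHints);
        return names;
    }
};

struct FunctionRegistry : HintsBase<2>
{
    std::vector<std::string> getAllRegisteredNames() const override
    {
        return {"sum", "count", "uniq", "quantile"};
    }
};

int main()
{
    for (const auto & hint : FunctionRegistry{}.getHints("quntile"))
        std::cout << "maybe you meant: " << hint << '\n';
}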

View File

@ -107,15 +107,25 @@ public:
static ThreadGroupPtr createForBackgroundProcess(ContextPtr storage_context);
std::vector<UInt64> getInvolvedThreadIds() const;
void linkThread(UInt64 thread_it);
size_t getPeakThreadsUsage() const;
void linkThread(UInt64 thread_id);
void unlinkThread();
private:
mutable std::mutex mutex;
/// Set up at creation, no race when reading
SharedData shared_data;
SharedData shared_data TSA_GUARDED_BY(mutex);
/// Set of all thread ids which has been attached to the group
std::unordered_set<UInt64> thread_ids;
std::unordered_set<UInt64> thread_ids TSA_GUARDED_BY(mutex);
/// Count of simultaneously working threads
size_t active_thread_count TSA_GUARDED_BY(mutex) = 0;
/// Peak threads count in the group
size_t peak_threads_usage TSA_GUARDED_BY(mutex) = 0;
};
/**
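
The new members above keep a running count of threads attached to the group and remember the maximum seen, all under the group mutex (which is what the TSA_GUARDED_BY annotations assert). A standalone sketch of that bookkeeping, simplified and not the real ThreadGroup:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <mutex>
#include <unordered_set>

class ThreadGroupLike
{
public:
    void linkThread(uint64_t thread_id)
    {
        std::lock_guard lock(mutex);
        thread_ids.insert(thread_id);
        ++active_thread_count;
        peak_threads_usage = std::max(peak_threads_usage, active_thread_count);
    }

    void unlinkThread()
    {
        std::lock_guard lock(mutex);
        if (active_thread_count > 0)
            --active_thread_count;
    }

    size_t getPeakThreadsUsage() const
    {
        std::lock_guard lock(mutex);
        return peak_threads_usage;
    }

private:
    mutable std::mutex mutex;                 // everything below is guarded by it
    std::unordered_set<uint64_t> thread_ids;  // all threads ever attached to the group
    size_t active_thread_count = 0;           // currently attached threads
    size_t peak_threads_usage = 0;            // high-water mark of active_thread_count
};

int main()
{
    ThreadGroupLike group;
    group.linkThread(1);
    group.linkThread(2);
    group.unlinkThread();
    group.linkThread(3);
    std::cout << "peak threads: " << group.getPeakThreadsUsage() << '\n';  // prints 2
}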

View File

@ -877,6 +877,24 @@ void ZooKeeper::handleEphemeralNodeExistence(const std::string & path, const std
}
}
Coordination::ReconfigResponse ZooKeeper::reconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version)
{
auto future_result = asyncReconfig(joining, leaving, new_members, version);
if (future_result.wait_for(std::chrono::milliseconds(args.operation_timeout_ms)) != std::future_status::ready)
{
impl->finalize(fmt::format("Operation timeout on {}", Coordination::OpNum::Reconfig));
throw KeeperException(Coordination::Error::ZOPERATIONTIMEOUT);
}
return future_result.get();
}
ZooKeeperPtr ZooKeeper::startNewSession() const
{
return std::make_shared<ZooKeeper>(args, zk_log);
@ -1226,6 +1244,27 @@ std::future<Coordination::SyncResponse> ZooKeeper::asyncSync(const std::string &
return future;
}
std::future<Coordination::ReconfigResponse> ZooKeeper::asyncReconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version)
{
auto promise = std::make_shared<std::promise<Coordination::ReconfigResponse>>();
auto future = promise->get_future();
auto callback = [promise](const Coordination::ReconfigResponse & response) mutable
{
if (response.error != Coordination::Error::ZOK)
promise->set_exception(std::make_exception_ptr(KeeperException(response.error)));
else
promise->set_value(response);
};
impl->reconfig(joining, leaving, new_members, version, std::move(callback));
return future;
}
void ZooKeeper::finalize(const String & reason)
{
impl->finalize(reason);

View File

@ -449,6 +449,12 @@ public:
/// disappear automatically after 3x session_timeout.
void handleEphemeralNodeExistence(const std::string & path, const std::string & fast_delete_if_equal_value);
Coordination::ReconfigResponse reconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version = -1);
/// Async interface (a small subset of operations is implemented).
///
/// Usage:
@ -529,6 +535,13 @@ public:
const std::string & path,
Coordination::ListRequestType list_request_type = Coordination::ListRequestType::ALL);
using FutureReconfig = std::future<Coordination::ReconfigResponse>;
FutureReconfig asyncReconfig(
const std::string & joining,
const std::string & leaving,
const std::string & new_members,
int32_t version = -1);
void finalize(const String & reason);
void setZooKeeperLog(std::shared_ptr<DB::ZooKeeperLog> zk_log_);
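
Both additions follow the codebase's usual callback-to-future bridge: asyncReconfig wraps the callback result in a std::promise, and the synchronous reconfig waits on the returned future with the operation timeout and rethrows failures. A standalone sketch of that bridge with toy types (the real callback is invoked on the ZooKeeper client's I/O thread, and errors map to KeeperException):

#include <chrono>
#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>

struct ReconfigResponse { std::string new_config; int error = 0; };
using ReconfigCallback = std::function<void(const ReconfigResponse &)>;

// Stand-in for the callback-based client API. For simplicity the callback is
// invoked inline; the real client invokes it later, on its I/O thread.
void reconfigAsync(const std::string & joining, ReconfigCallback callback)
{
    ReconfigResponse response;
    response.new_config = "server.4=" + joining;
    callback(response);
}

// Future-based wrapper, like asyncReconfig: the callback fulfills the promise.
std::future<ReconfigResponse> asyncReconfig(const std::string & joining)
{
    auto promise = std::make_shared<std::promise<ReconfigResponse>>();
    auto future = promise->get_future();
    reconfigAsync(joining, [promise](const ReconfigResponse & response)
    {
        if (response.error != 0)
            promise->set_exception(std::make_exception_ptr(std::runtime_error("reconfig failed")));
        else
            promise->set_value(response);
    });
    return future;
}

// Synchronous wrapper, like reconfig: wait with a timeout and rethrow failures.
ReconfigResponse reconfig(const std::string & joining, std::chrono::milliseconds timeout)
{
    auto future = asyncReconfig(joining);
    if (future.wait_for(timeout) != std::future_status::ready)
        throw std::runtime_error("operation timeout on reconfig");
    return future.get();
}

int main()
{
    auto response = reconfig("node4:9234", std::chrono::milliseconds(1000));
    std::cout << response.new_config << '\n';
}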

View File

@ -188,12 +188,17 @@ namespace Hashes
#include <nmmintrin.h>
#endif
#if defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#include <crc32-s390x.h>
#endif
struct CRC32Hash
{
size_t operator()(Key x) const
{
#ifdef __SSE4_2__
return _mm_crc32_u64(-1ULL, x);
#elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
return s390x_crc32(-1ULL, x);
#else
/// On other platforms we do not have CRC32. NOTE This can be confusing.
return intHash64(x);

View File

@ -5,7 +5,6 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <IO/WriteHelpers.h>
namespace DB

View File

@ -3,14 +3,8 @@
#include <Compression/CompressionFactory.h>
#include <base/unaligned.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <IO/WriteHelpers.h>
#include "Common/Exception.h"
#include "DataTypes/IDataType.h"
#include "base/Decimal_fwd.h"
#include "base/types.h"
#include "config.h"
#include <boost/integer/common_factor.hpp>
#include <libdivide-config.h>
@ -84,7 +78,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
const char * const source_end = source + source_size;
T gcd_divider{};
T gcd_divider = 0;
const auto * cur_source = source;
while (gcd_divider != T(1) && cur_source < source_end)
{
@ -100,7 +94,7 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
if constexpr (sizeof(T) <= 8)
{
/// libdivide support only UInt32 and UInt64.
/// libdivide supports only UInt32 and UInt64.
using LibdivideT = std::conditional_t<sizeof(T) <= 4, UInt32, UInt64>;
libdivide::divider<LibdivideT> divider(static_cast<LibdivideT>(gcd_divider));
cur_source = source;
@ -126,8 +120,6 @@ void compressDataForType(const char * source, UInt32 source_size, char * dest)
template <typename T>
void decompressDataForType(const char * source, UInt32 source_size, char * dest, UInt32 output_size)
{
const char * const output_end = dest + output_size;
if (source_size % sizeof(T) != 0)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is not aligned to {}", source_size, sizeof(T));
@ -135,11 +127,14 @@ void decompressDataForType(const char * source, UInt32 source_size, char * dest,
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot GCD decompress, data size {} is less than {}", source_size, sizeof(T));
const char * const source_end = source + source_size;
const char * const dest_end = dest + output_size;
const T gcd_multiplier = unalignedLoad<T>(source);
source += sizeof(T);
while (source < source_end)
{
if (dest + sizeof(T) > output_end) [[unlikely]]
if (dest + sizeof(T) > dest_end) [[unlikely]]
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress the data");
unalignedStore<T>(dest, unalignedLoad<T>(source) * gcd_multiplier);
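
The GCD codec works as the hunks above suggest: compression scans the input for the greatest common divisor, stores it once, and writes every value divided by it; decompression reads the stored divisor back and multiplies, checking that writes stay inside the output buffer. A minimal standalone sketch of the same idea on a vector of integers (no libdivide, no unaligned loads, purely illustrative):

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <vector>

// "Compress": the first element is the common divisor, the rest are the quotients.
std::vector<uint64_t> gcdCompress(const std::vector<uint64_t> & values)
{
    uint64_t divider = 0;
    for (uint64_t v : values)
    {
        divider = std::gcd(divider, v);   // gcd(0, v) == v, so this also seeds the loop
        if (divider == 1)
            break;                        // nothing to gain, same early exit as the codec
    }
    std::vector<uint64_t> out;
    out.push_back(divider == 0 ? 1 : divider);
    for (uint64_t v : values)
        out.push_back(v / out.front());
    return out;
}

std::vector<uint64_t> gcdDecompress(const std::vector<uint64_t> & data, size_t expected_size)
{
    if (data.empty())
        throw std::runtime_error("cannot GCD decompress: no stored divisor");
    if (data.size() - 1 != expected_size)
        throw std::runtime_error("cannot GCD decompress: size mismatch");
    uint64_t multiplier = data.front();
    std::vector<uint64_t> out;
    for (size_t i = 1; i < data.size(); ++i)
        out.push_back(data[i] * multiplier);
    return out;
}

int main()
{
    std::vector<uint64_t> values = {1000, 2500, 7500, 10000};
    auto compressed = gcdCompress(values);      // divisor 500, quotients 2 5 15 20
    auto restored = gcdDecompress(compressed, values.size());
    std::cout << (restored == values ? "round trip ok" : "mismatch") << '\n';
}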

View File

@ -268,6 +268,16 @@ ReadSettings Context::getReadSettings() const
return ReadSettings{};
}
ResourceManagerPtr Context::getResourceManager() const
{
return nullptr;
}
ClassifierPtr Context::getWorkloadClassifier() const
{
return nullptr;
}
void Context::initializeKeeperDispatcher([[maybe_unused]] bool start_async) const
{
const auto & config_ref = getConfigRef();

View File

@ -13,6 +13,7 @@
#include <Core/BackgroundSchedulePool.h>
#include <IO/AsyncReadCounters.h>
#include <IO/IResourceManager.h>
#include <Poco/Util/Application.h>
@ -118,6 +119,10 @@ public:
ReadSettings getReadSettings() const;
/// Resource management related
ResourceManagerPtr getResourceManager() const;
ClassifierPtr getWorkloadClassifier() const;
std::shared_ptr<KeeperDispatcher> getKeeperDispatcher() const;
std::shared_ptr<KeeperDispatcher> tryGetKeeperDispatcher() const;
void initializeKeeperDispatcher(bool start_async) const;

View File

@ -309,6 +309,9 @@ class IColumn;
\
M(Bool, partial_result_on_first_cancel, false, "Allows query to return a partial result after cancel.", 0) \
\
M(Milliseconds, partial_result_update_duration_ms, 0, "Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET.", 0) \
M(UInt64, max_rows_in_partial_result, 10, "Maximum rows to show in the partial result after every real-time update while the query runs (if the query contains OFFSET, use the partial result limit plus OFFSET as the value).", 0) \
\
M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \
M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \
/** Settings for testing hedged requests */ \
@ -1053,6 +1056,8 @@ class IColumn;
\
M(Bool, format_display_secrets_in_show_and_select, false, "Do not hide secrets in SHOW and SELECT queries.", IMPORTANT) \
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
M(Bool, regexp_dict_flag_case_insensitive, false, "Use case-insensitive matching for a regexp_tree dictionary. Can be overridden in individual expressions with (?i) and (?-i).", 0) \
M(Bool, regexp_dict_flag_dotall, false, "Allow '.' to match newline characters for a regexp_tree dictionary.", 0) \
\
M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
M(Bool, precise_float_parsing, false, "Prefer more precise (but slower) float parsing algorithm", 0) \
@ -1071,7 +1076,7 @@ DECLARE_SETTINGS_TRAITS_ALLOW_CUSTOM_SETTINGS(SettingsTraits, LIST_OF_SETTINGS)
/** Settings of query execution.
* These settings go to users.xml.
*/
struct Settings : public BaseSettings<SettingsTraits>, public IHints<2, Settings>
struct Settings : public BaseSettings<SettingsTraits>, public IHints<2>
{
Settings() = default;
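
Settings such as the two partial-result entries above are added as rows of an X-macro list: every M(type, name, default, description, flags) row is expanded once to declare a member with its default and again wherever the metadata is needed. The sketch below illustrates that expansion pattern with a toy macro and toy types; it is not the real BaseSettings machinery.

#include <cstdint>
#include <iostream>
#include <string>

// Toy settings list in the same X-macro style as LIST_OF_SETTINGS.
#define LIST_OF_TOY_SETTINGS(M) \
    M(uint64_t, partial_result_update_duration_ms, 0, "Interval for partial result updates; 0 disables them.") \
    M(uint64_t, max_rows_in_partial_result, 10, "Maximum rows shown per partial result update.")

struct ToySettings
{
    // First expansion: one member with its default value per row.
#define DECLARE_MEMBER(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME = DEFAULT;
    LIST_OF_TOY_SETTINGS(DECLARE_MEMBER)
#undef DECLARE_MEMBER

    // Second expansion: iterate over names, defaults and descriptions.
    static void describeAll()
    {
#define PRINT_DOC(TYPE, NAME, DEFAULT, DESCRIPTION) \
        std::cout << #NAME << " (default " << DEFAULT << "): " << DESCRIPTION << '\n';
        LIST_OF_TOY_SETTINGS(PRINT_DOC)
#undef PRINT_DOC
    }
};

int main()
{
    ToySettings settings;
    settings.partial_result_update_duration_ms = 100;
    std::cout << settings.partial_result_update_duration_ms << '\n';
    ToySettings::describeAll();
}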

View File

@ -117,6 +117,33 @@ Field DataTypeAggregateFunction::getDefault() const
return field;
}
bool DataTypeAggregateFunction::strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & rhs_state_type)
{
const auto * lhs_state = typeid_cast<const DataTypeAggregateFunction *>(lhs_state_type.get());
const auto * rhs_state = typeid_cast<const DataTypeAggregateFunction *>(rhs_state_type.get());
if (!lhs_state || !rhs_state)
return false;
if (lhs_state->function->getName() != rhs_state->function->getName())
return false;
if (lhs_state->parameters.size() != rhs_state->parameters.size())
return false;
for (size_t i = 0; i < lhs_state->parameters.size(); ++i)
if (lhs_state->parameters[i] != rhs_state->parameters[i])
return false;
if (lhs_state->argument_types.size() != rhs_state->argument_types.size())
return false;
for (size_t i = 0; i < lhs_state->argument_types.size(); ++i)
if (!lhs_state->argument_types[i]->equals(*rhs_state->argument_types[i]))
return false;
return true;
}
bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
{
@ -126,34 +153,7 @@ bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
auto lhs_state_type = function->getNormalizedStateType();
auto rhs_state_type = typeid_cast<const DataTypeAggregateFunction &>(rhs).function->getNormalizedStateType();
if (typeid(lhs_state_type.get()) != typeid(rhs_state_type.get()))
return false;
if (const auto * lhs_state = typeid_cast<const DataTypeAggregateFunction *>(lhs_state_type.get()))
{
const auto & rhs_state = typeid_cast<const DataTypeAggregateFunction &>(*rhs_state_type);
if (lhs_state->function->getName() != rhs_state.function->getName())
return false;
if (lhs_state->parameters.size() != rhs_state.parameters.size())
return false;
for (size_t i = 0; i < lhs_state->parameters.size(); ++i)
if (lhs_state->parameters[i] != rhs_state.parameters[i])
return false;
if (lhs_state->argument_types.size() != rhs_state.argument_types.size())
return false;
for (size_t i = 0; i < lhs_state->argument_types.size(); ++i)
if (!lhs_state->argument_types[i]->equals(*rhs_state.argument_types[i]))
return false;
return true;
}
return lhs_state_type->equals(*rhs_state_type);
return strictEquals(lhs_state_type, rhs_state_type);
}
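
Because strictEquals is now a static helper, the state-type comparison can be used on its own. A hedged usage sketch, assuming the usual DataTypeFactory spelling of aggregate-function types (this snippet is illustrative and not taken from the commit):

#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeFactory.h>

using namespace DB;

bool sameUniqState()
{
    DataTypePtr lhs = DataTypeFactory::instance().get("AggregateFunction(uniq, UInt64)");
    DataTypePtr rhs = DataTypeFactory::instance().get("AggregateFunction(uniq, UInt32)");
    // Same function name ("uniq"), but the argument types differ, so this returns false.
    return DataTypeAggregateFunction::strictEquals(lhs, rhs);
}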

View File

@ -60,6 +60,7 @@ public:
Field getDefault() const override;
static bool strictEquals(const DataTypePtr & lhs_state_type, const DataTypePtr & rhs_state_type);
bool equals(const IDataType & rhs) const override;
bool isParametric() const override { return true; }

View File

@ -13,7 +13,7 @@ namespace ErrorCodes
}
template <typename T>
class EnumValues : public IHints<1, EnumValues<T>>
class EnumValues : public IHints<>
{
public:
using Value = std::pair<std::string, T>;

View File

@ -136,8 +136,17 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Array & x) const
DataTypes element_types;
element_types.reserve(x.size());
bool has_signed_int = false;
bool uint64_convert_possible = true;
for (const Field & elem : x)
element_types.emplace_back(applyVisitor(*this, elem));
{
DataTypePtr type = applyVisitor(*this, elem);
element_types.emplace_back(type);
checkUInt64ToIn64Conversion(has_signed_int, uint64_convert_possible, type, elem);
}
if (has_signed_int && uint64_convert_possible)
convertUInt64ToInt64IfPossible(element_types);
return std::make_shared<DataTypeArray>(getLeastSupertype<on_error>(element_types));
}
@ -165,14 +174,28 @@ DataTypePtr FieldToDataType<on_error>::operator() (const Map & map) const
key_types.reserve(map.size());
value_types.reserve(map.size());
bool k_has_signed_int = false;
bool k_uint64_convert_possible = true;
bool v_has_signed_int = false;
bool v_uint64_convert_possible = true;
for (const auto & elem : map)
{
const auto & tuple = elem.safeGet<const Tuple &>();
assert(tuple.size() == 2);
key_types.push_back(applyVisitor(*this, tuple[0]));
value_types.push_back(applyVisitor(*this, tuple[1]));
DataTypePtr k_type = applyVisitor(*this, tuple[0]);
key_types.push_back(k_type);
checkUInt64ToIn64Conversion(k_has_signed_int, k_uint64_convert_possible, k_type, tuple[0]);
DataTypePtr v_type = applyVisitor(*this, tuple[1]);
value_types.push_back(v_type);
checkUInt64ToIn64Conversion(v_has_signed_int, v_uint64_convert_possible, v_type, tuple[1]);
}
if (k_has_signed_int && k_uint64_convert_possible)
convertUInt64ToInt64IfPossible(key_types);
if (v_has_signed_int && v_uint64_convert_possible)
convertUInt64ToInt64IfPossible(value_types);
return std::make_shared<DataTypeMap>(
getLeastSupertype<on_error>(key_types),
getLeastSupertype<on_error>(value_types));
@ -204,6 +227,28 @@ DataTypePtr FieldToDataType<on_error>::operator()(const bool &) const
return DataTypeFactory::instance().get("Bool");
}
template <LeastSupertypeOnError on_error>
void FieldToDataType<on_error>::checkUInt64ToIn64Conversion(bool & has_signed_int, bool & uint64_convert_possible, const DataTypePtr & type, const Field & elem) const
{
if (uint64_convert_possible)
{
bool is_native_int = WhichDataType(type).isNativeInt();
if (is_native_int)
has_signed_int |= is_native_int;
else if (type->getTypeId() == TypeIndex::UInt64)
uint64_convert_possible &= (elem.template get<UInt64>() <= std::numeric_limits<Int64>::max());
}
}
template <LeastSupertypeOnError on_error>
void FieldToDataType<on_error>::convertUInt64ToInt64IfPossible(DataTypes & data_types) const
{
for (auto& type : data_types)
if (type->getTypeId() == TypeIndex::UInt64)
type = std::make_shared<DataTypeInt64>();
}
template class FieldToDataType<LeastSupertypeOnError::Throw>;
template class FieldToDataType<LeastSupertypeOnError::String>;
template class FieldToDataType<LeastSupertypeOnError::Null>;

View File

@ -45,6 +45,16 @@ public:
DataTypePtr operator() (const UInt256 & x) const;
DataTypePtr operator() (const Int256 & x) const;
DataTypePtr operator() (const bool & x) const;
private:
// The conditions for converting UInt64 to Int64 are:
// 1. The existence of Int.
// 2. The existence of UInt64, and the UInt64 value must be <= Int64.max.
void checkUInt64ToIn64Conversion(bool& has_signed_int, bool& uint64_convert_possible, const DataTypePtr & type, const Field & elem) const;
// Convert the UInt64 type to Int64 in order to cover other signed_integer types
// and obtain the least super type of all ints.
void convertUInt64ToInt64IfPossible(DataTypes & data_types) const;
};
}
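
The comments above state the rule: rewrite UInt64 slots to Int64 only if some signed integer appears in the collection and every UInt64 value fits into Int64. A standalone sketch of that decision on plain integers (illustrative only; the real code operates on Field values and DataTypePtr):

#include <cstdint>
#include <iostream>
#include <limits>
#include <variant>
#include <vector>

using Element = std::variant<int64_t, uint64_t>;

// Returns true if all elements can be described by a single Int64 type:
// at least one signed value exists, and every unsigned value fits into Int64.
bool canUnifyAsInt64(const std::vector<Element> & elements)
{
    bool has_signed_int = false;
    bool uint64_convert_possible = true;
    for (const auto & elem : elements)
    {
        if (std::holds_alternative<int64_t>(elem))
            has_signed_int = true;
        else
            uint64_convert_possible &=
                std::get<uint64_t>(elem) <= static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
    }
    return has_signed_int && uint64_convert_possible;
}

int main()
{
    std::vector<Element> ok = {int64_t{-1}, uint64_t{42}};
    std::vector<Element> too_big = {int64_t{-1}, uint64_t{18446744073709551615ULL}};
    std::cout << std::boolalpha
              << canUnifyAsInt64(ok) << '\n'        // true:  [-1, 42] becomes Array(Int64)
              << canUnifyAsInt64(too_big) << '\n';  // false: 2^64-1 does not fit into Int64
}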

View File

@ -493,7 +493,10 @@ void SerializationArray::deserializeText(IColumn & column, ReadBuffer & istr, co
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeTextQuoted(nested_column, istr, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(nested_column, istr, settings, nested);
else
nested->deserializeTextQuoted(nested_column, istr, settings);
}, false);
if (whole && !istr.eof())
@ -604,7 +607,10 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeTextCSV(nested_column, rb, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextCSVImpl(nested_column, rb, settings, nested);
else
nested->deserializeTextCSV(nested_column, rb, settings);
}, true);
}
else
@ -612,7 +618,10 @@ void SerializationArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr,
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeTextQuoted(nested_column, rb, settings);
if (settings.null_as_default)
SerializationNullable::deserializeTextQuotedImpl(nested_column, rb, settings, nested);
else
nested->deserializeTextQuoted(nested_column, rb, settings);
}, true);
}
}
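
All three hunks repeat one pattern: when null_as_default is set and the nested column is not Nullable, element parsing is routed through the nullable deserializer so a NULL token becomes the nested type's default value instead of a parse error. A standalone sketch of that dispatch on plain strings (toy parser, not the real serialization classes):

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct FormatSettingsLike { bool null_as_default = false; };

int parseIntStrict(const std::string & token)
{
    if (token == "NULL")
        throw std::runtime_error("cannot parse NULL into a non-nullable column");
    return std::stoi(token);
}

// Mirrors the branch added above: either tolerate NULL as the default value,
// or fall through to the strict nested parser.
int parseElement(const std::string & token, const FormatSettingsLike & settings)
{
    if (settings.null_as_default && token == "NULL")
        return 0;                    // default value of the nested type
    return parseIntStrict(token);
}

int main()
{
    std::vector<std::string> tokens = {"1", "NULL", "3"};
    FormatSettingsLike settings;
    settings.null_as_default = true;
    for (const auto & token : tokens)
        std::cout << parseElement(token, settings) << ' ';
    std::cout << '\n';               // prints: 1 0 3
}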

Some files were not shown because too many files have changed in this diff.