mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge branch 'master' into json-object-as-tuple-inference
This commit is contained in:
commit
acadda1665
2
contrib/abseil-cpp
vendored
2
contrib/abseil-cpp
vendored
@ -1 +1 @@
|
||||
Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf
|
||||
Subproject commit 5655528c41830f733160de4fb0b99073841bae9e
|
@ -1,5 +1,5 @@
|
||||
set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
|
||||
set(BUILD_TESTING OFF)
|
||||
|
||||
set(ABSL_PROPAGATE_CXX_STD ON)
|
||||
add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
|
||||
|
||||
|
@ -1,14 +1,3 @@
|
||||
# Copyright 2015 The RE2 Authors. All Rights Reserved.
|
||||
# Use of this source code is governed by a BSD-style
|
||||
# license that can be found in the LICENSE file.
|
||||
|
||||
# This file was edited for ClickHouse
|
||||
|
||||
string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space)
|
||||
if(_have_space GREATER 0)
|
||||
message(FATAL_ERROR "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] highly not recommended. Library re2st will be disabled.")
|
||||
endif()
|
||||
|
||||
set(SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/re2")
|
||||
|
||||
set(RE2_SOURCES
|
||||
@ -35,33 +24,9 @@ set(RE2_SOURCES
|
||||
${SRC_DIR}/util/rune.cc
|
||||
${SRC_DIR}/util/strutil.cc
|
||||
)
|
||||
add_library(re2 ${RE2_SOURCES})
|
||||
target_include_directories(re2 PUBLIC "${SRC_DIR}")
|
||||
target_link_libraries(re2 ch_contrib::abseil_str_format)
|
||||
|
||||
# Building re2 which is thread-safe and re2_st which is not.
|
||||
# re2 changes its state during matching of regular expression, e.g. creates temporary DFA.
|
||||
# It uses RWLock to process the same regular expression object from different threads.
|
||||
# In order to avoid redundant locks in some cases, we use not thread-safe version of the library (re2_st).
|
||||
add_library(_re2 ${RE2_SOURCES})
|
||||
target_include_directories(_re2 PUBLIC "${SRC_DIR}")
|
||||
target_link_libraries(_re2 ch_contrib::abseil_str_format)
|
||||
|
||||
add_library(re2_st ${RE2_SOURCES})
|
||||
target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st)
|
||||
target_include_directories (re2_st PRIVATE .)
|
||||
target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_include_directories (re2_st SYSTEM BEFORE PUBLIC ${SRC_DIR})
|
||||
target_link_libraries (re2_st ch_contrib::abseil_str_format)
|
||||
|
||||
file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st)
|
||||
foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h)
|
||||
add_custom_command (OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
|
||||
COMMAND ${CMAKE_COMMAND} -DSOURCE_FILENAME="${SRC_DIR}/re2/${FILENAME}"
|
||||
-DTARGET_FILENAME="${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}"
|
||||
-P "${CMAKE_CURRENT_SOURCE_DIR}/re2_transform.cmake"
|
||||
COMMENT "Creating ${FILENAME} for re2_st library.")
|
||||
add_custom_target (transform_${FILENAME} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}")
|
||||
add_dependencies (re2_st transform_${FILENAME})
|
||||
endforeach ()
|
||||
|
||||
# NOTE: you should not change name of library here, since it is used to generate required header (see above)
|
||||
add_library(ch_contrib::re2 ALIAS re2)
|
||||
add_library(ch_contrib::re2_st ALIAS re2_st)
|
||||
add_library(ch_contrib::re2 ALIAS _re2)
|
||||
|
@ -1,10 +0,0 @@
|
||||
file (READ ${SOURCE_FILENAME} CONTENT)
|
||||
string (REGEX REPLACE "using re2::RE2;" "" CONTENT "${CONTENT}")
|
||||
string (REGEX REPLACE "using re2::LazyRE2;" "" CONTENT "${CONTENT}")
|
||||
string (REGEX REPLACE "namespace re2 {" "namespace re2_st {" CONTENT "${CONTENT}")
|
||||
string (REGEX REPLACE "re2::" "re2_st::" CONTENT "${CONTENT}")
|
||||
string (REGEX REPLACE "\"re2/" "\"re2_st/" CONTENT "${CONTENT}")
|
||||
string (REGEX REPLACE "(.\\*?_H)" "\\1_ST" CONTENT "${CONTENT}")
|
||||
string (REGEX REPLACE "#define MUTEX_IS_PTHREAD_RWLOCK" "#undef MUTEX_IS_PTHREAD_RWLOCK" CONTENT "${CONTENT}")
|
||||
string (REGEX REPLACE "typedef std::mutex MutexType;" "struct MutexType { void lock() {} void unlock() {} };" CONTENT "${CONTENT}")
|
||||
file (WRITE ${TARGET_FILENAME} "${CONTENT}")
|
2
contrib/s2geometry
vendored
2
contrib/s2geometry
vendored
@ -1 +1 @@
|
||||
Subproject commit 4a7ebd5da04cb6c9ea38bbf5914a9f8f3c768564
|
||||
Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b
|
@ -7,12 +7,6 @@ endif()
|
||||
|
||||
set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src")
|
||||
|
||||
set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
|
||||
if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt")
|
||||
message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive")
|
||||
endif()
|
||||
|
||||
|
||||
set(S2_SRCS
|
||||
"${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc"
|
||||
"${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc"
|
||||
@ -58,7 +52,9 @@ set(S2_SRCS
|
||||
"${S2_SOURCE_DIR}/s2/s2edge_crossings.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2edge_distances.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2error.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2latlng.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2latlng_rect.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc"
|
||||
@ -93,59 +89,58 @@ set(S2_SRCS
|
||||
"${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shape_measures.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shape_nesting_query.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2text_format.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2wedge_relations.cc"
|
||||
"${S2_SOURCE_DIR}/s2/s2winding_operation.cc"
|
||||
"${S2_SOURCE_DIR}/s2/strings/serialize.cc"
|
||||
"${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc"
|
||||
"${S2_SOURCE_DIR}/s2/util/bits/bits.cc"
|
||||
"${S2_SOURCE_DIR}/s2/util/coding/coder.cc"
|
||||
"${S2_SOURCE_DIR}/s2/util/coding/varint.cc"
|
||||
"${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc"
|
||||
"${S2_SOURCE_DIR}/s2/util/math/mathutil.cc"
|
||||
"${S2_SOURCE_DIR}/s2/util/units/length-units.cc"
|
||||
|
||||
)
|
||||
|
||||
add_library(_s2 ${S2_SRCS})
|
||||
add_library(ch_contrib::s2 ALIAS _s2)
|
||||
|
||||
set_property(TARGET _s2 PROPERTY CXX_STANDARD 17)
|
||||
|
||||
if (TARGET OpenSSL::SSL)
|
||||
target_link_libraries(_s2 PRIVATE OpenSSL::Crypto OpenSSL::SSL)
|
||||
endif()
|
||||
|
||||
# Copied from contrib/s2geometry/CMakeLists
|
||||
target_link_libraries(_s2 PRIVATE
|
||||
absl::base
|
||||
absl::btree
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::dynamic_annotations
|
||||
absl::endian
|
||||
absl::fixed_array
|
||||
absl::flat_hash_map
|
||||
absl::flat_hash_set
|
||||
absl::hash
|
||||
absl::inlined_vector
|
||||
absl::int128
|
||||
absl::log_severity
|
||||
absl::memory
|
||||
absl::span
|
||||
absl::str_format
|
||||
absl::strings
|
||||
absl::type_traits
|
||||
absl::utility
|
||||
)
|
||||
absl::base
|
||||
absl::btree
|
||||
absl::check
|
||||
absl::config
|
||||
absl::core_headers
|
||||
absl::dynamic_annotations
|
||||
absl::endian
|
||||
absl::fixed_array
|
||||
absl::flags
|
||||
absl::flat_hash_map
|
||||
absl::flat_hash_set
|
||||
absl::hash
|
||||
absl::inlined_vector
|
||||
absl::int128
|
||||
absl::log
|
||||
absl::log_severity
|
||||
absl::memory
|
||||
absl::span
|
||||
absl::status
|
||||
absl::str_format
|
||||
absl::strings
|
||||
absl::type_traits
|
||||
absl::utility
|
||||
)
|
||||
|
||||
target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/")
|
||||
target_include_directories(_s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}")
|
||||
|
@ -325,7 +325,6 @@ def parse_env_variables(
|
||||
|
||||
if additional_pkgs:
|
||||
# NOTE: These are the env variables for the packages/build script
|
||||
result.append("MAKE_APK=true")
|
||||
result.append("MAKE_RPM=true")
|
||||
result.append("MAKE_TGZ=true")
|
||||
|
||||
|
@ -88,5 +88,10 @@ RUN npm install -g azurite \
|
||||
COPY run.sh /
|
||||
COPY setup_minio.sh /
|
||||
COPY setup_hdfs_minicluster.sh /
|
||||
COPY attach_gdb.lib /
|
||||
COPY utils.lib /
|
||||
|
||||
# We store stress_tests.lib in stateless image to avoid duplication of this file in stress and upgrade tests
|
||||
COPY stress_tests.lib /
|
||||
|
||||
CMD ["/bin/bash", "/run.sh"]
|
||||
|
@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
source /usr/share/clickhouse-test/ci/utils.lib
|
||||
source /utils.lib
|
||||
|
||||
function attach_gdb_to_clickhouse()
|
||||
{
|
@ -22,10 +22,10 @@ dpkg -i package_folder/clickhouse-client_*.deb
|
||||
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01
|
||||
source /attach_gdb.lib
|
||||
|
||||
# shellcheck disable=SC1091
|
||||
source /usr/share/clickhouse-test/ci/utils.lib || true # FIXME: to not break old builds, clean on 2023-09-01
|
||||
source /utils.lib
|
||||
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
|
||||
|
||||
# Stress tests and upgrade check use similar code that was placed
|
||||
# in a separate bash library. See tests/ci/stress_tests.lib
|
||||
source /usr/share/clickhouse-test/ci/attach_gdb.lib
|
||||
source /usr/share/clickhouse-test/ci/stress_tests.lib
|
||||
source /attach_gdb.lib
|
||||
source /stress_tests.lib
|
||||
|
||||
install_packages package_folder
|
||||
|
||||
|
@ -16,8 +16,8 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre
|
||||
|
||||
# Stress tests and upgrade check use similar code that was placed
|
||||
# in a separate bash library. See tests/ci/stress_tests.lib
|
||||
source /usr/share/clickhouse-test/ci/attach_gdb.lib
|
||||
source /usr/share/clickhouse-test/ci/stress_tests.lib
|
||||
source /attach_gdb.lib
|
||||
source /stress_tests.lib
|
||||
|
||||
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
|
||||
./setup_minio.sh stateless # to have a proper environment
|
||||
|
@ -8,7 +8,7 @@ sidebar_label: EmbeddedRocksDB
|
||||
|
||||
This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).
|
||||
|
||||
## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
@ -85,7 +85,7 @@ You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wi
|
||||
</rocksdb>
|
||||
```
|
||||
|
||||
## Supported operations {#table_engine-EmbeddedRocksDB-supported-operations}
|
||||
## Supported operations {#supported-operations}
|
||||
|
||||
### Inserts
|
||||
|
||||
|
@ -14,7 +14,7 @@ Kafka lets you:
|
||||
- Organize fault-tolerant storage.
|
||||
- Process streams as they become available.
|
||||
|
||||
## Creating a Table {#table_engine-kafka-creating-a-table}
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
|
@ -13,7 +13,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).
|
||||
- Publish or subscribe to message subjects.
|
||||
- Process new messages as they become available.
|
||||
|
||||
## Creating a Table {#table_engine-redisstreams-creating-a-table}
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
|
@ -13,7 +13,7 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
|
||||
- Publish or subscribe to data flows.
|
||||
- Process streams as they become available.
|
||||
|
||||
## Creating a Table {#table_engine-rabbitmq-creating-a-table}
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
|
@ -63,7 +63,7 @@ SETTINGS
|
||||
mode = 'ordered';
|
||||
```
|
||||
|
||||
## Settings {#s3queue-settings}
|
||||
## Settings {#settings}
|
||||
|
||||
### mode {#mode}
|
||||
|
||||
@ -93,7 +93,7 @@ Possible values:
|
||||
|
||||
Default value: `/`.
|
||||
|
||||
### s3queue_loading_retries {#s3queue_loading_retries}
|
||||
### s3queue_loading_retries {#loading_retries}
|
||||
|
||||
Retry file loading up to specified number of times. By default, there are no retries.
|
||||
Possible values:
|
||||
@ -102,7 +102,7 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### s3queue_polling_min_timeout_ms {#s3queue_polling_min_timeout_ms}
|
||||
### s3queue_polling_min_timeout_ms {#polling_min_timeout_ms}
|
||||
|
||||
Minimal timeout before next polling (in milliseconds).
|
||||
|
||||
@ -112,7 +112,7 @@ Possible values:
|
||||
|
||||
Default value: `1000`.
|
||||
|
||||
### s3queue_polling_max_timeout_ms {#s3queue_polling_max_timeout_ms}
|
||||
### s3queue_polling_max_timeout_ms {#polling_max_timeout_ms}
|
||||
|
||||
Maximum timeout before next polling (in milliseconds).
|
||||
|
||||
@ -122,7 +122,7 @@ Possible values:
|
||||
|
||||
Default value: `10000`.
|
||||
|
||||
### s3queue_polling_backoff_ms {#s3queue_polling_backoff_ms}
|
||||
### s3queue_polling_backoff_ms {#polling_backoff_ms}
|
||||
|
||||
Polling backoff (in milliseconds).
|
||||
|
||||
@ -132,7 +132,7 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### s3queue_tracked_files_limit {#s3queue_tracked_files_limit}
|
||||
### s3queue_tracked_files_limit {#tracked_files_limit}
|
||||
|
||||
Limits the number of ZooKeeper nodes when the 'unordered' mode is used; does nothing in 'ordered' mode.
|
||||
If the limit is reached, the oldest processed files will be deleted from the ZooKeeper node and processed again.
|
||||
@ -143,7 +143,7 @@ Possible values:
|
||||
|
||||
Default value: `1000`.
|
||||
|
||||
### s3queue_tracked_file_ttl_sec {#s3queue_tracked_file_ttl_sec}
|
||||
### s3queue_tracked_file_ttl_sec {#tracked_file_ttl_sec}
|
||||
|
||||
Maximum number of seconds to store processed files in ZooKeeper node (store forever by default) for 'unordered' mode, does nothing for 'ordered' mode.
|
||||
After the specified number of seconds, the file will be re-imported.
|
||||
@ -154,7 +154,7 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### s3queue_polling_size {#s3queue_polling_size}
|
||||
### s3queue_polling_size {#polling_size}
|
||||
|
||||
Maximum files to fetch from S3 with SELECT or in background task.
|
||||
Engine takes files for processing from S3 in batches.
|
||||
|
@ -1222,7 +1222,6 @@ Configuration markup:
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_enabled>true</cache_enabled>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
@ -1250,8 +1249,6 @@ Limit parameters (mainly for internal usage):
|
||||
|
||||
Other parameters:
|
||||
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
|
||||
* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
|
||||
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
|
||||
|
@ -20,7 +20,7 @@ For example:
|
||||
|
||||
where path can be any other valid ZooKeeper path.
|
||||
|
||||
## Creating a Table {#table_engine-KeeperMap-creating-a-table}
|
||||
## Creating a Table {#creating-a-table}
|
||||
|
||||
``` sql
|
||||
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
|
||||
@ -74,7 +74,7 @@ If multiple tables are created on the same ZooKeeper path, the values are persis
|
||||
As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances.
|
||||
Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
|
||||
|
||||
## Supported operations {#table_engine-KeeperMap-supported-operations}
|
||||
## Supported operations {#supported-operations}
|
||||
|
||||
### Inserts
|
||||
|
||||
|
@ -43,6 +43,12 @@ SETTINGS use_query_cache = true;
|
||||
will store the query result in the query cache. Subsequent executions of the same query (also with parameter `use_query_cache = true`) will
|
||||
read the computed result from the cache and return it immediately.
|
||||
|
||||
:::note
|
||||
Setting `use_query_cache` and all other query-cache-related settings only take effect on stand-alone `SELECT` statements. In particular,
|
||||
the results of `SELECT`s to views created by `CREATE VIEW AS SELECT [...] SETTINGS use_query_cache = true` are not cached unless the `SELECT`
|
||||
statement runs with `SETTINGS use_query_cache = true`.
|
||||
:::
|
||||
|
||||
The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_cache](settings/settings.md#enable-writes-to-query-cache)
|
||||
and [enable_reads_from_query_cache](settings/settings.md#enable-reads-from-query-cache) (both `true` by default). The former setting
|
||||
controls whether query results are stored in the cache, whereas the latter setting determines if the database should try to retrieve query
|
||||
@ -84,7 +90,7 @@ It is also possible to limit the cache usage of individual users using [settings
|
||||
constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may
|
||||
allocate in the query cache and the maximum number of stored query results. For that, first provide configurations
|
||||
[query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and
|
||||
[query_cache_max_entries](settings/settings.md#query-cache-size-max-items) in a user profile in `users.xml`, then make both settings
|
||||
[query_cache_max_entries](settings/settings.md#query-cache-size-max-entries) in a user profile in `users.xml`, then make both settings
|
||||
readonly:
|
||||
|
||||
``` xml
|
||||
@ -134,10 +140,26 @@ block granularity when query results are later served from the query cache.
|
||||
|
||||
As a result, the query cache stores for each query multiple (partial)
|
||||
result blocks. While this behavior is a good default, it can be suppressed using setting
|
||||
[query_cache_squash_partial_query_results](settings/settings.md#query-cache-squash-partial-query-results).
|
||||
[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results).
|
||||
|
||||
Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using
|
||||
setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
|
||||
Also, results of queries with non-deterministic functions are not cached. Such functions include
|
||||
- functions for accessing dictionaries: [`dictGet()`](../sql-reference/functions/ext-dict-functions.md#dictGet) etc.
|
||||
- [user-defined functions](../sql-reference/statements/create/function.md),
|
||||
- functions which return the current date or time: [`now()`](../sql-reference/functions/date-time-functions.md#now),
|
||||
[`today()`](../sql-reference/functions/date-time-functions.md#today),
|
||||
[`yesterday()`](../sql-reference/functions/date-time-functions.md#yesterday) etc.,
|
||||
- functions which return random values: [`randomString()`](../sql-reference/functions/random-functions.md#randomString),
|
||||
[`fuzzBits()`](../sql-reference/functions/random-functions.md#fuzzBits) etc.,
|
||||
- functions whose result depends on the size and order of the internal chunks used for query processing:
|
||||
[`nowInBlock()`](../sql-reference/functions/date-time-functions.md#nowInBlock) etc.,
|
||||
[`rowNumberInBlock()`](../sql-reference/functions/other-functions.md#rowNumberInBlock),
|
||||
[`runningDifference()`](../sql-reference/functions/other-functions.md#runningDifference),
|
||||
[`blockSize()`](../sql-reference/functions/other-functions.md#blockSize) etc.,
|
||||
- functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser),
|
||||
[`queryID()`](../sql-reference/functions/other-functions.md#queryID),
|
||||
[`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc.
|
||||
Caching of non-deterministic functions can be forced regardless using setting
|
||||
[query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
|
||||
|
||||
Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a
|
||||
row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
|
||||
|
@ -835,7 +835,7 @@ List of prefixes for [custom settings](../../operations/settings/index.md#custom
|
||||
|
||||
- [Custom settings](../../operations/settings/index.md#custom_settings)
|
||||
|
||||
## core_dump {#server_configuration_parameters-core_dump}
|
||||
## core_dump {#core_dump}
|
||||
|
||||
Configures soft limit for core dump file size.
|
||||
|
||||
@ -924,7 +924,7 @@ The path to the table in ZooKeeper.
|
||||
<default_replica_name>{replica}</default_replica_name>
|
||||
```
|
||||
|
||||
## dictionaries_config {#server_configuration_parameters-dictionaries_config}
|
||||
## dictionaries_config {#dictionaries_config}
|
||||
|
||||
The path to the config file for dictionaries.
|
||||
|
||||
@ -941,7 +941,7 @@ See also “[Dictionaries](../../sql-reference/dictionaries/index.md)”.
|
||||
<dictionaries_config>*_dictionary.xml</dictionaries_config>
|
||||
```
|
||||
|
||||
## user_defined_executable_functions_config {#server_configuration_parameters-user_defined_executable_functions_config}
|
||||
## user_defined_executable_functions_config {#user_defined_executable_functions_config}
|
||||
|
||||
The path to the config file for executable user defined functions.
|
||||
|
||||
@ -958,7 +958,7 @@ See also “[Executable User Defined Functions](../../sql-reference/functions/in
|
||||
<user_defined_executable_functions_config>*_function.xml</user_defined_executable_functions_config>
|
||||
```
|
||||
|
||||
## dictionaries_lazy_load {#server_configuration_parameters-dictionaries_lazy_load}
|
||||
## dictionaries_lazy_load {#dictionaries_lazy_load}
|
||||
|
||||
Lazy loading of dictionaries.
|
||||
|
||||
@ -974,7 +974,7 @@ The default is `true`.
|
||||
<dictionaries_lazy_load>true</dictionaries_lazy_load>
|
||||
```
|
||||
|
||||
## format_schema_path {#server_configuration_parameters-format_schema_path}
|
||||
## format_schema_path {#format_schema_path}
|
||||
|
||||
The path to the directory with the schemas for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format.
|
||||
|
||||
@ -985,7 +985,7 @@ The path to the directory with the schemes for the input data, such as schemas f
|
||||
<format_schema_path>format_schemas/</format_schema_path>
|
||||
```
|
||||
|
||||
## graphite {#server_configuration_parameters-graphite}
|
||||
## graphite {#graphite}
|
||||
|
||||
Sending data to [Graphite](https://github.com/graphite-project).
|
||||
|
||||
@ -1019,7 +1019,7 @@ You can configure multiple `<graphite>` clauses. For instance, you can use this
|
||||
</graphite>
|
||||
```
|
||||
|
||||
## graphite_rollup {#server_configuration_parameters-graphite-rollup}
|
||||
## graphite_rollup {#graphite-rollup}
|
||||
|
||||
Settings for thinning data for Graphite.
|
||||
|
||||
@ -1051,7 +1051,7 @@ For more details, see [GraphiteMergeTree](../../engines/table-engines/mergetree-
|
||||
|
||||
The port for connecting to the server over HTTP(s).
|
||||
|
||||
If `https_port` is specified, [openSSL](#server_configuration_parameters-openssl) must be configured.
|
||||
If `https_port` is specified, [openSSL](#openssl) must be configured.
|
||||
|
||||
If `http_port` is specified, the OpenSSL configuration is ignored even if it is set.
|
||||
|
||||
@ -1061,7 +1061,7 @@ If `http_port` is specified, the OpenSSL configuration is ignored even if it is
|
||||
<https_port>9999</https_port>
|
||||
```
|
||||
|
||||
## http_server_default_response {#server_configuration_parameters-http_server_default_response}
|
||||
## http_server_default_response {#http_server_default_response}
|
||||
|
||||
The page that is shown by default when you access the ClickHouse HTTP(s) server.
|
||||
The default value is “Ok.” (with a line feed at the end)
|
||||
@ -1086,7 +1086,7 @@ Expired time for HSTS in seconds. The default value is 0 means clickhouse disabl
|
||||
<hsts_max_age>600000</hsts_max_age>
|
||||
```
|
||||
|
||||
## include_from {#server_configuration_parameters-include_from}
|
||||
## include_from {#include_from}
|
||||
|
||||
The path to the file with substitutions.
|
||||
|
||||
@ -1222,7 +1222,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing
|
||||
<keep_alive_timeout>10</keep_alive_timeout>
|
||||
```
|
||||
|
||||
## listen_host {#server_configuration_parameters-listen_host}
|
||||
## listen_host {#listen_host}
|
||||
|
||||
Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`.
|
||||
|
||||
@ -1233,7 +1233,7 @@ Examples:
|
||||
<listen_host>127.0.0.1</listen_host>
|
||||
```
|
||||
|
||||
## listen_backlog {#server_configuration_parameters-listen_backlog}
|
||||
## listen_backlog {#listen_backlog}
|
||||
|
||||
Backlog (queue size of pending connections) of the listen socket.
|
||||
|
||||
@ -1253,7 +1253,7 @@ Examples:
|
||||
<listen_backlog>4096</listen_backlog>
|
||||
```
|
||||
|
||||
## logger {#server_configuration_parameters-logger}
|
||||
## logger {#logger}
|
||||
|
||||
Logging settings.
|
||||
|
||||
@ -1357,7 +1357,7 @@ Keys for syslog:
|
||||
Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise.
|
||||
- format – Message format. Possible values: `bsd` and `syslog`.
|
||||
|
||||
## send_crash_reports {#server_configuration_parameters-send_crash_reports}
|
||||
## send_crash_reports {#send_crash_reports}
|
||||
|
||||
Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
|
||||
Enabling it, especially in pre-production environments, is highly appreciated.
|
||||
@ -1629,7 +1629,7 @@ Default value: `0.5`.
|
||||
|
||||
|
||||
|
||||
## merge_tree {#server_configuration_parameters-merge_tree}
|
||||
## merge_tree {#merge_tree}
|
||||
|
||||
Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
@ -1676,7 +1676,7 @@ To disable `metric_log` setting, you should create the following file `/etc/clic
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
|
||||
## replicated_merge_tree {#replicated_merge_tree}
|
||||
|
||||
Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
@ -1692,7 +1692,7 @@ For more information, see the MergeTreeSettings.h header file.
|
||||
</replicated_merge_tree>
|
||||
```
|
||||
|
||||
## openSSL {#server_configuration_parameters-openssl}
|
||||
## openSSL {#openssl}
|
||||
|
||||
SSL client/server configuration.
|
||||
|
||||
@ -1751,7 +1751,7 @@ Keys for server/client settings:
|
||||
</openSSL>
|
||||
```
|
||||
|
||||
## part_log {#server_configuration_parameters-part-log}
|
||||
## part_log {#part-log}
|
||||
|
||||
Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process.
|
||||
|
||||
@ -1791,7 +1791,7 @@ Default: false.
|
||||
</part_log>
|
||||
```
|
||||
|
||||
## path {#server_configuration_parameters-path}
|
||||
## path {#path}
|
||||
|
||||
The path to the directory containing data.
|
||||
|
||||
@ -1805,7 +1805,7 @@ The trailing slash is mandatory.
|
||||
<path>/var/lib/clickhouse/</path>
|
||||
```
|
||||
|
||||
## Prometheus {#server_configuration_parameters-prometheus}
|
||||
## Prometheus {#prometheus}
|
||||
|
||||
Exposing metrics data for scraping from [Prometheus](https://prometheus.io).
|
||||
|
||||
@ -1841,7 +1841,7 @@ Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse serve
|
||||
curl 127.0.0.1:9363/metrics
|
||||
```
|
||||
|
||||
## query_log {#server_configuration_parameters-query-log}
|
||||
## query_log {#query-log}
|
||||
|
||||
Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting.
|
||||
|
||||
@ -1911,7 +1911,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to
|
||||
</query_cache>
|
||||
```
|
||||
|
||||
## query_thread_log {#server_configuration_parameters-query_thread_log}
|
||||
## query_thread_log {#query_thread_log}
|
||||
|
||||
Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting.
|
||||
|
||||
@ -1953,7 +1953,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the
|
||||
</query_thread_log>
|
||||
```
|
||||
|
||||
## query_views_log {#server_configuration_parameters-query_views_log}
|
||||
## query_views_log {#query_views_log}
|
||||
|
||||
Setting for logging views (live, materialized etc) dependent on queries received with the [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views) setting.
|
||||
|
||||
@ -1995,7 +1995,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the
|
||||
</query_views_log>
|
||||
```
|
||||
|
||||
## text_log {#server_configuration_parameters-text_log}
|
||||
## text_log {#text_log}
|
||||
|
||||
Settings for the [text_log](../../operations/system-tables/text_log.md#system_tables-text_log) system table for logging text messages.
|
||||
|
||||
@ -2037,7 +2037,7 @@ Default: false.
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## trace_log {#server_configuration_parameters-trace_log}
|
||||
## trace_log {#trace_log}
|
||||
|
||||
Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation.
|
||||
|
||||
@ -2073,7 +2073,7 @@ The default server configuration file `config.xml` contains the following settin
|
||||
</trace_log>
|
||||
```
|
||||
|
||||
## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log}
|
||||
## asynchronous_insert_log {#asynchronous_insert_log}
|
||||
|
||||
Settings for the [asynchronous_insert_log](../../operations/system-tables/asynchronous_insert_log.md#system_tables-asynchronous_insert_log) system table for logging async inserts.
|
||||
|
||||
@ -2112,7 +2112,7 @@ Default: false.
|
||||
</clickhouse>
|
||||
```
|
||||
|
||||
## crash_log {#server_configuration_parameters-crash_log}
|
||||
## crash_log {#crash_log}
|
||||
|
||||
Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation.
|
||||
|
||||
@ -2150,7 +2150,7 @@ The default server configuration file `config.xml` contains the following settin
|
||||
</crash_log>
|
||||
```
|
||||
|
||||
## backup_log {#server_configuration_parameters-backup_log}
|
||||
## backup_log {#backup_log}
|
||||
|
||||
Settings for the [backup_log](../../operations/system-tables/backup_log.md) system table for logging `BACKUP` and `RESTORE` operations.
|
||||
|
||||
@ -2239,7 +2239,7 @@ For the value of the `incl` attribute, see the section “[Configuration files](
|
||||
- [Cluster Discovery](../../operations/cluster-discovery.md)
|
||||
- [Replicated database engine](../../engines/database-engines/replicated.md)
|
||||
|
||||
## timezone {#server_configuration_parameters-timezone}
|
||||
## timezone {#timezone}
|
||||
|
||||
The server’s time zone.
|
||||
|
||||
@ -2257,7 +2257,7 @@ The time zone is necessary for conversions between String and DateTime formats w
|
||||
|
||||
- [session_timezone](../settings/settings.md#session_timezone)
|
||||
|
||||
## tcp_port {#server_configuration_parameters-tcp_port}
|
||||
## tcp_port {#tcp_port}
|
||||
|
||||
Port for communicating with clients over the TCP protocol.
|
||||
|
||||
@ -2267,9 +2267,9 @@ Port for communicating with clients over the TCP protocol.
|
||||
<tcp_port>9000</tcp_port>
|
||||
```
|
||||
|
||||
## tcp_port_secure {#server_configuration_parameters-tcp_port_secure}
|
||||
## tcp_port_secure {#tcp_port_secure}
|
||||
|
||||
TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings.
|
||||
TCP port for secure communication with clients. Use it with [OpenSSL](#openssl) settings.
|
||||
|
||||
**Possible values**
|
||||
|
||||
@ -2281,7 +2281,7 @@ Positive integer.
|
||||
<tcp_port_secure>9440</tcp_port_secure>
|
||||
```
|
||||
|
||||
## mysql_port {#server_configuration_parameters-mysql_port}
|
||||
## mysql_port {#mysql_port}
|
||||
|
||||
Port for communicating with clients over MySQL protocol.
|
||||
|
||||
@ -2295,7 +2295,7 @@ Example
|
||||
<mysql_port>9004</mysql_port>
|
||||
```
|
||||
|
||||
## postgresql_port {#server_configuration_parameters-postgresql_port}
|
||||
## postgresql_port {#postgresql_port}
|
||||
|
||||
Port for communicating with clients over PostgreSQL protocol.
|
||||
|
||||
@ -2326,7 +2326,7 @@ Path on the local filesystem to store temporary data for processing large querie
|
||||
```
|
||||
|
||||
|
||||
## user_files_path {#server_configuration_parameters-user_files_path}
|
||||
## user_files_path {#user_files_path}
|
||||
|
||||
The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md).
|
||||
|
||||
@ -2336,7 +2336,7 @@ The directory with user files. Used in the table function [file()](../../sql-ref
|
||||
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
|
||||
```
|
||||
|
||||
## user_scripts_path {#server_configuration_parameters-user_scripts_path}
|
||||
## user_scripts_path {#user_scripts_path}
|
||||
|
||||
The directory with user script files. Used for [Executable User Defined Functions](../../sql-reference/functions/index.md#executable-user-defined-functions).
|
||||
|
||||
@ -2346,7 +2346,7 @@ The directory with user scripts files. Used for Executable user defined function
|
||||
<user_scripts_path>/var/lib/clickhouse/user_scripts/</user_scripts_path>
|
||||
```
|
||||
|
||||
## user_defined_path {#server_configuration_parameters-user_defined_path}
|
||||
## user_defined_path {#user_defined_path}
|
||||
|
||||
The directory with user defined files. Used for [SQL User Defined Functions](../../sql-reference/functions/index.md#user-defined-functions).
|
||||
|
||||
@ -2442,7 +2442,7 @@ Storage method for data part headers in ZooKeeper.
|
||||
|
||||
This setting only applies to the `MergeTree` family. It can be specified:
|
||||
|
||||
- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file.
|
||||
- Globally in the [merge_tree](#merge_tree) section of the `config.xml` file.
|
||||
|
||||
ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes.
|
||||
|
||||
|
@ -48,7 +48,7 @@ Setting `readonly = 1` prohibits the user from changing settings. There is a way
|
||||
:::
|
||||
|
||||
|
||||
## allow_ddl {#settings_allow_ddl}
|
||||
## allow_ddl {#allow_ddl}
|
||||
|
||||
Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.
|
||||
|
||||
|
@ -154,6 +154,13 @@ Result:
|
||||
Maximum query execution time in seconds.
|
||||
At this time, it is not checked for one of the sorting stages, or when merging and finalizing aggregate functions.
|
||||
|
||||
The `max_execution_time` parameter can be a bit tricky to understand.
|
||||
It operates based on interpolation relative to the current query execution speed (this behaviour is controlled by [timeout_before_checking_execution_speed](#timeout-before-checking-execution-speed)).
|
||||
ClickHouse will interrupt a query if the projected execution time exceeds the specified `max_execution_time`.
|
||||
By default, `timeout_before_checking_execution_speed` is set to 1 second. This means that after just one second of query execution, ClickHouse will begin estimating the total execution time.
|
||||
If, for example, `max_execution_time` is set to 3600 seconds (1 hour), ClickHouse will terminate the query if the estimated time exceeds this 3600-second limit.
|
||||
If you set `timeout_before_checking_execution_speed` to 0, ClickHouse will use clock time as the basis for `max_execution_time`.
|
||||
|
||||
## timeout_overflow_mode {#timeout-overflow-mode}
|
||||
|
||||
What to do if the query is run longer than ‘max_execution_time’: ‘throw’ or ‘break’. By default, throw.
|
||||
|
@ -177,7 +177,7 @@ If `enable_optimize_predicate_expression = 1`, then the execution time of these
|
||||
|
||||
If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer because the `WHERE` clause applies to all the data after the subquery finishes.
|
||||
|
||||
## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries}
|
||||
## fallback_to_stale_replicas_for_distributed_queries {#fallback_to_stale_replicas_for_distributed_queries}
|
||||
|
||||
Forces a query to an out-of-date replica if updated data is not available. See [Replication](../../engines/table-engines/mergetree-family/replication.md).
|
||||
|
||||
@ -187,7 +187,7 @@ Used when performing `SELECT` from a distributed table that points to replicated
|
||||
|
||||
By default, 1 (enabled).
|
||||
|
||||
## force_index_by_date {#settings-force_index_by_date}
|
||||
## force_index_by_date {#force_index_by_date}
|
||||
|
||||
Disables query execution if the index can’t be used by date.
|
||||
|
||||
@ -203,7 +203,7 @@ Works with tables in the MergeTree family.
|
||||
|
||||
If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read. For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
|
||||
|
||||
## use_skip_indexes {#settings-use_skip_indexes}
|
||||
## use_skip_indexes {#use_skip_indexes}
|
||||
|
||||
Use data skipping indexes during query execution.
|
||||
|
||||
@ -214,7 +214,7 @@ Possible values:
|
||||
|
||||
Default value: 1.
|
||||
|
||||
## force_data_skipping_indices {#settings-force_data_skipping_indices}
|
||||
## force_data_skipping_indices {#force_data_skipping_indices}
|
||||
|
||||
Disables query execution if the passed data skipping indices weren't used.
|
||||
|
||||
@ -241,7 +241,7 @@ SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_
|
||||
SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok.
|
||||
```
|
||||
|
||||
## ignore_data_skipping_indices {#settings-ignore_data_skipping_indices}
|
||||
## ignore_data_skipping_indices {#ignore_data_skipping_indices}
|
||||
|
||||
Ignores the skipping indexes specified if used by the query.
|
||||
|
||||
@ -401,7 +401,7 @@ Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/func
|
||||
|
||||
It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed.
|
||||
|
||||
## function_range_max_elements_in_block {#settings-function_range_max_elements_in_block}
|
||||
## function_range_max_elements_in_block {#function_range_max_elements_in_block}
|
||||
|
||||
Sets the safety threshold for data volume generated by function [range](../../sql-reference/functions/array-functions.md/#range). Defines the maximum number of values generated by function per block of data (sum of array sizes for every row in a block).
|
||||
|
||||
@ -416,7 +416,7 @@ Default value: `500,000,000`.
|
||||
- [max_block_size](#setting-max_block_size)
|
||||
- [min_insert_block_size_rows](#min-insert-block-size-rows)
|
||||
|
||||
## enable_http_compression {#settings-enable_http_compression}
|
||||
## enable_http_compression {#enable_http_compression}
|
||||
|
||||
Enables or disables data compression in the response to an HTTP request.
|
||||
|
||||
@ -429,15 +429,15 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## http_zlib_compression_level {#settings-http_zlib_compression_level}
|
||||
## http_zlib_compression_level {#http_zlib_compression_level}
|
||||
|
||||
Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression).
|
||||
Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#enable_http_compression).
|
||||
|
||||
Possible values: Numbers from 1 to 9.
|
||||
|
||||
Default value: 3.
|
||||
|
||||
## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress}
|
||||
## http_native_compression_disable_checksumming_on_decompress {#http_native_compression_disable_checksumming_on_decompress}
|
||||
|
||||
Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`).
|
||||
|
||||
@ -480,7 +480,7 @@ Possible values:
|
||||
|
||||
Default value: `1000`.
|
||||
|
||||
## send_progress_in_http_headers {#settings-send_progress_in_http_headers}
|
||||
## send_progress_in_http_headers {#send_progress_in_http_headers}
|
||||
|
||||
Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses.
|
||||
|
||||
@ -518,7 +518,7 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## join_default_strictness {#settings-join_default_strictness}
|
||||
## join_default_strictness {#join_default_strictness}
|
||||
|
||||
Sets default strictness for [JOIN clauses](../../sql-reference/statements/select/join.md/#select-join).
|
||||
|
||||
@ -531,7 +531,7 @@ Possible values:
|
||||
|
||||
Default value: `ALL`.
|
||||
|
||||
## join_algorithm {#settings-join_algorithm}
|
||||
## join_algorithm {#join_algorithm}
|
||||
|
||||
Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used.
|
||||
|
||||
@ -547,7 +547,7 @@ Possible values:
|
||||
|
||||
[Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash provides an algorithm option that provides performant complex joins while limiting memory use.
|
||||
|
||||
The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned.
|
||||
The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row is recalculated. Any rows which don’t belong to the current bucket are flushed and reassigned.
|
||||
|
||||
Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`.
|
||||
|
||||
@ -588,7 +588,7 @@ Possible values:
|
||||
ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`.
|
||||
|
||||
|
||||
## join_any_take_last_row {#settings-join_any_take_last_row}
|
||||
## join_any_take_last_row {#join_any_take_last_row}
|
||||
|
||||
Changes the behaviour of join operations with `ANY` strictness.
|
||||
|
||||
@ -607,7 +607,7 @@ See also:
|
||||
|
||||
- [JOIN clause](../../sql-reference/statements/select/join.md/#select-join)
|
||||
- [Join table engine](../../engines/table-engines/special/join.md)
|
||||
- [join_default_strictness](#settings-join_default_strictness)
|
||||
- [join_default_strictness](#join_default_strictness)
|
||||
|
||||
## join_use_nulls {#join_use_nulls}
|
||||
|
||||
@ -879,7 +879,7 @@ Possible values:
|
||||
|
||||
Default value: 2013265920.
|
||||
|
||||
## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io}
|
||||
## min_bytes_to_use_direct_io {#min-bytes-to-use-direct-io}
|
||||
|
||||
The minimum data volume required for using direct I/O access to the storage disk.
|
||||
|
||||
@ -917,7 +917,7 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## log_queries {#settings-log-queries}
|
||||
## log_queries {#log-queries}
|
||||
|
||||
Setting up query logging.
|
||||
|
||||
@ -929,7 +929,7 @@ Example:
|
||||
log_queries=1
|
||||
```
|
||||
|
||||
## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms}
|
||||
## log_queries_min_query_duration_ms {#log-queries-min-query-duration-ms}
|
||||
|
||||
If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables:
|
||||
|
||||
@ -944,7 +944,7 @@ Only the queries with the following type will get to the log:
|
||||
- Type: milliseconds
|
||||
- Default value: 0 (any query)
|
||||
|
||||
## log_queries_min_type {#settings-log-queries-min-type}
|
||||
## log_queries_min_type {#log-queries-min-type}
|
||||
|
||||
`query_log` minimal type to log.
|
||||
|
||||
@ -962,11 +962,11 @@ Can be used to limit which entities will go to `query_log`, say you are interest
|
||||
log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
|
||||
```
|
||||
|
||||
## log_query_threads {#settings-log-query-threads}
|
||||
## log_query_threads {#log-query-threads}
|
||||
|
||||
Setting up query threads logging.
|
||||
|
||||
Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.
|
||||
Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#query_thread_log) server configuration parameter.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -981,7 +981,7 @@ Default value: `1`.
|
||||
log_query_threads=1
|
||||
```
|
||||
|
||||
## log_query_views {#settings-log-query-views}
|
||||
## log_query_views {#log-query-views}
|
||||
|
||||
Setting up query views logging.
|
||||
|
||||
@ -993,7 +993,7 @@ Example:
|
||||
log_query_views=1
|
||||
```
|
||||
|
||||
## log_formatted_queries {#settings-log-formatted-queries}
|
||||
## log_formatted_queries {#log-formatted-queries}
|
||||
|
||||
Allows logging formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)).
|
||||
|
||||
@ -1004,7 +1004,7 @@ Possible values:
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## log_comment {#settings-log-comment}
|
||||
## log_comment {#log-comment}
|
||||
|
||||
Specifies the value for the `log_comment` field of the [system.query_log](../system-tables/query_log.md) table and comment text for the server log.
|
||||
|
||||
@ -1012,7 +1012,7 @@ It can be used to improve the readability of server logs. Additionally, it helps
|
||||
|
||||
Possible values:
|
||||
|
||||
- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception.
|
||||
- Any string no longer than [max_query_size](#max_query_size). If the max_query_size is exceeded, the server throws an exception.
|
||||
|
||||
Default value: empty string.
|
||||
|
||||
@ -1036,7 +1036,7 @@ Result:
|
||||
└─────────────┴───────────┘
|
||||
```
|
||||
|
||||
## log_processors_profiles {#settings-log_processors_profiles}
|
||||
## log_processors_profiles {#log_processors_profiles}
|
||||
|
||||
Write time that processor spent during execution/waiting for data to `system.processors_profile_log` table.
|
||||
|
||||
@ -1045,7 +1045,7 @@ See also:
|
||||
- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md)
|
||||
- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
|
||||
|
||||
## max_insert_block_size {#settings-max_insert_block_size}
|
||||
## max_insert_block_size {#max_insert_block_size}
|
||||
|
||||
The size of blocks (in a count of rows) to form for insertion into a table.
|
||||
This setting only applies in cases when the server forms the blocks.
|
||||
@ -1079,7 +1079,7 @@ Possible values:
|
||||
|
||||
Default value: 268435456.
|
||||
|
||||
## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries}
|
||||
## max_replica_delay_for_distributed_queries {#max_replica_delay_for_distributed_queries}
|
||||
|
||||
Disables lagging replicas for distributed queries. See [Replication](../../engines/table-engines/mergetree-family/replication.md).
|
||||
|
||||
@ -1096,7 +1096,7 @@ Default value: 300.
|
||||
|
||||
Used when performing `SELECT` from a distributed table that points to replicated tables.
|
||||
|
||||
## max_threads {#settings-max_threads}
|
||||
## max_threads {#max_threads}
|
||||
|
||||
The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the ‘max_distributed_connections’ parameter).
|
||||
|
||||
@ -1109,7 +1109,7 @@ For queries that are completed quickly because of a LIMIT, you can set a lower
|
||||
|
||||
The smaller the `max_threads` value, the less memory is consumed.
|
||||
|
||||
## max_insert_threads {#settings-max-insert-threads}
|
||||
## max_insert_threads {#max-insert-threads}
|
||||
|
||||
The maximum number of threads to execute the `INSERT SELECT` query.
|
||||
|
||||
@ -1120,7 +1120,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#settings-max_threads) setting.
|
||||
Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting.
|
||||
Higher values will lead to higher memory usage.
|
||||
|
||||
## max_compress_block_size {#max-compress-block-size}
|
||||
@ -1149,7 +1149,7 @@ We are writing a URL column with the String type (average size of 60 bytes per v
|
||||
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
|
||||
:::
|
||||
|
||||
## max_query_size {#settings-max_query_size}
|
||||
## max_query_size {#max_query_size}
|
||||
|
||||
The maximum number of bytes of a query string parsed by the SQL parser.
|
||||
Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.
|
||||
@ -1393,7 +1393,7 @@ Default value: 5000.
|
||||
|
||||
## stream_flush_interval_ms {#stream-flush-interval-ms}
|
||||
|
||||
Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#settings-max_insert_block_size) rows.
|
||||
Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#max_insert_block_size) rows.
|
||||
|
||||
The default value is 7500.
|
||||
|
||||
@ -1405,7 +1405,7 @@ Timeout for polling data from/to streaming storages.
|
||||
|
||||
Default value: 500.
|
||||
|
||||
## load_balancing {#settings-load_balancing}
|
||||
## load_balancing {#load_balancing}
|
||||
|
||||
Specifies the algorithm of replicas selection that is used for distributed query processing.
|
||||
|
||||
@ -1419,7 +1419,7 @@ ClickHouse supports the following algorithms of choosing replicas:
|
||||
|
||||
See also:
|
||||
|
||||
- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
|
||||
- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)
|
||||
|
||||
### Random (by Default) {#load_balancing-random}
|
||||
|
||||
@ -1473,20 +1473,20 @@ load_balancing = round_robin
|
||||
|
||||
This algorithm uses a round-robin policy across replicas with the same number of errors (only queries with the `round_robin` policy are taken into account).
|
||||
|
||||
## prefer_localhost_replica {#settings-prefer-localhost-replica}
|
||||
## prefer_localhost_replica {#prefer-localhost-replica}
|
||||
|
||||
Enables/disables preferring the localhost replica when processing distributed queries.
|
||||
|
||||
Possible values:
|
||||
|
||||
- 1 — ClickHouse always sends a query to the localhost replica if it exists.
|
||||
- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#settings-load_balancing) setting.
|
||||
- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#load_balancing) setting.
|
||||
|
||||
Default value: 1.
|
||||
|
||||
:::note
|
||||
Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key).
|
||||
If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
|
||||
Disable this setting if you use [max_parallel_replicas](#max_parallel_replicas) without [parallel_replicas_custom_key](#parallel_replicas_custom_key).
|
||||
If [parallel_replicas_custom_key](#parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas.
|
||||
If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects.
|
||||
:::
|
||||
|
||||
@ -1500,7 +1500,7 @@ See the section “WITH TOTALS modifier”.
|
||||
The threshold for `totals_mode = 'auto'`.
|
||||
See the section “WITH TOTALS modifier”.
|
||||
|
||||
## max_parallel_replicas {#settings-max_parallel_replicas}
|
||||
## max_parallel_replicas {#max_parallel_replicas}
|
||||
|
||||
The maximum number of replicas for each shard when executing a query.
|
||||
|
||||
@ -1527,23 +1527,23 @@ A query may be processed faster if it is executed on several servers in parallel
|
||||
- The sampling key is an expression that is expensive to calculate.
|
||||
- The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.
|
||||
|
||||
### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
|
||||
### Parallel processing using [parallel_replicas_custom_key](#parallel_replicas_custom_key)
|
||||
|
||||
This setting is useful for any replicated table.
|
||||
|
||||
## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key}
|
||||
## parallel_replicas_custom_key {#parallel_replicas_custom_key}
|
||||
|
||||
An arbitrary integer expression that can be used to split work between replicas for a specific table.
|
||||
The value can be any integer expression.
|
||||
A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key)
|
||||
and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type).
|
||||
A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#parallel_replicas_custom_key)
|
||||
and [parallel_replicas_custom_key_filter_type](#parallel_replicas_custom_key_filter_type).
|
||||
|
||||
Simple expressions using primary keys are preferred.
|
||||
|
||||
If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards.
|
||||
Otherwise, it will behave the same as for the `SAMPLE` key: it will use multiple replicas of each shard.
|
||||
|
||||
## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type}
|
||||
## parallel_replicas_custom_key_filter_type {#parallel_replicas_custom_key_filter_type}
|
||||
|
||||
How to use `parallel_replicas_custom_key` expression for splitting work between replicas.
|
||||
|
||||
@ -1637,7 +1637,7 @@ Possible values:
|
||||
|
||||
Default value: `1`.
|
||||
|
||||
## query_cache_store_results_of_queries_with_nondeterministic_functions {#query--store-results-of-queries-with-nondeterministic-functions}
|
||||
## query_cache_store_results_of_queries_with_nondeterministic_functions {#query-cache-store-results-of-queries-with-nondeterministic-functions}
|
||||
|
||||
If turned on, then results of `SELECT` queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query cache](../query-cache.md).
|
||||
|
||||
@ -1732,7 +1732,7 @@ Possible values:
|
||||
|
||||
Default value: 0 (no restriction).
|
||||
|
||||
## insert_quorum {#settings-insert_quorum}
|
||||
## insert_quorum {#insert_quorum}
|
||||
|
||||
Enables the quorum writes.
|
||||
|
||||
@ -1746,7 +1746,7 @@ Quorum writes
|
||||
|
||||
`INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written.
|
||||
|
||||
When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#settings-select_sequential_consistency).
|
||||
When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#select_sequential_consistency).
|
||||
|
||||
ClickHouse generates an exception:
|
||||
|
||||
@ -1755,11 +1755,11 @@ ClickHouse generates an exception:
|
||||
|
||||
See also:
|
||||
|
||||
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
|
||||
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
|
||||
- [select_sequential_consistency](#settings-select_sequential_consistency)
|
||||
- [insert_quorum_timeout](#insert_quorum_timeout)
|
||||
- [insert_quorum_parallel](#insert_quorum_parallel)
|
||||
- [select_sequential_consistency](#select_sequential_consistency)
|
||||
|
||||
## insert_quorum_timeout {#settings-insert_quorum_timeout}
|
||||
## insert_quorum_timeout {#insert_quorum_timeout}
|
||||
|
||||
Write to a quorum timeout in milliseconds. If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica.
|
||||
|
||||
@ -1767,11 +1767,11 @@ Default value: 600 000 milliseconds (ten minutes).
|
||||
|
||||
See also:
|
||||
|
||||
- [insert_quorum](#settings-insert_quorum)
|
||||
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
|
||||
- [select_sequential_consistency](#settings-select_sequential_consistency)
|
||||
- [insert_quorum](#insert_quorum)
|
||||
- [insert_quorum_parallel](#insert_quorum_parallel)
|
||||
- [select_sequential_consistency](#select_sequential_consistency)
|
||||
|
||||
## insert_quorum_parallel {#settings-insert_quorum_parallel}
|
||||
## insert_quorum_parallel {#insert_quorum_parallel}
|
||||
|
||||
Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected.
|
||||
|
||||
@ -1784,11 +1784,11 @@ Default value: 1.
|
||||
|
||||
See also:
|
||||
|
||||
- [insert_quorum](#settings-insert_quorum)
|
||||
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
|
||||
- [select_sequential_consistency](#settings-select_sequential_consistency)
|
||||
- [insert_quorum](#insert_quorum)
|
||||
- [insert_quorum_timeout](#insert_quorum_timeout)
|
||||
- [select_sequential_consistency](#select_sequential_consistency)
|
||||
|
||||
## select_sequential_consistency {#settings-select_sequential_consistency}
|
||||
## select_sequential_consistency {#select_sequential_consistency}
|
||||
|
||||
Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default).
|
||||
|
||||
@ -1807,11 +1807,11 @@ When `insert_quorum_parallel` is enabled (the default), then `select_sequential_
|
||||
|
||||
See also:
|
||||
|
||||
- [insert_quorum](#settings-insert_quorum)
|
||||
- [insert_quorum_timeout](#settings-insert_quorum_timeout)
|
||||
- [insert_quorum_parallel](#settings-insert_quorum_parallel)
|
||||
- [insert_quorum](#insert_quorum)
|
||||
- [insert_quorum_timeout](#insert_quorum_timeout)
|
||||
- [insert_quorum_parallel](#insert_quorum_parallel)
|
||||
|
||||
## insert_deduplicate {#settings-insert-deduplicate}
|
||||
## insert_deduplicate {#insert-deduplicate}
|
||||
|
||||
Enables or disables block deduplication of `INSERT` (for Replicated\* tables).
|
||||
|
||||
@ -1938,7 +1938,7 @@ For the replicated tables, by default, only 10000 of the most recent inserts for
|
||||
We recommend enabling the [async_block_ids_cache](merge-tree-settings.md/#use-async-block-ids-cache) to increase the efficiency of deduplication.
|
||||
This function does not work for non-replicated tables.
|
||||
|
||||
## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate-blocks-in-dependent-materialized-views}
|
||||
## deduplicate_blocks_in_dependent_materialized_views {#deduplicate-blocks-in-dependent-materialized-views}
|
||||
|
||||
Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables.
|
||||
|
||||
@ -2048,7 +2048,7 @@ Possible values:
|
||||
|
||||
Default value: 10000
|
||||
|
||||
## max_network_bytes {#settings-max-network-bytes}
|
||||
## max_network_bytes {#max-network-bytes}
|
||||
|
||||
Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query.
|
||||
|
||||
@ -2059,7 +2059,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## max_network_bandwidth {#settings-max-network-bandwidth}
|
||||
## max_network_bandwidth {#max-network-bandwidth}
|
||||
|
||||
Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query.
|
||||
|
||||
@ -2070,7 +2070,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## max_network_bandwidth_for_user {#settings-max-network-bandwidth-for-user}
|
||||
## max_network_bandwidth_for_user {#max-network-bandwidth-for-user}
|
||||
|
||||
Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user.
|
||||
|
||||
@ -2081,7 +2081,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## max_network_bandwidth_for_all_users {#settings-max-network-bandwidth-for-all-users}
|
||||
## max_network_bandwidth_for_all_users {#max-network-bandwidth-for-all-users}
|
||||
|
||||
Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server.
|
||||
|
||||
@ -2092,7 +2092,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## count_distinct_implementation {#settings-count_distinct_implementation}
|
||||
## count_distinct_implementation {#count_distinct_implementation}
|
||||
|
||||
Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction.
|
||||
|
||||
@ -2106,7 +2106,7 @@ Possible values:
|
||||
|
||||
Default value: `uniqExact`.
|
||||
|
||||
## skip_unavailable_shards {#settings-skip_unavailable_shards}
|
||||
## skip_unavailable_shards {#skip_unavailable_shards}
|
||||
|
||||
Enables or disables silently skipping of unavailable shards.
|
||||
|
||||
@ -2270,7 +2270,7 @@ Possible values:
|
||||
|
||||
Default value: 0
|
||||
|
||||
## force_optimize_skip_unused_shards_nesting {#settings-force_optimize_skip_unused_shards_nesting}
|
||||
## force_optimize_skip_unused_shards_nesting {#force_optimize_skip_unused_shards_nesting}
|
||||
|
||||
Controls [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards)) depending on the nesting level of the distributed query (the case when you have a `Distributed` table that looks into another `Distributed` table).
|
||||
|
||||
@ -2400,7 +2400,7 @@ Enables caching of rows number during count from files in table functions `file`
|
||||
|
||||
Enabled by default.
|
||||
|
||||
## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life}
|
||||
## distributed_replica_error_half_life {#distributed_replica_error_half_life}
|
||||
|
||||
- Type: seconds
|
||||
- Default value: 60 seconds
|
||||
@ -2411,10 +2411,10 @@ See also:
|
||||
|
||||
- [load_balancing](#load_balancing-round_robin)
|
||||
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
|
||||
- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
|
||||
- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
|
||||
- [distributed_replica_error_cap](#distributed_replica_error_cap)
|
||||
- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)
|
||||
|
||||
## distributed_replica_error_cap {#settings-distributed_replica_error_cap}
|
||||
## distributed_replica_error_cap {#distributed_replica_error_cap}
|
||||
|
||||
- Type: unsigned int
|
||||
- Default value: 1000
|
||||
@ -2425,10 +2425,10 @@ See also:
|
||||
|
||||
- [load_balancing](#load_balancing-round_robin)
|
||||
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
|
||||
- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life)
|
||||
- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors)
|
||||
- [distributed_replica_error_half_life](#distributed_replica_error_half_life)
|
||||
- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors)
|
||||
|
||||
## distributed_replica_max_ignored_errors {#settings-distributed_replica_max_ignored_errors}
|
||||
## distributed_replica_max_ignored_errors {#distributed_replica_max_ignored_errors}
|
||||
|
||||
- Type: unsigned int
|
||||
- Default value: 0
|
||||
@ -2439,7 +2439,7 @@ See also:
|
||||
|
||||
- [load_balancing](#load_balancing-round_robin)
|
||||
- [Table engine Distributed](../../engines/table-engines/special/distributed.md)
|
||||
- [distributed_replica_error_cap](#settings-distributed_replica_error_cap)
|
||||
- [distributed_replica_error_cap](#distributed_replica_error_cap)
|
||||
- [distributed_replica_error_half_life](#distributed_replica_error_half_life)
|
||||
|
||||
## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms}
|
||||
@ -2595,7 +2595,7 @@ Possible values:
|
||||
|
||||
Default value: 0.
|
||||
|
||||
## allow_introspection_functions {#settings-allow_introspection_functions}
|
||||
## allow_introspection_functions {#allow_introspection_functions}
|
||||
|
||||
Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling.
|
||||
|
||||
@ -3136,7 +3136,7 @@ Do not enable this feature in version `<= 21.8`. It's not properly implemented a
|
||||
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
|
||||
|
||||
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md/#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
|
||||
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
|
||||
It is implemented via query rewrite (similar to [count_distinct_implementation](#count_distinct_implementation) setting) to get consistent results for distributed queries.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -4609,7 +4609,7 @@ Default: 0
|
||||
|
||||
## rewrite_count_distinct_if_with_count_distinct_implementation
|
||||
|
||||
Allows you to rewrite `countDistcintIf` with [count_distinct_implementation](#settings-count_distinct_implementation) setting.
|
||||
Allows you to rewrite `countDistinctIf` with the [count_distinct_implementation](#count_distinct_implementation) setting.
|
||||
|
||||
Possible values:
|
||||
|
||||
@ -4667,3 +4667,44 @@ The default value is `false`.
|
||||
``` xml
|
||||
<validate_tcp_client_information>true</validate_tcp_client_information>
|
||||
```
|
||||
|
||||
## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
|
||||
|
||||
Allows ignoring 'permission denied' errors when using multi-directory `{}` globs for [File](../../sql-reference/table-functions/file.md#globs_in_path) and [HDFS](../../sql-reference/table-functions/hdfs.md) storages.
|
||||
This setting is only applicable to multi directory `{}` globs.
|
||||
|
||||
Possible values: `0`, `1`.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
### Example
|
||||
|
||||
Having the following structure in `user_files`:
|
||||
```
|
||||
my_directory/
|
||||
├── data1
|
||||
│ ├── f1.csv
|
||||
├── data2
|
||||
│ ├── f2.csv
|
||||
└── test_root
|
||||
```
|
||||
where the `data1` and `data2` directories are accessible, but the user has no rights to read the `test_root` directory.
|
||||
|
||||
For a query like `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` an exception will be thrown:
|
||||
`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.
|
||||
It happens because a multi-directory glob requires a recursive search in _all_ available directories under `my_directory`.
|
||||
|
||||
If this setting is on, all inaccessible directories will be silently skipped, even if they are explicitly specified inside `{}`.
|
||||
|
||||
```sql
|
||||
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
|
||||
|
||||
Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
|
||||
```
|
||||
```sql
|
||||
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
|
||||
|
||||
┌─_path───────────────────┬─_file───────┐
|
||||
│ <full path to file> │ <file name> │
|
||||
└─────────────────────────┴─────────────┘
|
||||
```
|
||||
|
@ -45,13 +45,13 @@ keeper foo bar
|
||||
## Commands {#clickhouse-keeper-client-commands}
|
||||
|
||||
- `ls [path]` -- Lists the nodes for the given path (default: cwd)
|
||||
- `cd [path]` -- Change the working path (default `.`)
|
||||
- `cd [path]` -- Changes the working path (default `.`)
|
||||
- `exists <path>` -- Returns `1` if node exists, `0` otherwise
|
||||
- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
|
||||
- `set <path> <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
|
||||
- `create <path> <value> [mode]` -- Creates new node with the set value
|
||||
- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
|
||||
- `get <path>` -- Returns the node's value
|
||||
- `remove <path>` -- Removes the node
|
||||
- `rm <path> [version]` -- Removes the node only if version matches (default: -1)
|
||||
- `rmr <path>` -- Recursively deletes path. Confirmation required
|
||||
- `flwc <command>` -- Executes four-letter-word command
|
||||
- `help` -- Prints this message
|
||||
|
@ -12,7 +12,7 @@ Values can be added to the array in any (indeterminate) order.
|
||||
|
||||
The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupArray(1)(x)` is equivalent to `[any (x)]`.
|
||||
|
||||
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
|
||||
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -10,7 +10,7 @@ Syntax: `groupArrayLast(max_size)(x)`
|
||||
Creates an array of last argument values.
|
||||
For example, `groupArrayLast(1)(x)` is equivalent to `[anyLast (x)]`.
|
||||
|
||||
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
|
||||
In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -123,7 +123,7 @@ LAYOUT(...) -- Memory layout configuration
|
||||
LIFETIME(...) -- Lifetime of dictionary in memory
|
||||
```
|
||||
|
||||
## Storing Dictionaries in Memory {#storig-dictionaries-in-memory}
|
||||
## Storing Dictionaries in Memory {#storing-dictionaries-in-memory}
|
||||
|
||||
There are a variety of ways to store dictionaries in memory.
|
||||
|
||||
|
@ -657,7 +657,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res;
|
||||
|
||||
Array elements set to `NULL` are handled as normal values.
|
||||
|
||||
## arraySort(\[func,\] arr, …) {#array_functions-sort}
|
||||
## arraySort(\[func,\] arr, …) {#sort}
|
||||
|
||||
Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description.
|
||||
|
||||
@ -716,7 +716,7 @@ SELECT arraySort((x) -> -x, [1, 2, 3]) as res;
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#array_functions-reverse-sort) in a sorting.
|
||||
For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#reverse-sort) in a sorting.
|
||||
|
||||
The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example:
|
||||
|
||||
@ -762,7 +762,7 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia.
|
||||
|
||||
Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order.
|
||||
|
||||
## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort}
|
||||
## arrayReverseSort(\[func,\] arr, …) {#reverse-sort}
|
||||
|
||||
Sorts the elements of the `arr` array in descending order. If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description.
|
||||
|
||||
|
@ -239,7 +239,7 @@ int32samoa: 1546300800
|
||||
|
||||
**See Also**
|
||||
|
||||
- [formatDateTime](#date_time_functions-formatDateTime) - supports non-constant timezone.
|
||||
- [formatDateTime](#formatDateTime) - supports non-constant timezone.
|
||||
- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone.
|
||||
|
||||
## timeZoneOf
|
||||
@ -1274,7 +1274,7 @@ Alias: `SUBDATE`
|
||||
**See Also**
|
||||
- [date_sub](#date_sub)
|
||||
|
||||
## now
|
||||
## now {#now}
|
||||
|
||||
Returns the current date and time at the moment of query analysis. The function is a constant expression.
|
||||
|
||||
@ -1361,7 +1361,7 @@ Result:
|
||||
└─────────────────────────┴───────────────────────────────┘
|
||||
```
|
||||
|
||||
## nowInBlock
|
||||
## nowInBlock {#nowInBlock}
|
||||
|
||||
Returns the current date and time at the moment of processing of each block of data. In contrast to the function [now](#now), it is not a constant expression, and the returned value will be different in different blocks for long-running queries.
|
||||
|
||||
@ -1405,14 +1405,14 @@ Result:
|
||||
└─────────────────────┴─────────────────────┴──────────┘
|
||||
```
|
||||
|
||||
## today
|
||||
## today {#today}
|
||||
|
||||
Accepts zero arguments and returns the current date at one of the moments of query analysis.
|
||||
The same as ‘toDate(now())’.
|
||||
|
||||
Aliases: `curdate`, `current_date`.
|
||||
|
||||
## yesterday
|
||||
## yesterday {#yesterday}
|
||||
|
||||
Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis.
|
||||
The same as ‘today() - 1’.
|
||||
@ -1628,7 +1628,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64
|
||||
└───────────────────────────────────────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## formatDateTime {#date_time_functions-formatDateTime}
|
||||
## formatDateTime {#formatDateTime}
|
||||
|
||||
Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
|
||||
|
||||
@ -1753,7 +1753,7 @@ LIMIT 10
|
||||
- [formatDateTimeInJodaSyntax](#formatDateTimeInJodaSyntax)
|
||||
|
||||
|
||||
## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax}
|
||||
## formatDateTimeInJodaSyntax {#formatDateTimeInJodaSyntax}
|
||||
|
||||
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
|
||||
|
||||
|
@ -12,7 +12,7 @@ For dictionaries created with [DDL queries](../../sql-reference/statements/creat
|
||||
|
||||
For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).
|
||||
|
||||
## dictGet, dictGetOrDefault, dictGetOrNull
|
||||
## dictGet, dictGetOrDefault, dictGetOrNull {#dictGet}
|
||||
|
||||
Retrieves values from a dictionary.
|
||||
|
||||
|
@ -19,7 +19,7 @@ halfMD5(par1, ...)
|
||||
```
|
||||
|
||||
The function is relatively slow (5 million short strings per second per processor core).
|
||||
Consider using the [sipHash64](#hash_functions-siphash64) function instead.
|
||||
Consider using the [sipHash64](#siphash64) function instead.
|
||||
|
||||
**Arguments**
|
||||
|
||||
@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
|
||||
|
||||
Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
|
||||
## MD5 {#hash_functions-md5}
|
||||
## MD5 {#md5}
|
||||
|
||||
Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
|
||||
If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
|
||||
If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
|
||||
|
||||
## sipHash64 {#hash_functions-siphash64}
|
||||
## sipHash64 {#siphash64}
|
||||
|
||||
Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
|
||||
@ -59,7 +59,7 @@ Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
|
||||
sipHash64(par1,...)
|
||||
```
|
||||
|
||||
This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) hash function.
|
||||
This is a cryptographic hash function. It works at least three times faster than the [MD5](#md5) hash function.
|
||||
|
||||
The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
|
||||
|
||||
@ -91,7 +91,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
|
||||
|
||||
## sipHash64Keyed
|
||||
|
||||
Same as [sipHash64](#hash_functions-siphash64) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
Same as [sipHash64](#siphash64) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -101,7 +101,7 @@ sipHash64Keyed((k0, k1), par1,...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as [sipHash64](#hash_functions-siphash64), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
Same as [sipHash64](#siphash64), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -123,12 +123,12 @@ SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x','
|
||||
|
||||
## sipHash128
|
||||
|
||||
Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits.
|
||||
Like [sipHash64](#siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits.
|
||||
|
||||
:::note
|
||||
This 128-bit variant differs from the reference implementation and it's weaker.
|
||||
This version exists because, when it was written, there was no official 128-bit extension for SipHash.
|
||||
New projects should probably use [sipHash128Reference](#hash_functions-siphash128reference).
|
||||
New projects should probably use [sipHash128Reference](#siphash128reference).
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
@ -139,7 +139,7 @@ sipHash128(par1,...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as for [sipHash64](#hash_functions-siphash64).
|
||||
Same as for [sipHash64](#siphash64).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -163,12 +163,12 @@ Result:
|
||||
|
||||
## sipHash128Keyed
|
||||
|
||||
Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
Same as [sipHash128](#siphash128) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
|
||||
:::note
|
||||
This 128-bit variant differs from the reference implementation and it's weaker.
|
||||
This version exists because, when it was written, there was no official 128-bit extension for SipHash.
|
||||
New projects should probably use [sipHash128ReferenceKeyed](#hash_functions-siphash128referencekeyed).
|
||||
New projects should probably use [sipHash128ReferenceKeyed](#siphash128referencekeyed).
|
||||
:::
|
||||
|
||||
**Syntax**
|
||||
@ -179,7 +179,7 @@ sipHash128Keyed((k0, k1), par1,...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as [sipHash128](#hash_functions-siphash128), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
Same as [sipHash128](#siphash128), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -203,7 +203,7 @@ Result:
|
||||
|
||||
## sipHash128Reference
|
||||
|
||||
Like [sipHash128](#hash_functions-siphash128) but implements the 128-bit algorithm from the original authors of SipHash.
|
||||
Like [sipHash128](#siphash128) but implements the 128-bit algorithm from the original authors of SipHash.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -213,7 +213,7 @@ sipHash128Reference(par1,...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as for [sipHash128](#hash_functions-siphash128).
|
||||
Same as for [sipHash128](#siphash128).
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -237,7 +237,7 @@ Result:
|
||||
|
||||
## sipHash128ReferenceKeyed
|
||||
|
||||
Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
Same as [sipHash128Reference](#siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.
|
||||
|
||||
**Syntax**
|
||||
|
||||
@ -247,7 +247,7 @@ sipHash128ReferenceKeyed((k0, k1), par1,...)
|
||||
|
||||
**Arguments**
|
||||
|
||||
Same as [sipHash128Reference](#hash_functions-siphash128reference), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
Same as [sipHash128Reference](#siphash128reference), but the first argument is a tuple of two UInt64 values representing the key.
|
||||
|
||||
**Returned value**
|
||||
|
||||
@ -536,7 +536,7 @@ Calculates `HiveHash` from a string.
|
||||
SELECT hiveHash('')
|
||||
```
|
||||
|
||||
This is just [JavaHash](#hash_functions-javahash) with zeroed out sign bit. This function is used in [Apache Hive](https://en.wikipedia.org/wiki/Apache_Hive) for versions before 3.0. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
|
||||
This is just [JavaHash](#javahash) with zeroed out sign bit. This function is used in [Apache Hive](https://en.wikipedia.org/wiki/Apache_Hive) for versions before 3.0. This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
|
||||
|
||||
**Returned value**
|
||||
|
||||
|
@ -11,7 +11,7 @@ sidebar_label: Other
|
||||
Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned.
|
||||
If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value.
|
||||
|
||||
## getMacro
|
||||
## getMacro {#getMacro}
|
||||
|
||||
Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration.
|
||||
|
||||
@ -186,7 +186,7 @@ Returns the type name of the passed argument.
|
||||
|
||||
If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation.
|
||||
|
||||
## blockSize()
|
||||
## blockSize() {#blockSize}
|
||||
|
||||
In ClickHouse, queries are processed in blocks (chunks).
|
||||
This function returns the size (row count) of the block the function is called on.
|
||||
@ -311,7 +311,7 @@ Sleeps ‘seconds’ seconds for each row. The sleep time can be specified as in
|
||||
Returns the name of the current database.
|
||||
Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database.
|
||||
|
||||
## currentUser()
|
||||
## currentUser() {#currentUser}
|
||||
|
||||
Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned.
|
||||
|
||||
@ -771,7 +771,7 @@ If executed in the context of a distributed table, this function generates a nor
|
||||
|
||||
Returns the sequence number of the data block where the row is located.
|
||||
|
||||
## rowNumberInBlock()
|
||||
## rowNumberInBlock() {#rowNumberInBlock}
|
||||
|
||||
Returns the ordinal number of the row in the data block. Different data blocks are always recalculated.
|
||||
|
||||
@ -896,7 +896,7 @@ Result:
|
||||
└────────────┴───────┴───────────┴────────────────┘
|
||||
```
|
||||
|
||||
## runningDifference(x)
|
||||
## runningDifference(x) {#runningDifference}
|
||||
|
||||
Calculates the difference between two consecutive row values in the data block.
|
||||
Returns 0 for the first row, and for subsequent rows the difference to the previous row.
|
||||
@ -2274,7 +2274,7 @@ Result:
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
## queryID
|
||||
## queryID {#queryID}
|
||||
|
||||
Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`.
|
||||
|
||||
|
@ -478,7 +478,7 @@ Result:
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## randomString
|
||||
## randomString {#randomString}
|
||||
|
||||
Generates a string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.
|
||||
|
||||
@ -627,7 +627,7 @@ Result:
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
## fuzzBits
|
||||
## fuzzBits {#fuzzBits}
|
||||
|
||||
**Syntax**
|
||||
|
||||
|
@ -90,7 +90,7 @@ In ClickHouse Cloud, by default, passwords must meet the following complexity re
|
||||
CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
|
||||
```
|
||||
|
||||
The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. THe following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
|
||||
The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
|
||||
|
||||
```bash
|
||||
/var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql
|
||||
|
@ -340,6 +340,15 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name`
|
||||
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
|
||||
- If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed.
|
||||
|
||||
### SYNC DATABASE REPLICA
|
||||
|
||||
Waits until the specified [replicated database](https://clickhouse.com/docs/en/engines/database-engines/replicated) applies all schema changes from the DDL queue of that database.
|
||||
|
||||
**Syntax**
|
||||
```sql
|
||||
SYSTEM SYNC DATABASE REPLICA replicated_database_name;
|
||||
```
|
||||
|
||||
### RESTART REPLICA
|
||||
|
||||
Reinitializes the ZooKeeper session state for a `ReplicatedMergeTree` table: compares the current state with ZooKeeper as the source of truth and adds tasks to the ZooKeeper queue if needed.
|
||||
|
@ -135,13 +135,13 @@ Getting data from table in table.csv, located in archive1.zip or/and archive2.zi
|
||||
SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv');
|
||||
```
|
||||
|
||||
## Globs in Path
|
||||
## Globs in Path {#globs_in_path}
|
||||
|
||||
Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only a suffix or prefix).
|
||||
|
||||
- `*` — Substitutes any number of any characters except `/` including empty string.
|
||||
- `?` — Substitutes any single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`, including `/`.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. In case at least one of strings contains `/`, `'permission denied'` errors may be ignored using [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting for file & HDFS.
|
||||
- `{N..M}` — Substitutes any number in range from N to M including both borders.
|
||||
- `**` - Fetches all files inside the folder recursively.
|
||||
|
||||
@ -210,7 +210,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3
|
||||
- [engine_file_allow_create_multiple_files](/docs/en/operations/settings/settings.md#engine_file_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [engine_file_skip_empty_files](/docs/en/operations/settings/settings.md#engine_file_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
- [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local.
|
||||
|
||||
- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.
|
||||
|
||||
|
||||
**See Also**
|
||||
|
@ -39,13 +39,13 @@ LIMIT 2
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
**Globs in path**
|
||||
## Globs in path {#globs_in_path}
|
||||
|
||||
Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only a suffix or prefix).
|
||||
|
||||
- `*` — Substitutes any number of any characters except `/` including empty string.
|
||||
- `?` — Substitutes any single character.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. In case at least one of strings contains `/`, `'permission denied'` errors may be ignored using [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) setting.
|
||||
- `{N..M}` — Substitutes any number in range from N to M including both borders.
|
||||
|
||||
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md).
|
||||
@ -102,6 +102,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin
|
||||
- [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default.
|
||||
- [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default.
|
||||
- [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
|
||||
- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs.
|
||||
|
||||
**See Also**
|
||||
|
||||
|
@ -805,8 +805,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
|
||||
<single_read_retries>4</single_read_retries>
|
||||
<min_bytes_for_seek>1000</min_bytes_for_seek>
|
||||
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
|
||||
<cache_enabled>true</cache_enabled>
|
||||
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</s3>
|
||||
</disks>
|
||||
@ -832,8 +830,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
|
||||
- `single_read_retries` — число попыток выполнения запроса в случае возникновения ошибки в процессе чтения. Значение по умолчанию: `4`.
|
||||
- `min_bytes_for_seek` — минимальное количество байтов, которые используются для операций поиска вместо последовательного чтения. Значение по умолчанию: 1 МБайт.
|
||||
- `metadata_path` — путь к локальному файловому хранилищу для хранения файлов с метаданными для S3. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `cache_enabled` — признак, разрешено ли хранение кэша засечек и файлов индекса в локальной файловой системе. Значение по умолчанию: `true`.
|
||||
- `cache_path` — путь в локальной файловой системе, где будут храниться кэш засечек и файлы индекса. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
|
||||
- `skip_access_check` — признак, выполнять ли проверку доступов при запуске диска. Если установлено значение `true`, то проверка не выполняется. Значение по умолчанию: `false`.
|
||||
|
||||
Диск S3 может быть сконфигурирован как `main` или `cold`:
|
||||
|
@ -4209,3 +4209,45 @@ SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_pars
|
||||
│ 1.7091 │ 15008753 │
|
||||
└─────────────────────┴──────────────────────────┘
|
||||
```
|
||||
|
||||
## ignore_access_denied_multidirectory_globs {#ignore_access_denied_multidirectory_globs}
|
||||
|
||||
Позволяет игнорировать ошибку 'permission denied', возникающую при использовании шаблона `{}`, содержащего `/` внутри себя.
|
||||
Работает для [File](../../sql-reference/table-functions/file.md#globs_in_path) и [HDFS](../../sql-reference/table-functions/hdfs.md).
|
||||
Работает _только_ для указанных выше шаблонов `{}`.
|
||||
|
||||
Возможные значения: `0`, `1`.
|
||||
|
||||
Значение по умолчанию: `0`.
|
||||
|
||||
### Пример
|
||||
|
||||
Пусть в `user_files` имеется следующая структура:
|
||||
```
|
||||
my_directory/
|
||||
├── data1
|
||||
│ ├── f1.csv
|
||||
├── data2
|
||||
│ ├── f2.csv
|
||||
└── test_root
|
||||
```
|
||||
Пусть также директории `data1`, `data2` могут быть прочитаны, но прав на чтение `test_root` нет.
|
||||
|
||||
На запрос `SELECT *, _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV)` будет выброшено исключение:
|
||||
`Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied`.
|
||||
Это происходит, так как для обработки такого шаблона необходимо выполнить рекурсивный поиск по _всем_ директориям, находящимся внутри `my_directory`.
|
||||
|
||||
Если данная настройка имеет значение 1, то недоступные директории будут тихо пропущены, даже если они явно указаны внутри `{}`.
|
||||
|
||||
```sql
|
||||
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 0;
|
||||
|
||||
Code: 1001. DB::Exception: std::__1::__fs::filesystem::filesystem_error: filesystem error: in directory_iterator::directory_iterator(...): Permission denied
|
||||
```
|
||||
```sql
|
||||
SELECT _path, _file FROM file('my_directory/{data1/f1,data2/f2}.csv', CSV) SETTINGS ignore_access_denied_multidirectory_globs = 1;
|
||||
|
||||
┌─_path───────────────────┬─_file───────┐
|
||||
│ <full path to file> │ <file name> │
|
||||
└─────────────────────────┴─────────────┘
|
||||
```
|
||||
|
@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
|
||||
|
||||
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
|
||||
- `?` — заменяет ровно один любой символ.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`, причём строка может содержать `/`.
|
||||
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
|
||||
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
|
||||
|
||||
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
|
||||
@ -124,6 +124,7 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
|
||||
- `_path` — путь к файлу.
|
||||
- `_file` — имя файла.
|
||||
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Виртуальные столбцы](index.md#table_engines-virtual_columns)
|
||||
|
@ -39,11 +39,11 @@ LIMIT 2
|
||||
└─────────┴─────────┴─────────┘
|
||||
```
|
||||
|
||||
**Шаблоны в пути**
|
||||
## Шаблоны поиска в компонентах пути {#globs-in-path}
|
||||
|
||||
- `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
|
||||
- `?` — Заменяет ровно один любой символ.
|
||||
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
|
||||
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. В случае, если в какой-либо из строк содержится `/`, то ошибки доступа (permission denied) к существующим, но недоступным директориям/файлам могут быть проигнорированы при помощи настройки [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs).
|
||||
- `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
|
||||
|
||||
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
|
||||
@ -62,3 +62,5 @@ LIMIT 2
|
||||
**Смотрите также**
|
||||
|
||||
- [Виртуальные столбцы](index.md#table_engines-virtual_columns)
|
||||
- Параметр [ignore_access_denied_multidirectory_globs](/docs/ru/operations/settings/settings.md#ignore_access_denied_multidirectory_globs)
|
||||
|
||||
|
@ -745,8 +745,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
|
||||
<single_read_retries>4</single_read_retries>
|
||||
<min_bytes_for_seek>1000</min_bytes_for_seek>
|
||||
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
|
||||
<cache_enabled>true</cache_enabled>
|
||||
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</s3>
|
||||
</disks>
|
||||
@ -772,8 +770,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
|
||||
- `single_read_retries` - 读过程中连接丢失后重试次数,默认值为4。
|
||||
- `min_bytes_for_seek` - 使用查找操作,而不是顺序读操作的最小字节数,默认值为1000。
|
||||
- `metadata_path` - 本地存放S3元数据文件的路径,默认值为`/var/lib/clickhouse/disks/<disk_name>/`
|
||||
- `cache_enabled` - 是否允许缓存标记和索引文件。默认值为`true`。
|
||||
- `cache_path` - 本地缓存标记和索引文件的路径。默认值为`/var/lib/clickhouse/disks/<disk_name>/cache/`。
|
||||
- `skip_access_check` - 如果为`true`,Clickhouse启动时不检查磁盘是否可用。默认为`false`。
|
||||
- `server_side_encryption_customer_key_base64` - 如果指定该项的值,请求时会加上为了访问SSE-C加密数据而必须的头信息。
|
||||
|
||||
@ -823,4 +819,3 @@ S3磁盘也可以设置冷热存储:
|
||||
- `_part_uuid` - 唯一部分标识符(如果 MergeTree 设置`assign_part_uuids` 已启用)。
|
||||
- `_partition_value` — `partition by` 表达式的值(元组)。
|
||||
- `_sample_factor` - 采样因子(来自请求)。
|
||||
|
||||
|
@ -5,8 +5,8 @@ sidebar_position: 31
|
||||
|
||||
# stddevSamp {#stddevsamp}
|
||||
|
||||
结果等于 [varSamp] (../../../sql-reference/aggregate-functions/reference/varsamp.md)的平方根。
|
||||
结果等于 [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) 的平方根。
|
||||
|
||||
:::note
|
||||
该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。
|
||||
:::
|
||||
:::
|
||||
|
@ -32,10 +32,10 @@ contents:
|
||||
dst: /usr/bin/clickhouse-keeper
|
||||
- src: clickhouse-keeper.service
|
||||
dst: /lib/systemd/system/clickhouse-keeper.service
|
||||
- src: clickhouse
|
||||
- src: clickhouse-keeper
|
||||
dst: /usr/bin/clickhouse-keeper-client
|
||||
type: symlink
|
||||
- src: clickhouse
|
||||
- src: clickhouse-keeper
|
||||
dst: /usr/bin/clickhouse-keeper-converter
|
||||
type: symlink
|
||||
# docs
|
||||
|
@ -135,7 +135,7 @@ func TestConfigFileFrameCopy(t *testing.T) {
|
||||
sizes := map[string]int64{
|
||||
"users.xml": int64(2017),
|
||||
"default-password.xml": int64(188),
|
||||
"config.xml": int64(59506),
|
||||
"config.xml": int64(59377),
|
||||
"server-include.xml": int64(168),
|
||||
"user-include.xml": int64(559),
|
||||
}
|
||||
|
@ -1209,8 +1209,6 @@
|
||||
<single_read_retries>4</single_read_retries>
|
||||
<min_bytes_for_seek>1000</min_bytes_for_seek>
|
||||
<metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
|
||||
<cache_enabled>true</cache_enabled>
|
||||
<cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</s3>
|
||||
</disks>
|
||||
|
@ -18,7 +18,14 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/parseGlobs.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
static void setupLogging(const std::string & log_level)
|
||||
{
|
||||
|
@ -9,8 +9,6 @@
|
||||
#include <thread>
|
||||
#include <filesystem>
|
||||
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include <Common/TerminalSize.h>
|
||||
@ -26,6 +24,14 @@
|
||||
#include <IO/WriteBufferFromFile.h>
|
||||
#include <IO/WriteBufferFromFileDescriptor.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
static constexpr auto documentation = R"(
|
||||
A tool to extract information from Git repository for analytics.
|
||||
|
@ -347,12 +347,20 @@ bool RMCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node
|
||||
return false;
|
||||
node->args.push_back(std::move(path));
|
||||
|
||||
ASTPtr version;
|
||||
if (ParserNumber{}.parse(pos, version, expected))
|
||||
node->args.push_back(version->as<ASTLiteral &>().value);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void RMCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()));
|
||||
Int32 version{-1};
|
||||
if (query->args.size() == 2)
|
||||
version = static_cast<Int32>(query->args[1].get<Int32>());
|
||||
|
||||
client->zookeeper->remove(client->getAbsolutePath(query->args[0].safeGet<String>()), version);
|
||||
}
|
||||
|
||||
bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, Expected & expected) const
|
||||
@ -368,8 +376,8 @@ bool RMRCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & nod
|
||||
void RMRCommand::execute(const ASTKeeperQuery * query, KeeperClient * client) const
|
||||
{
|
||||
String path = client->getAbsolutePath(query->args[0].safeGet<String>());
|
||||
client->askConfirmation("You are going to recursively delete path " + path,
|
||||
[client, path]{ client->zookeeper->removeRecursive(path); });
|
||||
client->askConfirmation(
|
||||
"You are going to recursively delete path " + path, [client, path] { client->zookeeper->removeRecursive(path); });
|
||||
}
|
||||
|
||||
bool ReconfigCommand::parse(IParser::Pos & pos, std::shared_ptr<ASTKeeperQuery> & node, DB::Expected & expected) const
|
||||
|
@ -51,7 +51,7 @@ class CDCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} [path] -- Change the working path (default `.`)"; }
|
||||
String getHelpMessage() const override { return "{} [path] -- Changes the working path (default `.`)"; }
|
||||
};
|
||||
|
||||
class SetCommand : public IKeeperClientCommand
|
||||
@ -64,7 +64,7 @@ class SetCommand : public IKeeperClientCommand
|
||||
|
||||
String getHelpMessage() const override
|
||||
{
|
||||
return "{} <path> <value> [version] -- Updates the node's value. Only update if version matches (default: -1)";
|
||||
return "{} <path> <value> [version] -- Updates the node's value. Only updates if version matches (default: -1)";
|
||||
}
|
||||
};
|
||||
|
||||
@ -165,7 +165,6 @@ class FindBigFamily : public IKeeperClientCommand
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class RMCommand : public IKeeperClientCommand
|
||||
{
|
||||
String getName() const override { return "rm"; }
|
||||
@ -174,7 +173,7 @@ class RMCommand : public IKeeperClientCommand
|
||||
|
||||
void execute(const ASTKeeperQuery * query, KeeperClient * client) const override;
|
||||
|
||||
String getHelpMessage() const override { return "{} <path> -- Remove the node"; }
|
||||
String getHelpMessage() const override { return "{} <path> [version] -- Removes the node only if version matches (default: -1)"; }
|
||||
};
|
||||
|
||||
class RMRCommand : public IKeeperClientCommand
|
||||
|
@ -11,8 +11,6 @@ bool parseKeeperArg(IParser::Pos & pos, Expected & expected, String & result)
|
||||
{
|
||||
if (!parseIdentifierOrStringLiteral(pos, expected, result))
|
||||
return false;
|
||||
|
||||
ParserToken{TokenType::Whitespace}.ignore(pos);
|
||||
}
|
||||
|
||||
while (pos->type != TokenType::Whitespace && pos->type != TokenType::EndOfStream && pos->type != TokenType::Semicolon)
|
||||
|
@ -448,8 +448,6 @@
|
||||
<account_name>account</account_name>
|
||||
<account_key>pass123</account_key>
|
||||
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
|
||||
<cache_enabled>true</cache_enabled>
|
||||
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
|
||||
<skip_access_check>false</skip_access_check>
|
||||
</blob_storage_disk>
|
||||
</disks>
|
||||
|
@ -10,9 +10,17 @@
|
||||
#include <Disks/IO/createReadBufferFromFileBase.h>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <re2/re2.h>
|
||||
#include <filesystem>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
#define EXTRACT_PATH_PATTERN ".*\\/store/(.*)"
|
||||
|
@ -26,10 +26,17 @@
|
||||
#include <IO/Operators.h>
|
||||
#include <Poco/AccessExpireCache.h>
|
||||
#include <boost/algorithm/string/join.hpp>
|
||||
#include <re2/re2.h>
|
||||
#include <filesystem>
|
||||
#include <mutex>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,11 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <Analyzer/Identifier.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/ListNode.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
@ -1,12 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <Analyzer/Identifier.h>
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
#include <Analyzer/ColumnTransformers.h>
|
||||
#include <Parsers/ASTAsterisk.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -6243,11 +6243,11 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
const auto & insertion_table = scope_context->getInsertionTable();
|
||||
if (!insertion_table.empty())
|
||||
{
|
||||
const auto & insert_structure = DatabaseCatalog::instance()
|
||||
.getTable(insertion_table, scope_context)
|
||||
->getInMemoryMetadataPtr()
|
||||
->getColumns()
|
||||
.getInsertable();
|
||||
const auto & insert_columns = DatabaseCatalog::instance()
|
||||
.getTable(insertion_table, scope_context)
|
||||
->getInMemoryMetadataPtr()
|
||||
->getColumns();
|
||||
const auto & insert_column_names = scope_context->hasInsertionTableColumnNames() ? *scope_context->getInsertionTableColumnNames() : insert_columns.getInsertable().getNames();
|
||||
DB::ColumnsDescription structure_hint;
|
||||
|
||||
bool use_columns_from_insert_query = true;
|
||||
@ -6255,8 +6255,8 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
/// Insert table matches columns against SELECT expression by position, so we want to map
|
||||
/// insert table columns to table function columns through names from SELECT expression.
|
||||
|
||||
auto insert_column = insert_structure.begin();
|
||||
auto insert_structure_end = insert_structure.end(); /// end iterator of the range covered by possible asterisk
|
||||
auto insert_column_name_it = insert_column_names.begin();
|
||||
auto insert_column_names_end = insert_column_names.end(); /// end iterator of the range covered by possible asterisk
|
||||
auto virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint();
|
||||
bool asterisk = false;
|
||||
const auto & expression_list = scope.scope_node->as<QueryNode &>().getProjection();
|
||||
@ -6264,7 +6264,7 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
|
||||
/// We want to go through SELECT expression list and correspond each expression to column in insert table
|
||||
/// which type will be used as a hint for the file structure inference.
|
||||
for (; expression != expression_list.end() && insert_column != insert_structure_end; ++expression)
|
||||
for (; expression != expression_list.end() && insert_column_name_it != insert_column_names_end; ++expression)
|
||||
{
|
||||
if (auto * identifier_node = (*expression)->as<IdentifierNode>())
|
||||
{
|
||||
@ -6280,15 +6280,17 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
break;
|
||||
}
|
||||
|
||||
structure_hint.add({ identifier_node->getIdentifier().getFullName(), insert_column->type });
|
||||
ColumnDescription column = insert_columns.get(*insert_column_name_it);
|
||||
column.name = identifier_node->getIdentifier().getFullName();
|
||||
structure_hint.add(std::move(column));
|
||||
}
|
||||
|
||||
/// Once we hit asterisk we want to find end of the range covered by asterisk
|
||||
/// contributing every further SELECT expression to the tail of insert structure
|
||||
if (asterisk)
|
||||
--insert_structure_end;
|
||||
--insert_column_names_end;
|
||||
else
|
||||
++insert_column;
|
||||
++insert_column_name_it;
|
||||
}
|
||||
else if (auto * matcher_node = (*expression)->as<MatcherNode>(); matcher_node && matcher_node->getMatcherType() == MatcherNodeType::ASTERISK)
|
||||
{
|
||||
@ -6322,18 +6324,18 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
/// Once we hit asterisk we want to find end of the range covered by asterisk
|
||||
/// contributing every further SELECT expression to the tail of insert structure
|
||||
if (asterisk)
|
||||
--insert_structure_end;
|
||||
--insert_column_names_end;
|
||||
else
|
||||
++insert_column;
|
||||
++insert_column_name_it;
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Once we hit asterisk we want to find end of the range covered by asterisk
|
||||
/// contributing every further SELECT expression to the tail of insert structure
|
||||
if (asterisk)
|
||||
--insert_structure_end;
|
||||
--insert_column_names_end;
|
||||
else
|
||||
++insert_column;
|
||||
++insert_column_name_it;
|
||||
}
|
||||
}
|
||||
|
||||
@ -6353,8 +6355,8 @@ void QueryAnalyzer::resolveTableFunction(QueryTreeNodePtr & table_function_node,
|
||||
/// Append tail of insert structure to the hint
|
||||
if (asterisk)
|
||||
{
|
||||
for (; insert_column != insert_structure_end; ++insert_column)
|
||||
structure_hint.add({ insert_column->name, insert_column->type });
|
||||
for (; insert_column_name_it != insert_column_names_end; ++insert_column_name_it)
|
||||
structure_hint.add(insert_columns.get(*insert_column_name_it));
|
||||
}
|
||||
|
||||
if (!structure_hint.empty())
|
||||
|
@ -49,6 +49,7 @@ namespace
|
||||
settings.auth_settings.region,
|
||||
context->getRemoteHostFilter(),
|
||||
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
|
||||
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_retry_attempts),
|
||||
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
|
||||
/* for_disk_s3 = */ false, settings.request_settings.get_request_throttler, settings.request_settings.put_request_throttler,
|
||||
s3_uri.uri.getScheme());
|
||||
|
@ -316,7 +316,6 @@ target_link_libraries(clickhouse_common_io
|
||||
boost::context
|
||||
ch_contrib::cityhash
|
||||
ch_contrib::re2
|
||||
ch_contrib::re2_st
|
||||
ch_contrib::zlib
|
||||
pcg_random
|
||||
Poco::Foundation
|
||||
|
@ -433,7 +433,7 @@ void ColumnVector<T>::updatePermutation(IColumn::PermutationSortDirection direct
|
||||
template <typename T>
|
||||
MutableColumnPtr ColumnVector<T>::cloneResized(size_t size) const
|
||||
{
|
||||
auto res = this->create();
|
||||
auto res = this->create(size);
|
||||
|
||||
if (size > 0)
|
||||
{
|
||||
|
@ -397,6 +397,13 @@ public:
|
||||
/// It affects performance only (not correctness).
|
||||
virtual void reserve(size_t /*n*/) {}
|
||||
|
||||
/// Requests the removal of unused capacity.
|
||||
/// It is a non-binding request to reduce the capacity of the underlying container to its size.
|
||||
virtual MutablePtr shrinkToFit() const
|
||||
{
|
||||
return cloneResized(size());
|
||||
}
|
||||
|
||||
/// If we have another column as a source (owner of data), copy all data to ourself and reset source.
|
||||
virtual void ensureOwnership() {}
|
||||
|
||||
|
@ -7,11 +7,19 @@
|
||||
#include <filesystem>
|
||||
#include <format>
|
||||
#include <map>
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
#include <boost/algorithm/string/replace.hpp>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
|
@ -2,7 +2,14 @@
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -8,6 +8,9 @@
|
||||
# include <base/defines.h>
|
||||
# include <simdjson.h>
|
||||
# include "ElementTypes.h"
|
||||
# include <Common/PODArray_fwd.h>
|
||||
# include <Common/PODArray.h>
|
||||
# include <charconv>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -16,6 +19,254 @@ namespace ErrorCodes
|
||||
extern const int CANNOT_ALLOCATE_MEMORY;
|
||||
}
|
||||
|
||||
/// Format elements of basic types into string.
|
||||
/// The original implementation is mini_formatter in simdjson.h. But it is not public API, so we
|
||||
/// add an implementation here.
|
||||
class SimdJSONBasicFormatter
|
||||
{
|
||||
public:
|
||||
explicit SimdJSONBasicFormatter(PaddedPODArray<UInt8> & buffer_) : buffer(buffer_) {}
|
||||
inline void comma() { oneChar(','); }
|
||||
/** Start an array, prints [ **/
|
||||
inline void startArray() { oneChar('['); }
|
||||
/** End an array, prints ] **/
|
||||
inline void endArray() { oneChar(']'); }
|
||||
/** Start an object, prints { **/
|
||||
inline void startObject() { oneChar('{'); }
|
||||
/** End an object, prints } **/
|
||||
inline void endObject() { oneChar('}'); }
|
||||
/** Prints a true **/
|
||||
inline void trueAtom()
|
||||
{
|
||||
const char * s = "true";
|
||||
buffer.insert(s, s + 4);
|
||||
}
|
||||
/** Prints a false **/
|
||||
inline void falseAtom()
|
||||
{
|
||||
const char * s = "false";
|
||||
buffer.insert(s, s + 5);
|
||||
}
|
||||
/** Prints a null **/
|
||||
inline void nullAtom()
|
||||
{
|
||||
const char * s = "null";
|
||||
buffer.insert(s, s + 4);
|
||||
}
|
||||
/** Prints a number **/
|
||||
inline void number(int64_t x)
|
||||
{
|
||||
char number_buffer[24];
|
||||
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
|
||||
buffer.insert(number_buffer, res.ptr);
|
||||
}
|
||||
/** Prints a number **/
|
||||
inline void number(uint64_t x)
|
||||
{
|
||||
char number_buffer[24];
|
||||
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
|
||||
buffer.insert(number_buffer, res.ptr);
|
||||
}
|
||||
/** Prints a double formatted with std::to_chars (no explicit format); the returned error code is not checked **/
|
||||
inline void number(double x)
|
||||
{
|
||||
char number_buffer[24];
|
||||
auto res = std::to_chars(number_buffer, number_buffer + sizeof(number_buffer), x);
|
||||
buffer.insert(number_buffer, res.ptr);
|
||||
}
|
||||
/** Prints a key (string + colon) **/
|
||||
inline void key(std::string_view unescaped)
|
||||
{
|
||||
string(unescaped);
|
||||
oneChar(':');
|
||||
}
|
||||
/** Prints a string surrounded by double quotes. Quotes, backslashes and control characters (<= 0x1F) are escaped. **/
|
||||
inline void string(std::string_view unescaped)
|
||||
{
|
||||
oneChar('\"');
|
||||
size_t i = 0;
|
||||
// Fast path for the case where we have no control character, no ", and no backslash.
|
||||
// This should include most keys.
|
||||
//
|
||||
// We would like to use 'bool' but some compilers take offense to bitwise operation
|
||||
// with bool types.
|
||||
constexpr static char needs_escaping[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
|
||||
for (; i + 8 <= unescaped.length(); i += 8)
|
||||
{
|
||||
// Poor man's vectorization. This could get much faster if we used SIMD.
|
||||
//
|
||||
// It is not the case that replacing '|' with '||' would be neutral performance-wise.
|
||||
if (needs_escaping[uint8_t(unescaped[i])] | needs_escaping[uint8_t(unescaped[i + 1])]
|
||||
| needs_escaping[uint8_t(unescaped[i + 2])] | needs_escaping[uint8_t(unescaped[i + 3])]
|
||||
| needs_escaping[uint8_t(unescaped[i + 4])] | needs_escaping[uint8_t(unescaped[i + 5])]
|
||||
| needs_escaping[uint8_t(unescaped[i + 6])] | needs_escaping[uint8_t(unescaped[i + 7])])
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (; i < unescaped.length(); i++)
|
||||
{
|
||||
if (needs_escaping[uint8_t(unescaped[i])])
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
// The following is also possible and omits a 256-byte table, but it is slower:
|
||||
// for (; (i < unescaped.length()) && (uint8_t(unescaped[i]) > 0x1F)
|
||||
// && (unescaped[i] != '\"') && (unescaped[i] != '\\'); i++) {}
|
||||
|
||||
// At least for long strings, the following should be fast. We could
|
||||
// do better by integrating the checks and the insertion.
|
||||
buffer.insert(unescaped.data(), unescaped.data() + i);
|
||||
// We found a character that needs escaping if we enter this loop (slow).
|
||||
// Note that we do not restart from the beginning, but rather we continue
|
||||
// from the point where we encountered something that requires escaping.
|
||||
for (; i < unescaped.length(); i++)
|
||||
{
|
||||
switch (unescaped[i])
|
||||
{
|
||||
case '\"': {
|
||||
const char * s = "\\\"";
|
||||
buffer.insert(s, s + 2);
|
||||
}
|
||||
break;
|
||||
case '\\': {
|
||||
const char * s = "\\\\";
|
||||
buffer.insert(s, s + 2);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (uint8_t(unescaped[i]) <= 0x1F)
|
||||
{
|
||||
// If packed, this uses 8 * 32 bytes.
|
||||
// Note that we expect most compilers to embed this code in the data
|
||||
// section.
|
||||
constexpr static simdjson::escape_sequence escaped[32] = {
|
||||
{6, "\\u0000"}, {6, "\\u0001"}, {6, "\\u0002"}, {6, "\\u0003"}, {6, "\\u0004"}, {6, "\\u0005"}, {6, "\\u0006"},
|
||||
{6, "\\u0007"}, {2, "\\b"}, {2, "\\t"}, {2, "\\n"}, {6, "\\u000b"}, {2, "\\f"}, {2, "\\r"},
|
||||
{6, "\\u000e"}, {6, "\\u000f"}, {6, "\\u0010"}, {6, "\\u0011"}, {6, "\\u0012"}, {6, "\\u0013"}, {6, "\\u0014"},
|
||||
{6, "\\u0015"}, {6, "\\u0016"}, {6, "\\u0017"}, {6, "\\u0018"}, {6, "\\u0019"}, {6, "\\u001a"}, {6, "\\u001b"},
|
||||
{6, "\\u001c"}, {6, "\\u001d"}, {6, "\\u001e"}, {6, "\\u001f"}};
|
||||
auto u = escaped[uint8_t(unescaped[i])];
|
||||
buffer.insert(u.string, u.string + u.length);
|
||||
}
|
||||
else
|
||||
{
|
||||
oneChar(unescaped[i]);
|
||||
}
|
||||
} // switch
|
||||
} // for
|
||||
oneChar('\"');
|
||||
}
|
||||
|
||||
/** Appends a single character to the output buffer **/
inline void oneChar(char c)
|
||||
{
|
||||
buffer.push_back(c);
|
||||
}
|
||||
private:
|
||||
PaddedPODArray<UInt8> & buffer;
|
||||
|
||||
};
|
||||
|
||||
|
||||
/// Format object elements into string, element, array, object, kv-pair.
|
||||
/// Similar to string_builder in simdjson.h.
|
||||
class SimdJSONElementFormatter
|
||||
{
|
||||
public:
|
||||
explicit SimdJSONElementFormatter(PaddedPODArray<UInt8> & buffer_) : format(buffer_) {}
|
||||
/** Append an element to the builder (to be printed) **/
|
||||
inline void append(simdjson::dom::element value)
|
||||
{
|
||||
switch (value.type())
|
||||
{
|
||||
case simdjson::dom::element_type::UINT64: {
|
||||
format.number(value.get_uint64().value_unsafe());
|
||||
break;
|
||||
}
|
||||
case simdjson::dom::element_type::INT64: {
|
||||
format.number(value.get_int64().value_unsafe());
|
||||
break;
|
||||
}
|
||||
case simdjson::dom::element_type::DOUBLE: {
|
||||
format.number(value.get_double().value_unsafe());
|
||||
break;
|
||||
}
|
||||
case simdjson::dom::element_type::STRING: {
|
||||
format.string(value.get_string().value_unsafe());
|
||||
break;
|
||||
}
|
||||
case simdjson::dom::element_type::BOOL: {
|
||||
if (value.get_bool().value_unsafe())
|
||||
format.trueAtom();
|
||||
else
|
||||
format.falseAtom();
|
||||
break;
|
||||
}
|
||||
case simdjson::dom::element_type::NULL_VALUE: {
|
||||
format.nullAtom();
|
||||
break;
|
||||
}
|
||||
case simdjson::dom::element_type::ARRAY: {
|
||||
append(value.get_array().value_unsafe());
|
||||
break;
|
||||
}
|
||||
case simdjson::dom::element_type::OBJECT: {
|
||||
append(value.get_object().value_unsafe());
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/** Append an array to the builder (to be printed) **/
|
||||
inline void append(simdjson::dom::array value)
|
||||
{
|
||||
format.startArray();
|
||||
auto iter = value.begin();
|
||||
auto end = value.end();
|
||||
if (iter != end)
|
||||
{
|
||||
append(*iter);
|
||||
for (++iter; iter != end; ++iter)
|
||||
{
|
||||
format.comma();
|
||||
append(*iter);
|
||||
}
|
||||
}
|
||||
format.endArray();
|
||||
}
|
||||
|
||||
/** Append an object to the builder (to be printed): { followed by comma-separated key-value pairs and } **/
inline void append(simdjson::dom::object value)
|
||||
{
|
||||
format.startObject();
|
||||
auto pair = value.begin();
|
||||
auto end = value.end();
|
||||
if (pair != end)
|
||||
{
|
||||
append(*pair);
|
||||
for (++pair; pair != end; ++pair)
|
||||
{
|
||||
format.comma();
|
||||
append(*pair);
|
||||
}
|
||||
}
|
||||
format.endObject();
|
||||
}
|
||||
|
||||
/** Append a key-value pair to the builder (to be printed): the key (string + colon) followed by the value **/
inline void append(simdjson::dom::key_value_pair kv)
|
||||
{
|
||||
format.key(kv.key);
|
||||
append(kv.value);
|
||||
}
|
||||
private:
|
||||
SimdJSONBasicFormatter format;
|
||||
};
|
||||
|
||||
/// This class can be used as an argument for the template class FunctionJSON.
|
||||
/// It provides ability to parse JSONs using simdjson library.
|
||||
struct SimdJSONParser
|
||||
|
@ -441,8 +441,7 @@ finish:
|
||||
}
|
||||
}
|
||||
|
||||
template <bool thread_safe>
|
||||
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
|
||||
void OptimizedRegularExpression::analyze(
|
||||
std::string_view regexp_,
|
||||
std::string & required_substring,
|
||||
bool & is_trivial,
|
||||
@ -467,8 +466,7 @@ catch (...)
|
||||
LOG_ERROR(&Poco::Logger::get("OptimizeRegularExpression"), "Analyze RegularExpression failed, got error: {}", DB::getCurrentExceptionMessage(false));
|
||||
}
|
||||
|
||||
template <bool thread_safe>
|
||||
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
|
||||
OptimizedRegularExpression::OptimizedRegularExpression(const std::string & regexp_, int options)
|
||||
{
|
||||
std::vector<std::string> alternatives_dummy; /// this vector extracts patterns a,b,c from pattern (a|b|c). for now it's not used.
|
||||
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternatives_dummy);
|
||||
@ -486,7 +484,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
|
||||
if (!is_trivial)
|
||||
{
|
||||
/// Compile the re2 regular expression.
|
||||
typename RegexType::Options regexp_options;
|
||||
typename re2::RE2::Options regexp_options;
|
||||
|
||||
/// Never write error messages to stderr. It's ignorant to do it from library code.
|
||||
regexp_options.set_log_errors(false);
|
||||
@ -497,7 +495,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
|
||||
if (is_dot_nl)
|
||||
regexp_options.set_dot_nl(true);
|
||||
|
||||
re2 = std::make_unique<RegexType>(regexp_, regexp_options);
|
||||
re2 = std::make_unique<re2::RE2>(regexp_, regexp_options);
|
||||
if (!re2->ok())
|
||||
{
|
||||
throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP,
|
||||
@ -527,8 +525,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(cons
|
||||
}
|
||||
}
|
||||
|
||||
template <bool thread_safe>
|
||||
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept
|
||||
OptimizedRegularExpression::OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept
|
||||
: is_trivial(rhs.is_trivial)
|
||||
, required_substring_is_prefix(rhs.required_substring_is_prefix)
|
||||
, is_case_insensitive(rhs.is_case_insensitive)
|
||||
@ -545,8 +542,7 @@ OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(Opti
|
||||
}
|
||||
}
|
||||
|
||||
template <bool thread_safe>
|
||||
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size) const
|
||||
bool OptimizedRegularExpression::match(const char * subject, size_t subject_size) const
|
||||
{
|
||||
const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
|
||||
const UInt8 * haystack_end = haystack + subject_size;
|
||||
@ -577,13 +573,12 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
|
||||
}
|
||||
}
|
||||
|
||||
return re2->Match({subject, subject_size}, 0, subject_size, RegexType::UNANCHORED, nullptr, 0);
|
||||
return re2->Match({subject, subject_size}, 0, subject_size, re2::RE2::UNANCHORED, nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <bool thread_safe>
|
||||
bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, Match & match) const
|
||||
bool OptimizedRegularExpression::match(const char * subject, size_t subject_size, Match & match) const
|
||||
{
|
||||
const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
|
||||
const UInt8 * haystack_end = haystack + subject_size;
|
||||
@ -624,7 +619,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
|
||||
|
||||
std::string_view piece;
|
||||
|
||||
if (!RegexType::PartialMatch({subject, subject_size}, *re2, &piece))
|
||||
if (!re2::RE2::PartialMatch({subject, subject_size}, *re2, &piece))
|
||||
return false;
|
||||
else
|
||||
{
|
||||
@ -636,8 +631,7 @@ bool OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, si
|
||||
}
|
||||
|
||||
|
||||
template <bool thread_safe>
|
||||
unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
|
||||
unsigned OptimizedRegularExpression::match(const char * subject, size_t subject_size, MatchVec & matches, unsigned limit) const
|
||||
{
|
||||
const UInt8 * haystack = reinterpret_cast<const UInt8 *>(subject);
|
||||
const UInt8 * haystack_end = haystack + subject_size;
|
||||
@ -695,7 +689,7 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
|
||||
{subject, subject_size},
|
||||
0,
|
||||
subject_size,
|
||||
RegexType::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
pieces.data(),
|
||||
static_cast<int>(pieces.size())))
|
||||
{
|
||||
@ -721,6 +715,3 @@ unsigned OptimizedRegularExpressionImpl<thread_safe>::match(const char * subject
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template class OptimizedRegularExpressionImpl<true>;
|
||||
template class OptimizedRegularExpressionImpl<false>;
|
||||
|
@ -6,9 +6,15 @@
|
||||
#include <optional>
|
||||
#include <Common/StringSearcher.h>
|
||||
#include "config.h"
|
||||
#include <re2/re2.h>
|
||||
#include <re2_st/re2.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
/** Uses two ways to optimize a regular expression:
|
||||
* 1. If the regular expression is trivial (reduces to finding a substring in a string),
|
||||
@ -37,8 +43,7 @@ namespace OptimizedRegularExpressionDetails
|
||||
};
|
||||
}
|
||||
|
||||
template <bool thread_safe>
|
||||
class OptimizedRegularExpressionImpl
|
||||
class OptimizedRegularExpression
|
||||
{
|
||||
public:
|
||||
enum Options
|
||||
@ -51,12 +56,10 @@ public:
|
||||
using Match = OptimizedRegularExpressionDetails::Match;
|
||||
using MatchVec = std::vector<Match>;
|
||||
|
||||
using RegexType = std::conditional_t<thread_safe, re2::RE2, re2_st::RE2>;
|
||||
|
||||
OptimizedRegularExpressionImpl(const std::string & regexp_, int options = 0); /// NOLINT
|
||||
OptimizedRegularExpression(const std::string & regexp_, int options = 0); /// NOLINT
|
||||
/// StringSearcher store pointers to required_substring, it must be updated on move.
|
||||
OptimizedRegularExpressionImpl(OptimizedRegularExpressionImpl && rhs) noexcept;
|
||||
OptimizedRegularExpressionImpl(const OptimizedRegularExpressionImpl & rhs) = delete;
|
||||
OptimizedRegularExpression(OptimizedRegularExpression && rhs) noexcept;
|
||||
OptimizedRegularExpression(const OptimizedRegularExpression & rhs) = delete;
|
||||
|
||||
bool match(const std::string & subject) const
|
||||
{
|
||||
@ -85,7 +88,7 @@ public:
|
||||
unsigned getNumberOfSubpatterns() const { return number_of_subpatterns; }
|
||||
|
||||
/// Get the regexp re2 or nullptr if the pattern is trivial (for output to the log).
|
||||
const std::unique_ptr<RegexType> & getRE2() const { return re2; }
|
||||
const std::unique_ptr<re2::RE2> & getRE2() const { return re2; }
|
||||
|
||||
void getAnalyzeResult(std::string & out_required_substring, bool & out_is_trivial, bool & out_required_substring_is_prefix) const
|
||||
{
|
||||
@ -110,9 +113,6 @@ private:
|
||||
std::string required_substring;
|
||||
std::optional<DB::ASCIICaseSensitiveStringSearcher> case_sensitive_substring_searcher;
|
||||
std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
|
||||
std::unique_ptr<RegexType> re2;
|
||||
std::unique_ptr<re2::RE2> re2;
|
||||
unsigned number_of_subpatterns;
|
||||
};
|
||||
|
||||
using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;
|
||||
using OptimizedRegularExpressionSingleThreaded = OptimizedRegularExpressionImpl<false>;
|
||||
|
@ -1,4 +1,3 @@
|
||||
#include <re2/re2.h>
|
||||
#include <Poco/URI.h>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <Common/RemoteHostFilter.h>
|
||||
@ -6,6 +5,14 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -4,7 +4,14 @@
|
||||
#include <string>
|
||||
#include <atomic>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
|
@ -701,7 +701,7 @@ void ZooKeeper::receiveThread()
|
||||
|
||||
if (in->poll(max_wait_us))
|
||||
{
|
||||
if (requests_queue.isFinished())
|
||||
if (finalization_started.test())
|
||||
break;
|
||||
|
||||
receiveEvent();
|
||||
|
@ -2,11 +2,18 @@
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/ReadBufferFromString.h>
|
||||
#include <IO/Operators.h>
|
||||
#include <re2/re2.h>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,7 +1,14 @@
|
||||
#include <Common/parseGlobs.h>
|
||||
#include <re2/re2.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
using namespace DB;
|
||||
|
||||
|
@ -80,6 +80,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
|
||||
auto headers = auth_settings.headers;
|
||||
|
||||
static constexpr size_t s3_max_redirects = 10;
|
||||
static constexpr size_t s3_retry_attempts = 10;
|
||||
static constexpr bool enable_s3_requests_logging = false;
|
||||
|
||||
if (!new_uri.key.empty())
|
||||
@ -90,7 +91,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
|
||||
|
||||
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
|
||||
auth_settings.region,
|
||||
RemoteHostFilter(), s3_max_redirects,
|
||||
RemoteHostFilter(), s3_max_redirects, s3_retry_attempts,
|
||||
enable_s3_requests_logging,
|
||||
/* for_disk_s3 = */ false, /* get_request_throttler = */ {}, /* put_request_throttler = */ {},
|
||||
new_uri.uri.getScheme());
|
||||
|
@ -1319,26 +1319,32 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize)
|
||||
n2.setData("123456");
|
||||
n2.addChild("");
|
||||
|
||||
/// Note: Below, we check in many cases only that getApproximateDataSize() > 0. This is because
|
||||
/// the SnapshotableHashTable's approximate data size includes Node's sizeInBytes(). The
|
||||
/// latter includes sizeof(absl::flat_hash_set) which is surprisingly not constant across
|
||||
/// different runs. The approximate size is only used for statistics accounting, so this
|
||||
/// should be okay.
|
||||
|
||||
world.disableSnapshotMode();
|
||||
world.insert("world", n1);
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 193);
|
||||
EXPECT_GT(world.getApproximateDataSize(), 0);
|
||||
world.updateValue("world", [&](Node & value) { value = n2; });
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 211);
|
||||
EXPECT_GT(world.getApproximateDataSize(), 0);
|
||||
|
||||
world.erase("world");
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 0);
|
||||
|
||||
world.enableSnapshotMode(100000);
|
||||
world.insert("world", n1);
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 193);
|
||||
EXPECT_GT(world.getApproximateDataSize(), 0);
|
||||
world.updateValue("world", [&](Node & value) { value = n2; });
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 404);
|
||||
EXPECT_GT(world.getApproximateDataSize(), 0);
|
||||
|
||||
world.clearOutdatedNodes();
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 211);
|
||||
EXPECT_GT(world.getApproximateDataSize(), 0);
|
||||
|
||||
world.erase("world");
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 211);
|
||||
EXPECT_GT(world.getApproximateDataSize(), 0);
|
||||
|
||||
world.clear();
|
||||
EXPECT_EQ(world.getApproximateDataSize(), 0);
|
||||
|
@ -597,6 +597,14 @@ Block Block::sortColumns() const
|
||||
return sorted_block;
|
||||
}
|
||||
|
||||
/// Calls shrinkToFit() on every column of this block and returns the result of cloneWithColumns() on the new columns.
Block Block::shrinkToFit() const
|
||||
{
|
||||
Columns new_columns(data.size(), nullptr);
|
||||
for (size_t i = 0; i < data.size(); ++i)
|
||||
new_columns[i] = data[i].column->shrinkToFit();
|
||||
return cloneWithColumns(new_columns);
|
||||
}
|
||||
|
||||
|
||||
const ColumnsWithTypeAndName & Block::getColumnsWithTypeAndName() const
|
||||
{
|
||||
|
@ -149,6 +149,9 @@ public:
|
||||
/** Get a block with columns that have been rearranged in the order of their names. */
|
||||
Block sortColumns() const;
|
||||
|
||||
/** See IColumn::shrinkToFit() */
|
||||
Block shrinkToFit() const;
|
||||
|
||||
void clear();
|
||||
void swap(Block & other) noexcept;
|
||||
|
||||
|
@ -240,4 +240,19 @@ size_t NamesAndTypesList::getPosByName(const std::string &name) const noexcept
|
||||
return pos;
|
||||
}
|
||||
|
||||
/// Builds a string of the form "`name1` Type1, `name2` Type2": back-quoted column name, a space, the type name; entries are separated by ", ".
String NamesAndTypesList::toNamesAndTypesDescription() const
|
||||
{
|
||||
WriteBufferFromOwnString buf;
|
||||
bool first = true;
|
||||
for (const auto & name_and_type : *this)
|
||||
{
|
||||
if (!std::exchange(first, false))
|
||||
writeCString(", ", buf);
|
||||
writeBackQuotedString(name_and_type.name, buf);
|
||||
writeChar(' ', buf);
|
||||
writeString(name_and_type.type->getName(), buf);
|
||||
}
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -122,6 +122,8 @@ public:
|
||||
|
||||
/// Try to get column position by name, returns number of columns if column isn't found
|
||||
size_t getPosByName(const std::string & name) const noexcept;
|
||||
|
||||
String toNamesAndTypesDescription() const;
|
||||
};
|
||||
|
||||
using NamesAndTypesLists = std::vector<NamesAndTypesList>;
|
||||
|
@ -663,6 +663,7 @@ class IColumn;
|
||||
M(SetOperationMode, except_default_mode, SetOperationMode::ALL, "Set default mode in EXCEPT query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without mode will throw exception.", 0) \
|
||||
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
|
||||
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
|
||||
M(Bool, optimize_group_by_constant_keys, true, "Optimize GROUP BY when all keys in block are constant", 0) \
|
||||
M(Bool, legacy_column_name_of_tuple_literal, false, "List all names of element of large tuple literals in their column names instead of hash. This settings exists only for compatibility reasons. It makes sense to set to 'true', while doing rolling update of cluster from version lower than 21.7 to higher.", 0) \
|
||||
\
|
||||
M(Bool, query_plan_enable_optimizations, true, "Apply optimizations to query plan", 0) \
|
||||
@ -800,8 +801,10 @@ class IColumn;
|
||||
M(UInt64, extract_kvp_max_pairs_per_row, 1000, "Max number pairs that can be produced by extractKeyValuePairs function. Used to safeguard against consuming too much memory.", 0) \
|
||||
M(Timezone, session_timezone, "", "This setting can be removed in the future due to potential caveats. It is experimental and is not suitable for production usage. The default timezone for current session or query. The server default timezone if empty.", 0) \
|
||||
M(Bool, allow_create_index_without_type, false, "Allow CREATE INDEX query without TYPE. Query will be ignored. Made for SQL compatibility tests.", 0)\
|
||||
M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0)\
|
||||
// End of COMMON_SETTINGS
|
||||
M(Bool, create_index_ignore_unique, false, "Ignore UNIQUE keyword in CREATE UNIQUE INDEX. Made for SQL compatibility tests.", 0) \
|
||||
M(Bool, ignore_access_denied_multidirectory_globs, false, "Ignore access denied errors when processing multi-directory globs for file & HDFS.", 0)\
|
||||
|
||||
// End of COMMON_SETTINGS
|
||||
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
|
||||
|
||||
#define MAKE_OBSOLETE(M, TYPE, NAME, DEFAULT) \
|
||||
|
@ -80,7 +80,8 @@ namespace SettingsChangesHistory
|
||||
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
|
||||
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
|
||||
{
|
||||
{"23.9", {{"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}}},
|
||||
{"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},
|
||||
{"input_format_json_try_infer_named_tuples_from_objects", false, true, "Try to infer named Tuples from JSON objects by default"}}},
|
||||
{"23.8", {{"rewrite_count_distinct_if_with_count_distinct_implementation", false, true, "Rewrite countDistinctIf with count_distinct_implementation configuration"}}},
|
||||
{"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
|
||||
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
|
||||
|
@ -105,7 +105,7 @@ ASTPtr DatabaseDictionary::getCreateTableQueryImpl(const String & table_name, Co
|
||||
|
||||
auto names_and_types = StorageDictionary::getNamesAndTypes(ExternalDictionariesLoader::getDictionaryStructure(*load_result.config));
|
||||
buffer << "CREATE TABLE " << backQuoteIfNeed(getDatabaseName()) << '.' << backQuoteIfNeed(table_name) << " (";
|
||||
buffer << StorageDictionary::generateNamesAndTypesDescription(names_and_types);
|
||||
buffer << names_and_types.toNamesAndTypesDescription();
|
||||
buffer << ") Engine = Dictionary(" << backQuoteIfNeed(table_name) << ")";
|
||||
}
|
||||
|
||||
|
@ -15,10 +15,18 @@
|
||||
#include <TableFunctions/TableFunctionFactory.h>
|
||||
|
||||
#include <Poco/URI.h>
|
||||
#include <re2/re2.h>
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace DB
|
||||
|
@ -117,14 +117,14 @@ struct RegExpTreeDictionary::RegexTreeNode
|
||||
UInt64 id;
|
||||
UInt64 parent_id;
|
||||
std::string regex;
|
||||
re2_st::RE2 searcher;
|
||||
re2::RE2 searcher;
|
||||
|
||||
RegexTreeNode(UInt64 id_, UInt64 parent_id_, const String & regex_, const re2_st::RE2::Options & regexp_options):
|
||||
RegexTreeNode(UInt64 id_, UInt64 parent_id_, const String & regex_, const re2::RE2::Options & regexp_options):
|
||||
id(id_), parent_id(parent_id_), regex(regex_), searcher(regex_, regexp_options) {}
|
||||
|
||||
bool match(const char * haystack, size_t size) const
|
||||
{
|
||||
return searcher.Match(haystack, 0, size, re2_st::RE2::Anchor::UNANCHORED, nullptr, 0);
|
||||
return searcher.Match(haystack, 0, size, re2::RE2::Anchor::UNANCHORED, nullptr, 0);
|
||||
}
|
||||
|
||||
struct AttributeValue
|
||||
@ -204,7 +204,7 @@ void RegExpTreeDictionary::initRegexNodes(Block & block)
|
||||
throw Exception(ErrorCodes::INCORRECT_DICTIONARY_DEFINITION, "There are invalid id {}", id);
|
||||
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
re2::RE2::Options regexp_options;
|
||||
regexp_options.set_log_errors(false);
|
||||
regexp_options.set_case_sensitive(!flag_case_insensitive);
|
||||
regexp_options.set_dot_nl(flag_dotall);
|
||||
@ -480,11 +480,11 @@ public:
|
||||
inline size_t attributesFull() const { return n_full_attributes; }
|
||||
};
|
||||
|
||||
std::pair<String, bool> processBackRefs(const String & data, const re2_st::RE2 & searcher, const std::vector<StringPiece> & pieces)
|
||||
std::pair<String, bool> processBackRefs(const String & data, const re2::RE2 & searcher, const std::vector<StringPiece> & pieces)
|
||||
{
|
||||
std::string_view matches[10];
|
||||
String result;
|
||||
searcher.Match({data.data(), data.size()}, 0, data.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 10);
|
||||
searcher.Match({data.data(), data.size()}, 0, data.size(), re2::RE2::Anchor::UNANCHORED, matches, 10);
|
||||
/// if the pattern is a single '$1' but fails to match, we would use the default value.
|
||||
if (pieces.size() == 1 && pieces[0].ref_num >= 0 && pieces[0].ref_num < 10 && matches[pieces[0].ref_num].empty())
|
||||
return std::make_pair(result, true);
|
||||
|
@ -4,10 +4,18 @@
|
||||
|
||||
#include <Common/Exception.h>
|
||||
#include <optional>
|
||||
#include <re2/re2.h>
|
||||
#include <azure/identity/managed_identity_credential.hpp>
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
using namespace Azure::Storage::Blobs;
|
||||
|
||||
|
||||
|
@ -5,7 +5,15 @@
|
||||
#include <Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h>
|
||||
#include <Disks/ObjectStorages/IMetadataStorage.h>
|
||||
#include <Disks/ObjectStorages/DiskObjectStorageTransaction.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace CurrentMetrics
|
||||
{
|
||||
|
@ -52,6 +52,7 @@ std::unique_ptr<S3::Client> getClient(
|
||||
config.getString(config_prefix + ".region", ""),
|
||||
context->getRemoteHostFilter(),
|
||||
static_cast<int>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
|
||||
static_cast<int>(context->getGlobalContext()->getSettingsRef().s3_retry_attempts),
|
||||
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
|
||||
/* for_disk_s3 = */ true,
|
||||
settings.request_settings.get_request_throttler,
|
||||
|
@ -110,7 +110,7 @@ void WebObjectStorage::initialize(const String & uri_path, const std::unique_loc
|
||||
WebObjectStorage::WebObjectStorage(
|
||||
const String & url_,
|
||||
ContextPtr context_)
|
||||
: WithContext(context_->getBufferContext())
|
||||
: WithContext(context_->getGlobalContext())
|
||||
, url(url_)
|
||||
, log(&Poco::Logger::get("WebObjectStorage"))
|
||||
{
|
||||
|
@ -1156,12 +1156,12 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
return function->execute(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
ColumnPtr executeArrayImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
|
||||
ColumnPtr executeArraysImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const
|
||||
{
|
||||
const auto * return_type_array = checkAndGetDataType<DataTypeArray>(result_type.get());
|
||||
|
||||
if (!return_type_array)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Return type for function {} must be array.", getName());
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Return type for function {} must be array", getName());
|
||||
|
||||
auto num_args = arguments.size();
|
||||
DataTypes data_types;
|
||||
@ -1211,6 +1211,72 @@ class FunctionBinaryArithmetic : public IFunction
|
||||
return ColumnArray::create(res, typeid_cast<const ColumnArray *>(arguments[0].column.get())->getOffsetsPtr());
|
||||
}
|
||||
|
||||
/// Executes the operation between an array argument and a numeric argument.
///
/// One of the two arguments is expected to be an array and the other one a number; the number may be
/// either the first or the second argument. The operation is applied to the flattened elements of the
/// array and to the number expanded to one value per array element. The result is an array column
/// that reuses the offsets of the input array.
///
/// Throws LOGICAL_ERROR if result_type is not an array type.
ColumnPtr executeArrayWithNumericImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const
{
    ColumnsWithTypeAndName arguments = args;

    /// Defines the order of arguments (if the array is the first argument, is_swapped = false).
    const bool is_swapped = isNumber(args[0].type);

    const auto * return_type_array = checkAndGetDataType<DataTypeArray>(result_type.get());
    if (!return_type_array)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Return type for function {} must be array", getName());

    ColumnsWithTypeAndName new_arguments(arguments.size());

    const auto * left_const = typeid_cast<const ColumnConst *>(arguments[0].column.get());
    const auto * right_const = typeid_cast<const ColumnConst *>(arguments[1].column.get());

    /// Both arguments are constants: compute a single row and wrap the result into a constant column.
    if (left_const && right_const)
    {
        new_arguments[0] = {left_const->getDataColumnPtr(), arguments[0].type, arguments[0].name};
        new_arguments[1] = {right_const->getDataColumnPtr(), arguments[1].type, arguments[1].name};
        auto col = executeImpl(new_arguments, result_type, 1);
        return ColumnConst::create(std::move(col), input_rows_count);
    }

    /// The array itself is a constant column: materialize it and run the operation again.
    if (right_const && is_swapped)
    {
        new_arguments[0] = {arguments[0].column.get()->getPtr(), arguments[0].type, arguments[0].name};
        new_arguments[1] = {right_const->convertToFullColumnIfConst(), arguments[1].type, arguments[1].name};
        return executeImpl(new_arguments, result_type, input_rows_count);
    }
    else if (left_const && !is_swapped)
    {
        new_arguments[0] = {left_const->convertToFullColumnIfConst(), arguments[0].type, arguments[0].name};
        new_arguments[1] = {arguments[1].column.get()->getPtr(), arguments[1].type, arguments[1].name};
        return executeImpl(new_arguments, result_type, input_rows_count);
    }

    /// From here on arguments[0] is always the array and arguments[1] is always the number.
    if (is_swapped)
        std::swap(arguments[1], arguments[0]);

    const auto * left_array_col = typeid_cast<const ColumnArray *>(arguments[0].column.get());
    const auto & left_array_elements_type = typeid_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
    const auto & right_col = arguments[1].column.get()->cloneResized(left_array_col->size());

    /// Total number of array elements, i.e. the number of rows of the flattened data column.
    size_t rows_count = 0;
    const auto & left_offsets = left_array_col->getOffsets();
    if (!left_offsets.empty())
        rows_count = left_offsets.back();

    new_arguments[0] = {left_array_col->getDataPtr(), left_array_elements_type, arguments[0].name};
    if (right_const)
        new_arguments[1] = {right_col->cloneResized(rows_count), arguments[1].type, arguments[1].name};
    else
        new_arguments[1] = {right_col->replicate(left_array_col->getOffsets()), arguments[1].type, arguments[1].name};

    /// The element type of the result must come from the declared return type, not from the input
    /// array: the return type is derived from both the array element type and the number type,
    /// so it may differ from the element type of the input array.
    const DataTypePtr & result_nested_type = return_type_array->getNestedType();

    /// Restore the original order of the operands before executing the operation.
    if (is_swapped)
        std::swap(new_arguments[1], new_arguments[0]);
    auto res = executeImpl(new_arguments, result_nested_type, rows_count);

    return ColumnArray::create(res, left_array_col->getOffsetsPtr());
}
|
||||
|
||||
ColumnPtr executeTupleNumberOperator(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type,
|
||||
size_t input_rows_count, const FunctionOverloadResolverPtr & function_builder) const
|
||||
{
|
||||
@ -1425,6 +1491,25 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
if constexpr (is_multiply || is_division)
|
||||
{
|
||||
if (isArray(arguments[0]) && isNumber(arguments[1]))
|
||||
{
|
||||
DataTypes new_arguments {
|
||||
static_cast<const DataTypeArray &>(*arguments[0]).getNestedType(),
|
||||
arguments[1],
|
||||
};
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeImplStatic(new_arguments, context));
|
||||
}
|
||||
if (isNumber(arguments[0]) && isArray(arguments[1]))
|
||||
{
|
||||
DataTypes new_arguments {
|
||||
arguments[0],
|
||||
static_cast<const DataTypeArray &>(*arguments[1]).getNestedType(),
|
||||
};
|
||||
return std::make_shared<DataTypeArray>(getReturnTypeImplStatic(new_arguments, context));
|
||||
}
|
||||
}
|
||||
|
||||
/// Special case when the function is plus or minus, one of arguments is Date/DateTime and another is Interval.
|
||||
if (auto function_builder = getFunctionForIntervalArithmetic(arguments[0], arguments[1], context))
|
||||
@ -2132,7 +2217,11 @@ ColumnPtr executeStringInteger(const ColumnsWithTypeAndName & arguments, const A
|
||||
});
|
||||
|
||||
if (isArray(result_type))
|
||||
return executeArrayImpl(arguments, result_type, input_rows_count);
|
||||
{
|
||||
if (!isArray(arguments[0].type) || !isArray(arguments[1].type))
|
||||
return executeArrayWithNumericImpl(arguments, result_type, input_rows_count);
|
||||
return executeArraysImpl(arguments, result_type, input_rows_count);
|
||||
}
|
||||
|
||||
if (!valid)
|
||||
{
|
||||
|
@ -35,10 +35,92 @@ extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
|
||||
extern const int BAD_ARGUMENTS;
|
||||
}
|
||||
|
||||
/// Have implemented the operator << for json elements. So we could use stringstream to serialize json elements.
|
||||
/// But stingstream have bad performance, not recommend to use it.
|
||||
template <typename Element>
|
||||
class DefaultJSONStringSerializer
|
||||
{
|
||||
public:
|
||||
explicit DefaultJSONStringSerializer(ColumnString & col_str_) : col_str(col_str_) { }
|
||||
|
||||
inline void addRawData(const char * ptr, size_t len)
|
||||
{
|
||||
out << std::string_view(ptr, len);
|
||||
}
|
||||
|
||||
inline void addRawString(std::string_view str)
|
||||
{
|
||||
out << str;
|
||||
}
|
||||
|
||||
/// serialize the json element into stringstream
|
||||
inline void addElement(const Element & element)
|
||||
{
|
||||
out << element.getElement();
|
||||
}
|
||||
inline void commit()
|
||||
{
|
||||
auto out_str = out.str();
|
||||
col_str.insertData(out_str.data(), out_str.size());
|
||||
}
|
||||
inline void rollback() {}
|
||||
private:
|
||||
ColumnString & col_str;
|
||||
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
};
|
||||
|
||||
/// A more efficient way to serialize json elements into destination column.
|
||||
/// Formatter takes the chars buffer in the ColumnString and put data into it directly.
|
||||
template<typename Element, typename Formatter>
|
||||
class JSONStringSerializer
|
||||
{
|
||||
public:
|
||||
explicit JSONStringSerializer(ColumnString & col_str_)
|
||||
: col_str(col_str_), chars(col_str_.getChars()), offsets(col_str_.getOffsets()), formatter(col_str_.getChars())
|
||||
{
|
||||
prev_offset = offsets.empty() ? 0 : offsets.back();
|
||||
}
|
||||
/// Put the data into column's buffer directly.
|
||||
inline void addRawData(const char * ptr, size_t len)
|
||||
{
|
||||
chars.insert(ptr, ptr + len);
|
||||
}
|
||||
|
||||
inline void addRawString(std::string_view str)
|
||||
{
|
||||
chars.insert(str.data(), str.data() + str.size());
|
||||
}
|
||||
|
||||
/// serialize the json element into column's buffer directly
|
||||
inline void addElement(const Element & element)
|
||||
{
|
||||
formatter.append(element.getElement());
|
||||
}
|
||||
inline void commit()
|
||||
{
|
||||
chars.push_back(0);
|
||||
offsets.push_back(chars.size());
|
||||
}
|
||||
inline void rollback()
|
||||
{
|
||||
chars.resize(prev_offset);
|
||||
}
|
||||
private:
|
||||
ColumnString & col_str;
|
||||
ColumnString::Chars & chars;
|
||||
IColumn::Offsets & offsets;
|
||||
Formatter formatter;
|
||||
size_t prev_offset;
|
||||
|
||||
};
|
||||
|
||||
/// Empty placeholder type: it declares no members and no serialization methods.
/// NOTE(review): presumably meant for implementations that never serialize strings — confirm against its users.
class EmptyJSONStringSerializer{};
|
||||
|
||||
|
||||
class FunctionSQLJSONHelpers
|
||||
{
|
||||
public:
|
||||
template <typename Name, template <typename> typename Impl, class JSONParser>
|
||||
template <typename Name, typename Impl, class JSONParser>
|
||||
class Executor
|
||||
{
|
||||
public:
|
||||
@ -116,7 +198,7 @@ public:
|
||||
bool document_ok = false;
|
||||
|
||||
/// Parse JSON for every row
|
||||
Impl<JSONParser> impl;
|
||||
Impl impl;
|
||||
for (const auto i : collections::range(0, input_rows_count))
|
||||
{
|
||||
std::string_view json{
|
||||
@ -138,7 +220,7 @@ public:
|
||||
};
|
||||
};
|
||||
|
||||
template <typename Name, template <typename> typename Impl>
|
||||
template <typename Name, template <typename, typename> typename Impl>
|
||||
class FunctionSQLJSON : public IFunction, WithConstContext
|
||||
{
|
||||
public:
|
||||
@ -155,7 +237,8 @@ public:
|
||||
|
||||
DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override
|
||||
{
|
||||
return Impl<DummyJSONParser>::getReturnType(Name::name, arguments, getContext());
|
||||
return Impl<DummyJSONParser, DefaultJSONStringSerializer<DummyJSONParser::Element>>::getReturnType(
|
||||
Name::name, arguments, getContext());
|
||||
}
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
@ -168,9 +251,14 @@ public:
|
||||
unsigned parse_depth = static_cast<unsigned>(getContext()->getSettingsRef().max_parser_depth);
|
||||
#if USE_SIMDJSON
|
||||
if (getContext()->getSettingsRef().allow_simdjson)
|
||||
return FunctionSQLJSONHelpers::Executor<Name, Impl, SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
|
||||
return FunctionSQLJSONHelpers::Executor<
|
||||
Name,
|
||||
Impl<SimdJSONParser, JSONStringSerializer<SimdJSONParser::Element, SimdJSONElementFormatter>>,
|
||||
SimdJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
|
||||
#endif
|
||||
return FunctionSQLJSONHelpers::Executor<Name, Impl, DummyJSONParser>::run(arguments, result_type, input_rows_count, parse_depth, getContext());
|
||||
return FunctionSQLJSONHelpers::
|
||||
Executor<Name, Impl<DummyJSONParser, DefaultJSONStringSerializer<DummyJSONParser::Element>>, DummyJSONParser>::run(
|
||||
arguments, result_type, input_rows_count, parse_depth, getContext());
|
||||
}
|
||||
};
|
||||
|
||||
@ -189,7 +277,7 @@ struct NameJSONQuery
|
||||
static constexpr auto name{"JSON_QUERY"};
|
||||
};
|
||||
|
||||
template <typename JSONParser>
|
||||
template <typename JSONParser, typename JSONStringSerializer>
|
||||
class JSONExistsImpl
|
||||
{
|
||||
public:
|
||||
@ -228,7 +316,7 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <typename JSONParser>
|
||||
template <typename JSONParser, typename JSONStringSerializer>
|
||||
class JSONValueImpl
|
||||
{
|
||||
public:
|
||||
@ -279,11 +367,7 @@ public:
|
||||
|
||||
if (status == VisitorStatus::Exhausted)
|
||||
return false;
|
||||
|
||||
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
out << current_element.getElement();
|
||||
auto output_str = out.str();
|
||||
ColumnString * col_str;
|
||||
ColumnString * col_str = nullptr;
|
||||
if (isColumnNullable(dest))
|
||||
{
|
||||
ColumnNullable & col_null = assert_cast<ColumnNullable &>(dest);
|
||||
@ -294,20 +378,15 @@ public:
|
||||
{
|
||||
col_str = assert_cast<ColumnString *>(&dest);
|
||||
}
|
||||
ColumnString::Chars & data = col_str->getChars();
|
||||
ColumnString::Offsets & offsets = col_str->getOffsets();
|
||||
|
||||
JSONStringSerializer json_serializer(*col_str);
|
||||
if (current_element.isString())
|
||||
{
|
||||
ReadBufferFromString buf(output_str);
|
||||
readJSONStringInto(data, buf);
|
||||
data.push_back(0);
|
||||
offsets.push_back(data.size());
|
||||
auto str = current_element.getString();
|
||||
json_serializer.addRawString(str);
|
||||
}
|
||||
else
|
||||
{
|
||||
col_str->insertData(output_str.data(), output_str.size());
|
||||
}
|
||||
json_serializer.addElement(current_element);
|
||||
json_serializer.commit();
|
||||
return true;
|
||||
}
|
||||
};
|
||||
@ -316,7 +395,7 @@ public:
|
||||
* Function to test jsonpath member access, will be removed in final PR
|
||||
* @tparam JSONParser parser
|
||||
*/
|
||||
template <typename JSONParser>
|
||||
template <typename JSONParser, typename JSONStringSerializer>
|
||||
class JSONQueryImpl
|
||||
{
|
||||
public:
|
||||
@ -328,23 +407,27 @@ public:
|
||||
|
||||
static bool insertResultToColumn(IColumn & dest, const Element & root, ASTPtr & query_ptr, const ContextPtr &)
|
||||
{
|
||||
ColumnString & col_str = assert_cast<ColumnString &>(dest);
|
||||
|
||||
GeneratorJSONPath<JSONParser> generator_json_path(query_ptr);
|
||||
Element current_element = root;
|
||||
VisitorStatus status;
|
||||
std::stringstream out; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
|
||||
/// Create json array of results: [res1, res2, ...]
|
||||
out << "[";
|
||||
bool success = false;
|
||||
const char * array_begin = "[";
|
||||
const char * array_end = "]";
|
||||
const char * comma = ", ";
|
||||
JSONStringSerializer json_serializer(col_str);
|
||||
json_serializer.addRawData(array_begin, 1);
|
||||
while ((status = generator_json_path.getNextItem(current_element)) != VisitorStatus::Exhausted)
|
||||
{
|
||||
if (status == VisitorStatus::Ok)
|
||||
{
|
||||
if (success)
|
||||
{
|
||||
out << ", ";
|
||||
json_serializer.addRawData(comma, 2);
|
||||
}
|
||||
success = true;
|
||||
out << current_element.getElement();
|
||||
json_serializer.addElement(current_element);
|
||||
}
|
||||
else if (status == VisitorStatus::Error)
|
||||
{
|
||||
@ -354,14 +437,13 @@ public:
|
||||
}
|
||||
current_element = root;
|
||||
}
|
||||
out << "]";
|
||||
if (!success)
|
||||
{
|
||||
json_serializer.rollback();
|
||||
return false;
|
||||
}
|
||||
ColumnString & col_str = assert_cast<ColumnString &>(dest);
|
||||
auto output_str = out.str();
|
||||
col_str.insertData(output_str.data(), output_str.size());
|
||||
json_serializer.addRawData(array_end, 1);
|
||||
json_serializer.commit();
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
@ -464,7 +464,7 @@ public:
|
||||
"Must be constant string.", arguments[0].column->getName(), getName());
|
||||
|
||||
if (!col->getValue<String>().empty())
|
||||
re = std::make_shared<Regexps::Regexp>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
|
||||
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
|
||||
}
|
||||
|
||||
/// Returns the position of the argument that is the column of strings
|
||||
@ -551,7 +551,7 @@ public:
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. "
|
||||
"Must be constant string.", arguments[1].column->getName(), getName());
|
||||
|
||||
re = std::make_shared<Regexps::Regexp>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
|
||||
re = std::make_shared<OptimizedRegularExpression>(Regexps::createRegexp<false, false, false>(col->getValue<String>()));
|
||||
capture = re->getNumberOfSubpatterns() > 0 ? 1 : 0;
|
||||
|
||||
matches.resize(capture + 1);
|
||||
|
@ -9,7 +9,7 @@
|
||||
#include <Functions/Regexps.h>
|
||||
|
||||
#include "config.h"
|
||||
#include <re2_st/re2.h>
|
||||
#include <re2/re2.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -191,7 +191,7 @@ struct MatchImpl
|
||||
return;
|
||||
}
|
||||
|
||||
const auto & regexp = Regexps::Regexp(Regexps::createRegexp<is_like, /*no_capture*/ true, case_insensitive>(needle));
|
||||
const auto & regexp = OptimizedRegularExpression(Regexps::createRegexp<is_like, /*no_capture*/ true, case_insensitive>(needle));
|
||||
|
||||
String required_substring;
|
||||
bool is_trivial;
|
||||
@ -212,7 +212,7 @@ struct MatchImpl
|
||||
{reinterpret_cast<const char *>(&haystack_data[prev_offset]), haystack_offsets[i] - prev_offset - 1},
|
||||
0,
|
||||
haystack_offsets[i] - prev_offset - 1,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match;
|
||||
@ -266,7 +266,7 @@ struct MatchImpl
|
||||
{str_data, str_size},
|
||||
start_pos,
|
||||
end_pos,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match;
|
||||
@ -359,7 +359,7 @@ struct MatchImpl
|
||||
return;
|
||||
}
|
||||
|
||||
const auto & regexp = Regexps::Regexp(Regexps::createRegexp<is_like, /*no_capture*/ true, case_insensitive>(needle));
|
||||
const auto & regexp = OptimizedRegularExpression(Regexps::createRegexp<is_like, /*no_capture*/ true, case_insensitive>(needle));
|
||||
|
||||
String required_substring;
|
||||
bool is_trivial;
|
||||
@ -380,7 +380,7 @@ struct MatchImpl
|
||||
{reinterpret_cast<const char *>(&haystack[offset]), N},
|
||||
0,
|
||||
N,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match;
|
||||
@ -437,7 +437,7 @@ struct MatchImpl
|
||||
{str_data, N},
|
||||
start_pos,
|
||||
end_pos,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match;
|
||||
@ -526,7 +526,7 @@ struct MatchImpl
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
0,
|
||||
cur_haystack_length,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match;
|
||||
@ -552,7 +552,7 @@ struct MatchImpl
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
start_pos,
|
||||
end_pos,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match2;
|
||||
@ -635,7 +635,7 @@ struct MatchImpl
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
0,
|
||||
cur_haystack_length,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match;
|
||||
@ -661,7 +661,7 @@ struct MatchImpl
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
start_pos,
|
||||
end_pos,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
res[i] = negate ^ match2;
|
||||
|
@ -341,7 +341,7 @@ struct MultiMatchAnyImpl
|
||||
{
|
||||
String needle(needles[j]);
|
||||
|
||||
const auto & regexp = Regexps::Regexp(Regexps::createRegexp</*like*/ false, /*no_capture*/ true, /*case_insensitive*/ false>(needle));
|
||||
const auto & regexp = OptimizedRegularExpression(Regexps::createRegexp</*like*/ false, /*no_capture*/ true, /*case_insensitive*/ false>(needle));
|
||||
|
||||
String required_substr;
|
||||
bool is_trivial;
|
||||
@ -364,7 +364,7 @@ struct MultiMatchAnyImpl
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
0,
|
||||
cur_haystack_length,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
if constexpr (FindAny)
|
||||
@ -401,7 +401,7 @@ struct MultiMatchAnyImpl
|
||||
{reinterpret_cast<const char *>(cur_haystack_data), cur_haystack_length},
|
||||
start_pos,
|
||||
end_pos,
|
||||
re2_st::RE2::UNANCHORED,
|
||||
re2::RE2::UNANCHORED,
|
||||
nullptr,
|
||||
0);
|
||||
if constexpr (FindAny)
|
||||
|
@ -39,11 +39,10 @@ namespace ErrorCodes
|
||||
namespace Regexps
|
||||
{
|
||||
|
||||
using Regexp = OptimizedRegularExpressionSingleThreaded;
|
||||
using RegexpPtr = std::shared_ptr<Regexp>;
|
||||
using RegexpPtr = std::shared_ptr<OptimizedRegularExpression>;
|
||||
|
||||
template <bool like, bool no_capture, bool case_insensitive>
|
||||
inline Regexp createRegexp(const String & pattern)
|
||||
inline OptimizedRegularExpression createRegexp(const String & pattern)
|
||||
{
|
||||
int flags = OptimizedRegularExpression::RE_DOT_NL;
|
||||
if constexpr (no_capture)
|
||||
@ -65,7 +64,7 @@ inline Regexp createRegexp(const String & pattern)
|
||||
class LocalCacheTable
|
||||
{
|
||||
public:
|
||||
using RegexpPtr = std::shared_ptr<Regexp>;
|
||||
using RegexpPtr = std::shared_ptr<OptimizedRegularExpression>;
|
||||
|
||||
template <bool like, bool no_capture, bool case_insensitive>
|
||||
RegexpPtr getOrSet(const String & pattern)
|
||||
@ -74,11 +73,11 @@ public:
|
||||
|
||||
if (bucket.regexp == nullptr) [[unlikely]]
|
||||
/// insert new entry
|
||||
bucket = {pattern, std::make_shared<Regexp>(createRegexp<like, no_capture, case_insensitive>(pattern))};
|
||||
bucket = {pattern, std::make_shared<OptimizedRegularExpression>(createRegexp<like, no_capture, case_insensitive>(pattern))};
|
||||
else
|
||||
if (pattern != bucket.pattern)
|
||||
/// replace existing entry
|
||||
bucket = {pattern, std::make_shared<Regexp>(createRegexp<like, no_capture, case_insensitive>(pattern))};
|
||||
bucket = {pattern, std::make_shared<OptimizedRegularExpression>(createRegexp<like, no_capture, case_insensitive>(pattern))};
|
||||
|
||||
return bucket.regexp;
|
||||
}
|
||||
|
@ -4,9 +4,14 @@
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include "config.h"
|
||||
#include <re2_st/re2.h>
|
||||
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -95,7 +100,7 @@ struct ReplaceRegexpImpl
|
||||
size_t haystack_length,
|
||||
ColumnString::Chars & res_data,
|
||||
ColumnString::Offset & res_offset,
|
||||
const re2_st::RE2 & searcher,
|
||||
const re2::RE2 & searcher,
|
||||
int num_captures,
|
||||
const Instructions & instructions)
|
||||
{
|
||||
@ -110,7 +115,7 @@ struct ReplaceRegexpImpl
|
||||
/// If no more replacements possible for current string
|
||||
bool can_finish_current_string = false;
|
||||
|
||||
if (searcher.Match(haystack, match_pos, haystack_length, re2_st::RE2::Anchor::UNANCHORED, matches, num_captures))
|
||||
if (searcher.Match(haystack, match_pos, haystack_length, re2::RE2::Anchor::UNANCHORED, matches, num_captures))
|
||||
{
|
||||
const auto & match = matches[0]; /// Complete match (\0)
|
||||
size_t bytes_to_copy = (match.data() - haystack.data()) - copy_pos;
|
||||
@ -181,11 +186,11 @@ struct ReplaceRegexpImpl
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
re2::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
re2::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
@ -223,7 +228,7 @@ struct ReplaceRegexpImpl
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
re2::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
@ -242,7 +247,7 @@ struct ReplaceRegexpImpl
|
||||
if (needle.empty())
|
||||
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Length of the pattern argument in function {} must be greater than 0.", name);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
re2::RE2 searcher(needle, regexp_options);
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
@ -272,11 +277,11 @@ struct ReplaceRegexpImpl
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
re2::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
re2::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
@ -319,7 +324,7 @@ struct ReplaceRegexpImpl
|
||||
size_t haystack_size = haystack_offsets.size();
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
re2::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
@ -342,7 +347,7 @@ struct ReplaceRegexpImpl
|
||||
const char * repl_data = reinterpret_cast<const char *>(replacement_data.data() + repl_from);
|
||||
const size_t repl_length = static_cast<unsigned>(replacement_offsets[i] - repl_from - 1);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
re2::RE2 searcher(needle, regexp_options);
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
int num_captures = std::min(searcher.NumberOfCapturingGroups() + 1, max_captures);
|
||||
@ -369,11 +374,11 @@ struct ReplaceRegexpImpl
|
||||
res_data.reserve(haystack_data.size());
|
||||
res_offsets.resize(haystack_size);
|
||||
|
||||
re2_st::RE2::Options regexp_options;
|
||||
re2::RE2::Options regexp_options;
|
||||
/// Don't write error messages to stderr.
|
||||
regexp_options.set_log_errors(false);
|
||||
|
||||
re2_st::RE2 searcher(needle, regexp_options);
|
||||
re2::RE2 searcher(needle, regexp_options);
|
||||
|
||||
if (!searcher.ok())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The pattern argument is not a valid re2 pattern: {}", searcher.error());
|
||||
|
@ -50,7 +50,7 @@ bool SlowWithHyperscanChecker::isSlowOneRepeat(std::string_view regexp)
|
||||
size_t start_pos = 0;
|
||||
while (start_pos < haystack.size())
|
||||
{
|
||||
if (searcher_one_repeat.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 2))
|
||||
if (searcher_one_repeat.Match(haystack, start_pos, haystack.size(), re2::RE2::Anchor::UNANCHORED, matches, 2))
|
||||
{
|
||||
const auto & match = matches[0];
|
||||
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
|
||||
@ -72,7 +72,7 @@ bool SlowWithHyperscanChecker::isSlowTwoRepeats(std::string_view regexp)
|
||||
size_t start_pos = 0;
|
||||
while (start_pos < haystack.size())
|
||||
{
|
||||
if (searcher_two_repeats.Match(haystack, start_pos, haystack.size(), re2_st::RE2::Anchor::UNANCHORED, matches, 3))
|
||||
if (searcher_two_repeats.Match(haystack, start_pos, haystack.size(), re2::RE2::Anchor::UNANCHORED, matches, 3))
|
||||
{
|
||||
const auto & match = matches[0];
|
||||
start_pos = (matches[0].data() - haystack.data()) + match.size(); // new start pos = prefix before match + match length
|
||||
|
@ -3,7 +3,14 @@
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
#include <re2_st/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic push
|
||||
# pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant"
|
||||
#endif
|
||||
#include <re2/re2.h>
|
||||
#ifdef __clang__
|
||||
# pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -22,8 +29,8 @@ public:
|
||||
private:
|
||||
bool isSlowOneRepeat(std::string_view regexp);
|
||||
bool isSlowTwoRepeats(std::string_view regexp);
|
||||
re2_st::RE2 searcher_one_repeat;
|
||||
re2_st::RE2 searcher_two_repeats;
|
||||
re2::RE2 searcher_one_repeat;
|
||||
re2::RE2 searcher_two_repeats;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ public:
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
|
||||
{
|
||||
const ColumnConst * column_pattern = checkAndGetColumnConstStringOrFixedString(arguments[1].column.get());
|
||||
const Regexps::Regexp re = Regexps::createRegexp</*is_like*/ false, /*no_capture*/ true, CountMatchesBase::case_insensitive>(column_pattern->getValue<String>());
|
||||
const OptimizedRegularExpression re = Regexps::createRegexp</*is_like*/ false, /*no_capture*/ true, CountMatchesBase::case_insensitive>(column_pattern->getValue<String>());
|
||||
OptimizedRegularExpression::MatchVec matches;
|
||||
|
||||
const IColumn * column_haystack = arguments[0].column.get();
|
||||
@ -95,7 +95,7 @@ public:
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Error in FunctionCountMatches::getReturnTypeImpl()");
|
||||
}
|
||||
|
||||
static uint64_t countMatches(std::string_view src, const Regexps::Regexp & re, OptimizedRegularExpression::MatchVec & matches)
|
||||
static uint64_t countMatches(std::string_view src, const OptimizedRegularExpression & re, OptimizedRegularExpression::MatchVec & matches)
|
||||
{
|
||||
/// Only one match is required, no need to copy more.
|
||||
static const unsigned matches_limit = 1;
|
||||
|
@ -21,7 +21,7 @@ struct ExtractImpl
|
||||
res_data.reserve(data.size() / 5);
|
||||
res_offsets.resize(offsets.size());
|
||||
|
||||
const Regexps::Regexp regexp = Regexps::createRegexp<false, false, false>(pattern);
|
||||
const OptimizedRegularExpression regexp = Regexps::createRegexp<false, false, false>(pattern);
|
||||
|
||||
unsigned capture = regexp.getNumberOfSubpatterns() > 0 ? 1 : 0;
|
||||
OptimizedRegularExpression::MatchVec matches;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user