Merge branch 'master' into reject_poco

This commit is contained in:
Alexander Tokmakov 2024-07-12 14:30:03 +02:00
commit 5557dce188
331 changed files with 8841 additions and 5743 deletions

2
contrib/azure vendored

@ -1 +1 @@
Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf
Subproject commit ea3e19a7be08519134c643177d56c7484dfec884

View File

@ -179,12 +179,19 @@ endif ()
target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1)
# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracking.
# jemalloc provides support two unwind flavors:
# - JEMALLOC_PROF_LIBUNWIND - unw_backtrace() - gnu libunwind (compatible with llvm libunwind)
# - JEMALLOC_PROF_LIBGCC - _Unwind_Backtrace() - the original HP libunwind and the one coming with gcc / g++ / libstdc++.
#
# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
# But for JEMALLOC_PROF_LIBGCC it also calls _Unwind_Backtrace() during
# bootstraping of jemalloc, which may lead to deadlock, if the dlsym will do
# allocations somewhere (like glibc does prio 2.34, see [1]).
#
# [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=fada9018199c21c469ff0e731ef75c6020074ac9
#
# And since ClickHouse unwind already supports unw_backtrace() we can safely
# switch to it to avoid this deadlock.
target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1)
target_link_libraries (_jemalloc PRIVATE unwind)
# for RTLD_NEXT

2
contrib/pocketfft vendored

@ -1 +1 @@
Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546
Subproject commit f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3

2
contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304
Subproject commit be366233921293bd07a84dc4ea6991858665f202

View File

@ -5,20 +5,13 @@ if (NOT ENABLE_ROCKSDB)
return()
endif()
## this file is extracted from `contrib/rocksdb/CMakeLists.txt`
set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb")
list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/")
set(PORTABLE ON)
## always disable jemalloc for rocksdb by default
## because it introduces non-standard jemalloc APIs
# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs
option(WITH_JEMALLOC "build with JeMalloc" OFF)
set(USE_SNAPPY OFF)
if (TARGET ch_contrib::snappy)
set(USE_SNAPPY ON)
endif()
option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY})
## lz4, zlib, zstd is enabled in ClickHouse by default
option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING
# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd
option(WITH_SNAPPY "build with SNAPPY" ON)
option(WITH_LZ4 "build with lz4" ON)
option(WITH_ZLIB "build with zlib" ON)
option(WITH_ZSTD "build with zstd" ON)
@ -26,78 +19,46 @@ option(WITH_ZSTD "build with zstd" ON)
# third-party/folly is only validated to work on Linux and Windows for now.
# So only turn it on there by default.
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows")
if(MSVC AND MSVC_VERSION LESS 1910)
# Folly does not compile with MSVC older than VS2017
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
endif()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
endif()
if( NOT DEFINED CMAKE_CXX_STANDARD )
set(CMAKE_CXX_STANDARD 11)
if(WITH_SNAPPY)
add_definitions(-DSNAPPY)
list(APPEND THIRDPARTY_LIBS ch_contrib::snappy)
endif()
if(MSVC)
option(WITH_XPRESS "build with windows built in compression" OFF)
include("${ROCKSDB_SOURCE_DIR}/thirdparty.inc")
else()
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD")
# FreeBSD has jemalloc as default malloc
# but it does not have all the jemalloc files in include/...
set(WITH_JEMALLOC ON)
else()
if(WITH_JEMALLOC AND TARGET ch_contrib::jemalloc)
add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE)
list(APPEND THIRDPARTY_LIBS ch_contrib::jemalloc)
endif()
endif()
if(WITH_SNAPPY)
add_definitions(-DSNAPPY)
list(APPEND THIRDPARTY_LIBS ch_contrib::snappy)
endif()
if(WITH_ZLIB)
add_definitions(-DZLIB)
list(APPEND THIRDPARTY_LIBS ch_contrib::zlib)
endif()
if(WITH_LZ4)
add_definitions(-DLZ4)
list(APPEND THIRDPARTY_LIBS ch_contrib::lz4)
endif()
if(WITH_ZSTD)
add_definitions(-DZSTD)
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
if(WITH_ZLIB)
add_definitions(-DZLIB)
list(APPEND THIRDPARTY_LIBS ch_contrib::zlib)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
if(POWER9)
set(HAS_POWER9 1)
set(HAS_ALTIVEC 1)
else()
set(HAS_POWER8 1)
set(HAS_ALTIVEC 1)
endif(POWER9)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
if(WITH_LZ4)
add_definitions(-DLZ4)
list(APPEND THIRDPARTY_LIBS ch_contrib::lz4)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")
set(HAS_ARMV8_CRC 1)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")
if(WITH_ZSTD)
add_definitions(-DZSTD)
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
option(PORTABLE "build a portable binary" ON)
if(ENABLE_AVX2 AND ENABLE_PCLMULQDQ)
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
add_definitions(-DHAVE_SSE42)
add_definitions(-DHAVE_PCLMUL)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64")
set (HAS_ARMV8_CRC 1)
# the original build descriptions set specific flags for ARM. These flags are already subsumed by ClickHouse's general
# ARM flags, see cmake/cpu_features.cmake
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
endif()
set (HAVE_THREAD_LOCAL 1)
if(HAVE_THREAD_LOCAL)
add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL)
@ -107,8 +68,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")
add_definitions(-DOS_SOLARIS)
elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
add_definitions(-DOS_FREEBSD)
elseif(CMAKE_SYSTEM_NAME MATCHES "Android")
@ -123,12 +82,10 @@ endif()
if (OS_LINUX)
add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT)
add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT)
add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT)
elseif (OS_FREEBSD)
add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT)
endif()
set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb")
include_directories(${ROCKSDB_SOURCE_DIR})
include_directories("${ROCKSDB_SOURCE_DIR}/include")
@ -136,11 +93,11 @@ if(WITH_FOLLY_DISTRIBUTED_MUTEX)
include_directories("${ROCKSDB_SOURCE_DIR}/third-party/folly")
endif()
# Main library source code
set(SOURCES
${ROCKSDB_SOURCE_DIR}/cache/cache.cc
${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc
${ROCKSDB_SOURCE_DIR}/cache/cache_key.cc
${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc
${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
@ -156,6 +113,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc
${ROCKSDB_SOURCE_DIR}/db/builder.cc
${ROCKSDB_SOURCE_DIR}/db/c.cc
${ROCKSDB_SOURCE_DIR}/db/column_family.cc
@ -229,6 +187,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc
${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc
${ROCKSDB_SOURCE_DIR}/env/mock_env.cc
${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc
${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc
${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc
${ROCKSDB_SOURCE_DIR}/file/file_util.cc
@ -247,6 +206,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc
${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc
${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc
${ROCKSDB_SOURCE_DIR}/memory/memory_allocator.cc
${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc
${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc
${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc
@ -322,6 +282,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/unique_id.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc
${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc
@ -333,9 +294,12 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_handler.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_result.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc
${ROCKSDB_SOURCE_DIR}/util/coding.cc
${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc
${ROCKSDB_SOURCE_DIR}/util/comparator.cc
@ -347,6 +311,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc
${ROCKSDB_SOURCE_DIR}/util/random.cc
${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc
${ROCKSDB_SOURCE_DIR}/util/regex.cc
${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc
${ROCKSDB_SOURCE_DIR}/util/slice.cc
${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc
@ -362,18 +327,23 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc
${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load.cc
${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc
${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc
${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
${ROCKSDB_SOURCE_DIR}/utilities/debug.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_secondary_cache.cc
${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc
${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc
@ -393,6 +363,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc
@ -411,6 +382,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc
@ -425,7 +397,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc
rocksdb_build_version.cc)
build_version.cc) # generated by hand
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
set_source_files_properties(
@ -462,5 +434,6 @@ endif()
add_library(_rocksdb ${SOURCES})
add_library(ch_contrib::rocksdb ALIAS _rocksdb)
target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
# SYSTEM is required to overcome some issues
target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include")

View File

@ -16,6 +16,9 @@ dpkg -i package_folder/clickhouse-client_*.deb
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# shellcheck disable=SC1091
source /utils.lib
# install test configs
/usr/share/clickhouse-test/config/install.sh
@ -272,3 +275,5 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
fi
collect_core_dumps

View File

@ -12,8 +12,7 @@ MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
# disable for now
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
@ -310,7 +309,7 @@ function run_tests()
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
set +e
timeout -s TERM --preserve-status 120m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
@ -321,7 +320,7 @@ export -f run_tests
# This should be enough to setup job and collect artifacts
TIMEOUT=$((MAX_RUN_TIME - 300))
TIMEOUT=$((MAX_RUN_TIME - 600))
if [ "$NUM_TRIES" -gt "1" ]; then
# We don't run tests with Ordinary database in PRs, only in master.
# So run new/changed tests with Ordinary at least once in flaky check.
@ -483,3 +482,5 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
fi
collect_core_dumps

View File

@ -1,8 +1,5 @@
#!/bin/bash
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
@ -315,12 +312,4 @@ function collect_query_and_trace_logs()
done
}
function collect_core_dumps()
{
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
zstd --threads=0 "$core"
mv "$core.zst" /test_output/
done
}
# vi: ft=bash

View File

@ -1,5 +1,10 @@
#!/bin/bash
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# ASAN doesn't work with suid_dumpable=2
sysctl fs.suid_dumpable=1
function run_with_retry()
{
if [[ $- =~ e ]]; then
@ -48,4 +53,12 @@ function timeout_with_logging() {
return $exit_code
}
function collect_core_dumps()
{
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
zstd --threads=0 "$core"
mv "$core.zst" /test_output/
done
}
# vi: ft=bash

View File

@ -21,6 +21,9 @@ source /attach_gdb.lib
# shellcheck source=../stateless/stress_tests.lib
source /stress_tests.lib
# shellcheck disable=SC1091
source /utils.lib
install_packages package_folder
# Thread Fuzzer allows to check more permutations of possible thread scheduling

View File

@ -16,7 +16,7 @@ sidebar_label: clickhouse-local
While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high-performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`.
Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local file](#query_data_in_file) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
## Download clickhouse-local

View File

@ -18,7 +18,7 @@ ClickHouse also supports:
During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL.
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`.
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases (`any` and `anyLast` respectively) when followed by the modifier `RESPECT NULLS`. For example, `FIRST_VALUE(b) RESPECT NULLS`.
**Examples:**

View File

@ -5,12 +5,12 @@ sidebar_position: 102
# any
Selects the first encountered value of a column.
Selects the first encountered value of a column, ignoring any `NULL` values.
**Syntax**
```sql
any(column)
any(column) [RESPECT NULLS]
```
Aliases: `any_value`, [`first_value`](../reference/first_value.md).
@ -20,7 +20,9 @@ Aliases: `any_value`, [`first_value`](../reference/first_value.md).
**Returned value**
By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) it supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not.
:::note
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not.
:::
:::note
The return type of the function is the same as the input, except for LowCardinality which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour.

View File

@ -1,44 +0,0 @@
---
slug: /en/sql-reference/aggregate-functions/reference/any_respect_nulls
sidebar_position: 103
---
# any_respect_nulls
Selects the first encountered value of a column, irregardless of whether it is a `NULL` value or not.
Alias: `any_value_respect_nulls`, `first_value_repect_nulls`.
**Syntax**
```sql
any_respect_nulls(column)
```
**Parameters**
- `column`: The column name.
**Returned value**
- The last value encountered, irregardless of whether it is a `NULL` value or not.
**Example**
Query:
```sql
CREATE TABLE any_nulls (city Nullable(String)) ENGINE=Log;
INSERT INTO any_nulls (city) VALUES (NULL), ('Amsterdam'), ('New York'), ('Tokyo'), ('Valencia'), (NULL);
SELECT any(city), any_respect_nulls(city) FROM any_nulls;
```
```response
┌─any(city)─┬─any_respect_nulls(city)─┐
│ Amsterdam │ ᴺᵁᴸᴸ │
└───────────┴─────────────────────────┘
```
**See Also**
- [any](../reference/any.md)

View File

@ -5,17 +5,21 @@ sidebar_position: 105
# anyLast
Selects the last value encountered. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function.
Selects the last value encountered, ignoring any `NULL` values by default. The result is just as indeterminate as for the [any](../../../sql-reference/aggregate-functions/reference/any.md) function.
**Syntax**
```sql
anyLast(column)
anyLast(column) [RESPECT NULLS]
```
**Parameters**
- `column`: The column name.
:::note
Supports the `RESPECT NULLS` modifier after the function name. Using this modifier will ensure the function selects the first value passed, regardless of whether it is `NULL` or not.
:::
**Returned value**
- The last value encountered.

View File

@ -1,39 +0,0 @@
---
slug: /en/sql-reference/aggregate-functions/reference/anylast_respect_nulls
sidebar_position: 106
---
# anyLast_respect_nulls
Selects the last value encountered, irregardless of whether it is `NULL` or not.
**Syntax**
```sql
anyLast_respect_nulls(column)
```
**Parameters**
- `column`: The column name.
**Returned value**
- The last value encountered, irregardless of whether it is `NULL` or not.
**Example**
Query:
```sql
CREATE TABLE any_last_nulls (city Nullable(String)) ENGINE=Log;
INSERT INTO any_last_nulls (city) VALUES ('Amsterdam'),(NULL),('New York'),('Tokyo'),('Valencia'),(NULL);
SELECT anyLast(city), anyLast_respect_nulls(city) FROM any_last_nulls;
```
```response
┌─anyLast(city)─┬─anyLast_respect_nulls(city)─┐
│ Valencia │ ᴺᵁᴸᴸ │
└───────────────┴─────────────────────────────┘
```

View File

@ -45,10 +45,9 @@ ClickHouse-specific aggregate functions:
- [aggThrow](../reference/aggthrow.md)
- [analysisOfVariance](../reference/analysis_of_variance.md)
- [any](../reference/any_respect_nulls.md)
- [any](../reference/any.md)
- [anyHeavy](../reference/anyheavy.md)
- [anyLast](../reference/anylast.md)
- [anyLast](../reference/anylast_respect_nulls.md)
- [boundingRatio](../reference/boundrat.md)
- [first_value](../reference/first_value.md)
- [last_value](../reference/last_value.md)

View File

@ -1,6 +1,6 @@
---
slug: /en/sql-reference/data-types/dynamic
sidebar_position: 56
sidebar_position: 62
sidebar_label: Dynamic
---
@ -494,13 +494,43 @@ SELECT count(), dynamicType(d), _part FROM test GROUP BY _part, dynamicType(d) O
As we can see, ClickHouse kept the most frequent types `UInt64` and `Array(UInt64)` and casted all other types to `String`.
## JSONExtract functions with Dynamic
All `JSONExtract*` functions support `Dynamic` type:
```sql
SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Dynamic') AS dynamic, dynamicType(dynamic) AS dynamic_type;
```
```text
┌─dynamic─┬─dynamic_type───────────┐
│ [1,2,3] │ Array(Nullable(Int64)) │
└─────────┴────────────────────────┘
```
```sql
SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_dynamics, mapApply((k, v) -> (k, variantType(v)), map_of_dynamics) AS map_of_dynamic_types```
```text
┌─map_of_dynamics──────────────────┬─map_of_dynamic_types────────────────────────────┐
│ {'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │
└──────────────────────────────────┴─────────────────────────────────────────────────┘
```
```sql
SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS dynamics, arrayMap(x -> (x.1, variantType(x.2)), dynamics) AS dynamic_types```
```
```text
┌─dynamics───────────────────────────────┬─dynamic_types─────────────────────────────────────────┐
│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │
└────────────────────────────────────────┴───────────────────────────────────────────────────────┘
```
### Binary output format
In [RowBinary](../../interfaces/formats.md#rowbinary-rowbinary) format values of `Dynamic` type are serialized in the following format:
In RowBinary format values of `Dynamic` type are serialized in the following format:
```text
<binary_encoded_data_type><value_in_binary_format_according_to_the_data_type>
```
See the [data types binary encoding specification](../../sql-reference/data-types/data-types-binary-encoding.md)

View File

@ -5,11 +5,11 @@ sidebar_label: Object Data Type
keywords: [object, data type]
---
# Object Data Type
# Object Data Type (deprecated)
:::note
This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864)
:::
**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
<hr />
Stores JavaScript Object Notation (JSON) documents in a single column.

View File

@ -3080,4 +3080,4 @@ Result:
## Distance functions
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).
All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).

View File

@ -314,10 +314,71 @@ SELECT groupBitXor(cityHash64(*)) FROM table
Calculates a 32-bit hash code from any type of integer.
This is a relatively fast non-cryptographic hash function of average quality for numbers.
**Syntax**
```sql
intHash32(int)
```
**Arguments**
- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md).
**Returned value**
- 32-bit hash code. [UInt32](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT intHash32(42);
```
Result:
```response
┌─intHash32(42)─┐
│ 1228623923 │
└───────────────┘
```
## intHash64
Calculates a 64-bit hash code from any type of integer.
It works faster than intHash32. Average quality.
This is a relatively fast non-cryptographic hash function of average quality for numbers.
It works faster than [intHash32](#inthash32).
**Syntax**
```sql
intHash64(int)
```
**Arguments**
- `int` — Integer to hash. [(U)Int*](../data-types/int-uint.md).
**Returned value**
- 64-bit hash code. [UInt64](../data-types/int-uint.md).
**Example**
Query:
```sql
SELECT intHash64(42);
```
Result:
```response
┌────────intHash64(42)─┐
│ 11490350930367293593 │
└──────────────────────┘
```
## SHA1, SHA224, SHA256, SHA512, SHA512_256

View File

@ -58,6 +58,8 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90'
KILL QUERY WHERE user='username' SYNC
```
:::tip If you are killing a query in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the query is killed on all replicas:::
Read-only users can only stop their own queries.
By default, the asynchronous version of queries is used (`ASYNC`), which does not wait for confirmation that queries have stopped.
@ -131,6 +133,7 @@ KILL MUTATION WHERE database = 'default' AND table = 'table'
-- Cancel the specific mutation:
KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt'
```
:::tip If you are killing a mutation in ClickHouse Cloud or in a self-managed cluster, then be sure to use the ```ON CLUSTER [cluster-name]``` option, in order to ensure the mutation is killed on all replicas:::
The query is useful when a mutation is stuck and cannot finish (e.g. if some function in the mutation query throws an exception when applied to the data contained in the table).

View File

@ -0,0 +1,73 @@
---
slug: /en/sql-reference/window-functions/dense_rank
sidebar_label: dense_rank
sidebar_position: 7
---
# dense_rank
Ranks the current row within its partition without gaps. In other words, if the value of any new row encountered is equal to the value of one of the previous rows then it will receive the next successive rank without any gaps in ranking.
The [rank](./rank.md) function provides the same behaviour, but with gaps in ranking.
**Syntax**
```sql
dense_rank (column_name)
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Returned value**
- A number for the current row within its partition, without gaps in ranking. [UInt64](../data-types/int-uint.md).
**Example**
The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA).
Query:
```sql
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```
```sql
SELECT player, salary,
dense_rank() OVER (ORDER BY salary DESC) AS dense_rank
FROM salaries;
```
Result:
```response
┌─player──────────┬─salary─┬─dense_rank─┐
1. │ Gary Chen │ 195000 │ 1 │
2. │ Robert George │ 195000 │ 1 │
3. │ Charles Juarez │ 190000 │ 2 │
4. │ Michael Stanley │ 150000 │ 3 │
5. │ Douglas Benson │ 150000 │ 3 │
6. │ Scott Harrison │ 150000 │ 3 │
7. │ James Henderson │ 140000 │ 4 │
└─────────────────┴────────┴────────────┘
```

View File

@ -0,0 +1,79 @@
---
slug: /en/sql-reference/window-functions/first_value
sidebar_label: first_value
sidebar_position: 3
---
# first_value
Returns the first value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour.
**Syntax**
```sql
first_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]]
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
Alias: `any`.
:::note
Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped.
See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information.
:::
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Returned value**
- The first value evaluated within its ordered frame.
**Example**
In this example the `first_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players.
Query:
```sql
DROP TABLE IF EXISTS salaries;
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```
```sql
SELECT player, salary,
first_value(player) OVER (ORDER BY salary DESC) AS highest_paid_player
FROM salaries;
```
Result:
```response
┌─player──────────┬─salary─┬─highest_paid_player─┐
1. │ Gary Chen │ 196000 │ Gary Chen │
2. │ Robert George │ 195000 │ Gary Chen │
3. │ Charles Juarez │ 190000 │ Gary Chen │
4. │ Scott Harrison │ 180000 │ Gary Chen │
5. │ Douglas Benson │ 150000 │ Gary Chen │
6. │ James Henderson │ 140000 │ Gary Chen │
7. │ Michael Stanley │ 100000 │ Gary Chen │
└─────────────────┴────────┴─────────────────────┘
```

View File

@ -1,10 +1,11 @@
---
slug: /en/sql-reference/window-functions/
sidebar_position: 62
sidebar_label: Window Functions
title: Window Functions
sidebar_position: 1
---
# Window Functions
Windows functions let you perform calculations across a set of rows that are related to the current row.
Some of the calculations that you can do are similar to those that can be done with an aggregate function, but a window function doesn't cause rows to be grouped into a single output - the individual rows are still returned.
@ -12,8 +13,8 @@ Some of the calculations that you can do are similar to those that can be done w
ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported.
| Feature | Supported? |
|------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Feature | Supported? |
|--------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ |
| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | ✅ |
| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ |
@ -75,14 +76,14 @@ WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
These functions can be used only as a window function.
- `row_number()` - Number the current row within its partition starting from 1.
- `first_value(x)` - Return the first non-NULL value evaluated within its ordered frame.
- `last_value(x)` - Return the last non-NULL value evaluated within its ordered frame.
- `nth_value(x, offset)` - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- `rank()` - Rank the current row within its partition with gaps.
- `dense_rank()` - Rank the current row within its partition without gaps.
- `lagInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame. The offset parameter, if not specified, defaults to 1, meaning it will fetch the value from the next row. If the calculated row exceeds the boundaries of the window frame, the specified default value is returned.
- `leadInFrame(x[, offset[, default]])` - Return a value evaluated at the row that is offset rows after the current row within the ordered frame. If offset is not provided, it defaults to 1. If the offset leads to a position outside the window frame, the specified default value is used.
- [`row_number()`](./row_number.md) - Number the current row within its partition starting from 1.
- [`first_value(x)`](./first_value.md) - Return the first value evaluated within its ordered frame.
- [`last_value(x)`](./last_value.md) - Return the last value evaluated within its ordered frame.
- [`nth_value(x, offset)`](./nth_value.md) - Return the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
- [`rank()`](./rank.md) - Rank the current row within its partition with gaps.
- [`dense_rank()`](./dense_rank.md) - Rank the current row within its partition without gaps.
- [`lagInFrame(x)`](./lagInFrame.md) - Return a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
- [`leadInFrame(x)`](./leadInFrame.md) - Return a value evaluated at the row that is offset rows after the current row within the ordered frame.
## Examples

View File

@ -0,0 +1,79 @@
---
slug: /en/sql-reference/window-functions/lagInFrame
sidebar_label: lagInFrame
sidebar_position: 8
---
# lagInFrame
Returns a value evaluated at the row that is at a specified physical offset row before the current row within the ordered frame.
**Syntax**
```sql
lagInFrame(x[, offset[, default]])
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Parameters**
- `x` — Column name.
- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default).
- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default).
**Returned value**
- Value evaluated at the row that is at a specified physical offset before the current row within the ordered frame.
**Example**
This example looks at historical data for a specific stock and uses the `lagInFrame` function to calculate a day-to-day delta and percentage change in the closing price of the stock.
Query:
```sql
CREATE TABLE stock_prices
(
`date` Date,
`open` Float32, -- opening price
`high` Float32, -- daily high
`low` Float32, -- daily low
`close` Float32, -- closing price
`volume` UInt32 -- trade volume
)
Engine = Memory;
INSERT INTO stock_prices FORMAT Values
('2024-06-03', 113.62, 115.00, 112.00, 115.00, 438392000),
('2024-06-04', 115.72, 116.60, 114.04, 116.44, 403324000),
('2024-06-05', 118.37, 122.45, 117.47, 122.44, 528402000),
('2024-06-06', 124.05, 125.59, 118.32, 121.00, 664696000),
('2024-06-07', 119.77, 121.69, 118.02, 120.89, 412386000);
```
```sql
SELECT
date,
close,
lagInFrame(close, 1, close) OVER (ORDER BY date ASC) AS previous_day_close,
COALESCE(ROUND(close - previous_day_close, 2)) AS delta,
COALESCE(ROUND((delta / previous_day_close) * 100, 2)) AS percent_change
FROM stock_prices
ORDER BY date DESC;
```
Result:
```response
┌───────date─┬──close─┬─previous_day_close─┬─delta─┬─percent_change─┐
1. │ 2024-06-07 │ 120.89 │ 121 │ -0.11 │ -0.09 │
2. │ 2024-06-06 │ 121 │ 122.44 │ -1.44 │ -1.18 │
3. │ 2024-06-05 │ 122.44 │ 116.44 │ 6 │ 5.15 │
4. │ 2024-06-04 │ 116.44 │ 115 │ 1.44 │ 1.25 │
5. │ 2024-06-03 │ 115 │ 115 │ 0 │ 0 │
└────────────┴────────┴────────────────────┴───────┴────────────────┘
```

View File

@ -0,0 +1,79 @@
---
slug: /en/sql-reference/window-functions/last_value
sidebar_label: last_value
sidebar_position: 4
---
# last_value
Returns the last value evaluated within its ordered frame. By default, NULL arguments are skipped, however the `RESPECT NULLS` modifier can be used to override this behaviour.
**Syntax**
```sql
last_value (column_name) [[RESPECT NULLS] | [IGNORE NULLS]]
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
Alias: `anyLast`.
:::note
Using the optional modifier `RESPECT NULLS` after `first_value(column_name)` will ensure that `NULL` arguments are not skipped.
See [NULL processing](../aggregate-functions/index.md/#null-processing) for more information.
:::
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Returned value**
- The last value evaluated within its ordered frame.
**Example**
In this example the `last_value` function is used to find the highest paid footballer from a fictional dataset of salaries of Premier League football players.
Query:
```sql
DROP TABLE IF EXISTS salaries;
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 196000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```
```sql
SELECT player, salary,
last_value(player) OVER (ORDER BY salary DESC RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_player
FROM salaries;
```
Result:
```response
┌─player──────────┬─salary─┬─lowest_paid_player─┐
1. │ Gary Chen │ 196000 │ Michael Stanley │
2. │ Robert George │ 195000 │ Michael Stanley │
3. │ Charles Juarez │ 190000 │ Michael Stanley │
4. │ Scott Harrison │ 180000 │ Michael Stanley │
5. │ Douglas Benson │ 150000 │ Michael Stanley │
6. │ James Henderson │ 140000 │ Michael Stanley │
7. │ Michael Stanley │ 100000 │ Michael Stanley │
└─────────────────┴────────┴────────────────────┘
```

View File

@ -0,0 +1,60 @@
---
slug: /en/sql-reference/window-functions/leadInFrame
sidebar_label: leadInFrame
sidebar_position: 9
---
# leadInFrame
Returns a value evaluated at the row that is offset rows after the current row within the ordered frame.
**Syntax**
```sql
leadInFrame(x[, offset[, default]])
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Parameters**
- `x` — Column name.
- `offset` — Offset to apply. [(U)Int*](../data-types/int-uint.md). (Optional - `1` by default).
- `default` — Value to return if calculated row exceeds the boundaries of the window frame. (Optional - `null` by default).
**Returned value**
- value evaluated at the row that is offset rows after the current row within the ordered frame.
**Example**
This example looks at [historical data](https://www.kaggle.com/datasets/sazidthe1/nobel-prize-data) for Nobel Prize winners and uses the `leadInFrame` function to return a list of successive winners in the physics category.
Query:
```sql
CREATE OR REPLACE VIEW nobel_prize_laureates AS FROM file('nobel_laureates_data.csv') SELECT *;
```
```sql
FROM nobel_prize_laureates SELECT fullName, leadInFrame(year, 1, year) OVER (PARTITION BY category ORDER BY year) AS year, category, motivation WHERE category == 'physics' ORDER BY year DESC LIMIT 9;
```
Result:
```response
┌─fullName─────────┬─year─┬─category─┬─motivation─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
1. │ Pierre Agostini │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │
2. │ Ferenc Krausz │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │
3. │ Anne L Huillier │ 2023 │ physics │ for experimental methods that generate attosecond pulses of light for the study of electron dynamics in matter │
4. │ Alain Aspect │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │
5. │ Anton Zeilinger │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │
6. │ John Clauser │ 2022 │ physics │ for experiments with entangled photons establishing the violation of Bell inequalities and pioneering quantum information science │
7. │ Syukuro Manabe │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │
8. │ Klaus Hasselmann │ 2021 │ physics │ for the physical modelling of Earths climate quantifying variability and reliably predicting global warming │
9. │ Giorgio Parisi │ 2021 │ physics │ for the discovery of the interplay of disorder and fluctuations in physical systems from atomic to planetary scales │
└──────────────────┴──────┴──────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -0,0 +1,75 @@
---
slug: /en/sql-reference/window-functions/nth_value
sidebar_label: nth_value
sidebar_position: 5
---
# nth_value
Returns the first non-NULL value evaluated against the nth row (offset) in its ordered frame.
**Syntax**
```sql
nth_value (x, offset)
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Parameters**
- `x` — Column name.
- `offset` — nth row to evaluate current row against.
**Returned value**
- The first non-NULL value evaluated against the nth row (offset) in its ordered frame.
**Example**
In this example the `nth-value` function is used to find the third-highest salary from a fictional dataset of salaries of Premier League football players.
Query:
```sql
DROP TABLE IF EXISTS salaries;
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 100000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 180000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```
```sql
SELECT player, salary, nth_value(player,3) OVER(ORDER BY salary DESC) AS third_highest_salary FROM salaries;
```
Result:
```response
┌─player──────────┬─salary─┬─third_highest_salary─┐
1. │ Gary Chen │ 195000 │ │
2. │ Robert George │ 195000 │ │
3. │ Charles Juarez │ 190000 │ Charles Juarez │
4. │ Scott Harrison │ 180000 │ Charles Juarez │
5. │ Douglas Benson │ 150000 │ Charles Juarez │
6. │ James Henderson │ 140000 │ Charles Juarez │
7. │ Michael Stanley │ 100000 │ Charles Juarez │
└─────────────────┴────────┴──────────────────────┘
```

View File

@ -0,0 +1,74 @@
---
slug: /en/sql-reference/window-functions/rank
sidebar_label: rank
sidebar_position: 6
---
# rank
Ranks the current row within its partition with gaps. In other words, if the value of any row it encounters is equal to the value of a previous row then it will receive the same rank as that previous row.
The rank of the next row is then equal to the rank of the previous row plus a gap equal to the number of times the previous rank was given.
The [dense_rank](./dense_rank.md) function provides the same behaviour but without gaps in ranking.
**Syntax**
```sql
rank (column_name)
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Returned value**
- A number for the current row within its partition, including gaps. [UInt64](../data-types/int-uint.md).
**Example**
The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA).
Query:
```sql
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```
```sql
SELECT player, salary,
rank() OVER (ORDER BY salary DESC) AS rank
FROM salaries;
```
Result:
```response
┌─player──────────┬─salary─┬─rank─┐
1. │ Gary Chen │ 195000 │ 1 │
2. │ Robert George │ 195000 │ 1 │
3. │ Charles Juarez │ 190000 │ 3 │
4. │ Douglas Benson │ 150000 │ 4 │
5. │ Michael Stanley │ 150000 │ 4 │
6. │ Scott Harrison │ 150000 │ 4 │
7. │ James Henderson │ 140000 │ 7 │
└─────────────────┴────────┴──────┘
```

View File

@ -0,0 +1,67 @@
---
slug: /en/sql-reference/window-functions/row_number
sidebar_label: row_number
sidebar_position: 2
---
# row_number
Numbers the current row within its partition starting from 1.
**Syntax**
```sql
row_number (column_name)
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[ROWS or RANGE expression_to_bound_rows_withing_the_group]] | [window_name])
FROM table_name
WINDOW window_name as ([[PARTITION BY grouping_column] [ORDER BY sorting_column])
```
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Returned value**
- A number for the current row within its partition. [UInt64](../data-types/int-uint.md).
**Example**
The following example is based on the example provided in the video instructional [Ranking window functions in ClickHouse](https://youtu.be/Yku9mmBYm_4?si=XIMu1jpYucCQEoXA).
Query:
```sql
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M');
```
```sql
SELECT player, salary,
row_number() OVER (ORDER BY salary DESC) AS row_number
FROM salaries;
```
Result:
```response
┌─player──────────┬─salary─┬─row_number─┐
1. │ Gary Chen │ 195000 │ 1 │
2. │ Robert George │ 195000 │ 2 │
3. │ Charles Juarez │ 190000 │ 3 │
4. │ Scott Harrison │ 150000 │ 4 │
5. │ Michael Stanley │ 150000 │ 5 │
└─────────────────┴────────┴────────────┘
```

View File

@ -35,10 +35,9 @@ disable = '''
broad-except,
bare-except,
no-else-return,
global-statement
global-statement,
'''
[tool.pylint.SIMILARITIES]
# due to SQL
min-similarity-lines=1000

View File

@ -8,6 +8,7 @@ namespace ProfileEvents
extern const Event DistributedConnectionUsable;
extern const Event DistributedConnectionMissingTable;
extern const Event DistributedConnectionStaleReplica;
extern const Event DistributedConnectionFailTry;
}
namespace DB
@ -97,6 +98,8 @@ void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::
}
catch (const Exception & e)
{
ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT
&& e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF && e.code() != ErrorCodes::DNS_ERROR)
throw;

View File

@ -7,7 +7,6 @@
namespace ProfileEvents
{
extern const Event HedgedRequestsChangeReplica;
extern const Event DistributedConnectionFailTry;
extern const Event DistributedConnectionFailAtAll;
}
@ -327,7 +326,6 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect
{
ShuffledPool & shuffled_pool = shuffled_pools[index];
LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1);
shuffled_pool.slowdown_count = 0;

View File

@ -267,7 +267,11 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length)
#else
void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length)
#endif
{
const ColumnAggregateFunction & from_concrete = assert_cast<const ColumnAggregateFunction &>(from);
@ -462,7 +466,11 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size
insertMergeFrom(from, n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
#else
void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n)
#endif
{
insertRangeFrom(from, n, 1);
}

View File

@ -145,7 +145,14 @@ public:
void insertData(const char * pos, size_t length) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & from, size_t n) override;
#else
using IColumn::insertFrom;
void doInsertFrom(const IColumn & from, size_t n) override;
#endif
void insertFrom(ConstAggregateDataPtr place);
@ -182,7 +189,11 @@ public:
void protect() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & from, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override;
#endif
void popBack(size_t n) override;
@ -201,7 +212,11 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override
#endif
{
return 0;
}

View File

@ -337,7 +337,11 @@ bool ColumnArray::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnArray::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnArray::doInsertFrom(const IColumn & src_, size_t n)
#endif
{
const ColumnArray & src = assert_cast<const ColumnArray &>(src_);
size_t size = src.sizeAt(n);
@ -392,7 +396,11 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan
: 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
#else
int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs_, nan_direction_hint);
}
@ -535,7 +543,11 @@ void ColumnArray::getExtremes(Field & min, Field & max) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
if (length == 0)
return;

View File

@ -84,10 +84,18 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
#endif
void insertDefault() override;
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
@ -95,7 +103,11 @@ public:
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#endif
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
void getPermutation(PermutationSortDirection direction, PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override;

View File

@ -85,7 +85,11 @@ public:
bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); }
void insert(const Field &) override { throwMustBeDecompressed(); }
bool tryInsert(const Field &) override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
#else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
#endif
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
void insertDefault() override { throwMustBeDecompressed(); }
void popBack(size_t) override { throwMustBeDecompressed(); }
@ -100,7 +104,11 @@ public:
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
#endif
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
throwMustBeDecompressed();

View File

@ -32,6 +32,8 @@ private:
ColumnConst(const ColumnConst & src) = default;
public:
bool isConst() const override { return true; }
ColumnPtr convertToFullColumn() const;
ColumnPtr convertToFullColumnIfConst() const override
@ -121,7 +123,11 @@ public:
return data->isNullAt(0);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
#else
void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
#endif
{
s += length;
}
@ -145,12 +151,20 @@ public:
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn &, size_t) override
#else
void doInsertFrom(const IColumn &, size_t) override
#endif
{
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
#else
void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
#endif
void insertDefault() override
{
@ -223,7 +237,11 @@ public:
return data->allocatedBytes() + sizeof(s);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
#else
int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
#endif
{
return data->compareAt(0, 0, *assert_cast<const ColumnConst &>(rhs).data, nan_direction_hint);
}

View File

@ -32,7 +32,11 @@ namespace ErrorCodes
}
template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
#else
int ColumnDecimal<T>::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const
#endif
{
auto & other = static_cast<const Self &>(rhs_);
const T & a = data[n];
@ -331,7 +335,11 @@ void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
}
template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnDecimal<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnDecimal & src_vec = assert_cast<const ColumnDecimal &>(src);

View File

@ -55,9 +55,17 @@ public:
void reserve(size_t n) override { data.reserve_exact(n); }
void shrinkToFit() override { data.shrink_to_fit(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#else
void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
#endif
{
ValueType v = assert_cast<const Self &>(src).getData()[position];
data.resize_fill(data.size() + length, v);
@ -68,7 +76,11 @@ public:
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
void insert(const Field & x) override { data.push_back(x.get<T>()); }
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void popBack(size_t n) override
{
@ -92,7 +104,11 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#endif
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,

View File

@ -215,7 +215,11 @@ bool ColumnDynamic::tryInsert(const DB::Field & x)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n)
#else
void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
#endif
{
const auto & dynamic_src = assert_cast<const ColumnDynamic &>(src_);
@ -265,7 +269,11 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n)
variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
#else
void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
#endif
{
if (start + length > src_.size())
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. "
@ -431,7 +439,11 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else
void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#endif
{
const auto & dynamic_src = assert_cast<const ColumnDynamic &>(src_);
@ -591,7 +603,11 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const
variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
#else
int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
#endif
{
const auto & left_variant = assert_cast<const ColumnVariant &>(*variant_column);
const auto & right_dynamic = assert_cast<const ColumnDynamic &>(rhs);

View File

@ -142,9 +142,16 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertDefault() override
{
@ -213,7 +220,11 @@ public:
return scattered_columns;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
bool hasEqualValues() const override
{

View File

@ -74,7 +74,11 @@ bool ColumnFixedString::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
#else
void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
#endif
{
const ColumnFixedString & src = assert_cast<const ColumnFixedString &>(src_);
@ -86,7 +90,11 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
if (n != src_concrete.getN())
@ -219,7 +227,11 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation &
return elements.size();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
chassert(this->n == src_concrete.n);

View File

@ -98,9 +98,17 @@ public:
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t index) override;
#else
void doInsertFrom(const IColumn & src_, size_t index) override;
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertData(const char * pos, size_t length) override;
@ -129,7 +137,11 @@ public:
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else
int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#endif
{
const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_);
chassert(this->n == rhs.n);
@ -144,7 +156,11 @@ public:
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;

View File

@ -72,7 +72,11 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFunction::insertFrom(const IColumn & src, size_t n)
#else
void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
#endif
{
const ColumnFunction & src_func = assert_cast<const ColumnFunction &>(src);
@ -89,7 +93,11 @@ void ColumnFunction::insertFrom(const IColumn & src, size_t n)
++elements_size;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnFunction & src_func = assert_cast<const ColumnFunction &>(src);

View File

@ -94,8 +94,16 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override;
#endif
void insertData(const char *, size_t) override
{
@ -137,7 +145,11 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override
#endif
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "compareAt is not implemented for {}", getName());
}

View File

@ -159,7 +159,11 @@ void ColumnLowCardinality::insertDefault()
idx.insertPosition(getDictionary().getDefaultValueIndex());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
#else
void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n)
#endif
{
const auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src);
@ -187,7 +191,11 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
idx.insertPosition(getDictionary().uniqueInsertFrom(src, n));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src);
@ -364,7 +372,11 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs,
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}

View File

@ -78,10 +78,18 @@ public:
bool tryInsert(const Field & x) override;
void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
void insertFromFullColumn(const IColumn & src, size_t n);
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length);
void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions);
@ -127,7 +135,11 @@ public:
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;

View File

@ -153,17 +153,29 @@ void ColumnMap::updateHashFast(SipHash & hash) const
nested->updateHashFast(hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnMap::insertFrom(const IColumn & src, size_t n)
#else
void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
#endif
{
nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
assert_cast<ColumnArray &>(*nested).insertManyFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), position, length);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
nested->insertRangeFrom(
assert_cast<const ColumnMap &>(src).getNestedColumn(),
@ -210,7 +222,11 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele
return res;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
const auto & rhs_map = assert_cast<const ColumnMap &>(rhs);
return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint);

View File

@ -66,16 +66,28 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;

View File

@ -221,7 +221,11 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const
return pos;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(src);
getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length);
@ -258,7 +262,11 @@ bool ColumnNullable::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnNullable::insertFrom(const IColumn & src, size_t n)
#else
void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
#endif
{
const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n);
@ -266,7 +274,11 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length);
@ -402,7 +414,11 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else
int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs_, null_direction_hint);
}

View File

@ -69,11 +69,21 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertFromNotNullable(const IColumn & src, size_t n);
void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length);
@ -90,7 +100,11 @@ public:
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#endif
#if USE_EMBEDDED_COMPILER

View File

@ -763,12 +763,20 @@ void ColumnObject::get(size_t n, Field & res) const
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnObject::insertFrom(const IColumn & src, size_t n)
#else
void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
#endif
{
insert(src[n]);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const auto & src_object = assert_cast<const ColumnObject &>(src);

View File

@ -209,8 +209,15 @@ public:
void insert(const Field & field) override;
bool tryInsert(const Field & field) override;
void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
@ -228,7 +235,11 @@ public:
/// Order of rows in ColumnObject is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#endif
void getExtremes(Field & min, Field & max) const override;
/// All other methods throw exception.

View File

@ -174,7 +174,11 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const
return values->skipSerializedInArena(pos);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
if (length == 0)
return;
@ -248,7 +252,11 @@ bool ColumnSparse::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnSparse::insertFrom(const IColumn & src, size_t n)
#else
void ColumnSparse::doInsertFrom(const IColumn & src, size_t n)
#endif
{
if (const auto * src_sparse = typeid_cast<const ColumnSparse *>(&src))
{
@ -446,7 +454,11 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray<Type> & indexes, size_t l
return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else
int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#endif
{
if (const auto * rhs_sparse = typeid_cast<const ColumnSparse *>(&rhs_))
return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint);

View File

@ -81,10 +81,18 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
void insertDefault() override;
void insertManyDefaults(size_t length) override;
@ -98,7 +106,11 @@ public:
template <typename Type>
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#endif
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;

View File

@ -39,7 +39,11 @@ ColumnString::ColumnString(const ColumnString & src)
last_offset, chars.size());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnString & src_concrete = assert_cast<const ColumnString &>(src);
const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]];
@ -129,7 +133,11 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
if (length == 0)
return;

View File

@ -142,7 +142,11 @@ public:
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override
#else
void doInsertFrom(const IColumn & src_, size_t n) override
#endif
{
const ColumnString & src = assert_cast<const ColumnString &>(src_);
const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray.
@ -165,7 +169,11 @@ public:
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertData(const char * pos, size_t length) override
{
@ -212,7 +220,11 @@ public:
hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
@ -238,7 +250,11 @@ public:
offsets.push_back(offsets.back() + 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#endif
{
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1);

View File

@ -205,7 +205,11 @@ bool ColumnTuple::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n)
#endif
{
const ColumnTuple & src = assert_cast<const ColumnTuple &>(src_);
@ -218,7 +222,11 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
columns[i]->insertFrom(*src.columns[i], n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnTuple & src_tuple = assert_cast<const ColumnTuple &>(src);
@ -318,7 +326,11 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
column->updateHashFast(hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
column_length += length;
const size_t tuple_size = columns.size();
@ -470,7 +482,11 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_
return 0;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}

View File

@ -65,8 +65,15 @@ public:
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
@ -76,14 +83,22 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,

View File

@ -90,7 +90,11 @@ public:
return getNestedColumn()->updateHashWithValue(n, hash_func);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); }
bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); }
@ -488,7 +492,11 @@ const char * ColumnUnique<ColumnType>::skipSerializedInArena(const char *) const
}
template <typename ColumnType>
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnUnique<ColumnType>::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
if (is_nullable)
{

View File

@ -595,17 +595,29 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVariant::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
#endif
{
insertFromImpl(src_, n, nullptr);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length)
#else
void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
#endif
{
insertRangeFromImpl(src_, start, length, nullptr);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else
void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#endif
{
insertManyFromImpl(src_, position, length, nullptr);
}
@ -1174,7 +1186,11 @@ bool ColumnVariant::hasEqualValues() const
return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
const auto & rhs_variant = assert_cast<const ColumnVariant &>(rhs);
Discriminator left_discr = globalDiscriminatorAt(n);

View File

@ -180,9 +180,19 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src_, size_t position, size_t length) override;
#else
using IColumn::insertFrom;
using IColumn::insertManyFrom;
using IColumn::insertRangeFrom;
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override;
void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override;
#endif
/// Methods for insertion from another Variant but with known mapping between global discriminators.
void insertFrom(const IColumn & src_, size_t n, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping);
@ -213,7 +223,11 @@ public:
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,

View File

@ -503,7 +503,11 @@ bool ColumnVector<T>::tryInsert(const DB::Field & x)
}
template <typename T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnVector<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnVector & src_vec = assert_cast<const ColumnVector &>(src);

View File

@ -64,12 +64,20 @@ public:
return data.size();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override
#else
void doInsertFrom(const IColumn & src, size_t n) override
#endif
{
data.push_back(assert_cast<const Self &>(src).getData()[n]);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
#endif
{
ValueType v = assert_cast<const Self &>(src).getData()[position];
data.resize_fill(data.size() + length, v);
@ -142,7 +150,11 @@ public:
}
/// This method implemented in header because it could be possibly devirtualized.
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
#endif
{
return CompareHelper<T>::compare(data[n], assert_cast<const Self &>(rhs_).data[m], nan_direction_hint);
}
@ -228,7 +240,11 @@ public:
bool tryInsert(const DB::Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;

View File

@ -46,7 +46,11 @@ String IColumn::dumpStructure() const
return res.str();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void IColumn::insertFrom(const IColumn & src, size_t n)
#else
void IColumn::doInsertFrom(const IColumn & src, size_t n)
#endif
{
insert(src[n]);
}

View File

@ -1,15 +1,14 @@
#pragma once
#include <Common/COW.h>
#include <Common/PODArray_fwd.h>
#include <Common/Exception.h>
#include <Common/typeid_cast.h>
#include <base/StringRef.h>
#include <Core/TypeId.h>
#include <base/StringRef.h>
#include <Common/COW.h>
#include <Common/Exception.h>
#include <Common/PODArray_fwd.h>
#include <Common/typeid_cast.h>
#include "config.h"
class SipHash;
class Collator;
@ -180,18 +179,42 @@ public:
/// Appends n-th element from other column with the same type.
/// Is used in merge-sort and merges. It could be implemented in inherited classes more optimally than default implementation.
#if !defined(ABORT_ON_LOGICAL_ERROR)
virtual void insertFrom(const IColumn & src, size_t n);
#else
void insertFrom(const IColumn & src, size_t n)
{
assertTypeEquality(src);
doInsertFrom(src, n);
}
#endif
/// Appends range of elements from other column with the same type.
/// Could be used to concatenate columns.
#if !defined(ABORT_ON_LOGICAL_ERROR)
virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
#else
void insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
assertTypeEquality(src);
doInsertRangeFrom(src, start, length);
}
#endif
/// Appends one element from other column with the same type multiple times.
#if !defined(ABORT_ON_LOGICAL_ERROR)
virtual void insertManyFrom(const IColumn & src, size_t position, size_t length)
{
for (size_t i = 0; i < length; ++i)
insertFrom(src, position);
}
#else
void insertManyFrom(const IColumn & src, size_t position, size_t length)
{
assertTypeEquality(src);
doInsertManyFrom(src, position, length);
}
#endif
/// Appends one field multiple times. Can be optimized in inherited classes.
virtual void insertMany(const Field & field, size_t length)
@ -322,7 +345,15 @@ public:
*
* For non Nullable and non floating point types, nan_direction_hint is ignored.
*/
#if !defined(ABORT_ON_LOGICAL_ERROR)
[[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
#else
[[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
assertTypeEquality(rhs);
return doCompareAt(n, m, rhs, nan_direction_hint);
}
#endif
#if USE_EMBEDDED_COMPILER
@ -610,6 +641,8 @@ public:
[[nodiscard]] virtual bool isSparse() const { return false; }
[[nodiscard]] virtual bool isConst() const { return false; }
[[nodiscard]] virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default;
@ -633,6 +666,29 @@ protected:
Equals equals,
Sort full_sort,
PartialSort partial_sort) const;
#if defined(ABORT_ON_LOGICAL_ERROR)
virtual void doInsertFrom(const IColumn & src, size_t n);
virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length)
{
for (size_t i = 0; i < length; ++i)
insertFrom(src, position);
}
virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
private:
void assertTypeEquality(const IColumn & rhs) const
{
/// For Sparse and Const columns, we can compare only internal types. It is considered normal to e.g. insert from normal vector column to a sparse vector column.
/// This case is specifically handled in ColumnSparse implementation. Similar situation with Const column.
/// For the rest of column types we can compare the types directly.
chassert((isConst() || isSparse()) ? getDataType() == rhs.getDataType() : typeid(*this) == typeid(rhs));
}
#endif
};
using ColumnPtr = IColumn::Ptr;

View File

@ -26,7 +26,11 @@ public:
size_t byteSize() const override { return 0; }
size_t byteSizeAt(size_t) const override { return 0; }
size_t allocatedBytes() const override { return 0; }
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#endif
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
}
@ -67,12 +71,20 @@ public:
{
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn &, size_t) override
#else
void doInsertFrom(const IColumn &, size_t) override
#endif
{
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override
#else
void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override
#endif
{
s += length;
}

View File

@ -85,7 +85,11 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique.");
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t, size_t) override
#else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override
#endif
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertRangeFrom is not supported for ColumnUnique.");
}

View File

@ -52,7 +52,11 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src)
#else
static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src)
#endif
{
size_t size = src.size();
dst.insertManyFrom(src, size / 2, size);

View File

@ -38,10 +38,19 @@ namespace ErrorCodes
extern const int CANNOT_MREMAP;
}
void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size)
{
auto & logger = Poco::Logger::root();
LOG_FATAL(&logger, "Logical error: '{}'.", description);
if (trace)
LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size));
abort();
}
void abortOnFailedAssertion(const String & description)
{
LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description);
abort();
StackTrace st;
abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize());
}
bool terminate_on_any_exception = false;
@ -58,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc
#ifdef ABORT_ON_LOGICAL_ERROR
if (code == ErrorCodes::LOGICAL_ERROR)
{
abortOnFailedAssertion(msg);
abortOnFailedAssertion(msg, trace.data(), 0, trace.size());
}
#endif

View File

@ -25,8 +25,6 @@ namespace DB
class AtomicLogger;
[[noreturn]] void abortOnFailedAssertion(const String & description);
/// This flag can be set for testing purposes - to check that no exceptions are thrown.
extern bool terminate_on_any_exception;
@ -167,6 +165,8 @@ protected:
mutable std::vector<StackTrace::FramePointers> capture_thread_frame_pointers;
};
[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size);
[[noreturn]] void abortOnFailedAssertion(const String & description);
std::string getExceptionStackTraceString(const std::exception & e);
std::string getExceptionStackTraceString(std::exception_ptr e);

View File

@ -14,6 +14,7 @@
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_ALLOCATE_MEMORY;

View File

@ -235,7 +235,7 @@ bool NamedCollectionFactory::loadIfNot(std::lock_guard<std::mutex> & lock)
loadFromConfig(context->getConfigRef(), lock);
loadFromSQL(lock);
if (metadata_storage->supportsPeriodicUpdate())
if (metadata_storage->isReplicated())
{
update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); });
update_task->activate();
@ -357,6 +357,13 @@ void NamedCollectionFactory::reloadFromSQL()
add(std::move(collections), lock);
}
bool NamedCollectionFactory::usesReplicatedStorage()
{
std::lock_guard lock(mutex);
loadIfNot(lock);
return metadata_storage->isReplicated();
}
void NamedCollectionFactory::updateFunc()
{
LOG_TRACE(log, "Named collections background updating thread started");

View File

@ -34,6 +34,8 @@ public:
void updateFromSQL(const ASTAlterNamedCollectionQuery & query);
bool usesReplicatedStorage();
void loadIfNot();
void shutdown();

View File

@ -67,7 +67,7 @@ public:
virtual bool removeIfExists(const std::string & path) = 0;
virtual bool supportsPeriodicUpdate() const = 0;
virtual bool isReplicated() const = 0;
virtual bool waitUpdate(size_t /* timeout */) { return false; }
};
@ -89,7 +89,7 @@ public:
~LocalStorage() override = default;
bool supportsPeriodicUpdate() const override { return false; }
bool isReplicated() const override { return false; }
std::vector<std::string> list() const override
{
@ -221,7 +221,7 @@ public:
~ZooKeeperStorage() override = default;
bool supportsPeriodicUpdate() const override { return true; }
bool isReplicated() const override { return true; }
/// Return true if children changed.
bool waitUpdate(size_t timeout) override
@ -465,14 +465,14 @@ void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedColle
storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace);
}
bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const
bool NamedCollectionsMetadataStorage::isReplicated() const
{
return storage->supportsPeriodicUpdate();
return storage->isReplicated();
}
bool NamedCollectionsMetadataStorage::waitUpdate()
{
if (!storage->supportsPeriodicUpdate())
if (!storage->isReplicated())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported");
const auto & config = Context::getGlobalContextInstance()->getConfigRef();

View File

@ -30,7 +30,7 @@ public:
/// Return true if update was made
bool waitUpdate();
bool supportsPeriodicUpdate() const;
bool isReplicated() const;
private:
class INamedCollectionsStorage;

View File

@ -28,7 +28,6 @@ namespace ErrorCodes
namespace ProfileEvents
{
extern const Event DistributedConnectionFailTry;
extern const Event DistributedConnectionFailAtAll;
extern const Event DistributedConnectionSkipReadOnlyReplica;
}
@ -285,7 +284,6 @@ PoolWithFailoverBase<TNestedPool>::getMany(
else
{
LOG_WARNING(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
shuffled_pool.error_count = std::min(max_error_cap, shuffled_pool.error_count + 1);

View File

@ -568,6 +568,7 @@ The server successfully detected this situation and will download merged part fr
M(AggregationPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for aggregation.") \
M(AggregationHashTablesInitializedAsTwoLevel, "How many hash tables were inited as two-level for aggregation.") \
M(AggregationOptimizedEqualRangesOfKeys, "For how many blocks optimization of equal ranges of keys was applied") \
M(HashJoinPreallocatedElementsInHashTables, "How many elements were preallocated in hash tables for hash join.") \
\
M(MetadataFromKeeperCacheHit, "Number of times an object storage metadata request was answered from cache without making request to Keeper") \
M(MetadataFromKeeperCacheMiss, "Number of times an object storage metadata request had to be answered from Keeper") \

View File

@ -545,7 +545,7 @@ std::string StackTrace::toString() const
return toStringCached(frame_pointers, offset, size);
}
std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size)
std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size)
{
__msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw));

View File

@ -59,7 +59,7 @@ public:
const FramePointers & getFramePointers() const { return frame_pointers; }
std::string toString() const;
static std::string toString(void ** frame_pointers, size_t offset, size_t size);
static std::string toString(void * const * frame_pointers, size_t offset, size_t size);
static void dropCache();
/// @param fatal - if true, will process inline frames (slower)

View File

@ -228,7 +228,6 @@ Pool::Entry Pool::tryGet()
for (auto connection_it = connections.cbegin(); connection_it != connections.cend();)
{
Connection * connection_ptr = *connection_it;
/// Fixme: There is a race condition here b/c we do not synchronize with Pool::Entry's copy-assignment operator
if (connection_ptr->ref_count == 0)
{
{

View File

@ -64,17 +64,6 @@ public:
decrementRefCount();
}
Entry & operator= (const Entry & src) /// NOLINT
{
pool = src.pool;
if (data)
decrementRefCount();
data = src.data;
if (data)
incrementRefCount();
return * this;
}
bool isNull() const
{
return data == nullptr;

View File

@ -13,13 +13,11 @@ mysqlxx::Pool::Entry getWithFailover(mysqlxx::Pool & connections_pool)
constexpr size_t max_tries = 3;
mysqlxx::Pool::Entry worker_connection;
for (size_t try_no = 1; try_no <= max_tries; ++try_no)
{
try
{
worker_connection = connections_pool.tryGet();
mysqlxx::Pool::Entry worker_connection = connections_pool.tryGet();
if (!worker_connection.isNull())
{

View File

@ -151,6 +151,7 @@ namespace DB
M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \
M(Bool, enable_azure_sdk_logging, false, "Enables logging from Azure sdk", 0) \
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
M(String, merge_workload, "default", "Name of workload to be used to access resources for all merges (may be overridden by a merge tree setting)", 0) \
M(String, mutation_workload, "default", "Name of workload to be used to access resources for all mutations (may be overridden by a merge tree setting)", 0) \
M(Bool, prepare_system_log_tables_on_startup, false, "If true, ClickHouse creates all configured `system.*_log` tables before the startup. It can be helpful if some startup scripts depend on these tables.", 0) \

View File

@ -346,6 +346,7 @@ class IColumn;
\
M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \
M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \
M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, "Ignore ON CLUSTER clause for replicated named collections management queries.", 0) \
/** Settings for testing hedged requests */ \
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \
@ -680,9 +681,11 @@ class IColumn;
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \
\
M(Bool, collect_hash_table_stats_during_aggregation, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
M(UInt64, max_entries_for_hash_table_stats, 10'000, "How many entries hash table statistics collected during aggregation is allowed to have", 0) \
M(UInt64, max_size_to_preallocate_for_aggregation, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before aggregation", 0) \
\
M(Bool, collect_hash_table_stats_during_joins, true, "Enable collecting hash table statistics to optimize memory allocation", 0) \
M(UInt64, max_size_to_preallocate_for_joins, 100'000'000, "For how many elements it is allowed to preallocate space in all hash tables in total before join", 0) \
\
M(Bool, kafka_disable_num_consumers_limit, false, "Disable limit on kafka_num_consumers that depends on the number of available CPU cores", 0) \
M(Bool, enable_software_prefetch_in_aggregation, true, "Enable use of software prefetch in aggregation", 0) \
M(Bool, allow_aggregate_partitions_independently, false, "Enable independent aggregation of partitions on separate threads when partition key suits group by key. Beneficial when number of partitions close to number of cores and partitions have roughly the same size", 0) \
@ -1012,6 +1015,7 @@ class IColumn;
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_entries_for_hash_table_stats, 10'000) \
/* ---- */ \
MAKE_OBSOLETE(M, DefaultDatabaseEngine, default_database_engine, DefaultDatabaseEngine::Atomic) \
MAKE_OBSOLETE(M, UInt64, max_pipeline_depth, 0) \

View File

@ -67,6 +67,8 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"enable_named_columns_in_function_tuple", false, true, "Generate named tuples in function tuple() when all names are unique and can be treated as unquoted identifiers."},
{"input_format_json_ignore_key_case", false, false, "Ignore json key case while read json field from string."},
{"optimize_trivial_insert_select", true, false, "The optimization does not make sense in many cases."},
{"collect_hash_table_stats_during_joins", false, true, "New setting."},
{"max_size_to_preallocate_for_joins", 0, 100'000'000, "New setting."},
{"input_format_orc_read_use_writer_time_zone", false, false, "Whether use the writer's time zone in ORC stripe for ORC row reader, the default ORC row reader's time zone is GMT."},
{"lightweight_mutation_projection_mode", "throw", "throw", "When lightweight delete happens on a table with projection(s), the possible operations include throw the exception as projection exists, or drop all projection related to this table then do lightweight delete."},
{"database_replicated_allow_heavy_create", true, false, "Long-running DDL queries (CREATE AS SELECT and POPULATE) for Replicated database engine was forbidden"},
@ -74,6 +76,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"},
{"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"},
{"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"},
{"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."},
{"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."},
{"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."}
}},

View File

@ -11,6 +11,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTTLElement.h>
#include <Poco/String.h>
@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction
qualified_name.database = table_identifier->getDatabaseName();
qualified_name.table = table_identifier->shortName();
}
else if (arg->as<ASTSubquery>())
{
/// Allow IN subquery.
/// Do not add tables from the subquery into dependencies,
/// because CREATE will succeed anyway.
return;
}
else
{
assert(false);

View File

@ -107,12 +107,24 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name,
StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name)
{
// it is important to call the destructors of not_in_use without
// locked mutex to avoid potential deadlock.
DetachedTables not_in_use;
std::lock_guard lock(mutex);
auto table = DatabaseOrdinary::detachTableUnlocked(name);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleanupDetachedTables();
StoragePtr table;
{
std::lock_guard lock(mutex);
table = DatabaseOrdinary::detachTableUnlocked(name);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleanupDetachedTables();
}
if (!not_in_use.empty())
{
not_in_use.clear();
LOG_DEBUG(log, "Finished removing not used detached tables");
}
return table;
}

View File

@ -29,6 +29,7 @@
#include <Common/randomNumber.h>
#include <Common/setThreadName.h>
#include <base/sleep.h>
#include <base/scope_guard.h>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <Parsers/CommonParsers.h>
@ -532,13 +533,17 @@ static inline void dumpDataForTables(
bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & metadata)
{
bool opened_transaction = false;
mysqlxx::PoolWithFailover::Entry connection;
while (!isCancelled())
{
try
{
connection = pool.tryGet();
mysqlxx::PoolWithFailover::Entry connection = pool.tryGet();
SCOPE_EXIT({
if (opened_transaction)
connection->query("ROLLBACK").execute();
});
if (connection.isNull())
{
if (settings->max_wait_time_when_mysql_unavailable < 0)
@ -602,9 +607,6 @@ bool MaterializedMySQLSyncThread::prepareSynchronized(MaterializeMetadata & meta
{
tryLogCurrentException(log);
if (opened_transaction)
connection->query("ROLLBACK").execute();
if (settings->max_wait_time_when_mysql_unavailable < 0)
throw;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,41 @@
#pragma once
#include <DataTypes/IDataType.h>
#include <Columns/IColumn.h>
#include <Formats/FormatSettings.h>
namespace DB
{
struct JSONExtractInsertSettings
{
/// If false, JSON boolean values won't be inserted into columns with integer types
/// It's used in JSONExtractInt64/JSONExtractUInt64/... functions.
bool convert_bool_to_integer = true;
/// If true, when complex type like Array/Map has both valid and invalid elements,
/// the default value will be inserted on invalid elements.
/// For example, if we have [1, "hello", 2] and type Array(UInt32),
/// we will insert [1, 0, 2] in the column. Used in all JSONExtract functions.
bool insert_default_on_invalid_elements_in_complex_types = false;
};
template <typename JSONParser>
class JSONExtractTreeNode
{
public:
JSONExtractTreeNode() = default;
virtual ~JSONExtractTreeNode() = default;
virtual bool insertResultToColumn(IColumn &, const typename JSONParser::Element &, const JSONExtractInsertSettings & insert_setting, const FormatSettings & format_settings, String & error) const = 0;
};
/// Build a tree for insertion JSON element into a column with provided data type.
template <typename JSONParser>
std::unique_ptr<JSONExtractTreeNode<JSONParser>> buildJSONExtractTree(const DataTypePtr & type, const char * source_for_exception_message);
template <typename JSONParser>
void jsonElementToString(const typename JSONParser::Element & element, WriteBuffer & buf, const FormatSettings & format_settings);
template <typename JSONParser, typename NumberType>
bool tryGetNumericValueFromJSONElement(NumberType & value, const typename JSONParser::Element & element, bool convert_bool_to_integer, String & error);
}

View File

@ -225,19 +225,6 @@ namespace
Paths paths;
};
bool checkIfTypesAreEqual(const DataTypes & types)
{
if (types.empty())
return true;
for (size_t i = 1; i < types.size(); ++i)
{
if (!types[0]->equals(*types[i]))
return false;
}
return true;
}
void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
type_indexes.clear();
@ -272,24 +259,31 @@ namespace
type_indexes.erase(TypeIndex::Nothing);
}
/// If we have both Int64 and UInt64, convert all Int64 to UInt64,
/// If we have both Int64 and UInt64, convert all not-negative Int64 to UInt64,
/// because UInt64 is inferred only in case of Int64 overflow.
void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes)
void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64))
return;
bool have_negative_integers = false;
for (auto & type : data_types)
{
if (WhichDataType(type).isInt64())
type = std::make_shared<DataTypeUInt64>();
{
bool is_negative = json_info && json_info->negative_integers.contains(type.get());
have_negative_integers |= is_negative;
if (!is_negative)
type = std::make_shared<DataTypeUInt64>();
}
}
type_indexes.erase(TypeIndex::Int64);
if (!have_negative_integers)
type_indexes.erase(TypeIndex::Int64);
}
/// If we have both Int64 and Float64 types, convert all Int64 to Float64.
void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes)
void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
{
bool have_floats = type_indexes.contains(TypeIndex::Float64);
bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64);
@ -300,7 +294,12 @@ namespace
{
WhichDataType which(type);
if (which.isInt64() || which.isUInt64())
type = std::make_shared<DataTypeFloat64>();
{
auto new_type = std::make_shared<DataTypeFloat64>();
if (json_info && json_info->numbers_parsed_from_json_strings.erase(type.get()))
json_info->numbers_parsed_from_json_strings.insert(new_type.get());
type = new_type;
}
}
type_indexes.erase(TypeIndex::Int64);
@ -635,9 +634,9 @@ namespace
if (settings.try_infer_integers)
{
/// Transform Int64 to UInt64 if needed.
transformIntegers(data_types, type_indexes);
transformIntegers(data_types, type_indexes, json_info);
/// Transform integers to floats if needed.
transformIntegersAndFloatsToFloats(data_types, type_indexes);
transformIntegersAndFloatsToFloats(data_types, type_indexes, json_info);
}
/// Transform Date to DateTime or both to String if needed.
@ -887,7 +886,7 @@ namespace
}
template <bool is_json>
DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings)
DataTypePtr tryInferNumber(ReadBuffer & buf, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
if (buf.eof())
return nullptr;
@ -911,7 +910,12 @@ namespace
Int64 tmp_int;
buf.position() = number_start;
if (tryReadIntText(tmp_int, buf))
return std::make_shared<DataTypeInt64>();
{
auto type = std::make_shared<DataTypeInt64>();
if (json_info && tmp_int < 0)
json_info->negative_integers.insert(type.get());
return type;
}
/// In case of Int64 overflow we can try to infer UInt64.
UInt64 tmp_uint;
@ -934,7 +938,12 @@ namespace
Int64 tmp_int;
if (tryReadIntText(tmp_int, peekable_buf))
return std::make_shared<DataTypeInt64>();
{
auto type = std::make_shared<DataTypeInt64>();
if (json_info && tmp_int < 0)
json_info->negative_integers.insert(type.get());
return type;
}
peekable_buf.rollbackToCheckpoint(/* drop= */ true);
/// In case of Int64 overflow we can try to infer UInt64.
@ -952,7 +961,7 @@ namespace
}
template <bool is_json>
DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings)
DataTypePtr tryInferNumberFromStringImpl(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_inference_info = nullptr)
{
ReadBufferFromString buf(field);
@ -960,7 +969,12 @@ namespace
{
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
return std::make_shared<DataTypeInt64>();
{
auto type = std::make_shared<DataTypeInt64>();
if (json_inference_info && tmp_int < 0)
json_inference_info->negative_integers.insert(type.get());
return type;
}
/// We can safely get back to the start of buffer, because we read from a string and we didn't reach eof.
buf.position() = buf.buffer().begin();
@ -1011,7 +1025,7 @@ namespace
{
if (settings.json.try_infer_numbers_from_strings)
{
if (auto number_type = tryInferNumberFromStringImpl<true>(field, settings))
if (auto number_type = tryInferNumberFromStringImpl<true>(field, settings, json_info))
{
json_info->numbers_parsed_from_json_strings.insert(number_type.get());
return number_type;
@ -1254,10 +1268,23 @@ namespace
}
/// Number
return tryInferNumber<is_json>(buf, settings);
return tryInferNumber<is_json>(buf, settings, json_info);
}
}
bool checkIfTypesAreEqual(const DataTypes & types)
{
if (types.empty())
return true;
for (size_t i = 1; i < types.size(); ++i)
{
if (!types[0]->equals(*types[i]))
return false;
}
return true;
}
void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
{
DataTypes types = {first, second};
@ -1275,6 +1302,11 @@ void transformInferredJSONTypesIfNeeded(
second = std::move(types[1]);
}
void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
transformInferredTypesIfNeededImpl<true>(types, settings, json_info);
}
void transformInferredJSONTypesFromDifferentFilesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings)
{
JSONInferenceInfo json_info;
@ -1396,6 +1428,12 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting
return tryInferNumberFromStringImpl<false>(field, settings);
}
DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
return tryInferNumberFromStringImpl<false>(field, settings, json_info);
}
DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const FormatSettings & settings)
{
if (settings.try_infer_dates && tryInferDate(field))

View File

@ -2,6 +2,7 @@
#include <DataTypes/IDataType.h>
#include <IO/ReadBuffer.h>
#include <Formats/FormatSettings.h>
#include <vector>
@ -18,6 +19,11 @@ struct JSONInferenceInfo
/// We store numbers that were parsed from strings.
/// It's used in types transformation to change such numbers back to string if needed.
std::unordered_set<const IDataType *> numbers_parsed_from_json_strings;
/// Store integer types that were inferred from negative numbers.
/// It's used to determine common type for Int64 and UInt64
/// TODO: check it not only in JSON formats.
std::unordered_set<const IDataType *> negative_integers;
/// Indicates if currently we are inferring type for Map/Object key.
bool is_object_key = false;
/// When we transform types for the same column from different files
@ -48,6 +54,7 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma
/// Try to parse a number value from a string. By default, it tries to parse Float64,
/// but if setting try_infer_integers is enabled, it also tries to parse Int64.
DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSettings & settings);
DataTypePtr tryInferJSONNumberFromString(std::string_view field, const FormatSettings & settings, JSONInferenceInfo * json_info);
/// It takes two types inferred for the same column and tries to transform them to a common type if possible.
/// It's also used when we try to infer some not ordinary types from another types.
@ -77,6 +84,7 @@ void transformInferredTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, c
/// Example 2:
/// We merge DataTypeJSONPaths types to a single DataTypeJSONPaths type with union of all JSON paths.
void transformInferredJSONTypesIfNeeded(DataTypePtr & first, DataTypePtr & second, const FormatSettings & settings, JSONInferenceInfo * json_info);
void transformInferredJSONTypesIfNeeded(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info);
/// Make final transform for types inferred in JSON format. It does 3 types of transformation:
/// 1) Checks if type is unnamed Tuple(...), tries to transform nested types to find a common type for them and if all nested types
@ -107,4 +115,6 @@ NamesAndTypesList getNamesAndRecursivelyNullableTypes(const Block & header);
/// Check if type contains Nothing, like Array(Tuple(Nullable(Nothing), String))
bool checkIfTypeIsComplete(const DataTypePtr & type);
bool checkIfTypesAreEqual(const DataTypes & types);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -26,7 +26,6 @@
#include <Interpreters/TemporaryDataOnDisk.h>
#include <Parsers/ASTSelectQuery.h>
#include <base/sort.h>
#include <Common/CacheBase.h>
#include <Common/CurrentMetrics.h>
#include <Common/CurrentThread.h>
#include <Common/JSONBuilder.h>
@ -78,115 +77,6 @@ namespace ErrorCodes
namespace
{
/** Collects observed HashMap-s sizes to avoid redundant intermediate resizes.
*/
class HashTablesStatistics
{
public:
struct Entry
{
size_t sum_of_sizes; // used to determine if it's better to convert aggregation to two-level from the beginning
size_t median_size; // roughly the size we're going to preallocate on each thread
};
using Cache = DB::CacheBase<UInt64, Entry>;
using CachePtr = std::shared_ptr<Cache>;
using Params = DB::Aggregator::Params::StatsCollectingParams;
/// Collection and use of the statistics should be enabled.
std::optional<Entry> getSizeHint(const Params & params)
{
if (!params.isCollectionAndUseEnabled())
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
std::lock_guard lock(mutex);
const auto cache = getHashTableStatsCache(params, lock);
if (const auto hint = cache->get(params.key))
{
LOG_TRACE(
getLogger("Aggregator"),
"An entry for key={} found in cache: sum_of_sizes={}, median_size={}",
params.key,
hint->sum_of_sizes,
hint->median_size);
return *hint;
}
return std::nullopt;
}
/// Collection and use of the statistics should be enabled.
void update(size_t sum_of_sizes, size_t median_size, const Params & params)
{
if (!params.isCollectionAndUseEnabled())
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
std::lock_guard lock(mutex);
const auto cache = getHashTableStatsCache(params, lock);
const auto hint = cache->get(params.key);
// We'll maintain the maximum among all the observed values until the next prediction turns out to be too wrong.
if (!hint || sum_of_sizes < hint->sum_of_sizes / 2 || hint->sum_of_sizes < sum_of_sizes || median_size < hint->median_size / 2
|| hint->median_size < median_size)
{
LOG_TRACE(
getLogger("Aggregator"),
"Statistics updated for key={}: new sum_of_sizes={}, median_size={}",
params.key,
sum_of_sizes,
median_size);
cache->set(params.key, std::make_shared<Entry>(Entry{.sum_of_sizes = sum_of_sizes, .median_size = median_size}));
}
}
std::optional<DB::HashTablesCacheStatistics> getCacheStats() const
{
std::lock_guard lock(mutex);
if (hash_table_stats)
{
size_t hits = 0, misses = 0;
hash_table_stats->getStats(hits, misses);
return DB::HashTablesCacheStatistics{.entries = hash_table_stats->count(), .hits = hits, .misses = misses};
}
return std::nullopt;
}
static size_t calculateCacheKey(const DB::ASTPtr & select_query)
{
if (!select_query)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null");
const auto & select = select_query->as<DB::ASTSelectQuery &>();
// It may happen in some corner cases like `select 1 as num group by num`.
if (!select.tables())
return 0;
SipHash hash;
hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/ true));
if (const auto where = select.where())
hash.update(where->getTreeHash(/*ignore_aliases=*/ true));
if (const auto group_by = select.groupBy())
hash.update(group_by->getTreeHash(/*ignore_aliases=*/ true));
return hash.get64();
}
private:
CachePtr getHashTableStatsCache(const Params & params, const std::lock_guard<std::mutex> &)
{
if (!hash_table_stats || hash_table_stats->maxSizeInBytes() != params.max_entries_for_hash_table_stats)
hash_table_stats = std::make_shared<Cache>(params.max_entries_for_hash_table_stats);
return hash_table_stats;
}
mutable std::mutex mutex;
CachePtr hash_table_stats;
};
HashTablesStatistics & getHashTablesStatistics()
{
static HashTablesStatistics hash_tables_stats;
return hash_tables_stats;
}
bool worthConvertToTwoLevel(
size_t group_by_two_level_threshold, size_t result_size, size_t group_by_two_level_threshold_bytes, auto result_size_bytes)
{
@ -215,49 +105,29 @@ void initDataVariantsWithSizeHint(
DB::AggregatedDataVariants & result, DB::AggregatedDataVariants::Type method_chosen, const DB::Aggregator::Params & params)
{
const auto & stats_collecting_params = params.stats_collecting_params;
if (stats_collecting_params.isCollectionAndUseEnabled())
const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1;
if (auto hint = getSizeHint(stats_collecting_params, /*tables_cnt=*/max_threads))
{
if (auto hint = getHashTablesStatistics().getSizeHint(stats_collecting_params))
{
const auto max_threads = params.group_by_two_level_threshold != 0 ? std::max(params.max_threads, 1ul) : 1;
const auto lower_limit = hint->sum_of_sizes / max_threads;
const auto upper_limit = stats_collecting_params.max_size_to_preallocate_for_aggregation / max_threads;
if (hint->median_size > upper_limit)
{
/// Since we cannot afford to preallocate as much as we want, we will likely need to do resize anyway.
/// But we will also work with the big (i.e. not so cache friendly) HT from the beginning which may result in a slight slowdown.
/// So let's just do nothing.
LOG_TRACE(
getLogger("Aggregator"),
"No space were preallocated in hash tables because 'max_size_to_preallocate_for_aggregation' has too small value: {}, "
"should be at least {}",
stats_collecting_params.max_size_to_preallocate_for_aggregation,
hint->median_size * max_threads);
}
/// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703
else if ((max_threads > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000)
{
const auto adjusted = std::max(lower_limit, hint->median_size);
if (worthConvertToTwoLevel(
params.group_by_two_level_threshold,
hint->sum_of_sizes,
/*group_by_two_level_threshold_bytes*/ 0,
/*result_size_bytes*/ 0))
method_chosen = convertToTwoLevelTypeIfPossible(method_chosen);
result.init(method_chosen, adjusted);
ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel());
return;
}
}
if (worthConvertToTwoLevel(
params.group_by_two_level_threshold,
hint->sum_of_sizes,
/*group_by_two_level_threshold_bytes*/ 0,
/*result_size_bytes*/ 0))
method_chosen = convertToTwoLevelTypeIfPossible(method_chosen);
result.init(method_chosen, hint->median_size);
}
result.init(method_chosen);
else
{
result.init(method_chosen);
}
ProfileEvents::increment(ProfileEvents::AggregationHashTablesInitializedAsTwoLevel, result.isTwoLevel());
}
/// Collection and use of the statistics should be enabled.
void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::Aggregator::Params::StatsCollectingParams & params)
void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, const DB::StatsCollectingParams & params)
{
if (!params.isCollectionAndUseEnabled())
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Collection and use of the statistics should be enabled.");
return;
std::vector<size_t> sizes(data_variants.size());
for (size_t i = 0; i < data_variants.size(); ++i)
@ -265,7 +135,7 @@ void updateStatistics(const DB::ManyAggregatedDataVariants & data_variants, cons
const auto median_size = sizes.begin() + sizes.size() / 2; // not precisely though...
std::nth_element(sizes.begin(), median_size, sizes.end());
const auto sum_of_sizes = std::accumulate(sizes.begin(), sizes.end(), 0ull);
getHashTablesStatistics().update(sum_of_sizes, *median_size, params);
DB::getHashTablesStatistics().update(sum_of_sizes, *median_size, params);
}
DB::ColumnNumbers calculateKeysPositions(const DB::Block & header, const DB::Aggregator::Params & params)
@ -300,24 +170,6 @@ size_t getMinBytesForPrefetch()
namespace DB
{
std::optional<HashTablesCacheStatistics> getHashTablesCacheStatistics()
{
return getHashTablesStatistics().getCacheStats();
}
Aggregator::Params::StatsCollectingParams::StatsCollectingParams() = default;
Aggregator::Params::StatsCollectingParams::StatsCollectingParams(
const ASTPtr & select_query_,
bool collect_hash_table_stats_during_aggregation_,
size_t max_entries_for_hash_table_stats_,
size_t max_size_to_preallocate_for_aggregation_)
: key(collect_hash_table_stats_during_aggregation_ ? HashTablesStatistics::calculateCacheKey(select_query_) : 0)
, max_entries_for_hash_table_stats(max_entries_for_hash_table_stats_)
, max_size_to_preallocate_for_aggregation(max_size_to_preallocate_for_aggregation_)
{
}
Block Aggregator::getHeader(bool final) const
{
return params.getHeader(header, final);
@ -2783,8 +2635,7 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData
LOG_TRACE(log, "Merging aggregated data");
if (params.stats_collecting_params.isCollectionAndUseEnabled())
updateStatistics(data_variants, params.stats_collecting_params);
updateStatistics(data_variants, params.stats_collecting_params);
ManyAggregatedDataVariants non_empty_data;
non_empty_data.reserve(data_variants.size());
@ -3486,4 +3337,23 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons
throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant.");
}
UInt64 calculateCacheKey(const DB::ASTPtr & select_query)
{
if (!select_query)
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Query ptr cannot be null");
const auto & select = select_query->as<DB::ASTSelectQuery &>();
// It may happen in some corner cases like `select 1 as num group by num`.
if (!select.tables())
return 0;
SipHash hash;
hash.update(select.tables()->getTreeHash(/*ignore_aliases=*/true));
if (const auto where = select.where())
hash.update(where->getTreeHash(/*ignore_aliases=*/true));
if (const auto group_by = select.groupBy())
hash.update(group_by->getTreeHash(/*ignore_aliases=*/true));
return hash.get64();
}
}

Some files were not shown because too many files have changed in this diff Show More