Merge branch 'master' into fix-async-loader-dtor-race

serxa 2024-07-12 09:36:18 +00:00
commit 8ffe1215f4
102 changed files with 1908 additions and 512 deletions

contrib/azure vendored

@@ -1 +1 @@
-Subproject commit 92c94d7f37a43cc8fc4d466884a95f610c0593bf
+Subproject commit ea3e19a7be08519134c643177d56c7484dfec884

@@ -179,12 +179,19 @@ endif ()
target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1)
# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
# At this time ClickHouse uses LLVM libunwind, which follows libgcc's way of backtracing.
# jemalloc provides support for two unwind flavors:
# - JEMALLOC_PROF_LIBUNWIND - unw_backtrace() - gnu libunwind (compatible with llvm libunwind)
# - JEMALLOC_PROF_LIBGCC - _Unwind_Backtrace() - the original HP libunwind and the one coming with gcc / g++ / libstdc++.
#
# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
# But for JEMALLOC_PROF_LIBGCC it also calls _Unwind_Backtrace() during
# bootstrapping of jemalloc, which may lead to a deadlock if dlsym does
# allocations somewhere (like glibc did prior to 2.34, see [1]).
#
# [1]: https://sourceware.org/git/?p=glibc.git;a=commit;h=fada9018199c21c469ff0e731ef75c6020074ac9
#
# And since ClickHouse's libunwind already supports unw_backtrace() we can safely
# switch to it to avoid this deadlock.
target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBUNWIND=1)
target_link_libraries (_jemalloc PRIVATE unwind)
# for RTLD_NEXT
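For reference, a minimal standalone sketch (not part of this diff) contrasting the two backtrace entry points named in the comment above. It assumes a libunwind that exports `unw_backtrace()` (as ClickHouse's LLVM libunwind does after the referenced commit) plus a standard `<unwind.h>`:

```cpp
#include <libunwind.h>
#include <unwind.h>
#include <cstdio>

/// Flavor 1: what jemalloc calls under JEMALLOC_PROF_LIBUNWIND.
static int capture_with_unw(void ** frames, int max_frames)
{
    return unw_backtrace(frames, max_frames); /// fills frames[], returns the count
}

/// Flavor 2: the callback-driven API used under JEMALLOC_PROF_LIBGCC.
struct TraceState { void ** frames; int max_frames; int count; };

static _Unwind_Reason_Code on_frame(struct _Unwind_Context * ctx, void * arg)
{
    auto * state = static_cast<TraceState *>(arg);
    if (state->count >= state->max_frames)
        return _URC_END_OF_STACK;
    state->frames[state->count++] = reinterpret_cast<void *>(_Unwind_GetIP(ctx));
    return _URC_NO_REASON;
}

static int capture_with_unwind_backtrace(void ** frames, int max_frames)
{
    TraceState state{frames, max_frames, 0};
    _Unwind_Backtrace(on_frame, &state);
    return state.count;
}

int main()
{
    void * frames[64];
    std::printf("unw_backtrace: %d frames\n", capture_with_unw(frames, 64));
    std::printf("_Unwind_Backtrace: %d frames\n", capture_with_unwind_backtrace(frames, 64));
}
```

The deadlock the comment describes arises only on the second path: calling `_Unwind_Backtrace()` during jemalloc bootstrap can go through `dlsym`, and a `dlsym` that allocates (as glibc did before 2.34) re-enters the allocator while it is still initializing.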

contrib/pocketfft vendored

@@ -1 +1 @@
-Subproject commit 9efd4da52cf8d28d14531d14e43ad9d913807546
+Subproject commit f4c1aa8aa9ce79ad39e80f2c9c41b92ead90fda3

contrib/rocksdb vendored

@@ -1 +1 @@
-Subproject commit 078fa5638690004e1f744076d1bdcc4e93767304
+Subproject commit be366233921293bd07a84dc4ea6991858665f202

@@ -5,20 +5,13 @@ if (NOT ENABLE_ROCKSDB)
return()
endif()
## this file is extracted from `contrib/rocksdb/CMakeLists.txt`
set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb")
list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/")
set(PORTABLE ON)
## always disable jemalloc for rocksdb by default
## because it introduces non-standard jemalloc APIs
# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs
option(WITH_JEMALLOC "build with JeMalloc" OFF)
set(USE_SNAPPY OFF)
if (TARGET ch_contrib::snappy)
set(USE_SNAPPY ON)
endif()
option(WITH_SNAPPY "build with SNAPPY" ${USE_SNAPPY})
## lz4, zlib, zstd is enabled in ClickHouse by default
option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING
# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd
option(WITH_SNAPPY "build with SNAPPY" ON)
option(WITH_LZ4 "build with lz4" ON)
option(WITH_ZLIB "build with zlib" ON)
option(WITH_ZSTD "build with zstd" ON)
@@ -26,78 +19,46 @@ option(WITH_ZSTD "build with zstd" ON)
# third-party/folly is only validated to work on Linux and Windows for now.
# So only turn it on there by default.
if(CMAKE_SYSTEM_NAME MATCHES "Linux|Windows")
if(MSVC AND MSVC_VERSION LESS 1910)
# Folly does not compile with MSVC older than VS2017
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
endif()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" ON)
else()
option(WITH_FOLLY_DISTRIBUTED_MUTEX "build with folly::DistributedMutex" OFF)
endif()
if( NOT DEFINED CMAKE_CXX_STANDARD )
set(CMAKE_CXX_STANDARD 11)
if(WITH_SNAPPY)
add_definitions(-DSNAPPY)
list(APPEND THIRDPARTY_LIBS ch_contrib::snappy)
endif()
if(MSVC)
option(WITH_XPRESS "build with windows built in compression" OFF)
include("${ROCKSDB_SOURCE_DIR}/thirdparty.inc")
else()
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD" AND NOT CMAKE_SYSTEM_NAME MATCHES "kFreeBSD")
# FreeBSD has jemalloc as default malloc
# but it does not have all the jemalloc files in include/...
set(WITH_JEMALLOC ON)
else()
if(WITH_JEMALLOC AND TARGET ch_contrib::jemalloc)
add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE)
list(APPEND THIRDPARTY_LIBS ch_contrib::jemalloc)
endif()
endif()
if(WITH_SNAPPY)
add_definitions(-DSNAPPY)
list(APPEND THIRDPARTY_LIBS ch_contrib::snappy)
endif()
if(WITH_ZLIB)
add_definitions(-DZLIB)
list(APPEND THIRDPARTY_LIBS ch_contrib::zlib)
endif()
if(WITH_LZ4)
add_definitions(-DLZ4)
list(APPEND THIRDPARTY_LIBS ch_contrib::lz4)
endif()
if(WITH_ZSTD)
add_definitions(-DZSTD)
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
if(WITH_ZLIB)
add_definitions(-DZLIB)
list(APPEND THIRDPARTY_LIBS ch_contrib::zlib)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
if(POWER9)
set(HAS_POWER9 1)
set(HAS_ALTIVEC 1)
else()
set(HAS_POWER8 1)
set(HAS_ALTIVEC 1)
endif(POWER9)
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(powerpc|ppc)64")
if(WITH_LZ4)
add_definitions(-DLZ4)
list(APPEND THIRDPARTY_LIBS ch_contrib::lz4)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")
set(HAS_ARMV8_CRC 1)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64|arm64|ARM64")
if(WITH_ZSTD)
add_definitions(-DZSTD)
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
option(PORTABLE "build a portable binary" ON)
if(ENABLE_AVX2 AND ENABLE_PCLMULQDQ)
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
add_definitions(-DHAVE_SSE42)
add_definitions(-DHAVE_PCLMUL)
endif()
if(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64|AARCH64")
set (HAS_ARMV8_CRC 1)
# the original build descriptions set specific flags for ARM. These flags are already subsumed by ClickHouse's general
# ARM flags, see cmake/cpu_features.cmake
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function")
endif()
set (HAVE_THREAD_LOCAL 1)
if(HAVE_THREAD_LOCAL)
add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL)
@@ -107,8 +68,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_definitions(-DOS_MACOSX)
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
add_definitions(-DOS_LINUX)
elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")
add_definitions(-DOS_SOLARIS)
elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
add_definitions(-DOS_FREEBSD)
elseif(CMAKE_SYSTEM_NAME MATCHES "Android")
@@ -123,12 +82,10 @@ endif()
if (OS_LINUX)
add_definitions(-DROCKSDB_SCHED_GETCPU_PRESENT)
add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT)
add_definitions(-DROCKSDB_AUXV_GETAUXVAL_PRESENT)
elseif (OS_FREEBSD)
add_definitions(-DROCKSDB_AUXV_SYSAUXV_PRESENT)
endif()
set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb")
include_directories(${ROCKSDB_SOURCE_DIR})
include_directories("${ROCKSDB_SOURCE_DIR}/include")
@@ -136,11 +93,11 @@ if(WITH_FOLLY_DISTRIBUTED_MUTEX)
include_directories("${ROCKSDB_SOURCE_DIR}/third-party/folly")
endif()
# Main library source code
set(SOURCES
${ROCKSDB_SOURCE_DIR}/cache/cache.cc
${ROCKSDB_SOURCE_DIR}/cache/cache_entry_roles.cc
${ROCKSDB_SOURCE_DIR}/cache/cache_key.cc
${ROCKSDB_SOURCE_DIR}/cache/cache_reservation_manager.cc
${ROCKSDB_SOURCE_DIR}/cache/clock_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/lru_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
@@ -156,6 +113,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_format.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_sequential_reader.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_log_writer.cc
${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc
${ROCKSDB_SOURCE_DIR}/db/builder.cc
${ROCKSDB_SOURCE_DIR}/db/c.cc
${ROCKSDB_SOURCE_DIR}/db/column_family.cc
@@ -229,6 +187,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc
${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc
${ROCKSDB_SOURCE_DIR}/env/mock_env.cc
${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc
${ROCKSDB_SOURCE_DIR}/file/delete_scheduler.cc
${ROCKSDB_SOURCE_DIR}/file/file_prefetch_buffer.cc
${ROCKSDB_SOURCE_DIR}/file/file_util.cc
@@ -247,6 +206,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/memory/concurrent_arena.cc
${ROCKSDB_SOURCE_DIR}/memory/jemalloc_nodump_allocator.cc
${ROCKSDB_SOURCE_DIR}/memory/memkind_kmem_allocator.cc
${ROCKSDB_SOURCE_DIR}/memory/memory_allocator.cc
${ROCKSDB_SOURCE_DIR}/memtable/alloc_tracker.cc
${ROCKSDB_SOURCE_DIR}/memtable/hash_linklist_rep.cc
${ROCKSDB_SOURCE_DIR}/memtable/hash_skiplist_rep.cc
@@ -322,6 +282,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/table/table_factory.cc
${ROCKSDB_SOURCE_DIR}/table/table_properties.cc
${ROCKSDB_SOURCE_DIR}/table/two_level_iterator.cc
${ROCKSDB_SOURCE_DIR}/table/unique_id.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point.cc
${ROCKSDB_SOURCE_DIR}/test_util/sync_point_impl.cc
${ROCKSDB_SOURCE_DIR}/test_util/testutil.cc
@@ -333,9 +294,12 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/tools/ldb_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/sst_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/tools/trace_analyzer_tool.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/block_cache_tracer.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/io_tracer.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_handler.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record_result.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_record.cc
${ROCKSDB_SOURCE_DIR}/trace_replay/trace_replay.cc
${ROCKSDB_SOURCE_DIR}/util/coding.cc
${ROCKSDB_SOURCE_DIR}/util/compaction_job_stats_impl.cc
${ROCKSDB_SOURCE_DIR}/util/comparator.cc
@@ -347,6 +311,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/util/murmurhash.cc
${ROCKSDB_SOURCE_DIR}/util/random.cc
${ROCKSDB_SOURCE_DIR}/util/rate_limiter.cc
${ROCKSDB_SOURCE_DIR}/util/regex.cc
${ROCKSDB_SOURCE_DIR}/util/ribbon_config.cc
${ROCKSDB_SOURCE_DIR}/util/slice.cc
${ROCKSDB_SOURCE_DIR}/util/file_checksum_helper.cc
@@ -362,18 +327,23 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_db_impl_filesnapshot.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_dump_tool.cc
${ROCKSDB_SOURCE_DIR}/utilities/blob_db/blob_file.cc
${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load.cc
${ROCKSDB_SOURCE_DIR}/utilities/cache_dump_load_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/cassandra_compaction_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/format.cc
${ROCKSDB_SOURCE_DIR}/utilities/cassandra/merge_operator.cc
${ROCKSDB_SOURCE_DIR}/utilities/checkpoint/checkpoint_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters.cc
${ROCKSDB_SOURCE_DIR}/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
${ROCKSDB_SOURCE_DIR}/utilities/debug.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_mirror.cc
${ROCKSDB_SOURCE_DIR}/utilities/env_timed.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_env.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_fs.cc
${ROCKSDB_SOURCE_DIR}/utilities/fault_injection_secondary_cache.cc
${ROCKSDB_SOURCE_DIR}/utilities/leveldb_options/leveldb_options.cc
${ROCKSDB_SOURCE_DIR}/utilities/memory/memory_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/bytesxor.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/max.cc
${ROCKSDB_SOURCE_DIR}/utilities/merge_operators/put.cc
@@ -393,6 +363,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/lock_manager.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_tracker.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/point/point_lock_manager.cc
@@ -411,6 +382,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index_internal.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/locktree/concurrent_tree.cc
@@ -425,7 +397,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/standalone_port.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/dbt.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/lock/range/range_tree/lib/util/memarena.cc
rocksdb_build_version.cc)
build_version.cc) # generated by hand
if(ENABLE_SSE42 AND ENABLE_PCLMULQDQ)
set_source_files_properties(
@@ -462,5 +434,6 @@ endif()
add_library(_rocksdb ${SOURCES})
add_library(ch_contrib::rocksdb ALIAS _rocksdb)
target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
# SYSTEM is required to keep warnings from RocksDB headers out of dependent targets
target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include")

@@ -16,6 +16,9 @@ dpkg -i package_folder/clickhouse-client_*.deb
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# shellcheck disable=SC1091
source /utils.lib
# install test configs
/usr/share/clickhouse-test/config/install.sh
@@ -272,3 +275,5 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
fi
collect_core_dumps

@@ -12,8 +12,7 @@ MAX_RUN_TIME=$((MAX_RUN_TIME == 0 ? 10800 : MAX_RUN_TIME))
USE_DATABASE_REPLICATED=${USE_DATABASE_REPLICATED:=0}
USE_SHARED_CATALOG=${USE_SHARED_CATALOG:=0}
# disable for now
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=1
if [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] || [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
RUN_SEQUENTIAL_TESTS_IN_PARALLEL=0
@@ -310,7 +309,7 @@ function run_tests()
try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')"
set +e
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
timeout -k 60m -s TERM --preserve-status 140m clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
@@ -483,3 +482,5 @@ if [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
fi
collect_core_dumps

@@ -1,8 +1,5 @@
#!/bin/bash
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
OK="\tOK\t\\N\t"
FAIL="\tFAIL\t\\N\t"
@@ -315,12 +312,4 @@ function collect_query_and_trace_logs()
done
}
function collect_core_dumps()
{
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
zstd --threads=0 "$core"
mv "$core.zst" /test_output/
done
}
# vi: ft=bash

@@ -1,5 +1,10 @@
#!/bin/bash
# core.COMM.PID-TID
sysctl kernel.core_pattern='core.%e.%p-%P'
# ASAN doesn't work with suid_dumpable=2
sysctl fs.suid_dumpable=1
function run_with_retry()
{
if [[ $- =~ e ]]; then
@@ -48,4 +53,12 @@ function timeout_with_logging() {
return $exit_code
}
function collect_core_dumps()
{
find . -type f -maxdepth 1 -name 'core.*' | while read -r core; do
zstd --threads=0 "$core"
mv "$core.zst" /test_output/
done
}
# vi: ft=bash

@@ -21,6 +21,9 @@ source /attach_gdb.lib
# shellcheck source=../stateless/stress_tests.lib
source /stress_tests.lib
# shellcheck disable=SC1091
source /utils.lib
install_packages package_folder
# Thread Fuzzer allows checking more permutations of possible thread scheduling

@@ -5,11 +5,11 @@ sidebar_label: Object Data Type
keywords: [object, data type]
---
# Object Data Type
# Object Data Type (deprecated)
:::note
This feature is not production-ready and is now deprecated. If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864)
:::
**This feature is not production-ready and is now deprecated.** If you need to work with JSON documents, consider using [this guide](/docs/en/integrations/data-ingestion/data-formats/json) instead. A new implementation to support JSON object is in progress and can be tracked [here](https://github.com/ClickHouse/ClickHouse/issues/54864).
<hr />
Stores JavaScript Object Notation (JSON) documents in a single column.

@@ -58,6 +58,8 @@ KILL QUERY WHERE query_id='2-857d-4a57-9ee0-327da5d60a90'
KILL QUERY WHERE user='username' SYNC
```
:::tip
If you are killing a query in ClickHouse Cloud or in a self-managed cluster, then be sure to use the `ON CLUSTER [cluster-name]` option, in order to ensure the query is killed on all replicas.
:::
Read-only users can only stop their own queries.
By default, the asynchronous version of queries is used (`ASYNC`), which does not wait for confirmation that queries have stopped.
@@ -131,6 +133,7 @@ KILL MUTATION WHERE database = 'default' AND table = 'table'
-- Cancel the specific mutation:
KILL MUTATION WHERE database = 'default' AND table = 'table' AND mutation_id = 'mutation_3.txt'
```
:::tip
If you are killing a mutation in ClickHouse Cloud or in a self-managed cluster, then be sure to use the `ON CLUSTER [cluster-name]` option, in order to ensure the mutation is killed on all replicas.
:::
The query is useful when a mutation is stuck and cannot finish (e.g. if some function in the mutation query throws an exception when applied to the data contained in the table).

@@ -35,10 +35,9 @@ disable = '''
broad-except,
bare-except,
no-else-return,
global-statement
global-statement,
'''
[tool.pylint.SIMILARITIES]
# due to SQL
min-similarity-lines=1000

@@ -267,7 +267,11 @@ bool ColumnAggregateFunction::structureEquals(const IColumn & to) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnAggregateFunction::insertRangeFrom(const IColumn & from, size_t start, size_t length)
#else
void ColumnAggregateFunction::doInsertRangeFrom(const IColumn & from, size_t start, size_t length)
#endif
{
const ColumnAggregateFunction & from_concrete = assert_cast<const ColumnAggregateFunction &>(from);
@@ -462,7 +466,11 @@ void ColumnAggregateFunction::insertFromWithOwnership(const IColumn & from, size
insertMergeFrom(from, n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
#else
void ColumnAggregateFunction::doInsertFrom(const IColumn & from, size_t n)
#endif
{
insertRangeFrom(from, n, 1);
}
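The `#if !defined(ABORT_ON_LOGICAL_ERROR)` blocks repeated throughout this diff all follow one pattern: in release builds the virtual entry point keeps its usual name, while in debug builds it is renamed with a `do` prefix, presumably so that `IColumn` can route every call through a non-virtual wrapper that validates the arguments first. A hedged sketch of that dispatch, with a simplified `IColumn` and an invented `size()` check standing in for the real validation:

```cpp
#include <cassert>
#include <cstddef>

struct IColumn
{
    virtual ~IColumn() = default;
    virtual size_t size() const = 0;

#if !defined(ABORT_ON_LOGICAL_ERROR)
    /// Release build: derived columns override insertFrom directly.
    virtual void insertFrom(const IColumn & src, size_t n) = 0;
#else
    /// Debug build: insertFrom is a non-virtual wrapper that checks the
    /// arguments once for every column type, then forwards to the virtual
    /// doInsertFrom that derived columns now override instead.
    void insertFrom(const IColumn & src, size_t n)
    {
        assert(n < src.size()); /// placeholder for the shared sanity checks
        doInsertFrom(src, n);
    }
    virtual void doInsertFrom(const IColumn & src, size_t n) = 0;
#endif
};
```

This also explains the `using IColumn::insertFrom;` lines in some headers below: once `insertFrom` is a non-virtual base member, derived classes that declare their own overloads of the same name must re-expose it.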

@@ -145,7 +145,14 @@ public:
void insertData(const char * pos, size_t length) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & from, size_t n) override;
#else
using IColumn::insertFrom;
void doInsertFrom(const IColumn & from, size_t n) override;
#endif
void insertFrom(ConstAggregateDataPtr place);
@@ -182,7 +189,11 @@ public:
void protect() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & from, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & from, size_t start, size_t length) override;
#endif
void popBack(size_t n) override;
@@ -201,7 +212,11 @@ public:
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override
#endif
{
return 0;
}

@@ -337,7 +337,11 @@ bool ColumnArray::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnArray::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnArray::doInsertFrom(const IColumn & src_, size_t n)
#endif
{
const ColumnArray & src = assert_cast<const ColumnArray &>(src_);
size_t size = src.sizeAt(n);
@@ -392,7 +396,11 @@ int ColumnArray::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int nan
: 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnArray::compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
#else
int ColumnArray::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs_, nan_direction_hint);
}
@@ -535,7 +543,11 @@ void ColumnArray::getExtremes(Field & min, Field & max) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnArray::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
if (length == 0)
return;

@@ -84,10 +84,18 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
#endif
void insertDefault() override;
void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
@@ -95,7 +103,11 @@ public:
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#endif
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint, const Collator & collator) const override;
void getPermutation(PermutationSortDirection direction, PermutationSortStability stability,
size_t limit, int nan_direction_hint, Permutation & res) const override;

@@ -85,7 +85,11 @@ public:
bool isDefaultAt(size_t) const override { throwMustBeDecompressed(); }
void insert(const Field &) override { throwMustBeDecompressed(); }
bool tryInsert(const Field &) override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
#else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
#endif
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
void insertDefault() override { throwMustBeDecompressed(); }
void popBack(size_t) override { throwMustBeDecompressed(); }
@@ -100,7 +104,11 @@ public:
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
#endif
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
throwMustBeDecompressed();

@@ -32,6 +32,8 @@ private:
ColumnConst(const ColumnConst & src) = default;
public:
bool isConst() const override { return true; }
ColumnPtr convertToFullColumn() const;
ColumnPtr convertToFullColumnIfConst() const override
@@ -121,7 +123,11 @@ public:
return data->isNullAt(0);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
#else
void doInsertRangeFrom(const IColumn &, size_t /*start*/, size_t length) override
#endif
{
s += length;
}
@@ -145,12 +151,20 @@ public:
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn &, size_t) override
#else
void doInsertFrom(const IColumn &, size_t) override
#endif
{
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
#else
void doInsertManyFrom(const IColumn & /*src*/, size_t /* position */, size_t length) override { s += length; }
#endif
void insertDefault() override
{
@@ -223,7 +237,11 @@ public:
return data->allocatedBytes() + sizeof(s);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
#else
int doCompareAt(size_t, size_t, const IColumn & rhs, int nan_direction_hint) const override
#endif
{
return data->compareAt(0, 0, *assert_cast<const ColumnConst &>(rhs).data, nan_direction_hint);
}
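A hedged model (not the real class) of why the ColumnConst overrides above reduce to `++s` and `s += length`: a const column stores one value plus a row count, so inserting rows copies nothing and comparisons ignore the row indexes entirely.

```cpp
#include <cstddef>
#include <cstdio>

template <typename T>
class ConstColumnModel
{
    T value;      /// the single shared value
    size_t s = 0; /// number of logical rows
public:
    explicit ConstColumnModel(T v) : value(v) {}
    void insertFrom(const ConstColumnModel &, size_t) { ++s; }           /// content is implied
    void insertRangeFrom(const ConstColumnModel &, size_t, size_t len) { s += len; }
    int compareAt(size_t, size_t, const ConstColumnModel & rhs) const
    {
        /// row indexes are irrelevant: every row equals `value`
        return value < rhs.value ? -1 : (rhs.value < value ? 1 : 0);
    }
    size_t size() const { return s; }
};

int main()
{
    ConstColumnModel<int> a(42), b(42);
    a.insertRangeFrom(b, 0, 10); /// "copies" ten rows by bumping the count
    std::printf("%zu rows, cmp=%d\n", a.size(), a.compareAt(3, 7, b));
}
```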

@@ -32,7 +32,11 @@ namespace ErrorCodes
}
template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnDecimal<T>::compareAt(size_t n, size_t m, const IColumn & rhs_, int) const
#else
int ColumnDecimal<T>::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int) const
#endif
{
auto & other = static_cast<const Self &>(rhs_);
const T & a = data[n];
@@ -331,7 +335,11 @@ void ColumnDecimal<T>::insertData(const char * src, size_t /*length*/)
}
template <is_decimal T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDecimal<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnDecimal<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnDecimal & src_vec = assert_cast<const ColumnDecimal &>(src);

@@ -55,9 +55,17 @@ public:
void reserve(size_t n) override { data.reserve_exact(n); }
void shrinkToFit() override { data.shrink_to_fit(); }
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#else
void doInsertFrom(const IColumn & src, size_t n) override { data.push_back(static_cast<const Self &>(src).getData()[n]); }
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
#endif
{
ValueType v = assert_cast<const Self &>(src).getData()[position];
data.resize_fill(data.size() + length, v);
@@ -68,7 +76,11 @@ public:
void insertManyDefaults(size_t length) override { data.resize_fill(data.size() + length); }
void insert(const Field & x) override { data.push_back(x.get<T>()); }
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void popBack(size_t n) override
{
@@ -92,7 +104,11 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override;
#endif
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;
void updatePermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,

@@ -215,7 +215,11 @@ bool ColumnDynamic::tryInsert(const DB::Field & x)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n)
#else
void ColumnDynamic::doInsertFrom(const DB::IColumn & src_, size_t n)
#endif
{
const auto & dynamic_src = assert_cast<const ColumnDynamic &>(src_);
@@ -265,7 +269,11 @@ void ColumnDynamic::insertFrom(const DB::IColumn & src_, size_t n)
variant_col.insertIntoVariantFrom(string_variant_discr, *tmp_string_column, 0);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
#else
void ColumnDynamic::doInsertRangeFrom(const DB::IColumn & src_, size_t start, size_t length)
#endif
{
if (start + length > src_.size())
throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Parameter out of bound in ColumnDynamic::insertRangeFrom method. "
@@ -431,7 +439,11 @@ void ColumnDynamic::insertRangeFrom(const DB::IColumn & src_, size_t start, size
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnDynamic::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else
void ColumnDynamic::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#endif
{
const auto & dynamic_src = assert_cast<const ColumnDynamic &>(src_);
@@ -591,7 +603,11 @@ void ColumnDynamic::updateHashWithValue(size_t n, SipHash & hash) const
variant_col.getVariantByGlobalDiscriminator(discr).updateHashWithValue(variant_col.offsetAt(n), hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnDynamic::compareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
#else
int ColumnDynamic::doCompareAt(size_t n, size_t m, const DB::IColumn & rhs, int nan_direction_hint) const
#endif
{
const auto & left_variant = assert_cast<const ColumnVariant &>(*variant_column);
const auto & right_dynamic = assert_cast<const ColumnDynamic &>(rhs);

@@ -142,9 +142,16 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertDefault() override
{
@@ -213,7 +220,11 @@ public:
return scattered_columns;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
bool hasEqualValues() const override
{

@@ -74,7 +74,11 @@ bool ColumnFixedString::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
#else
void ColumnFixedString::doInsertFrom(const IColumn & src_, size_t index)
#endif
{
const ColumnFixedString & src = assert_cast<const ColumnFixedString &>(src_);
@@ -86,7 +90,11 @@ void ColumnFixedString::insertFrom(const IColumn & src_, size_t index)
memcpySmallAllowReadWriteOverflow15(chars.data() + old_size, &src.chars[n * index], n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFixedString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnFixedString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
if (n != src_concrete.getN())
@@ -219,7 +227,11 @@ size_t ColumnFixedString::estimateCardinalityInPermutedRange(const Permutation &
return elements.size();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFixedString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnFixedString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnFixedString & src_concrete = assert_cast<const ColumnFixedString &>(src);
chassert(this->n == src_concrete.n);

@@ -98,9 +98,17 @@ public:
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t index) override;
#else
void doInsertFrom(const IColumn & src_, size_t index) override;
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertData(const char * pos, size_t length) override;
@@ -129,7 +137,11 @@ public:
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else
int doCompareAt(size_t p1, size_t p2, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#endif
{
const ColumnFixedString & rhs = assert_cast<const ColumnFixedString &>(rhs_);
chassert(this->n == rhs.n);
@@ -144,7 +156,11 @@ public:
size_t estimateCardinalityInPermutedRange(const Permutation & permutation, const EqualRange & equal_range) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;

@@ -72,7 +72,11 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFunction::insertFrom(const IColumn & src, size_t n)
#else
void ColumnFunction::doInsertFrom(const IColumn & src, size_t n)
#endif
{
const ColumnFunction & src_func = assert_cast<const ColumnFunction &>(src);
@@ -89,7 +93,11 @@ void ColumnFunction::insertFrom(const IColumn & src, size_t n)
++elements_size;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnFunction::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnFunction::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnFunction & src_func = assert_cast<const ColumnFunction &>(src);

@@ -94,8 +94,16 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn &, size_t start, size_t length) override;
#endif
void insertData(const char *, size_t) override
{
@@ -137,7 +145,11 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "popBack is not implemented for {}", getName());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override
#endif
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "compareAt is not implemented for {}", getName());
}

@@ -159,7 +159,11 @@ void ColumnLowCardinality::insertDefault()
idx.insertPosition(getDictionary().getDefaultValueIndex());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnLowCardinality::insertFrom(const IColumn & src, size_t n)
#else
void ColumnLowCardinality::doInsertFrom(const IColumn & src, size_t n)
#endif
{
const auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src);
@@ -187,7 +191,11 @@ void ColumnLowCardinality::insertFromFullColumn(const IColumn & src, size_t n)
idx.insertPosition(getDictionary().uniqueInsertFrom(src, n));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnLowCardinality::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const auto * low_cardinality_src = typeid_cast<const ColumnLowCardinality *>(&src);
@@ -364,7 +372,11 @@ int ColumnLowCardinality::compareAtImpl(size_t n, size_t m, const IColumn & rhs,
return getDictionary().compareAt(n_index, m_index, low_cardinality_column.getDictionary(), nan_direction_hint);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnLowCardinality::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnLowCardinality::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}

@@ -78,10 +78,18 @@ public:
bool tryInsert(const Field & x) override;
void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
void insertFromFullColumn(const IColumn & src, size_t n);
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insertRangeFromFullColumn(const IColumn & src, size_t start, size_t length);
void insertRangeFromDictionaryEncodedColumn(const IColumn & keys, const IColumn & positions);
@@ -127,7 +135,11 @@ public:
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().index(indexes_, limit));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator &) const override;

@@ -153,17 +153,29 @@ void ColumnMap::updateHashFast(SipHash & hash) const
nested->updateHashFast(hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnMap::insertFrom(const IColumn & src, size_t n)
#else
void ColumnMap::doInsertFrom(const IColumn & src, size_t n)
#endif
{
nested->insertFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnMap::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnMap::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
assert_cast<ColumnArray &>(*nested).insertManyFrom(assert_cast<const ColumnMap &>(src).getNestedColumn(), position, length);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnMap::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnMap::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
nested->insertRangeFrom(
assert_cast<const ColumnMap &>(src).getNestedColumn(),
@@ -210,7 +222,11 @@ MutableColumns ColumnMap::scatter(ColumnIndex num_columns, const Selector & sele
return res;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnMap::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnMap::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
const auto & rhs_map = assert_cast<const ColumnMap &>(rhs);
return nested->compareAt(n, m, rhs_map.getNestedColumn(), nan_direction_hint);

@@ -66,16 +66,28 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,
size_t limit, int nan_direction_hint, IColumn::Permutation & res) const override;

@@ -221,7 +221,11 @@ const char * ColumnNullable::skipSerializedInArena(const char * pos) const
return pos;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnNullable::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnNullable::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(src);
getNullMapColumn().insertRangeFrom(*nullable_col.null_map, start, length);
@@ -258,7 +262,11 @@ bool ColumnNullable::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnNullable::insertFrom(const IColumn & src, size_t n)
#else
void ColumnNullable::doInsertFrom(const IColumn & src, size_t n)
#endif
{
const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
getNestedColumn().insertFrom(src_concrete.getNestedColumn(), n);
@@ -266,7 +274,11 @@ void ColumnNullable::insertFrom(const IColumn & src, size_t n)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnNullable::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnNullable::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnNullable & src_concrete = assert_cast<const ColumnNullable &>(src);
getNestedColumn().insertManyFrom(src_concrete.getNestedColumn(), position, length);
@@ -402,7 +414,11 @@ int ColumnNullable::compareAtImpl(size_t n, size_t m, const IColumn & rhs_, int
return getNestedColumn().compareAt(n, m, nested_rhs, null_direction_hint);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnNullable::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else
int ColumnNullable::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs_, null_direction_hint);
}

@@ -69,11 +69,21 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertFromNotNullable(const IColumn & src, size_t n);
void insertRangeFromNotNullable(const IColumn & src, size_t start, size_t length);
@@ -90,7 +100,11 @@ public:
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#endif
#if USE_EMBEDDED_COMPILER

@@ -763,12 +763,20 @@ void ColumnObject::get(size_t n, Field & res) const
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnObject::insertFrom(const IColumn & src, size_t n)
#else
void ColumnObject::doInsertFrom(const IColumn & src, size_t n)
#endif
{
insert(src[n]);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnObject::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnObject::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const auto & src_object = assert_cast<const ColumnObject &>(src);

@@ -209,8 +209,15 @@ public:
void insert(const Field & field) override;
bool tryInsert(const Field & field) override;
void insertDefault() override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void popBack(size_t length) override;
Field operator[](size_t n) const override;
void get(size_t n, Field & res) const override;
@@ -228,7 +235,11 @@ public:
/// Order of rows in ColumnObject is undefined.
void getPermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation & res) const override;
void updatePermutation(PermutationSortDirection, PermutationSortStability, size_t, int, Permutation &, EqualRanges &) const override {}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#endif
void getExtremes(Field & min, Field & max) const override;
/// All other methods throw exception.

@@ -174,7 +174,11 @@ const char * ColumnSparse::skipSerializedInArena(const char * pos) const
return values->skipSerializedInArena(pos);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnSparse::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnSparse::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
if (length == 0)
return;
@@ -248,7 +252,11 @@ bool ColumnSparse::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnSparse::insertFrom(const IColumn & src, size_t n)
#else
void ColumnSparse::doInsertFrom(const IColumn & src, size_t n)
#endif
{
if (const auto * src_sparse = typeid_cast<const ColumnSparse *>(&src))
{
@@ -446,7 +454,11 @@ ColumnPtr ColumnSparse::indexImpl(const PaddedPODArray<Type> & indexes, size_t l
return ColumnSparse::create(std::move(res_values), std::move(res_offsets), limit);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnSparse::compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#else
int ColumnSparse::doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const
#endif
{
if (const auto * rhs_sparse = typeid_cast<const ColumnSparse *>(&rhs_))
return values->compareAt(getValueIndex(n), rhs_sparse->getValueIndex(m), rhs_sparse->getValuesColumn(), null_direction_hint);
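A hedged model of the lookup `doCompareAt` relies on here: ColumnSparse keeps the row numbers that hold non-default values in an offsets list, and its values column stores the default value at position 0 followed by the non-default ones, so `getValueIndex(row)` returns 0 for "default" and `position + 1` otherwise. Simplified to `std::vector<int>`; the real class works on arbitrary `IColumn`s.

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

struct SparseModel
{
    std::vector<int> values{0};  /// values[0] is the default value
    std::vector<size_t> offsets; /// sorted row numbers with non-default values

    /// Index into `values` for a given row; 0 means "the default".
    size_t getValueIndex(size_t row) const
    {
        auto it = std::lower_bound(offsets.begin(), offsets.end(), row);
        if (it != offsets.end() && *it == row)
            return static_cast<size_t>(it - offsets.begin()) + 1;
        return 0;
    }

    int compareAt(size_t n, size_t m, const SparseModel & rhs) const
    {
        int a = values[getValueIndex(n)];
        int b = rhs.values[rhs.getValueIndex(m)];
        return a < b ? -1 : (b < a ? 1 : 0);
    }
};
```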

@@ -81,10 +81,18 @@ public:
char * serializeValueIntoMemory(size_t n, char * memory) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override;
#else
void doInsertFrom(const IColumn & src, size_t n) override;
#endif
void insertDefault() override;
void insertManyDefaults(size_t length) override;
@@ -98,7 +106,11 @@ public:
template <typename Type>
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;
#endif
void compareColumn(const IColumn & rhs, size_t rhs_row_num,
PaddedPODArray<UInt64> * row_indexes, PaddedPODArray<Int8> & compare_results,
int direction, int nan_direction_hint) const override;

@@ -39,7 +39,11 @@ ColumnString::ColumnString(const ColumnString & src)
last_offset, chars.size());
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnString::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnString::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnString & src_concrete = assert_cast<const ColumnString &>(src);
const UInt8 * src_buf = &src_concrete.chars[src_concrete.offsets[position - 1]];
@@ -129,7 +133,11 @@ void ColumnString::updateWeakHash32(WeakHash32 & hash) const
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnString::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
if (length == 0)
return;

@@ -142,7 +142,11 @@ public:
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override
#else
void doInsertFrom(const IColumn & src_, size_t n) override
#endif
{
const ColumnString & src = assert_cast<const ColumnString &>(src_);
const size_t size_to_append = src.offsets[n] - src.offsets[n - 1]; /// -1th index is Ok, see PaddedPODArray.
@@ -165,7 +169,11 @@ public:
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertData(const char * pos, size_t length) override
{
@@ -212,7 +220,11 @@ public:
hash.update(reinterpret_cast<const char *>(chars.data()), chars.size() * sizeof(chars[0]));
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
@@ -238,7 +250,11 @@ public:
offsets.push_back(offsets.back() + 1);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int /*nan_direction_hint*/) const override
#endif
{
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
return memcmpSmallAllowOverflow15(chars.data() + offsetAt(n), sizeAt(n) - 1, rhs.chars.data() + rhs.offsetAt(m), rhs.sizeAt(m) - 1);

@@ -205,7 +205,11 @@ bool ColumnTuple::tryInsert(const Field & x)
return true;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnTuple::doInsertFrom(const IColumn & src_, size_t n)
#endif
{
const ColumnTuple & src = assert_cast<const ColumnTuple &>(src_);
@@ -218,7 +222,11 @@ void ColumnTuple::insertFrom(const IColumn & src_, size_t n)
columns[i]->insertFrom(*src.columns[i], n);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnTuple::insertManyFrom(const IColumn & src, size_t position, size_t length)
#else
void ColumnTuple::doInsertManyFrom(const IColumn & src, size_t position, size_t length)
#endif
{
const ColumnTuple & src_tuple = assert_cast<const ColumnTuple &>(src);
@@ -318,7 +326,11 @@ void ColumnTuple::updateHashFast(SipHash & hash) const
column->updateHashFast(hash);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnTuple::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnTuple::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
column_length += length;
const size_t tuple_size = columns.size();
@@ -470,7 +482,11 @@ int ColumnTuple::compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_
return 0;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnTuple::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnTuple::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
return compareAtImpl(n, m, rhs, nan_direction_hint);
}

@@ -65,8 +65,15 @@ public:
void insertData(const char * pos, size_t length) override;
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertManyFrom(const IColumn & src, size_t position, size_t length) override;
#else
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override;
#endif
void insertDefault() override;
void popBack(size_t n) override;
StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
@@ -76,14 +83,22 @@ public:
void updateHashWithValue(size_t n, SipHash & hash) const override;
void updateWeakHash32(WeakHash32 & hash) const override;
void updateHashFast(SipHash & hash) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
int compareAtWithCollation(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator & collator) const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,

@@ -90,7 +90,11 @@ public:
return getNestedColumn()->updateHashWithValue(n, hash_func);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
void getExtremes(Field & min, Field & max) const override { column_holder->getExtremes(min, max); }
bool valuesHaveFixedSize() const override { return column_holder->valuesHaveFixedSize(); }
@@ -488,7 +492,11 @@ const char * ColumnUnique<ColumnType>::skipSerializedInArena(const char *) const
}
template <typename ColumnType>
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnUnique<ColumnType>::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnUnique<ColumnType>::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
if (is_nullable)
{

View File

@ -595,17 +595,29 @@ void ColumnVariant::insertManyFromImpl(const DB::IColumn & src_, size_t position
}
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVariant::insertFrom(const IColumn & src_, size_t n)
#else
void ColumnVariant::doInsertFrom(const IColumn & src_, size_t n)
#endif
{
insertFromImpl(src_, n, nullptr);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVariant::insertRangeFrom(const IColumn & src_, size_t start, size_t length)
#else
void ColumnVariant::doInsertRangeFrom(const IColumn & src_, size_t start, size_t length)
#endif
{
insertRangeFromImpl(src_, start, length, nullptr);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVariant::insertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#else
void ColumnVariant::doInsertManyFrom(const DB::IColumn & src_, size_t position, size_t length)
#endif
{
insertManyFromImpl(src_, position, length, nullptr);
}
@ -1174,7 +1186,11 @@ bool ColumnVariant::hasEqualValues() const
return local_discriminators->hasEqualValues() && variants[localDiscriminatorAt(0)]->hasEqualValues();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
int ColumnVariant::compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#else
int ColumnVariant::doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
#endif
{
const auto & rhs_variant = assert_cast<const ColumnVariant &>(rhs);
Discriminator left_discr = globalDiscriminatorAt(n);

View File

@ -180,9 +180,19 @@ public:
void insert(const Field & x) override;
bool tryInsert(const Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src_, size_t n) override;
void insertRangeFrom(const IColumn & src_, size_t start, size_t length) override;
void insertManyFrom(const IColumn & src_, size_t position, size_t length) override;
#else
using IColumn::insertFrom;
using IColumn::insertManyFrom;
using IColumn::insertRangeFrom;
void doInsertFrom(const IColumn & src_, size_t n) override;
void doInsertRangeFrom(const IColumn & src_, size_t start, size_t length) override;
void doInsertManyFrom(const IColumn & src_, size_t position, size_t length) override;
#endif
/// Methods for insertion from another Variant but with known mapping between global discriminators.
void insertFrom(const IColumn & src_, size_t n, const std::vector<ColumnVariant::Discriminator> & global_discriminators_mapping);
@ -213,7 +223,11 @@ public:
ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;
ColumnPtr replicate(const Offsets & replicate_offsets) const override;
MutableColumns scatter(ColumnIndex num_columns, const Selector & selector) const override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const override;
#endif
bool hasEqualValues() const override;
void getExtremes(Field & min, Field & max) const override;
void getPermutation(IColumn::PermutationSortDirection direction, IColumn::PermutationSortStability stability,

View File

@ -503,7 +503,11 @@ bool ColumnVector<T>::tryInsert(const DB::Field & x)
}
template <typename T>
#if !defined(ABORT_ON_LOGICAL_ERROR)
void ColumnVector<T>::insertRangeFrom(const IColumn & src, size_t start, size_t length)
#else
void ColumnVector<T>::doInsertRangeFrom(const IColumn & src, size_t start, size_t length)
#endif
{
const ColumnVector & src_vec = assert_cast<const ColumnVector &>(src);

View File

@ -64,12 +64,20 @@ public:
return data.size();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn & src, size_t n) override
#else
void doInsertFrom(const IColumn & src, size_t n) override
#endif
{
data.push_back(assert_cast<const Self &>(src).getData()[n]);
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertManyFrom(const IColumn & src, size_t position, size_t length) override
#else
void doInsertManyFrom(const IColumn & src, size_t position, size_t length) override
#endif
{
ValueType v = assert_cast<const Self &>(src).getData()[position];
data.resize_fill(data.size() + length, v);
@ -142,7 +150,11 @@ public:
}
/// This method implemented in header because it could be possibly devirtualized.
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
#else
int doCompareAt(size_t n, size_t m, const IColumn & rhs_, int nan_direction_hint) const override
#endif
{
return CompareHelper<T>::compare(data[n], assert_cast<const Self &>(rhs_).data[m], nan_direction_hint);
}
@ -228,7 +240,11 @@ public:
bool tryInsert(const DB::Field & x) override;
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#else
void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) override;
#endif
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;

View File

@ -46,7 +46,11 @@ String IColumn::dumpStructure() const
return res.str();
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void IColumn::insertFrom(const IColumn & src, size_t n)
#else
void IColumn::doInsertFrom(const IColumn & src, size_t n)
#endif
{
insert(src[n]);
}

View File

@ -1,15 +1,14 @@
#pragma once
#include <Common/COW.h>
#include <Common/PODArray_fwd.h>
#include <Common/Exception.h>
#include <Common/typeid_cast.h>
#include <base/StringRef.h>
#include <Core/TypeId.h>
#include <base/StringRef.h>
#include <Common/COW.h>
#include <Common/Exception.h>
#include <Common/PODArray_fwd.h>
#include <Common/typeid_cast.h>
#include "config.h"
class SipHash;
class Collator;
@ -180,18 +179,42 @@ public:
/// Appends n-th element from other column with the same type.
/// Is used in merge-sort and merges. It can be implemented in inherited classes more optimally than the default implementation.
#if !defined(ABORT_ON_LOGICAL_ERROR)
virtual void insertFrom(const IColumn & src, size_t n);
#else
void insertFrom(const IColumn & src, size_t n)
{
assertTypeEquality(src);
doInsertFrom(src, n);
}
#endif
/// Appends range of elements from other column with the same type.
/// Could be used to concatenate columns.
#if !defined(ABORT_ON_LOGICAL_ERROR)
virtual void insertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
#else
void insertRangeFrom(const IColumn & src, size_t start, size_t length)
{
assertTypeEquality(src);
doInsertRangeFrom(src, start, length);
}
#endif
/// Appends one element from other column with the same type multiple times.
#if !defined(ABORT_ON_LOGICAL_ERROR)
virtual void insertManyFrom(const IColumn & src, size_t position, size_t length)
{
for (size_t i = 0; i < length; ++i)
insertFrom(src, position);
}
#else
void insertManyFrom(const IColumn & src, size_t position, size_t length)
{
assertTypeEquality(src);
doInsertManyFrom(src, position, length);
}
#endif
/// Appends one field multiple times. Can be optimized in inherited classes.
virtual void insertMany(const Field & field, size_t length)
@ -322,7 +345,15 @@ public:
*
* For non Nullable and non floating point types, nan_direction_hint is ignored.
*/
#if !defined(ABORT_ON_LOGICAL_ERROR)
[[nodiscard]] virtual int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
#else
[[nodiscard]] int compareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const
{
assertTypeEquality(rhs);
return doCompareAt(n, m, rhs, nan_direction_hint);
}
#endif
#if USE_EMBEDDED_COMPILER
@ -610,6 +641,8 @@ public:
[[nodiscard]] virtual bool isSparse() const { return false; }
[[nodiscard]] virtual bool isConst() const { return false; }
[[nodiscard]] virtual bool isCollationSupported() const { return false; }
virtual ~IColumn() = default;
@ -633,6 +666,29 @@ protected:
Equals equals,
Sort full_sort,
PartialSort partial_sort) const;
#if defined(ABORT_ON_LOGICAL_ERROR)
virtual void doInsertFrom(const IColumn & src, size_t n);
virtual void doInsertRangeFrom(const IColumn & src, size_t start, size_t length) = 0;
virtual void doInsertManyFrom(const IColumn & src, size_t position, size_t length)
{
for (size_t i = 0; i < length; ++i)
insertFrom(src, position);
}
virtual int doCompareAt(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) const = 0;
private:
void assertTypeEquality(const IColumn & rhs) const
{
/// For Sparse and Const columns, we can compare only the internal types. It is considered normal, e.g., to insert from a plain vector column into a sparse vector column.
/// This case is handled specifically in the ColumnSparse implementation; the situation is similar for Const columns.
/// For the rest of the column types we can compare the types directly.
chassert((isConst() || isSparse()) ? getDataType() == rhs.getDataType() : typeid(*this) == typeid(rhs));
}
#endif
};
using ColumnPtr = IColumn::Ptr;
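All of the Column* hunks above instantiate one pattern: in builds with ABORT_ON_LOGICAL_ERROR defined, the public mutation and comparison entry points become non-virtual wrappers that assert type equality between the two columns and then dispatch to virtual do*-prefixed implementations; in release builds the entry points stay virtual and the do* layer does not exist. A minimal self-contained sketch of the pattern, with hypothetical IThing/ThingVector standing in for IColumn/ColumnVector:

#include <cassert>
#include <typeinfo>
#include <vector>

struct IThing
{
#if !defined(ABORT_ON_LOGICAL_ERROR)
    virtual void insertFrom(const IThing & src, size_t n) = 0;
#else
    void insertFrom(const IThing & src, size_t n)
    {
        /// The real code uses chassert() and relaxes the check for Sparse/Const columns.
        assert(typeid(*this) == typeid(src));
        doInsertFrom(src, n);
    }
#endif
    virtual ~IThing() = default;

protected:
#if defined(ABORT_ON_LOGICAL_ERROR)
    virtual void doInsertFrom(const IThing & src, size_t n) = 0;
#endif
};

struct ThingVector : IThing
{
    std::vector<int> data;

    /// Each implementation overrides whichever name the build mode declares virtual.
#if !defined(ABORT_ON_LOGICAL_ERROR)
    void insertFrom(const IThing & src, size_t n) override
#else
    void doInsertFrom(const IThing & src, size_t n) override
#endif
    {
        data.push_back(static_cast<const ThingVector &>(src).data[n]);
    }
};

The cost is the #if/#else pair at every override site, as seen throughout this commit; the payoff is that misuse (inserting across column types) becomes an immediate abort in sanitizer/debug CI instead of silent memory corruption.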

View File

@ -26,7 +26,11 @@ public:
size_t byteSize() const override { return 0; }
size_t byteSizeAt(size_t) const override { return 0; }
size_t allocatedBytes() const override { return 0; }
#if !defined(ABORT_ON_LOGICAL_ERROR)
int compareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#else
int doCompareAt(size_t, size_t, const IColumn &, int) const override { return 0; }
#endif
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
}
@ -67,12 +71,20 @@ public:
{
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertFrom(const IColumn &, size_t) override
#else
void doInsertFrom(const IColumn &, size_t) override
#endif
{
++s;
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override
#else
void doInsertRangeFrom(const IColumn & /*src*/, size_t /*start*/, size_t length) override
#endif
{
s += length;
}

View File

@ -85,7 +85,11 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method tryInsert is not supported for ColumnUnique.");
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
void insertRangeFrom(const IColumn &, size_t, size_t) override
#else
void doInsertRangeFrom(const IColumn &, size_t, size_t) override
#endif
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method insertRangeFrom is not supported for ColumnUnique.");
}

View File

@ -52,7 +52,11 @@ static ColumnPtr mockColumn(const DataTypePtr & type, size_t rows)
}
#if !defined(ABORT_ON_LOGICAL_ERROR)
static NO_INLINE void insertManyFrom(IColumn & dst, const IColumn & src)
#else
static NO_INLINE void doInsertManyFrom(IColumn & dst, const IColumn & src)
#endif
{
size_t size = src.size();
dst.insertManyFrom(src, size / 2, size);

View File

@ -38,10 +38,19 @@ namespace ErrorCodes
extern const int CANNOT_MREMAP;
}
void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size)
{
auto & logger = Poco::Logger::root();
LOG_FATAL(&logger, "Logical error: '{}'.", description);
if (trace)
LOG_FATAL(&logger, "Stack trace (when copying this message, always include the lines below):\n\n{}", StackTrace::toString(trace, trace_offset, trace_size));
abort();
}
void abortOnFailedAssertion(const String & description)
{
LOG_FATAL(&Poco::Logger::root(), "Logical error: '{}'.", description);
abort();
StackTrace st;
abortOnFailedAssertion(description, st.getFramePointers().data(), st.getOffset(), st.getSize());
}
bool terminate_on_any_exception = false;
@ -58,7 +67,7 @@ void handle_error_code(const std::string & msg, int code, bool remote, const Exc
#ifdef ABORT_ON_LOGICAL_ERROR
if (code == ErrorCodes::LOGICAL_ERROR)
{
abortOnFailedAssertion(msg);
abortOnFailedAssertion(msg, trace.data(), 0, trace.size());
}
#endif

View File

@ -25,8 +25,6 @@ namespace DB
class AtomicLogger;
[[noreturn]] void abortOnFailedAssertion(const String & description);
/// This flag can be set for testing purposes - to check that no exceptions are thrown.
extern bool terminate_on_any_exception;
@ -167,6 +165,8 @@ protected:
mutable std::vector<StackTrace::FramePointers> capture_thread_frame_pointers;
};
[[noreturn]] void abortOnFailedAssertion(const String & description, void * const * trace, size_t trace_offset, size_t trace_size);
[[noreturn]] void abortOnFailedAssertion(const String & description);
std::string getExceptionStackTraceString(const std::exception & e);
std::string getExceptionStackTraceString(std::exception_ptr e);

View File

@ -235,7 +235,7 @@ bool NamedCollectionFactory::loadIfNot(std::lock_guard<std::mutex> & lock)
loadFromConfig(context->getConfigRef(), lock);
loadFromSQL(lock);
if (metadata_storage->supportsPeriodicUpdate())
if (metadata_storage->isReplicated())
{
update_task = context->getSchedulePool().createTask("NamedCollectionsMetadataStorage", [this]{ updateFunc(); });
update_task->activate();
@ -357,6 +357,13 @@ void NamedCollectionFactory::reloadFromSQL()
add(std::move(collections), lock);
}
bool NamedCollectionFactory::usesReplicatedStorage()
{
std::lock_guard lock(mutex);
loadIfNot(lock);
return metadata_storage->isReplicated();
}
void NamedCollectionFactory::updateFunc()
{
LOG_TRACE(log, "Named collections background updating thread started");

View File

@ -34,6 +34,8 @@ public:
void updateFromSQL(const ASTAlterNamedCollectionQuery & query);
bool usesReplicatedStorage();
void loadIfNot();
void shutdown();

View File

@ -67,7 +67,7 @@ public:
virtual bool removeIfExists(const std::string & path) = 0;
virtual bool supportsPeriodicUpdate() const = 0;
virtual bool isReplicated() const = 0;
virtual bool waitUpdate(size_t /* timeout */) { return false; }
};
@ -89,7 +89,7 @@ public:
~LocalStorage() override = default;
bool supportsPeriodicUpdate() const override { return false; }
bool isReplicated() const override { return false; }
std::vector<std::string> list() const override
{
@ -221,7 +221,7 @@ public:
~ZooKeeperStorage() override = default;
bool supportsPeriodicUpdate() const override { return true; }
bool isReplicated() const override { return true; }
/// Return true if children changed.
bool waitUpdate(size_t timeout) override
@ -465,14 +465,14 @@ void NamedCollectionsMetadataStorage::writeCreateQuery(const ASTCreateNamedColle
storage->write(getFileName(query.collection_name), serializeAST(*normalized_query), replace);
}
bool NamedCollectionsMetadataStorage::supportsPeriodicUpdate() const
bool NamedCollectionsMetadataStorage::isReplicated() const
{
return storage->supportsPeriodicUpdate();
return storage->isReplicated();
}
bool NamedCollectionsMetadataStorage::waitUpdate()
{
if (!storage->supportsPeriodicUpdate())
if (!storage->isReplicated())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Periodic updates are not supported");
const auto & config = Context::getGlobalContextInstance()->getConfigRef();

View File

@ -30,7 +30,7 @@ public:
/// Return true if update was made
bool waitUpdate();
bool supportsPeriodicUpdate() const;
bool isReplicated() const;
private:
class INamedCollectionsStorage;

View File

@ -545,7 +545,7 @@ std::string StackTrace::toString() const
return toStringCached(frame_pointers, offset, size);
}
std::string StackTrace::toString(void ** frame_pointers_raw, size_t offset, size_t size)
std::string StackTrace::toString(void * const * frame_pointers_raw, size_t offset, size_t size)
{
__msan_unpoison(frame_pointers_raw, size * sizeof(*frame_pointers_raw));

View File

@ -59,7 +59,7 @@ public:
const FramePointers & getFramePointers() const { return frame_pointers; }
std::string toString() const;
static std::string toString(void ** frame_pointers, size_t offset, size_t size);
static std::string toString(void * const * frame_pointers, size_t offset, size_t size);
static void dropCache();
/// @param fatal - if true, will process inline frames (slower)

View File

@ -346,6 +346,7 @@ class IColumn;
\
M(Bool, ignore_on_cluster_for_replicated_udf_queries, false, "Ignore ON CLUSTER clause for replicated UDF management queries.", 0) \
M(Bool, ignore_on_cluster_for_replicated_access_entities_queries, false, "Ignore ON CLUSTER clause for replicated access entities management queries.", 0) \
M(Bool, ignore_on_cluster_for_replicated_named_collections_queries, false, "Ignore ON CLUSTER clause for replicated named collections management queries.", 0) \
/** Settings for testing hedged requests */ \
M(Milliseconds, sleep_in_send_tables_status_ms, 0, "Time to sleep in sending tables status response in TCPHandler", 0) \
M(Milliseconds, sleep_in_send_data_ms, 0, "Time to sleep in sending data in TCPHandler", 0) \

View File

@ -76,6 +76,7 @@ static std::initializer_list<std::pair<ClickHouseVersion, SettingsChangesHistory
{"azure_sdk_max_retries", 10, 10, "Maximum number of retries in azure sdk"},
{"azure_sdk_retry_initial_backoff_ms", 10, 10, "Minimal backoff between retries in azure sdk"},
{"azure_sdk_retry_max_backoff_ms", 1000, 1000, "Maximal backoff between retries in azure sdk"},
{"ignore_on_cluster_for_replicated_named_collections_queries", false, false, "Ignore ON CLUSTER clause for replicated named collections management queries."},
{"postgresql_connection_attempt_timeout", 2, 2, "Allow to control 'connect_timeout' parameter of PostgreSQL connection."},
{"postgresql_connection_pool_retries", 2, 2, "Allow to control the number of retries in PostgreSQL connection pool."}
}},

View File

@ -11,6 +11,7 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTSubquery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTTTLElement.h>
#include <Poco/String.h>
@ -211,6 +212,13 @@ void DDLLoadingDependencyVisitor::extractTableNameFromArgument(const ASTFunction
qualified_name.database = table_identifier->getDatabaseName();
qualified_name.table = table_identifier->shortName();
}
else if (arg->as<ASTSubquery>())
{
/// Allow IN subquery.
/// Do not add tables from the subquery into dependencies,
/// because CREATE will succeed anyway.
return;
}
else
{
assert(false);

View File

@ -107,12 +107,24 @@ void DatabaseAtomic::attachTable(ContextPtr /* context_ */, const String & name,
StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String & name)
{
// It is important to call the destructors of not_in_use without
// the mutex locked, to avoid a potential deadlock.
DetachedTables not_in_use;
std::lock_guard lock(mutex);
auto table = DatabaseOrdinary::detachTableUnlocked(name);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleanupDetachedTables();
StoragePtr table;
{
std::lock_guard lock(mutex);
table = DatabaseOrdinary::detachTableUnlocked(name);
table_name_to_path.erase(name);
detached_tables.emplace(table->getStorageID().uuid, table);
not_in_use = cleanupDetachedTables();
}
if (!not_in_use.empty())
{
not_in_use.clear();
LOG_DEBUG(log, "Finished removing not used detached tables");
}
return table;
}
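The detachTable rewrite above is the classic "destroy outside the lock" idiom: the shared pointers collected into not_in_use keep the detached tables alive until the lock_guard scope ends, so their destructors (which may re-enter database code that takes the same mutex) run only after the mutex is released. A generic hedged sketch of the idiom, with hypothetical types:

#include <map>
#include <memory>
#include <mutex>

struct Table
{
    ~Table() { /* may call back into code that locks `mutex`; must run unlocked */ }
};

std::mutex mutex;
std::map<int, std::shared_ptr<Table>> detached_tables;

std::shared_ptr<Table> detachTable(int id)
{
    std::map<int, std::shared_ptr<Table>> not_in_use; /// declared before the lock scope
    std::shared_ptr<Table> table;
    {
        std::lock_guard lock(mutex);
        table = detached_tables[id];
        not_in_use.swap(detached_tables); /// collect entries under the lock...
    }
    not_in_use.clear(); /// ...and run their destructors after it is released
    return table;
}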

View File

@ -4,6 +4,7 @@
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/removeOnClusterClauseIfNeeded.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
@ -13,14 +14,16 @@ namespace DB
BlockIO InterpreterAlterNamedCollectionQuery::execute()
{
auto current_context = getContext();
const auto & query = query_ptr->as<const ASTAlterNamedCollectionQuery &>();
const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext());
const auto & query = updated_query->as<const ASTAlterNamedCollectionQuery &>();
current_context->checkAccess(AccessType::ALTER_NAMED_COLLECTION, query.collection_name);
if (!query.cluster.empty())
{
DDLQueryOnClusterParams params;
return executeDDLQueryOnCluster(query_ptr, current_context, params);
return executeDDLQueryOnCluster(updated_query, current_context, params);
}
NamedCollectionFactory::instance().updateFromSQL(query);

View File

@ -4,6 +4,7 @@
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/removeOnClusterClauseIfNeeded.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
@ -13,14 +14,16 @@ namespace DB
BlockIO InterpreterCreateNamedCollectionQuery::execute()
{
auto current_context = getContext();
const auto & query = query_ptr->as<const ASTCreateNamedCollectionQuery &>();
const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext());
const auto & query = updated_query->as<const ASTCreateNamedCollectionQuery &>();
current_context->checkAccess(AccessType::CREATE_NAMED_COLLECTION, query.collection_name);
if (!query.cluster.empty())
{
DDLQueryOnClusterParams params;
return executeDDLQueryOnCluster(query_ptr, current_context, params);
return executeDDLQueryOnCluster(updated_query, current_context, params);
}
NamedCollectionFactory::instance().createFromSQL(query);

View File

@ -4,6 +4,7 @@
#include <Access/ContextAccess.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
#include <Interpreters/removeOnClusterClauseIfNeeded.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
@ -13,14 +14,16 @@ namespace DB
BlockIO InterpreterDropNamedCollectionQuery::execute()
{
auto current_context = getContext();
const auto & query = query_ptr->as<const ASTDropNamedCollectionQuery &>();
const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, getContext());
const auto & query = updated_query->as<const ASTDropNamedCollectionQuery &>();
current_context->checkAccess(AccessType::DROP_NAMED_COLLECTION, query.collection_name);
if (!query.cluster.empty())
{
DDLQueryOnClusterParams params;
return executeDDLQueryOnCluster(query_ptr, current_context, params);
return executeDDLQueryOnCluster(updated_query, current_context, params);
}
NamedCollectionFactory::instance().removeFromSQL(query);

View File

@ -15,6 +15,10 @@
#include <Parsers/Access/ASTCreateUserQuery.h>
#include <Parsers/Access/ASTDropAccessEntityQuery.h>
#include <Parsers/Access/ASTGrantQuery.h>
#include <Parsers/ASTCreateNamedCollectionQuery.h>
#include <Parsers/ASTAlterNamedCollectionQuery.h>
#include <Parsers/ASTDropNamedCollectionQuery.h>
#include <Common/NamedCollections/NamedCollectionsFactory.h>
namespace DB
@ -38,6 +42,13 @@ static bool isAccessControlQuery(const ASTPtr & query)
|| query->as<ASTGrantQuery>();
}
static bool isNamedCollectionQuery(const ASTPtr & query)
{
return query->as<ASTCreateNamedCollectionQuery>()
|| query->as<ASTDropNamedCollectionQuery>()
|| query->as<ASTAlterNamedCollectionQuery>();
}
ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, const WithoutOnClusterASTRewriteParams & params)
{
auto * query_on_cluster = dynamic_cast<ASTQueryWithOnCluster *>(query.get());
@ -50,7 +61,10 @@ ASTPtr removeOnClusterClauseIfNeeded(const ASTPtr & query, ContextPtr context, c
&& context->getUserDefinedSQLObjectsStorage().isReplicated())
|| (isAccessControlQuery(query)
&& context->getSettings().ignore_on_cluster_for_replicated_access_entities_queries
&& context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE)))
&& context->getAccessControl().containsStorage(ReplicatedAccessStorage::STORAGE_TYPE))
|| (isNamedCollectionQuery(query)
&& context->getSettings().ignore_on_cluster_for_replicated_named_collections_queries
&& NamedCollectionFactory::instance().usesReplicatedStorage()))
{
LOG_DEBUG(getLogger("removeOnClusterClauseIfNeeded"), "ON CLUSTER clause was ignored for query {}", query->getID());
return query_on_cluster->getRewrittenASTWithoutOnCluster(params);
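The three interpreter hunks above are identical up to the AST type and the access right they check. Their shared shape, condensed into one hedged sketch (executeNamedCollectionDDL is a hypothetical helper; the functions it calls are the real ones shown above):

BlockIO executeNamedCollectionDDL(const ASTPtr & query_ptr, ContextPtr context)
{
    /// May strip ON CLUSTER if the setting is on and the metadata storage is replicated.
    const auto updated_query = removeOnClusterClauseIfNeeded(query_ptr, context);
    const auto & query = updated_query->as<const ASTCreateNamedCollectionQuery &>();
    context->checkAccess(AccessType::CREATE_NAMED_COLLECTION, query.collection_name);

    if (!query.cluster.empty()) /// ON CLUSTER survived the rewrite: fan out through DDLWorker
    {
        DDLQueryOnClusterParams params;
        return executeDDLQueryOnCluster(updated_query, context, params);
    }

    /// Local path: ZooKeeper-backed named-collection storage propagates the change itself.
    NamedCollectionFactory::instance().createFromSQL(query);
    return {};
}

The point of the rewrite is that with ignore_on_cluster_for_replicated_named_collections_queries enabled and replicated named collections, ON CLUSTER would duplicate work the replicated storage already does, so the clause is dropped before either branch is taken.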

View File

@ -874,46 +874,6 @@ static Field applyFunctionForField(
return (*col)[0];
}
/// The case when arguments may have types different than in the primary key.
static std::pair<Field, DataTypePtr> applyFunctionForFieldOfUnknownType(
const FunctionBasePtr & func,
const DataTypePtr & arg_type,
const Field & arg_value)
{
ColumnsWithTypeAndName arguments{{ arg_type->createColumnConst(1, arg_value), arg_type, "x" }};
DataTypePtr return_type = func->getResultType();
auto col = func->execute(arguments, return_type, 1);
Field result = (*col)[0];
return {std::move(result), std::move(return_type)};
}
/// Same as above but for binary operators
static std::pair<Field, DataTypePtr> applyBinaryFunctionForFieldOfUnknownType(
const FunctionOverloadResolverPtr & func,
const DataTypePtr & arg_type,
const Field & arg_value,
const DataTypePtr & arg_type2,
const Field & arg_value2)
{
ColumnsWithTypeAndName arguments{
{arg_type->createColumnConst(1, arg_value), arg_type, "x"}, {arg_type2->createColumnConst(1, arg_value2), arg_type2, "y"}};
FunctionBasePtr func_base = func->build(arguments);
DataTypePtr return_type = func_base->getResultType();
auto col = func_base->execute(arguments, return_type, 1);
Field result = (*col)[0];
return {std::move(result), std::move(return_type)};
}
static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr & current_type, const FieldRef & field)
{
/// Fallback for fields without block reference.
@ -940,164 +900,92 @@ static FieldRef applyFunction(const FunctionBasePtr & func, const DataTypePtr &
return {field.columns, field.row_idx, result_idx};
}
/** When table's key has expression with these functions from a column,
* and when a column in a query is compared with a constant, such as:
* CREATE TABLE (x String) ORDER BY toDate(x)
* SELECT ... WHERE x LIKE 'Hello%'
* we want to apply the function to the constant for index analysis,
* but should modify it to pass on unparsable values.
*/
static std::set<std::string_view> date_time_parsing_functions = {
"toDate",
"toDate32",
"toDateTime",
"toDateTime64",
"parseDateTimeBestEffort",
"parseDateTimeBestEffortUS",
"parseDateTime32BestEffort",
"parseDateTime64BestEffort",
"parseDateTime",
"parseDateTimeInJodaSyntax",
};
/** The key functional expression constraint may be inferred from a plain column in the expression.
* For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
* it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
* condition also holds, so the index may be used to select only parts satisfying this condition.
*
* To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the
* inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`).
* Instead, we can qualify only functions that do not transform the range (for example rounding),
* which while not strictly monotonic, are monotonic everywhere on the input range.
*/
bool KeyCondition::transformConstantWithValidFunctions(
ContextPtr context,
const String & expr_name,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type,
std::function<bool(const IFunctionBase &, const IDataType &)> always_monotonic) const
/// Sequentially applies functions to the column and returns `true`
/// if all arguments are compatible with the function signatures
/// and none of the functions produces `NULL` output.
///
/// After the function chain is executed, fills the result column and its type.
bool applyFunctionChainToColumn(
const ColumnPtr & in_column,
const DataTypePtr & in_data_type,
const std::vector<FunctionBasePtr> & functions,
ColumnPtr & out_column,
DataTypePtr & out_data_type)
{
const auto & sample_block = key_expr->getSampleBlock();
// Remove LowCardinality from the input column and convert it to a regular one
auto result_column = in_column->convertToFullIfNeeded();
auto result_type = removeLowCardinality(in_data_type);
for (const auto & node : key_expr->getNodes())
// If the function sequence is empty, return the full non-LowCardinality column
if (functions.empty())
{
auto it = key_columns.find(node.result_name);
if (it != key_columns.end())
{
std::stack<const ActionsDAG::Node *> chain;
const auto * cur_node = &node;
bool is_valid_chain = true;
while (is_valid_chain)
{
if (cur_node->result_name == expr_name)
break;
chain.push(cur_node);
if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2)
{
is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type);
const ActionsDAG::Node * next_node = nullptr;
for (const auto * arg : cur_node->children)
{
if (arg->column && isColumnConst(*arg->column))
continue;
if (next_node)
is_valid_chain = false;
next_node = arg;
}
if (!next_node)
is_valid_chain = false;
cur_node = next_node;
}
else if (cur_node->type == ActionsDAG::ActionType::ALIAS)
cur_node = cur_node->children.front();
else
is_valid_chain = false;
}
if (is_valid_chain)
{
out_type = removeLowCardinality(out_type);
auto const_type = removeLowCardinality(cur_node->result_type);
auto const_column = out_type->createColumnConst(1, out_value);
auto const_value = (*castColumnAccurateOrNull({const_column, out_type, ""}, const_type))[0];
if (const_value.isNull())
return false;
while (!chain.empty())
{
const auto * func = chain.top();
chain.pop();
if (func->type != ActionsDAG::ActionType::FUNCTION)
continue;
const auto & func_name = func->function_base->getName();
auto func_base = func->function_base;
const auto & arg_types = func_base->getArgumentTypes();
if (date_time_parsing_functions.contains(func_name) && !arg_types.empty() && isStringOrFixedString(arg_types[0]))
{
auto func_or_null = FunctionFactory::instance().get(func_name + "OrNull", context);
ColumnsWithTypeAndName arguments;
int i = 0;
for (const auto & type : func->function_base->getArgumentTypes())
arguments.push_back({nullptr, type, fmt::format("_{}", i++)});
func_base = func_or_null->build(arguments);
}
if (func->children.size() == 1)
{
std::tie(const_value, const_type)
= applyFunctionForFieldOfUnknownType(func_base, const_type, const_value);
}
else if (func->children.size() == 2)
{
const auto * left = func->children[0];
const auto * right = func->children[1];
if (left->column && isColumnConst(*left->column))
{
auto left_arg_type = left->result_type;
auto left_arg_value = (*left->column)[0];
std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType(
FunctionFactory::instance().get(func_base->getName(), context),
left_arg_type, left_arg_value, const_type, const_value);
}
else
{
auto right_arg_type = right->result_type;
auto right_arg_value = (*right->column)[0];
std::tie(const_value, const_type) = applyBinaryFunctionForFieldOfUnknownType(
FunctionFactory::instance().get(func_base->getName(), context),
const_type, const_value, right_arg_type, right_arg_value);
}
}
if (const_value.isNull())
return false;
}
out_key_column_num = it->second;
out_key_column_type = sample_block.getByName(it->first).type;
out_value = const_value;
out_type = const_type;
return true;
}
}
out_column = result_column;
out_data_type = result_type;
return true;
}
return false;
// If the first function takes no arguments, the input column cannot be transformed
if (functions[0]->getArgumentTypes().empty())
{
return false;
}
// And cast it to the argument type of the first function in the chain
auto in_argument_type = functions[0]->getArgumentTypes()[0];
if (canBeSafelyCasted(result_type, in_argument_type))
{
result_column = castColumnAccurate({result_column, result_type, ""}, in_argument_type);
result_type = in_argument_type;
}
// If the column cannot be cast accurately, cast with the OrNull variant; if all
// values were cast successfully (no nulls), unpack the nested column from the
// nullable one. If any further function requires Nullable input, it will be
// able to cast it back.
else
{
result_column = castColumnAccurateOrNull({result_column, result_type, ""}, in_argument_type);
const auto & result_column_nullable = assert_cast<const ColumnNullable &>(*result_column);
const auto & null_map_data = result_column_nullable.getNullMapData();
for (char8_t i : null_map_data)
{
if (i != 0)
return false;
}
result_column = result_column_nullable.getNestedColumnPtr();
result_type = removeNullable(in_argument_type);
}
for (const auto & func : functions)
{
if (func->getArgumentTypes().empty())
return false;
auto argument_type = func->getArgumentTypes()[0];
if (!canBeSafelyCasted(result_type, argument_type))
return false;
result_column = castColumnAccurate({result_column, result_type, ""}, argument_type);
result_column = func->execute({{result_column, argument_type, ""}}, func->getResultType(), result_column->size());
result_type = func->getResultType();
// Transform nullable columns to their nested ones when no nulls were produced
if (result_column->isNullable())
{
const auto & result_column_nullable = assert_cast<const ColumnNullable &>(*result_column);
const auto & null_map_data = result_column_nullable.getNullMapData();
for (char8_t i : null_map_data)
{
if (i != 0)
return false;
}
result_column = result_column_nullable.getNestedColumnPtr();
result_type = removeNullable(func->getResultType());
}
}
out_column = result_column;
out_data_type = result_type;
return true;
}
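A hedged usage sketch for the helper above, assuming a valid ContextPtr named context is in scope; everything else uses APIs that appear in this file:

auto string_type = std::make_shared<DataTypeString>();
ColumnPtr input = string_type->createColumnConst(1, Field(String("2024-07-12")))->convertToFullColumnIfConst();

/// Build a one-element chain: toDateOrNull(String) -> Nullable(Date).
ColumnsWithTypeAndName args{{nullptr, string_type, "x"}};
FunctionBasePtr to_date = FunctionFactory::instance().get("toDateOrNull", context)->build(args);

ColumnPtr out_column;
DataTypePtr out_type;
if (applyFunctionChainToColumn(input, string_type, {to_date}, out_column, out_type))
{
    /// Success: no row produced NULL, so the helper unwrapped Nullable(Date) to Date
    /// and out_column holds one Date value.
}
else
{
    /// The string did not parse (a NULL appeared) or an accurate cast was not possible.
}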
bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
@ -1118,13 +1006,13 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
if (out_value.isNull())
return false;
return transformConstantWithValidFunctions(
MonotonicFunctionsChain transform_functions;
auto can_transform_constant = extractMonotonicFunctionsChainFromKey(
node.getTreeContext().getQueryContext(),
expr_name,
out_key_column_num,
out_key_column_type,
out_value,
out_type,
transform_functions,
[](const IFunctionBase & func, const IDataType & type)
{
if (!func.hasInformationAboutMonotonicity())
@ -1138,6 +1026,27 @@ bool KeyCondition::canConstantBeWrappedByMonotonicFunctions(
}
return true;
});
if (!can_transform_constant)
return false;
auto const_column = out_type->createColumnConst(1, out_value);
ColumnPtr transformed_const_column;
DataTypePtr transformed_const_type;
bool constant_transformed = applyFunctionChainToColumn(
const_column,
out_type,
transform_functions,
transformed_const_column,
transformed_const_type);
if (!constant_transformed)
return false;
out_value = (*transformed_const_column)[0];
out_type = transformed_const_type;
return true;
}
/// Looking for possible transformation of `column = constant` into `partition_expr = function(constant)`
@ -1173,28 +1082,48 @@ bool KeyCondition::canConstantBeWrappedByFunctions(
if (out_value.isNull())
return false;
return transformConstantWithValidFunctions(
MonotonicFunctionsChain transform_functions;
auto can_transform_constant = extractMonotonicFunctionsChainFromKey(
node.getTreeContext().getQueryContext(),
expr_name,
out_key_column_num,
out_key_column_type,
out_value,
transform_functions,
[](const IFunctionBase & func, const IDataType &) { return func.isDeterministic(); });
if (!can_transform_constant)
return false;
auto const_column = out_type->createColumnConst(1, out_value);
ColumnPtr transformed_const_column;
DataTypePtr transformed_const_type;
bool constant_transformed = applyFunctionChainToColumn(
const_column,
out_type,
[](const IFunctionBase & func, const IDataType &)
{
return func.isDeterministic();
});
transform_functions,
transformed_const_column,
transformed_const_type);
if (!constant_transformed)
return false;
out_value = (*transformed_const_column)[0];
out_type = transformed_const_type;
return true;
}
bool KeyCondition::tryPrepareSetIndex(
const RPNBuilderFunctionTreeNode & func,
RPNElement & out,
size_t & out_key_column_num)
size_t & out_key_column_num,
bool & is_constant_transformed)
{
const auto & left_arg = func.getArgumentAt(0);
out_key_column_num = 0;
std::vector<MergeTreeSetIndex::KeyTuplePositionMapping> indexes_mapping;
std::vector<MonotonicFunctionsChain> set_transforming_chains;
DataTypes data_types;
auto get_key_tuple_position_mapping = [&](const RPNBuilderTreeNode & node, size_t tuple_index)
@ -1203,6 +1132,7 @@ bool KeyCondition::tryPrepareSetIndex(
index_mapping.tuple_index = tuple_index;
DataTypePtr data_type;
std::optional<size_t> key_space_filling_curve_argument_pos;
MonotonicFunctionsChain set_transforming_chain;
if (isKeyPossiblyWrappedByMonotonicFunctions(
node, index_mapping.key_index, key_space_filling_curve_argument_pos, data_type, index_mapping.functions)
&& !key_space_filling_curve_argument_pos) /// We don't support the analysis of space-filling curves and IN set.
@ -1210,6 +1140,15 @@ bool KeyCondition::tryPrepareSetIndex(
indexes_mapping.push_back(index_mapping);
data_types.push_back(data_type);
out_key_column_num = std::max(out_key_column_num, index_mapping.key_index);
set_transforming_chains.push_back(set_transforming_chain);
}
// For the partition index, check if the set can be transformed to prune any partitions
else if (single_point && canSetValuesBeWrappedByFunctions(node, index_mapping.key_index, data_type, set_transforming_chain))
{
indexes_mapping.push_back(index_mapping);
data_types.push_back(data_type);
out_key_column_num = std::max(out_key_column_num, index_mapping.key_index);
set_transforming_chains.push_back(set_transforming_chain);
}
};
@ -1275,6 +1214,23 @@ bool KeyCondition::tryPrepareSetIndex(
auto set_element_type = set_types[set_element_index];
auto set_column = set_columns[set_element_index];
if (!set_transforming_chains[indexes_mapping_index].empty())
{
ColumnPtr transformed_set_column;
DataTypePtr transformed_set_type;
if (!applyFunctionChainToColumn(
set_column,
set_element_type,
set_transforming_chains[indexes_mapping_index],
transformed_set_column,
transformed_set_type))
return false;
set_column = transformed_set_column;
set_element_type = transformed_set_type;
is_constant_transformed = true;
}
if (canBeSafelyCasted(set_element_type, key_column_type))
{
set_columns[set_element_index] = castColumn({set_column, set_element_type, {}}, key_column_type);
@ -1571,6 +1527,191 @@ bool KeyCondition::isKeyPossiblyWrappedByMonotonicFunctionsImpl(
return false;
}
/** When table's key has expression with these functions from a column,
* and when a column in a query is compared with a constant, such as:
* CREATE TABLE (x String) ORDER BY toDate(x)
* SELECT ... WHERE x LIKE 'Hello%'
* we want to apply the function to the constant for index analysis,
* but should modify it to pass on unparsable values.
*/
static std::set<std::string_view> date_time_parsing_functions = {
"toDate",
"toDate32",
"toDateTime",
"toDateTime64",
"parseDateTimeBestEffort",
"parseDateTimeBestEffortUS",
"parseDateTime32BestEffort",
"parseDateTime64BestEffort",
"parseDateTime",
"parseDateTimeInJodaSyntax",
};
/** The key functional expression constraint may be inferred from a plain column in the expression.
* For example, if the key contains `toStartOfHour(Timestamp)` and query contains `WHERE Timestamp >= now()`,
* it can be assumed that if `toStartOfHour()` is monotonic on [now(), inf), the `toStartOfHour(Timestamp) >= toStartOfHour(now())`
* condition also holds, so the index may be used to select only parts satisfying this condition.
*
* To check the assumption, we'd need to assert that the inverse function to this transformation is also monotonic, however the
* inversion isn't exported (or even viable for not strictly monotonic functions such as `toStartOfHour()`).
* Instead, we can qualify only functions that do not transform the range (for example rounding),
* which while not strictly monotonic, are monotonic everywhere on the input range.
*/
bool KeyCondition::extractMonotonicFunctionsChainFromKey(
ContextPtr context,
const String & expr_name,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
MonotonicFunctionsChain & out_functions_chain,
std::function<bool(const IFunctionBase &, const IDataType &)> always_monotonic) const
{
const auto & sample_block = key_expr->getSampleBlock();
for (const auto & node : key_expr->getNodes())
{
auto it = key_columns.find(node.result_name);
if (it != key_columns.end())
{
std::stack<const ActionsDAG::Node *> chain;
const auto * cur_node = &node;
bool is_valid_chain = true;
while (is_valid_chain)
{
if (cur_node->result_name == expr_name)
break;
chain.push(cur_node);
if (cur_node->type == ActionsDAG::ActionType::FUNCTION && cur_node->children.size() <= 2)
{
is_valid_chain = always_monotonic(*cur_node->function_base, *cur_node->result_type);
const ActionsDAG::Node * next_node = nullptr;
for (const auto * arg : cur_node->children)
{
if (arg->column && isColumnConst(*arg->column))
continue;
if (next_node)
is_valid_chain = false;
next_node = arg;
}
if (!next_node)
is_valid_chain = false;
cur_node = next_node;
}
else if (cur_node->type == ActionsDAG::ActionType::ALIAS)
cur_node = cur_node->children.front();
else
is_valid_chain = false;
}
if (is_valid_chain)
{
while (!chain.empty())
{
const auto * func = chain.top();
chain.pop();
if (func->type != ActionsDAG::ActionType::FUNCTION)
continue;
auto func_name = func->function_base->getName();
auto func_base = func->function_base;
ColumnsWithTypeAndName arguments;
ColumnWithTypeAndName const_arg;
FunctionWithOptionalConstArg::Kind kind = FunctionWithOptionalConstArg::Kind::NO_CONST;
if (date_time_parsing_functions.contains(func_name))
{
const auto & arg_types = func_base->getArgumentTypes();
if (!arg_types.empty() && isStringOrFixedString(arg_types[0]))
{
func_name = func_name + "OrNull";
}
}
auto func_builder = FunctionFactory::instance().tryGet(func_name, context);
if (func->children.size() == 1)
{
arguments.push_back({nullptr, removeLowCardinality(func->children[0]->result_type), ""});
}
else if (func->children.size() == 2)
{
const auto * left = func->children[0];
const auto * right = func->children[1];
if (left->column && isColumnConst(*left->column))
{
const_arg = {left->result_type->createColumnConst(0, (*left->column)[0]), left->result_type, ""};
arguments.push_back(const_arg);
arguments.push_back({nullptr, removeLowCardinality(right->result_type), ""});
kind = FunctionWithOptionalConstArg::Kind::LEFT_CONST;
}
else
{
const_arg = {right->result_type->createColumnConst(0, (*right->column)[0]), right->result_type, ""};
arguments.push_back({nullptr, removeLowCardinality(left->result_type), ""});
arguments.push_back(const_arg);
kind = FunctionWithOptionalConstArg::Kind::RIGHT_CONST;
}
}
auto out_func = func_builder->build(arguments);
if (kind == FunctionWithOptionalConstArg::Kind::NO_CONST)
out_functions_chain.push_back(out_func);
else
out_functions_chain.push_back(std::make_shared<FunctionWithOptionalConstArg>(out_func, const_arg, kind));
}
out_key_column_num = it->second;
out_key_column_type = sample_block.getByName(it->first).type;
return true;
}
}
}
return false;
}
bool KeyCondition::canSetValuesBeWrappedByFunctions(
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
MonotonicFunctionsChain & out_functions_chain)
{
// Check if the column name matches any of the key subexpressions
String expr_name = node.getColumnName();
if (array_joined_column_names.contains(expr_name))
return false;
if (!key_subexpr_names.contains(expr_name))
{
expr_name = node.getColumnNameWithModuloLegacy();
if (!key_subexpr_names.contains(expr_name))
return false;
}
return extractMonotonicFunctionsChainFromKey(
node.getTreeContext().getQueryContext(),
expr_name,
out_key_column_num,
out_key_res_column_type,
out_functions_chain,
[](const IFunctionBase & func, const IDataType &)
{
return func.isDeterministic();
});
}
static void castValueToType(const DataTypePtr & desired_type, Field & src_value, const DataTypePtr & src_type, const String & node_column_name)
{
@ -1649,7 +1790,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme
if (functionIsInOrGlobalInOperator(func_name))
{
if (tryPrepareSetIndex(func, out, key_column_num))
if (tryPrepareSetIndex(func, out, key_column_num, is_constant_transformed))
{
key_arg_pos = 0;
is_set_const = true;

View File

@ -14,6 +14,7 @@
#include <Storages/SelectQueryInfo.h>
#include <Storages/MergeTree/RPNBuilder.h>
#include "DataTypes/Serializations/ISerialization.h"
namespace DB
@ -253,13 +254,12 @@ private:
DataTypePtr & out_key_column_type,
std::vector<RPNBuilderFunctionTreeNode> & out_functions_chain);
bool transformConstantWithValidFunctions(
bool extractMonotonicFunctionsChainFromKey(
ContextPtr context,
const String & expr_name,
size_t & out_key_column_num,
DataTypePtr & out_key_column_type,
Field & out_value,
DataTypePtr & out_type,
MonotonicFunctionsChain & out_functions_chain,
std::function<bool(const IFunctionBase &, const IDataType &)> always_monotonic) const;
bool canConstantBeWrappedByMonotonicFunctions(
@ -276,13 +276,25 @@ private:
Field & out_value,
DataTypePtr & out_type);
/// Checks if the node is a subexpression of any of the key column expressions,
/// wrapped by deterministic functions; if so, returns `true` and
/// specifies the key column position / type. It also produces the
/// chain of functions which should be executed on the set to transform it
/// into key column values.
bool canSetValuesBeWrappedByFunctions(
const RPNBuilderTreeNode & node,
size_t & out_key_column_num,
DataTypePtr & out_key_res_column_type,
MonotonicFunctionsChain & out_functions_chain);
/// If it's possible to make an RPNElement
/// that will filter values (possibly tuples) by the content of 'prepared_set',
/// do it and return true.
bool tryPrepareSetIndex(
const RPNBuilderFunctionTreeNode & func,
RPNElement & out,
size_t & out_key_column_num);
size_t & out_key_column_num,
bool & is_constant_transformed);
/// Checks that the index can not be used.
///

View File

@ -8,6 +8,7 @@
#include <Common/logger_useful.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnArray.h>
#include <Formats/FormatFactory.h>
#include <IO/ReadBufferFromFileBase.h>
@ -30,6 +31,7 @@
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/NestedUtils.h>
#include <boost/algorithm/string/case_conv.hpp>
#include <parquet/file_reader.h>
@ -111,7 +113,7 @@ struct DeltaLakeMetadataImpl
std::set<String> result_files;
NamesAndTypesList current_schema;
DataLakePartitionColumns current_partition_columns;
const auto checkpoint_version = getCheckpointIfExists(result_files);
const auto checkpoint_version = getCheckpointIfExists(result_files, current_schema, current_partition_columns);
if (checkpoint_version)
{
@ -205,9 +207,32 @@ struct DeltaLakeMetadataImpl
Poco::Dynamic::Var json = parser.parse(json_str);
Poco::JSON::Object::Ptr object = json.extract<Poco::JSON::Object::Ptr>();
// std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
// object->stringify(oss);
// LOG_TEST(log, "Metadata: {}", oss.str());
std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
object->stringify(oss);
LOG_TEST(log, "Metadata: {}", oss.str());
if (object->has("metaData"))
{
const auto metadata_object = object->get("metaData").extract<Poco::JSON::Object::Ptr>();
const auto schema_object = metadata_object->getValue<String>("schemaString");
Poco::JSON::Parser p;
Poco::Dynamic::Var fields_json = p.parse(schema_object);
const Poco::JSON::Object::Ptr & fields_object = fields_json.extract<Poco::JSON::Object::Ptr>();
auto current_schema = parseMetadata(fields_object);
if (file_schema.empty())
{
file_schema = current_schema;
}
else if (file_schema != current_schema)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Reading from files with different schema is not possible "
"({} is different from {})",
file_schema.toString(), current_schema.toString());
}
}
if (object->has("add"))
{
@ -230,7 +255,12 @@ struct DeltaLakeMetadataImpl
const auto value = partition_values->getValue<String>(partition_name);
auto name_and_type = file_schema.tryGetByName(partition_name);
if (!name_and_type)
throw Exception(ErrorCodes::LOGICAL_ERROR, "No such column in schema: {}", partition_name);
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"No such column in schema: {} (schema: {})",
partition_name, file_schema.toNamesAndTypesDescription());
}
auto field = getFieldValue(value, name_and_type->type);
current_partition_columns.emplace_back(*name_and_type, field);
@ -246,52 +276,35 @@ struct DeltaLakeMetadataImpl
auto path = object->get("remove").extract<Poco::JSON::Object::Ptr>()->getValue<String>("path");
result.erase(fs::path(configuration->getPath()) / path);
}
if (object->has("metaData"))
{
const auto metadata_object = object->get("metaData").extract<Poco::JSON::Object::Ptr>();
const auto schema_object = metadata_object->getValue<String>("schemaString");
Poco::JSON::Parser p;
Poco::Dynamic::Var fields_json = parser.parse(schema_object);
Poco::JSON::Object::Ptr fields_object = fields_json.extract<Poco::JSON::Object::Ptr>();
const auto fields = fields_object->get("fields").extract<Poco::JSON::Array::Ptr>();
NamesAndTypesList current_schema;
for (size_t i = 0; i < fields->size(); ++i)
{
const auto field = fields->getObject(static_cast<UInt32>(i));
auto column_name = field->getValue<String>("name");
auto type = field->getValue<String>("type");
auto is_nullable = field->getValue<bool>("nullable");
std::string physical_name;
auto schema_metadata_object = field->get("metadata").extract<Poco::JSON::Object::Ptr>();
if (schema_metadata_object->has("delta.columnMapping.physicalName"))
physical_name = schema_metadata_object->getValue<String>("delta.columnMapping.physicalName");
else
physical_name = column_name;
LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}",
column_name, type, is_nullable, physical_name);
current_schema.push_back({physical_name, getFieldType(field, "type", is_nullable)});
}
if (file_schema.empty())
{
file_schema = current_schema;
}
else if (file_schema != current_schema)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Reading from files with different schema is not possible "
"({} is different from {})",
file_schema.toString(), current_schema.toString());
}
}
}
}
NamesAndTypesList parseMetadata(const Poco::JSON::Object::Ptr & metadata_json)
{
NamesAndTypesList schema;
const auto fields = metadata_json->get("fields").extract<Poco::JSON::Array::Ptr>();
for (size_t i = 0; i < fields->size(); ++i)
{
const auto field = fields->getObject(static_cast<UInt32>(i));
auto column_name = field->getValue<String>("name");
auto type = field->getValue<String>("type");
auto is_nullable = field->getValue<bool>("nullable");
std::string physical_name;
auto schema_metadata_object = field->get("metadata").extract<Poco::JSON::Object::Ptr>();
if (schema_metadata_object->has("delta.columnMapping.physicalName"))
physical_name = schema_metadata_object->getValue<String>("delta.columnMapping.physicalName");
else
physical_name = column_name;
LOG_TEST(log, "Found column: {}, type: {}, nullable: {}, physical name: {}",
column_name, type, is_nullable, physical_name);
schema.push_back({physical_name, getFieldType(field, "type", is_nullable)});
}
return schema;
}
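For reference, a hedged sketch of the input parseMetadata consumes: the Delta Lake schemaString is itself a JSON document with a fields array. The sample below is invented for illustration; the keys follow the Delta protocol, and the parsing calls are the same Poco ones used above:

static const char * sample_schema = R"json({
    "type": "struct",
    "fields": [
        {"name": "id", "type": "long", "nullable": false, "metadata": {}},
        {"name": "value", "type": "string", "nullable": true,
         "metadata": {"delta.columnMapping.physicalName": "col-5f3a"}}
    ]
})json";

Poco::JSON::Parser parser;
auto fields_object = parser.parse(sample_schema).extract<Poco::JSON::Object::Ptr>();
/// parseMetadata(fields_object) would yield two columns: "id" and, because a
/// physical name is present, "col-5f3a" (types resolved by getFieldType below).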
DataTypePtr getFieldType(const Poco::JSON::Object::Ptr & field, const String & type_key, bool is_nullable)
{
if (field->isObject(type_key))
@ -505,7 +518,10 @@ struct DeltaLakeMetadataImpl
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Arrow error: {}", _s.ToString()); \
} while (false)
size_t getCheckpointIfExists(std::set<String> & result)
size_t getCheckpointIfExists(
std::set<String> & result,
NamesAndTypesList & file_schema,
DataLakePartitionColumns & file_partition_columns)
{
const auto version = readLastCheckpointIfExists();
if (!version)
@ -526,7 +542,8 @@ struct DeltaLakeMetadataImpl
auto columns = ParquetSchemaReader(*buf, format_settings).readSchema();
/// Read only columns that we need.
columns.filterColumns(NameSet{"add", "remove"});
auto filter_column_names = NameSet{"add", "metaData"};
columns.filterColumns(filter_column_names);
Block header;
for (const auto & column : columns)
header.insert({column.type->createColumn(), column.type, column.name});
@ -540,9 +557,6 @@ struct DeltaLakeMetadataImpl
ArrowMemoryPool::instance(),
&reader));
std::shared_ptr<arrow::Schema> file_schema;
THROW_ARROW_NOT_OK(reader->GetSchema(&file_schema));
ArrowColumnToCHColumn column_reader(
header, "Parquet",
format_settings.parquet.allow_missing_columns,
@ -553,29 +567,85 @@ struct DeltaLakeMetadataImpl
std::shared_ptr<arrow::Table> table;
THROW_ARROW_NOT_OK(reader->ReadTable(&table));
Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows());
const auto & res_columns = res.getColumns();
Chunk chunk = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows());
auto res_block = header.cloneWithColumns(chunk.detachColumns());
res_block = Nested::flatten(res_block);
if (res_columns.size() != 2)
{
throw Exception(
ErrorCodes::INCORRECT_DATA,
"Unexpected number of columns: {} (having: {}, expected: {})",
res_columns.size(), res.dumpStructure(), header.dumpStructure());
}
const auto * nullable_path_column = assert_cast<const ColumnNullable *>(res_block.getByName("add.path").column.get());
const auto & path_column = assert_cast<const ColumnString &>(nullable_path_column->getNestedColumn());
const auto * nullable_schema_column = assert_cast<const ColumnNullable *>(res_block.getByName("metaData.schemaString").column.get());
const auto & schema_column = assert_cast<const ColumnString &>(nullable_schema_column->getNestedColumn());
auto partition_values_column_raw = res_block.getByName("add.partitionValues").column;
const auto & partition_values_column = assert_cast<const ColumnMap &>(*partition_values_column_raw);
const auto * tuple_column = assert_cast<const ColumnTuple *>(res_columns[0].get());
const auto & nullable_column = assert_cast<const ColumnNullable &>(tuple_column->getColumn(0));
const auto & path_column = assert_cast<const ColumnString &>(nullable_column.getNestedColumn());
for (size_t i = 0; i < path_column.size(); ++i)
{
const auto filename = String(path_column.getDataAt(i));
if (filename.empty())
const auto metadata = String(schema_column.getDataAt(i));
if (!metadata.empty())
{
Poco::JSON::Parser parser;
Poco::Dynamic::Var json = parser.parse(metadata);
const Poco::JSON::Object::Ptr & object = json.extract<Poco::JSON::Object::Ptr>();
auto current_schema = parseMetadata(object);
if (file_schema.empty())
{
file_schema = current_schema;
LOG_TEST(log, "Processed schema from checkpoint: {}", file_schema.toString());
}
else if (file_schema != current_schema)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Reading from files with different schema is not possible "
"({} is different from {})",
file_schema.toString(), current_schema.toString());
}
}
}
for (size_t i = 0; i < path_column.size(); ++i)
{
const auto path = String(path_column.getDataAt(i));
if (path.empty())
continue;
LOG_TEST(log, "Adding {}", filename);
const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / filename);
auto filename = fs::path(path).filename().string();
auto it = file_partition_columns.find(filename);
if (it == file_partition_columns.end())
{
Field map;
partition_values_column.get(i, map);
auto partition_values_map = map.safeGet<Map>();
if (!partition_values_map.empty())
{
auto & current_partition_columns = file_partition_columns[filename];
for (const auto & map_value : partition_values_map)
{
const auto tuple = map_value.safeGet<Tuple>();
const auto partition_name = tuple[0].safeGet<String>();
auto name_and_type = file_schema.tryGetByName(partition_name);
if (!name_and_type)
{
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"No such column in schema: {} (schema: {})",
partition_name, file_schema.toString());
}
const auto value = tuple[1].safeGet<String>();
auto field = getFieldValue(value, name_and_type->type);
current_partition_columns.emplace_back(std::move(name_and_type.value()), std::move(field));
LOG_TEST(log, "Partition {} value is {} (for {})", partition_name, value, filename);
}
}
}
LOG_TEST(log, "Adding {}", path);
const auto [_, inserted] = result.insert(std::filesystem::path(configuration->getPath()) / path);
if (!inserted)
throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", filename);
throw Exception(ErrorCodes::INCORRECT_DATA, "File already exists {}", path);
}
return version;
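The partitionValues loop above leans on ClickHouse's Field model, where a Map field is a vector of two-element Tuples (key, value) — hence the safeGet<Tuple>() per entry followed by positional safeGet<String>() calls. A tiny hedged illustration of that layout, with invented sample values:

/// A Map field is a vector of (key, value) Tuples.
Map partition_values;
partition_values.emplace_back(Tuple{Field(String("date")), Field(String("2024-07-12"))});

for (const auto & entry : partition_values)
{
    const auto & kv = entry.safeGet<Tuple>();
    String key = kv[0].safeGet<String>();   /// "date"
    String value = kv[1].safeGet<String>(); /// "2024-07-12"
}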

View File

@ -41,6 +41,7 @@ public:
auto object_storage = base_configuration->createObjectStorage(context, /* is_readonly */true);
DataLakeMetadataPtr metadata;
NamesAndTypesList schema_from_metadata;
const bool use_schema_from_metadata = columns_.empty();
if (base_configuration->format == "auto")
base_configuration->format = "Parquet";
@ -50,8 +51,9 @@ public:
try
{
metadata = DataLakeMetadata::create(object_storage, base_configuration, context);
schema_from_metadata = metadata->getTableSchema();
configuration->setPaths(metadata->getDataFiles());
if (use_schema_from_metadata)
schema_from_metadata = metadata->getTableSchema();
}
catch (...)
{
@ -66,7 +68,7 @@ public:
return std::make_shared<IStorageDataLake<DataLakeMetadata>>(
base_configuration, std::move(metadata), configuration, object_storage,
context, table_id_,
columns_.empty() ? ColumnsDescription(schema_from_metadata) : columns_,
use_schema_from_metadata ? ColumnsDescription(schema_from_metadata) : columns_,
constraints_, comment_, format_settings_);
}

View File

@ -206,23 +206,25 @@ Chunk StorageObjectStorageSource::generate()
if (!partition_columns.empty() && chunk_size && chunk.hasColumns())
{
auto partition_values = partition_columns.find(filename);
if (partition_values != partition_columns.end())
{
    for (const auto & [name_and_type, value] : partition_values->second)
    {
        if (!read_from_format_info.source_header.has(name_and_type.name))
            continue;

        const auto column_pos = read_from_format_info.source_header.getPositionByName(name_and_type.name);
        auto partition_column = name_and_type.type->createColumnConst(chunk.getNumRows(), value)->convertToFullColumnIfConst();

        /// This column is filled with default value now, remove it.
        chunk.erase(column_pos);

        /// Add correct values.
        if (column_pos < chunk.getNumColumns())
            chunk.addColumn(column_pos, std::move(partition_column));
        else
            chunk.addColumn(std::move(partition_column));
    }
}
}
return chunk;

View File

@ -5,20 +5,21 @@
#include <base/hex.h>
#include <base/interpolate.h>
#include <Common/FailPoint.h>
#include <Common/Macros.h>
#include <Common/MemoryTracker.h>
#include <Common/ProfileEventsScope.h>
#include <Common/StringUtils.h>
#include <Common/ThreadFuzzer.h>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h>
#include <Common/escapeForFileName.h>
#include <Common/formatReadable.h>
#include <Common/logger_useful.h>
#include <Common/noexcept_scope.h>
#include <Common/randomDelay.h>
#include <Common/thread_local_rng.h>
#include <Common/typeid_cast.h>
#include <Core/ServerUUID.h>
@ -5272,6 +5273,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
if (shutdown_prepared_called.exchange(true))
return;
LOG_TRACE(log, "Start preparing for shutdown");
try
{
auto settings_ptr = getSettings();
@ -5282,7 +5285,11 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
stopBeingLeader();
if (attach_thread)
{
attach_thread->shutdown();
LOG_TRACE(log, "The attach thread is shutdown");
}
restarting_thread.shutdown(/* part_of_full_shutdown */true);
/// Explicitly set the event, because the restarting thread will not set it again
@ -5295,6 +5302,8 @@ void StorageReplicatedMergeTree::flushAndPrepareForShutdown()
shutdown_deadline.emplace(std::chrono::system_clock::now());
throw;
}
LOG_TRACE(log, "Finished preparing for shutdown");
}
void StorageReplicatedMergeTree::partialShutdown()
@ -5332,6 +5341,8 @@ void StorageReplicatedMergeTree::shutdown(bool)
if (shutdown_called.exchange(true))
return;
LOG_TRACE(log, "Shutdown started");
flushAndPrepareForShutdown();
if (!shutdown_deadline.has_value())
@ -5374,6 +5385,7 @@ void StorageReplicatedMergeTree::shutdown(bool)
/// Wait for all of them
std::lock_guard lock(data_parts_exchange_ptr->rwlock);
}
LOG_TRACE(log, "Shutdown finished");
}

View File

@ -35,7 +35,6 @@ void registerStorageFuzzJSON(StorageFactory & factory);
void registerStorageS3(StorageFactory & factory);
void registerStorageHudi(StorageFactory & factory);
void registerStorageS3Queue(StorageFactory & factory);
void registerStorageAzureQueue(StorageFactory & factory);
#if USE_PARQUET
void registerStorageDeltaLake(StorageFactory & factory);
@ -45,6 +44,10 @@ void registerStorageIceberg(StorageFactory & factory);
#endif
#endif
#if USE_AZURE_BLOB_STORAGE
void registerStorageAzureQueue(StorageFactory & factory);
#endif
#if USE_HDFS
#if USE_HIVE
void registerStorageHive(StorageFactory & factory);

View File

@ -15,3 +15,4 @@ warn_return_any = True
no_implicit_reexport = True
strict_equality = True
extra_checks = True
ignore_missing_imports = True

View File

@ -15,7 +15,7 @@ import upload_result_helper
from build_check import get_release_or_pr
from ci_config import CI
from ci_metadata import CiMetadata
from ci_utils import GHActions, normalize_string
from ci_utils import GHActions, normalize_string, Shell
from clickhouse_helper import (
CiLogsCredentials,
ClickHouseHelper,
@ -53,6 +53,7 @@ from stopwatch import Stopwatch
from tee_popen import TeePopen
from ci_cache import CiCache
from ci_settings import CiSettings
from ci_buddy import CIBuddy
from version_helper import get_version_from_repo
# pylint: disable=too-many-lines
@ -262,6 +263,8 @@ def check_missing_images_on_dockerhub(
def _pre_action(s3, indata, pr_info):
print("Clear dmesg")
Shell.run("sudo dmesg --clear ||:")
CommitStatusData.cleanup()
JobReport.cleanup()
BuildResult.cleanup()
@ -1118,6 +1121,12 @@ def main() -> int:
### POST action: start
elif args.post:
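# _pre_action cleared dmesg above, so any OOM-killer trace found here was produced by this job.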
if Shell.check(
"sudo dmesg -T | grep -q -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE'"
):
print("WARNING: OOM while job execution")
CIBuddy().post_error("Out Of Memory")
job_report = JobReport.load() if JobReport.exist() else None
if job_report:
ch_helper = ClickHouseHelper()

tests/ci/ci_buddy.py Normal file
View File

@ -0,0 +1,88 @@
import json
import os
import boto3
import requests
from botocore.exceptions import ClientError
from pr_info import PRInfo
from ci_utils import Shell
class CIBuddy:
_HEADERS = {"Content-Type": "application/json"}
def __init__(self, dry_run=False):
self.repo = os.getenv("GITHUB_REPOSITORY", "")
self.dry_run = dry_run
res = self._get_webhooks()
self.test_channel = ""
self.dev_ci_channel = ""
if res:
self.test_channel = json.loads(res)["test_channel"]
self.dev_ci_channel = json.loads(res)["ci_channel"]
self.job_name = os.getenv("CHECK_NAME", "unknown")
pr_info = PRInfo()
self.pr_number = pr_info.number
self.head_ref = pr_info.head_ref
self.commit_url = pr_info.commit_html_url
@staticmethod
def _get_webhooks():
name = "ci_buddy_web_hooks"
session = boto3.Session(region_name="us-east-1") # Replace with your region
ssm_client = session.client("ssm")
json_string = None
try:
response = ssm_client.get_parameter(
Name=name,
WithDecryption=True, # Set to True if the parameter is a SecureString
)
json_string = response["Parameter"]["Value"]
except ClientError as e:
print(f"An error occurred: {e}")
return json_string
def post(self, message, dry_run=None):
if dry_run is None:
dry_run = self.dry_run
print(f"Posting slack message, dry_run [{dry_run}]")
if dry_run:
url = self.test_channel
else:
url = self.dev_ci_channel
data = {"text": message}
try:
requests.post(url, headers=self._HEADERS, data=json.dumps(data), timeout=10)
except Exception as e:
print(f"ERROR: Failed to post message, ex {e}")
def post_error(self, error_description, job_name="", with_instance_info=True):
instance_id, instance_type = "unknown", "unknown"
if with_instance_info:
instance_id = Shell.run("ec2metadata --instance-id") or instance_id
instance_type = Shell.run("ec2metadata --instance-type") or instance_type
if not job_name:
job_name = os.getenv("CHECK_NAME", "unknown")
line_err = f":red_circle: {error_description} :red_circle:\n\n"
line_ghr = f" *Runner:* `{instance_type}`, `{instance_id}`\n"
line_job = f" *Job:* `{job_name}`\n"
line_pr_ = f" *PR:* <https://github.com/{self.repo}/pull/{self.pr_number}|#{self.pr_number}>\n"
line_br_ = f" *Branch:* `{self.head_ref}`, <{self.commit_url}|commit>\n"
message = line_err
message += line_job
if with_instance_info:
message += line_ghr
if self.pr_number > 0:
message += line_pr_
else:
message += line_br_
self.post(message)
if __name__ == "__main__":
# test
buddy = CIBuddy(dry_run=True)
buddy.post_error("Out of memory")

View File

@ -1,4 +1,5 @@
import os
import subprocess
from contextlib import contextmanager
from pathlib import Path
from typing import Any, Iterator, List, Union
@ -42,3 +43,43 @@ class GHActions:
for line in lines:
print(line)
print("::endgroup::")
class Shell:
@classmethod
def run_strict(cls, command):
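# Run via the shell with stderr folded into stdout; check=True raises
# subprocess.CalledProcessError on a non-zero exit code.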
subprocess.run(
command + " 2>&1",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=True,
)
@classmethod
def run(cls, command):
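# Return the command's stripped stdout on success, or "" on a non-zero exit code.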
res = ""
result = subprocess.run(
command,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
if result.returncode == 0:
res = result.stdout
return res.strip()
@classmethod
def check(cls, command):
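# Return True iff the command exits with status 0; output is captured and discarded.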
result = subprocess.run(
command + " 2>&1",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
)
return result.returncode == 0
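These helpers are used in ci.py above: Shell.run("sudo dmesg --clear ||:") for best-effort commands whose output is ignored, and Shell.check("sudo dmesg -T | grep -q ...") as a boolean exit-status test.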

View File

@ -104,6 +104,8 @@ def get_run_command(
return (
f"docker run --volume={builds_path}:/package_folder "
# For dmesg and sysctl
"--privileged "
f"{ci_logs_args}"
f"--volume={repo_path}/tests:/usr/share/clickhouse-test "
f"{volume_with_broken_test}"

View File

@ -2501,28 +2501,13 @@ def do_run_tests(jobs, test_suite: TestSuite):
)
],
)
    future_seq.wait()
    future.wait()
finally:
    pool.terminate()
    pool.close()
    pool.join()
if not args.run_sequential_tests_in_parallel:
run_tests_array(

View File

@ -9,4 +9,21 @@
<key1>value1</key1>
</collection1>
</named_collections>
<remote_servers>
<replicated_nc_nodes_cluster>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>node_with_keeper</host>
<port>9000</port>
</replica>
<replica>
<host>node_with_keeper_2</host>
<port>9000</port>
</replica>
</shard>
<allow_distributed_ddl_queries>true</allow_distributed_ddl_queries>
</replicated_nc_nodes_cluster>
</remote_servers>
</clickhouse>

View File

@ -1,4 +1,9 @@
<clickhouse>
<profiles>
<default>
<ignore_on_cluster_for_replicated_named_collections_queries>0</ignore_on_cluster_for_replicated_named_collections_queries>
</default>
</profiles>
<users>
<default>
<password></password>

View File

@ -3,6 +3,8 @@ import pytest
import os
import time
from helpers.cluster import ClickHouseCluster
from contextlib import nullcontext as does_not_raise
from helpers.client import QueryRuntimeException
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
NAMED_COLLECTIONS_CONFIG = os.path.join(
@ -761,3 +763,32 @@ def test_keeper_storage(cluster):
check_dropped(node1)
check_dropped(node2)
@pytest.mark.parametrize(
"ignore, expected_raise",
[(True, does_not_raise()), (False, pytest.raises(QueryRuntimeException))],
)
def test_keeper_storage_remove_on_cluster(cluster, ignore, expected_raise):
node = cluster.instances["node_with_keeper"]
replace_in_users_config(
node,
"ignore_on_cluster_for_replicated_named_collections_queries>.",
f"ignore_on_cluster_for_replicated_named_collections_queries>{int(ignore)}",
)
node.query("SYSTEM RELOAD CONFIG")
with expected_raise:
node.query(
"DROP NAMED COLLECTION IF EXISTS test_nc ON CLUSTER `replicated_nc_nodes_cluster`"
)
node.query(
"CREATE NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` AS key1=1, key2=2 OVERRIDABLE"
)
node.query(
"ALTER NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster` SET key2=3"
)
node.query(
"DROP NAMED COLLECTION test_nc ON CLUSTER `replicated_nc_nodes_cluster`"
)

View File

@ -161,6 +161,9 @@ def test_parallel_replicas_custom_key_replicatedmergetree(
insert_data("test_table_for_rmt", row_num, all_nodes=False)
for node in nodes:
node.query("SYSTEM SYNC REPLICA test_table_for_rmt LIGHTWEIGHT")
expected_result = ""
for i in range(4):
expected_result += f"{i}\t250\n"

View File

@ -596,19 +596,116 @@ def test_partition_columns(started_cluster):
)
assert result == 1
# instance.query(
# f"""
# DROP TABLE IF EXISTS {TABLE_NAME};
# CREATE TABLE {TABLE_NAME} (a Int32, b String, c DateTime)
# ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')"""
# )
# assert (
# int(
# instance.query(
# f"SELECT count() FROM {TABLE_NAME} WHERE c != toDateTime('2000/01/05')"
# )
# )
# == num_rows - 1
# )
# instance.query(f"SELECT a, b, c, FROM {TABLE_NAME}")
# assert False
instance.query(
f"""
DROP TABLE IF EXISTS {TABLE_NAME};
CREATE TABLE {TABLE_NAME} (a Nullable(Int32), b Nullable(String), c Nullable(Date32), d Nullable(Int32), e Nullable(Bool))
ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')"""
)
assert (
"""1 test1 2000-01-01 1 false
2 test2 2000-01-02 2 false
3 test3 2000-01-03 3 false
4 test4 2000-01-04 4 false
5 test5 2000-01-05 5 false
6 test6 2000-01-06 6 false
7 test7 2000-01-07 7 false
8 test8 2000-01-08 8 false
9 test9 2000-01-09 9 false"""
== instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip()
)
assert (
int(
instance.query(
f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/05')"
)
)
== 1
)
# Subset of columns should work.
instance.query(
f"""
DROP TABLE IF EXISTS {TABLE_NAME};
CREATE TABLE {TABLE_NAME} (b Nullable(String), c Nullable(Date32), d Nullable(Int32))
ENGINE=DeltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')"""
)
assert (
"""test1 2000-01-01 1
test2 2000-01-02 2
test3 2000-01-03 3
test4 2000-01-04 4
test5 2000-01-05 5
test6 2000-01-06 6
test7 2000-01-07 7
test8 2000-01-08 8
test9 2000-01-09 9"""
== instance.query(f"SELECT * FROM {TABLE_NAME} ORDER BY b").strip()
)
for i in range(num_rows + 1, 2 * num_rows + 1):
data = [
(
i,
"test" + str(i),
datetime.strptime(f"2000-01-{i}", "%Y-%m-%d"),
i,
False,
)
]
df = spark.createDataFrame(data=data, schema=schema)
df.printSchema()
df.write.mode("append").format("delta").partitionBy(partition_columns).save(
f"/{TABLE_NAME}"
)
files = upload_directory(minio_client, bucket, f"/{TABLE_NAME}", "")
ok = False
for file in files:
if file.endswith("last_checkpoint"):
ok = True
assert ok
result = int(
instance.query(
f"""SELECT count()
FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123')
"""
)
)
assert result == num_rows * 2
assert (
"""1 test1 2000-01-01 1 false
2 test2 2000-01-02 2 false
3 test3 2000-01-03 3 false
4 test4 2000-01-04 4 false
5 test5 2000-01-05 5 false
6 test6 2000-01-06 6 false
7 test7 2000-01-07 7 false
8 test8 2000-01-08 8 false
9 test9 2000-01-09 9 false
10 test10 2000-01-10 10 false
11 test11 2000-01-11 11 false
12 test12 2000-01-12 12 false
13 test13 2000-01-13 13 false
14 test14 2000-01-14 14 false
15 test15 2000-01-15 15 false
16 test16 2000-01-16 16 false
17 test17 2000-01-17 17 false
18 test18 2000-01-18 18 false"""
== instance.query(
f"""
SELECT * FROM deltaLake('http://{started_cluster.minio_ip}:{started_cluster.minio_port}/{bucket}/{result_file}/', 'minio', 'minio123') ORDER BY c
"""
).strip()
)
assert (
int(
instance.query(
f"SELECT count() FROM {TABLE_NAME} WHERE c == toDateTime('2000/01/15')"
)
)
== 1
)

View File

@ -78,13 +78,13 @@ def wait_rabbitmq_to_start(rabbitmq_docker_id, cookie, timeout=180):
def kill_rabbitmq(rabbitmq_id):
p = subprocess.Popen(("docker", "stop", rabbitmq_id), stdout=subprocess.PIPE)
p.communicate()
p.wait(timeout=60)
return p.returncode == 0
def revive_rabbitmq(rabbitmq_id, cookie):
p = subprocess.Popen(("docker", "start", rabbitmq_id), stdout=subprocess.PIPE)
p.communicate()
p.wait(timeout=60)
wait_rabbitmq_to_start(rabbitmq_id, cookie)

View File

@ -1,12 +1,12 @@
<!-- Tests functions replaceRegexpAll and replaceRegexpOne with trivial patterns. These trigger internally a fallback to simple string replacement -->
<!-- _materialize_ because the shortcut is only implemented for non-const haystack + const needle + const replacement strings -->
<test>
    <!-- trivial pattern -->
    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null</query>
    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), ' ', '\n') AS w FROM numbers_mt(5000000) FORMAT Null</query>

    <!-- non-trivial patterns -->
    <!-- deliberately testing with fewer rows to keep runtimes reasonable -->
    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpAll(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null</query>
    <query>WITH 'Many years later as he faced the firing squad, Colonel Aureliano Buendia was to remember that distant afternoon when his father took him to discover ice.' AS s SELECT replaceRegexpOne(materialize(s), '\s+', '\\0\n') AS w FROM numbers_mt(500000) FORMAT Null</query>
</test>

View File

@ -15,7 +15,7 @@ tmp_path=$(mktemp "$CURDIR/01268_procfs_metrics.XXXXXX")
trap 'rm -f $tmp_path' EXIT
truncate -s1025 "$tmp_path"
$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars
$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events --storage_file_read_method=pread -q "SELECT * FROM file('$tmp_path', 'LineAsString') FORMAT Null" |& grep -m1 -F -o -e OSReadChars
# NOTE: that OSCPUVirtualTimeMicroseconds is in microseconds, so 1e6 is not enough.
$CLICKHOUSE_LOCAL --profile-events-delay-ms=-1 --print-profile-events -q "SELECT * FROM numbers(1e8) FORMAT Null" |& grep -m1 -F -o -e OSCPUVirtualTimeMicroseconds
exit 0

View File

@ -10,7 +10,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=0,
max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and
@ -34,7 +34,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=0,
max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and
@ -58,7 +58,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=1,
max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and
@ -82,7 +82,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=1,
max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and
@ -106,7 +106,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=0,
max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and
@ -130,7 +130,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=0,
max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and
@ -154,7 +154,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=1,
max_insert_threads=0; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and
@ -178,7 +178,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select=1,
max_insert_threads=16; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and

View File

@ -28,7 +28,7 @@ insert into testX select number from numbers(10) settings
optimize_trivial_insert_select={{ optimize_trivial_insert_select }},
max_insert_threads={{ max_insert_threads }}; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO }
system flush logs;
select arrayUniq(thread_ids) from system.query_log where
select peak_threads_usage from system.query_log where
current_database = currentDatabase() and
type != 'QueryStart' and
query like '%insert into testX %' and

View File

@ -14,11 +14,11 @@ set log_queries = 1;
select x from table_01323_many_parts limit 10 format Null;
system flush logs;
select arrayUniq(thread_ids) <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1;
select peak_threads_usage <= 4 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1;
select x from table_01323_many_parts order by x limit 10 format Null;
system flush logs;
select arrayUniq(thread_ids) <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1;
select peak_threads_usage <= 36 from system.query_log where current_database = currentDatabase() AND event_date >= today() - 1 and query ilike '%select x from table_01323_many_parts order by x%' and query not like '%system.query_log%' and type = 'QueryFinish' order by query_start_time desc limit 1;
drop table if exists table_01323_many_parts;

View File

@ -5,8 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
USER_FILES_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}')
FILE_NAME=test_02240.data
DATA_FILE=${USER_FILES_PATH:?}/$FILE_NAME

View File

@ -10,7 +10,7 @@ create materialized view t_mv Engine = Null AS select now() as ts, max(a) from t
insert into t select * from numbers_mt(10e6) settings max_threads = 16, max_insert_threads=16, max_block_size=100000;
system flush logs;
select arrayUniq(thread_ids)>=16 from system.query_log where
select peak_threads_usage>=16 from system.query_log where
event_date >= yesterday() and
current_database = currentDatabase() and
type = 'QueryFinish' and

View File

@ -17,3 +17,27 @@ ENGINE = MergeTree ORDER BY conversation;
INSERT INTO t2(conversation) VALUES (42);
select * from t2;
drop table t1;
INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE }
drop table t2;
CREATE TABLE t2 (
`conversation` UInt64,
CONSTRAINT constraint_conversation CHECK conversation IN (SELECT id FROM t1)
)
ENGINE = MergeTree ORDER BY conversation;
INSERT INTO t2(conversation) VALUES (42); -- { serverError UNKNOWN_TABLE }
CREATE TABLE t1 (
`id` UInt64
)
ENGINE = MergeTree ORDER BY id;
INSERT INTO t1(id) VALUES (42);
INSERT INTO t2(conversation) VALUES (42);
select * from t2;

View File

@ -36,19 +36,10 @@ SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS N
3 3 3 33
\N \N \N \N
-- aliases defined in the join condition are valid
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
1 42 \N \N \N 0
2 2 2 2 1 1
3 3 3 33 1 1
\N \N 4 42 \N 0
\N \N \N \N \N 1
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
1 42 \N \N \N 0
2 2 2 2 1 1
3 3 3 33 1 1
\N \N 4 42 \N 0
\N \N \N \N \N 0
\N \N \N \N \N 0
-- FIXME(@vdimir) broken query formatting for the following queries:
-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
-- check for non-nullable columns for which `is null` is replaced with constant
SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
2 2 2 2

View File

@ -36,8 +36,9 @@ SELECT x = y OR (x IS NULL AND y IS NULL) FROM t1 ORDER BY x NULLS LAST;
SELECT * FROM t1 JOIN t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;
-- aliases defined in the join condition are valid
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
-- FIXME(@vdimir) broken query formatting for the following queries:
-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
-- SELECT *, e, e2 FROM t1 FULL JOIN t2 ON ( ( ((t1.x == t2.x) AS e) AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) AS e2 ) ORDER BY t1.x NULLS LAST, t2.x NULLS LAST;
-- check for non-nullable columns for which `is null` is replaced with constant
SELECT * FROM t1n as t1 JOIN t2n as t2 ON (t1.x == t2.x AND ((t2.x IS NOT NULL) AND (t1.x IS NOT NULL)) ) OR ( (t2.x IS NULL) AND (t1.x IS NULL) ) ORDER BY t1.x NULLS LAST;

View File

@ -0,0 +1,50 @@
-- Single partition by function
0
2
-- Nested partition by function
1
2
1
1
-- Nested partition by function, LowCardinality
1
2
1
1
-- Nested partition by function, Nullable
1
2
1
1
-- Nested partition by function, LowCardinality + Nullable
1
2
1
1
-- Non-safe cast
2
2
-- Multiple partition columns
1
1
1
2
-- LowCardinality set
1
1
-- Nullable set
1
1
-- LowCardinality + Nullable set
1
1
-- Not failing with date parsing functions
1
0
-- Pruning + not failing with nested date parsing functions
1
2
0
-- Empty transform functions
2
1

View File

@ -0,0 +1,258 @@
SELECT '-- Single partition by function';
DROP TABLE IF EXISTS 03173_single_function;
CREATE TABLE 03173_single_function (
dt Date,
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY toMonth(dt);
INSERT INTO 03173_single_function
SELECT toDate('2000-01-01') + 10 * number FROM numbers(50)
UNION ALL
SELECT toDate('2100-01-01') + 10 * number FROM numbers(50);
OPTIMIZE TABLE 03173_single_function FINAL;
SELECT count() FROM 03173_single_function WHERE dt IN ('2024-01-20', '2024-05-25') SETTINGS log_comment='03173_single_function';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_single_function';
DROP TABLE IF EXISTS 03173_single_function;
SELECT '-- Nested partition by function';
DROP TABLE IF EXISTS 03173_nested_function;
CREATE TABLE 03173_nested_function(
id Int32,
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY xxHash32(id) % 3;
INSERT INTO 03173_nested_function SELECT number FROM numbers(100);
OPTIMIZE TABLE 03173_nested_function FINAL;
SELECT count() FROM 03173_nested_function WHERE id IN (10) SETTINGS log_comment='03173_nested_function';
SELECT count() FROM 03173_nested_function WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function';
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr';
DROP TABLE IF EXISTS 03173_nested_function;
SELECT '-- Nested partition by function, LowCardinality';
SET allow_suspicious_low_cardinality_types = 1;
DROP TABLE IF EXISTS 03173_nested_function_lc;
CREATE TABLE 03173_nested_function_lc(
id LowCardinality(Int32),
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY xxHash32(id) % 3;
INSERT INTO 03173_nested_function_lc SELECT number FROM numbers(100);
OPTIMIZE TABLE 03173_nested_function_lc FINAL;
SELECT count() FROM 03173_nested_function_lc WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc';
SELECT count() FROM 03173_nested_function_lc WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc';
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc';
DROP TABLE IF EXISTS 03173_nested_function_lc;
SELECT '-- Nested partition by function, Nullable';
DROP TABLE IF EXISTS 03173_nested_function_null;
CREATE TABLE 03173_nested_function_null(
id Nullable(Int32),
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY xxHash32(id) % 3
SETTINGS allow_nullable_key=1;
INSERT INTO 03173_nested_function_null SELECT number FROM numbers(100);
OPTIMIZE TABLE 03173_nested_function_null FINAL;
SELECT count() FROM 03173_nested_function_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_null';
SELECT count() FROM 03173_nested_function_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_null';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_null';
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_null';
DROP TABLE IF EXISTS 03173_nested_function_null;
SELECT '-- Nested partition by function, LowCardinality + Nullable';
DROP TABLE IF EXISTS 03173_nested_function_lc_null;
SET allow_suspicious_low_cardinality_types = 1;
CREATE TABLE 03173_nested_function_lc_null(
id LowCardinality(Nullable(Int32)),
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY xxHash32(id) % 3
SETTINGS allow_nullable_key=1;
INSERT INTO 03173_nested_function_lc_null SELECT number FROM numbers(100);
OPTIMIZE TABLE 03173_nested_function_lc_null FINAL;
SELECT count() FROM 03173_nested_function_lc_null WHERE id IN (10) SETTINGS log_comment='03173_nested_function_lc_null';
SELECT count() FROM 03173_nested_function_lc_null WHERE xxHash32(id) IN (2158931063, 1449383981) SETTINGS log_comment='03173_nested_function_subexpr_lc_null';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_lc_null';
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_function_subexpr_lc_null';
DROP TABLE IF EXISTS 03173_nested_function_lc_null;
SELECT '-- Non-safe cast';
DROP TABLE IF EXISTS 03173_nonsafe_cast;
CREATE TABLE 03173_nonsafe_cast(
id Int64,
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY xxHash32(id) % 3;
INSERT INTO 03173_nonsafe_cast SELECT number FROM numbers(100);
OPTIMIZE TABLE 03173_nonsafe_cast FINAL;
SELECT count() FROM 03173_nonsafe_cast WHERE id IN (SELECT '50' UNION ALL SELECT '99') SETTINGS log_comment='03173_nonsafe_cast';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nonsafe_cast';
DROP TABLE IF EXISTS 03173_nonsafe_cast;
SELECT '-- Multiple partition columns';
DROP TABLE IF EXISTS 03173_multiple_partition_cols;
CREATE TABLE 03173_multiple_partition_cols (
key1 Int32,
key2 Int32
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY (intDiv(key1, 50), xxHash32(key2) % 3);
INSERT INTO 03173_multiple_partition_cols SELECT number, number FROM numbers(100);
OPTIMIZE TABLE 03173_multiple_partition_cols FINAL;
SELECT count() FROM 03173_multiple_partition_cols WHERE key2 IN (4) SETTINGS log_comment='03173_multiple_columns';
SELECT count() FROM 03173_multiple_partition_cols WHERE xxHash32(key2) IN (4251411170) SETTINGS log_comment='03173_multiple_columns_subexpr';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns';
-- Due to xxHash32() in the WHERE condition, MinMax is unable to eliminate any parts,
-- so partition pruning leaves two parts (for key1 // 50 = 0 and key1 // 50 = 1)
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_multiple_columns_subexpr';
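-- (Worked example, using the values from the queries above: the subexpr query
--  filters on xxHash32(key2) IN (4251411170), and 4251411170 % 3 = 2, so only
--  partitions with hash bucket 2 survive, one per intDiv(key1, 50) bucket: 2 parts.)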
-- Preparing base table for filtering by LowCardinality/Nullable sets
DROP TABLE IF EXISTS 03173_base_data_source;
CREATE TABLE 03173_base_data_source(
id Int32,
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY xxHash32(id) % 3;
INSERT INTO 03173_base_data_source SELECT number FROM numbers(100);
OPTIMIZE TABLE 03173_base_data_source FINAL;
SELECT '-- LowCardinality set';
SET allow_suspicious_low_cardinality_types = 1;
DROP TABLE IF EXISTS 03173_low_cardinality_set;
CREATE TABLE 03173_low_cardinality_set (id LowCardinality(Int32)) ENGINE=Memory AS SELECT 10;
SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_low_cardinality_set) SETTINGS log_comment='03173_low_cardinality_set';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_low_cardinality_set';
DROP TABLE IF EXISTS 03173_low_cardinality_set;
SELECT '-- Nullable set';
DROP TABLE IF EXISTS 03173_nullable_set;
CREATE TABLE 03173_nullable_set (id Nullable(Int32)) ENGINE=Memory AS SELECT 10;
SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_nullable_set) SETTINGS log_comment='03173_nullable_set';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nullable_set';
DROP TABLE IF EXISTS 03173_nullable_set;
SELECT '-- LowCardinality + Nullable set';
DROP TABLE IF EXISTS 03173_lc_nullable_set;
CREATE TABLE 03173_lc_nullable_set (id LowCardinality(Nullable(Int32))) ENGINE=Memory AS SELECT 10 UNION ALL SELECT NULL;
SELECT count() FROM 03173_base_data_source WHERE id IN (SELECT id FROM 03173_lc_nullable_set) SETTINGS log_comment='03173_lc_nullable_set';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_lc_nullable_set';
DROP TABLE IF EXISTS 03173_lc_nullable_set;
SELECT '-- Not failing with date parsing functions';
DROP TABLE IF EXISTS 03173_date_parsing;
CREATE TABLE 03173_date_parsing (
id String
)
ENGINE=MergeTree
ORDER BY tuple()
PARTITION BY toDate(id);
INSERT INTO 03173_date_parsing
SELECT toString(toDate('2023-04-01') + number)
FROM numbers(20);
SELECT count() FROM 03173_date_parsing WHERE id IN ('2023-04-02', '2023-05-02');
SELECT count() FROM 03173_date_parsing WHERE id IN ('not a date');
DROP TABLE IF EXISTS 03173_date_parsing;
SELECT '-- Pruning + not failing with nested date parsing functions';
DROP TABLE IF EXISTS 03173_nested_date_parsing;
CREATE TABLE 03173_nested_date_parsing (
id String
)
ENGINE=MergeTree
ORDER BY tuple()
PARTITION BY toMonth(toDate(id));
INSERT INTO 03173_nested_date_parsing
SELECT toString(toDate('2000-01-01') + 10 * number) FROM numbers(50)
UNION ALL
SELECT toString(toDate('2100-01-01') + 10 * number) FROM numbers(50);
SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('2000-01-21', '2023-05-02') SETTINGS log_comment='03173_nested_date_parsing';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_nested_date_parsing';
SELECT count() FROM 03173_nested_date_parsing WHERE id IN ('not a date');
DROP TABLE IF EXISTS 03173_nested_date_parsing;
SELECT '-- Empty transform functions';
DROP TABLE IF EXISTS 03173_empty_transform;
CREATE TABLE 03173_empty_transform(
id Int32,
)
ENGINE = MergeTree
ORDER BY tuple()
PARTITION BY xxHash32(id) % 3;
INSERT INTO 03173_empty_transform SELECT number FROM numbers(6);
OPTIMIZE TABLE 03173_empty_transform FINAL;
SELECT id FROM 03173_empty_transform WHERE xxHash32(id) % 3 IN (xxHash32(2::Int32) % 3) SETTINGS log_comment='03173_empty_transform';
SYSTEM FLUSH LOGS;
SELECT ProfileEvents['SelectedParts'] FROM system.query_log WHERE type = 'QueryFinish' AND current_database = currentDatabase() AND log_comment = '03173_empty_transform';
DROP TABLE IF EXISTS 03173_empty_transform;

Some files were not shown because too many files have changed in this diff