diff --git a/.clang-tidy b/.clang-tidy
index 4dd8b9859c9..7241c372319 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -23,9 +23,12 @@ Checks: '*,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
+ -bugprone-reserved-identifier,
-bugprone-unchecked-optional-access,
-cert-dcl16-c,
+ -cert-dcl37-c,
+ -cert-dcl51-cpp,
-cert-err58-cpp,
-cert-msc32-c,
-cert-msc51-cpp,
@@ -129,6 +132,7 @@ Checks: '*,
-readability-function-cognitive-complexity,
-readability-function-size,
-readability-identifier-length,
+ -readability-identifier-naming,
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
@@ -158,56 +162,28 @@ Checks: '*,
WarningsAsErrors: '*'
-# TODO: use dictionary syntax for CheckOptions when minimum clang-tidy level rose to 15
-# some-check.SomeOption: 'some value'
-# instead of
-# - key: some-check.SomeOption
-# value: 'some value'
CheckOptions:
- - key: readability-identifier-naming.ClassCase
- value: CamelCase
- - key: readability-identifier-naming.EnumCase
- value: CamelCase
- - key: readability-identifier-naming.LocalVariableCase
- value: lower_case
- - key: readability-identifier-naming.StaticConstantCase
- value: aNy_CasE
- - key: readability-identifier-naming.MemberCase
- value: lower_case
- - key: readability-identifier-naming.PrivateMemberPrefix
- value: ''
- - key: readability-identifier-naming.ProtectedMemberPrefix
- value: ''
- - key: readability-identifier-naming.PublicMemberCase
- value: lower_case
- - key: readability-identifier-naming.MethodCase
- value: camelBack
- - key: readability-identifier-naming.PrivateMethodPrefix
- value: ''
- - key: readability-identifier-naming.ProtectedMethodPrefix
- value: ''
- - key: readability-identifier-naming.ParameterPackCase
- value: lower_case
- - key: readability-identifier-naming.StructCase
- value: CamelCase
- - key: readability-identifier-naming.TemplateTemplateParameterCase
- value: CamelCase
- - key: readability-identifier-naming.TemplateUsingCase
- value: lower_case
- - key: readability-identifier-naming.TypeTemplateParameterCase
- value: CamelCase
- - key: readability-identifier-naming.TypedefCase
- value: CamelCase
- - key: readability-identifier-naming.UnionCase
- value: CamelCase
- - key: readability-identifier-naming.UsingCase
- value: CamelCase
- - key: modernize-loop-convert.UseCxx20ReverseRanges
- value: false
- - key: performance-move-const-arg.CheckTriviallyCopyableMove
- value: false
- # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
- - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
- value: expr-type
- - key: cppcoreguidelines-avoid-do-while.IgnoreMacros
- value: true
+ readability-identifier-naming.ClassCase: CamelCase
+ readability-identifier-naming.EnumCase: CamelCase
+ readability-identifier-naming.LocalVariableCase: lower_case
+ readability-identifier-naming.StaticConstantCase: aNy_CasE
+ readability-identifier-naming.MemberCase: lower_case
+ readability-identifier-naming.PrivateMemberPrefix: ''
+ readability-identifier-naming.ProtectedMemberPrefix: ''
+ readability-identifier-naming.PublicMemberCase: lower_case
+ readability-identifier-naming.MethodCase: camelBack
+ readability-identifier-naming.PrivateMethodPrefix: ''
+ readability-identifier-naming.ProtectedMethodPrefix: ''
+ readability-identifier-naming.ParameterPackCase: lower_case
+ readability-identifier-naming.StructCase: CamelCase
+ readability-identifier-naming.TemplateTemplateParameterCase: CamelCase
+ readability-identifier-naming.TemplateUsingCase: lower_case
+ readability-identifier-naming.TypeTemplateParameterCase: CamelCase
+ readability-identifier-naming.TypedefCase: CamelCase
+ readability-identifier-naming.UnionCase: CamelCase
+ readability-identifier-naming.UsingCase: CamelCase
+ modernize-loop-convert.UseCxx20ReverseRanges: false
+ performance-move-const-arg.CheckTriviallyCopyableMove: false
+ # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
+ readability-identifier-naming.TypeTemplateParameterIgnoredRegexp: expr-type
+ cppcoreguidelines-avoid-do-while.IgnoreMacros: true
diff --git a/CMakeLists.txt b/CMakeLists.txt
index cbb666b81c3..59b38e7763f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -184,26 +184,12 @@ if (OS_DARWIN)
set (ENABLE_CURL_BUILD OFF)
endif ()
-# Ignored if `lld` is used
-option(ADD_GDB_INDEX_FOR_GOLD "Add .gdb-index to resulting binaries for gold linker.")
-
if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# Can be lld or ld-lld or lld-13 or /path/to/lld.
if (LINKER_NAME MATCHES "lld" AND OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
message (STATUS "Adding .gdb-index via --gdb-index linker option.")
- # we use another tool for gdb-index, because gold linker removes section .debug_aranges, which used inside clickhouse stacktraces
- # http://sourceware-org.1504.n7.nabble.com/gold-No-debug-aranges-section-when-linking-with-gdb-index-td540965.html#a556932
- elseif (LINKER_NAME MATCHES "gold$" AND ADD_GDB_INDEX_FOR_GOLD)
- find_program (GDB_ADD_INDEX_EXE NAMES "gdb-add-index" DOC "Path to gdb-add-index executable")
- if (NOT GDB_ADD_INDEX_EXE)
- set (USE_GDB_ADD_INDEX 0)
- message (WARNING "Cannot add gdb index to binaries, because gold linker is used, but gdb-add-index executable not found.")
- else()
- set (USE_GDB_ADD_INDEX 1)
- message (STATUS "gdb-add-index found: ${GDB_ADD_INDEX_EXE}")
- endif()
endif ()
endif()
@@ -301,12 +287,12 @@ if (ENABLE_BUILD_PROFILING)
endif ()
endif ()
-set (CMAKE_CXX_STANDARD 20)
-set (CMAKE_CXX_EXTENSIONS ON) # Same as gnu++2a (ON) vs c++2a (OFF): https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html
+set (CMAKE_CXX_STANDARD 23)
+set (CMAKE_CXX_EXTENSIONS OFF)
set (CMAKE_CXX_STANDARD_REQUIRED ON)
set (CMAKE_C_STANDARD 11)
-set (CMAKE_C_EXTENSIONS ON)
+set (CMAKE_C_EXTENSIONS ON) # required by most contribs written in C
set (CMAKE_C_STANDARD_REQUIRED ON)
if (COMPILER_GCC OR COMPILER_CLANG)
diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt
index 64785d575c5..8ab3c8a0711 100644
--- a/base/base/CMakeLists.txt
+++ b/base/base/CMakeLists.txt
@@ -2,6 +2,10 @@ if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
+# TODO: Remove this. We like to compile with C++23 (set by top-level CMakeLists) but Clang crashes with our libcxx
+# when instantiated from JSON.cpp. Try again when libcxx(abi) and Clang are upgraded to 16.
+set (CMAKE_CXX_STANDARD 20)
+
set (SRCS
argsToConfig.cpp
coverage.cpp
diff --git a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h b/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h
index 82e2f895638..3914f33df76 100644
--- a/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h
+++ b/base/poco/Data/ODBC/include/Poco/Data/ODBC/Extractor.h
@@ -466,7 +466,7 @@ namespace Data
bool extractManualImpl(std::size_t pos, T & val, SQLSMALLINT cType)
{
SQLRETURN rc = 0;
- T value = (T)0;
+ T value;
resizeLengths(pos);
diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp
index 6e9076579c6..663c96e47a2 100644
--- a/base/poco/Foundation/src/Message.cpp
+++ b/base/poco/Foundation/src/Message.cpp
@@ -27,8 +27,7 @@ Message::Message():
_tid(0),
_file(0),
_line(0),
- _pMap(0),
- _fmt_str(0)
+ _pMap(0)
{
init();
}
diff --git a/cmake/tools.cmake b/cmake/tools.cmake
index 4d4d741cc3a..4e1954f27f7 100644
--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@@ -50,15 +50,18 @@ endif ()
string (REGEX MATCHALL "[0-9]+" COMPILER_VERSION_LIST ${CMAKE_CXX_COMPILER_VERSION})
list (GET COMPILER_VERSION_LIST 0 COMPILER_VERSION_MAJOR)
-# Example values: `lld-10`, `gold`.
+# Example values: `lld-10`
option (LINKER_NAME "Linker name or full path")
+if (LINKER_NAME MATCHES "gold")
+ message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
+endif ()
+
# s390x doesnt support lld
if (NOT ARCH_S390X)
if (NOT LINKER_NAME)
if (COMPILER_GCC)
find_program (LLD_PATH NAMES "ld.lld")
- find_program (GOLD_PATH NAMES "ld.gold")
elseif (COMPILER_CLANG)
# llvm lld is a generic driver.
# Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld (WebAssembly) instead
@@ -67,13 +70,11 @@ if (NOT ARCH_S390X)
elseif (OS_DARWIN)
find_program (LLD_PATH NAMES "ld64.lld-${COMPILER_VERSION_MAJOR}" "ld64.lld")
endif ()
- find_program (GOLD_PATH NAMES "ld.gold" "gold")
endif ()
endif()
endif()
if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
- # prefer lld linker over gold or ld on linux and macos
if (LLD_PATH)
if (COMPILER_GCC)
# GCC driver requires one of supported linker names like "lld".
@@ -83,17 +84,6 @@ if ((OS_LINUX OR OS_DARWIN) AND NOT LINKER_NAME)
set (LINKER_NAME ${LLD_PATH})
endif ()
endif ()
-
- if (NOT LINKER_NAME)
- if (GOLD_PATH)
- message (FATAL_ERROR "Linking with gold is unsupported. Please use lld.")
- if (COMPILER_GCC)
- set (LINKER_NAME "gold")
- else ()
- set (LINKER_NAME ${GOLD_PATH})
- endif ()
- endif ()
- endif ()
endif ()
# TODO: allow different linker on != OS_LINUX
diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt
index b1ed7e464b6..b4cf0ad5e66 100644
--- a/contrib/grpc-cmake/CMakeLists.txt
+++ b/contrib/grpc-cmake/CMakeLists.txt
@@ -48,6 +48,9 @@ set(gRPC_ABSL_PROVIDER "clickhouse" CACHE STRING "" FORCE)
# We don't want to build C# extensions.
set(gRPC_BUILD_CSHARP_EXT OFF)
+# TODO: Remove this. We generally like to compile with C++23 but grpc isn't ready yet.
+set (CMAKE_CXX_STANDARD 20)
+
set(_gRPC_CARES_LIBRARIES ch_contrib::c-ares)
set(gRPC_CARES_PROVIDER "clickhouse" CACHE STRING "" FORCE)
add_subdirectory("${_gRPC_SOURCE_DIR}" "${_gRPC_BINARY_DIR}")
diff --git a/contrib/krb5 b/contrib/krb5
index f8262a1b548..9453aec0d50 160000
--- a/contrib/krb5
+++ b/contrib/krb5
@@ -1 +1 @@
-Subproject commit f8262a1b548eb29d97e059260042036255d07f8d
+Subproject commit 9453aec0d50e5aff9b189051611b321b40935d02
diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt
index ceaa270ad85..93b90c15201 100644
--- a/contrib/krb5-cmake/CMakeLists.txt
+++ b/contrib/krb5-cmake/CMakeLists.txt
@@ -160,6 +160,8 @@ set(ALL_SRCS
# "${KRB5_SOURCE_DIR}/lib/gssapi/spnego/negoex_trace.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c"
# "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c"
@@ -183,7 +185,6 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/crypto/krb/block_size.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/string_to_key.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum.c"
- "${KRB5_SOURCE_DIR}/lib/crypto/krb/crypto_libinit.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/derive.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/random_to_key.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/verify_checksum_iov.c"
@@ -217,9 +218,7 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_rc4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/valid_cksumtype.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/nfold.c"
- "${KRB5_SOURCE_DIR}/lib/crypto/krb/prng_fortuna.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/encrypt_length.c"
- "${KRB5_SOURCE_DIR}/lib/crypto/krb/cmac.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/keyblocks.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prf_rc4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/s2k_pbkdf2.c"
@@ -228,11 +227,11 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c"
#"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/openssl/kdf.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/pbkdf2.c"
- "${KRB5_SOURCE_DIR}/lib/crypto/openssl/init.c"
- "${KRB5_SOURCE_DIR}/lib/crypto/openssl/stubs.c"
# "${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_crc32.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hash_provider/hash_evp.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/des/des_keys.c"
@@ -312,7 +311,6 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/krb5/krb/allow_weak.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_rep.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/mk_priv.c"
- "${KRB5_SOURCE_DIR}/lib/krb5/krb/s4u_authdata.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/preauth_otp.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/init_keyblock.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/ser_addr.c"
@@ -688,6 +686,7 @@ target_include_directories(_krb5 PRIVATE
target_compile_definitions(_krb5 PRIVATE
KRB5_PRIVATE
+ CRYPTO_OPENSSL
_GSS_STATIC_LINK=1
KRB5_DEPRECATED=1
LOCALEDIR="/usr/local/share/locale"
diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 15f58d6c3a3..314e9c2acfd 100644
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -44,6 +44,8 @@ if [ "$is_tsan_build" -eq "0" ]; then
fi
export ZOOKEEPER_FAULT_INJECTION=1
+# Initial run without S3 to create system.*_log on local file system to make it
+# available for dump via clickhouse-local
configure
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh
index f637ea24df3..b9abe5b51fe 100644
--- a/docker/test/upgrade/run.sh
+++ b/docker/test/upgrade/run.sh
@@ -49,17 +49,19 @@ echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_res
echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv
# Make upgrade check more funny by forcing Ordinary engine for system database
-mkdir /var/lib/clickhouse/metadata
+mkdir -p /var/lib/clickhouse/metadata
echo "ATTACH DATABASE system ENGINE=Ordinary" > /var/lib/clickhouse/metadata/system.sql
# Install previous release packages
install_packages previous_release_package_folder
-# Start server from previous release
-# Let's enable S3 storage by default
-export USE_S3_STORAGE_FOR_MERGE_TREE=1
-# Previous version may not be ready for fault injections
-export ZOOKEEPER_FAULT_INJECTION=0
+# Initial run without S3 to create system.*_log on local file system to make it
+# available for dump via clickhouse-local
+configure
+
+start
+stop
+mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
# force_sync=false doesn't work correctly on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
@@ -67,8 +69,6 @@ sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
-configure
-
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
| sed "s|s3|s3default|" \
@@ -76,6 +76,13 @@ sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+# Start server from previous release
+# Let's enable S3 storage by default
+export USE_S3_STORAGE_FOR_MERGE_TREE=1
+# Previous version may not be ready for fault injections
+export ZOOKEEPER_FAULT_INJECTION=0
+configure
+
start
clickhouse-client --query="SELECT 'Server version: ', version()"
@@ -185,8 +192,6 @@ tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
collect_query_and_trace_logs
-check_oom_in_dmesg
-
mv /var/log/clickhouse-server/stderr.log /test_output/
# Write check result into check_status.tsv
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 9af6df0c87d..32f323a63d5 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -309,6 +309,7 @@ The HTTP interface allows passing external data (external temporary tables) for
## Response Buffering {#response-buffering}
You can enable response buffering on the server-side. The `buffer_size` and `wait_end_of_query` URL parameters are provided for this purpose.
+The settings `http_response_buffer_size` and `http_wait_end_of_query` can also be used.
`buffer_size` determines the number of bytes in the result to buffer in the server memory. If a result body is larger than this threshold, the buffer is written to the HTTP channel, and the remaining data is sent directly to the HTTP channel.
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 99daddeeb99..0424c3520e0 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -765,7 +765,7 @@ Default value: `0`.
## concurrent_threads_soft_limit_ratio_to_cores {#concurrent_threads_soft_limit_ratio_to_cores}
The maximum number of query processing threads as multiple of number of logical cores.
-More details: [concurrent_threads_soft_limit_num](#concurrent-threads-soft-limit-num).
+More details: [concurrent_threads_soft_limit_num](#concurrent_threads_soft_limit_num).
Possible values:
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 37c6841225b..3c53f4fd0cf 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -966,10 +966,10 @@ This is an expert-level setting, and you shouldn't change it if you're just gett
## max_query_size {#settings-max_query_size}
-The maximum part of a query that can be taken to RAM for parsing with the SQL parser.
-The INSERT query also contains data for INSERT that is processed by a separate stream parser (that consumes O(1) RAM), which is not included in this restriction.
+The maximum number of bytes of a query string parsed by the SQL parser.
+Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.
-Default value: 256 KiB.
+Default value: 262144 (= 256 KiB).
## max_parser_depth {#max_parser_depth}
diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 3f9a0f67187..e019a3741cf 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -80,7 +80,7 @@ Required parameters:
- `type` — `encrypted`. Otherwise the encrypted disk is not created.
- `disk` — Type of disk for data storage.
-- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encrypt in hexadecimal form.
+- `key` — The key for encryption and decryption. Type: [Uint64](/docs/en/sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encode the key in hexadecimal form.
You can specify multiple keys using the `id` attribute (see example above).
Optional parameters:
diff --git a/docs/en/operations/system-tables/marked_dropped_tables.md b/docs/en/operations/system-tables/marked_dropped_tables.md
new file mode 100644
index 00000000000..23e969f7624
--- /dev/null
+++ b/docs/en/operations/system-tables/marked_dropped_tables.md
@@ -0,0 +1,37 @@
+---
+slug: /en/operations/system-tables/marked_dropped_tables
+---
+# marked_dropped_tables
+
+Contains information about tables for which `DROP TABLE` has been executed but whose data cleanup has not yet been performed.
+
+Columns:
+
+- `index` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Index in marked_dropped_tables queue.
+- `database` ([String](../../sql-reference/data-types/string.md)) — Database.
+- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
+- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid.
+- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name.
+- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of the table's metadata file in the `metadata_dropped` directory.
+- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time for which the next attempt to remove the table's data is scheduled. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`.
+
+**Example**
+
+The following example shows how to get information about marked_dropped_tables.
+
+``` sql
+SELECT *
+FROM system.marked_dropped_tables\G
+```
+
+``` text
+Row 1:
+──────
+index: 0
+database: default
+table: test
+uuid: 03141bb2-e97a-4d7c-a172-95cc066bb3bd
+engine: MergeTree
+metadata_dropped_path: /data/ClickHouse/build/programs/data/metadata_dropped/default.test.03141bb2-e97a-4d7c-a172-95cc066bb3bd.sql
+table_dropped_time: 2023-03-16 23:43:31
+```
diff --git a/docs/en/operations/utilities/clickhouse-format.md b/docs/en/operations/utilities/clickhouse-format.md
index bf2e618b791..101310cc65e 100644
--- a/docs/en/operations/utilities/clickhouse-format.md
+++ b/docs/en/operations/utilities/clickhouse-format.md
@@ -27,7 +27,7 @@ $ clickhouse-format --query "select number from numbers(10) where number%2 order
Result:
-```text
+```sql
SELECT number
FROM numbers(10)
WHERE number % 2
@@ -54,7 +54,7 @@ $ clickhouse-format -n <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELECT 1 UNIO
Result:
-```text
+```sql
SELECT *
FROM
(
@@ -75,7 +75,7 @@ $ clickhouse-format --seed Hello --obfuscate <<< "SELECT cost_first_screen BETWE
Result:
-```text
+```sql
SELECT treasury_mammoth_hazelnut BETWEEN nutmeg AND span, CASE WHEN chive >= 116 THEN switching ELSE ANYTHING END;
```
@@ -87,7 +87,7 @@ $ clickhouse-format --seed World --obfuscate <<< "SELECT cost_first_screen BETWE
Result:
-```text
+```sql
SELECT horse_tape_summer BETWEEN folklore AND moccasins, CASE WHEN intestine >= 116 THEN nonconformist ELSE FORESTRY END;
```
@@ -99,7 +99,7 @@ $ clickhouse-format --backslash <<< "SELECT * FROM (SELECT 1 AS x UNION ALL SELE
Result:
-```text
+```sql
SELECT * \
FROM \
( \
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index a31ec3c41d2..c248499be69 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -22,15 +22,15 @@ tuple(x, y, …)
## tupleElement
A function that allows getting a column from a tuple.
-‘N’ is the column index, starting from 1. ‘N’ must be a constant. ‘N’ must be a strict postive integer no greater than the size of the tuple.
-There is no cost to execute the function.
-The function implements the operator `x.N`.
+If the second argument is a number `n`, it is the column index, starting from 1. If the second argument is a string `s`, it represents the name of the element. An optional third argument can also be provided: when the index is out of bounds or no element with the given name exists, this default value is returned instead of throwing an exception. The second and third arguments, if provided, must be constants. There is no cost to execute the function.
+
+The function implements the operators `x.n` and `x.s`.
**Syntax**
``` sql
-tupleElement(tuple, n)
+tupleElement(tuple, n/s [, default_value])
```
## untuple
diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md
index f8742765619..cc49c6abf80 100644
--- a/docs/en/sql-reference/statements/alter/comment.md
+++ b/docs/en/sql-reference/statements/alter/comment.md
@@ -16,7 +16,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY COMMENT 'Comment'
**Examples**
-Creating a table with comment (for more information, see the [COMMENT] clause(../../../sql-reference/statements/create/table.md#comment-table)):
+Creating a table with comment (for more information, see the [COMMENT](../../../sql-reference/statements/create/table.md#comment-table) clause):
``` sql
CREATE TABLE table_with_comment
diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md
index ed35df9b97a..de39d960476 100644
--- a/docs/en/sql-reference/statements/create/table.md
+++ b/docs/en/sql-reference/statements/create/table.md
@@ -393,15 +393,15 @@ These codecs are designed to make compression more effective by using specific f
#### DoubleDelta
-`DoubleDelta` — Calculates delta of deltas and writes it in compact binary form. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-byte deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
+`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
#### Gorilla
-`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
+`Gorilla(bytes_size)` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
#### FPC
-`FPC` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
+`FPC(level, float_size)` - Repeatedly predicts the next floating point value in the sequence using the better of two predictors, then XORs the actual with the predicted value, and leading-zero compresses the result. Similar to Gorilla, this is efficient when storing a series of floating point values that change slowly. For 64-bit values (double), FPC is faster than Gorilla, for 32-bit values your mileage may vary. Possible `level` values: 1-28, the default value is 12. Possible `float_size` values: 4, 8, the default value is `sizeof(type)` if type is Float. In all other cases, it’s 4. For a detailed description of the algorithm see [High Throughput Compression of Double-Precision Floating-Point Data](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf).
#### T64
@@ -473,7 +473,7 @@ ENGINE = MergeTree ORDER BY x;
ClickHouse supports temporary tables which have the following characteristics:
- Temporary tables disappear when the session ends, including if the connection is lost.
-- A temporary table uses the Memory engine only.
+- A temporary table uses the Memory table engine when no engine is specified; it may use any table engine except the Replicated and `KeeperMap` engines.
- The DB can’t be specified for a temporary table. It is created outside of databases.
- Impossible to create a temporary table with distributed DDL query on all cluster servers (by using `ON CLUSTER`): this table exists only in the current session.
- If a temporary table has the same name as another one and a query specifies the table name without specifying the DB, the temporary table will be used.
@@ -487,7 +487,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
-)
+) [ENGINE = engine]
```
In most cases, temporary tables are not created manually, but when using external data for a query, or for distributed `(GLOBAL) IN`. For more information, see the appropriate sections
diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md
index 3383ea70a2b..1d9b2c9ea30 100644
--- a/docs/en/sql-reference/statements/grant.md
+++ b/docs/en/sql-reference/statements/grant.md
@@ -105,7 +105,8 @@ Hierarchy of privileges:
- [CREATE](#grant-create)
- `CREATE DATABASE`
- `CREATE TABLE`
- - `CREATE TEMPORARY TABLE`
+ - `CREATE ARBITRARY TEMPORARY TABLE`
+ - `CREATE TEMPORARY TABLE`
- `CREATE VIEW`
- `CREATE DICTIONARY`
- `CREATE FUNCTION`
@@ -313,7 +314,8 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
- `CREATE`. Level: `GROUP`
- `CREATE DATABASE`. Level: `DATABASE`
- `CREATE TABLE`. Level: `TABLE`
- - `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
+ - `CREATE ARBITRARY TEMPORARY TABLE`. Level: `GLOBAL`
+ - `CREATE TEMPORARY TABLE`. Level: `GLOBAL`
- `CREATE VIEW`. Level: `VIEW`
- `CREATE DICTIONARY`. Level: `DICTIONARY`
diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md
index 03a4ab3453c..f2d590d196b 100644
--- a/docs/en/sql-reference/statements/insert-into.md
+++ b/docs/en/sql-reference/statements/insert-into.md
@@ -91,6 +91,13 @@ INSERT INTO t FORMAT TabSeparated
You can insert data separately from the query by using the command-line client or the HTTP interface. For more information, see the section “[Interfaces](../../interfaces)”.
+:::note
+If you want to specify `SETTINGS` for an `INSERT` query, you have to do it _before_ the `FORMAT` clause, since everything after `FORMAT format_name` is treated as data. For example:
+```sql
+INSERT INTO table SETTINGS ... FORMAT format_name data_set
+```
+:::
+
## Constraints
If table has [constraints](../../sql-reference/statements/create/table.md#constraints), their expressions will be checked for each row of inserted data. If any of those constraints is not satisfied — server will raise an exception containing constraint name and expression, the query will be stopped.
diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md
index 7a930b529ed..64eae49be6c 100644
--- a/docs/ru/sql-reference/statements/create/table.md
+++ b/docs/ru/sql-reference/statements/create/table.md
@@ -260,8 +260,8 @@ ENGINE = MergeTree()
Кодеки шифрования:
-- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV.
-- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV.
+- `CODEC('AES-128-GCM-SIV')` — Зашифровывает данные с помощью AES-128 в режиме [RFC 8452](https://tools.ietf.org/html/rfc8452) GCM-SIV.
+- `CODEC('AES-256-GCM-SIV')` — Зашифровывает данные с помощью AES-256 в режиме GCM-SIV.
Эти кодеки используют фиксированный одноразовый ключ шифрования. Таким образом, это детерминированное шифрование. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, текст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит).
@@ -274,10 +274,10 @@ ENGINE = MergeTree()
**Пример**
```sql
-CREATE TABLE mytable
+CREATE TABLE mytable
(
x String Codec(AES_128_GCM_SIV)
-)
+)
ENGINE = MergeTree ORDER BY x;
```
@@ -287,10 +287,10 @@ ENGINE = MergeTree ORDER BY x;
**Пример**
```sql
-CREATE TABLE mytable
+CREATE TABLE mytable
(
x String Codec(Delta, LZ4, AES_128_GCM_SIV)
-)
+)
ENGINE = MergeTree ORDER BY x;
```
@@ -299,7 +299,7 @@ ENGINE = MergeTree ORDER BY x;
ClickHouse поддерживает временные таблицы со следующими характеристиками:
- Временные таблицы исчезают после завершения сессии, в том числе при обрыве соединения.
-- Временная таблица использует только модуль памяти.
+- Временная таблица использует движок таблиц Memory, когда движок не указан, и она может использовать любой движок таблиц за исключением движков Replicated и `KeeperMap`.
- Невозможно указать базу данных для временной таблицы. Она создается вне баз данных.
- Невозможно создать временную таблицу распределённым DDL запросом на всех серверах кластера (с опцией `ON CLUSTER`): такая таблица существует только в рамках существующей сессии.
- Если временная таблица имеет то же имя, что и некоторая другая, то, при упоминании в запросе без указания БД, будет использована временная таблица.
@@ -313,7 +313,7 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
...
-)
+) [ENGINE = engine]
```
В большинстве случаев, временные таблицы создаются не вручную, а при использовании внешних данных для запроса, или при распределённом `(GLOBAL) IN`. Подробнее см. соответствующие разделы
diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md
index 7c281634c98..73c63850750 100644
--- a/docs/ru/sql-reference/statements/grant.md
+++ b/docs/ru/sql-reference/statements/grant.md
@@ -107,7 +107,8 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
- [CREATE](#grant-create)
- `CREATE DATABASE`
- `CREATE TABLE`
- - `CREATE TEMPORARY TABLE`
+ - `CREATE ARBITRARY TEMPORARY TABLE`
+ - `CREATE TEMPORARY TABLE`
- `CREATE VIEW`
- `CREATE DICTIONARY`
- `CREATE FUNCTION`
@@ -314,7 +315,8 @@ GRANT INSERT(x,y) ON db.table TO john
- `CREATE`. Уровень: `GROUP`
- `CREATE DATABASE`. Уровень: `DATABASE`
- `CREATE TABLE`. Уровень: `TABLE`
- - `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL`
+ - `CREATE ARBITRARY TEMPORARY TABLE`. Уровень: `GLOBAL`
+ - `CREATE TEMPORARY TABLE`. Уровень: `GLOBAL`
- `CREATE VIEW`. Уровень: `VIEW`
- `CREATE DICTIONARY`. Уровень: `DICTIONARY`
diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service
index 090461df988..7742d8b278a 100644
--- a/packages/clickhouse-server.service
+++ b/packages/clickhouse-server.service
@@ -18,7 +18,7 @@ Group=clickhouse
Restart=always
RestartSec=30
# Since ClickHouse is systemd aware default 1m30sec may not be enough
-TimeoutStartSec=infinity
+TimeoutStartSec=0
# %p is resolved to the systemd unit name
RuntimeDirectory=%p
ExecStart=/usr/bin/clickhouse-server --config=/etc/clickhouse-server/config.xml --pid-file=%t/%p/%p.pid
diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 5b97daf2998..47017a94cb5 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -400,10 +400,6 @@ endif ()
add_custom_target (clickhouse-bundle ALL DEPENDS ${CLICKHOUSE_BUNDLE})
-if (USE_GDB_ADD_INDEX)
- add_custom_command(TARGET clickhouse POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} clickhouse COMMENT "Adding .gdb-index to clickhouse" VERBATIM)
-endif()
-
if (USE_BINARY_HASH)
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM)
endif()
diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp
index b60138b5692..cc25747702a 100644
--- a/programs/compressor/Compressor.cpp
+++ b/programs/compressor/Compressor.cpp
@@ -66,6 +66,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
using namespace DB;
namespace po = boost::program_options;
+ bool print_stacktrace = false;
try
{
po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
@@ -84,6 +85,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
("level", po::value(), "compression level for codecs specified via flags")
("none", "use no compression instead of LZ4")
("stat", "print block statistics of compressed data")
+ ("stacktrace", "print stacktrace of exception")
;
po::positional_options_description positional_desc;
@@ -107,6 +109,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
bool use_deflate_qpl = options.count("deflate_qpl");
bool stat_mode = options.count("stat");
bool use_none = options.count("none");
+ print_stacktrace = options.count("stacktrace");
unsigned block_size = options["block-size"].as();
std::vector codecs;
if (options.count("codec"))
@@ -188,11 +191,12 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
/// Compression
CompressedWriteBuffer to(*wb, codec, block_size);
copyData(*rb, to);
+ to.finalize();
}
}
catch (...)
{
- std::cerr << getCurrentExceptionMessage(true) << '\n';
+ std::cerr << getCurrentExceptionMessage(print_stacktrace) << '\n';
return getCurrentExceptionCode();
}
diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt
index f649e81c50a..118610e4dcd 100644
--- a/programs/odbc-bridge/CMakeLists.txt
+++ b/programs/odbc-bridge/CMakeLists.txt
@@ -35,10 +35,6 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro)
-if (USE_GDB_ADD_INDEX)
- add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
-endif()
-
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-odbc-bridge)
else()
diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp
index 6e93246e59a..147ba43a51d 100644
--- a/programs/odbc-bridge/ColumnInfoHandler.cpp
+++ b/programs/odbc-bridge/ColumnInfoHandler.cpp
@@ -30,7 +30,7 @@ namespace DB
namespace ErrorCodes
{
- extern const int LOGICAL_ERROR;
+ extern const int UNKNOWN_TABLE;
extern const int BAD_ARGUMENTS;
}
@@ -180,8 +180,19 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
columns.emplace_back(column_name, std::move(column_type));
}
+ /// Usually this should not happen, since in case the table does not
+ /// exist, the call should succeed.
+ /// However it is possible sometimes because internally there are two
+ /// queries in ClickHouse ODBC bridge:
+ /// - system.tables
+ /// - system.columns
+ /// And if the table is removed between these two queries, then
+ /// there will be no columns.
+ ///
+ /// Also sometimes system.columns can return an empty result because of
+ /// the cached value of total tables to scan.
if (columns.empty())
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns definition was not returned");
+ throw Exception(ErrorCodes::UNKNOWN_TABLE, "Columns definition was not returned");
WriteBufferFromHTTPServerResponse out(
response,
diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp
index 53565e8e5d7..800a54e69b3 100644
--- a/src/Access/AccessBackup.cpp
+++ b/src/Access/AccessBackup.cpp
@@ -72,12 +72,11 @@ namespace
return std::make_shared(buf.str());
}
- static AccessEntitiesInBackup fromBackupEntry(const IBackupEntry & backup_entry, const String & file_path)
+ static AccessEntitiesInBackup fromBackupEntry(std::unique_ptr buf, const String & file_path)
{
try
{
AccessEntitiesInBackup res;
- std::unique_ptr buf = backup_entry.getReadBuffer();
bool dependencies_found = false;
@@ -343,8 +342,8 @@ void AccessRestorerFromBackup::addDataPath(const String & data_path)
for (const String & filename : filenames)
{
String filepath_in_backup = data_path_in_backup_fs / filename;
- auto backup_entry = backup->readFile(filepath_in_backup);
- auto ab = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, filepath_in_backup);
+ auto read_buffer_from_backup = backup->readFile(filepath_in_backup);
+ auto ab = AccessEntitiesInBackup::fromBackupEntry(std::move(read_buffer_from_backup), filepath_in_backup);
boost::range::copy(ab.entities, std::back_inserter(entities));
boost::range::copy(ab.dependencies, std::inserter(dependencies, dependencies.end()));
diff --git a/src/Access/Common/AccessType.h b/src/Access/Common/AccessType.h
index f57cc2886e3..c73c0499fbe 100644
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@@ -15,6 +15,7 @@ enum class AccessType
/// node_type either specifies access type's level (GLOBAL/DATABASE/TABLE/DICTIONARY/VIEW/COLUMNS),
/// or specifies that the access type is a GROUP of other access types;
/// parent_group_name is the name of the group containing this access type (or NONE if there is no such group).
+/// NOTE A parent group must be declared AFTER all its children.
#define APPLY_FOR_ACCESS_TYPES(M) \
M(SHOW_DATABASES, "", DATABASE, SHOW) /* allows to execute SHOW DATABASES, SHOW CREATE DATABASE, USE ;
implicitly enabled by any grant on the database */\
@@ -86,8 +87,10 @@ enum class AccessType
M(CREATE_VIEW, "", VIEW, CREATE) /* allows to execute {CREATE|ATTACH} VIEW;
implicitly enabled by the grant CREATE_TABLE */\
M(CREATE_DICTIONARY, "", DICTIONARY, CREATE) /* allows to execute {CREATE|ATTACH} DICTIONARY */\
- M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables;
+ M(CREATE_TEMPORARY_TABLE, "", GLOBAL, CREATE_ARBITRARY_TEMPORARY_TABLE) /* allows to create and manipulate temporary tables;
implicitly enabled by the grant CREATE_TABLE on any table */ \
+ M(CREATE_ARBITRARY_TEMPORARY_TABLE, "", GLOBAL, CREATE) /* allows to create and manipulate temporary tables
+ with arbitrary table engine */\
M(CREATE_FUNCTION, "", GLOBAL, CREATE) /* allows to execute CREATE FUNCTION */ \
M(CREATE_NAMED_COLLECTION, "", GLOBAL, CREATE) /* allows to execute CREATE NAMED COLLECTION */ \
M(CREATE, "", GROUP, ALL) /* allows to execute {CREATE|ATTACH} */ \
diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp
index fbaacb2263b..cc51183c51f 100644
--- a/src/Access/ContextAccess.cpp
+++ b/src/Access/ContextAccess.cpp
@@ -81,6 +81,11 @@ namespace
if ((level == 0) && (max_flags_with_children & create_table))
res |= create_temporary_table;
+ /// CREATE TABLE (on any database/table) => CREATE_ARBITRARY_TEMPORARY_TABLE (global)
+ static const AccessFlags create_arbitrary_temporary_table = AccessType::CREATE_ARBITRARY_TEMPORARY_TABLE;
+ if ((level == 0) && (max_flags_with_children & create_table))
+ res |= create_arbitrary_temporary_table;
+
/// ALTER_TTL => ALTER_MATERIALIZE_TTL
static const AccessFlags alter_ttl = AccessType::ALTER_TTL;
static const AccessFlags alter_materialize_ttl = AccessType::ALTER_MATERIALIZE_TTL;
diff --git a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp
index 20f1713f8c2..0cf5310a3ad 100644
--- a/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp
+++ b/src/Analyzer/Passes/GroupingFunctionsResolvePass.cpp
@@ -32,17 +32,17 @@ enum class GroupByKind
GROUPING_SETS
};
-class GroupingFunctionResolveVisitor : public InDepthQueryTreeVisitor
+class GroupingFunctionResolveVisitor : public InDepthQueryTreeVisitorWithContext
{
public:
GroupingFunctionResolveVisitor(GroupByKind group_by_kind_,
QueryTreeNodePtrWithHashMap aggregation_key_to_index_,
ColumnNumbersList grouping_sets_keys_indices_,
ContextPtr context_)
- : group_by_kind(group_by_kind_)
+ : InDepthQueryTreeVisitorWithContext(std::move(context_))
+ , group_by_kind(group_by_kind_)
, aggregation_key_to_index(std::move(aggregation_key_to_index_))
, grouping_sets_keys_indexes(std::move(grouping_sets_keys_indices_))
- , context(std::move(context_))
{
}
@@ -71,7 +71,7 @@ public:
FunctionOverloadResolverPtr grouping_function_resolver;
bool add_grouping_set_column = false;
- bool force_grouping_standard_compatibility = context->getSettingsRef().force_grouping_standard_compatibility;
+ bool force_grouping_standard_compatibility = getSettings().force_grouping_standard_compatibility;
size_t aggregation_keys_size = aggregation_key_to_index.size();
switch (group_by_kind)
@@ -132,7 +132,6 @@ private:
GroupByKind group_by_kind;
QueryTreeNodePtrWithHashMap aggregation_key_to_index;
ColumnNumbersList grouping_sets_keys_indexes;
- ContextPtr context;
};
void resolveGroupingFunctions(QueryTreeNodePtr & query_node, ContextPtr context)
@@ -164,12 +163,17 @@ void resolveGroupingFunctions(QueryTreeNodePtr & query_node, ContextPtr context)
grouping_sets_used_aggregation_keys_list.emplace_back();
auto & grouping_sets_used_aggregation_keys = grouping_sets_used_aggregation_keys_list.back();
+ QueryTreeNodePtrWithHashSet used_keys_in_set;
+
for (auto & grouping_set_key_node : grouping_set_keys_list_node_typed.getNodes())
{
+ if (used_keys_in_set.contains(grouping_set_key_node))
+ continue;
+ used_keys_in_set.insert(grouping_set_key_node);
+ grouping_sets_used_aggregation_keys.push_back(grouping_set_key_node);
+
if (aggregation_key_to_index.contains(grouping_set_key_node))
continue;
-
- grouping_sets_used_aggregation_keys.push_back(grouping_set_key_node);
aggregation_key_to_index.emplace(grouping_set_key_node, aggregation_node_index);
++aggregation_node_index;
}
diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp
index 38575965973..f5f577a20ab 100644
--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@@ -5727,8 +5727,27 @@ void QueryAnalyzer::resolveInterpolateColumnsNodeList(QueryTreeNodePtr & interpo
{
auto & interpolate_node_typed = interpolate_node->as();
+ auto * column_to_interpolate = interpolate_node_typed.getExpression()->as();
+ if (!column_to_interpolate)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "INTERPOLATE can work only for indentifiers, but {} is found",
+ interpolate_node_typed.getExpression()->formatASTForErrorMessage());
+ auto column_to_interpolate_name = column_to_interpolate->getIdentifier().getFullName();
+
resolveExpressionNode(interpolate_node_typed.getExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
- resolveExpressionNode(interpolate_node_typed.getInterpolateExpression(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
+
+ bool is_column_constant = interpolate_node_typed.getExpression()->getNodeType() == QueryTreeNodeType::CONSTANT;
+
+ auto & interpolation_to_resolve = interpolate_node_typed.getInterpolateExpression();
+ IdentifierResolveScope interpolate_scope(interpolation_to_resolve, &scope /*parent_scope*/);
+
+ auto fake_column_node = std::make_shared(NameAndTypePair(column_to_interpolate_name, interpolate_node_typed.getExpression()->getResultType()), interpolate_node_typed.getExpression());
+ if (is_column_constant)
+ interpolate_scope.expression_argument_name_to_node.emplace(column_to_interpolate_name, fake_column_node);
+
+ resolveExpressionNode(interpolation_to_resolve, interpolate_scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
+
+ if (is_column_constant)
+ interpolation_to_resolve = interpolation_to_resolve->cloneAndReplace(fake_column_node, interpolate_node_typed.getExpression());
}
}
diff --git a/src/Analyzer/ValidationUtils.cpp b/src/Analyzer/ValidationUtils.cpp
index d70ed1170fc..58e6f26c03a 100644
--- a/src/Analyzer/ValidationUtils.cpp
+++ b/src/Analyzer/ValidationUtils.cpp
@@ -56,7 +56,7 @@ public:
}
if (!found_argument_in_group_by_keys)
- throw Exception(ErrorCodes::NOT_AN_AGGREGATE,
+ throw Exception(ErrorCodes::BAD_ARGUMENTS,
"GROUPING function argument {} is not in GROUP BY keys. In query {}",
grouping_function_arguments_node->formatASTForErrorMessage(),
query_node->formatASTForErrorMessage());
diff --git a/src/Backups/BackupIO.cpp b/src/Backups/BackupIO.cpp
index 6ca0c8e7bee..cc252c2f1bd 100644
--- a/src/Backups/BackupIO.cpp
+++ b/src/Backups/BackupIO.cpp
@@ -1,9 +1,10 @@
#include
#include
-#include
+#include
#include
+
namespace DB
{
@@ -12,6 +13,15 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}
+void IBackupReader::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings)
+{
+ auto read_buffer = readFile(file_name); /// Default implementation: copy `file_name` from the backup to `destination_path` on `destination_disk` through buffers.
+ auto write_buffer = destination_disk->writeFile(destination_path, std::min(size, DBMS_DEFAULT_BUFFER_SIZE), write_mode, write_settings); /// Write buffer size is min(size, DBMS_DEFAULT_BUFFER_SIZE).
+ copyData(*read_buffer, *write_buffer, size);
+ write_buffer->finalize();
+}
+
void IBackupWriter::copyDataToFile(const CreateReadBufferFunction & create_read_buffer, UInt64 offset, UInt64 size, const String & dest_file_name)
{
auto read_buffer = create_read_buffer();
diff --git a/src/Backups/BackupIO.h b/src/Backups/BackupIO.h
index fe2bed6aa1a..cf3d29ee51e 100644
--- a/src/Backups/BackupIO.h
+++ b/src/Backups/BackupIO.h
@@ -17,6 +17,8 @@ public:
virtual bool fileExists(const String & file_name) = 0;
virtual UInt64 getFileSize(const String & file_name) = 0;
virtual std::unique_ptr readFile(const String & file_name) = 0;
+ virtual void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings);
virtual DataSourceDescription getDataSourceDescription() const = 0;
};
diff --git a/src/Backups/BackupIO_Disk.cpp b/src/Backups/BackupIO_Disk.cpp
index 1b7202a5c28..cc6076541d0 100644
--- a/src/Backups/BackupIO_Disk.cpp
+++ b/src/Backups/BackupIO_Disk.cpp
@@ -1,4 +1,5 @@
#include
+#include
#include
#include
#include
@@ -12,7 +13,8 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
-BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & path_) : disk(disk_), path(path_)
+BackupReaderDisk::BackupReaderDisk(const DiskPtr & disk_, const String & path_)
+ : disk(disk_), path(path_), log(&Poco::Logger::get("BackupReaderDisk"))
{
}
@@ -33,6 +35,21 @@ std::unique_ptr BackupReaderDisk::readFile(const String & fi
return disk->readFile(path / file_name);
}
+void BackupReaderDisk::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings)
+{
+ if (write_mode == WriteMode::Rewrite) /// The disk-to-disk copy (`copyFile`) is used only when the destination is rewritten.
+ {
+ LOG_TRACE(log, "Copying {}/{} from disk {} to {} by the disk", path, file_name, disk->getName(), destination_disk->getName());
+ disk->copyFile(path / file_name, *destination_disk, destination_path, write_settings);
+ return;
+ }
+ /// Any other write mode falls back to the generic IBackupReader implementation.
+ LOG_TRACE(log, "Copying {}/{} from disk {} to {} through buffers", path, file_name, disk->getName(), destination_disk->getName());
+ IBackupReader::copyFileToDisk(file_name, size, destination_disk, destination_path, write_mode, write_settings);
+}
+
+
BackupWriterDisk::BackupWriterDisk(const DiskPtr & disk_, const String & path_) : disk(disk_), path(path_)
{
}
diff --git a/src/Backups/BackupIO_Disk.h b/src/Backups/BackupIO_Disk.h
index 5e5c431da7d..600e4f8ff39 100644
--- a/src/Backups/BackupIO_Disk.h
+++ b/src/Backups/BackupIO_Disk.h
@@ -17,11 +17,14 @@ public:
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr readFile(const String & file_name) override;
+ void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings) override;
DataSourceDescription getDataSourceDescription() const override;
private:
DiskPtr disk;
std::filesystem::path path;
+ Poco::Logger * log;
};
class BackupWriterDisk : public IBackupWriter
diff --git a/src/Backups/BackupIO_File.cpp b/src/Backups/BackupIO_File.cpp
index c010cae15de..5bf6d54928d 100644
--- a/src/Backups/BackupIO_File.cpp
+++ b/src/Backups/BackupIO_File.cpp
@@ -1,15 +1,18 @@
#include
+#include
#include
#include
#include
#include
+#include
+
namespace fs = std::filesystem;
namespace DB
{
-BackupReaderFile::BackupReaderFile(const String & path_) : path(path_)
+BackupReaderFile::BackupReaderFile(const String & path_) : path(path_), log(&Poco::Logger::get("BackupReaderFile"))
{
}
@@ -30,6 +33,22 @@ std::unique_ptr BackupReaderFile::readFile(const String & fi
return createReadBufferFromFileBase(path / file_name, {});
}
+void BackupReaderFile::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings)
+{
+ if ((write_mode == WriteMode::Rewrite) && (destination_disk->getDataSourceDescription() == getDataSourceDescription()))
+ {
+ /// Use more optimal way. fs::copy() with overwrite_existing replaces the destination and cannot append, hence Rewrite only.
+ LOG_TRACE(log, "Copying {}/{} to disk {} locally", path, file_name, destination_disk->getName());
+ fs::copy(path / file_name, fullPath(destination_disk, destination_path), fs::copy_options::overwrite_existing);
+ return;
+ }
+ /// Generic path through buffers. Pass the bare `file_name`: the base implementation calls readFile(), which prepends `path` itself.
+ LOG_TRACE(log, "Copying {}/{} to disk {} through buffers", path, file_name, destination_disk->getName());
+ IBackupReader::copyFileToDisk(file_name, size, destination_disk, destination_path, write_mode, write_settings);
+}
+
+
BackupWriterFile::BackupWriterFile(const String & path_) : path(path_)
{
}
diff --git a/src/Backups/BackupIO_File.h b/src/Backups/BackupIO_File.h
index 1727323ba4e..e1f4324a39f 100644
--- a/src/Backups/BackupIO_File.h
+++ b/src/Backups/BackupIO_File.h
@@ -15,10 +15,13 @@ public:
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr readFile(const String & file_name) override;
+ void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings) override;
DataSourceDescription getDataSourceDescription() const override;
private:
std::filesystem::path path;
+ Poco::Logger * log;
};
class BackupWriterFile : public IBackupWriter
diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp
index 2f315e8d488..0a757f94a49 100644
--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@@ -2,6 +2,7 @@
#if USE_AWS_S3
#include
+#include
#include
#include
#include
@@ -96,6 +97,7 @@ BackupReaderS3::BackupReaderS3(
, client(makeS3Client(s3_uri_, access_key_id_, secret_access_key_, context_))
, read_settings(context_->getReadSettings())
, request_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString()).request_settings)
+ , log(&Poco::Logger::get("BackupReaderS3"))
{
request_settings.max_single_read_retries = context_->getSettingsRef().s3_max_single_read_retries; // FIXME: Avoid taking value for endpoint
}
@@ -127,6 +129,27 @@ std::unique_ptr BackupReaderS3::readFile(const String & file
client, s3_uri.bucket, fs::path(s3_uri.key) / file_name, s3_uri.version_id, request_settings, read_settings);
}
+void BackupReaderS3::copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings)
+{
+ LOG_TRACE(log, "Copying {} to disk {}", file_name, destination_disk->getName());
+ /// Delegates to copyS3FileToDisk(). NOTE(review): the literal 0 passed before `size` is presumably the source offset - confirm against copyS3FileToDisk().
+ copyS3FileToDisk(
+ client,
+ s3_uri.bucket,
+ fs::path(s3_uri.key) / file_name,
+ s3_uri.version_id,
+ 0,
+ size,
+ destination_disk,
+ destination_path,
+ write_mode,
+ read_settings,
+ write_settings,
+ request_settings,
+ threadPoolCallbackRunner(BackupsIOThreadPool::get(), "BackupReaderS3"));
+}
+
BackupWriterS3::BackupWriterS3(
const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, const ContextPtr & context_)
diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h
index 9c3132c5689..94e61248428 100644
--- a/src/Backups/BackupIO_S3.h
+++ b/src/Backups/BackupIO_S3.h
@@ -22,6 +22,8 @@ public:
bool fileExists(const String & file_name) override;
UInt64 getFileSize(const String & file_name) override;
std::unique_ptr readFile(const String & file_name) override;
+ void copyFileToDisk(const String & file_name, size_t size, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings) override;
DataSourceDescription getDataSourceDescription() const override;
private:
@@ -29,6 +31,7 @@ private:
std::shared_ptr client;
ReadSettings read_settings;
S3Settings::RequestSettings request_settings;
+ Poco::Logger * log;
};
diff --git a/src/Backups/BackupImpl.cpp b/src/Backups/BackupImpl.cpp
index fb8abee814a..a89dc2ff2b3 100644
--- a/src/Backups/BackupImpl.cpp
+++ b/src/Backups/BackupImpl.cpp
@@ -79,66 +79,6 @@ namespace
}
-class BackupImpl::BackupEntryFromBackupImpl : public IBackupEntry
-{
-public:
- BackupEntryFromBackupImpl(
- const std::shared_ptr & backup_,
- const String & archive_suffix_,
- const String & data_file_name_,
- UInt64 size_,
- const UInt128 checksum_,
- BackupEntryPtr base_backup_entry_ = {})
- : backup(backup_), archive_suffix(archive_suffix_), data_file_name(data_file_name_), size(size_), checksum(checksum_),
- base_backup_entry(std::move(base_backup_entry_))
- {
- }
-
- std::unique_ptr getReadBuffer() const override
- {
- std::unique_ptr read_buffer;
- if (backup->use_archives)
- read_buffer = backup->getArchiveReader(archive_suffix)->readFile(data_file_name);
- else
- read_buffer = backup->reader->readFile(data_file_name);
- if (base_backup_entry)
- {
- size_t base_size = base_backup_entry->getSize();
- read_buffer = std::make_unique(
- base_backup_entry->getReadBuffer(), base_size, std::move(read_buffer), size - base_size);
- }
- return read_buffer;
- }
-
- UInt64 getSize() const override { return size; }
- std::optional getChecksum() const override { return checksum; }
-
- String getFilePath() const override
- {
- return data_file_name;
- }
-
- DiskPtr tryGetDiskIfExists() const override
- {
- return nullptr;
- }
-
- DataSourceDescription getDataSourceDescription() const override
- {
- return backup->reader->getDataSourceDescription();
- }
-
-
-private:
- const std::shared_ptr backup;
- const String archive_suffix;
- const String data_file_name;
- const UInt64 size;
- const UInt128 checksum;
- BackupEntryPtr base_backup_entry;
-};
-
-
BackupImpl::BackupImpl(
const String & backup_name_for_logging_,
const ArchiveParams & archive_params_,
@@ -645,24 +585,22 @@ SizeAndChecksum BackupImpl::getFileSizeAndChecksum(const String & file_name) con
return {info->size, info->checksum};
}
-BackupEntryPtr BackupImpl::readFile(const String & file_name) const
+std::unique_ptr BackupImpl::readFile(const String & file_name) const
{
return readFile(getFileSizeAndChecksum(file_name));
}
-BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) const
+std::unique_ptr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) const
{
- std::lock_guard lock{mutex};
if (open_mode != OpenMode::READ)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");
- ++num_read_files;
- num_read_bytes += size_and_checksum.first;
-
- if (!size_and_checksum.first)
+ if (size_and_checksum.first == 0)
{
/// Entry's data is empty.
- return std::make_unique(nullptr, 0, UInt128{0, 0});
+ std::lock_guard lock{mutex};
+ ++num_read_files;
+ return std::make_unique(static_cast(nullptr), 0);
}
auto info_opt = coordination->getFileInfo(size_and_checksum);
@@ -677,45 +615,149 @@ BackupEntryPtr BackupImpl::readFile(const SizeAndChecksum & size_and_checksum) c
const auto & info = *info_opt;
+ std::unique_ptr read_buffer;
+ std::unique_ptr base_read_buffer;
+
+ if (info.size > info.base_size)
+ {
+ /// Make `read_buffer` if there is data for this backup entry in this backup.
+ if (use_archives)
+ {
+ std::shared_ptr archive_reader;
+ {
+ std::lock_guard lock{mutex};
+ archive_reader = getArchiveReader(info.archive_suffix);
+ }
+ read_buffer = archive_reader->readFile(info.data_file_name);
+ }
+ else
+ {
+ read_buffer = reader->readFile(info.data_file_name);
+ }
+ }
+
+ if (info.base_size)
+ {
+ /// Make `base_read_buffer` if there is data for this backup entry in the base backup.
+ if (!base_backup)
+ {
+ throw Exception(
+ ErrorCodes::NO_BASE_BACKUP,
+ "Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
+ backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
+ }
+
+ if (!base_backup->fileExists(std::pair(info.base_size, info.base_checksum)))
+ {
+ throw Exception(
+ ErrorCodes::WRONG_BASE_BACKUP,
+ "Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
+ backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
+ }
+
+ base_read_buffer = base_backup->readFile(std::pair{info.base_size, info.base_checksum});
+ }
+
+ {
+ /// Update number of read files.
+ std::lock_guard lock{mutex};
+ ++num_read_files;
+ num_read_bytes += info.size;
+ }
+
if (!info.base_size)
{
- /// Data goes completely from this backup, the base backup isn't used.
- return std::make_unique(
- std::static_pointer_cast(shared_from_this()), info.archive_suffix, info.data_file_name, info.size, info.checksum);
+ /// Data comes completely from this backup, the base backup isn't used.
+ return read_buffer;
}
-
- if (!base_backup)
+ else if (info.size == info.base_size)
{
- throw Exception(
- ErrorCodes::NO_BASE_BACKUP,
- "Backup {}: Entry {} is marked to be read from a base backup, but there is no base backup specified",
- backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
+ /// Data comes completely from the base backup (nothing comes from this backup).
+ return base_read_buffer;
}
-
- if (!base_backup->fileExists(std::pair(info.base_size, info.base_checksum)))
+ else
{
- throw Exception(
- ErrorCodes::WRONG_BASE_BACKUP,
- "Backup {}: Entry {} is marked to be read from a base backup, but doesn't exist there",
- backup_name_for_logging, formatSizeAndChecksum(size_and_checksum));
- }
-
- auto base_entry = base_backup->readFile(std::pair{info.base_size, info.base_checksum});
-
- if (info.size == info.base_size)
- {
- /// Data goes completely from the base backup (nothing goes from this backup).
- return base_entry;
- }
-
- {
- /// The beginning of the data goes from the base backup,
- /// and the ending goes from this backup.
- return std::make_unique(
- static_pointer_cast(shared_from_this()), info.archive_suffix, info.data_file_name, info.size, info.checksum, std::move(base_entry));
+ /// The beginning of the data comes from the base backup,
+ /// and the ending comes from this backup.
+ return std::make_unique(
+ std::move(base_read_buffer), info.base_size, std::move(read_buffer), info.size - info.base_size);
}
}
+size_t BackupImpl::copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings) const
+{
+ return copyFileToDisk(getFileSizeAndChecksum(file_name), destination_disk, destination_path, write_mode, write_settings); /// Resolves the name to (size, checksum) and delegates to the SizeAndChecksum overload.
+}
+
+size_t BackupImpl::copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings) const
+{
+ if (open_mode != OpenMode::READ)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Backup is not opened for reading");
+
+ if (size_and_checksum.first == 0)
+ {
+ /// Entry's data is empty.
+ if (write_mode == WriteMode::Rewrite)
+ {
+ /// Just create an empty file.
+ destination_disk->createFile(destination_path);
+ }
+ std::lock_guard lock{mutex};
+ ++num_read_files;
+ return 0;
+ }
+
+ auto info_opt = coordination->getFileInfo(size_and_checksum);
+ if (!info_opt)
+ {
+ throw Exception(
+ ErrorCodes::BACKUP_ENTRY_NOT_FOUND,
+ "Backup {}: Entry {} not found in the backup",
+ backup_name_for_logging,
+ formatSizeAndChecksum(size_and_checksum));
+ }
+
+ const auto & info = *info_opt;
+ /// Try the fast paths first: let the reader (or the base backup) copy the whole file directly to the disk.
+ bool file_copied = false;
+
+ if (info.size && !info.base_size && !use_archives)
+ {
+ /// Data comes completely from this backup.
+ reader->copyFileToDisk(info.data_file_name, info.size, destination_disk, destination_path, write_mode, write_settings);
+ file_copied = true;
+ }
+ else if (base_backup && info.size && (info.size == info.base_size))
+ {
+ /// Data comes completely from the base backup (nothing comes from this backup).
+ /// If no base backup is specified we fall through to readFile() below, which throws NO_BASE_BACKUP.
+ base_backup->copyFileToDisk(std::pair{info.base_size, info.base_checksum}, destination_disk, destination_path, write_mode, write_settings);
+ file_copied = true;
+ }
+
+ if (file_copied)
+ {
+ /// The file is already copied, but `num_read_files` is not updated yet.
+ std::lock_guard lock{mutex};
+ ++num_read_files;
+ num_read_bytes += info.size;
+ }
+ else
+ {
+ /// Use the generic way to copy data. `readFile()` will update `num_read_files`.
+ auto read_buffer = readFile(size_and_checksum);
+ auto write_buffer = destination_disk->writeFile(destination_path, std::min(info.size, DBMS_DEFAULT_BUFFER_SIZE),
+ write_mode, write_settings);
+ copyData(*read_buffer, *write_buffer, info.size);
+ write_buffer->finalize();
+ }
+
+ return info.size;
+}
+
+
namespace
{
diff --git a/src/Backups/BackupImpl.h b/src/Backups/BackupImpl.h
index 4aa300d5021..c33ca7c94ad 100644
--- a/src/Backups/BackupImpl.h
+++ b/src/Backups/BackupImpl.h
@@ -73,8 +73,12 @@ public:
UInt64 getFileSize(const String & file_name) const override;
UInt128 getFileChecksum(const String & file_name) const override;
SizeAndChecksum getFileSizeAndChecksum(const String & file_name) const override;
- BackupEntryPtr readFile(const String & file_name) const override;
- BackupEntryPtr readFile(const SizeAndChecksum & size_and_checksum) const override;
+ std::unique_ptr readFile(const String & file_name) const override;
+ std::unique_ptr readFile(const SizeAndChecksum & size_and_checksum) const override;
+ size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings) const override;
+ size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode, const WriteSettings & write_settings) const override;
void writeFile(const String & file_name, BackupEntryPtr entry) override;
void finalizeWriting() override;
bool supportsWritingInMultipleThreads() const override { return !use_archives; }
diff --git a/src/Backups/IBackup.h b/src/Backups/IBackup.h
index 208305e3f35..03fab6a25d6 100644
--- a/src/Backups/IBackup.h
+++ b/src/Backups/IBackup.h
@@ -1,6 +1,8 @@
#pragma once
#include
+#include
+#include
#include
#include
@@ -8,7 +10,10 @@
namespace DB
{
class IBackupEntry;
+class IDisk;
using BackupEntryPtr = std::shared_ptr;
+using DiskPtr = std::shared_ptr;
+class SeekableReadBuffer;
/// Represents a backup, i.e. a storage of BackupEntries which can be accessed by their names.
/// A backup can be either incremental or non-incremental. An incremental backup doesn't store
@@ -95,8 +100,15 @@ public:
virtual SizeAndChecksum getFileSizeAndChecksum(const String & file_name) const = 0;
/// Reads an entry from the backup.
- virtual BackupEntryPtr readFile(const String & file_name) const = 0;
- virtual BackupEntryPtr readFile(const SizeAndChecksum & size_and_checksum) const = 0;
+ virtual std::unique_ptr readFile(const String & file_name) const = 0;
+ virtual std::unique_ptr readFile(const SizeAndChecksum & size_and_checksum) const = 0;
+
+ /// Copies a file from the backup to a specified destination disk. Returns the number of bytes written.
+ virtual size_t copyFileToDisk(const String & file_name, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode = WriteMode::Rewrite, const WriteSettings & write_settings = {}) const = 0;
+
+ virtual size_t copyFileToDisk(const SizeAndChecksum & size_and_checksum, DiskPtr destination_disk, const String & destination_path,
+ WriteMode write_mode = WriteMode::Rewrite, const WriteSettings & write_settings = {}) const = 0;
/// Puts a new entry to the backup.
virtual void writeFile(const String & file_name, BackupEntryPtr entry) = 0;
diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index b19cbaf975c..68a68379f79 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -316,7 +316,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name
= *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table);
}
- auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer();
+ auto read_buffer = backup->readFile(*metadata_path);
String create_query_str;
readStringUntilEOF(create_query_str, *read_buffer);
read_buffer.reset();
@@ -410,7 +410,7 @@ void RestorerFromBackup::findDatabaseInBackup(const String & database_name_in_ba
if (metadata_path)
{
- auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer();
+ auto read_buffer = backup->readFile(*metadata_path);
String create_query_str;
readStringUntilEOF(create_query_str, *read_buffer);
read_buffer.reset();
diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp
index 4f7cf893328..6643a94c3bc 100644
--- a/src/Client/Connection.cpp
+++ b/src/Client/Connection.cpp
@@ -216,6 +216,7 @@ void Connection::disconnect()
socket->close();
socket = nullptr;
connected = false;
+ nonce.reset();
}
@@ -324,6 +325,14 @@ void Connection::receiveHello()
password_complexity_rules.push_back({std::move(original_pattern), std::move(exception_message)});
}
}
+ if (server_revision >= DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2)
+ {
+ chassert(!nonce.has_value());
+
+ UInt64 read_nonce;
+ readIntBinary(read_nonce, *in);
+ nonce.emplace(read_nonce);
+ }
}
else if (packet_type == Protocol::Server::Exception)
receiveException()->rethrow();
@@ -584,6 +593,9 @@ void Connection::sendQuery(
{
#if USE_SSL
std::string data(salt);
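+ // Backward compatibility: the nonce is set only for servers with DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2, older ones hash without it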
+ if (nonce.has_value())
+ data += std::to_string(nonce.value());
data += cluster_secret;
data += query;
data += query_id;
@@ -593,8 +605,8 @@ void Connection::sendQuery(
std::string hash = encodeSHA256(data);
writeStringBinary(hash, *out);
#else
- throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
- "Inter-server secret support is disabled, because ClickHouse was built without SSL library");
+ throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
+ "Inter-server secret support is disabled, because ClickHouse was built without SSL library");
#endif
}
else
diff --git a/src/Client/Connection.h b/src/Client/Connection.h
index d806c5e8b1f..b86567e2ed0 100644
--- a/src/Client/Connection.h
+++ b/src/Client/Connection.h
@@ -167,7 +167,10 @@ private:
/// For inter-server authorization
String cluster;
String cluster_secret;
+ /// For DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET
String salt;
+ /// For DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2
+ std::optional nonce;
/// Address is resolved during the first connection (or the following reconnects)
/// Use it only for logging purposes
diff --git a/src/Common/CurrentMetrics.cpp b/src/Common/CurrentMetrics.cpp
index d696070aa41..5b20d98aa01 100644
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@@ -11,13 +11,21 @@
M(ReplicatedSend, "Number of data parts being sent to replicas") \
M(ReplicatedChecks, "Number of data parts checking for consistency") \
M(BackgroundMergesAndMutationsPoolTask, "Number of active merges and mutations in an associated background pool") \
+ M(BackgroundMergesAndMutationsPoolSize, "Limit on number of active merges and mutations in an associated background pool") \
M(BackgroundFetchesPoolTask, "Number of active fetches in an associated background pool") \
+ M(BackgroundFetchesPoolSize, "Limit on number of simultaneous fetches in an associated background pool") \
M(BackgroundCommonPoolTask, "Number of active tasks in an associated background pool") \
+ M(BackgroundCommonPoolSize, "Limit on number of tasks in an associated background pool") \
M(BackgroundMovePoolTask, "Number of active tasks in BackgroundProcessingPool for moves") \
+ M(BackgroundMovePoolSize, "Limit on number of tasks in BackgroundProcessingPool for moves") \
M(BackgroundSchedulePoolTask, "Number of active tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.") \
+ M(BackgroundSchedulePoolSize, "Limit on number of tasks in BackgroundSchedulePool. This pool is used for periodic ReplicatedMergeTree tasks, like cleaning old data parts, altering data parts, replica re-initialization, etc.") \
M(BackgroundBufferFlushSchedulePoolTask, "Number of active tasks in BackgroundBufferFlushSchedulePool. This pool is used for periodic Buffer flushes") \
+ M(BackgroundBufferFlushSchedulePoolSize, "Limit on number of tasks in BackgroundBufferFlushSchedulePool") \
M(BackgroundDistributedSchedulePoolTask, "Number of active tasks in BackgroundDistributedSchedulePool. This pool is used for distributed sends that is done in background.") \
+ M(BackgroundDistributedSchedulePoolSize, "Limit on number of tasks in BackgroundDistributedSchedulePool") \
M(BackgroundMessageBrokerSchedulePoolTask, "Number of active tasks in BackgroundProcessingPool for message streaming") \
+ M(BackgroundMessageBrokerSchedulePoolSize, "Limit on number of tasks in BackgroundProcessingPool for message streaming") \
M(CacheDictionaryUpdateQueueBatches, "Number of 'batches' (a set of keys) in update queue in CacheDictionaries.") \
M(CacheDictionaryUpdateQueueKeys, "Exact number of keys in update queue in CacheDictionaries.") \
M(DiskSpaceReservedForMerge, "Disk space reserved for currently running background merges. It is slightly more than the total size of currently merging parts.") \
diff --git a/src/Common/LRUCachePolicy.h b/src/Common/LRUCachePolicy.h
index 3c069eb276b..b6c0ef0d3ef 100644
--- a/src/Common/LRUCachePolicy.h
+++ b/src/Common/LRUCachePolicy.h
@@ -31,7 +31,7 @@ public:
* max_elements_size == 0 means no elements size restrictions.
*/
explicit LRUCachePolicy(size_t max_size_, size_t max_elements_size_ = 0, OnWeightLossFunction on_weight_loss_function_ = {})
- : max_size(std::max(static_cast(1), max_size_)), max_elements_size(max_elements_size_)
+ : max_size(std::max(1uz, max_size_)), max_elements_size(max_elements_size_)
{
Base::on_weight_loss_function = on_weight_loss_function_;
}
diff --git a/src/Common/RWLock.cpp b/src/Common/RWLock.cpp
index c2419d0c1b7..2d0fcfa3e74 100644
--- a/src/Common/RWLock.cpp
+++ b/src/Common/RWLock.cpp
@@ -97,7 +97,7 @@ private:
* Note: "SM" in the commentaries below stands for STATE MODIFICATION
*/
RWLockImpl::LockHolder
-RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & lock_timeout_ms)
+RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & lock_timeout_ms, bool throw_in_fast_path)
{
const auto lock_deadline_tp =
(lock_timeout_ms == std::chrono::milliseconds(0))
@@ -130,11 +130,19 @@ RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id, const std::c
if (owner_query_it != owner_queries.end())
{
if (wrlock_owner != writers_queue.end())
- throw Exception(ErrorCodes::LOGICAL_ERROR, "RWLockImpl::getLock(): RWLock is already locked in exclusive mode");
+ {
+ if (throw_in_fast_path)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "RWLockImpl::getLock(): RWLock is already locked in exclusive mode");
+ return nullptr;
+ }
/// Lock upgrading is not supported
if (type == Write)
- throw Exception(ErrorCodes::LOGICAL_ERROR, "RWLockImpl::getLock(): Cannot acquire exclusive lock while RWLock is already locked");
+ {
+ if (throw_in_fast_path)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "RWLockImpl::getLock(): Cannot acquire exclusive lock while RWLock is already locked");
+ return nullptr;
+ }
/// N.B. Type is Read here, query_id is not empty and it_query is a valid iterator
++owner_query_it->second; /// SM1: nothrow
diff --git a/src/Common/RWLock.h b/src/Common/RWLock.h
index cb4cf7f9200..dd965b65026 100644
--- a/src/Common/RWLock.h
+++ b/src/Common/RWLock.h
@@ -56,7 +56,7 @@ public:
/// Empty query_id means the lock is acquired from outside of query context (e.g. in a background thread).
LockHolder getLock(Type type, const String & query_id,
- const std::chrono::milliseconds & lock_timeout_ms = std::chrono::milliseconds(0));
+ const std::chrono::milliseconds & lock_timeout_ms = std::chrono::milliseconds(0), bool throw_in_fast_path = true);
/// Use as query_id to acquire a lock outside the query context.
inline static const String NO_QUERY = String();
diff --git a/src/Common/ZooKeeper/ZooKeeperLock.cpp b/src/Common/ZooKeeper/ZooKeeperLock.cpp
index 1200dcdb533..a52c942a35f 100644
--- a/src/Common/ZooKeeper/ZooKeeperLock.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperLock.cpp
@@ -41,6 +41,16 @@ ZooKeeperLock::~ZooKeeperLock()
}
}
+bool ZooKeeperLock::isLocked() const
+{
+ return locked;
+}
+
+const std::string & ZooKeeperLock::getLockPath() const
+{
+ return lock_path;
+}
+
void ZooKeeperLock::unlock()
{
if (!locked)
diff --git a/src/Common/ZooKeeper/ZooKeeperLock.h b/src/Common/ZooKeeper/ZooKeeperLock.h
index f249e69dcc3..755ca1333b8 100644
--- a/src/Common/ZooKeeper/ZooKeeperLock.h
+++ b/src/Common/ZooKeeper/ZooKeeperLock.h
@@ -37,6 +37,8 @@ public:
void unlock();
bool tryLock();
+ bool isLocked() const;
+ const std::string & getLockPath() const;
private:
zkutil::ZooKeeperPtr zookeeper;
diff --git a/src/Compression/CompressionCodecDelta.cpp b/src/Compression/CompressionCodecDelta.cpp
index 6d6078b9ee1..37f9230da14 100644
--- a/src/Compression/CompressionCodecDelta.cpp
+++ b/src/Compression/CompressionCodecDelta.cpp
@@ -193,7 +193,8 @@ void registerCodecDelta(CompressionCodecFactory & factory)
UInt8 method_code = static_cast(CompressionMethodByte::Delta);
auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
- UInt8 delta_bytes_size = 0;
+ /// Default bytes size is 1.
+ UInt8 delta_bytes_size = 1;
if (arguments && !arguments->children.empty())
{
@@ -202,8 +203,8 @@ void registerCodecDelta(CompressionCodecFactory & factory)
const auto children = arguments->children;
const auto * literal = children[0]->as();
- if (!literal)
- throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Delta codec argument must be integer");
+ if (!literal || literal->value.getType() != Field::Types::Which::UInt64)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Delta codec argument must be unsigned integer");
size_t user_bytes_size = literal->value.safeGet();
if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8)
diff --git a/src/Compression/CompressionCodecDoubleDelta.cpp b/src/Compression/CompressionCodecDoubleDelta.cpp
index 782675dfd32..dea15f99a5a 100644
--- a/src/Compression/CompressionCodecDoubleDelta.cpp
+++ b/src/Compression/CompressionCodecDoubleDelta.cpp
@@ -7,7 +7,7 @@
#include
#include
#include
-#include
+#include
#include
#include
@@ -31,7 +31,7 @@ namespace DB
/** DoubleDelta column codec implementation.
*
* Based on Gorilla paper: http://www.vldb.org/pvldb/vol8/p1816-teller.pdf, which was extended
- * to support 64bit types. The drawback is 1 extra bit for 32-byte wide deltas: 5-bit prefix
+ * to support 64bit types. The drawback is 1 extra bit for 32-bit wide deltas: 5-bit prefix
* instead of 4-bit prefix.
*
* This codec is best used against monotonic integer sequences with constant (or almost constant)
@@ -145,6 +145,8 @@ namespace ErrorCodes
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
extern const int BAD_ARGUMENTS;
+ extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
+ extern const int ILLEGAL_CODEC_PARAMETER;
}
namespace
@@ -549,10 +551,28 @@ void registerCodecDoubleDelta(CompressionCodecFactory & factory)
factory.registerCompressionCodecWithType("DoubleDelta", method_code,
[&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
- if (arguments)
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec DoubleDelta does not accept any arguments");
+ /// Default bytes size is 1.
+ UInt8 data_bytes_size = 1;
+ if (arguments && !arguments->children.empty())
+ {
+ if (arguments->children.size() > 1)
+ throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "DoubleDelta codec must have 1 parameter, given {}", arguments->children.size());
+
+ const auto children = arguments->children;
+ const auto * literal = children[0]->as();
+ if (!literal || literal->value.getType() != Field::Types::Which::UInt64)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "DoubleDelta codec argument must be unsigned integer");
+
+ size_t user_bytes_size = literal->value.safeGet();
+ if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Argument value for DoubleDelta codec can be 1, 2, 4 or 8, given {}", user_bytes_size);
+ data_bytes_size = static_cast(user_bytes_size);
+ }
+ else if (column_type)
+ {
+ data_bytes_size = getDataBytesSize(column_type);
+ }
- UInt8 data_bytes_size = column_type ? getDataBytesSize(column_type) : 0;
return std::make_shared(data_bytes_size);
});
}
diff --git a/src/Compression/CompressionCodecFPC.cpp b/src/Compression/CompressionCodecFPC.cpp
index 31b12b762c8..8c3e518ed62 100644
--- a/src/Compression/CompressionCodecFPC.cpp
+++ b/src/Compression/CompressionCodecFPC.cpp
@@ -109,28 +109,42 @@ void registerCodecFPC(CompressionCodecFactory & factory)
auto method_code = static_cast(CompressionMethodByte::FPC);
auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
- UInt8 float_width = 0;
+ /// Set default float width to 4.
+ UInt8 float_width = 4;
if (column_type != nullptr)
float_width = getFloatBytesSize(*column_type);
UInt8 level = CompressionCodecFPC::DEFAULT_COMPRESSION_LEVEL;
if (arguments && !arguments->children.empty())
{
- if (arguments->children.size() > 1)
+ if (arguments->children.size() > 2)
{
throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE,
- "FPC codec must have 1 parameter, given {}", arguments->children.size());
+ "FPC codec must have from 0 to 2 parameters, given {}", arguments->children.size());
}
const auto * literal = arguments->children.front()->as();
- if (!literal)
- throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "FPC codec argument must be integer");
+ if (!literal || literal->value.getType() != Field::Types::Which::UInt64)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "FPC codec argument must be unsigned integer");
level = literal->value.safeGet();
if (level < 1 || level > CompressionCodecFPC::MAX_COMPRESSION_LEVEL)
throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "FPC codec level must be between {} and {}",
1, static_cast(CompressionCodecFPC::MAX_COMPRESSION_LEVEL));
+
+ if (arguments->children.size() == 2)
+ {
+ literal = arguments->children[1]->as();
+ if (!literal || literal->value.getType() != Field::Types::Which::UInt64)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "FPC codec argument must be unsigned integer");
+
+ size_t user_float_width = literal->value.safeGet();
+ if (user_float_width != 4 && user_float_width != 8)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Float size for FPC codec can be 4 or 8, given {}", user_float_width);
+ float_width = static_cast(user_float_width);
+ }
}
+
return std::make_shared(float_width, level);
};
factory.registerCompressionCodecWithType("FPC", method_code, codec_builder);
diff --git a/src/Compression/CompressionCodecGorilla.cpp b/src/Compression/CompressionCodecGorilla.cpp
index d68648bd83c..568640153ac 100644
--- a/src/Compression/CompressionCodecGorilla.cpp
+++ b/src/Compression/CompressionCodecGorilla.cpp
@@ -7,6 +7,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -134,6 +135,8 @@ namespace ErrorCodes
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
extern const int BAD_ARGUMENTS;
+ extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
+ extern const int ILLEGAL_CODEC_PARAMETER;
}
namespace
@@ -445,10 +448,28 @@ void registerCodecGorilla(CompressionCodecFactory & factory)
UInt8 method_code = static_cast(CompressionMethodByte::Gorilla);
auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
{
- if (arguments)
- throw Exception(ErrorCodes::BAD_ARGUMENTS, "Codec Gorilla does not accept any arguments");
+ /// Default bytes size is 1
+ UInt8 data_bytes_size = 1;
+ if (arguments && !arguments->children.empty())
+ {
+ if (arguments->children.size() > 1)
+ throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "Gorilla codec must have 1 parameter, given {}", arguments->children.size());
+
+ const auto children = arguments->children;
+ const auto * literal = children[0]->as();
+ if (!literal || literal->value.getType() != Field::Types::Which::UInt64)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Gorilla codec argument must be unsigned integer");
+
+ size_t user_bytes_size = literal->value.safeGet();
+ if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8)
+ throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Argument value for Gorilla codec can be 1, 2, 4 or 8, given {}", user_bytes_size);
+ data_bytes_size = static_cast(user_bytes_size);
+ }
+ else if (column_type)
+ {
+ data_bytes_size = getDataBytesSize(column_type);
+ }
- UInt8 data_bytes_size = column_type ? getDataBytesSize(column_type) : 0;
return std::make_shared(data_bytes_size);
};
factory.registerCompressionCodecWithType("Gorilla", method_code, codec_builder);
diff --git a/src/Compression/CompressionCodecT64.cpp b/src/Compression/CompressionCodecT64.cpp
index e7f1615128a..1f8331c8a5f 100644
--- a/src/Compression/CompressionCodecT64.cpp
+++ b/src/Compression/CompressionCodecT64.cpp
@@ -33,7 +33,8 @@ public:
Bit
};
- CompressionCodecT64(TypeIndex type_idx_, Variant variant_);
+ // type_idx_ is required for compression, but not for decompression.
+ CompressionCodecT64(std::optional type_idx_, Variant variant_);
uint8_t getMethodByte() const override;
@@ -53,7 +54,7 @@ protected:
bool isGenericCompression() const override { return false; }
private:
- TypeIndex type_idx;
+ std::optional type_idx;
Variant variant;
};
@@ -91,9 +92,12 @@ enum class MagicNumber : uint8_t
IPv4 = 21,
};
-MagicNumber serializeTypeId(TypeIndex type_id)
+MagicNumber serializeTypeId(std::optional type_id)
{
- switch (type_id)
+ if (!type_id)
+ throw Exception(ErrorCodes::CANNOT_COMPRESS, "T64 codec doesn't support compression without information about column type");
+
+ switch (*type_id)
{
case TypeIndex::UInt8: return MagicNumber::UInt8;
case TypeIndex::UInt16: return MagicNumber::UInt16;
@@ -115,7 +119,7 @@ MagicNumber serializeTypeId(TypeIndex type_id)
break;
}
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Type is not supported by T64 codec: {}", static_cast(type_id));
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Type is not supported by T64 codec: {}", static_cast(*type_id));
}
TypeIndex deserializeTypeId(uint8_t serialized_type_id)
@@ -632,7 +636,7 @@ UInt32 CompressionCodecT64::doCompressData(const char * src, UInt32 src_size, ch
memcpy(dst, &cookie, 1);
dst += 1;
- switch (baseType(type_idx))
+ switch (baseType(*type_idx))
{
case TypeIndex::Int8:
return 1 + compressData(src, src_size, dst, variant);
@@ -699,7 +703,7 @@ uint8_t CompressionCodecT64::getMethodByte() const
return codecId();
}
-CompressionCodecT64::CompressionCodecT64(TypeIndex type_idx_, Variant variant_)
+CompressionCodecT64::CompressionCodecT64(std::optional type_idx_, Variant variant_)
: type_idx(type_idx_)
, variant(variant_)
{
@@ -712,7 +716,7 @@ CompressionCodecT64::CompressionCodecT64(TypeIndex type_idx_, Variant variant_)
void CompressionCodecT64::updateHash(SipHash & hash) const
{
getCodecDesc()->updateTreeHash(hash);
- hash.update(type_idx);
+ hash.update(type_idx.value_or(TypeIndex::Nothing));
hash.update(variant);
}
@@ -742,9 +746,14 @@ void registerCodecT64(CompressionCodecFactory & factory)
throw Exception(ErrorCodes::ILLEGAL_CODEC_PARAMETER, "Wrong modification for T64: {}", name);
}
- auto type_idx = typeIdx(type);
- if (type && type_idx == TypeIndex::Nothing)
- throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "T64 codec is not supported for specified type {}", type->getName());
+ std::optional type_idx;
+ if (type)
+ {
+ type_idx = typeIdx(type);
+ if (type_idx == TypeIndex::Nothing)
+ throw Exception(
+ ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE, "T64 codec is not supported for specified type {}", type->getName());
+ }
return std::make_shared(type_idx, variant);
};
diff --git a/src/Core/BackgroundSchedulePool.cpp b/src/Core/BackgroundSchedulePool.cpp
index 993cfb6ef04..5384ee7f961 100644
--- a/src/Core/BackgroundSchedulePool.cpp
+++ b/src/Core/BackgroundSchedulePool.cpp
@@ -149,8 +149,9 @@ Coordination::WatchCallback BackgroundSchedulePoolTaskInfo::getWatchCallback()
}
-BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_)
+BackgroundSchedulePool::BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, CurrentMetrics::Metric size_metric_, const char *thread_name_)
: tasks_metric(tasks_metric_)
+ , size_metric(size_metric_, size_)
, thread_name(thread_name_)
{
LOG_INFO(&Poco::Logger::get("BackgroundSchedulePool/" + thread_name), "Create BackgroundSchedulePool with {} threads", size_);
@@ -177,6 +178,8 @@ void BackgroundSchedulePool::increaseThreadsCount(size_t new_threads_count)
threads.resize(new_threads_count);
for (size_t i = old_threads_count; i < new_threads_count; ++i)
threads[i] = ThreadFromGlobalPoolNoTracingContextPropagation([this] { threadFunction(); });
+
+ size_metric.changeTo(new_threads_count);
}
diff --git a/src/Core/BackgroundSchedulePool.h b/src/Core/BackgroundSchedulePool.h
index 0fb70b1f715..ef6fbfa68e9 100644
--- a/src/Core/BackgroundSchedulePool.h
+++ b/src/Core/BackgroundSchedulePool.h
@@ -54,7 +54,7 @@ public:
void increaseThreadsCount(size_t new_threads_count);
/// thread_name_ cannot be longer then 13 bytes (2 bytes is reserved for "/D" suffix for delayExecutionThreadFunction())
- BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, const char *thread_name_);
+ BackgroundSchedulePool(size_t size_, CurrentMetrics::Metric tasks_metric_, CurrentMetrics::Metric size_metric_, const char *thread_name_);
~BackgroundSchedulePool();
private:
@@ -91,6 +91,7 @@ private:
DelayedTasks delayed_tasks;
CurrentMetrics::Metric tasks_metric;
+ CurrentMetrics::Increment size_metric;
std::string thread_name;
};
diff --git a/src/Core/ProtocolDefines.h b/src/Core/ProtocolDefines.h
index 3bbfb95f020..f687145668a 100644
--- a/src/Core/ProtocolDefines.h
+++ b/src/Core/ProtocolDefines.h
@@ -35,7 +35,6 @@
#define DBMS_MERGE_TREE_PART_INFO_VERSION 1
-/// Minimum revision supporting interserver secret.
#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441
#define DBMS_MIN_REVISION_WITH_X_FORWARDED_FOR_IN_CLIENT_INFO 54443
@@ -54,7 +53,7 @@
/// NOTE: DBMS_TCP_PROTOCOL_VERSION has nothing common with VERSION_REVISION,
/// later is just a number for server version (one number instead of commit SHA)
/// for simplicity (sometimes it may be more convenient in some use cases).
-#define DBMS_TCP_PROTOCOL_VERSION 54461
+#define DBMS_TCP_PROTOCOL_VERSION 54462
#define DBMS_MIN_PROTOCOL_VERSION_WITH_INITIAL_QUERY_START_TIME 54449
@@ -72,3 +71,5 @@
#define DBMS_MIN_PROTOCOL_VERSION_WITH_SERVER_QUERY_TIME_IN_PROGRESS 54460
#define DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES 54461
+
+#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET_V2 54462
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index 2ebf50e766e..755e52adb06 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -50,7 +50,7 @@ class IColumn;
M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
- M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \
+ M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "The maximum number of bytes of a query string parsed by the SQL parser. Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.", 0) \
M(UInt64, interactive_delay, 100000, "The interval in microseconds to check if the request is cancelled, and to send progress info.", 0) \
M(Seconds, connect_timeout, DBMS_DEFAULT_CONNECT_TIMEOUT_SEC, "Connection timeout if there are no replicas.", 0) \
M(Milliseconds, connect_timeout_with_failover_ms, 50, "Connection timeout for selecting first healthy replica.", 0) \
@@ -253,6 +253,8 @@ class IColumn;
M(Bool, send_progress_in_http_headers, false, "Send progress notifications using X-ClickHouse-Progress headers. Some clients do not support high amount of HTTP headers (Python requests in particular), so it is disabled by default.", 0) \
\
M(UInt64, http_headers_progress_interval_ms, 100, "Do not send HTTP headers X-ClickHouse-Progress more frequently than at each specified interval.", 0) \
+ M(Bool, http_wait_end_of_query, false, "Enable HTTP response buffering on the server-side.", 0) \
+ M(UInt64, http_response_buffer_size, 0, "The number of bytes to buffer in the server memory before sending a HTTP response to the client or flushing to disk (when http_wait_end_of_query is enabled).", 0) \
\
M(Bool, fsync_metadata, true, "Do fsync after changing metadata for tables and databases (.sql files). Could be disabled in case of poor latency on server with high load of DDL queries and high load of disk subsystem.", 0) \
\
@@ -716,6 +718,7 @@ class IColumn;
M(Float, insert_keeper_fault_injection_probability, 0.0f, "Approximate probability of failure for a keeper request during insert. Valid value is in interval [0.0f, 1.0f]", 0) \
M(UInt64, insert_keeper_fault_injection_seed, 0, "0 - random seed, otherwise the setting value", 0) \
M(Bool, force_aggregation_in_order, false, "Force use of aggregation in order on remote nodes during distributed aggregation. PLEASE, NEVER CHANGE THIS SETTING VALUE MANUALLY!", IMPORTANT) \
+ M(UInt64, http_max_request_param_data_size, 10_MiB, "Limit on size of request data used as a query parameter in predefined HTTP requests.", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS and move obsolete settings to OBSOLETE_SETTINGS.
diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp
index fda0bbe8032..3ede69d5362 100644
--- a/src/Databases/DatabaseMemory.cpp
+++ b/src/Databases/DatabaseMemory.cpp
@@ -26,7 +26,12 @@ namespace ErrorCodes
DatabaseMemory::DatabaseMemory(const String & name_, ContextPtr context_)
: DatabaseWithOwnTablesBase(name_, "DatabaseMemory(" + name_ + ")", context_)
, data_path("data/" + escapeForFileName(database_name) + "/")
-{}
+{
+ /// A temporary database must not have any data at the moment of its creation.
+ /// Data may be left over after a sudden server shutdown, so remove the temporary database's folder here.
+ if (name_ == DatabaseCatalog::TEMPORARY_DATABASE)
+ removeDataPath(context_);
+}
void DatabaseMemory::createTable(
ContextPtr /*context*/,
@@ -71,8 +76,7 @@ void DatabaseMemory::dropTable(
if (table->storesDataOnDisk())
{
- assert(getDatabaseName() != DatabaseCatalog::TEMPORARY_DATABASE);
- fs::path table_data_dir{getTableDataPath(table_name)};
+ fs::path table_data_dir{fs::path{getContext()->getPath()} / getTableDataPath(table_name)};
if (fs::exists(table_data_dir))
fs::remove_all(table_data_dir);
}
@@ -80,7 +84,6 @@ void DatabaseMemory::dropTable(
catch (...)
{
std::lock_guard lock{mutex};
- assert(database_name != DatabaseCatalog::TEMPORARY_DATABASE);
attachTableUnlocked(table_name, table);
throw;
}
@@ -129,10 +132,15 @@ UUID DatabaseMemory::tryGetTableUUID(const String & table_name) const
return UUIDHelpers::Nil;
}
+void DatabaseMemory::removeDataPath(ContextPtr local_context)
+{
+ std::filesystem::remove_all(local_context->getPath() + data_path);
+}
+
void DatabaseMemory::drop(ContextPtr local_context)
{
/// Remove data on explicit DROP DATABASE
- std::filesystem::remove_all(local_context->getPath() + data_path);
+ removeDataPath(local_context);
}
void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
diff --git a/src/Databases/DatabaseMemory.h b/src/Databases/DatabaseMemory.h
index 6262543b0c1..0f703a0b46e 100644
--- a/src/Databases/DatabaseMemory.h
+++ b/src/Databases/DatabaseMemory.h
@@ -53,6 +53,8 @@ public:
std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
private:
+ void removeDataPath(ContextPtr local_context);
+
const String data_path;
using NameToASTCreate = std::unordered_map;
NameToASTCreate create_queries TSA_GUARDED_BY(mutex);
diff --git a/src/Disks/FakeDiskTransaction.h b/src/Disks/FakeDiskTransaction.h
index 46be885739e..5dae17041e1 100644
--- a/src/Disks/FakeDiskTransaction.h
+++ b/src/Disks/FakeDiskTransaction.h
@@ -68,6 +68,15 @@ public:
return disk.writeFile(path, buf_size, mode, settings);
}
+ void writeFileUsingCustomWriteObject(
+ const String & path,
+ WriteMode mode,
+ std::function & object_attributes)>
+ custom_write_object_function) override
+ {
+ disk.writeFileUsingCustomWriteObject(path, mode, std::move(custom_write_object_function));
+ }
+
void removeFile(const std::string & path) override
{
disk.removeFile(path);
diff --git a/src/Disks/IDisk.cpp b/src/Disks/IDisk.cpp
index 2a60f32929c..4969cc7c700 100644
--- a/src/Disks/IDisk.cpp
+++ b/src/Disks/IDisk.cpp
@@ -38,6 +38,15 @@ void IDisk::copyFile(const String & from_file_path, IDisk & to_disk, const Strin
out->finalize();
}
+void IDisk::writeFileUsingCustomWriteObject(
+ const String &, WriteMode, std::function &)>)
+{
+ throw Exception(
+ ErrorCodes::NOT_IMPLEMENTED,
+ "Method `writeFileUsingCustomWriteObject()` is not implemented for disk: {}",
+ getDataSourceDescription().type);
+}
+
DiskTransactionPtr IDisk::createTransaction()
{
diff --git a/src/Disks/IDisk.h b/src/Disks/IDisk.h
index 79b2fefe964..ea117c0e0c6 100644
--- a/src/Disks/IDisk.h
+++ b/src/Disks/IDisk.h
@@ -209,6 +209,15 @@ public:
WriteMode mode = WriteMode::Rewrite,
const WriteSettings & settings = {}) = 0;
+ /// Write a file using a custom function to write an object to the disk's object storage.
+ /// This method is an alternative to writeFile(): writeFile() calls IObjectStorage::writeObject()
+ /// to write an object to the object storage, whereas this method lets the caller specify a callback for that.
+ virtual void writeFileUsingCustomWriteObject(
+ const String & path,
+ WriteMode mode,
+ std::function & object_attributes)>
+ custom_write_object_function);
+
/// Remove file. Throws exception if file doesn't exists or it's a directory.
/// Return whether file was finally removed. (For remote disks it is not always removed).
virtual void removeFile(const String & path) = 0;
diff --git a/src/Disks/IDiskTransaction.h b/src/Disks/IDiskTransaction.h
index 02c8731428d..2edbe858c06 100644
--- a/src/Disks/IDiskTransaction.h
+++ b/src/Disks/IDiskTransaction.h
@@ -68,6 +68,13 @@ public:
const WriteSettings & settings = {},
bool autocommit = true) = 0;
+ /// Write a file using a custom function to write an object to the disk's object storage.
+ virtual void writeFileUsingCustomWriteObject(
+ const String & path,
+ WriteMode mode,
+ std::function & object_attributes)>
+ custom_write_object_function) = 0;
+
/// Remove file. Throws exception if file doesn't exists or it's a directory.
virtual void removeFile(const std::string & path) = 0;
diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.cpp b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
index d55b1c91c07..44cb80558af 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
@@ -577,6 +577,17 @@ std::unique_ptr DiskObjectStorage::writeFile(
return result;
}
+void DiskObjectStorage::writeFileUsingCustomWriteObject(
+ const String & path,
+ WriteMode mode,
+ std::function & object_attributes)>
+ custom_write_object_function)
+{
+ LOG_TEST(log, "Write file: {}", path);
+ auto transaction = createObjectStorageTransaction();
+ return transaction->writeFileUsingCustomWriteObject(path, mode, std::move(custom_write_object_function));
+}
+
void DiskObjectStorage::applyNewSettings(
const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &)
{
diff --git a/src/Disks/ObjectStorages/DiskObjectStorage.h b/src/Disks/ObjectStorages/DiskObjectStorage.h
index a24acc270c0..d6723d1eb71 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorage.h
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.h
@@ -152,6 +152,12 @@ public:
WriteMode mode,
const WriteSettings & settings) override;
+ void writeFileUsingCustomWriteObject(
+ const String & path,
+ WriteMode mode,
+ std::function & object_attributes)>
+ custom_write_object_function) override;
+
void copy(const String & from_path, const std::shared_ptr & to_disk, const String & to_path) override;
void applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context_, const String &, const DisksMap &) override;
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
index a9d82a3e0b1..072e747aa4a 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
@@ -670,6 +670,44 @@ std::unique_ptr DiskObjectStorageTransaction::writeFile
}
+void DiskObjectStorageTransaction::writeFileUsingCustomWriteObject(
+ const String & path,
+ WriteMode mode,
+ std::function & object_attributes)>
+ custom_write_object_function)
+{
+ /// This function is a simplified and adapted version of DiskObjectStorageTransaction::writeFile().
+ auto blob_name = object_storage.generateBlobNameForPath(path);
+ std::optional object_attributes;
+
+ if (metadata_helper)
+ {
+ auto revision = metadata_helper->revision_counter + 1;
+ metadata_helper->revision_counter++;
+ object_attributes = {
+ {"path", path}
+ };
+ blob_name = "r" + revisionToString(revision) + "-file-" + blob_name;
+ }
+
+ auto object = StoredObject::create(object_storage, fs::path(metadata_storage.getObjectStorageRootPath()) / blob_name);
+ auto write_operation = std::make_unique(object_storage, metadata_storage, object);
+
+ operations_to_execute.emplace_back(std::move(write_operation));
+
+ /// We always use mode Rewrite because we simulate append using metadata and different files
+ size_t object_size = std::move(custom_write_object_function)(object, WriteMode::Rewrite, object_attributes);
+
+ /// Create metadata (see create_metadata_callback in DiskObjectStorageTransaction::writeFile()).
+ if (mode == WriteMode::Rewrite)
+ metadata_transaction->createMetadataFile(path, blob_name, object_size);
+ else
+ metadata_transaction->addBlobToMetadata(path, blob_name, object_size);
+
+ metadata_transaction->commit();
+}
+
+
void DiskObjectStorageTransaction::createHardLink(const std::string & src_path, const std::string & dst_path)
{
operations_to_execute.emplace_back(
diff --git a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h
index 9e6bd5b6307..080a3e42057 100644
--- a/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h
+++ b/src/Disks/ObjectStorages/DiskObjectStorageTransaction.h
@@ -99,6 +99,13 @@ public:
const WriteSettings & settings = {},
bool autocommit = true) override;
+ /// Write a file using a custom function to write an object to the disk's object storage.
+ void writeFileUsingCustomWriteObject(
+ const String & path,
+ WriteMode mode,
+ std::function & object_attributes)>
+ custom_write_object_function) override;
+
void removeFile(const std::string & path) override;
void removeFileIfExists(const std::string & path) override;
void removeDirectory(const std::string & path) override;
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index cbba5ed64f9..83c0c7446a8 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -1,7 +1,4 @@
#include
-#include
-#include
-
#if USE_AWS_S3
@@ -18,10 +15,12 @@
#include
#include
#include
+#include
#include
#include
#include
+#include
#include
#include
#include
diff --git a/src/Disks/ObjectStorages/S3/copyS3FileToDisk.cpp b/src/Disks/ObjectStorages/S3/copyS3FileToDisk.cpp
new file mode 100644
index 00000000000..098e02595f5
--- /dev/null
+++ b/src/Disks/ObjectStorages/S3/copyS3FileToDisk.cpp
@@ -0,0 +1,69 @@
+#include
+
+#if USE_AWS_S3
+
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+void copyS3FileToDisk(
+ const std::shared_ptr & s3_client,
+ const String & src_bucket,
+ const String & src_key,
+ const std::optional & version_id,
+ std::optional src_offset,
+ std::optional src_size,
+ DiskPtr destination_disk,
+ const String & destination_path,
+ WriteMode write_mode,
+ const ReadSettings & read_settings,
+ const WriteSettings & write_settings,
+ const S3Settings::RequestSettings & request_settings,
+ ThreadPoolCallbackRunner scheduler)
+{
+ if (!src_offset)
+ src_offset = 0;
+
+ if (!src_size)
+ src_size = S3::getObjectSize(*s3_client, src_bucket, src_key, version_id.value_or(""), request_settings) - *src_offset;
+
+ auto destination_data_source_description = destination_disk->getDataSourceDescription();
+ if (destination_data_source_description != DataSourceDescription{DataSourceType::S3, s3_client->getInitialEndpoint(), false, false})
+ {
+ LOG_TRACE(&Poco::Logger::get("copyS3FileToDisk"), "Copying {} to disk {} through buffers", src_key, destination_disk->getName());
+ ReadBufferFromS3 read_buffer{s3_client, src_bucket, src_key, {}, request_settings, read_settings};
+ if (*src_offset)
+ read_buffer.seek(*src_offset, SEEK_SET);
+ auto write_buffer = destination_disk->writeFile(destination_path, std::min(*src_size, DBMS_DEFAULT_BUFFER_SIZE), write_mode, write_settings);
+ copyData(read_buffer, *write_buffer, *src_size);
+ write_buffer->finalize();
+ return;
+ }
+
+ LOG_TRACE(&Poco::Logger::get("copyS3FileToDisk"), "Copying {} to disk {} using native copy", src_key, destination_disk->getName());
+
+ String dest_bucket = destination_disk->getObjectStorage()->getObjectsNamespace();
+
+ auto custom_write_object = [&](const StoredObject & object_, WriteMode write_mode_, const std::optional & object_attributes_) -> size_t
+ {
+ /// Object storage always uses mode `Rewrite` because it simulates append using metadata and different files.
+ chassert(write_mode_ == WriteMode::Rewrite);
+
+ copyS3File(s3_client, src_bucket, src_key, *src_offset, *src_size, dest_bucket, /* dest_key= */ object_.absolute_path,
+ request_settings, object_attributes_, scheduler, /* for_disk_s3= */ true);
+
+ return *src_size;
+ };
+
+ destination_disk->writeFileUsingCustomWriteObject(destination_path, write_mode, custom_write_object);
+}
+
+}
+
+#endif
diff --git a/src/Disks/ObjectStorages/S3/copyS3FileToDisk.h b/src/Disks/ObjectStorages/S3/copyS3FileToDisk.h
new file mode 100644
index 00000000000..21c92ec9623
--- /dev/null
+++ b/src/Disks/ObjectStorages/S3/copyS3FileToDisk.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include "config.h"
+
+#if USE_AWS_S3
+
+#include
+#include
+#include
+
+
+namespace DB
+{
+
+/// Copies an object from an S3 bucket to a disk of any type.
+/// Depending on the disk, the function either copies through buffers
+/// (i.e. downloads the object in portions and then writes those portions to the specified disk),
+/// or performs a server-side copy.
+void copyS3FileToDisk(
+ const std::shared_ptr & s3_client,
+ const String & src_bucket,
+ const String & src_key,
+ const std::optional & version_id,
+ std::optional src_offset,
+ std::optional src_size,
+ DiskPtr destination_disk,
+ const String & destination_path,
+ WriteMode write_mode = WriteMode::Rewrite,
+ const ReadSettings & read_settings = {},
+ const WriteSettings & write_settings = {},
+ const S3Settings::RequestSettings & request_settings = {},
+ ThreadPoolCallbackRunner scheduler = {});
+
+}
+
+#endif
diff --git a/src/Disks/getDiskConfigurationFromAST.cpp b/src/Disks/getDiskConfigurationFromAST.cpp
index e6b08046036..4b1323b4db8 100644
--- a/src/Disks/getDiskConfigurationFromAST.cpp
+++ b/src/Disks/getDiskConfigurationFromAST.cpp
@@ -83,4 +83,24 @@ DiskConfigurationPtr getDiskConfigurationFromAST(const std::string & root_name,
return conf;
}
+
+ASTs convertDiskConfigurationToAST(const Poco::Util::AbstractConfiguration & configuration, const std::string & config_path)
+{
+ ASTs result;
+
+ Poco::Util::AbstractConfiguration::Keys keys;
+ configuration.keys(config_path, keys);
+
+ for (const auto & key : keys)
+ {
+ result.push_back(
+ makeASTFunction(
+ "equals",
+ std::make_shared(key),
+ std::make_shared(configuration.getString(config_path + "." + key))));
+ }
+
+ return result;
+}
+
}
diff --git a/src/Disks/getDiskConfigurationFromAST.h b/src/Disks/getDiskConfigurationFromAST.h
index 1f9d7c1bfe6..5697955e914 100644
--- a/src/Disks/getDiskConfigurationFromAST.h
+++ b/src/Disks/getDiskConfigurationFromAST.h
@@ -25,4 +25,12 @@ using DiskConfigurationPtr = Poco::AutoPtr;
*/
DiskConfigurationPtr getDiskConfigurationFromAST(const std::string & root_name, const ASTs & disk_args, ContextPtr context);
+/// The same as the function above, but returns XML::Document for easier modification of the resulting configuration.
+[[ maybe_unused ]] Poco::AutoPtr getDiskConfigurationFromASTImpl(const std::string & root_name, const ASTs & disk_args, ContextPtr context);
+
+/*
+ * The reverse function: turns each key found under `config_path` into an `equals(key, value)` AST function.
+ */
+[[ maybe_unused ]] ASTs convertDiskConfigurationToAST(const Poco::Util::AbstractConfiguration & configuration, const std::string & config_path);
+
}
diff --git a/src/Functions/FunctionsConversion.h b/src/Functions/FunctionsConversion.h
index 55003044ff5..f832bf404a8 100644
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@@ -803,7 +803,7 @@ struct ConvertImpl, DataTypeNumber, Name, Con
}
};
-static ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col)
+static inline ColumnUInt8::MutablePtr copyNullMap(ColumnPtr col)
{
ColumnUInt8::MutablePtr null_map = nullptr;
if (const auto * col_null = checkAndGetColumn(col.get()))
diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp
index 0cc0248baf4..faa01abb675 100644
--- a/src/Functions/FunctionsStringSimilarity.cpp
+++ b/src/Functions/FunctionsStringSimilarity.cpp
@@ -285,9 +285,9 @@ struct NgramDistanceImpl
size_t first_size = dispatchSearcher(calculateHaystackStatsAndMetric, data.data(), data_size, common_stats.get(), distance, nullptr);
/// For !symmetric version we should not use first_size.
if constexpr (symmetric)
- res = distance * 1.f / std::max(first_size + second_size, static_cast(1));
+ res = distance * 1.f / std::max(first_size + second_size, 1uz);
else
- res = 1.f - distance * 1.f / std::max(second_size, static_cast(1));
+ res = 1.f - distance * 1.f / std::max(second_size, 1uz);
}
else
{
@@ -353,9 +353,9 @@ struct NgramDistanceImpl
/// For !symmetric version we should not use haystack_stats_size.
if constexpr (symmetric)
- res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, static_cast(1));
+ res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, 1uz);
else
- res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, static_cast(1));
+ res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, 1uz);
}
else
{
@@ -424,7 +424,7 @@ struct NgramDistanceImpl
for (size_t j = 0; j < needle_stats_size; ++j)
--common_stats[needle_ngram_storage[j]];
- res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, static_cast(1));
+ res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, 1uz);
}
else
{
@@ -471,9 +471,9 @@ struct NgramDistanceImpl
ngram_storage.get());
/// For !symmetric version we should not use haystack_stats_size.
if constexpr (symmetric)
- res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, static_cast(1));
+ res[i] = distance * 1.f / std::max(haystack_stats_size + needle_stats_size, 1uz);
else
- res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, static_cast(1));
+ res[i] = 1.f - distance * 1.f / std::max(needle_stats_size, 1uz);
}
else
{
diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp
index 6962c21280d..25c309aeb65 100644
--- a/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp
+++ b/src/Functions/UserDefined/UserDefinedSQLObjectsBackup.cpp
@@ -71,8 +71,7 @@ restoreUserDefinedSQLObjects(RestorerFromBackup & restorer, const String & data_
String function_name = unescapeForFileName(escaped_function_name);
String filepath = data_path_in_backup_fs / filename;
- auto backup_entry = backup->readFile(filepath);
- auto in = backup_entry->getReadBuffer();
+ auto in = backup->readFile(filepath);
String statement_def;
readStringUntilEOF(statement_def, *in);
diff --git a/src/Functions/bitSlice.cpp b/src/Functions/bitSlice.cpp
index 9b0ee4d5f1e..e2b455846d8 100644
--- a/src/Functions/bitSlice.cpp
+++ b/src/Functions/bitSlice.cpp
@@ -291,7 +291,7 @@ public:
ssize_t remain_byte = src.getElementSize() - offset_byte;
if (length < 0)
{
- length_byte = std::max(remain_byte + (length / word_size), static_cast(0));
+ length_byte = std::max(remain_byte + (length / word_size), 0z);
over_bit = word_size + (length % word_size);
if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps
length_byte = 0;
@@ -330,7 +330,7 @@ public:
size_t size = src.getElementSize();
if (length < 0)
{
- length_byte = std::max(static_cast(offset_byte) + (length / word_size), static_cast(0));
+ length_byte = std::max(static_cast(offset_byte) + (length / word_size), 0z);
over_bit = word_size + (length % word_size);
if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps
length_byte = 0;
@@ -395,7 +395,7 @@ public:
}
else
{
- length_byte = std::max(remain_byte + (static_cast(length) / word_size), static_cast(0));
+ length_byte = std::max(remain_byte + (static_cast(length) / word_size), 0z);
over_bit = word_size + (length % word_size);
if (length_byte == 1 && over_bit <= offset_bit) // begin and end are in same byte AND there are no gaps
length_byte = 0;
diff --git a/src/Functions/positionCaseInsensitive.cpp b/src/Functions/positionCaseInsensitive.cpp
index 4e3b670fe54..f71ce0078cc 100644
--- a/src/Functions/positionCaseInsensitive.cpp
+++ b/src/Functions/positionCaseInsensitive.cpp
@@ -20,5 +20,6 @@ using FunctionPositionCaseInsensitive = FunctionsStringSearch();
+ factory.registerAlias("instr", NamePositionCaseInsensitive::name, FunctionFactory::CaseInsensitive);
}
}
diff --git a/src/IO/MemoryReadWriteBuffer.cpp b/src/IO/MemoryReadWriteBuffer.cpp
index 8958390fe03..93ce5ce7ce9 100644
--- a/src/IO/MemoryReadWriteBuffer.cpp
+++ b/src/IO/MemoryReadWriteBuffer.cpp
@@ -106,7 +106,7 @@ void MemoryWriteBuffer::addChunk()
}
else
{
- next_chunk_size = std::max(static_cast(1), static_cast(chunk_tail->size() * growth_rate));
+ next_chunk_size = std::max(1uz, static_cast(chunk_tail->size() * growth_rate));
next_chunk_size = std::min(next_chunk_size, max_chunk_size);
}
diff --git a/src/IO/S3/Client.cpp b/src/IO/S3/Client.cpp
index 5c0539ee486..aba884948da 100644
--- a/src/IO/S3/Client.cpp
+++ b/src/IO/S3/Client.cpp
@@ -123,9 +123,8 @@ Client::Client(
{
auto * endpoint_provider = dynamic_cast(accessEndpointProvider().get());
endpoint_provider->GetBuiltInParameters().GetParameter("Region").GetString(explicit_region);
- std::string endpoint;
- endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(endpoint);
- detect_region = explicit_region == Aws::Region::AWS_GLOBAL && endpoint.find(".amazonaws.com") != std::string::npos;
+ endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(initial_endpoint);
+ detect_region = explicit_region == Aws::Region::AWS_GLOBAL && initial_endpoint.find(".amazonaws.com") != std::string::npos;
cache = std::make_shared();
ClientCacheRegistry::instance().registerClient(cache);
@@ -133,6 +132,7 @@ Client::Client(
Client::Client(const Client & other)
: Aws::S3::S3Client(other)
+ , initial_endpoint(other.initial_endpoint)
, explicit_region(other.explicit_region)
, detect_region(other.detect_region)
, max_redirects(other.max_redirects)
diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h
index 18ba62d1006..7ac97555dd1 100644
--- a/src/IO/S3/Client.h
+++ b/src/IO/S3/Client.h
@@ -109,6 +109,9 @@ public:
}
}
+ /// Returns the initial endpoint.
+ const String & getInitialEndpoint() const { return initial_endpoint; }
+
/// Decorator for RetryStrategy needed for this client to work correctly.
/// We want to manually handle permanent moves (status code 301) because:
/// - redirect location is written in XML format inside the response body something that doesn't exist for HEAD
@@ -198,6 +201,8 @@ private:
bool checkIfWrongRegionDefined(const std::string & bucket, const Aws::S3::S3Error & error, std::string & region) const;
void insertRegionOverride(const std::string & bucket, const std::string & region) const;
+ String initial_endpoint;
+
std::string explicit_region;
mutable bool detect_region = true;
diff --git a/src/IO/copyData.cpp b/src/IO/copyData.cpp
index b189c318d67..07222a930b5 100644
--- a/src/IO/copyData.cpp
+++ b/src/IO/copyData.cpp
@@ -10,6 +10,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ATTEMPT_TO_READ_AFTER_EOF;
+ extern const int CANNOT_READ_ALL_DATA;
}
namespace
@@ -91,6 +92,13 @@ void copyData(ReadBuffer & from, WriteBuffer & to, size_t bytes, std::function & is_cancelled, ThrottlerPtr throttler)
{
copyDataImpl(from, to, false, std::numeric_limits::max(), &is_cancelled, throttler);
diff --git a/src/IO/copyData.h b/src/IO/copyData.h
index 2202f36f79e..b67088d8e47 100644
--- a/src/IO/copyData.h
+++ b/src/IO/copyData.h
@@ -27,6 +27,9 @@ void copyData(ReadBuffer & from, WriteBuffer & to, size_t bytes, const std::atom
void copyData(ReadBuffer & from, WriteBuffer & to, std::function cancellation_hook);
void copyData(ReadBuffer & from, WriteBuffer & to, size_t bytes, std::function cancellation_hook);
+/// Copies at most `max_bytes` bytes from ReadBuffer to WriteBuffer. If there are more bytes, then throws an exception.
+void copyDataMaxBytes(ReadBuffer & from, WriteBuffer & to, size_t max_bytes);
+
/// Same as above but also use throttler to limit maximum speed
void copyDataWithThrottler(ReadBuffer & from, WriteBuffer & to, const std::atomic & is_cancelled, ThrottlerPtr throttler);
void copyDataWithThrottler(ReadBuffer & from, WriteBuffer & to, size_t bytes, const std::atomic & is_cancelled, ThrottlerPtr throttler);
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp
index f5e98250af1..09c2eebfdd6 100644
--- a/src/Interpreters/Aggregator.cpp
+++ b/src/Interpreters/Aggregator.cpp
@@ -1442,7 +1442,8 @@ void Aggregator::prepareAggregateInstructions(
aggregate_columns[i][j] = materialized_columns.back().get();
/// Sparse columns without defaults may be handled incorrectly.
- if (aggregate_columns[i][j]->getNumberOfDefaultRows() == 0)
+ if (aggregate_columns[i][j]->isSparse()
+ && aggregate_columns[i][j]->getNumberOfDefaultRows() == 0)
allow_sparse_arguments = false;
auto full_column = allow_sparse_arguments
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index cf1d5203bf7..2cfa55f0d87 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -129,13 +129,21 @@ namespace CurrentMetrics
{
extern const Metric ContextLockWait;
extern const Metric BackgroundMovePoolTask;
+ extern const Metric BackgroundMovePoolSize;
extern const Metric BackgroundSchedulePoolTask;
+ extern const Metric BackgroundSchedulePoolSize;
extern const Metric BackgroundBufferFlushSchedulePoolTask;
+ extern const Metric BackgroundBufferFlushSchedulePoolSize;
extern const Metric BackgroundDistributedSchedulePoolTask;
+ extern const Metric BackgroundDistributedSchedulePoolSize;
extern const Metric BackgroundMessageBrokerSchedulePoolTask;
+ extern const Metric BackgroundMessageBrokerSchedulePoolSize;
extern const Metric BackgroundMergesAndMutationsPoolTask;
+ extern const Metric BackgroundMergesAndMutationsPoolSize;
extern const Metric BackgroundFetchesPoolTask;
+ extern const Metric BackgroundFetchesPoolSize;
extern const Metric BackgroundCommonPoolTask;
+ extern const Metric BackgroundCommonPoolSize;
}
namespace DB
@@ -2175,6 +2183,7 @@ BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
shared->buffer_flush_schedule_pool = std::make_unique(
background_buffer_flush_schedule_pool_size,
CurrentMetrics::BackgroundBufferFlushSchedulePoolTask,
+ CurrentMetrics::BackgroundBufferFlushSchedulePoolSize,
"BgBufSchPool");
}
@@ -2226,6 +2235,7 @@ BackgroundSchedulePool & Context::getSchedulePool() const
shared->schedule_pool = std::make_unique(
background_schedule_pool_size,
CurrentMetrics::BackgroundSchedulePoolTask,
+ CurrentMetrics::BackgroundSchedulePoolSize,
"BgSchPool");
}
@@ -2246,6 +2256,7 @@ BackgroundSchedulePool & Context::getDistributedSchedulePool() const
shared->distributed_schedule_pool = std::make_unique(
background_distributed_schedule_pool_size,
CurrentMetrics::BackgroundDistributedSchedulePoolTask,
+ CurrentMetrics::BackgroundDistributedSchedulePoolSize,
"BgDistSchPool");
}
@@ -2266,6 +2277,7 @@ BackgroundSchedulePool & Context::getMessageBrokerSchedulePool() const
shared->message_broker_schedule_pool = std::make_unique(
background_message_broker_schedule_pool_size,
CurrentMetrics::BackgroundMessageBrokerSchedulePoolTask,
+ CurrentMetrics::BackgroundMessageBrokerSchedulePoolSize,
"BgMBSchPool");
}
@@ -3826,6 +3838,7 @@ void Context::initializeBackgroundExecutorsIfNeeded()
/*max_threads_count*/background_pool_size,
/*max_tasks_count*/background_pool_size * background_merges_mutations_concurrency_ratio,
CurrentMetrics::BackgroundMergesAndMutationsPoolTask,
+ CurrentMetrics::BackgroundMergesAndMutationsPoolSize,
background_merges_mutations_scheduling_policy
);
LOG_INFO(shared->log, "Initialized background executor for merges and mutations with num_threads={}, num_tasks={}, scheduling_policy={}",
@@ -3836,7 +3849,8 @@ void Context::initializeBackgroundExecutorsIfNeeded()
"Move",
background_move_pool_size,
background_move_pool_size,
- CurrentMetrics::BackgroundMovePoolTask
+ CurrentMetrics::BackgroundMovePoolTask,
+ CurrentMetrics::BackgroundMovePoolSize
);
LOG_INFO(shared->log, "Initialized background executor for move operations with num_threads={}, num_tasks={}", background_move_pool_size, background_move_pool_size);
@@ -3845,7 +3859,8 @@ void Context::initializeBackgroundExecutorsIfNeeded()
"Fetch",
background_fetches_pool_size,
background_fetches_pool_size,
- CurrentMetrics::BackgroundFetchesPoolTask
+ CurrentMetrics::BackgroundFetchesPoolTask,
+ CurrentMetrics::BackgroundFetchesPoolSize
);
LOG_INFO(shared->log, "Initialized background executor for fetches with num_threads={}, num_tasks={}", background_fetches_pool_size, background_fetches_pool_size);
@@ -3854,7 +3869,8 @@ void Context::initializeBackgroundExecutorsIfNeeded()
"Common",
background_common_pool_size,
background_common_pool_size,
- CurrentMetrics::BackgroundCommonPoolTask
+ CurrentMetrics::BackgroundCommonPoolTask,
+ CurrentMetrics::BackgroundCommonPoolSize
);
LOG_INFO(shared->log, "Initialized background executor for common operations (e.g. clearing old parts) with num_threads={}, num_tasks={}", background_common_pool_size, background_common_pool_size);
diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp
index 975e0da66ce..b11a973c7b7 100644
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@@ -121,9 +121,16 @@ TemporaryTableHolder::~TemporaryTableHolder()
{
if (id != UUIDHelpers::Nil)
{
- auto table = getTable();
- table->flushAndShutdown();
- temporary_tables->dropTable(getContext(), "_tmp_" + toString(id));
+ try
+ {
+ auto table = getTable();
+ table->flushAndShutdown();
+ temporary_tables->dropTable(getContext(), "_tmp_" + toString(id));
+ }
+ catch (...)
+ {
+ tryLogCurrentException("TemporaryTableHolder");
+ }
}
}
@@ -140,7 +147,6 @@ StoragePtr TemporaryTableHolder::getTable() const
return table;
}
-
void DatabaseCatalog::initializeAndLoadTemporaryDatabase()
{
drop_delay_sec = getContext()->getConfigRef().getInt("database_atomic_delay_before_drop_table_sec", default_drop_delay_sec);
diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h
index 4200373018d..88645ff72af 100644
--- a/src/Interpreters/DatabaseCatalog.h
+++ b/src/Interpreters/DatabaseCatalog.h
@@ -235,6 +235,21 @@ public:
void checkTableCanBeRemovedOrRenamed(const StorageID & table_id, bool check_referential_dependencies, bool check_loading_dependencies, bool is_drop_database = false) const;
+
+ struct TableMarkedAsDropped
+ {
+ StorageID table_id = StorageID::createEmpty();
+ StoragePtr table;
+ String metadata_path;
+ time_t drop_time{};
+ };
+ using TablesMarkedAsDropped = std::list;
+
+ TablesMarkedAsDropped getTablesMarkedDropped()
+ {
+ std::lock_guard lock(tables_marked_dropped_mutex);
+ return tables_marked_dropped;
+ }
private:
// The global instance of database catalog. unique_ptr is to allow
// deferred initialization. Thought I'd use std::optional, but I can't
@@ -263,15 +278,6 @@ private:
return uuid.toUnderType().items[0] >> (64 - bits_for_first_level);
}
- struct TableMarkedAsDropped
- {
- StorageID table_id = StorageID::createEmpty();
- StoragePtr table;
- String metadata_path;
- time_t drop_time{};
- };
- using TablesMarkedAsDropped = std::list;
-
void dropTableDataTask();
void dropTableFinally(const TableMarkedAsDropped & table);
diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp
index 9931ae97286..4c9f47e5915 100644
--- a/src/Interpreters/ExpressionActions.cpp
+++ b/src/Interpreters/ExpressionActions.cpp
@@ -848,6 +848,23 @@ std::string ExpressionActions::dumpActions() const
return ss.str();
}
+void ExpressionActions::describeActions(WriteBuffer & out, std::string_view prefix) const
+{
+ bool first = true;
+
+ for (const auto & action : actions)
+ {
+ out << prefix << (first ? "Actions: " : " ");
+ out << action.toString() << '\n';
+ first = false;
+ }
+
+ out << prefix << "Positions:";
+ for (const auto & pos : result_positions)
+ out << ' ' << pos;
+ out << '\n';
+}
+
JSONBuilder::ItemPtr ExpressionActions::toTree() const
{
auto inputs_array = std::make_unique();
diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h
index 11957997a30..db6670c50b9 100644
--- a/src/Interpreters/ExpressionActions.h
+++ b/src/Interpreters/ExpressionActions.h
@@ -109,6 +109,9 @@ public:
const Block & getSampleBlock() const { return sample_block; }
std::string dumpActions() const;
+
+ void describeActions(WriteBuffer & out, std::string_view prefix) const;
+
JSONBuilder::ItemPtr toTree() const;
static NameAndTypePair getSmallestColumn(const NamesAndTypesList & columns);
diff --git a/src/Interpreters/FillingRow.cpp b/src/Interpreters/FillingRow.cpp
index b03049a209f..5c2ad548c93 100644
--- a/src/Interpreters/FillingRow.cpp
+++ b/src/Interpreters/FillingRow.cpp
@@ -107,39 +107,4 @@ void FillingRow::initFromDefaults(size_t from_pos)
row[i] = getFillDescription(i).fill_from;
}
-void insertFromFillingRow(MutableColumns & filling_columns, MutableColumns & interpolate_columns, MutableColumns & other_columns,
- const FillingRow & filling_row, const Block & interpolate_block)
-{
- for (size_t i = 0, size = filling_columns.size(); i < size; ++i)
- {
- if (filling_row[i].isNull())
- {
- filling_columns[i]->insertDefault();
- }
- else
- {
- filling_columns[i]->insert(filling_row[i]);
- }
- }
-
- if (size_t size = interpolate_block.columns())
- {
- Columns columns = interpolate_block.getColumns();
- for (size_t i = 0; i < size; ++i)
- interpolate_columns[i]->insertFrom(*columns[i]->convertToFullColumnIfConst(), 0);
- }
- else
- for (const auto & interpolate_column : interpolate_columns)
- interpolate_column->insertDefault();
-
- for (const auto & other_column : other_columns)
- other_column->insertDefault();
-}
-
-void copyRowFromColumns(MutableColumns & dest, const Columns & source, size_t row_num)
-{
- for (size_t i = 0, size = source.size(); i < size; ++i)
- dest[i]->insertFrom(*source[i], row_num);
-}
-
}
diff --git a/src/Interpreters/FillingRow.h b/src/Interpreters/FillingRow.h
index 331c237285b..8d47094d0de 100644
--- a/src/Interpreters/FillingRow.h
+++ b/src/Interpreters/FillingRow.h
@@ -39,8 +39,4 @@ private:
SortDescription sort_description;
};
-void insertFromFillingRow(MutableColumns & filling_columns, MutableColumns & interpolate_columns, MutableColumns & other_columns,
- const FillingRow & filling_row, const Block & interpolate_block);
-void copyRowFromColumns(MutableColumns & dest, const Columns & source, size_t row_num);
-
}
diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp
index fba985da41c..b4376426700 100644
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@@ -495,7 +495,7 @@ size_t HashJoin::getTotalByteCount() const
if (!data)
return 0;
-#ifdef NDEBUG
+#ifndef NDEBUG
size_t debug_blocks_allocated_size = 0;
for (const auto & block : data->blocks)
debug_blocks_allocated_size += block.allocatedBytes();
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index c352280b7ed..7a4d65a4d57 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -940,23 +940,32 @@ void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const
if (create.temporary)
{
- if (create.storage && create.storage->engine && create.storage->engine->name != "Memory")
- throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables can only be created with ENGINE = Memory, not {}",
- create.storage->engine->name);
-
/// It's possible if some part of storage definition (such as PARTITION BY) is specified, but ENGINE is not.
/// It makes sense when default_table_engine setting is used, but not for temporary tables.
/// For temporary tables we ignore this setting to allow CREATE TEMPORARY TABLE query without specifying ENGINE
- /// even if setting is set to MergeTree or something like that (otherwise MergeTree will be substituted and query will fail).
- if (create.storage && !create.storage->engine)
- throw Exception(ErrorCodes::INCORRECT_QUERY, "Invalid storage definition for temporary table: must be either ENGINE = Memory or empty");
- auto engine_ast = std::make_shared();
- engine_ast->name = "Memory";
- engine_ast->no_empty_args = true;
- auto storage_ast = std::make_shared();
- storage_ast->set(storage_ast->engine, engine_ast);
- create.set(create.storage, storage_ast);
+ if (!create.cluster.empty())
+ throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with ON CLUSTER clause");
+
+ if (create.storage)
+ {
+ if (create.storage->engine)
+ {
+ if (create.storage->engine->name.starts_with("Replicated") || create.storage->engine->name == "KeeperMap")
+ throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated or KeeperMap table engines");
+ }
+ else
+ throw Exception(ErrorCodes::INCORRECT_QUERY, "Invalid storage definition for temporary table");
+ }
+ else
+ {
+ auto engine_ast = std::make_shared();
+ engine_ast->name = "Memory";
+ engine_ast->no_empty_args = true;
+ auto storage_ast = std::make_shared();
+ storage_ast->set(storage_ast->engine, engine_ast);
+ create.set(create.storage, storage_ast);
+ }
return;
}
@@ -1284,8 +1293,21 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
if (create.if_not_exists && getContext()->tryResolveStorageID({"", create.getTable()}, Context::ResolveExternal))
return false;
+ DatabasePtr database = DatabaseCatalog::instance().getDatabase(DatabaseCatalog::TEMPORARY_DATABASE);
+
String temporary_table_name = create.getTable();
- auto temporary_table = TemporaryTableHolder(getContext(), properties.columns, properties.constraints, query_ptr);
+ auto creator = [&](const StorageID & table_id)
+ {
+ return StorageFactory::instance().get(create,
+ database->getTableDataPath(table_id.getTableName()),
+ getContext(),
+ getContext()->getGlobalContext(),
+ properties.columns,
+ properties.constraints,
+ false);
+ };
+ auto temporary_table = TemporaryTableHolder(getContext(), creator, query_ptr);
+
getContext()->getSessionContext()->addExternalTable(temporary_table_name, std::move(temporary_table));
return true;
}
@@ -1712,7 +1734,13 @@ AccessRightsElements InterpreterCreateQuery::getRequiredAccess() const
else
{
if (create.temporary)
- required_access.emplace_back(AccessType::CREATE_TEMPORARY_TABLE);
+ {
+ /// Currently default table engine for temporary tables is Memory. default_table_engine does not affect temporary tables.
+ if (create.storage && create.storage->engine && create.storage->engine->name != "Memory")
+ required_access.emplace_back(AccessType::CREATE_ARBITRARY_TEMPORARY_TABLE);
+ else
+ required_access.emplace_back(AccessType::CREATE_TEMPORARY_TABLE);
+ }
else
{
if (create.replace_table)
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index f4507de5ac7..e16403bed67 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -282,11 +282,6 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name,
else if (kind == ASTDropQuery::Kind::Drop)
{
context_handle->removeExternalTable(table_name);
- table->flushAndShutdown();
- auto table_lock = table->lockExclusively(getContext()->getCurrentQueryId(), getContext()->getSettingsRef().lock_acquire_timeout);
- /// Delete table data
- table->drop();
- table->is_dropped = true;
}
else if (kind == ASTDropQuery::Kind::Detach)
{
diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp
index 893c93f0950..fad9d4bbfb2 100644
--- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp
+++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp
@@ -83,7 +83,10 @@ void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast)
IColumn & temp_column = *temp_column_ptr;
ReadBufferFromString read_buffer{value};
FormatSettings format_settings;
- data_type->getDefaultSerialization()->deserializeTextEscaped(temp_column, read_buffer, format_settings);
+ if (ast_param.name == "_request_body")
+ data_type->getDefaultSerialization()->deserializeWholeText(temp_column, read_buffer, format_settings);
+ else
+ data_type->getDefaultSerialization()->deserializeTextEscaped(temp_column, read_buffer, format_settings);
if (!read_buffer.eof())
throw Exception(ErrorCodes::BAD_QUERY_PARAMETER,
diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp
index 72353a42a87..a6354cd0e81 100644
--- a/src/Parsers/ExpressionElementParsers.cpp
+++ b/src/Parsers/ExpressionElementParsers.cpp
@@ -2151,8 +2151,9 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserKeyword s_set("SET");
ParserKeyword s_recompress("RECOMPRESS");
ParserKeyword s_codec("CODEC");
- ParserToken s_comma(TokenType::Comma);
- ParserToken s_eq(TokenType::Equals);
+ ParserKeyword s_materialize("MATERIALIZE");
+ ParserKeyword s_remove("REMOVE");
+ ParserKeyword s_modify("MODIFY");
ParserIdentifier parser_identifier;
ParserStringLiteral parser_string_literal;
@@ -2160,8 +2161,11 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserExpressionList parser_keys_list(false);
ParserCodec parser_codec;
- ParserList parser_assignment_list(
- std::make_unique(), std::make_unique(TokenType::Comma));
+ if (s_materialize.checkWithoutMoving(pos, expected) ||
+ s_remove.checkWithoutMoving(pos, expected) ||
+ s_modify.checkWithoutMoving(pos, expected))
+
+ return false;
ASTPtr ttl_expr;
if (!parser_exp.parse(pos, ttl_expr, expected))
@@ -2219,6 +2223,9 @@ bool ParserTTLElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (s_set.ignore(pos))
{
+ ParserList parser_assignment_list(
+ std::make_unique(), std::make_unique(TokenType::Comma));
+
if (!parser_assignment_list.parse(pos, group_by_assignments, expected))
return false;
}
diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp
index 37a4614bad3..2242bf92e6b 100644
--- a/src/Planner/Planner.cpp
+++ b/src/Planner/Planner.cpp
@@ -395,7 +395,11 @@ void addMergingAggregatedStep(QueryPlan & query_plan,
* but it can work more slowly.
*/
- Aggregator::Params params(aggregation_analysis_result.aggregation_keys,
+ auto keys = aggregation_analysis_result.aggregation_keys;
+ if (!aggregation_analysis_result.grouping_sets_parameters_list.empty())
+ keys.insert(keys.begin(), "__grouping_set");
+
+ Aggregator::Params params(keys,
aggregation_analysis_result.aggregate_descriptions,
query_analysis_result.aggregate_overflow_row,
settings.max_threads,
diff --git a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp
index 46d1872412c..0fb1a413a6c 100644
--- a/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp
+++ b/src/Processors/Formats/Impl/PrettySpaceBlockOutputFormat.cpp
@@ -45,7 +45,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
if (col.type->shouldAlignRightInPrettyFormats())
{
- for (ssize_t k = 0; k < std::max(static_cast(0), static_cast(max_widths[i] - name_widths[i])); ++k)
+ for (ssize_t k = 0; k < std::max(0z, static_cast(max_widths[i] - name_widths[i])); ++k)
writeChar(' ', out);
if (format_settings.pretty.color)
@@ -62,7 +62,7 @@ void PrettySpaceBlockOutputFormat::writeChunk(const Chunk & chunk, PortKind port
if (format_settings.pretty.color)
writeCString("\033[0m", out);
- for (ssize_t k = 0; k < std::max(static_cast(0), static_cast(max_widths[i] - name_widths[i])); ++k)
+ for (ssize_t k = 0; k < std::max(0z, static_cast(max_widths[i] - name_widths[i])); ++k)
writeChar(' ', out);
}
}
diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp
index 9bf351442b2..69dfa05899b 100644
--- a/src/Processors/QueryPlan/AggregatingStep.cpp
+++ b/src/Processors/QueryPlan/AggregatingStep.cpp
@@ -38,7 +38,6 @@ static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = false, /// Actually, we may check that distinct names are in aggregation keys
.returns_single_stream = should_produce_results_in_order_of_bucket_number,
.preserves_number_of_streams = false,
.preserves_sorting = false,
diff --git a/src/Processors/QueryPlan/ArrayJoinStep.cpp b/src/Processors/QueryPlan/ArrayJoinStep.cpp
index bd1908a4a6d..23a0a756f0d 100644
--- a/src/Processors/QueryPlan/ArrayJoinStep.cpp
+++ b/src/Processors/QueryPlan/ArrayJoinStep.cpp
@@ -14,7 +14,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = false,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = false,
diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
index 53dcec9ef0a..07137e87736 100644
--- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
+++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp
@@ -40,7 +40,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = true,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = true,
diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp
index 23e0a17a31b..b696b77ccfe 100644
--- a/src/Processors/QueryPlan/CreatingSetsStep.cpp
+++ b/src/Processors/QueryPlan/CreatingSetsStep.cpp
@@ -21,7 +21,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = true,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = true,
diff --git a/src/Processors/QueryPlan/CubeStep.cpp b/src/Processors/QueryPlan/CubeStep.cpp
index 03f952ac782..0c632c346c7 100644
--- a/src/Processors/QueryPlan/CubeStep.cpp
+++ b/src/Processors/QueryPlan/CubeStep.cpp
@@ -14,7 +14,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = false,
.returns_single_stream = true,
.preserves_number_of_streams = false,
.preserves_sorting = false,
@@ -32,9 +31,6 @@ CubeStep::CubeStep(const DataStream & input_stream_, Aggregator::Params params_,
, final(final_)
, use_nulls(use_nulls_)
{
- /// Aggregation keys are distinct
- for (const auto & key : params.keys)
- output_stream->distinct_columns.insert(key);
}
ProcessorPtr addGroupingSetForTotals(const Block & header, const Names & keys, bool use_nulls, const BuildQueryPipelineSettings & settings, UInt64 grouping_set_number)
@@ -89,9 +85,5 @@ void CubeStep::updateOutputStream()
{
output_stream = createOutputStream(
input_streams.front(), generateOutputHeader(params.getHeader(input_streams.front().header, final), params.keys, use_nulls), getDataStreamTraits());
-
- /// Aggregation keys are distinct
- for (const auto & key : params.keys)
- output_stream->distinct_columns.insert(key);
}
}
diff --git a/src/Processors/QueryPlan/DistinctStep.cpp b/src/Processors/QueryPlan/DistinctStep.cpp
index 323ef0bbdab..15ed02b700e 100644
--- a/src/Processors/QueryPlan/DistinctStep.cpp
+++ b/src/Processors/QueryPlan/DistinctStep.cpp
@@ -10,28 +10,13 @@
namespace DB
{
-static bool checkColumnsAlreadyDistinct(const Names & columns, const NameSet & distinct_names)
-{
- if (distinct_names.empty())
- return false;
-
- /// Now we need to check that distinct_names is a subset of columns.
- std::unordered_set columns_set(columns.begin(), columns.end());
- for (const auto & name : distinct_names)
- if (!columns_set.contains(name))
- return false;
-
- return true;
-}
-
-static ITransformingStep::Traits getTraits(bool pre_distinct, bool already_distinct_columns)
+static ITransformingStep::Traits getTraits(bool pre_distinct)
{
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = already_distinct_columns, /// Will be calculated separately otherwise
- .returns_single_stream = !pre_distinct && !already_distinct_columns,
- .preserves_number_of_streams = pre_distinct || already_distinct_columns,
+ .returns_single_stream = !pre_distinct,
+ .preserves_number_of_streams = pre_distinct,
.preserves_sorting = true, /// Sorting is preserved indeed because of implementation.
},
{
@@ -62,34 +47,23 @@ DistinctStep::DistinctStep(
: ITransformingStep(
input_stream_,
input_stream_.header,
- getTraits(pre_distinct_, checkColumnsAlreadyDistinct(columns_, input_stream_.distinct_columns)))
+ getTraits(pre_distinct_))
, set_size_limits(set_size_limits_)
, limit_hint(limit_hint_)
, columns(columns_)
, pre_distinct(pre_distinct_)
, optimize_distinct_in_order(optimize_distinct_in_order_)
{
- if (!output_stream->distinct_columns.empty() /// Columns already distinct, do nothing
- && (!pre_distinct /// Main distinct
- || input_stream_.has_single_port)) /// pre_distinct for single port works as usual one
- {
- /// Build distinct set.
- for (const auto & name : columns)
- output_stream->distinct_columns.insert(name);
- }
}
void DistinctStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &)
{
- const auto & input_stream = input_streams.back();
- if (checkColumnsAlreadyDistinct(columns, input_stream.distinct_columns))
- return;
-
if (!pre_distinct)
pipeline.resize(1);
if (optimize_distinct_in_order)
{
+ const auto & input_stream = input_streams.back();
const SortDescription distinct_sort_desc = getSortDescription(input_stream.sort_description, columns);
if (!distinct_sort_desc.empty())
{
@@ -197,16 +171,7 @@ void DistinctStep::updateOutputStream()
output_stream = createOutputStream(
input_streams.front(),
input_streams.front().header,
- getTraits(pre_distinct, checkColumnsAlreadyDistinct(columns, input_streams.front().distinct_columns)).data_stream_traits);
-
- if (!output_stream->distinct_columns.empty() /// Columns already distinct, do nothing
- && (!pre_distinct /// Main distinct
- || input_streams.front().has_single_port)) /// pre_distinct for single port works as usual one
- {
- /// Build distinct set.
- for (const auto & name : columns)
- output_stream->distinct_columns.insert(name);
- }
+ getTraits(pre_distinct).data_stream_traits);
}
}
diff --git a/src/Processors/QueryPlan/ExpressionStep.cpp b/src/Processors/QueryPlan/ExpressionStep.cpp
index dcfa6e5a891..250a1733caa 100644
--- a/src/Processors/QueryPlan/ExpressionStep.cpp
+++ b/src/Processors/QueryPlan/ExpressionStep.cpp
@@ -15,7 +15,6 @@ static ITransformingStep::Traits getTraits(const ActionsDAGPtr & actions, const
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = !actions->hasArrayJoin(),
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = actions->isSortingPreserved(header, sort_description),
@@ -33,8 +32,6 @@ ExpressionStep::ExpressionStep(const DataStream & input_stream_, const ActionsDA
getTraits(actions_dag_, input_stream_.header, input_stream_.sort_description))
, actions_dag(actions_dag_)
{
- /// Some columns may be removed by expression.
- updateDistinctColumns(output_stream->header, output_stream->distinct_columns);
}
void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings)
@@ -63,22 +60,9 @@ void ExpressionStep::transformPipeline(QueryPipelineBuilder & pipeline, const Bu
void ExpressionStep::describeActions(FormatSettings & settings) const
{
- String prefix(settings.offset, ' ');
- bool first = true;
-
+ String prefix(settings.offset, settings.indent_char);
auto expression = std::make_shared(actions_dag);
- for (const auto & action : expression->getActions())
- {
- settings.out << prefix << (first ? "Actions: "
- : " ");
- first = false;
- settings.out << action.toString() << '\n';
- }
-
- settings.out << prefix << "Positions:";
- for (const auto & pos : expression->getResultPositions())
- settings.out << ' ' << pos;
- settings.out << '\n';
+ expression->describeActions(settings.out, prefix);
}
void ExpressionStep::describeActions(JSONBuilder::JSONMap & map) const
diff --git a/src/Processors/QueryPlan/ExtremesStep.cpp b/src/Processors/QueryPlan/ExtremesStep.cpp
index 4524b9883d6..010a82072cf 100644
--- a/src/Processors/QueryPlan/ExtremesStep.cpp
+++ b/src/Processors/QueryPlan/ExtremesStep.cpp
@@ -9,7 +9,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = true,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = true,
diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp
index 13b7ca625fb..20d7d6d0f8f 100644
--- a/src/Processors/QueryPlan/FillingStep.cpp
+++ b/src/Processors/QueryPlan/FillingStep.cpp
@@ -17,7 +17,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = false, /// TODO: it seem to actually be true. Check it later.
.returns_single_stream = true,
.preserves_number_of_streams = true,
.preserves_sorting = true,
diff --git a/src/Processors/QueryPlan/FilterStep.cpp b/src/Processors/QueryPlan/FilterStep.cpp
index 4699a7c1908..dc837446a96 100644
--- a/src/Processors/QueryPlan/FilterStep.cpp
+++ b/src/Processors/QueryPlan/FilterStep.cpp
@@ -23,7 +23,6 @@ static ITransformingStep::Traits getTraits(const ActionsDAGPtr & expression, con
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = !expression->hasArrayJoin(), /// I suppose it actually never happens
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = preserves_sorting,
@@ -51,8 +50,6 @@ FilterStep::FilterStep(
, filter_column_name(std::move(filter_column_name_))
, remove_filter_column(remove_filter_column_)
{
- /// TODO: it would be easier to remove all expressions from filter step. It should only filter by column name.
- updateDistinctColumns(output_stream->header, output_stream->distinct_columns);
}
void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings & settings)
@@ -82,27 +79,15 @@ void FilterStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQ
void FilterStep::describeActions(FormatSettings & settings) const
{
- String prefix(settings.offset, ' ');
+ String prefix(settings.offset, settings.indent_char);
settings.out << prefix << "Filter column: " << filter_column_name;
if (remove_filter_column)
settings.out << " (removed)";
settings.out << '\n';
- bool first = true;
auto expression = std::make_shared(actions_dag);
- for (const auto & action : expression->getActions())
- {
- settings.out << prefix << (first ? "Actions: "
- : " ");
- first = false;
- settings.out << action.toString() << '\n';
- }
-
- settings.out << prefix << "Positions:";
- for (const auto & pos : expression->getResultPositions())
- settings.out << ' ' << pos;
- settings.out << '\n';
+ expression->describeActions(settings.out, prefix);
}
void FilterStep::describeActions(JSONBuilder::JSONMap & map) const
diff --git a/src/Processors/QueryPlan/IQueryPlanStep.h b/src/Processors/QueryPlan/IQueryPlanStep.h
index 316ecff9c2e..a608c6f8058 100644
--- a/src/Processors/QueryPlan/IQueryPlanStep.h
+++ b/src/Processors/QueryPlan/IQueryPlanStep.h
@@ -23,11 +23,6 @@ class DataStream
public:
Block header;
- /// Tuples with those columns are distinct.
- /// It doesn't mean that columns are distinct separately.
- /// Removing any column from this list breaks this invariant.
- NameSet distinct_columns = {};
-
/// QueryPipeline has single port. Totals or extremes ports are not counted.
bool has_single_port = false;
@@ -51,8 +46,7 @@ public:
bool hasEqualPropertiesWith(const DataStream & other) const
{
- return distinct_columns == other.distinct_columns
- && has_single_port == other.has_single_port
+ return has_single_port == other.has_single_port
&& sort_description == other.sort_description
&& (sort_description.empty() || sort_scope == other.sort_scope);
}
diff --git a/src/Processors/QueryPlan/ITransformingStep.cpp b/src/Processors/QueryPlan/ITransformingStep.cpp
index 195fa9ad68c..9ecfdb0af22 100644
--- a/src/Processors/QueryPlan/ITransformingStep.cpp
+++ b/src/Processors/QueryPlan/ITransformingStep.cpp
@@ -20,9 +20,6 @@ DataStream ITransformingStep::createOutputStream(
{
DataStream output_stream{.header = std::move(output_header)};
- if (stream_traits.preserves_distinct_columns)
- output_stream.distinct_columns = input_stream.distinct_columns;
-
output_stream.has_single_port = stream_traits.returns_single_stream
|| (input_stream.has_single_port && stream_traits.preserves_number_of_streams);
@@ -50,21 +47,6 @@ QueryPipelineBuilderPtr ITransformingStep::updatePipeline(QueryPipelineBuilders
return std::move(pipelines.front());
}
-void ITransformingStep::updateDistinctColumns(const Block & res_header, NameSet & distinct_columns)
-{
- if (distinct_columns.empty())
- return;
-
- for (const auto & column : distinct_columns)
- {
- if (!res_header.has(column))
- {
- distinct_columns.clear();
- break;
- }
- }
-}
-
void ITransformingStep::describePipeline(FormatSettings & settings) const
{
IQueryPlanStep::describePipeline(processors, settings);
diff --git a/src/Processors/QueryPlan/ITransformingStep.h b/src/Processors/QueryPlan/ITransformingStep.h
index 1513b4307f8..77de668fbdb 100644
--- a/src/Processors/QueryPlan/ITransformingStep.h
+++ b/src/Processors/QueryPlan/ITransformingStep.h
@@ -18,11 +18,6 @@ public:
/// They are specified in constructor and cannot be changed.
struct DataStreamTraits
{
- /// Keep distinct_columns unchanged.
- /// Examples: true for LimitStep, false for ExpressionStep with ARRAY JOIN
- /// It some columns may be removed from result header, call updateDistinctColumns
- bool preserves_distinct_columns;
-
/// True if pipeline has single output port after this step.
/// Examples: MergeSortingStep, AggregatingStep
bool returns_single_stream;
@@ -69,8 +64,6 @@ public:
input_streams.emplace_back(std::move(input_stream));
updateOutputStream();
-
- updateDistinctColumns(output_stream->header, output_stream->distinct_columns);
}
void describePipeline(FormatSettings & settings) const override;
@@ -83,9 +76,6 @@ public:
}
protected:
- /// Clear distinct_columns if res_header doesn't contain all of them.
- static void updateDistinctColumns(const Block & res_header, NameSet & distinct_columns);
-
/// Create output stream from header and traits.
static DataStream createOutputStream(
const DataStream & input_stream,
diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp
index 6e212a53bc6..2ff8f161e99 100644
--- a/src/Processors/QueryPlan/JoinStep.cpp
+++ b/src/Processors/QueryPlan/JoinStep.cpp
@@ -83,7 +83,6 @@ static ITransformingStep::Traits getStorageJoinTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = false,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = false,
diff --git a/src/Processors/QueryPlan/LimitByStep.cpp b/src/Processors/QueryPlan/LimitByStep.cpp
index 39086e995fc..8b4abecc12c 100644
--- a/src/Processors/QueryPlan/LimitByStep.cpp
+++ b/src/Processors/QueryPlan/LimitByStep.cpp
@@ -12,7 +12,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = true,
.returns_single_stream = true,
.preserves_number_of_streams = false,
.preserves_sorting = true,
diff --git a/src/Processors/QueryPlan/LimitStep.cpp b/src/Processors/QueryPlan/LimitStep.cpp
index 144ac16f0d5..5e5a7387832 100644
--- a/src/Processors/QueryPlan/LimitStep.cpp
+++ b/src/Processors/QueryPlan/LimitStep.cpp
@@ -12,7 +12,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = true,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = true,
diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.cpp b/src/Processors/QueryPlan/MergingAggregatedStep.cpp
index e4fc332a1fd..8b5f21442b1 100644
--- a/src/Processors/QueryPlan/MergingAggregatedStep.cpp
+++ b/src/Processors/QueryPlan/MergingAggregatedStep.cpp
@@ -24,7 +24,6 @@ static ITransformingStep::Traits getTraits(bool should_produce_results_in_order_
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = false,
.returns_single_stream = should_produce_results_in_order_of_bucket_number,
.preserves_number_of_streams = false,
.preserves_sorting = false,
@@ -62,10 +61,6 @@ MergingAggregatedStep::MergingAggregatedStep(
, should_produce_results_in_order_of_bucket_number(should_produce_results_in_order_of_bucket_number_)
, memory_bound_merging_of_aggregation_results_enabled(memory_bound_merging_of_aggregation_results_enabled_)
{
- /// Aggregation keys are distinct
- for (const auto & key : params.keys)
- output_stream->distinct_columns.insert(key);
-
if (memoryBoundMergingWillBeUsed() && should_produce_results_in_order_of_bucket_number)
{
output_stream->sort_description = group_by_sort_description;
@@ -157,10 +152,6 @@ void MergingAggregatedStep::describeActions(JSONBuilder::JSONMap & map) const
void MergingAggregatedStep::updateOutputStream()
{
output_stream = createOutputStream(input_streams.front(), params.getHeader(input_streams.front().header, final), getDataStreamTraits());
-
- /// Aggregation keys are distinct
- for (const auto & key : params.keys)
- output_stream->distinct_columns.insert(key);
}
bool MergingAggregatedStep::memoryBoundMergingWillBeUsed() const
diff --git a/src/Processors/QueryPlan/MergingAggregatedStep.h b/src/Processors/QueryPlan/MergingAggregatedStep.h
index 2dea289ca89..3a7e2b66183 100644
--- a/src/Processors/QueryPlan/MergingAggregatedStep.h
+++ b/src/Processors/QueryPlan/MergingAggregatedStep.h
@@ -27,6 +27,7 @@ public:
bool memory_bound_merging_of_aggregation_results_enabled_);
String getName() const override { return "MergingAggregated"; }
+ const Aggregator::Params & getParams() const { return params; }
void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override;
diff --git a/src/Processors/QueryPlan/OffsetStep.cpp b/src/Processors/QueryPlan/OffsetStep.cpp
index e0c70ba2f28..4bbe81f9169 100644
--- a/src/Processors/QueryPlan/OffsetStep.cpp
+++ b/src/Processors/QueryPlan/OffsetStep.cpp
@@ -12,7 +12,6 @@ static ITransformingStep::Traits getTraits()
return ITransformingStep::Traits
{
{
- .preserves_distinct_columns = true,
.returns_single_stream = false,
.preserves_number_of_streams = true,
.preserves_sorting = true,
diff --git a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp
index d584a27f16e..6334594de30 100644
--- a/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp
+++ b/src/Processors/QueryPlan/Optimizations/distinctReadInOrder.cpp
@@ -1,5 +1,7 @@
#include
#include
+#include
+#include
#include
#include
#include
@@ -7,6 +9,71 @@
namespace DB::QueryPlanOptimizations
{
+/// Merges clones of the DAGs in dag_stack, taken from back to front, into a single ActionsDAG; the stack is emptied, and nullptr is returned if it was already empty.
+static ActionsDAGPtr buildActionsForPlanPath(std::vector & dag_stack)
+{
+ if (dag_stack.empty())
+ return nullptr;
+
+ ActionsDAGPtr path_actions = dag_stack.back()->clone();
+ dag_stack.pop_back();
+ while (!dag_stack.empty())
+ {
+ ActionsDAGPtr clone = dag_stack.back()->clone();
+ dag_stack.pop_back();
+ path_actions->mergeInplace(std::move(*clone));
+ }
+ return path_actions;
+}
+
+static const ActionsDAG::Node * getOriginalNodeForOutputAlias(const ActionsDAGPtr & actions, const String & output_name)
+{
+ /// Find the output node whose result_name equals output_name. NOTE(review): actions is dereferenced without a null check - confirm callers never pass nullptr.
+ const ActionsDAG::Node * output_alias = nullptr;
+ for (const auto * node : actions->getOutputs())
+ {
+ if (node->result_name == output_name)
+ {
+ output_alias = node;
+ break;
+ }
+ }
+ if (!output_alias)
+ return nullptr;
+
+ /// Follow the first child through ALIAS nodes to the underlying node; it is returned only if it is an INPUT node, otherwise nullptr.
+ const ActionsDAG::Node * node = output_alias;
+ while (node && node->type == ActionsDAG::ActionType::ALIAS)
+ {
+ chassert(!node->children.empty());
+ node = node->children.front();
+ }
+ if (node && node->type != ActionsDAG::ActionType::INPUT)
+ return nullptr;
+
+ return node;
+}
+
+static std::set
+getOriginalDistinctColumns(const ColumnsWithTypeAndName & distinct_columns, std::vector & dag_stack)
+{
+ auto actions = buildActionsForPlanPath(dag_stack); /// consumes dag_stack: the helper pops every element; may be nullptr if the stack was empty
+ std::set original_distinct_columns;
+ for (const auto & column : distinct_columns)
+ {
+ /// Constant columns don't affect DISTINCT, so skip them.
+ if (isColumnConst(*column.column))
+ continue;
+
+ const auto * input_node = getOriginalNodeForOutputAlias(actions, column.name);
+ if (!input_node)
+ break; /// stop at the first column that does not resolve to an INPUT node; names collected so far are still returned
+
+ original_distinct_columns.insert(input_node->result_name);
+ }
+ return original_distinct_columns;
+}
+
size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node)
{
/// check if it is preliminary distinct node
@@ -22,8 +89,10 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node)
/// walk through the plan
/// (1) check if nodes below preliminary distinct preserve sorting
/// (2) gather transforming steps to update their sorting properties later
+ /// (3) gather actions DAG to find original names for columns in distinct step later
std::vector steps_to_update;
QueryPlan::Node * node = parent_node;
+ std::vector dag_stack;
while (!node->children.empty())
{
auto * step = dynamic_cast(node->step.get());
@@ -36,6 +105,11 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node)
steps_to_update.push_back(step);
+ if (const auto * const expr = typeid_cast(step); expr)
+ dag_stack.push_back(expr->getExpression());
+ else if (const auto * const filter = typeid_cast(step); filter)
+ dag_stack.push_back(filter->getExpression());
+
node = node->children.front();
}
@@ -50,28 +124,24 @@ size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node)
if (read_from_merge_tree->getOutputStream().sort_description.empty())
return 0;
- /// find non-const columns in DISTINCT
+ /// get original names for DISTINCT columns
const ColumnsWithTypeAndName & distinct_columns = pre_distinct->getOutputStream().header.getColumnsWithTypeAndName();
- std::set non_const_columns;
- for (const auto & column : distinct_columns)
- {
- if (!isColumnConst(*column.column))
- non_const_columns.emplace(column.name);
- }
+ auto original_distinct_columns = getOriginalDistinctColumns(distinct_columns, dag_stack);
- const Names& sorting_key_columns = read_from_merge_tree->getStorageMetadata()->getSortingKeyColumns();
/// check if DISTINCT has the same columns as sorting key
+ const Names & sorting_key_columns = read_from_merge_tree->getStorageMetadata()->getSortingKeyColumns();
size_t number_of_sorted_distinct_columns = 0;
for (const auto & column_name : sorting_key_columns)
{
- if (non_const_columns.end() == non_const_columns.find(column_name))
+ if (!original_distinct_columns.contains(column_name))
break;
++number_of_sorted_distinct_columns;
}
+
/// apply optimization only when distinct columns match or form prefix of sorting key
/// todo: check if reading in order optimization would be beneficial when sorting key is prefix of columns in DISTINCT
- if (number_of_sorted_distinct_columns != non_const_columns.size())
+ if (number_of_sorted_distinct_columns != original_distinct_columns.size())
return 0;
/// check if another read in order optimization is already applied
diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
index 02725dc3122..c9a0270f6e7 100644
--- a/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
+++ b/src/Processors/QueryPlan/Optimizations/removeRedundantDistinct.cpp
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -100,24 +101,29 @@ namespace
logDebug("aggregation_keys", aggregation_keys);
logDebug("aggregation_keys size", aggregation_keys.size());
logDebug("distinct_columns size", distinct_columns.size());
- if (aggregation_keys.size() != distinct_columns.size())
- return false;
- /// compare columns of two DISTINCTs
+ std::set original_distinct_columns;
for (const auto & column : distinct_columns)
{
logDebug("distinct column name", column);
const auto * alias_node = getOriginalNodeForOutputAlias(path_actions, String(column));
if (!alias_node)
{
- logDebug("original name for alias is not found for", column);
- return false;
+ logDebug("original name for alias is not found", column);
+ original_distinct_columns.insert(column);
}
-
- logDebug("alias result name", alias_node->result_name);
- if (std::find(cbegin(aggregation_keys), cend(aggregation_keys), alias_node->result_name) == aggregation_keys.cend())
+ else
{
- logDebug("alias result name is not found in aggregation keys", alias_node->result_name);
+ logDebug("alias result name", alias_node->result_name);
+ original_distinct_columns.insert(alias_node->result_name);
+ }
+ }
+ /// if all aggregation keys are among the distinct columns, then rows are already distinct
+ for (const auto & key : aggregation_keys)
+ {
+ if (!original_distinct_columns.contains(key))
+ {
+ logDebug("aggregation key NOT found: {}", key);
return false;
}
}
@@ -176,7 +182,7 @@ namespace
while (!node->children.empty())
{
const IQueryPlanStep * current_step = node->step.get();
- if (typeid_cast(current_step))
+ if (typeid_cast