diff --git a/.github/workflows/backport_branches.yml b/.github/workflows/backport_branches.yml
index 0d81a7b303c..d69168b01ee 100644
--- a/.github/workflows/backport_branches.yml
+++ b/.github/workflows/backport_branches.yml
@@ -349,6 +349,13 @@ jobs:
with:
clear-repository: true
submodules: true
+ - name: Apply sparse checkout for contrib # in order to check that it doesn't break the build
+ run: |
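+ # 'rm -rf' wipes contrib; 'git checkout .' restores the files tracked in the main repo
+ # (including update-submodules.sh), and update-submodules.sh then re-checks out the
+ # submodules sparsely. The du/find lines report the resulting size and file count.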
+ rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
+ git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
+ "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
+ du -hs "$GITHUB_WORKSPACE/contrib" ||:
+ find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index ecd5b85d320..1182481c897 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -487,6 +487,13 @@ jobs:
with:
clear-repository: true
submodules: true
+ - name: Apply sparse checkout for contrib # in order to check that it doesn't break the build
+ run: |
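+ # 'rm -rf' wipes contrib; 'git checkout .' restores the files tracked in the main repo
+ # (including update-submodules.sh), and update-submodules.sh then re-checks out the
+ # submodules sparsely. The du/find lines report the resulting size and file count.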
+ rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
+ git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
+ "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
+ du -hs "$GITHUB_WORKSPACE/contrib" ||:
+ find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index ab0cbbb7ec1..6fccc0542b7 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -550,6 +550,13 @@ jobs:
with:
clear-repository: true
submodules: true
+ - name: Apply sparse checkout for contrib # in order to check that it doesn't break the build
+ run: |
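+ # 'rm -rf' wipes contrib; 'git checkout .' restores the files tracked in the main repo
+ # (including update-submodules.sh), and update-submodules.sh then re-checks out the
+ # submodules sparsely. The du/find lines report the resulting size and file count.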
+ rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
+ git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
+ "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
+ du -hs "$GITHUB_WORKSPACE/contrib" ||:
+ find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml
index 1282dbef50b..21284815583 100644
--- a/.github/workflows/release_branches.yml
+++ b/.github/workflows/release_branches.yml
@@ -406,6 +406,13 @@ jobs:
with:
clear-repository: true
submodules: true
+ - name: Apply sparse checkout for contrib # in order to check that it doesn't break the build
+ run: |
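+ # 'rm -rf' wipes contrib; 'git checkout .' restores the files tracked in the main repo
+ # (including update-submodules.sh), and update-submodules.sh then re-checks out the
+ # submodules sparsely. The du/find lines report the resulting size and file count.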
+ rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed'
+ git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored'
+ "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK'
+ du -hs "$GITHUB_WORKSPACE/contrib" ||:
+ find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||:
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
diff --git a/README.md b/README.md
index 61d840ecd34..cee3a945262 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/clickhouse-presentations/raw/master/images/logo-400x240.png)](https://clickhouse.com)
+[](https://clickhouse.com?utm_source=github)
ClickHouse® is an open-source column-oriented database management system that allows generating analytical data reports in real-time.
diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h
index 7cdb527f9cf..ed4570d5e3f 100644
--- a/base/base/wide_integer_impl.h
+++ b/base/base/wide_integer_impl.h
@@ -155,13 +155,13 @@ struct common_type<wide::integer<Bits, Signed>, Arithmetic>
std::is_floating_point_v<Arithmetic>,
Arithmetic,
std::conditional_t<
- sizeof(Arithmetic) < Bits * sizeof(long),
+ sizeof(Arithmetic) * 8 < Bits,
wide::integer<Bits, Signed>,
std::conditional_t<
- Bits * sizeof(long) < sizeof(Arithmetic),
+ Bits < sizeof(Arithmetic) * 8,
Arithmetic,
std::conditional_t<
- Bits * sizeof(long) == sizeof(Arithmetic) && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
+ Bits == sizeof(Arithmetic) * 8 && (std::is_same_v<Signed, signed> || std::is_signed_v<Arithmetic>),
Arithmetic,
wide::integer<Bits, Signed>>>>>;
};
diff --git a/contrib/cctz b/contrib/cctz
index 7c78edd52b4..5e05432420f 160000
--- a/contrib/cctz
+++ b/contrib/cctz
@@ -1 +1 @@
-Subproject commit 7c78edd52b4d65acc103c2f195818ffcabe6fe0d
+Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2
diff --git a/contrib/krb5 b/contrib/krb5
index 9453aec0d50..b56ce6ba690 160000
--- a/contrib/krb5
+++ b/contrib/krb5
@@ -1 +1 @@
-Subproject commit 9453aec0d50e5aff9b189051611b321b40935d02
+Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617
diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt
index 93b90c15201..44058456ed4 100644
--- a/contrib/krb5-cmake/CMakeLists.txt
+++ b/contrib/krb5-cmake/CMakeLists.txt
@@ -15,10 +15,6 @@ if(NOT AWK_PROGRAM)
message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.")
endif()
-if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
- add_compile_definitions(USE_BORINGSSL=1)
-endif ()
-
set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src")
set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private")
@@ -162,6 +158,11 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/kdf.c"
"${KRB5_SOURCE_DIR}/lib/crypto/builtin/cmac.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/des_keys.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/des/f_parity.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/enc_provider/rc4.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/hash_provider/hash_md4.c"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4/md4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/prng.c"
"${KRB5_SOURCE_DIR}/lib/crypto/krb/enc_dk_cmac.c"
# "${KRB5_SOURCE_DIR}/lib/crypto/krb/crc32.c"
@@ -226,7 +227,6 @@ set(ALL_SRCS
# "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/rc4.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/des3.c"
- #"${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/cmac.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/sha256.c"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl/hmac.c"
@@ -474,6 +474,14 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/krb5/krb5_libinit.c"
)
+if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC))
+ add_compile_definitions(USE_BORINGSSL=1)
+else()
+ set(ALL_SRCS ${ALL_SRCS}
+ "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/camellia.c"
+ )
+endif()
+
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et"
COMMAND /bin/sh
@@ -673,6 +681,7 @@ target_include_directories(_krb5 PRIVATE
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5"
"${KRB5_SOURCE_DIR}/lib/gssapi/spnego"
"${KRB5_SOURCE_DIR}/util/et"
+ "${KRB5_SOURCE_DIR}/lib/crypto/builtin/md4"
"${KRB5_SOURCE_DIR}/lib/crypto/openssl"
"${KRB5_SOURCE_DIR}/lib/crypto/krb"
"${KRB5_SOURCE_DIR}/util/profile"
diff --git a/contrib/sparse-checkout/setup-sparse-checkout.sh b/contrib/sparse-checkout/setup-sparse-checkout.sh
new file mode 100755
index 00000000000..3feba6c5adf
--- /dev/null
+++ b/contrib/sparse-checkout/setup-sparse-checkout.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+
+set -e
+
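+# The '!' prefix makes the update setting a custom command: 'git submodule
+# update' runs the named script in the submodule's working directory (which is
+# why the '../sparse-checkout/' relative paths resolve), passing the sha1 of
+# the commit to check out as its single argument, instead of a plain checkout.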
+git config submodule."contrib/llvm-project".update '!../sparse-checkout/update-llvm-project.sh'
+git config submodule."contrib/croaring".update '!../sparse-checkout/update-croaring.sh'
+git config submodule."contrib/aws".update '!../sparse-checkout/update-aws.sh'
+git config submodule."contrib/openssl".update '!../sparse-checkout/update-openssl.sh'
+git config submodule."contrib/boringssl".update '!../sparse-checkout/update-boringssl.sh'
+git config submodule."contrib/arrow".update '!../sparse-checkout/update-arrow.sh'
+git config submodule."contrib/grpc".update '!../sparse-checkout/update-grpc.sh'
+git config submodule."contrib/orc".update '!../sparse-checkout/update-orc.sh'
+git config submodule."contrib/h3".update '!../sparse-checkout/update-h3.sh'
+git config submodule."contrib/icu".update '!../sparse-checkout/update-icu.sh'
+git config submodule."contrib/boost".update '!../sparse-checkout/update-boost.sh'
+git config submodule."contrib/aws-s2n-tls".update '!../sparse-checkout/update-aws-s2n-tls.sh'
+git config submodule."contrib/protobuf".update '!../sparse-checkout/update-protobuf.sh'
+git config submodule."contrib/libxml2".update '!../sparse-checkout/update-libxml2.sh'
+git config submodule."contrib/brotli".update '!../sparse-checkout/update-brotli.sh'
diff --git a/contrib/sparse-checkout/update-arrow.sh b/contrib/sparse-checkout/update-arrow.sh
new file mode 100755
index 00000000000..e004b60da02
--- /dev/null
+++ b/contrib/sparse-checkout/update-arrow.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+echo "Using sparse checkout for arrow"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/cpp/*' >> $FILES_TO_CHECKOUT
+
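+# '/*' includes everything at the top level, '!/*/*' then excludes the
+# contents of every subdirectory, and '/cpp/*' re-includes the C++ sources;
+# as in .gitignore, the last matching pattern wins.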
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-aws-s2n-tls.sh b/contrib/sparse-checkout/update-aws-s2n-tls.sh
new file mode 100755
index 00000000000..4d65dc4b81d
--- /dev/null
+++ b/contrib/sparse-checkout/update-aws-s2n-tls.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+echo "Using sparse checkout for aws-s2n-tls"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/test/*' >> $FILES_TO_CHECKOUT
+echo '!/docs/*' >> $FILES_TO_CHECKOUT
+echo '!/compliance/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-aws.sh b/contrib/sparse-checkout/update-aws.sh
new file mode 100755
index 00000000000..c8d4c5a89c2
--- /dev/null
+++ b/contrib/sparse-checkout/update-aws.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+echo "Using sparse checkout for aws"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/aws-cpp-sdk-core/*' >> $FILES_TO_CHECKOUT
+echo '/aws-cpp-sdk-s3/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-boost.sh b/contrib/sparse-checkout/update-boost.sh
new file mode 100755
index 00000000000..9bd1f6c1796
--- /dev/null
+++ b/contrib/sparse-checkout/update-boost.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+
+echo "Using sparse checkout for boost"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
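+# Every pattern after the first must append with '>>'; a second '>' would
+# truncate the file and silently discard the patterns written above.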
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/boost/*' >> $FILES_TO_CHECKOUT
+echo '!/boost/*/*' >> $FILES_TO_CHECKOUT
+echo '/boost/algorithm/*' >> $FILES_TO_CHECKOUT
+echo '/boost/any/*' >> $FILES_TO_CHECKOUT
+echo '/boost/atomic/*' >> $FILES_TO_CHECKOUT
+echo '/boost/assert/*' >> $FILES_TO_CHECKOUT
+echo '/boost/bind/*' >> $FILES_TO_CHECKOUT
+echo '/boost/concept/*' >> $FILES_TO_CHECKOUT
+echo '/boost/config/*' >> $FILES_TO_CHECKOUT
+echo '/boost/container/*' >> $FILES_TO_CHECKOUT
+echo '/boost/container_hash/*' >> $FILES_TO_CHECKOUT
+echo '/boost/context/*' >> $FILES_TO_CHECKOUT
+echo '/boost/convert/*' >> $FILES_TO_CHECKOUT
+echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT
+echo '/boost/core/*' >> $FILES_TO_CHECKOUT
+echo '/boost/detail/*' >> $FILES_TO_CHECKOUT
+echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT
+echo '/boost/exception/*' >> $FILES_TO_CHECKOUT
+echo '/boost/filesystem/*' >> $FILES_TO_CHECKOUT
+echo '/boost/functional/*' >> $FILES_TO_CHECKOUT
+echo '/boost/function/*' >> $FILES_TO_CHECKOUT
+echo '/boost/geometry/*' >> $FILES_TO_CHECKOUT
+echo '/boost/graph/*' >> $FILES_TO_CHECKOUT
+echo '/boost/heap/*' >> $FILES_TO_CHECKOUT
+echo '/boost/integer/*' >> $FILES_TO_CHECKOUT
+echo '/boost/intrusive/*' >> $FILES_TO_CHECKOUT
+echo '/boost/iostreams/*' >> $FILES_TO_CHECKOUT
+echo '/boost/io/*' >> $FILES_TO_CHECKOUT
+echo '/boost/iterator/*' >> $FILES_TO_CHECKOUT
+echo '/boost/math/*' >> $FILES_TO_CHECKOUT
+echo '/boost/move/*' >> $FILES_TO_CHECKOUT
+echo '/boost/mpl/*' >> $FILES_TO_CHECKOUT
+echo '/boost/multi_index/*' >> $FILES_TO_CHECKOUT
+echo '/boost/multiprecision/*' >> $FILES_TO_CHECKOUT
+echo '/boost/numeric/*' >> $FILES_TO_CHECKOUT
+echo '/boost/predef/*' >> $FILES_TO_CHECKOUT
+echo '/boost/preprocessor/*' >> $FILES_TO_CHECKOUT
+echo '/boost/program_options/*' >> $FILES_TO_CHECKOUT
+echo '/boost/range/*' >> $FILES_TO_CHECKOUT
+echo '/boost/regex/*' >> $FILES_TO_CHECKOUT
+echo '/boost/smart_ptr/*' >> $FILES_TO_CHECKOUT
+echo '/boost/type_index/*' >> $FILES_TO_CHECKOUT
+echo '/boost/type_traits/*' >> $FILES_TO_CHECKOUT
+echo '/boost/system/*' >> $FILES_TO_CHECKOUT
+echo '/boost/tti/*' >> $FILES_TO_CHECKOUT
+echo '/boost/utility/*' >> $FILES_TO_CHECKOUT
+echo '/boost/lexical_cast/*' >> $FILES_TO_CHECKOUT
+echo '/boost/optional/*' >> $FILES_TO_CHECKOUT
+echo '/boost/property_map/*' >> $FILES_TO_CHECKOUT
+echo '/boost/pending/*' >> $FILES_TO_CHECKOUT
+echo '/boost/multi_array/*' >> $FILES_TO_CHECKOUT
+echo '/boost/tuple/*' >> $FILES_TO_CHECKOUT
+echo '/boost/icl/*' >> $FILES_TO_CHECKOUT
+echo '/boost/unordered/*' >> $FILES_TO_CHECKOUT
+echo '/boost/typeof/*' >> $FILES_TO_CHECKOUT
+echo '/boost/parameter/*' >> $FILES_TO_CHECKOUT
+echo '/boost/mp11/*' >> $FILES_TO_CHECKOUT
+echo '/boost/archive/*' >> $FILES_TO_CHECKOUT
+echo '/boost/function_types/*' >> $FILES_TO_CHECKOUT
+echo '/boost/serialization/*' >> $FILES_TO_CHECKOUT
+echo '/boost/fusion/*' >> $FILES_TO_CHECKOUT
+echo '/boost/variant/*' >> $FILES_TO_CHECKOUT
+echo '/boost/format/*' >> $FILES_TO_CHECKOUT
+echo '/boost/locale/*' >> $FILES_TO_CHECKOUT
+echo '/boost/random/*' >> $FILES_TO_CHECKOUT
+echo '/boost/spirit/*' >> $FILES_TO_CHECKOUT
+echo '/boost/uuid/*' >> $FILES_TO_CHECKOUT
+echo '/boost/xpressive/*' >> $FILES_TO_CHECKOUT
+echo '/boost/asio/*' >> $FILES_TO_CHECKOUT
+echo '/boost/circular_buffer/*' >> $FILES_TO_CHECKOUT
+echo '/boost/proto/*' >> $FILES_TO_CHECKOUT
+echo '/boost/qvm/*' >> $FILES_TO_CHECKOUT
+echo '/boost/property_tree/*' >> $FILES_TO_CHECKOUT
+echo '/libs/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
\ No newline at end of file
diff --git a/contrib/sparse-checkout/update-boringssl.sh b/contrib/sparse-checkout/update-boringssl.sh
new file mode 100755
index 00000000000..f877a78afed
--- /dev/null
+++ b/contrib/sparse-checkout/update-boringssl.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+echo "Using sparse checkout for boringsll"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/fuzz/*' >> $FILES_TO_CHECKOUT
+echo '!/crypto/cipher_extra/test/*' >> $FILES_TO_CHECKOUT
+echo '!/third_party/wycheproof_testvectors/*' >> $FILES_TO_CHECKOUT
+echo '!/third_party/googletest/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-brotli.sh b/contrib/sparse-checkout/update-brotli.sh
new file mode 100755
index 00000000000..8784f5e4125
--- /dev/null
+++ b/contrib/sparse-checkout/update-brotli.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+echo "Using sparse checkout for brotli"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/c/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-croaring.sh b/contrib/sparse-checkout/update-croaring.sh
new file mode 100755
index 00000000000..9b7bba19df4
--- /dev/null
+++ b/contrib/sparse-checkout/update-croaring.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+echo "Using sparse checkout for croaring"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/benchmarks/*' >> $FILES_TO_CHECKOUT
+echo '!/tests/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-grpc.sh b/contrib/sparse-checkout/update-grpc.sh
new file mode 100755
index 00000000000..38934fdbc1b
--- /dev/null
+++ b/contrib/sparse-checkout/update-grpc.sh
@@ -0,0 +1,22 @@
+#!/bin/sh
+
+echo "Using sparse checkout for grpc"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/test/*' >> $FILES_TO_CHECKOUT
+echo '/test/build/*' >> $FILES_TO_CHECKOUT
+echo '!/tools/*' >> $FILES_TO_CHECKOUT
+echo '/tools/codegen/*' >> $FILES_TO_CHECKOUT
+echo '!/examples/*' >> $FILES_TO_CHECKOUT
+echo '!/doc/*' >> $FILES_TO_CHECKOUT
+# FIXME why do we need csharp?
+#echo '!/src/csharp/*' >> $FILES_TO_CHECKOUT
+echo '!/src/python/*' >> $FILES_TO_CHECKOUT
+echo '!/src/objective-c/*' >> $FILES_TO_CHECKOUT
+echo '!/src/php/*' >> $FILES_TO_CHECKOUT
+echo '!/src/ruby/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-h3.sh b/contrib/sparse-checkout/update-h3.sh
new file mode 100755
index 00000000000..127885f89cc
--- /dev/null
+++ b/contrib/sparse-checkout/update-h3.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+echo "Using sparse checkout for h3"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/tests/*' >> $FILES_TO_CHECKOUT
+echo '!/website/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-icu.sh b/contrib/sparse-checkout/update-icu.sh
new file mode 100755
index 00000000000..76af39f07a4
--- /dev/null
+++ b/contrib/sparse-checkout/update-icu.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+echo "Using sparse checkout for icu"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/icu4c/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
\ No newline at end of file
diff --git a/contrib/sparse-checkout/update-libxml2.sh b/contrib/sparse-checkout/update-libxml2.sh
new file mode 100755
index 00000000000..24faf11eec9
--- /dev/null
+++ b/contrib/sparse-checkout/update-libxml2.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+echo "Using sparse checkout for libxml2"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/result/*' >> $FILES_TO_CHECKOUT
+echo '!/test/*' >> $FILES_TO_CHECKOUT
+echo '!/doc/*' >> $FILES_TO_CHECKOUT
+echo '!/os400/*' >> $FILES_TO_CHECKOUT
+echo '!/fuzz/*' >> $FILES_TO_CHECKOUT
+echo '!/python/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-llvm-project.sh b/contrib/sparse-checkout/update-llvm-project.sh
new file mode 100755
index 00000000000..53c3b691d3a
--- /dev/null
+++ b/contrib/sparse-checkout/update-llvm-project.sh
@@ -0,0 +1,27 @@
+#!/bin/sh
+
+echo "Using sparse checkout for llvm-project"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/llvm/*' >> $FILES_TO_CHECKOUT
+echo '!/llvm/*/*' >> $FILES_TO_CHECKOUT
+echo '/llvm/cmake/*' >> $FILES_TO_CHECKOUT
+echo '/llvm/projects/*' >> $FILES_TO_CHECKOUT
+echo '/llvm/include/*' >> $FILES_TO_CHECKOUT
+echo '/llvm/lib/*' >> $FILES_TO_CHECKOUT
+echo '/llvm/utils/TableGen/*' >> $FILES_TO_CHECKOUT
+echo '/libcxxabi/*' >> $FILES_TO_CHECKOUT
+echo '!/libcxxabi/test/*' >> $FILES_TO_CHECKOUT
+echo '/libcxx/*' >> $FILES_TO_CHECKOUT
+echo '!/libcxx/test/*' >> $FILES_TO_CHECKOUT
+echo '/libunwind/*' >> $FILES_TO_CHECKOUT
+echo '!/libunwind/test/*' >> $FILES_TO_CHECKOUT
+echo '/compiler-rt/*' >> $FILES_TO_CHECKOUT
+echo '!/compiler-rt/test/*' >> $FILES_TO_CHECKOUT
+echo '/cmake/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-openssl.sh b/contrib/sparse-checkout/update-openssl.sh
new file mode 100755
index 00000000000..33e19f43cb7
--- /dev/null
+++ b/contrib/sparse-checkout/update-openssl.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+echo "Using sparse checkout for openssl"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/fuzz/*' >> $FILES_TO_CHECKOUT
+echo '!/test/*' >> $FILES_TO_CHECKOUT
+echo '!/doc/*' >> $FILES_TO_CHECKOUT
+echo '!/providers/*' >> $FILES_TO_CHECKOUT
+echo '!/apps/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-orc.sh b/contrib/sparse-checkout/update-orc.sh
new file mode 100755
index 00000000000..57ab57a8d52
--- /dev/null
+++ b/contrib/sparse-checkout/update-orc.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+echo "Using sparse checkout for orc"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '/*' > $FILES_TO_CHECKOUT
+echo '!/*/*' >> $FILES_TO_CHECKOUT
+echo '/c++/*' >> $FILES_TO_CHECKOUT
+echo '/proto/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/sparse-checkout/update-protobuf.sh b/contrib/sparse-checkout/update-protobuf.sh
new file mode 100755
index 00000000000..31c037c2cf5
--- /dev/null
+++ b/contrib/sparse-checkout/update-protobuf.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+echo "Using sparse checkout for protobuf"
+
+FILES_TO_CHECKOUT=$(git rev-parse --git-dir)/info/sparse-checkout
+echo '!/*' > $FILES_TO_CHECKOUT
+echo '/*/*' >> $FILES_TO_CHECKOUT
+echo '/src/*' >> $FILES_TO_CHECKOUT
+echo '/cmake/*' >> $FILES_TO_CHECKOUT
+
+git config core.sparsecheckout true
+git checkout $1
+git read-tree -mu HEAD
diff --git a/contrib/update-submodules.sh b/contrib/update-submodules.sh
new file mode 100755
index 00000000000..c94681e6240
--- /dev/null
+++ b/contrib/update-submodules.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+set -e
+
+WORKDIR=$(dirname "$0")
+WORKDIR=$(readlink -f "${WORKDIR}")
+
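+# Register the per-submodule sparse-checkout update commands first, then do a
+# shallow (depth=1) update so each submodule is fetched and checked out sparsely.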
+"$WORKDIR/sparse-checkout/setup-sparse-checkout.sh"
+git submodule init
+git submodule sync
+git submodule update --depth=1
diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index ace5ab79bb4..6bcdadeb1eb 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -39,9 +39,15 @@ Next, you need to download the source files onto your working machine. This is c
In the command line terminal run:
- git clone --recursive --shallow-submodules git@github.com:your_github_username/ClickHouse.git
+ git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
+Or (if you'd like to use sparse checkout for submodules and avoid checking out unneeded files):
+
+ git clone git@github.com:your_github_username/ClickHouse.git
+ cd ClickHouse
+ ./contrib/update-submodules.sh
+
Note: please substitute *your_github_username* with what is appropriate!
This command will create a directory `ClickHouse` containing the working copy of the project.
diff --git a/docs/en/engines/table-engines/mergetree-family/replication.md b/docs/en/engines/table-engines/mergetree-family/replication.md
index e9ca87916a0..e14ba5699e4 100644
--- a/docs/en/engines/table-engines/mergetree-family/replication.md
+++ b/docs/en/engines/table-engines/mergetree-family/replication.md
@@ -8,11 +8,18 @@ sidebar_label: Data Replication
:::note
In ClickHouse Cloud replication is managed for you. Please create your tables without adding arguments. For example, in the text below you would replace:
+
+```sql
+ENGINE = ReplicatedReplacingMergeTree(
+ '/clickhouse/tables/{shard}/table_name',
+ '{replica}',
+ ver
+)
```
-ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{shard}/table_name', '{replica}', ver)
-```
+
with:
-```
+
+```sql
ENGINE = ReplicatedReplacingMergeTree
```
:::
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index c18c63d13c2..b4823d5ebaf 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -78,7 +78,7 @@ The supported formats are:
| [Null](#null) | ✗ | ✔ |
| [XML](#xml) | ✗ | ✔ |
| [CapnProto](#capnproto) | ✔ | ✔ |
-| [LineAsString](#lineasstring) | ✔ | ✗ |
+| [LineAsString](#lineasstring) | ✔ | ✔ |
| [Regexp](#data-format-regexp) | ✔ | ✗ |
| [RawBLOB](#rawblob) | ✔ | ✔ |
| [MsgPack](#msgpack) | ✔ | ✔ |
@@ -1235,8 +1235,8 @@ For output it uses the following correspondence between ClickHouse types and BSO
| ClickHouse type | BSON Type |
|-----------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------|
| [Bool](/docs/en/sql-reference/data-types/boolean.md) | `\x08` boolean |
-| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
-| [Int16UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
+| [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
+| [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `\x10` int32 |
| [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `\x10` int32 |
| [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
| [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `\x12` int64 |
@@ -1255,30 +1255,30 @@ For output it uses the following correspondence between ClickHouse types and BSO
| [Array](/docs/en/sql-reference/data-types/array.md) | `\x04` array |
| [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x04` array |
| [Named Tuple](/docs/en/sql-reference/data-types/tuple.md) | `\x03` document |
-| [Map](/docs/en/sql-reference/data-types/map.md) (with String keys) | `\x03` document |
+| [Map](/docs/en/sql-reference/data-types/map.md) | `\x03` document |
| [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `\x10` int32 |
| [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `\x05` binary, `\x00` binary subtype |
For input it uses the following correspondence between BSON types and ClickHouse types:
-| BSON Type | ClickHouse Type |
-|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
-| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
-| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
-| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
-| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
-| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
-| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
-| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
-| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
-| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
-| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
-| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
-| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
-| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
-| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) |
-| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
+| BSON Type | ClickHouse Type |
+|------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `\x01` double | [Float32/Float64](/docs/en/sql-reference/data-types/float.md) |
+| `\x02` string | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
+| `\x03` document | [Map](/docs/en/sql-reference/data-types/map.md)/[Named Tuple](/docs/en/sql-reference/data-types/tuple.md) |
+| `\x04` array | [Array](/docs/en/sql-reference/data-types/array.md)/[Tuple](/docs/en/sql-reference/data-types/tuple.md) |
+| `\x05` binary, `\x00` binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md)/[IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) |
+| `\x05` binary, `\x02` old binary subtype | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
+| `\x05` binary, `\x03` old uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
+| `\x05` binary, `\x04` uuid subtype | [UUID](/docs/en/sql-reference/data-types/uuid.md) |
+| `\x07` ObjectId | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
+| `\x08` boolean | [Bool](/docs/en/sql-reference/data-types/boolean.md) |
+| `\x09` datetime | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
+| `\x0A` null value | [NULL](/docs/en/sql-reference/data-types/nullable.md) |
+| `\x0D` JavaScript code | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
+| `\x0E` symbol | [String](/docs/en/sql-reference/data-types/string.md)/[FixedString](/docs/en/sql-reference/data-types/fixedstring.md) |
+| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md)/[IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)/[Enum8/Enum16](/docs/en/sql-reference/data-types/enum.md) |
+| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |
Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert a BSON int32 value into a ClickHouse UInt8 column).
Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from a BSON Binary value with the `\x00` binary subtype. In this case, this format will validate that the size of the binary data equals the size of the expected value.
@@ -1877,6 +1877,13 @@ Column names must:
Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings-formats.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings-formats.md/#output_format_avro_sync_interval) respectively.
+### Example Data {#example-data-avro}
+
+Using the ClickHouse [DESCRIBE](/docs/en/sql-reference/statements/describe-table) statement, you can quickly view the schema inferred from an Avro file, as in the following example. The example uses the URL of a publicly accessible Avro file in the ClickHouse S3 public bucket:
+
+```sql
+DESCRIBE url('https://clickhouse-public-datasets.s3.eu-central-1.amazonaws.com/hits.avro','Avro');
+```
+
## AvroConfluent {#data-format-avro-confluent}
AvroConfluent supports decoding single-object Avro messages commonly used with [Kafka](https://kafka.apache.org/) and [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html).
@@ -1936,30 +1943,31 @@ Setting `format_avro_schema_registry_url` needs to be configured in `users.xml`
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
-| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
-|----------------------------------------------------|-----------------------------------------------------------------|------------------------------|
-| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
-| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
-| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
-| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
-| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
-| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
-| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
-| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
-| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
-| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
-| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
-| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
-| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
-| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
-| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
-| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
-| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
-| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
-| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
-| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
-| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
-| `FIXED_LENGTH_BYTE_ARRAY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
+| Parquet data type (`INSERT`) | ClickHouse data type | Parquet data type (`SELECT`) |
+|-----------------------------------------------|------------------------------------------------------------------------------------------------------------|-------------------------------|
+| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
+| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` |
+| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
+| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` |
+| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
+| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
+| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
+| `FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT` |
+| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `DOUBLE` |
+| `DATE` | [Date32](/docs/en/sql-reference/data-types/date.md) | `DATE` |
+| `TIME (ms)` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
+| `TIMESTAMP`, `TIME (us, ns)` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `TIMESTAMP` |
+| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
+| `STRING`, `BINARY`, `FIXED_LENGTH_BYTE_ARRAY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_LENGTH_BYTE_ARRAY` |
+| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
+| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
+| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
+| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
+| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_LENGTH_BYTE_ARRAY` |
+| `FIXED_LENGTH_BYTE_ARRAY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_LENGTH_BYTE_ARRAY` |
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
@@ -2005,31 +2013,32 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
-| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
-|-----------------------------------------|-----------------------------------------------------------------|----------------------------|
-| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
-| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
-| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `INT8` |
-| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
-| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `INT16` |
-| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
-| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
-| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
-| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
-| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
-| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
-| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` |
-| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
-| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` |
-| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
-| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` |
-| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
-| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` |
-| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
-| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
-| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
-| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
-| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
+| Arrow data type (`INSERT`) | ClickHouse data type | Arrow data type (`SELECT`) |
+|-----------------------------------------|------------------------------------------------------------------------------------------------------------|----------------------------|
+| `BOOL` | [Bool](/docs/en/sql-reference/data-types/boolean.md) | `BOOL` |
+| `UINT8`, `BOOL` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `UINT8` |
+| `INT8` | [Int8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `INT8` |
+| `UINT16` | [UInt16](/docs/en/sql-reference/data-types/int-uint.md) | `UINT16` |
+| `INT16` | [Int16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `INT16` |
+| `UINT32` | [UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `UINT32` |
+| `INT32` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `INT32` |
+| `UINT64` | [UInt64](/docs/en/sql-reference/data-types/int-uint.md) | `UINT64` |
+| `INT64` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `INT64` |
+| `FLOAT`, `HALF_FLOAT` | [Float32](/docs/en/sql-reference/data-types/float.md) | `FLOAT32` |
+| `DOUBLE` | [Float64](/docs/en/sql-reference/data-types/float.md) | `FLOAT64` |
+| `DATE32` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `UINT16` |
+| `DATE64` | [DateTime](/docs/en/sql-reference/data-types/datetime.md) | `UINT32` |
+| `TIMESTAMP`, `TIME32`, `TIME64` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `UINT32` |
+| `STRING`, `BINARY` | [String](/docs/en/sql-reference/data-types/string.md) | `BINARY` |
+| `STRING`, `BINARY`, `FIXED_SIZE_BINARY` | [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) | `FIXED_SIZE_BINARY` |
+| `DECIMAL` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL` |
+| `DECIMAL256` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `DECIMAL256` |
+| `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` |
+| `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` |
+| `MAP` | [Map](/docs/en/sql-reference/data-types/map.md) | `MAP` |
+| `UINT32` | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md) | `UINT32` |
+| `FIXED_SIZE_BINARY`, `BINARY` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `FIXED_SIZE_BINARY` |
+| `FIXED_SIZE_BINARY`, `BINARY` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `FIXED_SIZE_BINARY` |
Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested.
@@ -2078,23 +2087,26 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
The table below shows supported data types and how they match ClickHouse [data types](/docs/en/sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
-| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
-|---------------------------------------|---------------------------------------------------------------|--------------------------|
-| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
-| `Tinyint` | [Int8](/docs/en/sql-reference/data-types/int-uint.md) | `Tinyint` |
-| `Smallint` | [Int16](/docs/en/sql-reference/data-types/int-uint.md) | `Smallint` |
-| `Int` | [Int32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
-| `Bigint` | [Int64](/docs/en/sql-reference/data-types/int-uint.md) | `Bigint` |
-| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
-| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
-| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
-| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
-| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
-| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
-| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
-| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
-| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
-| `-` | [IPv4](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
+| ORC data type (`INSERT`) | ClickHouse data type | ORC data type (`SELECT`) |
+|---------------------------------------|-------------------------------------------------------------------------------------------------------------------|--------------------------|
+| `Boolean` | [UInt8](/docs/en/sql-reference/data-types/int-uint.md) | `Boolean` |
+| `Tinyint` | [Int8/UInt8](/docs/en/sql-reference/data-types/int-uint.md)/[Enum8](/docs/en/sql-reference/data-types/enum.md) | `Tinyint` |
+| `Smallint` | [Int16/UInt16](/docs/en/sql-reference/data-types/int-uint.md)/[Enum16](/docs/en/sql-reference/data-types/enum.md) | `Smallint` |
+| `Int` | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md) | `Int` |
+| `Bigint`                              | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)                                                       | `Bigint`                 |
+| `Float` | [Float32](/docs/en/sql-reference/data-types/float.md) | `Float` |
+| `Double` | [Float64](/docs/en/sql-reference/data-types/float.md) | `Double` |
+| `Decimal` | [Decimal](/docs/en/sql-reference/data-types/decimal.md) | `Decimal` |
+| `Date` | [Date32](/docs/en/sql-reference/data-types/date32.md) | `Date` |
+| `Timestamp` | [DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | `Timestamp` |
+| `String`, `Char`, `Varchar`, `Binary` | [String](/docs/en/sql-reference/data-types/string.md) | `Binary` |
+| `List` | [Array](/docs/en/sql-reference/data-types/array.md) | `List` |
+| `Struct` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `Struct` |
+| `Map` | [Map](/docs/en/sql-reference/data-types/map.md) | `Map` |
+| `Int`                                 | [IPv4](/docs/en/sql-reference/data-types/domains/ipv4.md)                                                           | `Int`                    |
+| `Binary` | [IPv6](/docs/en/sql-reference/data-types/domains/ipv6.md) | `Binary` |
+| `Binary` | [Int128/UInt128/Int256/UInt256](/docs/en/sql-reference/data-types/int-uint.md) | `Binary` |
+| `Binary` | [Decimal256](/docs/en/sql-reference/data-types/decimal.md) | `Binary` |
Other types are not supported.
diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md
index e711d9a7784..15426eefbcc 100644
--- a/docs/en/operations/system-tables/replicas.md
+++ b/docs/en/operations/system-tables/replicas.md
@@ -50,6 +50,7 @@ last_queue_update: 2021-10-12 14:50:08
absolute_delay: 99
total_replicas: 5
active_replicas: 5
+lost_part_count: 0
last_queue_update_exception:
zookeeper_exception:
replica_is_active: {'r1':1,'r2':1}
@@ -90,6 +91,7 @@ The next 4 columns have a non-zero value only where there is an active session w
- `absolute_delay` (`UInt64`) - How big lag in seconds the current replica has.
- `total_replicas` (`UInt8`) - The total number of known replicas of this table.
- `active_replicas` (`UInt8`) - The number of replicas of this table that have a session in ClickHouse Keeper (i.e., the number of functioning replicas).
+- `lost_part_count` (`UInt64`) - The number of data parts lost in the table by all replicas in total since table creation. Value is persisted in ClickHouse Keeper and can only increase.
- `last_queue_update_exception` (`String`) - When the queue contains broken entries. Especially important when ClickHouse breaks backward compatibility between versions and log entries written by newer versions aren't parseable by old versions.
- `zookeeper_exception` (`String`) - The last exception message, got if the error happened when fetching the info from ClickHouse Keeper.
- `replica_is_active` ([Map(String, UInt8)](../../sql-reference/data-types/map.md)) — Map between replica name and is replica active.
diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md
index 966b677c7e3..69e0f7f0a55 100644
--- a/docs/en/operations/system-tables/storage_policies.md
+++ b/docs/en/operations/system-tables/storage_policies.md
@@ -11,8 +11,16 @@ Columns:
- `volume_name` ([String](../../sql-reference/data-types/string.md)) — Volume name defined in the storage policy.
- `volume_priority` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Volume order number in the configuration, the data fills the volumes according this priority, i.e. data during inserts and merges is written to volumes with a lower priority (taking into account other rules: TTL, `max_data_part_size`, `move_factor`).
- `disks` ([Array(String)](../../sql-reference/data-types/array.md)) — Disk names, defined in the storage policy.
+- `volume_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of volume. Can have one of the following values:
+ - `JBOD`
+ - `SINGLE_DISK`
+ - `UNKNOWN`
- `max_data_part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum size of a data part that can be stored on volume disks (0 — no limit).
- `move_factor` ([Float64](../../sql-reference/data-types/float.md)) — Ratio of free disk space. When the ratio exceeds the value of the configuration parameter, ClickHouse starts to move data to the next volume in order.
- `prefer_not_to_merge` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `prefer_not_to_merge` setting. When this setting is enabled, merging data on this volume is not allowed. This allows controlling how ClickHouse works with slow disks.
+- `perform_ttl_move_on_insert` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Value of the `perform_ttl_move_on_insert` setting. Disables TTL moves on data part INSERT. By default, if you insert a data part that has already expired according to the TTL move rule, it immediately goes to the volume/disk declared in the move rule. This can significantly slow down inserts when the destination volume/disk is slow (e.g. S3).
+- `load_balancing` ([Enum8](../../sql-reference/data-types/enum.md)) — Policy for disk balancing. Can have one of the following values:
+ - `ROUND_ROBIN`
+ - `LEAST_USED`
If the storage policy contains more than one volume, then information for each volume is stored in an individual row of the table.
diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md
index a23e0745dec..6363d9cab27 100644
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@@ -6,7 +6,13 @@ sidebar_label: clickhouse-local
# clickhouse-local
-The `clickhouse-local` program enables you to perform fast processing on local files, without having to deploy and configure the ClickHouse server. It accepts data that represent tables and queries them using [ClickHouse SQL dialect](../../sql-reference/index.md). `clickhouse-local` uses the same core as ClickHouse server, so it supports most of the features and the same set of formats and table engines.
+## When to use clickhouse-local vs. ClickHouse
+
+`clickhouse-local` is an easy-to-use version of ClickHouse that is ideal for developers who need to perform fast processing on local and remote files using SQL without having to install a full database server. With `clickhouse-local`, developers can use SQL commands (using the [ClickHouse SQL dialect](../../sql-reference/index.md)) directly from the command line, providing a simple and efficient way to access ClickHouse features without the need for a full ClickHouse installation. One of the main benefits of `clickhouse-local` is that it is already included when installing [clickhouse-client](https://clickhouse.com/docs/en/integrations/sql-clients/clickhouse-client-local). This means that developers can get started with `clickhouse-local` quickly, without the need for a complex installation process.
+
+While `clickhouse-local` is a great tool for development and testing purposes, and for processing files, it is not suitable for serving end users or applications. In these scenarios, it is recommended to use the open-source [ClickHouse](https://clickhouse.com/docs/en/install). ClickHouse is a powerful OLAP database that is designed to handle large-scale analytical workloads. It provides fast and efficient processing of complex queries on large datasets, making it ideal for use in production environments where high performance is critical. Additionally, ClickHouse offers a wide range of features such as replication, sharding, and high availability, which are essential for scaling up to handle large datasets and serving applications. If you need to handle larger datasets or serve end users or applications, we recommend using open-source ClickHouse instead of `clickhouse-local`.
+
+Please read the docs below that show example use cases for `clickhouse-local`, such as [querying local CSVs](#query-data-in-a-csv-file-using-sql) or [reading a parquet file in S3](#query-data-in-a-parquet-file-in-aws-s3).
## Download clickhouse-local
diff --git a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
index e08e69b7cf6..afcf2a48c23 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
@@ -6,7 +6,7 @@ title: deltaSumTimestamp
Adds the difference between consecutive rows. If the difference is negative, it is ignored.
-This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that are ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the "right" order. This function keeps track of the `timestamp` of the values it's seen, so it's possible to order the states correctly during merging.
+This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view will all have the same timestamp, it is impossible for them to be merged in the correct order, without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the values it's seen, so the values (states) of the function are correctly computed during merging of parts.
To calculate the delta sum across an ordered collection you can simply use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 71b7fa07f18..903654c2f0a 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -1264,7 +1264,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded (1-31) | 2 |
-| %f | fractional second from the fractional part of DateTime64 | 1234560 |
+| %f | fractional second, see 'Note 1' below | 1234560 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
@@ -1276,16 +1276,16 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %k | hour in 24h format (00-23) | 22 |
| %l | hour in 12h format (01-12) | 09 |
| %m | month as an integer number (01-12) | 01 |
-| %M | minute (00-59) | 33 |
+| %M | full month name (January-December), see 'Note 2' below | January |
| %n | new-line character (‘\n’) | |
| %p | AM or PM designation | PM |
| %Q | Quarter (1-4) | 1 |
-| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%M %p | 10:30 PM |
-| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
+| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM |
+| %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 |
| %s | second (00-59) | 44 |
| %S | second (00-59) | 44 |
| %t | horizontal-tab character (‘\t’) | |
-| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
+| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%i:%S | 22:33:44 |
| %u | ISO 8601 weekday as number with Monday as 1 (1-7) | 2 |
| %V | ISO 8601 week number (01-53) | 01 |
| %w | weekday as an integer number with Sunday as 0 (0-6) | 2 |
@@ -1295,6 +1295,10 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %z | Time offset from UTC as +HHMM or -HHMM | -0500 |
| %% | a % sign | % |
+Note 1: In ClickHouse versions earlier than v23.4, `%f` prints a single zero (0) if the formatted value is a Date, Date32 or DateTime (which have no fractional seconds) or a DateTime64 with a precision of 0. The previous behavior can be restored using the setting `formatdatetime_f_prints_single_zero = 1`.
+
+Note 2: In ClickHouse versions earlier than v23.4, `%M` prints the minute (00-59) instead of the full month name (January-December). The previous behavior can be restored using the setting `formatdatetime_parsedatetime_m_is_month_name = 0`.
+
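+For instance, a short sketch of Note 2 (assumes a server of version >= 23.4):
+
+```sql
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%M') AS month_name;
+-- 'January'
+
+SELECT formatDateTime(toDateTime('2018-01-02 22:33:44'), '%M') AS minute
+SETTINGS formatdatetime_parsedatetime_m_is_month_name = 0;
+-- '33'
+```
+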
**Example**
Query:
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 2943ba13861..5fcf6a2d1df 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -441,11 +441,11 @@ SELECT farmHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:0
## javaHash
-Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
-[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
-[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
-[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
-[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
+Calculates JavaHash from a [string](http://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/String.java#l1452),
+[Byte](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Byte.java#l405),
+[Short](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Short.java#l410),
+[Integer](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Integer.java#l959),
+[Long](https://hg.openjdk.java.net/jdk8u/jdk8u/jdk/file/478a4add975b/src/share/classes/java/lang/Long.java#l1060).
This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
Note that Java only supports calculating the hash of signed integers, so if you want to calculate the hash of unsigned integers you must cast them to the proper signed ClickHouse types.
@@ -660,6 +660,45 @@ Result:
└──────────────────────┴─────────────────────┘
```
+
+## kafkaMurmurHash
+
+Calculates a 32-bit [MurmurHash2](https://github.com/aappleby/smhasher) hash value, using the same hash seed as [Kafka](https://github.com/apache/kafka/blob/461c5cfe056db0951d9b74f5adc45973670404d7/clients/src/main/java/org/apache/kafka/common/utils/Utils.java#L482) and dropping the highest bit for compatibility with the [Default Partitioner](https://github.com/apache/kafka/blob/139f7709bd3f5926901a21e55043388728ccca78/clients/src/main/java/org/apache/kafka/clients/producer/internals/BuiltInPartitioner.java#L328).
+
+**Syntax**
+
+```sql
+kafkaMurmurHash(par1, ...)
+```
+
+**Arguments**
+
+- `par1, ...` — A variable number of parameters that can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md/#data_types).
+
+**Returned value**
+
+- Calculated hash value.
+
+Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT
+ kafkaMurmurHash('foobar') AS res1,
+ kafkaMurmurHash(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS res2
+```
+
+Result:
+
+```response
+┌───────res1─┬─────res2─┐
+│ 1357151166 │ 85479775 │
+└────────────┴──────────┘
+```
+
## murmurHash3_32, murmurHash3_64
Produces a [MurmurHash3](https://github.com/aappleby/smhasher) hash value.
diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md
index 50e15f70f5d..d4c7c451af2 100644
--- a/docs/en/sql-reference/functions/string-replace-functions.md
+++ b/docs/en/sql-reference/functions/string-replace-functions.md
@@ -13,17 +13,18 @@ Functions for [searching](../../sql-reference/functions/string-search-functions.
## replaceOne(haystack, pattern, replacement)
Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string.
-‘pattern’ and ‘replacement’ must be constants.
## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)
Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
+Alias: `replace`.
+
## replaceRegexpOne(haystack, pattern, replacement)
Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack‘ by the ‘replacement‘ string.
-‘pattern‘ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
-‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`.
+‘pattern‘ must be a [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
+‘replacement’ must be a plain string or a string containing substitutions `\0-\9`.
Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
To use a verbatim `\` character in the ‘pattern‘ or ‘replacement‘ string, escape it using `\`.
Also keep in mind that string literals require an extra escaping.
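+
+For example, a small sketch using capture-group substitutions (note the doubled backslashes required by string-literal escaping):
+
+```sql
+SELECT replaceRegexpOne('2023-04-05', '(\\d{4})-(\\d{2})-(\\d{2})', '\\3/\\2/\\1') AS res;
+-- '05/04/2023'
+```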
@@ -88,6 +89,8 @@ SELECT replaceRegexpAll('Hello, World!', '^', 'here: ') AS res
└─────────────────────┘
```
+Alias: `REGEXP_REPLACE`.
+
## regexpQuoteMeta(s)
The function adds a backslash before some predefined characters in the string.
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 213ed187f15..5ce72caa3b9 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -1245,7 +1245,6 @@ Returns DateTime values parsed from input string according to a MySQL style form
**Supported format specifiers**
All format specifiers listed in [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime) except:
-- %f: fractional second
- %Q: Quarter (1-4)
**Example**
diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md
index 8a83a8fae1d..b6208c2fd52 100644
--- a/docs/en/sql-reference/statements/drop.md
+++ b/docs/en/sql-reference/statements/drop.md
@@ -22,6 +22,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC]
Deletes the table.
+:::tip
+Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md)
+:::
+
Syntax:
``` sql
diff --git a/docs/en/sql-reference/statements/undrop.md b/docs/en/sql-reference/statements/undrop.md
new file mode 100644
index 00000000000..40ac1ab4f99
--- /dev/null
+++ b/docs/en/sql-reference/statements/undrop.md
@@ -0,0 +1,99 @@
+---
+slug: /en/sql-reference/statements/undrop
+sidebar_label: UNDROP
+---
+
+# UNDROP TABLE
+
+Cancels the dropping of the table.
+
+Beginning with ClickHouse version 23.3, it is possible to UNDROP a table in an Atomic database
+within `database_atomic_delay_before_drop_table_sec` (8 minutes by default) of issuing the DROP TABLE statement. Dropped tables are listed in
+the system table `system.dropped_tables`.
+
+If a materialized view without a `TO` clause is associated with the dropped table, you will also have to UNDROP the inner table of that view.
+
+:::note
+UNDROP TABLE is experimental. To use it, enable the following setting:
+```sql
+set allow_experimental_undrop_table_query = 1;
+```
+:::
+
+:::tip
+Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
+:::
+
+Syntax:
+
+``` sql
+UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster]
+```
+
+**Example**
+
+``` sql
+set allow_experimental_undrop_table_query = 1;
+```
+
+```sql
+CREATE TABLE undropMe
+(
+ `id` UInt8
+)
+ENGINE = MergeTree
+ORDER BY id
+```
+
+```sql
+DROP TABLE undropMe
+```
+```sql
+SELECT *
+FROM system.dropped_tables
+FORMAT Vertical
+```
+```response
+Row 1:
+──────
+index: 0
+database: default
+table: undropMe
+uuid: aa696a1a-1d70-4e60-a841-4c80827706cc
+engine: MergeTree
+metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
+table_dropped_time: 2023-04-05 14:12:12
+
+1 row in set. Elapsed: 0.001 sec.
+```
+```sql
+UNDROP TABLE undropMe
+```
+```response
+Ok.
+```
+```sql
+SELECT *
+FROM system.dropped_tables
+FORMAT Vertical
+```
+```response
+Ok.
+
+0 rows in set. Elapsed: 0.001 sec.
+```
+```sql
+DESCRIBE TABLE undropMe
+FORMAT Vertical
+```
+```response
+Row 1:
+──────
+name: id
+type: UInt8
+default_type:
+default_expression:
+comment:
+codec_expression:
+ttl_expression:
+```
diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md
index 63c5042f9e8..ea2df235c1a 100644
--- a/docs/en/sql-reference/syntax.md
+++ b/docs/en/sql-reference/syntax.md
@@ -14,7 +14,7 @@ The `INSERT` query uses both parsers:
INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def')
```
-The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions).
+The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#expressions).
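+
+For example, a minimal sketch (assuming the table `t` from above and a fresh session):
+
+```sql
+SET input_format_values_interpret_expressions = 1;
+-- lower('Hello, world') is an expression, not a plain literal, so the fast
+-- stream parser fails and ClickHouse falls back to the full parser here.
+INSERT INTO t VALUES (1, lower('Hello, world'));
+```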
Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed.
This allows avoiding issues with large `INSERT` queries.
@@ -45,7 +45,7 @@ You can check whether a data type name is case-sensitive in the [system.data_typ
In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**.
-Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#syntax-identifiers) with the same name as the keywords, enclose them into double-quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has column with the name `"FROM"`.
+Keywords are not reserved; they are treated as such only in the corresponding context. If you use [identifiers](#identifiers) with the same name as the keywords, enclose them in double quotes or backticks. For example, the query `SELECT "FROM" FROM table_name` is valid if the table `table_name` has a column with the name `"FROM"`.
## Identifiers
@@ -54,7 +54,7 @@ Identifiers are:
- Cluster, database, table, partition, and column names.
- Functions.
- Data types.
-- [Expression aliases](#syntax-expression_aliases).
+- [Expression aliases](#expression_aliases).
Identifiers can be quoted or non-quoted. The latter is preferred.
@@ -108,7 +108,7 @@ Depending on the data format (input or output), `NULL` may have a different repr
There are many nuances to processing `NULL`. For example, if at least one of the arguments of a comparison operation is `NULL`, the result of this operation is also `NULL`. The same is true for multiplication, addition, and other operations. For more information, read the documentation for each operation.
-In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#operator-is-null) and [IS NOT NULL](../sql-reference/operators/index.md) operators and the related functions `isNull` and `isNotNull`.
+In queries, you can check `NULL` using the [IS NULL](../sql-reference/operators/index.md#is-null) and [IS NOT NULL](../sql-reference/operators/index.md#is-not-null) operators and the related functions `isNull` and `isNotNull`.
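+
+A quick illustration of how `NULL` propagates and how to test for it:
+
+```sql
+SELECT NULL = 1 AS eq, NULL IS NULL AS is_null, isNotNull(1) AS not_null;
+-- eq is NULL, is_null is 1, not_null is 1
+```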
### Heredoc
@@ -149,7 +149,7 @@ For example, the following SQL defines parameters named `a`, `b`, `c` and `d` -
SET param_a = 13;
SET param_b = 'str';
SET param_c = '2022-08-04 18:30:53';
-SET param_d = {'10': [11, 12], '13': [14, 15]}';
+SET param_d = {'10': [11, 12], '13': [14, 15]};
SELECT
{a: UInt32},
@@ -166,7 +166,7 @@ Result:
If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is retrieved as a `String`:
-```sql
+```bash
clickhouse-client --param_message='hello' --query="SELECT {message: String}"
```
@@ -190,7 +190,7 @@ Query parameters are not general text substitutions which can be used in arbitra
## Functions
Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`.
-There are regular and aggregate functions (see the section “Aggregate functions”). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions.
+There are regular and aggregate functions (see the section [Aggregate functions](/docs/en/sql-reference/aggregate-functions/index.md)). Some aggregate functions can contain two lists of arguments in brackets. Example: `quantile (0.9) (x)`. These aggregate functions are called “parametric” functions, and the arguments in the first list are called “parameters”. The syntax of aggregate functions without parameters is the same as for regular functions.
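+
+For example, a minimal sketch of a parametric call:
+
+```sql
+-- 0.9 is the parameter, number is the argument.
+SELECT quantile(0.9)(number) FROM numbers(100);
+```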
## Operators
@@ -199,7 +199,7 @@ For example, the expression `1 + 2 * 3 + 4` is transformed to `plus(plus(1, mult
## Data Types and Database Table Engines
-Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections “Data types,” “Table engines,” and “CREATE”.
+Data types and table engines in the `CREATE` query are written the same way as identifiers or functions. In other words, they may or may not contain an argument list in brackets. For more information, see the sections [Data types](/docs/en/sql-reference/data-types/index.md), [Table engines](/docs/en/engines/table-engines/index.md), and [CREATE](/docs/en/sql-reference/statements/create/index.md).
## Expression Aliases
@@ -211,17 +211,17 @@ expr AS alias
- `AS` — The keyword for defining aliases. You can define the alias for a table name or a column name in a `SELECT` clause without using the `AS` keyword.
- For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.
+ For example, `SELECT table_name_alias.column_name FROM table_name table_name_alias`.
- In the [CAST](./functions/type-conversion-functions.md#type_conversion_function-cast) function, the `AS` keyword has another meaning. See the description of the function.
+ In the [CAST](./functions/type-conversion-functions.md#castx-t) function, the `AS` keyword has another meaning. See the description of the function.
- `expr` — Any expression supported by ClickHouse.
- For example, `SELECT column_name * 2 AS double FROM some_table`.
+ For example, `SELECT column_name * 2 AS double FROM some_table`.
-- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#syntax-identifiers) syntax.
+- `alias` — Name for `expr`. Aliases should comply with the [identifiers](#identifiers) syntax.
- For example, `SELECT "table t".column_name FROM table_name AS "table t"`.
+ For example, `SELECT "table t".column_name FROM table_name AS "table t"`.
### Notes on Usage
@@ -254,11 +254,11 @@ Received exception from server (version 18.14.17):
Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query.
```
-In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer_column_name_to_alias) to `1`.
+In this example, we declared table `t` with column `b`. Then, when selecting data, we defined the `sum(b) AS b` alias. As aliases are global, ClickHouse substituted the literal `b` in the expression `argMax(a, b)` with the expression `sum(b)`. This substitution caused the exception. You can change this default behavior by setting [prefer_column_name_to_alias](../operations/settings/settings.md#prefer-column-name-to-alias) to `1`.
## Asterisk
-In a `SELECT` query, an asterisk can replace the expression. For more information, see the section “SELECT”.
+In a `SELECT` query, an asterisk can replace the expression. For more information, see the section [SELECT](/docs/en/sql-reference/statements/select/index.md#asterisk).
## Expressions
diff --git a/docs/en/sql-reference/table-functions/executable.md b/docs/en/sql-reference/table-functions/executable.md
index 22c74eb8cfa..5a24c3ab11d 100644
--- a/docs/en/sql-reference/table-functions/executable.md
+++ b/docs/en/sql-reference/table-functions/executable.md
@@ -20,7 +20,7 @@ A key advantage between ordinary UDF functions and the `executable` table functi
The `executable` table function requires three parameters and accepts an optional list of input queries, optionally followed by settings:
```sql
-executable(script_name, format, structure, [input_query...])
+executable(script_name, format, structure, [input_query...] [,SETTINGS ...])
```
- `script_name`: the file name of the script, saved in the `user_scripts` folder (the default folder of the `user_scripts_path` setting)
@@ -83,6 +83,15 @@ The response looks like:
└────┴────────────┘
```
+## Settings
+
+- `send_chunk_header` — controls whether to send a row count before sending a chunk of data to process. Default value is `false`.
+- `pool_size` — size of the pool. If `0` is specified as `pool_size`, there are no pool size restrictions. Default value is `16`.
+- `max_command_execution_time` — maximum time, in seconds, the executable script command may spend processing a block of data. Default value is `10`.
+- `command_termination_timeout` — the executable script should contain a main read-write loop. After the table function is destroyed, the pipe is closed, and the executable file has `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is `10`.
+- `command_read_timeout` — timeout for reading data from the command's stdout, in milliseconds. Default value is `10000`.
+- `command_write_timeout` — timeout for writing data to the command's stdin, in milliseconds. Default value is `10000`.
+
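+As an illustration, a hedged sketch of passing settings (the script name `my_script.sh` is hypothetical; it is expected to live in `user_scripts` and echo one TSV row per input row):
+
+```sql
+SELECT * FROM executable(
+    'my_script.sh', TabSeparated, 'id UInt64',
+    (SELECT number AS id FROM system.numbers LIMIT 3),
+    SETTINGS send_chunk_header = 1, command_read_timeout = 5000
+);
+```
+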
## Passing Query Results to a Script
Be sure to check out the example in the `Executable` table engine on [how to pass query results to a script](../../engines/table-engines/special/executable.md#passing-query-results-to-a-script). Here is how you execute the same script in that example using the `executable` table function:
@@ -94,4 +103,4 @@ SELECT * FROM executable(
'id UInt64, sentiment Float32',
(SELECT id, comment FROM hackernews WHERE id > 0 AND comment != '' LIMIT 20)
);
-```
\ No newline at end of file
+```
diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md
index 80472178ae2..7294bc2ae87 100644
--- a/docs/ru/development/developer-instruction.md
+++ b/docs/ru/development/developer-instruction.md
@@ -41,9 +41,15 @@ ClickHouse не работает и не собирается на 32-битны
Run in the terminal:
- git clone git@github.com:your_github_username/ClickHouse.git --recursive
+ git clone --shallow-submodules git@github.com:your_github_username/ClickHouse.git
cd ClickHouse
+Or (if you want to use sparse checkout for submodules):
+
+ git clone git@github.com:your_github_username/ClickHouse.git
+ cd ClickHouse
+ ./contrib/update-submodules.sh
+
Replace `your_github_username` in the git command with your GitHub account name.
This command creates a ClickHouse directory containing a working copy of the project.
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
index 7be933d67d7..50434419651 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasumtimestamp.md
@@ -7,7 +7,7 @@ sidebar_position: 141
Adds the difference between consecutive rows. If the difference is negative, it is ignored.
-This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) ordered by some time bucket-aligned timestamp, for example, a `toStartOfMinute` bucket. Because rows in such a materialized view all have the same timestamp, it is impossible to merge them in the "right" order. This function keeps track of the `timestamp` of the values it has seen, so the states can be ordered correctly during merging.
+This function is primarily for [materialized views](../../../sql-reference/statements/create/view.md#materialized) that store data ordered by some rounded time interval, according to the timestamp, for example, a `toStartOfMinute` bucket. Because the rows in such a materialized view all have the same timestamp, it is impossible to merge them in the correct order without storing the original, unrounded timestamp value. The `deltaSumTimestamp` function keeps track of the original `timestamp` of the values it has seen, so the values (states) of the function are computed correctly during merging of parts.
To calculate the difference between ordered consecutive rows, you can use the [deltaSum](../../../sql-reference/aggregate-functions/reference/deltasum.md#agg_functions-deltasum) function instead of `deltaSumTimestamp`.
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 660b8d7c00a..df0abceb8c6 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -277,11 +277,11 @@ void Client::initialize(Poco::Util::Application & self)
*/
const char * env_user = getenv("CLICKHOUSE_USER"); // NOLINT(concurrency-mt-unsafe)
- if (env_user)
+ if (env_user && !config().has("user"))
config().setString("user", env_user);
const char * env_password = getenv("CLICKHOUSE_PASSWORD"); // NOLINT(concurrency-mt-unsafe)
- if (env_password)
+ if (env_password && !config().has("password"))
config().setString("password", env_password);
parseConnectionsCredentials();
diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp
index b142159fbdf..d83e189f7ef 100644
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@@ -375,15 +375,22 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
try
{
- ReadBufferFromFile in(binary_self_path.string());
- WriteBufferFromFile out(main_bin_tmp_path.string());
- copyData(in, out);
- out.sync();
+ String source = binary_self_path.string();
+ String destination = main_bin_tmp_path.string();
- if (0 != fchmod(out.getFD(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
+ /// Try to make a hard link first, as an optimization.
+ /// It is possible if the source and the destination are on the same filesystems.
+ if (0 != link(source.c_str(), destination.c_str()))
+ {
+ ReadBufferFromFile in(binary_self_path.string());
+ WriteBufferFromFile out(main_bin_tmp_path.string());
+ copyData(in, out);
+ out.sync();
+ out.finalize();
+ }
+
+ if (0 != chmod(destination.c_str(), S_IRUSR | S_IRGRP | S_IROTH | S_IXUSR | S_IXGRP | S_IXOTH))
throwFromErrno(fmt::format("Cannot chmod {}", main_bin_tmp_path.string()), ErrorCodes::SYSTEM_ERROR);
-
- out.finalize();
}
catch (const Exception & e)
{
diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp
index 266b363eb47..3853c955171 100644
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@@ -17,7 +17,6 @@
#include
#include
#include
-#include
#include
#include
#include
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 9ef9f704f61..164e1ce14e5 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -981,7 +981,7 @@ try
StatusFile status{path / "status", StatusFile::write_full_info};
- DB::ServerUUID::load(path / "uuid", log);
+ ServerUUID::load(path / "uuid", log);
/// Try to increase limit on number of open files.
{
diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp
index ef88e8a225f..710cf257b95 100644
--- a/src/Access/DiskAccessStorage.cpp
+++ b/src/Access/DiskAccessStorage.cpp
@@ -10,6 +10,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -19,6 +20,7 @@
#include
#include
#include
+#include
namespace DB
@@ -317,15 +319,15 @@ void DiskAccessStorage::scheduleWriteLists(AccessEntityType type)
return; /// If the lists' writing thread is still waiting we can update `types_of_lists_to_write` easily,
/// without restarting that thread.
- if (lists_writing_thread.joinable())
- lists_writing_thread.join();
+ if (lists_writing_thread && lists_writing_thread->joinable())
+ lists_writing_thread->join();
/// Create the 'need_rebuild_lists.mark' file.
/// This file will be used later to find out if writing lists is successful or not.
std::ofstream out{getNeedRebuildListsMarkFilePath(directory_path)};
out.close();
- lists_writing_thread = ThreadFromGlobalPool{&DiskAccessStorage::listsWritingThreadFunc, this};
+ lists_writing_thread = std::make_unique<ThreadFromGlobalPool>(&DiskAccessStorage::listsWritingThreadFunc, this);
lists_writing_thread_is_waiting = true;
}
@@ -349,10 +351,10 @@ void DiskAccessStorage::listsWritingThreadFunc()
void DiskAccessStorage::stopListsWritingThread()
{
- if (lists_writing_thread.joinable())
+ if (lists_writing_thread && lists_writing_thread->joinable())
{
lists_writing_thread_should_exit.notify_one();
- lists_writing_thread.join();
+ lists_writing_thread->join();
}
}
diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h
index b1ef1d10ba7..069a966c8e9 100644
--- a/src/Access/DiskAccessStorage.h
+++ b/src/Access/DiskAccessStorage.h
@@ -1,7 +1,7 @@
#pragma once
#include
-#include <Common/ThreadPool.h>
+#include <Common/ThreadPool_fwd.h>
#include
@@ -81,7 +81,7 @@ private:
bool failed_to_write_lists TSA_GUARDED_BY(mutex) = false;
/// List files are written in a separate thread.
- ThreadFromGlobalPool lists_writing_thread;
+ std::unique_ptr<ThreadFromGlobalPool> lists_writing_thread;
/// Signals `lists_writing_thread` to exit.
std::condition_variable lists_writing_thread_should_exit;
diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp
index ddc5e8bfed1..f34e6728ab3 100644
--- a/src/Access/ReplicatedAccessStorage.cpp
+++ b/src/Access/ReplicatedAccessStorage.cpp
@@ -1,3 +1,4 @@
+#include
#include
#include
#include
@@ -15,6 +16,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -72,7 +74,7 @@ void ReplicatedAccessStorage::startWatchingThread()
{
bool prev_watching_flag = watching.exchange(true);
if (!prev_watching_flag)
- watching_thread = ThreadFromGlobalPool(&ReplicatedAccessStorage::runWatchingThread, this);
+ watching_thread = std::make_unique<ThreadFromGlobalPool>(&ReplicatedAccessStorage::runWatchingThread, this);
}
void ReplicatedAccessStorage::stopWatchingThread()
@@ -81,8 +83,8 @@ void ReplicatedAccessStorage::stopWatchingThread()
if (prev_watching_flag)
{
watched_queue->finish();
- if (watching_thread.joinable())
- watching_thread.join();
+ if (watching_thread && watching_thread->joinable())
+ watching_thread->join();
}
}
diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h
index d9d4b628f8d..555d58e6b04 100644
--- a/src/Access/ReplicatedAccessStorage.h
+++ b/src/Access/ReplicatedAccessStorage.h
@@ -2,7 +2,7 @@
#include
-#include <Common/ThreadPool.h>
+#include <Common/ThreadPool_fwd.h>
#include
#include
#include
@@ -21,7 +21,7 @@ public:
static constexpr char STORAGE_TYPE[] = "replicated";
ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper, AccessChangesNotifier & changes_notifier_, bool allow_backup);
- virtual ~ReplicatedAccessStorage() override;
+ ~ReplicatedAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
@@ -43,7 +43,7 @@ private:
std::mutex cached_zookeeper_mutex;
std::atomic watching = false;
- ThreadFromGlobalPool watching_thread;
+ std::unique_ptr<ThreadFromGlobalPool> watching_thread;
std::shared_ptr<ConcurrentBoundedQueue<UUID>> watched_queue;
std::optional insertImpl(const AccessEntityPtr & entity, bool replace_if_exists, bool throw_if_exists) override;
diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h
index 2c54293eeec..5074e491f60 100644
--- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h
+++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h
@@ -1,6 +1,5 @@
#pragma once
-#include
#include
#include
diff --git a/src/AggregateFunctions/AggregateFunctionSparkbar.h b/src/AggregateFunctions/AggregateFunctionSparkbar.h
index 78f7e9fcefa..30e107bc4db 100644
--- a/src/AggregateFunctions/AggregateFunctionSparkbar.h
+++ b/src/AggregateFunctions/AggregateFunctionSparkbar.h
@@ -11,7 +11,6 @@
#include
#include
#include
-#include
#include
#include
#include
diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h b/src/AggregateFunctions/AggregateFunctionSumMap.h
index f51ec423c69..b30f5ff5220 100644
--- a/src/AggregateFunctions/AggregateFunctionSumMap.h
+++ b/src/AggregateFunctions/AggregateFunctionSumMap.h
@@ -18,7 +18,6 @@
#include
#include
#include