Merge branch 'master' into enable_constant_column_search

This commit is contained in:
Robert Schulze 2022-09-16 11:09:45 +02:00 committed by GitHub
commit 13a2bbaf5e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
83 changed files with 1270 additions and 260 deletions

View File

@ -22,6 +22,8 @@ Checks: '*,
-bugprone-implicit-widening-of-multiplication-result, -bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions, -bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result, -bugprone-not-null-terminated-result,
-bugprone-unchecked-optional-access,
-bugprone-assignment-in-if-condition,
-cert-dcl16-c, -cert-dcl16-c,
-cert-err58-cpp, -cert-err58-cpp,
@ -103,6 +105,7 @@ Checks: '*,
-misc-no-recursion, -misc-no-recursion,
-misc-non-private-member-variables-in-classes, -misc-non-private-member-variables-in-classes,
-misc-const-correctness,
-modernize-avoid-c-arrays, -modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces, -modernize-concat-nested-namespaces,
@ -114,6 +117,7 @@ Checks: '*,
-modernize-use-nodiscard, -modernize-use-nodiscard,
-modernize-use-override, -modernize-use-override,
-modernize-use-trailing-return-type, -modernize-use-trailing-return-type,
-modernize-macro-to-enum,
-performance-inefficient-string-concatenation, -performance-inefficient-string-concatenation,
-performance-no-int-to-ptr, -performance-no-int-to-ptr,
@ -135,6 +139,7 @@ Checks: '*,
-readability-suspicious-call-argument, -readability-suspicious-call-argument,
-readability-uppercase-literal-suffix, -readability-uppercase-literal-suffix,
-readability-use-anyofallof, -readability-use-anyofallof,
-readability-simplify-boolean-expr,
-zirkon-*, -zirkon-*,
' '

View File

@ -3,7 +3,7 @@ option (ENABLE_CLANG_TIDY "Use clang-tidy static analyzer" OFF)
if (ENABLE_CLANG_TIDY) if (ENABLE_CLANG_TIDY)
find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12") find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-15" "clang-tidy-14" "clang-tidy-13" "clang-tidy-12")
if (CLANG_TIDY_PATH) if (CLANG_TIDY_PATH)
message(STATUS message(STATUS

View File

@ -45,6 +45,7 @@ if (CMAKE_CROSSCOMPILING)
endif () endif ()
if (USE_MUSL) if (USE_MUSL)
# use of undeclared identifier 'PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP'
set (ENABLE_SENTRY OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "")
set (ENABLE_ODBC OFF CACHE INTERNAL "") set (ENABLE_ODBC OFF CACHE INTERNAL "")
set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_GRPC OFF CACHE INTERNAL "")

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 1be805e7cb2494aa8170015493474379b0362dfc Subproject commit e15858f8ad0ce8aba85cf74e3763874c76bf927c

View File

@ -415,7 +415,7 @@
/* /*
* Defined if strerror_r returns char * if _GNU_SOURCE is defined. * Defined if strerror_r returns char * if _GNU_SOURCE is defined.
*/ */
#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE /* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */
/* Performs additional safety checks when defined. */ /* Performs additional safety checks when defined. */
/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ /* #undef JEMALLOC_OPT_SAFETY_CHECKS */

View File

@ -440,7 +440,9 @@
#define HAVE_STRERROR 1 #define HAVE_STRERROR 1
/* Define to 1 if you have the `strerror_r' function. */ /* Define to 1 if you have the `strerror_r' function. */
#ifndef USE_MUSL
#define HAVE_STRERROR_R 1 #define HAVE_STRERROR_R 1
#endif
/* Define to 1 if you have the <strings.h> header file. */ /* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1 #define HAVE_STRINGS_H 1

2
contrib/libcpuid vendored

@ -1 +1 @@
Subproject commit 8db3b8d2d32d22437f063ce692a1b9bb15e42d18 Subproject commit 503083acb77edf9fbce22a05826307dff2ce96e6

View File

@ -63,6 +63,13 @@ target_include_directories (_libpq SYSTEM PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include") target_include_directories (_libpq SYSTEM PUBLIC "${LIBPQ_SOURCE_DIR}/include")
target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs") target_include_directories (_libpq SYSTEM PRIVATE "${LIBPQ_SOURCE_DIR}/configs")
# NOTE: this is a dirty hack. Instead, pg_config.h should be shipped separately
# for each OS (as is done for jemalloc), not one generic file for all OSes as
# now.
if (OS_DARWIN OR OS_FREEBSD OR USE_MUSL)
target_compile_definitions(_libpq PRIVATE -DSTRERROR_R_INT=1)
endif()
target_link_libraries (_libpq PRIVATE OpenSSL::SSL) target_link_libraries (_libpq PRIVATE OpenSSL::SSL)
add_library(ch_contrib::libpq ALIAS _libpq) add_library(ch_contrib::libpq ALIAS _libpq)

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit ff32b4e9eeafd0b276f010ee969179e4e9e6d0b2 Subproject commit 6f3b483426a8c8ec950e27e446bec175cf8b553f

2
contrib/llvm vendored

@ -1 +1 @@
Subproject commit 20607e61728e97c969e536644c3c0c1bb1a50672 Subproject commit 0db5bf5bd2452cd8f1283a1fcdc04845af705bfc

@ -1 +1 @@
Subproject commit f431047ac8da13179c488018dddf1c0d0771a997 Subproject commit ae10fb8c224c3f41571446e1ed7fd57b9e5e366b

2
contrib/vectorscan vendored

@ -1 +1 @@
Subproject commit 73695e419c27af7fe2a099c7aa57931cc02aea5d Subproject commit f6250ae3e5a3085000239313ad0689cc1e00cdc2

View File

@ -304,7 +304,7 @@ target_include_directories (_vectorscan SYSTEM PUBLIC "${LIBRARY_DIR}/src")
# Please regenerate these files if you update vectorscan. # Please regenerate these files if you update vectorscan.
if (ARCH_AMD64) if (ARCH_AMD64)
target_include_directories (_vectorscan PRIVATE x86_64) target_include_directories (_vectorscan PRIVATE amd64)
endif () endif ()
if (ARCH_AARCH64) if (ARCH_AARCH64)

View File

@ -67,24 +67,5 @@ ENV GOCACHE=/workdir/
RUN mkdir /workdir && chmod 777 /workdir RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir WORKDIR /workdir
# NOTE: thread sanitizer is broken in clang-14, we have to build it with clang-15
# https://github.com/ClickHouse/ClickHouse/pull/39450
# https://github.com/google/sanitizers/issues/1540
# https://github.com/google/sanitizers/issues/1552
RUN export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb [trusted=yes] https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-15 main" >> \
/etc/apt/sources.list.d/clang.list \
&& apt-get update \
&& apt-get install \
clang-15 \
llvm-15 \
clang-tidy-15 \
--yes --no-install-recommends \
&& apt-get clean
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-15 /usr/bin/llvm-symbolizer
COPY build.sh / COPY build.sh /
CMD ["bash", "-c", "/build.sh 2>&1"] CMD ["bash", "-c", "/build.sh 2>&1"]

View File

@ -339,17 +339,16 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--compiler", "--compiler",
choices=( choices=(
"clang-15", # For TSAN builds, see #39450 "clang-15",
"clang-14", "clang-15-darwin",
"clang-14-darwin", "clang-15-darwin-aarch64",
"clang-14-darwin-aarch64", "clang-15-aarch64",
"clang-14-aarch64", "clang-15-ppc64le",
"clang-14-ppc64le", "clang-15-amd64sse2",
"clang-14-amd64sse2", "clang-15-freebsd",
"clang-14-freebsd",
"gcc-11", "gcc-11",
), ),
default="clang-14", default="clang-15",
help="a compiler to use", help="a compiler to use",
) )
parser.add_argument( parser.add_argument(

View File

@ -16,11 +16,10 @@ RUN apt-get update \
# and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB).
# TSAN will flush shadow memory when reaching this limit. # TSAN will flush shadow memory when reaching this limit.
# It may cause false-negatives, but it's better than OOM. # It may cause false-negatives, but it's better than OOM.
RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment; \ RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080'" >> /etc/environment
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment; \ RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment; \ RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment
echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment; \ RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment
ln -s /usr/lib/llvm-${LLVM_VERSION}/bin/llvm-symbolizer /usr/bin/llvm-symbolizer;
# Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run")
# (but w/o verbosity for TSAN, otherwise test.reference will not match) # (but w/o verbosity for TSAN, otherwise test.reference will not match)
ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080' ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080'

View File

@ -8,16 +8,41 @@ FROM clickhouse/binary-builder:$FROM_TAG
ARG apt_archive="http://archive.ubuntu.com" ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-14 libllvm14 libclang-14-dev libmlir-14-dev RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev
# libclang-15-dev does not contain proper symlink:
#
# This is what cmake will search for:
#
# # readlink -f /usr/lib/llvm-15/lib/libclang-15.so.1
# /usr/lib/x86_64-linux-gnu/libclang-15.so.1
#
# This is what exists:
#
# # ls -l /usr/lib/x86_64-linux-gnu/libclang-15*
# lrwxrwxrwx 1 root root 16 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so -> libclang-15.so.1
# lrwxrwxrwx 1 root root 21 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15 -> libclang-15.so.15.0.0
# -rw-r--r-- 1 root root 31835760 Sep 5 13:31 /usr/lib/x86_64-linux-gnu/libclang-15.so.15.0.0
#
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) rarch=x86_64 ;; \
arm64) rarch=aarch64 ;; \
*) exit 1 ;; \
esac \
&& ln -rsf /usr/lib/$rarch-linux-gnu/libclang-15.so.15 /usr/lib/$rarch-linux-gnu/libclang-15.so.1
# repo versions doesn't work correctly with C++17 # repo versions doesn't work correctly with C++17
# also we push reports to s3, so we add index.html to subfolder urls # also we push reports to s3, so we add index.html to subfolder urls
# https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b # https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
# TODO: remove branch in a few weeks after merge, e.g. in May or June 2022 # TODO: remove branch in a few weeks after merge, e.g. in May or June 2022
RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser --branch llvm-14 \ #
# FIXME: update location of a repo
RUN git clone https://github.com/azat/woboq_codebrowser --branch llvm-15 \
&& cd woboq_codebrowser \ && cd woboq_codebrowser \
&& cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-14 -DCMAKE_C_COMPILER=clang-14 \ && cmake . -G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} \
&& make -j \ && ninja \
&& cd .. \ && cd .. \
&& rm -rf woboq_codebrowser && rm -rf woboq_codebrowser
@ -32,7 +57,7 @@ ENV SHA=nosha
ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data" ENV DATA="https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data"
CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \ CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-14 -DCMAKE_C_COMPILER=/usr/bin/clang-14 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \ cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=/usr/bin/clang-${LLVM_VERSION} -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
mkdir -p $HTML_RESULT_DIRECTORY && \ mkdir -p $HTML_RESULT_DIRECTORY && \
$CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \ $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\ cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\

View File

@ -19,7 +19,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir" echo "$script_dir"
repo_dir=ch repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-14_debug_none_unsplitted_disable_False_binary"} BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"}
BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"}
function clone function clone

View File

@ -2,7 +2,7 @@
set -euo pipefail set -euo pipefail
CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-14_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-15_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"}
CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}

View File

@ -61,7 +61,7 @@ function configure
cp -rv right/config left ||: cp -rv right/config left ||:
# Start a temporary server to rename the tables # Start a temporary server to rename the tables
while pkill clickhouse-serv; do echo . ; sleep 1 ; done while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed echo all killed
set -m # Spawn temporary in its own process groups set -m # Spawn temporary in its own process groups
@ -88,7 +88,7 @@ function configure
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
while pkill clickhouse-serv; do echo . ; sleep 1 ; done while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed echo all killed
# Make copies of the original db for both servers. Use hardlinks instead # Make copies of the original db for both servers. Use hardlinks instead
@ -106,7 +106,7 @@ function configure
function restart function restart
{ {
while pkill clickhouse-serv; do echo . ; sleep 1 ; done while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed echo all killed
# Change the jemalloc settings here. # Change the jemalloc settings here.
@ -1400,7 +1400,7 @@ case "$stage" in
while env kill -- -$watchdog_pid ; do sleep 1; done while env kill -- -$watchdog_pid ; do sleep 1; done
# Stop the servers to free memory for the subsequent query analysis. # Stop the servers to free memory for the subsequent query analysis.
while pkill clickhouse-serv; do echo . ; sleep 1 ; done while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo Servers stopped. echo Servers stopped.
;& ;&
"analyze_queries") "analyze_queries")

View File

@ -5,7 +5,7 @@ FROM ubuntu:20.04
ARG apt_archive="http://archive.ubuntu.com" ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=14 ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=15
RUN apt-get update \ RUN apt-get update \
&& apt-get install \ && apt-get install \
@ -56,6 +56,8 @@ RUN apt-get update \
# This symlink required by gcc to find lld compiler # This symlink required by gcc to find lld compiler
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
ARG CCACHE_VERSION=4.6.1 ARG CCACHE_VERSION=4.6.1
RUN mkdir /tmp/ccache \ RUN mkdir /tmp/ccache \

View File

@ -134,6 +134,13 @@ Example of configuration for versions later or equal to 22.8:
<max_size>10000000</max_size> <max_size>10000000</max_size>
</cache> </cache>
</disks> </disks>
<policies>
<volumes>
<main>
<disk>cache</disk>
</main>
</volumes>
</policies>
</storage_configuration> </storage_configuration>
``` ```
@ -151,6 +158,13 @@ Example of configuration for versions earlier than 22.8:
<data_cache_size>10000000</data_cache_size> <data_cache_size>10000000</data_cache_size>
</s3> </s3>
</disks> </disks>
<policies>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</policies>
</storage_configuration> </storage_configuration>
``` ```

View File

@ -13,7 +13,7 @@ Creates a table from a file. This table function is similar to [url](../../sql-r
**Syntax** **Syntax**
``` sql ``` sql
file(path, format, structure) file(path [,format] [,structure])
``` ```
**Parameters** **Parameters**

View File

@ -11,7 +11,7 @@ Provides table-like interface to select/insert files in [Amazon S3](https://aws.
**Syntax** **Syntax**
``` sql ``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
``` ```
**Arguments** **Arguments**

View File

@ -10,7 +10,7 @@ Allows processing files from [Amazon S3](https://aws.amazon.com/s3/) in parallel
**Syntax** **Syntax**
``` sql ``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) s3Cluster(cluster_name, source [,access_key_id, secret_access_key] [,format] [,structure])
``` ```
**Arguments** **Arguments**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Syntax** **Syntax**
``` sql ``` sql
url(URL, format, structure) url(URL [,format] [,structure])
``` ```
**Parameters** **Parameters**

View File

@ -13,7 +13,7 @@ sidebar_label: file
**Синтаксис** **Синтаксис**
``` sql ``` sql
file(path, format, structure) file(path [,format] [,structure])
``` ```
**Параметры** **Параметры**

View File

@ -11,7 +11,7 @@ sidebar_label: s3
**Синтаксис** **Синтаксис**
``` sql ``` sql
s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
``` ```
**Aргументы** **Aргументы**

View File

@ -11,7 +11,7 @@ sidebar_label: s3Cluster
**Синтаксис** **Синтаксис**
``` sql ``` sql
s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) s3Cluster(cluster_name, source [,access_key_id, secret_access_key] [,format] [,structure])
``` ```
**Аргументы** **Аргументы**

View File

@ -13,7 +13,7 @@ sidebar_label: url
**Синтаксис** **Синтаксис**
``` sql ``` sql
url(URL, format, structure) url(URL [,format] [,structure])
``` ```
**Параметры** **Параметры**

View File

@ -278,6 +278,71 @@ public:
} }
} }
void addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr __restrict place, const IColumn ** columns, Arena * arena, ssize_t) const final
{
std::unique_ptr<UInt8[]> final_null_flags = std::make_unique<UInt8[]>(row_end);
const size_t filter_column_num = number_of_arguments - 1;
if (is_nullable[filter_column_num])
{
const ColumnNullable * nullable_column = assert_cast<const ColumnNullable *>(columns[filter_column_num]);
const IColumn & filter_column = nullable_column->getNestedColumn();
const UInt8 * filter_null_map = nullable_column->getNullMapColumn().getData().data();
const UInt8 * filter_values = assert_cast<const ColumnUInt8 &>(filter_column).getData().data();
for (size_t i = row_begin; i < row_end; i++)
{
final_null_flags[i] = (null_is_skipped && filter_null_map[i]) || !filter_values[i];
}
}
else
{
const IColumn * filter_column = columns[filter_column_num];
const UInt8 * filter_values = assert_cast<const ColumnUInt8 *>(filter_column)->getData().data();
for (size_t i = row_begin; i < row_end; i++)
final_null_flags[i] = !filter_values[i];
}
const IColumn * nested_columns[number_of_arguments];
for (size_t arg = 0; arg < number_of_arguments; arg++)
{
if (is_nullable[arg])
{
const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[arg]);
if (null_is_skipped && (arg != filter_column_num))
{
const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
const UInt8 * col_null_map = nullmap_column.getData().data();
for (size_t r = row_begin; r < row_end; r++)
{
final_null_flags[r] |= col_null_map[r];
}
}
nested_columns[arg] = &nullable_col.getNestedColumn();
}
else
nested_columns[arg] = columns[arg];
}
bool at_least_one = false;
for (size_t i = row_begin; i < row_end; i++)
{
if (!final_null_flags[i])
{
at_least_one = true;
break;
}
}
if (at_least_one)
{
this->setFlag(place);
this->nested_function->addBatchSinglePlaceNotNull(
row_begin, row_end, this->nestedPlace(place), nested_columns, final_null_flags.get(), arena, -1);
}
}
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER
void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override void compileAdd(llvm::IRBuilderBase & builder, llvm::Value * aggregate_data_ptr, const DataTypes & arguments_types, const std::vector<llvm::Value *> & argument_values) const override

View File

@ -414,6 +414,109 @@ public:
this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena); this->nested_function->add(this->nestedPlace(place), nested_columns, row_num, arena);
} }
/// Adds rows [row_begin, row_end) to the single aggregation state at `place` in one batch.
///
/// Every per-row exclusion source (the optional UInt8 condition column at `if_argument_pos` and,
/// when null_is_skipped is set, the null maps of the Nullable arguments) is merged into a single
/// byte mask where non-zero means "skip this row". The nested function is then called once through
/// addBatchSinglePlaceNotNull with that mask. If no row survives the mask, the function returns
/// without calling setFlag or the nested function.
///
/// `if_argument_pos` is the index in `columns` of the condition column, or negative when there is none.
void addBatchSinglePlace(
    size_t row_begin,
    size_t row_end,
    AggregateDataPtr __restrict place,
    const IColumn ** columns,
    Arena * arena,
    ssize_t if_argument_pos) const final
{
    /// We are going to merge all the flags into a single one to be able to call the nested batching functions
    std::vector<const UInt8 *> nullable_filters;
    const IColumn * nested_columns[number_of_arguments];

    std::unique_ptr<UInt8[]> final_flags = nullptr;
    const UInt8 * final_flags_ptr = nullptr;

    if (if_argument_pos >= 0)
    {
        final_flags = std::make_unique<UInt8[]>(row_end);
        final_flags_ptr = final_flags.get();

        /// This must be a real counter (not a bool): it is compared against the number of rows below
        /// to detect the case where the condition excludes nothing and can be dropped as a filter.
        size_t included_elements = 0;
        const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
        for (size_t i = row_begin; i < row_end; i++)
        {
            final_flags[i] = !flags.data()[i];
            included_elements += !!flags.data()[i];
        }

        /// The condition rejects every row: nothing to add.
        if (included_elements == 0)
            return;

        /// Only keep the condition as a filter if it actually excludes some row.
        if (included_elements != (row_end - row_begin))
        {
            nullable_filters.push_back(final_flags_ptr);
        }
    }

    for (size_t i = 0; i < number_of_arguments; ++i)
    {
        if (is_nullable[i])
        {
            const ColumnNullable & nullable_col = assert_cast<const ColumnNullable &>(*columns[i]);
            nested_columns[i] = &nullable_col.getNestedColumn();
            if constexpr (null_is_skipped)
            {
                const ColumnUInt8 & nullmap_column = nullable_col.getNullMapColumn();
                nullable_filters.push_back(nullmap_column.getData().data());
            }
        }
        else
        {
            nested_columns[i] = columns[i];
        }
    }

    bool found_one = false;
    chassert(nullable_filters.size() > 0); /// We work under the assumption that we reach this because one argument was NULL
    if (nullable_filters.size() == 1)
    {
        /// We can avoid making copies of the only filter but we still need to check that there is data to be added
        final_flags_ptr = nullable_filters[0];
        for (size_t i = row_begin; i < row_end; i++)
        {
            if (!final_flags_ptr[i])
            {
                found_one = true;
                break;
            }
        }
    }
    else
    {
        /// Several filters: OR them together into an owned buffer (zero-initialized when freshly allocated).
        if (!final_flags)
        {
            final_flags = std::make_unique<UInt8[]>(row_end);
            final_flags_ptr = final_flags.get();
        }

        /// If the first filter already is our own buffer (the condition mask), do not OR it with itself.
        const size_t filter_start = nullable_filters[0] == final_flags_ptr ? 1 : 0;
        for (size_t filter = filter_start; filter < nullable_filters.size(); filter++)
        {
            for (size_t i = row_begin; i < row_end; i++)
                final_flags[i] |= nullable_filters[filter][i];
        }

        for (size_t i = row_begin; i < row_end; i++)
        {
            if (!final_flags_ptr[i])
            {
                found_one = true;
                break;
            }
        }
    }

    if (!found_one)
        return; // Nothing to do and nothing to mark

    this->setFlag(place);
    this->nested_function->addBatchSinglePlaceNotNull(
        row_begin, row_end, this->nestedPlace(place), nested_columns, final_flags_ptr, arena, -1);
}
#if USE_EMBEDDED_COMPILER #if USE_EMBEDDED_COMPILER

View File

@ -424,6 +424,8 @@ public:
alloc(new_size_degree); alloc(new_size_degree);
if (m_size <= 1)
{
for (size_t i = 0; i < m_size; ++i) for (size_t i = 0; i < m_size; ++i)
{ {
HashValue x = 0; HashValue x = 0;
@ -434,6 +436,20 @@ public:
reinsertImpl(x); reinsertImpl(x);
} }
} }
else
{
auto hs = std::make_unique<HashValue[]>(m_size);
rb.readStrict(reinterpret_cast<char *>(hs.get()), m_size * sizeof(HashValue));
for (size_t i = 0; i < m_size; ++i)
{
if (hs[i] == 0)
has_zero = true;
else
reinsertImpl(hs[i]);
}
}
}
void readAndMerge(DB::ReadBuffer & rb) void readAndMerge(DB::ReadBuffer & rb)
{ {
@ -458,6 +474,8 @@ public:
resize(new_size_degree); resize(new_size_degree);
} }
if (rhs_size <= 1)
{
for (size_t i = 0; i < rhs_size; ++i) for (size_t i = 0; i < rhs_size; ++i)
{ {
HashValue x = 0; HashValue x = 0;
@ -465,6 +483,17 @@ public:
insertHash(x); insertHash(x);
} }
} }
else
{
auto hs = std::make_unique<HashValue[]>(rhs_size);
rb.readStrict(reinterpret_cast<char *>(hs.get()), rhs_size * sizeof(HashValue));
for (size_t i = 0; i < rhs_size; ++i)
{
insertHash(hs[i]);
}
}
}
static void skip(DB::ReadBuffer & rb) static void skip(DB::ReadBuffer & rb)
{ {

View File

@ -722,5 +722,3 @@ public:
return res; return res;
} }
}; };
template <typename Key, typename Payload> constexpr size_t ArrayCache<Key, Payload>::min_chunk_size;

View File

@ -135,7 +135,7 @@ MemoryStatisticsOS::Data MemoryStatisticsOS::get() const
struct kinfo_proc kp; struct kinfo_proc kp;
size_t len = sizeof(struct kinfo_proc); size_t len = sizeof(struct kinfo_proc);
if (-1 == ::sysctl(mib, 4, &kp, &len, NULL, 0)) if (-1 == ::sysctl(mib, 4, &kp, &len, nullptr, 0))
throwFromErrno("Cannot sysctl(kern.proc.pid." + std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR); throwFromErrno("Cannot sysctl(kern.proc.pid." + std::to_string(self) + ")", ErrorCodes::SYSTEM_ERROR);
if (sizeof(struct kinfo_proc) != len) if (sizeof(struct kinfo_proc) != len)

View File

@ -189,6 +189,9 @@ KeeperConfigurationAndSettings::loadFromConfig(const Poco::Util::AbstractConfigu
ret->coordination_settings->loadFromConfig("keeper_server.coordination_settings", config); ret->coordination_settings->loadFromConfig("keeper_server.coordination_settings", config);
if (ret->coordination_settings->quorum_reads)
LOG_WARNING(&Poco::Logger::get("KeeperConfigurationAndSettings"), "Setting 'quorum_reads' is deprecated. Please use 'read_mode'");
return ret; return ret;
} }

View File

@ -26,6 +26,7 @@ struct Settings;
M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \ M(Milliseconds, heart_beat_interval_ms, 500, "Heartbeat interval between quorum nodes", 0) \
M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \ M(Milliseconds, election_timeout_lower_bound_ms, 1000, "Lower bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \ M(Milliseconds, election_timeout_upper_bound_ms, 2000, "Upper bound of election timer (avoid too often leader elections)", 0) \
M(Milliseconds, leadership_expiry, 0, "How often will leader node check if it still has majority. Set it lower or equal to election_timeout_lower_bound_ms to have linearizable reads.", 0) \
M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \ M(UInt64, reserved_log_items, 100000, "How many log items to store (don't remove during compaction)", 0) \
M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \ M(UInt64, snapshot_distance, 100000, "How many log items we have to collect to write new snapshot", 0) \
M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \ M(Bool, auto_forwarding, true, "Allow to forward write requests from followers to leader", 0) \
@ -38,11 +39,12 @@ struct Settings;
M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \ M(UInt64, stale_log_gap, 10000, "When node became stale and should receive snapshots from leader", 0) \
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \ M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
M(UInt64, max_requests_batch_size, 100, "Max size of batch in requests count before it will be sent to RAFT", 0) \ M(UInt64, max_requests_batch_size, 100, "Max size of batch in requests count before it will be sent to RAFT", 0) \
M(Bool, quorum_reads, false, "Execute read requests as writes through whole RAFT consesus with similar speed", 0) \ M(Bool, quorum_reads, false, "Deprecated - use read_mode. Execute read requests as writes through whole RAFT consesus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \ M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \ M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \
M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \ M(Bool, compress_snapshots_with_zstd_format, true, "Write compressed snapshots in ZSTD format (instead of custom LZ4)", 0) \
M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) M(UInt64, configuration_change_tries_count, 20, "How many times we will try to apply configuration change (add/remove server) to the cluster", 0) \
M(String, read_mode, "nonlinear", "How should reads be processed. Valid values: 'nonlinear', 'fastlinear', 'quorum'. 'nonlinear' is the fastest option because there are no consistency requirements", 0)
DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS) DECLARE_SETTINGS_TRAITS(CoordinationSettingsTraits, LIST_OF_COORDINATION_SETTINGS)

View File

@ -1,4 +1,5 @@
#include <Coordination/KeeperDispatcher.h> #include <Coordination/KeeperDispatcher.h>
#include <libnuraft/async.hxx>
#include <Common/setThreadName.h> #include <Common/setThreadName.h>
#include <Common/ZooKeeper/KeeperException.h> #include <Common/ZooKeeper/KeeperException.h>
#include <future> #include <future>
@ -6,6 +7,8 @@
#include <Poco/Path.h> #include <Poco/Path.h>
#include <Common/hex.h> #include <Common/hex.h>
#include <filesystem> #include <filesystem>
#include <iterator>
#include <limits>
#include <Common/checkStackSize.h> #include <Common/checkStackSize.h>
#include <Common/CurrentMetrics.h> #include <Common/CurrentMetrics.h>
@ -30,22 +33,83 @@ namespace ErrorCodes
KeeperDispatcher::KeeperDispatcher() KeeperDispatcher::KeeperDispatcher()
: responses_queue(std::numeric_limits<size_t>::max()) : responses_queue(std::numeric_limits<size_t>::max())
, read_requests_queue(std::numeric_limits<size_t>::max())
, finalize_requests_queue(std::numeric_limits<size_t>::max())
, configuration_and_settings(std::make_shared<KeeperConfigurationAndSettings>()) , configuration_and_settings(std::make_shared<KeeperConfigurationAndSettings>())
, log(&Poco::Logger::get("KeeperDispatcher")) , log(&Poco::Logger::get("KeeperDispatcher"))
{ {
} }
/// ZooKeeper has 2 requirements:
/// - writes need to be linearizable
/// - all requests from single session need to be processed in the order of their arrival
///
/// Because of that, we cannot process read and write requests from SAME session at the same time.
/// To be able to process read and write requests in parallel we need to make sure that only 1 type
/// of request is being processed from a single session.
/// Multiple types from different sessions can be processed at the same time.
///
/// We do some in-session housekeeping to make sure that the multithreaded request processing is correct.
/// When a request is received from a client, we check if there are requests being processed from that same
/// session, and if yes, of what type. If the types are the same, and there are no requests of different
/// type in between, we can instantly add it to the active request queue. Otherwise, we need to wait until
/// all requests of the other type are processed.
///
/// There are multiple threads used for processing the request, each of them communicating with a queue.
/// Assumption: only one type of request (read or write) is being processed from the same session at any point in time.
///
/// requestThread -> requests currently being processed
/// readRequestThread -> thread for processing read requests
/// finalizeRequestThread -> thread for finalizing requests:
/// - in-session housekeeping, add requests to the active request queue if there are any
///
/// If reads are linearizable without quorum, a request can possibly wait for a certain log to be committed.
/// In that case we add it to the waiting queue for that log.
/// When that log is committed, the committing thread will send that read request to readRequestThread so it can be processed.
///
void KeeperDispatcher::requestThread() void KeeperDispatcher::requestThread()
{ {
setThreadName("KeeperReqT"); setThreadName("KeeperReqT");
/// Result of requests batch from previous iteration /// Result of requests batch from previous iteration
RaftAppendResult prev_result = nullptr; RaftResult prev_result = nullptr;
/// Requests from previous iteration. We store them to be able const auto previous_quorum_done = [&] { return !prev_result || prev_result->has_result() || prev_result->get_result_code() != nuraft::cmd_result_code::OK; };
/// to send errors to the client.
KeeperStorage::RequestsForSessions prev_batch;
const auto needs_quorum = [](const auto & coordination_settings, const auto & request)
{
return coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum" || !request.request->isReadRequest();
};
KeeperStorage::RequestsForSessions quorum_requests;
KeeperStorage::RequestsForSessions read_requests;
auto process_quorum_requests = [&, this]() mutable
{
/// Forcefully process all previous pending requests
if (prev_result)
forceWaitAndProcessResult(prev_result);
prev_result = server->putRequestBatch(quorum_requests);
if (prev_result)
{
prev_result->when_ready([&, requests_for_sessions = std::move(quorum_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> &) mutable
{
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
else if (result.get_result_code() != nuraft::cmd_result_code::OK)
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
});
}
quorum_requests.clear();
};
/// ZooKeeper requires that the requests inside a single session are processed in a strict order
/// (we cannot process later requests before all the previous ones are processed)
/// By making sure that at this point we can either have just read requests or just write requests
/// from a single session, we can process them independently
while (!shutdown_called) while (!shutdown_called)
{ {
KeeperStorage::RequestForSession request; KeeperStorage::RequestForSession request;
@ -54,94 +118,67 @@ void KeeperDispatcher::requestThread()
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds(); uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_size = coordination_settings->max_requests_batch_size; uint64_t max_batch_size = coordination_settings->max_requests_batch_size;
/// The code below do a very simple thing: batch all write (quorum) requests into vector until
/// previous write batch is not finished or max_batch size achieved. The main complexity goes from
/// the ability to process read requests without quorum (from local state). So when we are collecting
/// requests into a batch we must check that the new request is not read request. Otherwise we have to
/// process all already accumulated write requests, wait them synchronously and only after that process
/// read request. So reads are some kind of "separator" for writes.
try try
{ {
if (requests_queue->tryPop(request, max_wait)) if (active_requests_queue->tryPop(request, max_wait))
{ {
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
if (shutdown_called) if (shutdown_called)
break; break;
KeeperStorage::RequestsForSessions current_batch; if (needs_quorum(coordination_settings, request))
quorum_requests.emplace_back(request);
bool has_read_request = false; else
read_requests.emplace_back(request);
/// If new request is not read request or we must to process it through quorum.
/// Otherwise we will process it locally.
if (coordination_settings->quorum_reads || !request.request->isReadRequest())
{
current_batch.emplace_back(request);
/// Waiting until previous append will be successful, or batch is big enough /// Waiting until previous append will be successful, or batch is big enough
/// has_result == false && get_result_code == OK means that our request still not processed. /// has_result == false && get_result_code == OK means that our request still not processed.
/// Sometimes NuRaft set errorcode without setting result, so we check both here. /// Sometimes NuRaft set errorcode without setting result, so we check both here.
while (prev_result && (!prev_result->has_result() && prev_result->get_result_code() == nuraft::cmd_result_code::OK) && current_batch.size() <= max_batch_size) while (true)
{ {
if (quorum_requests.size() > max_batch_size)
break;
if (read_requests.size() > max_batch_size)
{
processReadRequests(coordination_settings, read_requests);
if (previous_quorum_done())
break;
}
/// Trying to get batch requests as fast as possible /// Trying to get batch requests as fast as possible
if (requests_queue->tryPop(request, 1)) if (active_requests_queue->tryPop(request, 1))
{ {
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
/// Don't append read request into batch, we have to process them separately if (needs_quorum(coordination_settings, request))
if (!coordination_settings->quorum_reads && request.request->isReadRequest()) quorum_requests.emplace_back(request);
{ else
has_read_request = true; read_requests.emplace_back(request);
break;
} }
else else
{ {
/// batch of read requests can send at most one request
/// so we don't care if the previous batch hasn't received response
if (!read_requests.empty())
processReadRequests(coordination_settings, read_requests);
current_batch.emplace_back(request); /// if we still didn't process previous batch we can
} /// increase our current batch even more
if (previous_quorum_done())
break;
} }
if (shutdown_called) if (shutdown_called)
break; break;
} }
}
else
has_read_request = true;
if (shutdown_called) if (shutdown_called)
break; break;
/// Forcefully process all previous pending requests if (!quorum_requests.empty())
if (prev_result) process_quorum_requests();
forceWaitAndProcessResult(prev_result, prev_batch);
/// Process collected write requests batch
if (!current_batch.empty())
{
auto result = server->putRequestBatch(current_batch);
if (result)
{
if (has_read_request) /// If we will execute read request next, than we have to process result now
forceWaitAndProcessResult(result, current_batch);
}
else
{
addErrorResponses(current_batch, Coordination::Error::ZCONNECTIONLOSS);
current_batch.clear();
}
prev_batch = std::move(current_batch);
prev_result = result;
}
/// Read request always goes after write batch (last request)
if (has_read_request)
{
if (server->isLeaderAlive())
server->putLocalReadRequest(request);
else
addErrorResponses({request}, Coordination::Error::ZCONNECTIONLOSS);
}
} }
} }
catch (...) catch (...)
@ -151,6 +188,72 @@ void KeeperDispatcher::requestThread()
} }
} }
void KeeperDispatcher::processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests)
{
if (coordination_settings->read_mode.toString() == "fastlinear")
{
// we just want to know what's the current latest committed log on Leader node
auto leader_info_result = server->getLeaderInfo();
if (leader_info_result)
{
leader_info_result->when_ready([&, requests_for_sessions = std::move(read_requests)](nuraft::cmd_result<nuraft::ptr<nuraft::buffer>> & result, nuraft::ptr<std::exception> & exception) mutable
{
if (!result.get_accepted() || result.get_result_code() == nuraft::cmd_result_code::TIMEOUT)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
return;
}
if (result.get_result_code() != nuraft::cmd_result_code::OK)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
if (exception)
{
LOG_INFO(log, "Got exception while waiting for read results {}", exception->what());
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
auto & leader_info_ctx = result.get();
if (!leader_info_ctx)
{
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
return;
}
KeeperServer::NodeInfo leader_info;
leader_info.term = leader_info_ctx->get_ulong();
leader_info.last_committed_index = leader_info_ctx->get_ulong();
std::lock_guard lock(leader_waiter_mutex);
auto node_info = server->getNodeInfo();
/// we're behind, we need to wait
if (node_info.term < leader_info.term || node_info.last_committed_index < leader_info.last_committed_index)
{
auto & leader_waiter = leader_waiters[leader_info];
leader_waiter.insert(leader_waiter.end(), requests_for_sessions.begin(), requests_for_sessions.end());
LOG_TRACE(log, "waiting for term {}, idx {}", leader_info.term, leader_info.last_committed_index);
}
/// process it in background thread
else if (!read_requests_queue.push(std::move(requests_for_sessions)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
});
}
}
else
{
assert(coordination_settings->read_mode.toString() == "nonlinear");
if (!read_requests_queue.push(std::move(read_requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
read_requests.clear();
}
void KeeperDispatcher::responseThread() void KeeperDispatcher::responseThread()
{ {
setThreadName("KeeperRspT"); setThreadName("KeeperRspT");
@ -200,6 +303,65 @@ void KeeperDispatcher::snapshotThread()
} }
} }
/// Background thread for processing read requests
void KeeperDispatcher::readRequestThread()
{
setThreadName("KeeperReadT");
while (!shutdown_called)
{
KeeperStorage::RequestsForSessions requests;
if (!read_requests_queue.pop(requests))
break;
if (shutdown_called)
break;
try
{
for (const auto & request_info : requests)
{
if (server->isLeaderAlive())
server->putLocalReadRequest(request_info);
else
addErrorResponses({request_info}, Coordination::Error::ZCONNECTIONLOSS);
}
if (!finalize_requests_queue.push(std::move(requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
/// We finalize requests every time we commit a single log with request
/// or process a batch of read requests.
/// Because it can get heavy, we do it in background thread.
void KeeperDispatcher::finalizeRequestsThread()
{
setThreadName("KeeperFinalT");
while (!shutdown_called)
{
KeeperStorage::RequestsForSessions requests;
if (!finalize_requests_queue.pop(requests))
break;
if (shutdown_called)
break;
try
{
finalizeRequests(requests);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response) void KeeperDispatcher::setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response)
{ {
std::lock_guard lock(session_to_response_callback_mutex); std::lock_guard lock(session_to_response_callback_mutex);
@ -255,6 +417,30 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count(); request_info.time = duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
request_info.session_id = session_id; request_info.session_id = session_id;
{
std::lock_guard lock{unprocessed_request_mutex};
auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
if (unprocessed_requests_it == unprocessed_requests_for_session.end())
{
auto & unprocessed_requests = unprocessed_requests_for_session[session_id];
unprocessed_requests.unprocessed_num = 1;
unprocessed_requests.is_read = request->isReadRequest();
}
else
{
auto & unprocessed_requests = unprocessed_requests_it->second;
/// queue is not empty, or the request types don't match, put it in the waiting queue
if (!unprocessed_requests.request_queue.empty() || unprocessed_requests.is_read != request->isReadRequest())
{
unprocessed_requests.request_queue.push_back(std::move(request_info));
return true;
}
++unprocessed_requests.unprocessed_num;
}
}
std::lock_guard lock(push_request_mutex); std::lock_guard lock(push_request_mutex);
if (shutdown_called) if (shutdown_called)
@ -263,10 +449,10 @@ bool KeeperDispatcher::putRequest(const Coordination::ZooKeeperRequestPtr & requ
/// Put close requests without timeouts /// Put close requests without timeouts
if (request->getOpNum() == Coordination::OpNum::Close) if (request->getOpNum() == Coordination::OpNum::Close)
{ {
if (!requests_queue->push(std::move(request_info))) if (!active_requests_queue->push(std::move(request_info)))
throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR); throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
} }
else if (!requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds())) else if (!active_requests_queue->tryPush(std::move(request_info), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
{ {
throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED); throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
} }
@ -279,13 +465,23 @@ void KeeperDispatcher::initialize(const Poco::Util::AbstractConfiguration & conf
LOG_DEBUG(log, "Initializing storage dispatcher"); LOG_DEBUG(log, "Initializing storage dispatcher");
configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper); configuration_and_settings = KeeperConfigurationAndSettings::loadFromConfig(config, standalone_keeper);
requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size); active_requests_queue = std::make_unique<RequestsQueue>(configuration_and_settings->coordination_settings->max_requests_batch_size);
request_thread = ThreadFromGlobalPool([this] { requestThread(); }); request_thread = ThreadFromGlobalPool([this] { requestThread(); });
responses_thread = ThreadFromGlobalPool([this] { responseThread(); }); responses_thread = ThreadFromGlobalPool([this] { responseThread(); });
snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); }); snapshot_thread = ThreadFromGlobalPool([this] { snapshotThread(); });
read_request_thread = ThreadFromGlobalPool([this] { readRequestThread(); });
finalize_requests_thread = ThreadFromGlobalPool([this] { finalizeRequestsThread(); });
server = std::make_unique<KeeperServer>(configuration_and_settings, config, responses_queue, snapshots_queue); server = std::make_unique<KeeperServer>(
configuration_and_settings,
config,
responses_queue,
snapshots_queue,
[this](const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
{ onRequestCommit(request_for_session, log_term, log_idx); },
[this](uint64_t term, uint64_t last_idx)
{ onApplySnapshot(term, last_idx); });
try try
{ {
@ -333,9 +529,9 @@ void KeeperDispatcher::shutdown()
if (session_cleaner_thread.joinable()) if (session_cleaner_thread.joinable())
session_cleaner_thread.join(); session_cleaner_thread.join();
if (requests_queue) if (active_requests_queue)
{ {
requests_queue->finish(); active_requests_queue->finish();
if (request_thread.joinable()) if (request_thread.joinable())
request_thread.join(); request_thread.join();
@ -349,6 +545,14 @@ void KeeperDispatcher::shutdown()
if (snapshot_thread.joinable()) if (snapshot_thread.joinable())
snapshot_thread.join(); snapshot_thread.join();
read_requests_queue.finish();
if (read_request_thread.joinable())
read_request_thread.join();
finalize_requests_queue.finish();
if (finalize_requests_thread.joinable())
finalize_requests_thread.join();
update_configuration_queue.finish(); update_configuration_queue.finish();
if (update_configuration_thread.joinable()) if (update_configuration_thread.joinable())
update_configuration_thread.join(); update_configuration_thread.join();
@ -357,7 +561,7 @@ void KeeperDispatcher::shutdown()
KeeperStorage::RequestForSession request_for_session; KeeperStorage::RequestForSession request_for_session;
/// Set session expired for all pending requests /// Set session expired for all pending requests
while (requests_queue && requests_queue->tryPop(request_for_session)) while (active_requests_queue && active_requests_queue->tryPop(request_for_session))
{ {
CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets); CurrentMetrics::sub(CurrentMetrics::KeeperOutstandingRequets);
auto response = request_for_session.request->makeResponse(); auto response = request_for_session.request->makeResponse();
@ -474,7 +678,7 @@ void KeeperDispatcher::sessionCleanerTask()
}; };
{ {
std::lock_guard lock(push_request_mutex); std::lock_guard lock(push_request_mutex);
if (!requests_queue->push(std::move(request_info))) if (!active_requests_queue->push(std::move(request_info)))
LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions"); LOG_INFO(log, "Cannot push close request to queue while cleaning outdated sessions");
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
} }
@ -524,19 +728,12 @@ void KeeperDispatcher::addErrorResponses(const KeeperStorage::RequestsForSession
} }
} }
void KeeperDispatcher::forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions) void KeeperDispatcher::forceWaitAndProcessResult(RaftResult & result)
{ {
if (!result->has_result()) if (!result->has_result())
result->get(); result->get();
/// If we get some errors, than send them to clients
if (!result->get_accepted() || result->get_result_code() == nuraft::cmd_result_code::TIMEOUT)
addErrorResponses(requests_for_sessions, Coordination::Error::ZOPERATIONTIMEOUT);
else if (result->get_result_code() != nuraft::cmd_result_code::OK)
addErrorResponses(requests_for_sessions, Coordination::Error::ZCONNECTIONLOSS);
result = nullptr; result = nullptr;
requests_for_sessions.clear();
} }
int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms) int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
@ -584,7 +781,7 @@ int64_t KeeperDispatcher::getSessionID(int64_t session_timeout_ms)
/// Push new session request to queue /// Push new session request to queue
{ {
std::lock_guard lock(push_request_mutex); std::lock_guard lock(push_request_mutex);
if (!requests_queue->tryPush(std::move(request_info), session_timeout_ms)) if (!active_requests_queue->tryPush(std::move(request_info), session_timeout_ms))
throw Exception("Cannot push session id request to queue within session timeout", ErrorCodes::TIMEOUT_EXCEEDED); throw Exception("Cannot push session id request to queue within session timeout", ErrorCodes::TIMEOUT_EXCEEDED);
CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets); CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);
} }
@ -657,6 +854,122 @@ void KeeperDispatcher::updateConfigurationThread()
} }
} }
// Updates the per-session bookkeeping after a batch of requests has been processed:
// - decreases the number of unprocessed (in-flight) requests of every session present in the batch
// - once a session has no in-flight requests left, either forgets the session (nothing is waiting)
//   or switches to the other request type and moves the leading run of waiting requests of that type
//   to the active queue
//
// Throws SYSTEM_ERROR / TIMEOUT_EXCEEDED if a waiting request cannot be pushed to the active queue.
void KeeperDispatcher::finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions)
{
    /// Number of finished requests in this batch, grouped by session.
    std::unordered_map<int64_t, size_t> counts_for_session;
    for (const auto & request_for_session : requests_for_sessions)
        ++counts_for_session[request_for_session.session_id];

    std::lock_guard lock{unprocessed_request_mutex};
    for (const auto & [session_id, count] : counts_for_session)
    {
        auto unprocessed_requests_it = unprocessed_requests_for_session.find(session_id);
        if (unprocessed_requests_it == unprocessed_requests_for_session.end())
            continue;

        auto & unprocessed_requests = unprocessed_requests_it->second;

        /// Some requests reach the active queue without being counted here (e.g. close requests pushed by
        /// the session cleaner), so `count` may exceed the counter. Clamp instead of letting the unsigned
        /// value wrap around, which would keep the session's waiting requests stuck forever.
        if (count >= unprocessed_requests.unprocessed_num)
            unprocessed_requests.unprocessed_num = 0;
        else
            unprocessed_requests.unprocessed_num -= count;

        if (unprocessed_requests.unprocessed_num != 0)
            continue;

        auto & unprocessed_requests_queue = unprocessed_requests.request_queue;
        if (unprocessed_requests_queue.empty())
        {
            unprocessed_requests_for_session.erase(unprocessed_requests_it);
            continue;
        }

        unprocessed_requests.is_read = !unprocessed_requests.is_read;

        // start adding next type of requests
        while (!unprocessed_requests_queue.empty() && unprocessed_requests_queue.front().request->isReadRequest() == unprocessed_requests.is_read)
        {
            auto & front_request = unprocessed_requests_queue.front();

            /// Put close requests without timeouts
            if (front_request.request->getOpNum() == Coordination::OpNum::Close)
            {
                if (!active_requests_queue->push(std::move(front_request)))
                    throw Exception("Cannot push request to queue", ErrorCodes::SYSTEM_ERROR);
            }
            else if (!active_requests_queue->tryPush(std::move(front_request), configuration_and_settings->coordination_settings->operation_timeout_ms.totalMilliseconds()))
            {
                throw Exception("Cannot push request to queue within operation timeout", ErrorCodes::TIMEOUT_EXCEEDED);
            }

            /// Every pop from the active queue decrements this metric, so every successful push has to
            /// increment it; requests that waited in the per-session queue were not accounted for yet.
            CurrentMetrics::add(CurrentMetrics::KeeperOutstandingRequets);

            ++unprocessed_requests.unprocessed_num;
            unprocessed_requests_queue.pop_front();
        }
    }
}
// Commit hook: called with the request stored in a committed log and that log's term and index.
// - queues the request for finalization
// - releases the read requests that were waiting for exactly this (term, index) to the read thread
void KeeperDispatcher::onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx)
{
    if (!finalize_requests_queue.push({request_for_session}))
        throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");

    KeeperStorage::RequestsForSessions released_reads;
    {
        std::lock_guard lock(leader_waiter_mutex);

        const KeeperServer::NodeInfo committed_log{.term = log_term, .last_committed_index = log_idx};
        if (auto waiters_it = leader_waiters.find(committed_log); waiters_it != leader_waiters.end())
        {
            released_reads = std::move(waiters_it->second);
            leader_waiters.erase(waiters_it);
        }
    }

    /// Nothing was waiting for this log (or the waiting batch was empty): nothing to hand over.
    if (released_reads.empty())
        return;

    const bool handed_over = read_requests_queue.push(std::move(released_reads));
    if (!handed_over)
        throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
/// Process all read request that are waiting for lower or currently last processed log index
void KeeperDispatcher::onApplySnapshot(uint64_t term, uint64_t last_idx)
{
KeeperServer::NodeInfo current_node_info{term, last_idx};
KeeperStorage::RequestsForSessions requests;
{
std::lock_guard lock(leader_waiter_mutex);
for (auto leader_waiter_it = leader_waiters.begin(); leader_waiter_it != leader_waiters.end();)
{
auto waiting_node_info = leader_waiter_it->first;
if (waiting_node_info.term <= current_node_info.term
&& waiting_node_info.last_committed_index <= current_node_info.last_committed_index)
{
for (auto & request : leader_waiter_it->second)
{
requests.push_back(std::move(request));
}
leader_waiter_it = leader_waiters.erase(leader_waiter_it);
}
else
{
++leader_waiter_it;
}
}
}
if (requests.empty())
return;
if (!read_requests_queue.push(std::move(requests)))
throw Exception(ErrorCodes::SYSTEM_ERROR, "Cannot push read requests to queue");
}
bool KeeperDispatcher::isServerActive() const bool KeeperDispatcher::isServerActive() const
{ {
return checkInit() && hasLeader() && !server->isRecovering(); return checkInit() && hasLeader() && !server->isRecovering();
@ -721,7 +1034,7 @@ Keeper4LWInfo KeeperDispatcher::getKeeper4LWInfo() const
Keeper4LWInfo result = server->getPartiallyFilled4LWInfo(); Keeper4LWInfo result = server->getPartiallyFilled4LWInfo();
{ {
std::lock_guard lock(push_request_mutex); std::lock_guard lock(push_request_mutex);
result.outstanding_requests_count = requests_queue->size(); result.outstanding_requests_count = active_requests_queue->size();
} }
{ {
std::lock_guard lock(session_to_response_callback_mutex); std::lock_guard lock(session_to_response_callback_mutex);

View File

@ -32,9 +32,12 @@ private:
using UpdateConfigurationQueue = ConcurrentBoundedQueue<ConfigUpdateAction>; using UpdateConfigurationQueue = ConcurrentBoundedQueue<ConfigUpdateAction>;
/// Size depends on coordination settings /// Size depends on coordination settings
std::unique_ptr<RequestsQueue> requests_queue; /// Request currently being processed
std::unique_ptr<RequestsQueue> active_requests_queue;
ResponsesQueue responses_queue; ResponsesQueue responses_queue;
SnapshotsQueue snapshots_queue{1}; SnapshotsQueue snapshots_queue{1};
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> read_requests_queue;
ConcurrentBoundedQueue<KeeperStorage::RequestsForSessions> finalize_requests_queue;
/// More than 1k updates is definitely misconfiguration. /// More than 1k updates is definitely misconfiguration.
UpdateConfigurationQueue update_configuration_queue{1000}; UpdateConfigurationQueue update_configuration_queue{1000};
@ -64,6 +67,8 @@ private:
ThreadFromGlobalPool snapshot_thread; ThreadFromGlobalPool snapshot_thread;
/// Apply or wait for configuration changes /// Apply or wait for configuration changes
ThreadFromGlobalPool update_configuration_thread; ThreadFromGlobalPool update_configuration_thread;
ThreadFromGlobalPool read_request_thread;
ThreadFromGlobalPool finalize_requests_thread;
/// RAFT wrapper. /// RAFT wrapper.
std::unique_ptr<KeeperServer> server; std::unique_ptr<KeeperServer> server;
@ -77,6 +82,34 @@ private:
/// Counter for new session_id requests. /// Counter for new session_id requests.
std::atomic<int64_t> internal_session_id_counter{0}; std::atomic<int64_t> internal_session_id_counter{0};
/// To serve a read request locally, this node must have committed at least the log that was the last committed log
/// on the leader at the time the request was made.
/// If the node is stale, we need to wait to commit that log before doing local read requests to achieve
/// linearizability.
std::unordered_map<KeeperServer::NodeInfo, KeeperStorage::RequestsForSessions> leader_waiters;
std::mutex leader_waiter_mutex;
/// We can be actively processing one type of requests (either read or write) from a single session.
/// If we receive a request of a type that is not currently being processed, we put it in the waiting queue.
/// Also, we want to process them in arriving order, so if we have a different type in the queue, we cannot process that request
/// but wait for all the previous requests to finish.
/// E.g. READ -> WRITE -> READ, the last READ will go to the waiting queue even though we are currently processing the first READ
/// because we have WRITE request before it that needs to be processed.
/// Per-session state used to keep read and write requests of one session from being processed at the same time.
struct UnprocessedRequests
{
/// How many requests of this session are currently in flight: incremented when a request is put
/// into the active request queue and decreased when requests are finalized.
size_t unprocessed_num{0};
/// Type of the requests of this session that are currently being processed:
/// true for read requests, false for all other requests.
bool is_read{false};
/// Requests of this session that could not be made active yet, in arrival order.
/// A request lands here when it is of a different type than the one being processed
/// or when other requests are already waiting in front of it.
std::list<KeeperStorage::RequestForSession> request_queue;
};
// Called every time a batch of requests are processed.
void finalizeRequests(const KeeperStorage::RequestsForSessions & requests_for_sessions);
std::unordered_map<int64_t, UnprocessedRequests> unprocessed_requests_for_session;
std::mutex unprocessed_request_mutex;
/// Thread put requests to raft /// Thread put requests to raft
void requestThread(); void requestThread();
/// Thread put responses for subscribed sessions /// Thread put responses for subscribed sessions
@ -88,6 +121,12 @@ private:
/// Thread apply or wait configuration changes from leader /// Thread apply or wait configuration changes from leader
void updateConfigurationThread(); void updateConfigurationThread();
void readRequestThread();
void finalizeRequestsThread();
void processReadRequests(const CoordinationSettingsPtr & coordination_settings, KeeperStorage::RequestsForSessions & read_requests);
void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response); void setResponse(int64_t session_id, const Coordination::ZooKeeperResponsePtr & response);
/// Add error responses for requests to responses queue. /// Add error responses for requests to responses queue.
@ -96,7 +135,7 @@ private:
/// Forcefully wait for result and sets errors if something when wrong. /// Forcefully wait for result and sets errors if something when wrong.
/// Clears both arguments /// Clears both arguments
void forceWaitAndProcessResult(RaftAppendResult & result, KeeperStorage::RequestsForSessions & requests_for_sessions); static void forceWaitAndProcessResult(RaftResult & result);
public: public:
/// Just allocate some objects, real initialization is done by `intialize method` /// Just allocate some objects, real initialization is done by `intialize method`
@ -116,6 +155,12 @@ public:
return server && server->checkInit(); return server && server->checkInit();
} }
/// Called when a single log with request is committed.
void onRequestCommit(const KeeperStorage::RequestForSession & request_for_session, uint64_t log_term, uint64_t log_idx);
/// Called when a snapshot is applied
void onApplySnapshot(uint64_t term, uint64_t last_idx);
/// Is server accepting requests, i.e. connected to the cluster /// Is server accepting requests, i.e. connected to the cluster
/// and achieved quorum /// and achieved quorum
bool isServerActive() const; bool isServerActive() const;

View File

@ -105,7 +105,9 @@ KeeperServer::KeeperServer(
const KeeperConfigurationAndSettingsPtr & configuration_and_settings_, const KeeperConfigurationAndSettingsPtr & configuration_and_settings_,
const Poco::Util::AbstractConfiguration & config, const Poco::Util::AbstractConfiguration & config,
ResponsesQueue & responses_queue_, ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_) SnapshotsQueue & snapshots_queue_,
KeeperStateMachine::CommitCallback commit_callback,
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback)
: server_id(configuration_and_settings_->server_id) : server_id(configuration_and_settings_->server_id)
, coordination_settings(configuration_and_settings_->coordination_settings) , coordination_settings(configuration_and_settings_->coordination_settings)
, log(&Poco::Logger::get("KeeperServer")) , log(&Poco::Logger::get("KeeperServer"))
@ -113,7 +115,7 @@ KeeperServer::KeeperServer(
, keeper_context{std::make_shared<KeeperContext>()} , keeper_context{std::make_shared<KeeperContext>()}
, create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true)) , create_snapshot_on_exit(config.getBool("keeper_server.create_snapshot_on_exit", true))
{ {
if (coordination_settings->quorum_reads) if (coordination_settings->quorum_reads || coordination_settings->read_mode.toString() == "quorum")
LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower."); LOG_WARNING(log, "Quorum reads enabled, Keeper will work slower.");
keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false); keeper_context->digest_enabled = config.getBool("keeper_server.digest_enabled", false);
@ -125,7 +127,9 @@ KeeperServer::KeeperServer(
configuration_and_settings_->snapshot_storage_path, configuration_and_settings_->snapshot_storage_path,
coordination_settings, coordination_settings,
keeper_context, keeper_context,
checkAndGetSuperdigest(configuration_and_settings_->super_digest)); checkAndGetSuperdigest(configuration_and_settings_->super_digest),
std::move(commit_callback),
std::move(apply_snapshot_callback));
state_manager = nuraft::cs_new<KeeperStateManager>( state_manager = nuraft::cs_new<KeeperStateManager>(
server_id, server_id,
@ -176,6 +180,13 @@ struct KeeperServer::KeeperRaftServer : public nuraft::raft_server
reconfigure(new_config); reconfigure(new_config);
} }
/// Build a `leader_status_request` message with every numeric field set to zero,
/// hand it to `send_msg_to_leader` and return whatever result that call produces.
RaftResult getLeaderInfo()
{
    /// Kept as a named local (not a temporary) because it is passed on to send_msg_to_leader.
    auto leader_status_request = nuraft::cs_new<nuraft::req_msg>(
        0ull, nuraft::msg_type::leader_status_request, 0, 0, 0ull, 0ull, 0ull);

    return send_msg_to_leader(leader_status_request);
}
void commit_in_bg() override void commit_in_bg() override
{ {
// For NuRaft, if any commit fails (uncaught exception) the whole server aborts as a safety // For NuRaft, if any commit fails (uncaught exception) the whole server aborts as a safety
@ -269,6 +280,20 @@ void KeeperServer::launchRaftServer(const Poco::Util::AbstractConfiguration & co
coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log); coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(), "election_timeout_lower_bound_ms", log);
params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning( params.election_timeout_upper_bound_ = getValueOrMaxInt32AndLogWarning(
coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log); coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(), "election_timeout_upper_bound_ms", log);
params.leadership_expiry_ = getValueOrMaxInt32AndLogWarning(coordination_settings->leadership_expiry.totalMilliseconds(), "leadership_expiry", log);
/// Extra handling of leadership_expiry_ when read_mode is "fastlinear":
///  - a value of 0 is replaced by election_timeout_lower_bound_;
///  - a value above election_timeout_lower_bound_ is kept, but a warning is logged.
if (coordination_settings->read_mode.toString() == "fastlinear")
{
    if (params.leadership_expiry_ == 0)
    {
        params.leadership_expiry_ = params.election_timeout_lower_bound_;
    }
    else if (params.leadership_expiry_ > params.election_timeout_lower_bound_)
    {
        /// The check above is against the *lower* election timeout bound, so the message
        /// has to name election_timeout_lower_bound_ms (it used to name the upper bound).
        LOG_WARNING(log, "To use fast linearizable reads, leadership_expiry should be set to a value that is less or equal to the election_timeout_lower_bound_ms. "
            "Based on current settings, there are no guarantees for linearizability of reads.");
    }
}
params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log); params.reserved_log_items_ = getValueOrMaxInt32AndLogWarning(coordination_settings->reserved_log_items, "reserved_log_items", log);
params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log); params.snapshot_distance_ = getValueOrMaxInt32AndLogWarning(coordination_settings->snapshot_distance, "snapshot_distance", log);
@ -487,7 +512,7 @@ void KeeperServer::putLocalReadRequest(const KeeperStorage::RequestForSession &
state_machine->processReadRequest(request_for_session); state_machine->processReadRequest(request_for_session);
} }
RaftAppendResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions) RaftResult KeeperServer::putRequestBatch(const KeeperStorage::RequestsForSessions & requests_for_sessions)
{ {
std::vector<nuraft::ptr<nuraft::buffer>> entries; std::vector<nuraft::ptr<nuraft::buffer>> entries;
for (const auto & request_for_session : requests_for_sessions) for (const auto & request_for_session : requests_for_sessions)
@ -713,6 +738,20 @@ std::vector<int64_t> KeeperServer::getDeadSessions()
return state_machine->getDeadSessions(); return state_machine->getDeadSessions();
} }
/// Ask the Raft instance for the leader info while holding `server_write_mutex`.
/// Returns nullptr without contacting the Raft instance when `is_recovering` is set.
RaftResult KeeperServer::getLeaderInfo()
{
    std::lock_guard lock{server_write_mutex};

    if (!is_recovering)
        return raft_instance->getLeaderInfo();

    return nullptr;
}
/// Collect this node's current Raft term and the state machine's last committed index.
KeeperServer::NodeInfo KeeperServer::getNodeInfo()
{
    NodeInfo info;
    /// Same read order as the field order: term first, commit index second.
    info.term = raft_instance->get_term();
    info.last_committed_index = state_machine->last_commit_index();
    return info;
}
ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config) ConfigUpdateActions KeeperServer::getConfigurationDiff(const Poco::Util::AbstractConfiguration & config)
{ {
auto diff = state_manager->getConfigurationDiff(config); auto diff = state_manager->getConfigurationDiff(config);

View File

@ -14,7 +14,7 @@
namespace DB namespace DB
{ {
using RaftAppendResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>; using RaftResult = nuraft::ptr<nuraft::cmd_result<nuraft::ptr<nuraft::buffer>>>;
class KeeperServer class KeeperServer
{ {
@ -71,7 +71,9 @@ public:
const KeeperConfigurationAndSettingsPtr & settings_, const KeeperConfigurationAndSettingsPtr & settings_,
const Poco::Util::AbstractConfiguration & config_, const Poco::Util::AbstractConfiguration & config_,
ResponsesQueue & responses_queue_, ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_); SnapshotsQueue & snapshots_queue_,
KeeperStateMachine::CommitCallback commit_callback,
KeeperStateMachine::ApplySnapshotCallback apply_snapshot_callback);
/// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings. /// Load state machine from the latest snapshot and load log storage. Start NuRaft with required settings.
void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true); void startup(const Poco::Util::AbstractConfiguration & config, bool enable_ipv6 = true);
@ -84,7 +86,7 @@ public:
/// Put batch of requests into Raft and get result of put. Responses will be set separately into /// Put batch of requests into Raft and get result of put. Responses will be set separately into
/// responses_queue. /// responses_queue.
RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests); RaftResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);
/// Return set of the non-active sessions /// Return set of the non-active sessions
std::vector<int64_t> getDeadSessions(); std::vector<int64_t> getDeadSessions();
@ -119,6 +121,17 @@ public:
int getServerID() const { return server_id; } int getServerID() const { return server_id; }
/// Pair of values describing a node's position: filled by getNodeInfo() and
/// usable as a hash-map key through the std::hash specialization for this type.
struct NodeInfo
{
/// Value of raft_instance->get_term() at the time getNodeInfo() was called.
uint64_t term;
/// Value of state_machine->last_commit_index() at the time getNodeInfo() was called.
uint64_t last_committed_index;
/// Defaulted comparison: two NodeInfo are equal when both fields are equal.
bool operator==(const NodeInfo &) const = default;
};
RaftResult getLeaderInfo();
NodeInfo getNodeInfo();
/// Get configuration diff between current configuration in RAFT and in XML file /// Get configuration diff between current configuration in RAFT and in XML file
ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config); ConfigUpdateActions getConfigurationDiff(const Poco::Util::AbstractConfiguration & config);
@ -126,10 +139,23 @@ public:
/// Synchronously check for update results with retries. /// Synchronously check for update results with retries.
void applyConfigurationUpdate(const ConfigUpdateAction & task); void applyConfigurationUpdate(const ConfigUpdateAction & task);
/// Wait configuration update for action. Used by followers. /// Wait configuration update for action. Used by followers.
/// Return true if update was successfully received. /// Return true if update was successfully received.
bool waitConfigurationUpdate(const ConfigUpdateAction & task); bool waitConfigurationUpdate(const ConfigUpdateAction & task);
}; };
} }
namespace std
{
/// std::hash support for DB::KeeperServer::NodeInfo.
/// Both fields are fed into a SipHash, `term` first and `last_committed_index` second,
/// and the 64-bit result is returned.
template <>
struct hash<DB::KeeperServer::NodeInfo>
{
    size_t operator()(const DB::KeeperServer::NodeInfo & info) const
    {
        SipHash sip;
        sip.update(info.term);
        sip.update(info.last_committed_index);
        return sip.get64();
    }
};
}

View File

@ -44,7 +44,9 @@ KeeperStateMachine::KeeperStateMachine(
const std::string & snapshots_path_, const std::string & snapshots_path_,
const CoordinationSettingsPtr & coordination_settings_, const CoordinationSettingsPtr & coordination_settings_,
const KeeperContextPtr & keeper_context_, const KeeperContextPtr & keeper_context_,
const std::string & superdigest_) const std::string & superdigest_,
CommitCallback commit_callback_,
ApplySnapshotCallback apply_snapshot_callback_)
: coordination_settings(coordination_settings_) : coordination_settings(coordination_settings_)
, snapshot_manager( , snapshot_manager(
snapshots_path_, snapshots_path_,
@ -58,6 +60,8 @@ KeeperStateMachine::KeeperStateMachine(
, last_committed_idx(0) , last_committed_idx(0)
, log(&Poco::Logger::get("KeeperStateMachine")) , log(&Poco::Logger::get("KeeperStateMachine"))
, superdigest(superdigest_) , superdigest(superdigest_)
, commit_callback(std::move(commit_callback_))
, apply_snapshot_callback(std::move(apply_snapshot_callback_))
, keeper_context(keeper_context_) , keeper_context(keeper_context_)
{ {
} }
@ -223,11 +227,11 @@ bool KeeperStateMachine::preprocess(const KeeperStorage::RequestForSession & req
return true; return true;
} }
nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, nuraft::buffer & data) nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit_ext(const ext_op_params & params)
{ {
auto request_for_session = parseRequest(data); auto request_for_session = parseRequest(*params.data);
if (!request_for_session.zxid) if (!request_for_session.zxid)
request_for_session.zxid = log_idx; request_for_session.zxid = params.log_idx;
/// Special processing of session_id request /// Special processing of session_id request
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID) if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
@ -272,8 +276,9 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
assertDigest(*request_for_session.digest, storage->getNodesDigest(true), *request_for_session.request, true); assertDigest(*request_for_session.digest, storage->getNodesDigest(true), *request_for_session.request, true);
} }
last_committed_idx = params.log_idx;
commit_callback(request_for_session, params.log_term, params.log_idx);
ProfileEvents::increment(ProfileEvents::KeeperCommits); ProfileEvents::increment(ProfileEvents::KeeperCommits);
last_committed_idx = log_idx;
return nullptr; return nullptr;
} }
@ -306,6 +311,7 @@ bool KeeperStateMachine::apply_snapshot(nuraft::snapshot & s)
ProfileEvents::increment(ProfileEvents::KeeperSnapshotApplys); ProfileEvents::increment(ProfileEvents::KeeperSnapshotApplys);
last_committed_idx = s.get_last_log_idx(); last_committed_idx = s.get_last_log_idx();
apply_snapshot_callback(s.get_last_log_term(), s.get_last_log_idx());
return true; return true;
} }
@ -320,6 +326,10 @@ void KeeperStateMachine::commit_config(const uint64_t /* log_idx */, nuraft::ptr
void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data) void KeeperStateMachine::rollback(uint64_t log_idx, nuraft::buffer & data)
{ {
auto request_for_session = parseRequest(data); auto request_for_session = parseRequest(data);
if (request_for_session.request->getOpNum() == Coordination::OpNum::SessionID)
return;
// If we received a log from an older node, use the log_idx as the zxid // If we received a log from an older node, use the log_idx as the zxid
// log_idx will always be larger or equal to the zxid so we can safely do this // log_idx will always be larger or equal to the zxid so we can safely do this
// (log_idx is increased for all logs, while zxid is only increased for requests) // (log_idx is increased for all logs, while zxid is only increased for requests)

View File

@ -20,13 +20,18 @@ using SnapshotsQueue = ConcurrentBoundedQueue<CreateSnapshotTask>;
class KeeperStateMachine : public nuraft::state_machine class KeeperStateMachine : public nuraft::state_machine
{ {
public: public:
using CommitCallback = std::function<void(const KeeperStorage::RequestForSession &, uint64_t, uint64_t)>;
using ApplySnapshotCallback = std::function<void(uint64_t, uint64_t)>;
KeeperStateMachine( KeeperStateMachine(
ResponsesQueue & responses_queue_, ResponsesQueue & responses_queue_,
SnapshotsQueue & snapshots_queue_, SnapshotsQueue & snapshots_queue_,
const std::string & snapshots_path_, const std::string & snapshots_path_,
const CoordinationSettingsPtr & coordination_settings_, const CoordinationSettingsPtr & coordination_settings_,
const KeeperContextPtr & keeper_context_, const KeeperContextPtr & keeper_context_,
const std::string & superdigest_ = ""); const std::string & superdigest_ = "",
CommitCallback commit_callback_ = [](const KeeperStorage::RequestForSession &, uint64_t, uint64_t){},
ApplySnapshotCallback apply_snapshot_callback_ = [](uint64_t, uint64_t){});
/// Read state from the latest snapshot /// Read state from the latest snapshot
void init(); void init();
@ -37,7 +42,7 @@ public:
nuraft::ptr<nuraft::buffer> pre_commit(uint64_t log_idx, nuraft::buffer & data) override; nuraft::ptr<nuraft::buffer> pre_commit(uint64_t log_idx, nuraft::buffer & data) override;
nuraft::ptr<nuraft::buffer> commit(const uint64_t log_idx, nuraft::buffer & data) override; /// NOLINT nuraft::ptr<nuraft::buffer> commit_ext(const ext_op_params & params) override; /// NOLINT
/// Save new cluster config to our snapshot (copy of the config stored in StateManager) /// Save new cluster config to our snapshot (copy of the config stored in StateManager)
void commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf) override; /// NOLINT void commit_config(const uint64_t log_idx, nuraft::ptr<nuraft::cluster_config> & new_conf) override; /// NOLINT
@ -145,6 +150,11 @@ private:
/// Special part of ACL system -- superdigest specified in server config. /// Special part of ACL system -- superdigest specified in server config.
const std::string superdigest; const std::string superdigest;
/// call when a request is committed
const CommitCallback commit_callback;
/// call when snapshot is applied
const ApplySnapshotCallback apply_snapshot_callback;
KeeperContextPtr keeper_context; KeeperContextPtr keeper_context;
}; };

View File

@ -1330,8 +1330,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
changelog.append(entry); changelog.append(entry);
changelog.end_of_append_batch(0, 0); changelog.end_of_append_batch(0, 0);
state_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); auto entry_buf = changelog.entry_at(i)->get_buf_ptr();
state_machine->commit(i, changelog.entry_at(i)->get_buf()); state_machine->pre_commit(i, *entry_buf);
state_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry_buf});
bool snapshot_created = false; bool snapshot_created = false;
if (i % settings->snapshot_distance == 0) if (i % settings->snapshot_distance == 0)
{ {
@ -1375,8 +1376,9 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i) for (size_t i = restore_machine->last_commit_index() + 1; i < restore_changelog.next_slot(); ++i)
{ {
restore_machine->pre_commit(i, changelog.entry_at(i)->get_buf()); auto entry = changelog.entry_at(i)->get_buf_ptr();
restore_machine->commit(i, changelog.entry_at(i)->get_buf()); restore_machine->pre_commit(i, *entry);
restore_machine->commit_ext(nuraft::state_machine::ext_op_params{i, entry});
} }
auto & source_storage = state_machine->getStorage(); auto & source_storage = state_machine->getStorage();
@ -1477,18 +1479,18 @@ TEST_P(CoordinationTest, TestEphemeralNodeRemove)
std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>(); std::shared_ptr<ZooKeeperCreateRequest> request_c = std::make_shared<ZooKeeperCreateRequest>();
request_c->path = "/hello"; request_c->path = "/hello";
request_c->is_ephemeral = true; request_c->is_ephemeral = true;
auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c); auto entry_c = getLogEntryFromZKRequest(0, 1, state_machine->getNextZxid(), request_c)->get_buf_ptr();
state_machine->pre_commit(1, entry_c->get_buf()); state_machine->pre_commit(1, *entry_c);
state_machine->commit(1, entry_c->get_buf()); state_machine->commit_ext(nuraft::state_machine::ext_op_params{1, entry_c});
const auto & storage = state_machine->getStorage(); const auto & storage = state_machine->getStorage();
EXPECT_EQ(storage.ephemerals.size(), 1); EXPECT_EQ(storage.ephemerals.size(), 1);
std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>(); std::shared_ptr<ZooKeeperRemoveRequest> request_d = std::make_shared<ZooKeeperRemoveRequest>();
request_d->path = "/hello"; request_d->path = "/hello";
/// Delete from other session /// Delete from other session
auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d); auto entry_d = getLogEntryFromZKRequest(0, 2, state_machine->getNextZxid(), request_d)->get_buf_ptr();
state_machine->pre_commit(2, entry_d->get_buf()); state_machine->pre_commit(2, *entry_d);
state_machine->commit(2, entry_d->get_buf()); state_machine->commit_ext(nuraft::state_machine::ext_op_params{2, entry_d});
EXPECT_EQ(storage.ephemerals.size(), 0); EXPECT_EQ(storage.ephemerals.size(), 0);
} }

View File

@ -481,7 +481,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \ M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \ M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \ M(Bool, optimize_if_transform_strings_to_enum, false, "Replaces string-type arguments in If and Transform to enum. Disabled by default cause it could make inconsistent change in distributed query that would lead to its fail.", 0) \
M(Bool, optimize_monotonous_functions_in_order_by, true, "Replace monotonous function with its argument in ORDER BY", 0) \ M(Bool, optimize_monotonous_functions_in_order_by, false, "Replace monotonous function with its argument in ORDER BY", 0) \
M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \ M(Bool, optimize_functions_to_subcolumns, false, "Transform functions to subcolumns, if possible, to reduce amount of read data. E.g. 'length(arr)' -> 'arr.size0', 'col IS NULL' -> 'col.null' ", 0) \
M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \ M(Bool, optimize_using_constraints, false, "Use constraints for query optimization", 0) \
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \

View File

@ -14,7 +14,7 @@
namespace std // NOLINT(cert-dcl58-cpp) namespace std // NOLINT(cert-dcl58-cpp)
{ {
using namespace experimental::coroutines_v1; using namespace experimental::coroutines_v1; // NOLINT(cert-dcl58-cpp)
} }
#if __has_warning("-Wdeprecated-experimental-coroutine") #if __has_warning("-Wdeprecated-experimental-coroutine")

View File

@ -143,9 +143,11 @@ void CachedOnDiskReadBufferFromFile::initialize(size_t offset, size_t size)
} }
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getCacheReadBuffer(size_t offset) const CachedOnDiskReadBufferFromFile::getCacheReadBuffer(const FileSegment & file_segment) const
{ {
auto path = cache->getPathInLocalCache(cache_key, offset, is_persistent); /// Use is_persistent flag from in-memory state of the filesegment,
/// because it is consistent with what is written on disk.
auto path = file_segment.getPathInLocalCache();
ReadSettings local_read_settings{settings}; ReadSettings local_read_settings{settings};
/// Do not allow to use asynchronous version of LocalFSReadMethod. /// Do not allow to use asynchronous version of LocalFSReadMethod.
@ -237,8 +239,6 @@ bool CachedOnDiskReadBufferFromFile::canStartFromCache(size_t current_offset, co
CachedOnDiskReadBufferFromFile::ImplementationBufferPtr CachedOnDiskReadBufferFromFile::ImplementationBufferPtr
CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & file_segment) CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & file_segment)
{ {
auto range = file_segment->range();
auto download_state = file_segment->state(); auto download_state = file_segment->state();
LOG_TEST(log, "getReadBufferForFileSegment: {}", file_segment->getInfoForLog()); LOG_TEST(log, "getReadBufferForFileSegment: {}", file_segment->getInfoForLog());
@ -247,7 +247,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
if (download_state == FileSegment::State::DOWNLOADED) if (download_state == FileSegment::State::DOWNLOADED)
{ {
read_type = ReadType::CACHED; read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left); return getCacheReadBuffer(*file_segment);
} }
else else
{ {
@ -280,7 +280,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// file_offset_of_buffer_end /// file_offset_of_buffer_end
read_type = ReadType::CACHED; read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left); return getCacheReadBuffer(*file_segment);
} }
download_state = file_segment->wait(); download_state = file_segment->wait();
@ -289,7 +289,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
case FileSegment::State::DOWNLOADED: case FileSegment::State::DOWNLOADED:
{ {
read_type = ReadType::CACHED; read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left); return getCacheReadBuffer(*file_segment);
} }
case FileSegment::State::EMPTY: case FileSegment::State::EMPTY:
case FileSegment::State::PARTIALLY_DOWNLOADED: case FileSegment::State::PARTIALLY_DOWNLOADED:
@ -305,7 +305,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
/// file_offset_of_buffer_end /// file_offset_of_buffer_end
read_type = ReadType::CACHED; read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left); return getCacheReadBuffer(*file_segment);
} }
auto downloader_id = file_segment->getOrSetDownloader(); auto downloader_id = file_segment->getOrSetDownloader();
@ -323,7 +323,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
read_type = ReadType::CACHED; read_type = ReadType::CACHED;
file_segment->resetDownloader(); file_segment->resetDownloader();
return getCacheReadBuffer(range.left); return getCacheReadBuffer(*file_segment);
} }
if (file_segment->getCurrentWriteOffset() < file_offset_of_buffer_end) if (file_segment->getCurrentWriteOffset() < file_offset_of_buffer_end)
@ -339,7 +339,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
LOG_TEST(log, "Predownload. File segment info: {}", file_segment->getInfoForLog()); LOG_TEST(log, "Predownload. File segment info: {}", file_segment->getInfoForLog());
chassert(file_offset_of_buffer_end > file_segment->getCurrentWriteOffset()); chassert(file_offset_of_buffer_end > file_segment->getCurrentWriteOffset());
bytes_to_predownload = file_offset_of_buffer_end - file_segment->getCurrentWriteOffset(); bytes_to_predownload = file_offset_of_buffer_end - file_segment->getCurrentWriteOffset();
chassert(bytes_to_predownload < range.size()); chassert(bytes_to_predownload < file_segment->range().size());
} }
read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE; read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
@ -354,7 +354,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegmentPtr & fil
if (canStartFromCache(file_offset_of_buffer_end, *file_segment)) if (canStartFromCache(file_offset_of_buffer_end, *file_segment))
{ {
read_type = ReadType::CACHED; read_type = ReadType::CACHED;
return getCacheReadBuffer(range.left); return getCacheReadBuffer(*file_segment);
} }
else else
{ {

View File

@ -68,7 +68,7 @@ private:
ImplementationBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment); ImplementationBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment);
ImplementationBufferPtr getCacheReadBuffer(size_t offset) const; ImplementationBufferPtr getCacheReadBuffer(const FileSegment & file_segment) const;
std::optional<size_t> getLastNonDownloadedOffset() const; std::optional<size_t> getLastNonDownloadedOffset() const;

View File

@ -13,7 +13,6 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNKNOWN_FORMAT; extern const int UNKNOWN_FORMAT;
extern const int LOGICAL_ERROR;
} }
void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf) void DiskObjectStorageMetadata::deserialize(ReadBuffer & buf)
@ -131,9 +130,6 @@ DiskObjectStorageMetadata::DiskObjectStorageMetadata(
void DiskObjectStorageMetadata::addObject(const String & path, size_t size) void DiskObjectStorageMetadata::addObject(const String & path, size_t size)
{ {
if (!object_storage_root_path.empty() && path.starts_with(object_storage_root_path))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected relative path");
total_size += size; total_size += size;
storage_objects.emplace_back(path, size); storage_objects.emplace_back(path, size);
} }

View File

@ -530,6 +530,7 @@ String FormatFactory::getFormatFromFileDescriptor(int fd)
return getFormatFromFileName(file_path, false); return getFormatFromFileName(file_path, false);
return ""; return "";
#else #else
(void)fd;
return ""; return "";
#endif #endif
} }

View File

@ -233,7 +233,7 @@ void ReadBufferFromFileDescriptor::rewind()
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) bool ReadBufferFromFileDescriptor::poll(size_t timeout_microseconds) const
{ {
fd_set fds; fd_set fds;
FD_ZERO(&fds); FD_ZERO(&fds);

View File

@ -66,7 +66,7 @@ public:
private: private:
/// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout.
bool poll(size_t timeout_microseconds); bool poll(size_t timeout_microseconds) const;
}; };

View File

@ -61,18 +61,23 @@ public:
return host_fqdn_id; return host_fqdn_id;
} }
/// Returns a copy of the `queue_dir` member.
/// NOTE(review): presumably the ZooKeeper path of the task queue — confirm against where queue_dir is set.
std::string getQueueDir() const
{
return queue_dir;
}
void startup(); void startup();
virtual void shutdown(); virtual void shutdown();
bool isCurrentlyActive() const { return initialized && !stop_flag; } bool isCurrentlyActive() const { return initialized && !stop_flag; }
protected:
/// Returns cached ZooKeeper session (possibly expired). /// Returns cached ZooKeeper session (possibly expired).
ZooKeeperPtr tryGetZooKeeper() const; ZooKeeperPtr tryGetZooKeeper() const;
/// If necessary, creates a new session and caches it. /// If necessary, creates a new session and caches it.
ZooKeeperPtr getAndSetZooKeeper(); ZooKeeperPtr getAndSetZooKeeper();
protected:
/// Iterates through queue tasks in ZooKeeper, runs execution of new tasks /// Iterates through queue tasks in ZooKeeper, runs execution of new tasks
void scheduleTasks(bool reinitialized); void scheduleTasks(bool reinitialized);

View File

@ -739,7 +739,10 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec
{ {
compileCreateAggregateStatesFunctions(module, functions, create_aggregate_states_functions_name); compileCreateAggregateStatesFunctions(module, functions, create_aggregate_states_functions_name);
compileAddIntoAggregateStatesFunctions(module, functions, add_aggregate_states_functions_name); compileAddIntoAggregateStatesFunctions(module, functions, add_aggregate_states_functions_name);
compileAddIntoAggregateStatesFunctionsSinglePlace(module, functions, add_aggregate_states_functions_name_single_place); /// FIXME: this leads to use-of-uninitialized-value in llvm
/// But for now, it is safe, since it is not used by Aggregator anyway
(void)compileAddIntoAggregateStatesFunctionsSinglePlace;
/// compileAddIntoAggregateStatesFunctionsSinglePlace(module, functions, add_aggregate_states_functions_name_single_place);
compileMergeAggregatesStates(module, functions, merge_aggregate_states_functions_name); compileMergeAggregatesStates(module, functions, merge_aggregate_states_functions_name);
compileInsertAggregatesIntoResultColumns(module, functions, insert_aggregate_states_functions_name); compileInsertAggregatesIntoResultColumns(module, functions, insert_aggregate_states_functions_name);
}); });
@ -752,7 +755,7 @@ CompiledAggregateFunctions compileAggregateFunctions(CHJIT & jit, const std::vec
assert(create_aggregate_states_function); assert(create_aggregate_states_function);
assert(add_into_aggregate_states_function); assert(add_into_aggregate_states_function);
assert(add_into_aggregate_states_function_single_place); /// assert(add_into_aggregate_states_function_single_place); /// FIXME
assert(merge_aggregate_states_function); assert(merge_aggregate_states_function);
assert(insert_aggregate_states_function); assert(insert_aggregate_states_function);

View File

@ -2,6 +2,15 @@
#if USE_MSGPACK #if USE_MSGPACK
/// FIXME: there is an issue with clang-15: it incorrectly reports
/// "Attempt to free released memory" in msgpack::unpack() because of the delete
/// operator for zone (from msgpack/v1/detail/cpp11_zone.hpp), hence the NOLINT.
///
/// NOTE: I was not able to suppress it locally, only with
/// NOLINTBEGIN/NOLINTEND
//
// NOLINTBEGIN(clang-analyzer-cplusplus.NewDelete)
#include <cstdlib> #include <cstdlib>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
@ -235,8 +244,10 @@ static void insertNull(IColumn & column, DataTypePtr type)
assert_cast<ColumnNullable &>(column).insertDefault(); assert_cast<ColumnNullable &>(column).insertDefault();
} }
static void insertUUID(IColumn & column, DataTypePtr /*type*/, const char * value, size_t size) static void insertUUID(IColumn & column, DataTypePtr type, const char * value, size_t size)
{ {
if (!isUUID(type))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack UUID into column with type {}.", type->getName());
ReadBufferFromMemory buf(value, size); ReadBufferFromMemory buf(value, size);
UUID uuid; UUID uuid;
readBinaryBigEndian(uuid.toUnderType().items[0], buf); readBinaryBigEndian(uuid.toUnderType().items[0], buf);
@ -551,6 +562,8 @@ void registerMsgPackSchemaReader(FormatFactory & factory)
} }
// NOLINTEND(clang-analyzer-cplusplus.NewDelete)
#else #else
namespace DB namespace DB

View File

@ -255,7 +255,7 @@ private:
class HDFSSource::URISIterator::Impl class HDFSSource::URISIterator::Impl
{ {
public: public:
explicit Impl(const std::vector<const String> & uris_, ContextPtr context) explicit Impl(const std::vector<String> & uris_, ContextPtr context)
{ {
auto path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]); auto path_and_uri = getPathFromUriAndUriWithoutPath(uris_[0]);
HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef()); HDFSBuilderWrapper builder = createHDFSBuilder(path_and_uri.second + "/", context->getGlobalContext()->getConfigRef());
@ -293,7 +293,7 @@ String HDFSSource::DisclosedGlobIterator::next()
return pimpl->next(); return pimpl->next();
} }
HDFSSource::URISIterator::URISIterator(const std::vector<const String> & uris_, ContextPtr context) HDFSSource::URISIterator::URISIterator(const std::vector<String> & uris_, ContextPtr context)
: pimpl(std::make_shared<HDFSSource::URISIterator::Impl>(uris_, context)) : pimpl(std::make_shared<HDFSSource::URISIterator::Impl>(uris_, context))
{ {
} }

View File

@ -86,7 +86,7 @@ private:
const String & format_name, const String & format_name,
const ContextPtr & ctx); const ContextPtr & ctx);
std::vector<const String> uris; std::vector<String> uris;
String format_name; String format_name;
String compression_method; String compression_method;
const bool distributed_processing; const bool distributed_processing;
@ -116,7 +116,7 @@ public:
class URISIterator class URISIterator
{ {
public: public:
URISIterator(const std::vector<const String> & uris_, ContextPtr context); URISIterator(const std::vector<String> & uris_, ContextPtr context);
String next(); String next();
private: private:
class Impl; class Impl;

View File

@ -205,9 +205,9 @@ static void fillStatusColumns(MutableColumns & res_columns, size_t & col,
void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const void StorageSystemDDLWorkerQueue::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const
{ {
zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); auto& ddl_worker = context->getDDLWorker();
fs::path ddl_zookeeper_path = context->getConfigRef().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/"); fs::path ddl_zookeeper_path = ddl_worker.getQueueDir();
zkutil::ZooKeeperPtr zookeeper = ddl_worker.getAndSetZooKeeper();
Strings ddl_task_paths = zookeeper->getChildren(ddl_zookeeper_path); Strings ddl_task_paths = zookeeper->getChildren(ddl_zookeeper_path);
GetResponseFutures ddl_task_futures; GetResponseFutures ddl_task_futures;

View File

@ -291,7 +291,9 @@ def main():
logging.info("Will try to fetch cache for our build") logging.info("Will try to fetch cache for our build")
try: try:
get_ccache_if_not_exists(ccache_path, s3_helper, pr_info.number, TEMP_PATH) get_ccache_if_not_exists(
ccache_path, s3_helper, pr_info.number, TEMP_PATH, pr_info.release_pr
)
except Exception as e: except Exception as e:
# In case there are issues with ccache, remove the path and do not fail a build # In case there are issues with ccache, remove the path and do not fail a build
logging.info("Failed to get ccache, building without it. Error: %s", e) logging.info("Failed to get ccache, building without it. Error: %s", e)

View File

@ -11,6 +11,7 @@ import requests # type: ignore
from compress_files import decompress_fast, compress_fast from compress_files import decompress_fast, compress_fast
from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET from env_helper import S3_DOWNLOAD, S3_BUILDS_BUCKET
from s3_helper import S3Helper
DOWNLOAD_RETRIES_COUNT = 5 DOWNLOAD_RETRIES_COUNT = 5
@ -57,12 +58,19 @@ def dowload_file_with_progress(url, path):
def get_ccache_if_not_exists( def get_ccache_if_not_exists(
path_to_ccache_dir, s3_helper, current_pr_number, temp_path path_to_ccache_dir: str,
s3_helper: S3Helper,
current_pr_number: int,
temp_path: str,
release_pr: int,
) -> int: ) -> int:
"""returns: number of PR for downloaded PR. -1 if ccache not found""" """returns: number of PR for downloaded PR. -1 if ccache not found"""
ccache_name = os.path.basename(path_to_ccache_dir) ccache_name = os.path.basename(path_to_ccache_dir)
cache_found = False cache_found = False
prs_to_check = [current_pr_number] prs_to_check = [current_pr_number]
# Release PR is either 0 or defined
if release_pr:
prs_to_check.append(release_pr)
ccache_pr = -1 ccache_pr = -1
if current_pr_number != 0: if current_pr_number != 0:
prs_to_check.append(0) prs_to_check.append(0)

View File

@ -8,7 +8,7 @@ BuildConfig = Dict[str, ConfValue]
CI_CONFIG = { CI_CONFIG = {
"build_config": { "build_config": {
"package_release": { "package_release": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "deb", "package_type": "deb",
@ -19,7 +19,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"coverity": { "coverity": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "coverity", "package_type": "coverity",
@ -29,7 +29,7 @@ CI_CONFIG = {
"official": False, "official": False,
}, },
"package_aarch64": { "package_aarch64": {
"compiler": "clang-14-aarch64", "compiler": "clang-15-aarch64",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "deb", "package_type": "deb",
@ -40,7 +40,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"package_asan": { "package_asan": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "", "build_type": "",
"sanitizer": "address", "sanitizer": "address",
"package_type": "deb", "package_type": "deb",
@ -49,7 +49,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"package_ubsan": { "package_ubsan": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "", "build_type": "",
"sanitizer": "undefined", "sanitizer": "undefined",
"package_type": "deb", "package_type": "deb",
@ -67,7 +67,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"package_msan": { "package_msan": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "", "build_type": "",
"sanitizer": "memory", "sanitizer": "memory",
"package_type": "deb", "package_type": "deb",
@ -76,7 +76,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"package_debug": { "package_debug": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "debug", "build_type": "debug",
"sanitizer": "", "sanitizer": "",
"package_type": "deb", "package_type": "deb",
@ -85,7 +85,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_release": { "binary_release": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -94,7 +94,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_tidy": { "binary_tidy": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "debug", "build_type": "debug",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -104,7 +104,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_shared": { "binary_shared": {
"compiler": "clang-14", "compiler": "clang-15",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -113,7 +113,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_darwin": { "binary_darwin": {
"compiler": "clang-14-darwin", "compiler": "clang-15-darwin",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -123,7 +123,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_aarch64": { "binary_aarch64": {
"compiler": "clang-14-aarch64", "compiler": "clang-15-aarch64",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -132,7 +132,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_freebsd": { "binary_freebsd": {
"compiler": "clang-14-freebsd", "compiler": "clang-15-freebsd",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -142,7 +142,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_darwin_aarch64": { "binary_darwin_aarch64": {
"compiler": "clang-14-darwin-aarch64", "compiler": "clang-15-darwin-aarch64",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -152,7 +152,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_ppc64le": { "binary_ppc64le": {
"compiler": "clang-14-ppc64le", "compiler": "clang-15-ppc64le",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",
@ -162,7 +162,7 @@ CI_CONFIG = {
"with_coverage": False, "with_coverage": False,
}, },
"binary_amd64sse2": { "binary_amd64sse2": {
"compiler": "clang-14-amd64sse2", "compiler": "clang-15-amd64sse2",
"build_type": "", "build_type": "",
"sanitizer": "", "sanitizer": "",
"package_type": "binary", "package_type": "binary",

View File

@ -125,7 +125,7 @@ if __name__ == "__main__":
logging.info("Will try to fetch cache for our build") logging.info("Will try to fetch cache for our build")
ccache_for_pr = get_ccache_if_not_exists( ccache_for_pr = get_ccache_if_not_exists(
cache_path, s3_helper, pr_info.number, temp_path cache_path, s3_helper, pr_info.number, temp_path, pr_info.release_pr
) )
upload_master_ccache = ccache_for_pr in (-1, 0) upload_master_ccache = ccache_for_pr in (-1, 0)

View File

@ -86,7 +86,7 @@ class PRInfo:
self.changed_files = set() # type: Set[str] self.changed_files = set() # type: Set[str]
self.body = "" self.body = ""
self.diff_urls = [] self.diff_urls = []
self.release_pr = "" self.release_pr = 0
ref = github_event.get("ref", "refs/head/master") ref = github_event.get("ref", "refs/head/master")
if ref and ref.startswith("refs/heads/"): if ref and ref.startswith("refs/heads/"):
ref = ref[11:] ref = ref[11:]

View File

@ -38,6 +38,20 @@
<path>/jbod1/</path> <path>/jbod1/</path>
<max_size>1000000000</max_size> <max_size>1000000000</max_size>
</s3_with_cache_and_jbod> </s3_with_cache_and_jbod>
<s3_r>
<type>s3</type>
<endpoint>http://minio1:9001/root/data/</endpoint>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
<s3_max_single_part_upload_size>33554432</s3_max_single_part_upload_size>
</s3_r>
<s3_cache_r>
<type>cache</type>
<disk>s3_r</disk>
<path>/s3_cache_r/</path>
<max_size>1000000000</max_size>
<do_not_evict_index_and_mark_files>1</do_not_evict_index_and_mark_files>
</s3_cache_r>
</disks> </disks>
<policies> <policies>
<s3> <s3>
@ -78,6 +92,13 @@
</main> </main>
</volumes> </volumes>
</s3_with_cache_and_jbod> </s3_with_cache_and_jbod>
<s3_cache_r>
<volumes>
<main>
<disk>s3_cache_r</disk>
</main>
</volumes>
</s3_cache_r>
</policies> </policies>
</storage_configuration> </storage_configuration>

View File

@ -6,7 +6,6 @@ import pytest
from helpers.cluster import ClickHouseCluster from helpers.cluster import ClickHouseCluster
from helpers.utility import generate_values, replace_config, SafeThread from helpers.utility import generate_values, replace_config, SafeThread
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@ -36,6 +35,7 @@ def cluster():
"/jbod1:size=2M", "/jbod1:size=2M",
], ],
) )
logging.info("Starting cluster...") logging.info("Starting cluster...")
cluster.start() cluster.start()
logging.info("Cluster started") logging.info("Cluster started")
@ -742,3 +742,79 @@ def test_store_cleanup_disk_s3(cluster, node_name):
"CREATE TABLE s3_test UUID '00000000-1000-4000-8000-000000000001' (n UInt64) Engine=MergeTree() ORDER BY n SETTINGS storage_policy='s3';" "CREATE TABLE s3_test UUID '00000000-1000-4000-8000-000000000001' (n UInt64) Engine=MergeTree() ORDER BY n SETTINGS storage_policy='s3';"
) )
node.query("INSERT INTO s3_test SELECT 1") node.query("INSERT INTO s3_test SELECT 1")
@pytest.mark.parametrize("node_name", ["node"])
def test_cache_setting_compatibility(cluster, node_name):
    """Toggle `do_not_evict_index_and_mark_files` for the `s3_cache_r` cache.

    The test fills a table stored with the `s3_cache_r` policy, reads it so
    that cache entries whose path ends with `persistent` appear, then flips
    the setting 1 -> 0 -> 1 in storage_conf.xml (restarting the server after
    each flip). After every flip it checks that:
      * `DESCRIBE CACHE` reports the new value only after a restart,
      * the `persistent` cache entries are still listed,
      * reading the table does not log "No such file or directory: Cache info:".
    """
    node = cluster.instances[node_name]

    persistent_count_query = "SELECT count() FROM system.filesystem_cache WHERE cache_path LIKE '%persistent'"
    setting_enabled = (
        "<do_not_evict_index_and_mark_files>1</do_not_evict_index_and_mark_files>"
    )
    setting_disabled = (
        "<do_not_evict_index_and_mark_files>0</do_not_evict_index_and_mark_files>"
    )

    def persistent_entries():
        # Number of cache entries whose path ends with "persistent".
        return int(node.query(persistent_count_query))

    def assert_described_setting(expected):
        # The test relies on the DESCRIBE CACHE output ending with the
        # value of do_not_evict_index_and_mark_files.
        described = node.query("DESCRIBE CACHE 's3_cache_r'")
        assert described.strip().endswith(expected)

    def assert_table_readable():
        node.query("SELECT * FROM s3_test FORMAT Null")
        assert not node.contains_in_log("No such file or directory: Cache info:")

    node.query("DROP TABLE IF EXISTS s3_test NO DELAY")
    node.query(
        "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_r';"
    )
    node.query(
        "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 500"
    )

    # Start from an empty cache; the statement's output is not needed.
    node.query("SYSTEM DROP FILESYSTEM CACHE")
    assert persistent_entries() == 0

    # Reading the table populates the cache.
    node.query("SELECT * FROM s3_test")
    assert persistent_entries() > 0

    config_path = os.path.join(
        SCRIPT_DIR,
        f"./{cluster.instances_dir_name}/node/configs/config.d/storage_conf.xml",
    )

    # Flip 1 -> 0: the running server still reports the old value until restart.
    replace_config(config_path, setting_enabled, setting_disabled)
    assert_described_setting("1")

    node.restart_clickhouse()
    assert_described_setting("0")
    assert persistent_entries() > 0
    assert_table_readable()

    # Flip back 0 -> 1 and verify the entries survive this direction as well.
    replace_config(config_path, setting_disabled, setting_enabled)
    assert persistent_entries() > 0

    node.restart_clickhouse()
    assert_described_setting("1")
    assert_table_readable()

View File

@ -36,8 +36,10 @@ def started_node():
def test_send_segfault(started_node): def test_send_segfault(started_node):
# NOTE: another option is to increase waiting time.
if ( if (
started_node.is_built_with_thread_sanitizer() started_node.is_built_with_thread_sanitizer()
or started_node.is_built_with_address_sanitizer()
or started_node.is_built_with_memory_sanitizer() or started_node.is_built_with_memory_sanitizer()
): ):
pytest.skip("doesn't fit in timeouts for stacktrace generation") pytest.skip("doesn't fit in timeouts for stacktrace generation")

View File

@ -21,7 +21,8 @@
<heart_beat_interval_ms>1000</heart_beat_interval_ms> <heart_beat_interval_ms>1000</heart_beat_interval_ms>
<election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms> <election_timeout_lower_bound_ms>2000</election_timeout_lower_bound_ms>
<election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms> <election_timeout_upper_bound_ms>4000</election_timeout_upper_bound_ms>
<quorum_reads>{quorum_reads}</quorum_reads> <quorum_reads>0</quorum_reads>
<read_mode>fastlinear</read_mode>
<snapshot_distance>{snapshot_distance}</snapshot_distance> <snapshot_distance>{snapshot_distance}</snapshot_distance>
<stale_log_gap>{stale_log_gap}</stale_log_gap> <stale_log_gap>{stale_log_gap}</stale_log_gap>
<reserved_log_items>{reserved_log_items}</reserved_log_items> <reserved_log_items>{reserved_log_items}</reserved_log_items>

View File

@ -27,7 +27,12 @@
(invoke! [this test op] (invoke! [this test op]
(case (:f op) (case (:f op)
:read (exec-with-retries 30 (fn [] :read (try
(assoc op
:type :ok
:value (count (zk-list conn root-path)))
(catch Exception _ (assoc op :type :info, :error :connect-error)))
:final-read (exec-with-retries 30 (fn []
(assoc op (assoc op
:type :ok :type :ok
:value (count (zk-list conn root-path))))) :value (count (zk-list conn root-path)))))
@ -49,7 +54,5 @@
:checker (checker/compose :checker (checker/compose
{:counter (checker/counter) {:counter (checker/counter)
:perf (checker/perf)}) :perf (checker/perf)})
:generator (->> (range) :generator (gen/mix [r add])
(map (fn [x] :final-generator (gen/once {:type :invoke, :f :final-read, :value nil})})
(->> (gen/mix [r add])))))
:final-generator (gen/once {:type :invoke, :f :read, :value nil})})

View File

@ -98,7 +98,6 @@
#"\{srv2\}" (get nodes 1) #"\{srv2\}" (get nodes 1)
#"\{srv3\}" (get nodes 2) #"\{srv3\}" (get nodes 2)
#"\{id\}" (str (inc (.indexOf nodes node))) #"\{id\}" (str (inc (.indexOf nodes node)))
#"\{quorum_reads\}" (str (boolean (:quorum test)))
#"\{snapshot_distance\}" (str (:snapshot-distance test)) #"\{snapshot_distance\}" (str (:snapshot-distance test))
#"\{stale_log_gap\}" (str (:stale-log-gap test)) #"\{stale_log_gap\}" (str (:stale-log-gap test))
#"\{reserved_log_items\}" (str (:reserved-log-items test))}] #"\{reserved_log_items\}" (str (:reserved-log-items test))}]

View File

@ -103,7 +103,7 @@
current-nemesis (get custom-nemesis/custom-nemesises (:nemesis opts))] current-nemesis (get custom-nemesis/custom-nemesises (:nemesis opts))]
(merge tests/noop-test (merge tests/noop-test
opts opts
{:name (str "clickhouse-keeper-quorum=" quorum "-" (name (:workload opts)) "-" (name (:nemesis opts))) {:name (str "clickhouse-keeper-" (name (:workload opts)) "-" (name (:nemesis opts)))
:os ubuntu/os :os ubuntu/os
:db (get-db opts) :db (get-db opts)
:pure-generators true :pure-generators true

View File

@ -20,7 +20,8 @@
(assoc this :conn (zk-connect node 9181 30000))) (assoc this :conn (zk-connect node 9181 30000)))
(setup! [this test] (setup! [this test]
(zk-create-range conn 300)) ; 300 nodes to be sure (exec-with-retries 30 (fn []
(zk-create-range conn 300))))
(invoke! [_ test op] (invoke! [_ test op]
(let [[k v] (:value op) (let [[k v] (:value op)

View File

@ -45,7 +45,7 @@
(defn zk-connect (defn zk-connect
[host port timeout] [host port timeout]
(exec-with-retries 30 (fn [] (zk/connect (str host ":" port) :timeout-msec timeout)))) (zk/connect (str host ":" port) :timeout-msec timeout))
(defn zk-create-range (defn zk-create-range
[conn n] [conn n]

View File

@ -32,5 +32,21 @@
<query>SELECT avgWeighted(num_u, num) FROM perf_avg FORMAT Null</query> <query>SELECT avgWeighted(num_u, num) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeighted(num_u, num_u) FROM perf_avg FORMAT Null</query> <query>SELECT avgWeighted(num_u, num_u) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeighted(num_f, num_f) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeighted(toNullable(num_f), num_f) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeighted(num_f, toNullable(num_f)) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeighted(toNullable(num_f), toNullable(num_f)) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(num_f, num_f, num % 10) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(toNullable(num_f), num_f, num % 10) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(num_f, toNullable(num_f), num % 10) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(toNullable(num_f), toNullable(num_f), num % 10) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(num_f, num_f, toNullable(num) % 10) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(toNullable(num_f), num_f, toNullable(num) % 10) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(num_f, toNullable(num_f), toNullable(num) % 10) FROM perf_avg FORMAT Null</query>
<query>SELECT avgWeightedIf(toNullable(num_f), toNullable(num_f), toNullable(num) % 10) FROM perf_avg FORMAT Null</query>
<drop_query>DROP TABLE IF EXISTS perf_avg</drop_query> <drop_query>DROP TABLE IF EXISTS perf_avg</drop_query>
</test> </test>

View File

@ -0,0 +1,58 @@
<test>
<create_query>
create table matview_1
(
a String,
b_count AggregateFunction(uniq, UInt64)
) Engine=MergeTree partition by tuple()
ORDER by tuple()
SETTINGS index_granularity = 1024;
</create_query>
<create_query>
create table matview_10000
(
a String,
b_count AggregateFunction(uniq, String)
) Engine=MergeTree partition by tuple()
ORDER by tuple()
SETTINGS index_granularity = 1024;
</create_query>
<drop_query>DROP TABLE IF EXISTS matview_1</drop_query>
<drop_query>DROP TABLE IF EXISTS matview_10000</drop_query>
<fill_query>
INSERT INTO matview_10000
SELECT a, uniqState(b) b_count
FROM
(
SELECT toString(rand() % 1000) a, toString(number % 10000) b
FROM numbers_mt(20000000)
)
GROUP BY a
SETTINGS max_insert_threads=8;
</fill_query>
<fill_query>OPTIMIZE TABLE matview_10000 FINAL</fill_query>
<fill_query>
INSERT INTO matview_1
SELECT '1', uniqState(number) b_count
FROM
(
SELECT *
FROM numbers_mt(2000000)
)
GROUP BY number
SETTINGS max_insert_threads=8;
</fill_query>
<fill_query>OPTIMIZE TABLE matview_1 FINAL</fill_query>
<!-- Test with ~10000 elements per state -->
<query>select a, uniqMerge(b_count) as b_count from matview_10000 prewhere a='55' group by a FORMAT Null SETTINGS max_threads=1;</query>
<query>select uniqMerge(b_count) as b_count from matview_10000 FORMAT Null SETTINGS max_threads=1;</query>
<!-- Test with ~1 element per state -->
<query>select uniqMerge(b_count) as b_count FROM matview_1 FORMAT Null SETTINGS max_threads=1;</query>
</test>

View File

@ -0,0 +1,2 @@
2020-01-01 01:00:00 1
2020-01-01 01:00:00 999

View File

@ -0,0 +1,7 @@
SELECT
toStartOfHour(c1) AS _c1,
c2
FROM values((toDateTime('2020-01-01 01:01:01'), 999), (toDateTime('2020-01-01 01:01:59'), 1))
ORDER BY
_c1 ASC,
c2 ASC

View File

@ -17,7 +17,7 @@ INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-01 12:00:00'
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10); INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-02 12:00:00'), 1 FROM numbers(10);
INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10); INSERT INTO test_table(timestamp, value) SELECT toDateTime('2020-01-03 12:00:00'), 1 FROM numbers(10);
set optimize_respect_aliases = 1; set optimize_respect_aliases = 1, optimize_monotonous_functions_in_order_by = 1;
SELECT 'test-partition-prune'; SELECT 'test-partition-prune';
SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10; SELECT COUNT() = 10 FROM test_table WHERE day = '2020-01-01' SETTINGS max_rows_to_read = 10;

View File

@ -56,7 +56,13 @@ ENGINE = MergeTree ORDER BY (toStartOfDay(dt), d);
INSERT INTO t_read_in_order SELECT toDateTime('2020-10-10 00:00:00') + number, 1 / (number % 100 + 1), number FROM numbers(1000); INSERT INTO t_read_in_order SELECT toDateTime('2020-10-10 00:00:00') + number, 1 / (number % 100 + 1), number FROM numbers(1000);
EXPLAIN PIPELINE SELECT toStartOfDay(dt) as date, d FROM t_read_in_order ORDER BY date, round(d) LIMIT 5; EXPLAIN PIPELINE SELECT toStartOfDay(dt) as date, d FROM t_read_in_order ORDER BY date, round(d) LIMIT 5;
SELECT toStartOfDay(dt) as date, d FROM t_read_in_order ORDER BY date, round(d) LIMIT 5; SELECT * from (
SELECT toStartOfDay(dt) as date, d FROM t_read_in_order ORDER BY date, round(d) LIMIT 50000000000
-- subquery with limit 50000000000 to stabilize the test result and prevent order by d pushdown
) order by d limit 5;
EXPLAIN PIPELINE SELECT toStartOfDay(dt) as date, d FROM t_read_in_order ORDER BY date, round(d) LIMIT 5; EXPLAIN PIPELINE SELECT toStartOfDay(dt) as date, d FROM t_read_in_order ORDER BY date, round(d) LIMIT 5;
SELECT toStartOfDay(dt) as date, d FROM t_read_in_order WHERE date = '2020-10-10' ORDER BY round(d) LIMIT 5; SELECT * from (
SELECT toStartOfDay(dt) as date, d FROM t_read_in_order WHERE date = '2020-10-10' ORDER BY round(d) LIMIT 50000000000
-- subquery with limit 50000000000 to stabilize the test result and prevent order by d pushdown
) order by d limit 5;

View File

@ -0,0 +1,65 @@
-- { echo }
SELECT avgWeighted(number, number) t, toTypeName(t) FROM numbers(1);
nan Float64
SELECT avgWeighted(number, number + 1) t, toTypeName(t) FROM numbers(0);
nan Float64
SELECT avgWeighted(toNullable(number), number) t, toTypeName(t) FROM numbers(1);
nan Nullable(Float64)
SELECT avgWeighted(if(number < 10000, NULL, number), number) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeighted(if(number < 50, NULL, number), number) t, toTypeName(t) FROM numbers(100);
77.29530201342281 Nullable(Float64)
SELECT avgWeighted(number, if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeighted(number, if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
77.29530201342281 Nullable(Float64)
SELECT avgWeighted(toNullable(number), if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeighted(toNullable(number), if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
77.29530201342281 Nullable(Float64)
SELECT avgWeighted(if(number < 10000, NULL, number), toNullable(number)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeighted(if(number < 50, NULL, number), toNullable(number)) t, toTypeName(t) FROM numbers(100);
77.29530201342281 Nullable(Float64)
SELECT avgWeighted(if(number < 10000, NULL, number), if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeighted(if(number < 50, NULL, number), if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeighted(if(number < 10000, NULL, number), if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeighted(if(number < 50, NULL, number), if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
77.29530201342281 Nullable(Float64)
SELECT avgWeightedIf(number, number, number % 10) t, toTypeName(t) FROM numbers(100);
66.63333333333334 Float64
SELECT avgWeightedIf(number, number, toNullable(number % 10)) t, toTypeName(t) FROM numbers(100);
66.63333333333334 Float64
SELECT avgWeightedIf(number, number, if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
nan Float64
SELECT avgWeightedIf(number, number, if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
77.75555555555556 Float64
SELECT avgWeightedIf(number, number, if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
66.63333333333334 Float64
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 10000, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 10000, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 50, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 50, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 10000, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 10000, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 50, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 50, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
77.75555555555556 Nullable(Float64)
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 10000, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 10000, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 50, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
\N Nullable(Float64)
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 50, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
77.75555555555556 Nullable(Float64)

View File

@ -0,0 +1,41 @@
-- { echo }
SELECT avgWeighted(number, number) t, toTypeName(t) FROM numbers(1);
SELECT avgWeighted(number, number + 1) t, toTypeName(t) FROM numbers(0);
SELECT avgWeighted(toNullable(number), number) t, toTypeName(t) FROM numbers(1);
SELECT avgWeighted(if(number < 10000, NULL, number), number) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(if(number < 50, NULL, number), number) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(number, if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(number, if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(toNullable(number), if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(toNullable(number), if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(if(number < 10000, NULL, number), toNullable(number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(if(number < 50, NULL, number), toNullable(number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(if(number < 10000, NULL, number), if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(if(number < 50, NULL, number), if(number < 10000, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(if(number < 10000, NULL, number), if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeighted(if(number < 50, NULL, number), if(number < 50, NULL, number)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(number, number, number % 10) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(number, number, toNullable(number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(number, number, if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(number, number, if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(number, number, if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 10000, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 10000, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 50, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 50, NULL, number), if(number < 10000, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 10000, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 10000, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 50, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 50, NULL, number), if(number < 50, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 10000, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 10000, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 10000, NULL, number), if(number < 50, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);
SELECT avgWeightedIf(if(number < 50, NULL, number), if(number < 50, NULL, number), if(number < 0, NULL, number % 10)) t, toTypeName(t) FROM numbers(100);

View File

@ -0,0 +1,4 @@
-- Tags: no-parallel, no-fasttest
insert into function file(02422_data.msgpack) select toUUID('f4cdd80d-5d15-4bdc-9527-adcca635ec1f') as uuid settings output_format_msgpack_uuid_representation='ext';
select * from file(02422_data.msgpack, auto, 'x Int32'); -- {serverError ILLEGAL_COLUMN}