Merge branch 'master' into client-format-error-fix

This commit is contained in:
mergify[bot] 2022-04-07 14:15:44 +00:00 committed by GitHub
commit f84dfe5073
GPG Key ID: 4AEE18F83AFDEB23
355 changed files with 5394 additions and 1707 deletions

View File

@ -144,6 +144,7 @@ Checks: '-*,
clang-analyzer-cplusplus.SelfAssignment,
clang-analyzer-deadcode.DeadStores,
clang-analyzer-cplusplus.Move,
clang-analyzer-optin.cplusplus.UninitializedObject,
clang-analyzer-optin.cplusplus.VirtualCall,
clang-analyzer-security.insecureAPI.UncheckedReturn,
clang-analyzer-security.insecureAPI.bcmp,
@ -163,6 +164,8 @@ Checks: '-*,
clang-analyzer-unix.cstring.NullArg,
boost-use-to-string,
alpha.security.cert.env.InvalidPtr,
'
WarningsAsErrors: '*'

View File

@ -1,4 +1,4 @@
Changelog category (leave one):
### Changelog category (leave one):
- New Feature
- Improvement
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
@ -9,7 +9,7 @@ Changelog category (leave one):
- Not for changelog (changelog entry is not required)
Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
### Changelog entry (a user-readable short description of the changes that goes to CHANGELOG.md):
...

View File

@ -341,10 +341,15 @@ jobs:
steps:
- name: Set envs
run: |
DEPENDENCIES=$(cat << 'EOF' | jq '. | length'
${{ toJSON(needs) }}
EOF
)
echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV"
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse build check (actions)
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -360,7 +365,7 @@ jobs:
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES"
- name: Cleanup
if: always()
run: |

View File

@ -992,10 +992,16 @@ jobs:
steps:
- name: Set envs
run: |
DEPENDENCIES=$(cat << 'EOF' | jq '. | length'
${{ toJSON(needs) }}
EOF
)
echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV"
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse build check (actions)
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -1011,7 +1017,7 @@ jobs:
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES"
- name: Cleanup
if: always()
run: |

View File

@ -72,3 +72,52 @@ jobs:
with:
name: changed_images
path: ${{ runner.temp }}/changed_images.json
BuilderCoverity:
needs: DockerHubPush
runs-on: [self-hosted, builder]
steps:
- name: Set envs
run: |
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/build_check
IMAGES_PATH=${{runner.temp}}/images_path
REPO_COPY=${{runner.temp}}/build_check/ClickHouse
CACHES_PATH=${{runner.temp}}/../ccaches
CHECK_NAME=ClickHouse build check (actions)
BUILD_NAME=coverity
EOF
- name: Download changed images
uses: actions/download-artifact@v2
with:
name: changed_images
path: ${{ env.IMAGES_PATH }}
- name: Clear repository
run: |
sudo rm -fr "$GITHUB_WORKSPACE" && mkdir "$GITHUB_WORKSPACE"
- name: Check out repository code
id: coverity-checkout
uses: actions/checkout@v2
with:
submodules: 'true'
fetch-depth: 0 # otherwise we will have no info about contributors
- name: Build
run: |
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
cd "$REPO_COPY/tests/ci" && python3 build_check.py "$CHECK_NAME" "$BUILD_NAME" "${{ secrets.COV_TOKEN }}"
- name: Upload Coverity Analysis
if: ${{ success() || failure() }}
run: |
curl --form token='${{ secrets.COV_TOKEN }}' \
--form email='${{ secrets.ROBOT_CLICKHOUSE_EMAIL }}' \
--form file="@$TEMP_PATH/$BUILD_NAME/clickhouse-scan.tgz" \
--form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \
--form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \
https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse
- name: Cleanup
if: always()
run: |
docker kill "$(docker ps -q)" ||:
docker rm -f "$(docker ps -a -q)" ||:
sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"

View File

@ -1044,10 +1044,16 @@ jobs:
steps:
- name: Set envs
run: |
DEPENDENCIES=$(cat << 'EOF' | jq '. | length'
${{ toJSON(needs) }}
EOF
)
echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV"
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse build check (actions)
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -1063,7 +1069,7 @@ jobs:
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES"
- name: Cleanup
if: always()
run: |

View File

@ -436,10 +436,16 @@ jobs:
steps:
- name: Set envs
run: |
DEPENDENCIES=$(cat << 'EOF' | jq '. | length'
${{ toJSON(needs) }}
EOF
)
echo "DEPENDENCIES=$DEPENDENCIES" >> "$GITHUB_ENV"
cat >> "$GITHUB_ENV" << 'EOF'
TEMP_PATH=${{runner.temp}}/report_check
REPORTS_PATH=${{runner.temp}}/reports_dir
CHECK_NAME=ClickHouse build check (actions)
REPORTS_PATH=${{runner.temp}}/reports_dir
REPORTS_PATH=${{runner.temp}}/reports_dir
TEMP_PATH=${{runner.temp}}/report_check
EOF
- name: Download json reports
uses: actions/download-artifact@v2
@ -455,7 +461,7 @@ jobs:
sudo rm -fr "$TEMP_PATH"
mkdir -p "$TEMP_PATH"
cd "$GITHUB_WORKSPACE/tests/ci"
python3 build_report_check.py "$CHECK_NAME"
python3 build_report_check.py "$CHECK_NAME" "$DEPENDENCIES"
- name: Cleanup
if: always()
run: |

View File

@ -294,14 +294,19 @@ include(cmake/cpu_features.cmake)
# Enable it explicitly.
set (COMPILER_FLAGS "${COMPILER_FLAGS} -fasynchronous-unwind-tables")
# Reproducible builds
# If turned `ON`, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE().
option(ENABLE_BUILD_PATH_MAPPING "Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ON)
# Reproducible builds.
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
set (ENABLE_BUILD_PATH_MAPPING_DEFAULT OFF)
else ()
set (ENABLE_BUILD_PATH_MAPPING_DEFAULT ON)
endif ()
option (ENABLE_BUILD_PATH_MAPPING "Enable remapping of file source paths in debug info, predefined preprocessor macros, and __builtin_FILE(). It's used to generate reproducible builds. See https://reproducible-builds.org/docs/build-path" ${ENABLE_BUILD_PATH_MAPPING_DEFAULT})
if (ENABLE_BUILD_PATH_MAPPING)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} -ffile-prefix-map=${CMAKE_SOURCE_DIR}=.")
endif()
endif ()
if (${CMAKE_VERSION} VERSION_LESS "3.12.4")
# CMake < 3.12 doesn't support setting 20 as a C++ standard version.

View File

@ -197,7 +197,6 @@ void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Log
Poco::AutoPtr<OwnPatternFormatter> pf = new OwnPatternFormatter(color_enabled);
Poco::AutoPtr<DB::OwnFormattingChannel> log = new DB::OwnFormattingChannel(pf, new Poco::ConsoleChannel);
logger.warning("Logging " + console_log_level_string + " to console");
log->setLevel(console_log_level);
split->addChannel(log, "console");
}

contrib/arrow vendored

@ -1 +1 @@
Subproject commit 1d9cc51daa4e7e9fc6926320ef73759818bd736e
Subproject commit efdcd015cfdee1b6aa349c9ca227ca12c3d697f5

View File

@ -1,4 +1,4 @@
set (ENABLE_KRB5_DEFAULT 1)
set (ENABLE_KRB5_DEFAULT ${ENABLE_LIBRARIES})
if (NOT CMAKE_SYSTEM_NAME MATCHES "Linux" AND NOT (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND NOT CMAKE_CROSSCOMPILING))
message (WARNING "krb5 disabled in non-Linux and non-native-Darwin environments")
set (ENABLE_KRB5_DEFAULT 0)
@ -16,6 +16,7 @@ if(NOT AWK_PROGRAM)
endif()
set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src")
set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private")
set(ALL_SRCS
"${KRB5_SOURCE_DIR}/util/et/et_name.c"
@ -90,7 +91,6 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/get_tkt_flags.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/set_allowable_enctypes.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/k5sealiov.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/gssapi_err_krb5.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/canon_name.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/inq_cred.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/krb5/export_sec_context.c"
@ -143,11 +143,12 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_buffer_set.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_set.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_token.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/gssapi_err_generic.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/disp_major_status.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_seqstate.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/util_errmap.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/generic/rel_buffer.c"
"${KRB5_ET_BIN_DIR}/lib/gssapi/krb5/gssapi_err_krb5.c"
"${KRB5_ET_BIN_DIR}/lib/gssapi/generic/gssapi_err_generic.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/spnego/spnego_mech.c"
"${KRB5_SOURCE_DIR}/lib/gssapi/spnego/negoex_util.c"
@ -256,8 +257,8 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/util/profile/prof_parse.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_get.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_set.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_err.c"
"${KRB5_SOURCE_DIR}/util/profile/prof_init.c"
"${KRB5_ET_BIN_DIR}/util/profile/prof_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/fwd_tgt.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/conv_creds.c"
"${KRB5_SOURCE_DIR}/lib/krb5/krb/fast.c"
@ -450,13 +451,12 @@ set(ALL_SRCS
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/k5e1_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kdb5_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/asn1_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb5_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb524_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kv5m_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/k5e1_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kdb5_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/asn1_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb5_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb524_err.c"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kv5m_err.c"
"${KRB5_SOURCE_DIR}/lib/krb5/rcache/rc_base.c"
@ -473,7 +473,7 @@ set(ALL_SRCS
)
add_custom_command(
OUTPUT "${KRB5_SOURCE_DIR}/util/et/compile_et"
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/compile_et"
COMMAND /bin/sh
./config_script
./compile_et.sh
@ -481,50 +481,17 @@ add_custom_command(
${AWK_PROGRAM}
sed
>
compile_et
${CMAKE_CURRENT_BINARY_DIR}/compile_et
DEPENDS "${KRB5_SOURCE_DIR}/util/et/compile_et.sh" "${KRB5_SOURCE_DIR}/util/et/config_script"
WORKING_DIRECTORY "${KRB5_SOURCE_DIR}/util/et"
)
file(GLOB_RECURSE ET_FILES
"${KRB5_SOURCE_DIR}/*.et"
)
function(preprocess_et out_var)
set(result)
foreach(in_f ${ARGN})
string(REPLACE
.et
.c
F_C
${in_f}
)
string(REPLACE
.et
.h
F_H
${in_f}
)
get_filename_component(ET_PATH ${in_f} DIRECTORY)
add_custom_command(OUTPUT ${F_C} ${F_H}
COMMAND perl "${KRB5_SOURCE_DIR}/util/et/compile_et" -d "${KRB5_SOURCE_DIR}/util/et" ${in_f}
DEPENDS ${in_f} "${KRB5_SOURCE_DIR}/util/et/compile_et"
WORKING_DIRECTORY ${ET_PATH}
VERBATIM
)
list(APPEND result ${F_C})
endforeach()
set(${out_var} "${result}" PARENT_SCOPE)
endfunction()
add_custom_command(
OUTPUT "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/error_map.h"
OUTPUT "${KRB5_ET_BIN_DIR}/error_map.h"
COMMAND perl
-I../../../util
../../../util/gen-map.pl
-oerror_map.h
-o${KRB5_ET_BIN_DIR}/error_map.h
NAME=gsserrmap
KEY=OM_uint32
VALUE=char*
@ -536,22 +503,21 @@ add_custom_command(
add_custom_target(
ERROR_MAP_H
DEPENDS "${KRB5_SOURCE_DIR}/lib/gssapi/krb5/error_map.h"
DEPENDS "${KRB5_ET_BIN_DIR}/error_map.h"
VERBATIM
)
add_custom_command(
OUTPUT "${KRB5_SOURCE_DIR}/lib/gssapi/generic/errmap.h"
COMMAND perl -w -I../../../util ../../../util/gen.pl bimap errmap.h NAME=mecherrmap LEFT=OM_uint32 RIGHT=struct\ mecherror LEFTPRINT=print_OM_uint32 RIGHTPRINT=mecherror_print LEFTCMP=cmp_OM_uint32 RIGHTCMP=mecherror_cmp
OUTPUT "${KRB5_ET_BIN_DIR}/errmap.h"
COMMAND perl -w -I../../../util ../../../util/gen.pl bimap ${KRB5_ET_BIN_DIR}/errmap.h NAME=mecherrmap LEFT=OM_uint32 RIGHT=struct\ mecherror LEFTPRINT=print_OM_uint32 RIGHTPRINT=mecherror_print LEFTCMP=cmp_OM_uint32 RIGHTCMP=mecherror_cmp
WORKING_DIRECTORY "${KRB5_SOURCE_DIR}/lib/gssapi/generic"
)
add_custom_target(
ERRMAP_H
DEPENDS "${KRB5_SOURCE_DIR}/lib/gssapi/generic/errmap.h"
DEPENDS "${KRB5_ET_BIN_DIR}/errmap.h"
VERBATIM
)
add_custom_target(
KRB_5_H
DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/include/krb5/krb5.h"
@ -567,7 +533,40 @@ add_dependencies(
KRB_5_H
)
preprocess_et(processed_et_files ${ET_FILES})
#
# Generate error tables
#
function(preprocess_et et_path)
string(REPLACE .et .c F_C ${et_path})
string(REPLACE .et .h F_H ${et_path})
get_filename_component(et_dir ${et_path} DIRECTORY)
get_filename_component(et_name ${et_path} NAME_WLE)
add_custom_command(OUTPUT ${F_C} ${F_H} ${KRB5_ET_BIN_DIR}/${et_name}.h
COMMAND perl "${CMAKE_CURRENT_BINARY_DIR}/compile_et" -d "${KRB5_SOURCE_DIR}/util/et" ${et_path}
# for #include w/o path (via -iquote)
COMMAND ${CMAKE_COMMAND} -E create_symlink ${F_H} ${KRB5_ET_BIN_DIR}/${et_name}.h
DEPENDS ${et_path} "${CMAKE_CURRENT_BINARY_DIR}/compile_et"
WORKING_DIRECTORY ${et_dir}
VERBATIM
)
endfunction()
function(generate_error_tables)
file(GLOB_RECURSE ET_FILES "${KRB5_SOURCE_DIR}/*.et")
foreach(et_path ${ET_FILES})
string(REPLACE ${KRB5_SOURCE_DIR} ${KRB5_ET_BIN_DIR} et_bin_path ${et_path})
string(REPLACE / _ et_target_name ${et_path})
get_filename_component(et_bin_dir ${et_bin_path} DIRECTORY)
add_custom_command(OUTPUT ${et_bin_path}
COMMAND ${CMAKE_COMMAND} -E make_directory ${et_bin_dir}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${et_path} ${et_bin_path}
VERBATIM
)
preprocess_et(${et_bin_path})
endforeach()
endfunction()
generate_error_tables()
if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
add_custom_command(
@ -634,12 +633,12 @@ file(MAKE_DIRECTORY
SET(KRBHDEP
"${KRB5_SOURCE_DIR}/include/krb5/krb5.hin"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb5_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/k5e1_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kdb5_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/kv5m_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/krb524_err.h"
"${KRB5_SOURCE_DIR}/lib/krb5/error_tables/asn1_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb5_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/k5e1_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kdb5_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/kv5m_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/krb524_err.h"
"${KRB5_ET_BIN_DIR}/lib/krb5/error_tables/asn1_err.h"
)
# cmake < 3.18 does not have 'cat' command
@ -656,6 +655,11 @@ target_include_directories(_krb5 SYSTEM BEFORE PUBLIC
"${CMAKE_CURRENT_BINARY_DIR}/include"
)
target_compile_options(_krb5 PRIVATE
# For '#include "file.h"'
-iquote "${CMAKE_CURRENT_BINARY_DIR}/include_private"
)
target_include_directories(_krb5 PRIVATE
"${CMAKE_CURRENT_BINARY_DIR}/include_private" # For autoconf.h and other generated headers.
${KRB5_SOURCE_DIR}

contrib/poco vendored

@ -1 +1 @@
Subproject commit 520a90e02e3e5cb90afeae1846d161dbc508a6f1
Subproject commit 008b16469471d55b176db181756c94e3f14dd2dc

contrib/replxx vendored

@ -1 +1 @@
Subproject commit 6f0b6f151ae2a044625ae93acd19ca365fcea64d
Subproject commit 3fd0e3c9364a589447453d9906d854ebd8d385c5

contrib/unixodbc vendored

@ -1 +1 @@
Subproject commit b0ad30f7f6289c12b76f04bfb9d466374bb32168
Subproject commit a2cd5395e8c7f7390025ec93af5bfebef3fb5fcd

View File

@ -20,6 +20,8 @@ ENV LANG=en_US.UTF-8 \
COPY --from=glibc-donor /lib/linux-gnu/libc.so.6 /lib/linux-gnu/libdl.so.2 /lib/linux-gnu/libm.so.6 /lib/linux-gnu/libpthread.so.0 /lib/linux-gnu/librt.so.1 /lib/linux-gnu/libnss_dns.so.2 /lib/linux-gnu/libnss_files.so.2 /lib/linux-gnu/libresolv.so.2 /lib/linux-gnu/ld-2.31.so /lib/
COPY --from=glibc-donor /etc/nsswitch.conf /etc/
COPY entrypoint.sh /entrypoint.sh
ARG TARGETARCH
RUN arch=${TARGETARCH:-amd64} \
&& case $arch in \
amd64) mkdir -p /lib64 && ln -sf /lib/ld-2.31.so /lib64/ld-linux-x86-64.so.2 ;; \

View File

@ -25,13 +25,21 @@ read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
env
cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..
if [ "coverity" == "$COMBINED_OUTPUT" ]
then
wget --post-data "token=$COV_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /opt/cov-analysis --strip-components 1
export PATH=$PATH:/opt/cov-analysis/bin
cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC"
SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int"
fi
cache_status
# clear cache stats
ccache --zero-stats ||:
# No quotes because I want it to expand to nothing if empty.
# shellcheck disable=SC2086
ninja $NINJA_FLAGS clickhouse-bundle
# shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty.
$SCAN_WRAPPER ninja $NINJA_FLAGS clickhouse-bundle
cache_status
@ -91,6 +99,12 @@ then
mv "$COMBINED_OUTPUT.tgz" /output
fi
if [ "coverity" == "$COMBINED_OUTPUT" ]
then
tar -cv -I pigz -f "coverity-scan.tgz" cov-int
mv "coverity-scan.tgz" /output
fi
# Also build fuzzers if any sanitizer specified
# if [ -n "$SANITIZER" ]
# then

View File

@ -86,6 +86,7 @@ def parse_env_variables(
additional_pkgs,
with_coverage,
with_binaries,
coverity_scan,
):
DARWIN_SUFFIX = "-darwin"
DARWIN_ARM_SUFFIX = "-darwin-aarch64"
@ -176,6 +177,9 @@ def parse_env_variables(
if package_type == "performance":
result.append("COMBINED_OUTPUT=performance")
cmake_flags.append("-DENABLE_TESTS=0")
elif package_type == "coverity":
result.append("COMBINED_OUTPUT=coverity")
result.append("COV_TOKEN={}".format(cov_token))
elif split_binary:
result.append("COMBINED_OUTPUT=shared_build")
@ -262,9 +266,8 @@ if __name__ == "__main__":
# and configs to be used for performance test.
parser.add_argument(
"--package-type",
choices=("deb", "binary", "performance"),
choices=["deb", "binary", "performance", "coverity"],
required=True,
help="a build type",
)
parser.add_argument(
"--clickhouse-repo-path",
@ -325,12 +328,13 @@ if __name__ == "__main__":
parser.add_argument(
"--docker-image-version", default="latest", help="docker image tag to use"
)
parser.add_argument("--cov_token", default="")
args = parser.parse_args()
if not os.path.isabs(args.output_dir):
args.output_dir = os.path.abspath(os.path.join(os.getcwd(), args.output_dir))
image_type = "binary" if args.package_type == "performance" else args.package_type
image_type = "binary" if args.package_type in ("performance", "coverity") else args.package_type
image_name = "clickhouse/binary-builder"
if not os.path.isabs(args.clickhouse_repo_path):
@ -372,6 +376,7 @@ if __name__ == "__main__":
args.additional_pkgs,
args.with_coverage,
args.with_binaries,
args.cov_token,
)
run_docker_image_with_env(

View File

@ -94,8 +94,9 @@ RUN arch=${TARGETARCH:-amd64} \
&& apt-get update \
&& apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
&& for package in ${PACKAGES}; do \
apt-get install --allow-unauthenticated --yes --no-install-recommends "${package}=${VERSION}" || exit 1 \
packages="${packages} ${package}=${VERSION}" \
; done \
&& apt-get install --allow-unauthenticated --yes --no-install-recommends ${packages} || exit 1 \
; fi \
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
&& rm -rf \

View File

@ -226,7 +226,6 @@ quit
--receive_data_timeout_ms=10000 \
--stacktrace \
--query-fuzzer-runs=1000 \
--testmode \
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
$NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \

View File

@ -1,8 +1,10 @@
# docker build -t clickhouse/mysql-js-client .
# MySQL JavaScript client docker container
FROM node:8
FROM node:16.14.2
WORKDIR /usr/app
RUN npm install mysql
COPY ./test.js test.js
COPY ./test.js ./test.js

View File

@ -106,17 +106,6 @@ function stop()
function start()
{
# Rename existing log file - it will be more convenient to read separate files for separate server runs.
if [ -f '/var/log/clickhouse-server/clickhouse-server.log' ]
then
log_file_counter=1
while [ -f "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}" ]
do
log_file_counter=$((log_file_counter + 1))
done
mv '/var/log/clickhouse-server/clickhouse-server.log' "/var/log/clickhouse-server/clickhouse-server.log.${log_file_counter}"
fi
counter=0
until clickhouse-client --query "SELECT 1"
do
@ -190,6 +179,8 @@ clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordin
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
start
clickhouse-client --query "SHOW TABLES FROM datasets"
@ -205,6 +196,8 @@ clickhouse-client --query "SHOW TABLES FROM test"
|| echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log
start
clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
@ -263,10 +256,12 @@ mkdir previous_release_package_folder
clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log
if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
then
echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv
stop
# Uninstall current packages
dpkg --remove clickhouse-client
@ -289,7 +284,7 @@ then
install_packages package_folder
mkdir tmp_stress_output
./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
&& echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
@ -297,8 +292,9 @@ then
clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
stop
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log
# Start new server
configure
start 500
@ -310,8 +306,9 @@ then
# Let the server run for a while before checking log.
sleep 60
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.clean.log
# Error messages (we should ignore some errors)
echo "Check for Error messages in server log:"
@ -332,7 +329,7 @@ then
-e "Code: 1000, e.code() = 111, Connection refused" \
-e "UNFINISHED" \
-e "Renaming unexpected part" \
/var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
/var/log/clickhouse-server/clickhouse-server.backward.*.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
&& echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
@ -348,13 +345,13 @@ then
rm -f /test_output/tmp
# OOM
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Logical errors
echo "Check for Logical errors in server log:"
zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \
zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \
&& echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
@ -362,19 +359,18 @@ then
[ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt
# Crash
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \
&& echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv
# It also checks for crash without stacktrace (printed by watchdog)
echo "Check for Fatal message in server log:"
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \
zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \
&& echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
|| echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
# Remove file bc_check_fatal_messages.txt if it's empty
[ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt
else
echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
fi

View File

@ -43,7 +43,7 @@ toc_title: Adopters
| <a href="https://city-mobil.ru" class="favicon">Citymobil</a> | Taxi | Analytics | — | — | [Blog Post in Russian, March 2020](https://habr.com/en/company/citymobil/blog/490660/) |
| <a href="https://cloudflare.com" class="favicon">Cloudflare</a> | CDN | Traffic analysis | 36 servers | — | [Blog post, May 2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [Blog post, March 2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| <a href="https://corporate.comcast.com/" class="favicon">Comcast</a> | Media | CDN Traffic Analysis | — | — | [ApacheCon 2019 Talk](https://www.youtube.com/watch?v=e9TZ6gFDjNg) |
| <a href="https://contentsquare.com" class="favicon">ContentSquare</a> | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| <a href="https://contentsquare.com" class="favicon">Contentsquare</a> | Web analytics | Main product | — | — | [Blog post in French, November 2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| <a href="https://coru.net/" class="favicon">Corunet</a> | Analytics | Main product | — | — | [Slides in English, April 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a> | Finance AI | Analysis | — | — | [Slides in English, November 2019](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |
| <a href="https://crazypanda.ru/en/" class="favicon">Crazypanda</a> | Games | | — | — | Live session on ClickHouse meetup |

View File

@ -1467,6 +1467,18 @@ The update is performed asynchronously, in a separate system thread.
- [background_schedule_pool_size](../../operations/settings/settings.md#background_schedule_pool_size)
## dns_max_consecutive_failures {#server-settings-dns-max-consecutive-failures}
The number of consecutive failures accepted when updating a DNS cache entry before it is dropped.
Use `0` to disable cache dropping (entries will only be cleaned by `SYSTEM DROP DNS CACHE`).
**Default value**: 5.
**See also**
- [`SYSTEM DROP DNS CACHE`](../../sql-reference/statements/system.md#query_language-system-drop-dns-cache)
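As an illustration, a minimal sketch of how this might look in the server's `config.xml` (the value `10` is only an example; the default is `5`):
``` xml
<clickhouse>
    <!-- Drop a cached DNS entry after 10 consecutive failed resolution attempts (example value). -->
    <dns_max_consecutive_failures>10</dns_max_consecutive_failures>
</clickhouse>
```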
## distributed_ddl {#server-settings-distributed_ddl}
Manage executing [distributed ddl queries](../../sql-reference/distributed-ddl.md) (CREATE, DROP, ALTER, RENAME) on cluster.

View File

@ -1062,6 +1062,15 @@ Result:
└─────────────┴───────────┘
```
## log_processors_profiles {#settings-log_processors_profiles}
Writes the time each processor spent executing and waiting for data to the `system.processors_profile_log` table.
See also:
- [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md#system-processors_profile_log)
- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)
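A minimal sketch of enabling it for a single query via a `SETTINGS` clause (a fuller walk-through is in the `system.processors_profile_log` table documentation below):
``` sql
-- Enable per-processor profiling for this query only (illustrative).
SELECT sleep(1)
SETTINGS log_processors_profiles = 1;
```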
## max_insert_block_size {#settings-max_insert_block_size}
The size of blocks (in a count of rows) to form for insertion into a table.

View File

@ -0,0 +1,75 @@
# system.processors_profile_log {#system-processors_profile_log}
This table contains profiling information at the processor level (processors can be seen with [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)).
Columns:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened.
- `id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the processor.
- `parent_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Parent processor IDs.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — ID of the query.
- `name` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) — Name of the processor.
- `elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was executing.
- `input_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting for data (from another processor).
- `output_wait_elapsed_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of microseconds this processor was waiting because its output port was full.
**Example**
Query:
``` sql
EXPLAIN PIPELINE
SELECT sleep(1)
┌─explain─────────────────────────┐
│ (Expression) │
│ ExpressionTransform │
│ (SettingQuotaAndLimits) │
│ (ReadFromStorage) │
│ SourceFromSingleChunk 0 → 1 │
└─────────────────────────────────┘
SELECT sleep(1)
SETTINGS log_processors_profiles = 1
Query id: feb5ed16-1c24-4227-aa54-78c02b3b27d4
┌─sleep(1)─┐
│ 0 │
└──────────┘
1 rows in set. Elapsed: 1.018 sec.
SELECT
name,
elapsed_us,
input_wait_elapsed_us,
output_wait_elapsed_us
FROM system.processors_profile_log
WHERE query_id = 'feb5ed16-1c24-4227-aa54-78c02b3b27d4'
ORDER BY name ASC
```
Result:
``` text
┌─name────────────────────┬─elapsed_us─┬─input_wait_elapsed_us─┬─output_wait_elapsed_us─┐
│ ExpressionTransform │ 1000497 │ 2823 │ 197 │
│ LazyOutputFormat │ 36 │ 1002188 │ 0 │
│ LimitsCheckingTransform │ 10 │ 1002994 │ 106 │
│ NullSource │ 5 │ 1002074 │ 0 │
│ NullSource │ 1 │ 1002084 │ 0 │
│ SourceFromSingleChunk │ 45 │ 4736 │ 1000819 │
└─────────────────────────┴────────────┴───────────────────────┴────────────────────────┘
```
Here you can see:
- `ExpressionTransform` was executing the `sleep(1)` function, so its `work` takes about 1e6 microseconds, and therefore `elapsed_us` > 1e6.
- `SourceFromSingleChunk` had to wait, because `ExpressionTransform` does not accept any data while `sleep(1)` is executing, so it stayed in the `PortFull` state for about 1e6 us, and therefore `output_wait_elapsed_us` > 1e6.
- `LimitsCheckingTransform`/`NullSource`/`LazyOutputFormat` had to wait until `ExpressionTransform` finished executing `sleep(1)` before they could process the result, so `input_wait_elapsed_us` > 1e6.
**See Also**
- [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline)

View File

@ -393,6 +393,13 @@ This is a generalization of other functions named `toStartOf*`. For example,
`toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`,
`toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)` etc.
## toLastDayOfMonth {#toLastDayOfMonth}
Rounds up a date or date with time to the last day of the month.
Returns the date.
Alias: `LAST_DAY`.
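A small illustrative query (the date is arbitrary):
``` sql
SELECT toLastDayOfMonth(toDate('2022-04-07')) AS last_day;
-- Returns 2022-04-30
```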
## toTime {#totime}
Converts a date with time to a certain fixed date, while preserving the time.

View File

@ -77,7 +77,7 @@ A function configuration contains the following settings:
- `argument` - argument description with the `type`, and optional `name` of an argument. Each argument is described in a separate setting. Specifying name is necessary if argument names are part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Default argument name value is `c` + argument_number.
- `format` - a [format](../../interfaces/formats.md) in which arguments are passed to the command.
- `return_type` - the type of a returned value.
- `return_name` - name of retuned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `return_name` - name of returned value. Specifying return name is necessary if return name is part of serialization for user defined function format like [Native](../../interfaces/formats.md#native) or [JSONEachRow](../../interfaces/formats.md#jsoneachrow). Optional. Default value is `result`.
- `type` - an executable type. If `type` is set to `executable` then single command is started. If it is set to `executable_pool` then a pool of commands is created.
- `max_command_execution_time` - maximum execution time in seconds for processing block of data. This setting is valid for `executable_pool` commands only. Optional. Default value is `10`.
- `command_termination_timeout` - time in seconds during which a command should finish after its pipe is closed. After that time `SIGTERM` is sent to the process executing the command. Optional. Default value is `10`.

View File

@ -2499,3 +2499,41 @@ Result:
│ 286 │
└──────────────────────────┘
```
## getTypeSerializationStreams {#getTypeSerializationStreams}
Returns the serialization streams of a data type.
**Syntax**
``` sql
getTypeSerializationStreams(type_name)
getTypeSerializationStreams(column)
```
**Arguments**
- `type_name` - Name of the data type to get its serialization paths for. [String](../../sql-reference/data-types/string.md#string).
- `column` - Any column that has a data type.
**Returned value**
- List of serialization streams.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
**Example**
Query:
``` sql
SELECT getTypeSerializationStreams('Array(Array(Int8))')
```
Result:
``` text
┌───────────────────────getTypeSerializationStreams('Array(Array(Int8))')─────────────────────────────┐
│ ['{ArraySizes}','{ArrayElements, ArraySizes}','{ArrayElements, ArrayElements, Regular}'] │
└─────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

View File

@ -27,7 +27,7 @@ toc_title: "\u30A2\u30C0\u30D7\u30BF\u30FC"
| <a href="http://cisco.com/" class="favicon">Cisco</a> | ネットワーク | トラフィック分析 | — | — | [ライトニングトーク2019](https://youtu.be/-hI1vDR2oPY?t=5057) |
| <a href="https://www.citadelsecurities.com/" class="favicon">Citadel Securities</a> | 金融 | — | — | — | [2019年の貢献](https://github.com/ClickHouse/ClickHouse/pull/4774) |
| <a href="https://city-mobil.ru" class="favicon">シティモービル</a> | タクシー | 分析 | — | — | [ロシア語でのブログ投稿,月2020](https://habr.com/en/company/citymobil/blog/490660/) |
| <a href="https://contentsquare.com" class="favicon">ContentSquare</a> | ウェブ分析 | 主な製品 | — | — | [フランス語でのブログ投稿,November2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| <a href="https://contentsquare.com" class="favicon">Contentsquare</a> | ウェブ分析 | 主な製品 | — | — | [フランス語でのブログ投稿,November2018](http://souslecapot.net/2018/11/21/patrick-chatain-vp-engineering-chez-contentsquare-penser-davantage-amelioration-continue-que-revolution-constante/) |
| <a href="https://cloudflare.com" class="favicon">Cloudflare</a> | CDN | トラフィック分析 | 36台のサーバー | — | [ブログ投稿,月2017](https://blog.cloudflare.com/how-cloudflare-analyzes-1m-dns-queries-per-second/), [ブログ投稿,月2018](https://blog.cloudflare.com/http-analytics-for-6m-requests-per-second-using-clickhouse/) |
| <a href="https://coru.net/" class="favicon">コルネット</a> | 分析 | 主な製品 | — | — | [2019年英語スライド](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup21/predictive_models.pdf) |
| <a href="https://www.creditx.com" class="favicon">CraiditX 氪信</a> | ファイナンスAI | 分析 | — | — | [2019年のスライド](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup33/udf.pptx) |

View File

@ -126,7 +126,7 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
**См. также**
- [Движок таблиц PostgreSQL](../../sql-reference/table-functions/postgresql.md)
- [Движок таблиц PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
- [Использование PostgreSQL как источника данных для внешнего словаря](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
[Оригинальная статья](https://clickhouse.com/docs/ru/sql-reference/table-functions/postgresql/) <!--hide-->

View File

@ -163,10 +163,24 @@ void Client::initialize(Poco::Util::Application & self)
configReadClient(config(), home_path);
/** getenv is thread-safe in Linux glibc and in all sane libc implementations.
* But the standard does not guarantee that subsequent calls will not rewrite the value by returned pointer.
*
* man getenv:
*
* As typically implemented, getenv() returns a pointer to a string within the environment list.
* The caller must take care not to modify this string, since that would change the environment of
* the process.
*
* The implementation of getenv() is not required to be reentrant. The string pointed to by the return value of getenv()
* may be statically allocated, and can be modified by a subsequent call to getenv(), putenv(3), setenv(3), or unsetenv(3).
*/
const char * env_user = getenv("CLICKHOUSE_USER");
const char * env_password = getenv("CLICKHOUSE_PASSWORD");
if (env_user)
config().setString("user", env_user);
const char * env_password = getenv("CLICKHOUSE_PASSWORD");
if (env_password)
config().setString("password", env_password);
@ -810,7 +824,7 @@ void Client::addOptions(OptionsDescription & options_description)
("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
("compression", po::value<bool>(), "enable or disable compression")
("compression", po::value<bool>(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
("query-fuzzer-runs", po::value<int>()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
("interleave-queries-file", po::value<std::vector<std::string>>()->multitoken(),
@ -1005,6 +1019,7 @@ void Client::processConfig()
global_context->setCurrentQueryId(query_id);
}
print_stack_trace = config().getBool("stacktrace", false);
logging_initialized = true;
if (config().has("multiquery"))
is_multiquery = true;

View File

@ -434,6 +434,14 @@ catch (...)
return getCurrentExceptionCode();
}
void LocalServer::updateLoggerLevel(const String & logs_level)
{
if (!logging_initialized)
return;
config().setString("logger.level", logs_level);
updateLevels(config(), logger());
}
void LocalServer::processConfig()
{
@ -460,30 +468,31 @@ void LocalServer::processConfig()
auto logging = (config().has("logger.console")
|| config().has("logger.level")
|| config().has("log-level")
|| config().has("send_logs_level")
|| config().has("logger.log"));
auto file_logging = config().has("server_logs_file");
if (is_interactive && logging && !file_logging)
throw Exception("For interactive mode logging is allowed only with --server_logs_file option",
ErrorCodes::BAD_ARGUMENTS);
auto level = config().getString("log-level", "trace");
if (file_logging)
if (config().has("server_logs_file"))
{
auto level = Poco::Logger::parseLevel(config().getString("log-level", "trace"));
Poco::Logger::root().setLevel(level);
auto poco_logs_level = Poco::Logger::parseLevel(level);
Poco::Logger::root().setLevel(poco_logs_level);
Poco::Logger::root().setChannel(Poco::AutoPtr<Poco::SimpleFileChannel>(new Poco::SimpleFileChannel(server_logs_file)));
logging_initialized = true;
}
else if (logging)
else if (logging || is_interactive)
{
// force enable logging
config().setString("logger", "logger");
// sensitive data rules are not used here
auto log_level_default = is_interactive && !logging ? "none" : level;
config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default)));
buildLoggers(config(), logger(), "clickhouse-local");
logging_initialized = true;
}
else
{
Poco::Logger::root().setLevel("none");
Poco::Logger::root().setChannel(Poco::AutoPtr<Poco::NullChannel>(new Poco::NullChannel()));
logging_initialized = false;
}
shared_context = Context::createShared();
@ -713,6 +722,8 @@ void LocalServer::processOptions(const OptionsDescription &, const CommandLineOp
config().setString("logger.log", options["logger.log"].as<std::string>());
if (options.count("logger.level"))
config().setString("logger.level", options["logger.level"].as<std::string>());
if (options.count("send_logs_level"))
config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
}
}

View File

@ -46,6 +46,8 @@ protected:
void processConfig() override;
void updateLoggerLevel(const String & logs_level) override;
private:
/** Composes CREATE subquery based on passed arguments (--structure --file --table and --input-format)
* This query will be executed first, before queries passed through --query argument

View File

@ -1503,7 +1503,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
else
{
/// Initialize a watcher periodically updating DNS cache
dns_cache_updater = std::make_unique<DNSCacheUpdater>(global_context, config().getInt("dns_cache_update_period", 15));
dns_cache_updater = std::make_unique<DNSCacheUpdater>(
global_context, config().getInt("dns_cache_update_period", 15), config().getUInt("dns_max_consecutive_failures", 5));
}
#if defined(OS_LINUX)

View File

@ -1030,14 +1030,26 @@
<flush_interval_milliseconds>1000</flush_interval_milliseconds>
</crash_log>
<!-- Session log. Stores user log in (successful or not) and log out events. -->
<session_log>
<!-- Session log. Stores user log in (successful or not) and log out events.
Note: session log has known security issues and should not be used in production.
-->
<!-- <session_log>
<database>system</database>
<table>session_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</session_log>
</session_log> -->
<!-- Profiling on Processors level. -->
<processors_profile_log>
<database>system</database>
<table>processors_profile_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</processors_profile_log>
<!-- <top_level_domains_path>/var/lib/clickhouse/top_level_domains/</top_level_domains_path> -->
<!-- Custom TLD lists.

View File

@ -49,6 +49,18 @@ if (COMPILER_GCC)
add_definitions ("-fno-tree-loop-distribute-patterns")
endif ()
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
# If turned ON, this option defines such macro.
# See `src/Common/TargetSpecific.h`
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
if (ENABLE_MULTITARGET_CODE)
add_definitions(-DENABLE_MULTITARGET_CODE=1)
else()
add_definitions(-DENABLE_MULTITARGET_CODE=0)
endif()
add_subdirectory (Access)
add_subdirectory (Backups)
add_subdirectory (Columns)

View File

@ -1325,6 +1325,13 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin
}
}
if (const auto * set_query = parsed_query->as<ASTSetQuery>())
{
const auto * logs_level_field = set_query->changes.tryGet(std::string_view{"send_logs_level"});
if (logs_level_field)
updateLoggerLevel(logs_level_field->safeGet<String>());
}
processed_rows = 0;
written_first_block = false;
progress_indication.resetProgress();
@ -1521,24 +1528,19 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText(
bool ClientBase::executeMultiQuery(const String & all_queries_text)
{
// It makes sense not to base any control flow on this, so that it is
// the same in tests and in normal usage. The only difference is that in
// normal mode we ignore the test hints.
const bool test_mode = config().has("testmode");
if (test_mode)
{
/// disable logs if expects errors
TestHint test_hint(test_mode, all_queries_text);
if (test_hint.clientError() || test_hint.serverError())
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
}
bool echo_query = echo_queries;
/// Test tags are started with "--" so they are interpreted as comments anyway.
/// But if the echo is enabled we have to remove the test tags from `all_queries_text`
/// because we don't want test tags to be echoed.
size_t test_tags_length = test_mode ? getTestTagsLength(all_queries_text) : 0;
{
/// disable logs if expects errors
TestHint test_hint(all_queries_text);
if (test_hint.clientError() || test_hint.serverError())
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
}
size_t test_tags_length = getTestTagsLength(all_queries_text);
/// Several queries separated by ';'.
/// INSERT data is ended by the end of line, not ';'.
@ -1575,7 +1577,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// Try to find test hint for syntax error. We don't know where
// the query ends because we failed to parse it, so we consume
// the entire line.
TestHint hint(test_mode, String(this_query_begin, this_query_end - this_query_begin));
TestHint hint(String(this_query_begin, this_query_end - this_query_begin));
if (hint.serverError())
{
// Syntax errors are considered as client errors
@ -1613,7 +1615,7 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
// Look for the hint in the text of query + insert data + trailing
// comments, e.g. insert into t format CSV 'a' -- { serverError 123 }.
// Use the updated query boundaries we just calculated.
TestHint test_hint(test_mode, full_query);
TestHint test_hint(full_query);
// Echo all queries if asked; makes for a more readable reference file.
echo_query = test_hint.echoQueries().value_or(echo_query);
@ -2214,8 +2216,6 @@ void ClientBase::init(int argc, char ** argv)
("suggestion_limit", po::value<int>()->default_value(10000),
"Suggestion limit for how many databases, tables and columns to fetch.")
("testmode,T", "enable test hints in comments")
("format,f", po::value<std::string>(), "default output format")
("vertical,E", "vertical output format, same as --format=Vertical or FORMAT Vertical or \\G at end of command")
("highlight", po::value<bool>()->default_value(true), "enable or disable basic syntax highlight in interactive command line")
@ -2321,8 +2321,6 @@ void ClientBase::init(int argc, char ** argv)
config().setBool("interactive", true);
if (options.count("pager"))
config().setString("pager", options["pager"].as<std::string>());
if (options.count("testmode"))
config().setBool("testmode", true);
if (options.count("log-level"))
Poco::Logger::root().setLevel(options["log-level"].as<std::string>());

View File

@ -95,6 +95,7 @@ protected:
std::optional<ProgramOptionsDescription> hosts_and_ports_description;
};
virtual void updateLoggerLevel(const String &) {}
virtual void printHelpMessage(const OptionsDescription & options_description) = 0;
virtual void addOptions(OptionsDescription & options_description) = 0;
virtual void processOptions(const OptionsDescription & options_description,
@ -265,6 +266,8 @@ protected:
bool allow_repeated_settings = false;
bool cancelled = false;
bool logging_initialized = false;
};
}

View File

@ -32,12 +32,9 @@ int parseErrorCode(DB::ReadBufferFromString & in)
namespace DB
{
TestHint::TestHint(bool enabled_, const String & query_)
TestHint::TestHint(const String & query_)
: query(query_)
{
if (!enabled_)
return;
// Don't parse error hints in leading comments, because it feels weird.
// Leading 'echo' hint is OK.
bool is_leading_hint = true;

View File

@ -7,7 +7,7 @@
namespace DB
{
/// Checks expected server and client error codes in --testmode.
/// Checks expected server and client error codes.
///
/// The following comment hints are supported:
///
@ -25,12 +25,12 @@ namespace DB
///
/// Examples:
///
/// - echo 'select / -- { clientError 62 }' | clickhouse-client --testmode -nm
/// - echo 'select / -- { clientError 62 }' | clickhouse-client -nm
///
// Here the client parses the query but it is incorrect, so it expects
/// SYNTAX_ERROR (62).
///
/// - echo 'select foo -- { serverError 47 }' | clickhouse-client --testmode -nm
/// - echo 'select foo -- { serverError 47 }' | clickhouse-client -nm
///
/// But here the query is correct, but there is no such column "foo", so it
/// is UNKNOWN_IDENTIFIER server error.
@ -43,7 +43,7 @@ namespace DB
class TestHint
{
public:
TestHint(bool enabled_, const String & query_);
TestHint(const String & query_);
int serverError() const { return server_error; }
int clientError() const { return client_error; }

View File

@ -125,7 +125,7 @@ class FindResultImpl : public FindResultImplBase, public FindResultImplOffsetBas
public:
FindResultImpl()
: FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0)
: FindResultImplBase(false), FindResultImplOffsetBase<need_offset>(0) // NOLINT(clang-analyzer-optin.cplusplus.UninitializedObject) intentionally allow uninitialized value here
{}
FindResultImpl(Mapped * value_, bool found_, size_t off)

View File

@ -214,6 +214,9 @@ private:
/// offset in bits to the next to the rightmost bit at that byte; or zero if the rightmost bit is the rightmost bit in that byte.
offset_r = (l + content_width) % 8;
content_l = nullptr;
content_r = nullptr;
}
UInt8 ALWAYS_INLINE read(UInt8 value_l) const

View File

@ -81,6 +81,14 @@
M(ActiveSyncDrainedConnections, "Number of active connections drained synchronously.") \
M(AsynchronousReadWait, "Number of threads waiting for asynchronous read.") \
M(PendingAsyncInsert, "Number of asynchronous inserts that are waiting for flush.") \
M(KafkaConsumers, "Number of active Kafka consumers") \
M(KafkaConsumersWithAssignment, "Number of active Kafka consumers which have some partitions assigned.") \
M(KafkaProducers, "Number of active Kafka producer created") \
M(KafkaLibrdkafkaThreads, "Number of active librdkafka threads") \
M(KafkaBackgroundReads, "Number of background reads currently working (populating materialized views from Kafka)") \
M(KafkaConsumersInUse, "Number of consumers which are currently used by direct or background reads") \
M(KafkaWrites, "Number of currently running inserts to Kafka") \
M(KafkaAssignedPartitions, "Number of partitions Kafka tables currently assigned to") \
namespace CurrentMetrics
{

View File

@ -118,12 +118,15 @@ static DNSResolver::IPAddresses resolveIPAddressImpl(const std::string & host)
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.message());
LOG_ERROR(&Poco::Logger::get("DNSResolver"), "Cannot resolve host ({}), error {}: {}.", host, e.code(), e.name());
addresses.clear();
}
if (addresses.empty())
{
ProfileEvents::increment(ProfileEvents::DNSError);
throw Exception("Not found address of host: " + host, ErrorCodes::DNS_ERROR);
}
return addresses;
}
@ -142,6 +145,9 @@ static String reverseResolveImpl(const Poco::Net::IPAddress & address)
struct DNSResolver::Impl
{
using HostWithConsecutiveFailures = std::unordered_map<String, UInt32>;
using AddressWithConsecutiveFailures = std::unordered_map<Poco::Net::IPAddress, UInt32>;
CachedFn<&resolveIPAddressImpl> cache_host;
CachedFn<&reverseResolveImpl> cache_address;
@ -152,12 +158,12 @@ struct DNSResolver::Impl
std::optional<String> host_name;
/// Store hosts, which was asked to resolve from last update of DNS cache.
NameSet new_hosts;
std::unordered_set<Poco::Net::IPAddress> new_addresses;
HostWithConsecutiveFailures new_hosts;
AddressWithConsecutiveFailures new_addresses;
/// Store all hosts, which was whenever asked to resolve
NameSet known_hosts;
std::unordered_set<Poco::Net::IPAddress> known_addresses;
HostWithConsecutiveFailures known_hosts;
AddressWithConsecutiveFailures known_addresses;
/// If disabled, will not make cache lookups, will resolve addresses manually on each call
std::atomic<bool> disable_cache{false};
@ -246,38 +252,68 @@ String DNSResolver::getHostName()
static const String & cacheElemToString(const String & str) { return str; }
static String cacheElemToString(const Poco::Net::IPAddress & addr) { return addr.toString(); }
template<typename UpdateF, typename ElemsT>
bool DNSResolver::updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg)
template <typename UpdateF, typename ElemsT>
bool DNSResolver::updateCacheImpl(
UpdateF && update_func,
ElemsT && elems,
UInt32 max_consecutive_failures,
const String & notfound_log_msg,
const String & dropped_log_msg)
{
bool updated = false;
String lost_elems;
for (const auto & elem : elems)
using iterators = typename std::remove_reference_t<decltype(elems)>::iterator;
std::vector<iterators> elements_to_drop;
for (auto it = elems.begin(); it != elems.end(); it++)
{
try
{
updated |= (this->*update_func)(elem);
updated |= (this->*update_func)(it->first);
it->second = 0;
}
catch (const Poco::Net::NetException &)
catch (const DB::Exception & e)
{
ProfileEvents::increment(ProfileEvents::DNSError);
if (e.code() != ErrorCodes::DNS_ERROR)
{
tryLogCurrentException(log, __PRETTY_FUNCTION__);
continue;
}
if (!lost_elems.empty())
lost_elems += ", ";
lost_elems += cacheElemToString(elem);
lost_elems += cacheElemToString(it->first);
if (max_consecutive_failures)
{
it->second++;
if (it->second >= max_consecutive_failures)
elements_to_drop.emplace_back(it);
}
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
tryLogCurrentException(log, __PRETTY_FUNCTION__);
}
}
if (!lost_elems.empty())
LOG_INFO(log, fmt::runtime(log_msg), lost_elems);
LOG_INFO(log, fmt::runtime(notfound_log_msg), lost_elems);
if (elements_to_drop.size())
{
updated = true;
String deleted_elements;
for (auto it : elements_to_drop)
{
if (!deleted_elements.empty())
deleted_elements += ", ";
deleted_elements += cacheElemToString(it->first);
elems.erase(it);
}
LOG_INFO(log, fmt::runtime(dropped_log_msg), deleted_elements);
}
return updated;
}
bool DNSResolver::updateCache()
bool DNSResolver::updateCache(UInt32 max_consecutive_failures)
{
LOG_DEBUG(log, "Updating DNS cache");
@ -301,8 +337,14 @@ bool DNSResolver::updateCache()
/// DROP DNS CACHE will wait on update_mutex (possibly while holding drop_mutex)
std::lock_guard lock(impl->update_mutex);
bool hosts_updated = updateCacheImpl(&DNSResolver::updateHost, impl->known_hosts, "Cached hosts not found: {}");
updateCacheImpl(&DNSResolver::updateAddress, impl->known_addresses, "Cached addresses not found: {}");
bool hosts_updated = updateCacheImpl(
&DNSResolver::updateHost, impl->known_hosts, max_consecutive_failures, "Cached hosts not found: {}", "Cached hosts dropped: {}");
updateCacheImpl(
&DNSResolver::updateAddress,
impl->known_addresses,
max_consecutive_failures,
"Cached addresses not found: {}",
"Cached addresses dropped: {}");
LOG_DEBUG(log, "Updated DNS cache");
return hosts_updated;
@ -326,13 +368,15 @@ bool DNSResolver::updateAddress(const Poco::Net::IPAddress & address)
void DNSResolver::addToNewHosts(const String & host)
{
std::lock_guard lock(impl->drop_mutex);
impl->new_hosts.insert(host);
UInt8 consecutive_failures = 0;
impl->new_hosts.insert({host, consecutive_failures});
}
void DNSResolver::addToNewAddresses(const Poco::Net::IPAddress & address)
{
std::lock_guard lock(impl->drop_mutex);
impl->new_addresses.insert(address);
UInt8 consecutive_failures = 0;
impl->new_addresses.insert({address, consecutive_failures});
}
DNSResolver::~DNSResolver() = default;
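
For illustration, here is a minimal standalone sketch of the consecutive-failure bookkeeping used by updateCacheImpl above: a per-element counter is reset on success, incremented on failure, and once it reaches the caller-supplied threshold the element is dropped. The names (retryOrDrop, resolve_ok) and the in-loop erase are illustrative simplifications, not the ClickHouse API, which defers the erasure and only counts DNS_ERROR failures.

#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>

/// Sketch of the rule: reset the counter on success, increment it on failure,
/// and drop the element after max_consecutive_failures consecutive failures.
bool retryOrDrop(std::unordered_map<std::string, uint32_t> & elems,
                 const std::function<bool(const std::string &)> & resolve_ok,
                 uint32_t max_consecutive_failures)
{
    bool dropped_any = false;
    for (auto it = elems.begin(); it != elems.end();)
    {
        if (resolve_ok(it->first))
        {
            it->second = 0;          /// success resets the failure counter
            ++it;
        }
        else if (max_consecutive_failures && ++it->second >= max_consecutive_failures)
        {
            it = elems.erase(it);    /// too many consecutive failures: forget the element
            dropped_any = true;
        }
        else
            ++it;
    }
    return dropped_any;
}

int main()
{
    std::unordered_map<std::string, uint32_t> hosts{{"good.example", 0}, {"bad.example", 0}};
    auto resolve_ok = [](const std::string & host) { return host == "good.example"; };
    for (int i = 0; i < 3; ++i)
        retryOrDrop(hosts, resolve_ok, /*max_consecutive_failures=*/ 3);
    std::cout << "remaining elements: " << hosts.size() << '\n';   /// prints 1
}
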

View File

@ -47,14 +47,20 @@ public:
void dropCache();
/// Updates all known hosts in cache.
/// Returns true if IP of any host has been changed.
bool updateCache();
/// Returns true if IP of any host has been changed or an element was dropped (too many failures)
bool updateCache(UInt32 max_consecutive_failures);
~DNSResolver();
private:
template<typename UpdateF, typename ElemsT>
bool updateCacheImpl(UpdateF && update_func, ElemsT && elems, const String & log_msg);
template <typename UpdateF, typename ElemsT>
bool updateCacheImpl(
UpdateF && update_func,
ElemsT && elems,
UInt32 max_consecutive_failures,
const String & notfound_log_msg,
const String & dropped_log_msg);
DNSResolver();

View File

@ -360,6 +360,27 @@ public:
return toDayNum(LUTIndex(i - (lut[i].day_of_month - 1)));
}
/// Round up to last day of month.
template <typename DateOrTime>
inline Time toLastDayOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return lut_saturated[i - lut[i].day_of_month + lut[i].days_in_month].date;
else
return lut[i - lut[i].day_of_month + lut[i].days_in_month].date;
}
template <typename DateOrTime>
inline auto toLastDayNumOfMonth(DateOrTime v) const
{
const LUTIndex i = toLUTIndex(v);
if constexpr (std::is_unsigned_v<DateOrTime> || std::is_same_v<DateOrTime, DayNum>)
return toDayNum(LUTIndexWithSaturation(i - lut[i].day_of_month + lut[i].days_in_month));
else
return toDayNum(LUTIndex(i - lut[i].day_of_month + lut[i].days_in_month));
}
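
As a concrete check of the index arithmetic, here is a standalone sketch that does not use DateLUTImpl at all; the day numbers are taken from the 2019-09-16 unit test added further down in this diff. The last day of the month is the current index shifted by days_in_month - day_of_month.

#include <cassert>
#include <cstdint>

/// Stand-in for one LUT entry: epoch day number plus the two fields used by the shift.
struct DayInfo
{
    uint32_t day_num;        /// days since 1970-01-01
    uint8_t day_of_month;
    uint8_t days_in_month;
};

/// Same shift as toLastDayNumOfMonth: i - day_of_month + days_in_month.
uint32_t lastDayNumOfMonth(const DayInfo & d)
{
    return d.day_num - d.day_of_month + d.days_in_month;
}

int main()
{
    DayInfo d{18155, 16, 30};                 /// 2019-09-16
    assert(lastDayNumOfMonth(d) == 18169);    /// 2019-09-30, matches DayNum(18169) in the test below
    return 0;
}
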
/// Round down to start of quarter.
template <typename DateOrTime>
inline auto toFirstDayNumOfQuarter(DateOrTime v) const

View File

@ -61,7 +61,7 @@ private:
class JSONBool : public IItem
{
public:
explicit JSONBool(bool value_) : value(std::move(value_)) {}
explicit JSONBool(bool value_) : value(value_) {}
void format(const FormatSettings & settings, FormatContext & context) override;
private:
@ -74,7 +74,7 @@ public:
void add(ItemPtr value) { values.push_back(std::move(value)); }
void add(std::string value) { add(std::make_unique<JSONString>(std::move(value))); }
void add(const char * value) { add(std::make_unique<JSONString>(value)); }
void add(bool value) { add(std::make_unique<JSONBool>(std::move(value))); }
void add(bool value) { add(std::make_unique<JSONBool>(value)); }
template <typename T>
requires std::is_arithmetic_v<T>
@ -99,7 +99,7 @@ public:
void add(std::string key, std::string value) { add(std::move(key), std::make_unique<JSONString>(std::move(value))); }
void add(std::string key, const char * value) { add(std::move(key), std::make_unique<JSONString>(value)); }
void add(std::string key, std::string_view value) { add(std::move(key), std::make_unique<JSONString>(value)); }
void add(std::string key, bool value) { add(std::move(key), std::make_unique<JSONBool>(std::move(value))); }
void add(std::string key, bool value) { add(std::move(key), std::make_unique<JSONBool>(value)); }
template <typename T>
requires std::is_arithmetic_v<T>

View File

@ -0,0 +1,15 @@
#include <IO/WriteHelpers.h>
#include <Common/NamePrompter.h>
namespace DB::detail
{
void appendHintsMessageImpl(String & message, const std::vector<String> & hints)
{
if (hints.empty())
{
return;
}
message += ". Maybe you meant: " + toString(hints);
}
}

View File

@ -90,6 +90,10 @@ private:
}
};
namespace detail
{
void appendHintsMessageImpl(String & message, const std::vector<String> & hints);
}
template <size_t MaxNumHints, typename Self>
class IHints
@ -102,6 +106,12 @@ public:
return prompter.getHints(name, getAllRegisteredNames());
}
void appendHintsMessage(String & message, const String & name) const
{
auto hints = getHints(name);
detail::appendHintsMessageImpl(message, hints);
}
IHints() = default;
IHints(const IHints &) = default;
@ -114,5 +124,4 @@ public:
private:
NamePrompter<MaxNumHints> prompter;
};
}
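
A self-contained sketch of what the new appendHintsMessage helper produces; the exact list formatting in ClickHouse comes from toString() in WriteHelpers, so the simple join below is only an approximation.

#include <iostream>
#include <string>
#include <vector>

/// Minimal stand-in for DB::detail::appendHintsMessageImpl: append a
/// "Maybe you meant" suffix only when there are hints to show.
void appendHintsMessage(std::string & message, const std::vector<std::string> & hints)
{
    if (hints.empty())
        return;

    message += ". Maybe you meant: [";
    for (size_t i = 0; i < hints.size(); ++i)
    {
        if (i)
            message += ", ";
        message += "'" + hints[i] + "'";
    }
    message += "]";
}

int main()
{
    std::string message = "Unknown column 'usr_id'";
    appendHintsMessage(message, {"user_id", "user_uid"});
    std::cout << message << '\n';
    /// Unknown column 'usr_id'. Maybe you meant: ['user_id', 'user_uid']
}
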

View File

@ -112,6 +112,8 @@
M(CompileExpressionsMicroseconds, "Total time spent for compilation of expressions to LLVM code.") \
M(CompileExpressionsBytes, "Number of bytes used for expressions compilation.") \
\
M(ExecuteShellCommand, "Number of shell command executions.") \
\
M(ExternalSortWritePart, "") \
M(ExternalSortMerge, "") \
M(ExternalAggregationWritePart, "") \
@ -295,6 +297,25 @@
M(MergeTreeMetadataCacheHit, "Number of times the read of meta file was done from MergeTree metadata cache") \
M(MergeTreeMetadataCacheMiss, "Number of times the read of meta file was not done from MergeTree metadata cache") \
\
M(KafkaRebalanceRevocations, "Number of partition revocations (the first stage of consumer group rebalance)") \
M(KafkaRebalanceAssignments, "Number of partition assignments (the final stage of consumer group rebalance)") \
M(KafkaRebalanceErrors, "Number of failed consumer group rebalances") \
M(KafkaMessagesPolled, "Number of Kafka messages polled from librdkafka to ClickHouse") \
M(KafkaMessagesRead, "Number of Kafka messages already processed by ClickHouse") \
M(KafkaMessagesFailed, "Number of Kafka messages ClickHouse failed to parse") \
M(KafkaRowsRead, "Number of rows parsed from Kafka messages") \
M(KafkaRowsRejected, "Number of parsed rows which were later rejected (due to rebalances / errors or similar reasons). Those rows will be consumed again after the rebalance.") \
M(KafkaDirectReads, "Number of direct selects from Kafka tables since server start") \
M(KafkaBackgroundReads, "Number of background reads populating materialized views from Kafka since server start") \
M(KafkaCommits, "Number of successful commits of consumed offsets to Kafka (normally should be the same as KafkaBackgroundReads)") \
M(KafkaCommitFailures, "Number of failed commits of consumed offsets to Kafka (usually is a sign of some data duplication)") \
M(KafkaConsumerErrors, "Number of errors reported by librdkafka during polls") \
M(KafkaWrites, "Number of writes (inserts) to Kafka tables ") \
M(KafkaRowsWritten, "Number of rows inserted into Kafka tables") \
M(KafkaProducerFlushes, "Number of explicit flushes to Kafka producer") \
M(KafkaMessagesProduced, "Number of messages produced to Kafka") \
M(KafkaProducerErrors, "Number of errors during producing the messages to Kafka") \
\
M(ScalarSubqueriesGlobalCacheHit, "Number of times a read from a scalar subquery was done using the global cache") \
M(ScalarSubqueriesLocalCacheHit, "Number of times a read from a scalar subquery was done using the local cache") \
M(ScalarSubqueriesCacheMiss, "Number of times a read from a scalar subquery was not cached and had to be calculated completely")

View File

@ -29,6 +29,11 @@ namespace
};
}
namespace ProfileEvents
{
extern const Event ExecuteShellCommand;
}
namespace DB
{
@ -158,6 +163,7 @@ std::unique_ptr<ShellCommand> ShellCommand::executeImpl(
const Config & config)
{
logCommand(filename, argv);
ProfileEvents::increment(ProfileEvents::ExecuteShellCommand);
#if !defined(USE_MUSL)
/** Here it is written that with a normal call `vfork`, there is a chance of deadlock in multithreaded programs,

View File

@ -9,6 +9,7 @@
#include <Interpreters/SessionLog.h>
#include <Interpreters/TextLog.h>
#include <Interpreters/TraceLog.h>
#include <Interpreters/ProcessorsProfileLog.h>
#include <Interpreters/ZooKeeperLog.h>
#include <Common/MemoryTrackerBlockerInThread.h>

View File

@ -24,6 +24,7 @@
M(SessionLogElement) \
M(TraceLogElement) \
M(ZooKeeperLogElement) \
M(ProcessorProfileLogElement) \
M(TextLogElement)
namespace Poco

View File

@ -1,4 +1,4 @@
#include <Functions/TargetSpecific.h>
#include <Common/TargetSpecific.h>
#include <Common/CpuId.h>

View File

@ -701,24 +701,34 @@ void ZooKeeper::removeChildrenRecursive(const std::string & path, const String &
}
}
void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node)
bool ZooKeeper::tryRemoveChildrenRecursive(const std::string & path, bool probably_flat, const String & keep_child_node)
{
Strings children;
if (tryGetChildren(path, children) != Coordination::Error::ZOK)
return;
return false;
bool removed_as_expected = true;
while (!children.empty())
{
Coordination::Requests ops;
Strings batch;
ops.reserve(MULTI_BATCH_SIZE);
batch.reserve(MULTI_BATCH_SIZE);
for (size_t i = 0; i < MULTI_BATCH_SIZE && !children.empty(); ++i)
{
String child_path = fs::path(path) / children.back();
tryRemoveChildrenRecursive(child_path);
/// Will try to avoid recursive getChildren calls if child_path probably has no children.
/// It may be extremely slow when the path contains a lot of leaf children.
if (!probably_flat)
tryRemoveChildrenRecursive(child_path);
if (likely(keep_child_node.empty() || keep_child_node != children.back()))
{
batch.push_back(child_path);
ops.emplace_back(zkutil::makeRemoveRequest(child_path, -1));
}
children.pop_back();
}
@ -726,10 +736,39 @@ void ZooKeeper::tryRemoveChildrenRecursive(const std::string & path, const Strin
/// this means someone is concurrently removing these children and we will have
/// to remove them one by one.
Coordination::Responses responses;
if (tryMulti(ops, responses) != Coordination::Error::ZOK)
for (const std::string & child : batch)
tryRemove(child);
if (tryMulti(ops, responses) == Coordination::Error::ZOK)
continue;
removed_as_expected = false;
std::vector<zkutil::ZooKeeper::FutureRemove> futures;
futures.reserve(batch.size());
for (const std::string & child : batch)
futures.push_back(asyncTryRemoveNoThrow(child, -1));
for (size_t i = 0; i < batch.size(); ++i)
{
auto res = futures[i].get();
if (res.error == Coordination::Error::ZOK)
continue;
if (res.error == Coordination::Error::ZNONODE)
continue;
if (res.error == Coordination::Error::ZNOTEMPTY)
{
if (probably_flat)
{
/// It actually has children, let's remove them
tryRemoveChildrenRecursive(batch[i]);
tryRemove(batch[i]);
}
continue;
}
throw KeeperException(res.error, batch[i]);
}
}
return removed_as_expected;
}
void ZooKeeper::removeRecursive(const std::string & path)

View File

@ -225,7 +225,10 @@ public:
/// If keep_child_node is not empty, this method will not remove path/keep_child_node (but will remove its subtree).
/// It can be useful to keep some child node as a flag which indicates that the path is currently being removed.
void removeChildrenRecursive(const std::string & path, const String & keep_child_node = {});
void tryRemoveChildrenRecursive(const std::string & path, const String & keep_child_node = {});
/// If probably_flat is true, this method will optimistically try to remove children non-recursively
/// and will fall back to recursive removal if it gets ZNOTEMPTY for some child.
/// Returns true if no kind of fallback happened.
bool tryRemoveChildrenRecursive(const std::string & path, bool probably_flat = false, const String & keep_child_node = {});
/// Remove all children nodes (non recursive).
void removeChildren(const std::string & path);
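
A toy in-memory sketch of the probably_flat optimisation described above (it does not use ZooKeeper at all): children are first deleted as if they were leaves, and recursion only kicks in for the ones that turn out to be non-empty, which corresponds to the ZNOTEMPTY fallback in the real code.

#include <iostream>
#include <map>
#include <set>
#include <string>

/// Toy tree: only nodes that have children appear as keys.
using Tree = std::map<std::string, std::set<std::string>>;

/// Returns true if no fallback to recursive removal was needed.
bool tryRemoveChildrenRecursive(Tree & tree, const std::string & path, bool probably_flat = false)
{
    bool removed_as_expected = true;
    auto node = tree.find(path);
    if (node == tree.end())
        return removed_as_expected;

    auto children = node->second;              /// copy: the tree is mutated while iterating
    for (const auto & child : children)
    {
        bool child_has_children = tree.count(child) && !tree[child].empty();
        if (child_has_children)
        {
            if (probably_flat)
                removed_as_expected = false;   /// the "ZNOTEMPTY" case: fall back to recursion
            tryRemoveChildrenRecursive(tree, child, /*probably_flat=*/ false);
        }
        tree.erase(child);                     /// remove the (now empty) child itself
        node->second.erase(child);
    }
    return removed_as_expected;
}

int main()
{
    Tree tree{{"/queue", {"/queue/a", "/queue/b"}}, {"/queue/b", {"/queue/b/x"}}};
    bool flat = tryRemoveChildrenRecursive(tree, "/queue", /*probably_flat=*/ true);
    std::cout << std::boolalpha << "removed_as_expected=" << flat << '\n';   /// false: /queue/b had a child
}
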

View File

@ -846,7 +846,7 @@ void ZooKeeper::receiveEvent()
void ZooKeeper::finalize(bool error_send, bool error_receive, const String & reason)
{
/// If some thread (send/receive) already finalizing session don't try to do it
bool already_started = finalization_started.exchange(true);
bool already_started = finalization_started.test_and_set();
LOG_TEST(log, "Finalizing session {}: finalization_started={}, queue_finished={}, reason={}",
session_id, already_started, requests_queue.isFinished(), reason);

View File

@ -209,7 +209,7 @@ private:
std::atomic<XID> next_xid {1};
/// Mark session finalization start. Used to avoid simultaneous
/// finalization from different threads. One-shot flag.
std::atomic<bool> finalization_started {false};
std::atomic_flag finalization_started;
using clock = std::chrono::steady_clock;
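
The switch from std::atomic<bool>::exchange to std::atomic_flag::test_and_set keeps the same one-shot semantics with a lighter primitive: the first caller gets false back and runs the finalization, every later caller sees true. A small standalone sketch of that pattern:

#include <atomic>
#include <iostream>
#include <thread>
#include <vector>

std::atomic_flag finalization_started = ATOMIC_FLAG_INIT;
std::atomic<int> finalizations{0};

void finalize()
{
    bool already_started = finalization_started.test_and_set();
    if (already_started)
        return;
    ++finalizations;   /// only the first caller ever reaches this point
}

int main()
{
    std::vector<std::thread> threads;
    for (int i = 0; i < 8; ++i)
        threads.emplace_back(finalize);
    for (auto & t : threads)
        t.join();
    std::cout << "finalizations=" << finalizations << '\n';   /// always 1
}
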

View File

@ -146,6 +146,8 @@ TEST(DateLUTTest, TimeValuesInMiddleOfRange)
EXPECT_EQ(lut.addYears(time, 10), 1884270011 /*time_t*/);
EXPECT_EQ(lut.timeToString(time), "2019-09-16 19:20:11" /*std::string*/);
EXPECT_EQ(lut.dateToString(time), "2019-09-16" /*std::string*/);
EXPECT_EQ(lut.toLastDayOfMonth(time), 1569790800 /*time_t*/);
EXPECT_EQ(lut.toLastDayNumOfMonth(time), DayNum(18169) /*DayNum*/);
}
@ -206,6 +208,8 @@ TEST(DateLUTTest, TimeValuesAtLeftBoderOfRange)
EXPECT_EQ(lut.addYears(time, 10), 315532800 /*time_t*/);
EXPECT_EQ(lut.timeToString(time), "1970-01-01 00:00:00" /*std::string*/);
EXPECT_EQ(lut.dateToString(time), "1970-01-01" /*std::string*/);
EXPECT_EQ(lut.toLastDayOfMonth(time), 2592000 /*time_t*/);
EXPECT_EQ(lut.toLastDayNumOfMonth(time), DayNum(30) /*DayNum*/);
}
TEST(DateLUTTest, TimeValuesAtRightBoderOfRangeOfOldLUT)
@ -225,7 +229,7 @@ TEST(DateLUTTest, TimeValuesAtRightBoderOfRangeOfOldLUT)
EXPECT_EQ(lut.toFirstDayOfWeek(time), 4293820800 /*time_t*/);
EXPECT_EQ(lut.toFirstDayNumOfWeek(time), DayNum(49697));
EXPECT_EQ(lut.toFirstDayOfMonth(time), 4291747200 /*time_t*/); // 2016-01-01
EXPECT_EQ(lut.toFirstDayOfMonth(time), 4291747200 /*time_t*/); // 2106-01-01
EXPECT_EQ(lut.toFirstDayNumOfMonth(time), DayNum(49673));
EXPECT_EQ(lut.toFirstDayNumOfQuarter(time), DayNum(49673) /*DayNum*/);
EXPECT_EQ(lut.toFirstDayOfQuarter(time), 4291747200 /*time_t*/);
@ -268,6 +272,8 @@ TEST(DateLUTTest, TimeValuesAtRightBoderOfRangeOfOldLUT)
EXPECT_EQ(lut.timeToString(time), "2106-01-31 01:17:53" /*std::string*/);
EXPECT_EQ(lut.dateToString(time), "2106-01-31" /*std::string*/);
EXPECT_EQ(lut.toLastDayOfMonth(time), 4294339200 /*time_t*/); // 2106-01-01
EXPECT_EQ(lut.toLastDayNumOfMonth(time), DayNum(49703));
}

View File

@ -222,8 +222,8 @@ public:
}
/// Check for duplicated changelog ids
if (logs.count(record.header.index) != 0)
std::erase_if(logs, [record] (const auto & item) { return item.first >= record.header.index; });
if (logs.contains(record.header.index))
std::erase_if(logs, [&record] (const auto & item) { return item.first >= record.header.index; });
result.total_entries_read_from_log += 1;
@ -659,6 +659,7 @@ LogEntryPtr Changelog::getLatestConfigChange() const
nuraft::ptr<nuraft::buffer> Changelog::serializeEntriesToBuffer(uint64_t index, int32_t count)
{
std::vector<nuraft::ptr<nuraft::buffer>> returned_logs;
returned_logs.reserve(count);
uint64_t size_total = 0;
for (uint64_t i = index; i < index + count; ++i)
@ -669,7 +670,7 @@ nuraft::ptr<nuraft::buffer> Changelog::serializeEntriesToBuffer(uint64_t index,
nuraft::ptr<nuraft::buffer> buf = entry->second->serialize();
size_total += buf->size();
returned_logs.push_back(buf);
returned_logs.push_back(std::move(buf));
}
nuraft::ptr<nuraft::buffer> buf_out = nuraft::buffer::alloc(sizeof(int32_t) + count * sizeof(int32_t) + size_total);
@ -678,9 +679,8 @@ nuraft::ptr<nuraft::buffer> Changelog::serializeEntriesToBuffer(uint64_t index,
for (auto & entry : returned_logs)
{
nuraft::ptr<nuraft::buffer> & bb = entry;
buf_out->put(static_cast<int32_t>(bb->size()));
buf_out->put(*bb);
buf_out->put(static_cast<int32_t>(entry->size()));
buf_out->put(*entry);
}
return buf_out;
}
@ -699,7 +699,7 @@ void Changelog::applyEntriesFromBuffer(uint64_t index, nuraft::buffer & buffer)
buffer.get(buf_local);
LogEntryPtr log_entry = nuraft::log_entry::deserialize(*buf_local);
if (i == 0 && logs.count(cur_index))
if (i == 0 && logs.contains(cur_index))
writeAt(cur_index, log_entry);
else
appendEntry(cur_index, log_entry);

View File

@ -121,7 +121,7 @@ void KeeperDispatcher::requestThread()
current_batch.clear();
}
prev_batch = current_batch;
prev_batch = std::move(current_batch);
prev_result = result;
}

View File

@ -43,7 +43,7 @@ namespace
void writeNode(const KeeperStorage::Node & node, SnapshotVersion version, WriteBuffer & out)
{
writeBinary(node.data, out);
writeBinary(node.getData(), out);
/// Serialize ACL
writeBinary(node.acl_id, out);
@ -71,7 +71,9 @@ namespace
void readNode(KeeperStorage::Node & node, ReadBuffer & in, SnapshotVersion version, ACLMap & acl_map)
{
readBinary(node.data, in);
String new_data;
readBinary(new_data, in);
node.setData(std::move(new_data));
if (version >= SnapshotVersion::V1)
{
@ -281,7 +283,7 @@ void KeeperStorageSnapshot::deserialize(SnapshotDeserializationResult & deserial
if (itr.key != "/")
{
auto parent_path = parentPath(itr.key);
storage.container.updateValue(parent_path, [path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); });
storage.container.updateValue(parent_path, [path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseName(path)); });
}
}

View File

@ -259,22 +259,9 @@ void KeeperStateMachine::save_logical_snp_obj(
{
LOG_DEBUG(log, "Saving snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
nuraft::ptr<nuraft::buffer> cloned_buffer;
nuraft::ptr<nuraft::snapshot> cloned_meta;
if (obj_id == 0) /// Fake snapshot required by NuRaft at startup
{
std::lock_guard lock(storage_and_responses_lock);
KeeperStorageSnapshot snapshot(storage.get(), s.get_last_log_idx(), getClusterConfig());
cloned_buffer = snapshot_manager.serializeSnapshotToBuffer(snapshot);
}
else
{
/// copy snapshot into memory
}
/// copy snapshot meta into memory
nuraft::ptr<nuraft::buffer> snp_buf = s.serialize();
cloned_meta = nuraft::snapshot::deserialize(*snp_buf);
nuraft::ptr<nuraft::snapshot> cloned_meta = nuraft::snapshot::deserialize(*snp_buf);
try
{
@ -332,31 +319,22 @@ int KeeperStateMachine::read_logical_snp_obj(
{
LOG_DEBUG(log, "Reading snapshot {} obj_id {}", s.get_last_log_idx(), obj_id);
if (obj_id == 0) /// Fake snapshot required by NuRaft at startup
std::lock_guard lock(snapshots_lock);
    /// Our snapshot is not equal to the required one. Maybe we are still creating it in the background.
/// Let's wait and NuRaft will retry this call.
if (s.get_last_log_idx() != latest_snapshot_meta->get_last_log_idx())
{
data_out = nuraft::buffer::alloc(sizeof(int32_t));
nuraft::buffer_serializer bs(data_out);
bs.put_i32(0);
is_last_obj = false;
LOG_WARNING(log, "Required to apply snapshot with last log index {}, but our last log index is {}. Will ignore this one and retry",
s.get_last_log_idx(), latest_snapshot_meta->get_last_log_idx());
return -1;
}
else
if (bufferFromFile(log, latest_snapshot_path, data_out))
{
std::lock_guard lock(snapshots_lock);
        /// Our snapshot is not equal to the required one. Maybe we are still creating it in the background.
/// Let's wait and NuRaft will retry this call.
if (s.get_last_log_idx() != latest_snapshot_meta->get_last_log_idx())
{
LOG_WARNING(log, "Required to apply snapshot with last log index {}, but our last log index is {}. Will ignore this one and retry",
s.get_last_log_idx(), latest_snapshot_meta->get_last_log_idx());
return -1;
}
if (bufferFromFile(log, latest_snapshot_path, data_out))
{
LOG_WARNING(log, "Error reading snapshot {} from {}", s.get_last_log_idx(), latest_snapshot_path);
return -1;
}
is_last_obj = true;
LOG_WARNING(log, "Error reading snapshot {} from {}", s.get_last_log_idx(), latest_snapshot_path);
return -1;
}
is_last_obj = true;
return 1;
}

View File

@ -84,7 +84,7 @@ public:
bool shouldStartAsFollower() const
{
std::lock_guard lock(configuration_wrapper_mutex);
return configuration_wrapper.servers_start_as_followers.count(my_server_id);
return configuration_wrapper.servers_start_as_followers.contains(my_server_id);
}
bool isSecure() const

View File

@ -24,7 +24,10 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
static String base64Encode(const String & decoded)
namespace
{
String base64Encode(const String & decoded)
{
std::ostringstream ostr; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
ostr.exceptions(std::ios::failbit);
@ -35,7 +38,7 @@ static String base64Encode(const String & decoded)
return ostr.str();
}
static String getSHA1(const String & userdata)
String getSHA1(const String & userdata)
{
Poco::SHA1Engine engine;
engine.update(userdata);
@ -43,14 +46,14 @@ static String getSHA1(const String & userdata)
return String{digest_id.begin(), digest_id.end()};
}
static String generateDigest(const String & userdata)
String generateDigest(const String & userdata)
{
std::vector<String> user_password;
boost::split(user_password, userdata, [](char c) { return c == ':'; });
return user_password[0] + ":" + base64Encode(getSHA1(userdata));
}
static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector<KeeperStorage::AuthID> & session_auths)
bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, const std::vector<KeeperStorage::AuthID> & session_auths)
{
if (node_acls.empty())
return true;
@ -77,7 +80,7 @@ static bool checkACL(int32_t permission, const Coordination::ACLs & node_acls, c
return false;
}
static bool fixupACL(
bool fixupACL(
const std::vector<Coordination::ACL> & request_acls,
const std::vector<KeeperStorage::AuthID> & current_ids,
std::vector<Coordination::ACL> & result_acls)
@ -119,7 +122,7 @@ static bool fixupACL(
return valid_found;
}
static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type)
KeeperStorage::ResponsesForSessions processWatchesImpl(const String & path, KeeperStorage::Watches & watches, KeeperStorage::Watches & list_watches, Coordination::Event event_type)
{
KeeperStorage::ResponsesForSessions result;
auto it = watches.find(path);
@ -174,6 +177,25 @@ static KeeperStorage::ResponsesForSessions processWatchesImpl(const String & pat
}
return result;
}
}
void KeeperStorage::Node::setData(String new_data)
{
size_bytes = size_bytes - data.size() + new_data.size();
data = std::move(new_data);
}
void KeeperStorage::Node::addChild(StringRef child_path)
{
size_bytes += sizeof child_path;
children.insert(child_path);
}
void KeeperStorage::Node::removeChild(StringRef child_path)
{
size_bytes -= sizeof child_path;
children.erase(child_path);
}
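
An illustrative model of this bookkeeping (not the real KeeperStorage::Node): size_bytes starts at the fixed object footprint and is adjusted on every data or child change, so sizeInBytes() never has to re-scan the node. Here sizeof(void *) * 2 stands in for sizeof(StringRef).

#include <cassert>
#include <cstdint>
#include <string>
#include <unordered_set>

struct NodeModel
{
    void setData(std::string new_data)
    {
        size_bytes = size_bytes - data.size() + new_data.size();
        data = std::move(new_data);
    }

    void addChild(const std::string & child)
    {
        if (children.insert(child).second)
            size_bytes += sizeof(void *) * 2;   /// stand-in for sizeof child_path (a StringRef)
    }

    uint64_t sizeInBytes() const { return size_bytes; }

    std::string data;
    std::unordered_set<std::string> children;
    uint64_t size_bytes = sizeof(NodeModel);
};

int main()
{
    NodeModel node;
    const uint64_t base = node.sizeInBytes();
    node.setData("hello");
    assert(node.sizeInBytes() == base + 5);
    node.addChild("child");
    assert(node.sizeInBytes() == base + 5 + sizeof(void *) * 2);
    return 0;
}
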
KeeperStorage::KeeperStorage(int64_t tick_time_ms, const String & superdigest_)
: session_expiry_queue(tick_time_ms)
@ -314,8 +336,8 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
created_node.stat.numChildren = 0;
created_node.stat.dataLength = request.data.length();
created_node.stat.ephemeralOwner = request.is_ephemeral ? session_id : 0;
created_node.data = request.data;
created_node.is_sequental = request.is_sequential;
created_node.setData(std::move(request.data));
auto [map_key, _] = container.insert(path_created, created_node);
/// Take child path from key owned by map.
@ -327,8 +349,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid,
parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent)
{
parent.children.insert(child_path);
parent.size_bytes += child_path.size;
parent.addChild(child_path);
prev_parent_cversion = parent.stat.cversion;
prev_parent_zxid = parent.stat.pzxid;
@ -363,8 +384,7 @@ struct KeeperStorageCreateRequestProcessor final : public KeeperStorageRequestPr
--undo_parent.seq_num;
undo_parent.stat.cversion = prev_parent_cversion;
undo_parent.stat.pzxid = prev_parent_zxid;
undo_parent.children.erase(child_path);
undo_parent.size_bytes -= child_path.size;
undo_parent.removeChild(child_path);
});
storage.container.erase(path_created);
@ -409,7 +429,7 @@ struct KeeperStorageGetRequestProcessor final : public KeeperStorageRequestProce
else
{
response.stat = it->value.stat;
response.data = it->value.data;
response.data = it->value.getData();
response.error = Coordination::Error::ZOK;
}
@ -498,8 +518,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
{
--parent.stat.numChildren;
++parent.stat.cversion;
parent.children.erase(child_basename);
parent.size_bytes -= child_basename.size;
parent.removeChild(child_basename);
});
response.error = Coordination::Error::ZOK;
@ -520,8 +539,7 @@ struct KeeperStorageRemoveRequestProcessor final : public KeeperStorageRequestPr
{
++parent.stat.numChildren;
--parent.stat.cversion;
parent.children.insert(child_name);
parent.size_bytes += child_name.size;
parent.addChild(child_name);
});
};
}
@ -598,14 +616,13 @@ struct KeeperStorageSetRequestProcessor final : public KeeperStorageRequestProce
auto prev_node = it->value;
auto itr = container.updateValue(request.path, [zxid, request, time] (KeeperStorage::Node & value)
auto itr = container.updateValue(request.path, [zxid, request, time] (KeeperStorage::Node & value) mutable
{
value.stat.version++;
value.stat.mzxid = zxid;
value.stat.mtime = time;
value.stat.dataLength = request.data.length();
value.size_bytes = value.size_bytes + request.data.size() - value.data.size();
value.data = request.data;
value.setData(std::move(request.data));
});
container.updateValue(parentPath(request.path), [] (KeeperStorage::Node & parent)
@ -675,9 +692,10 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc
if (path_prefix.empty())
throw DB::Exception("Logical error: path cannot be empty", ErrorCodes::LOGICAL_ERROR);
response.names.reserve(it->value.children.size());
const auto & children = it->value.getChildren();
response.names.reserve(children.size());
for (const auto child : it->value.children)
for (const auto child : children)
response.names.push_back(child.toString());
response.stat = it->value.stat;
@ -856,24 +874,23 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro
for (const auto & sub_request : request.requests)
{
auto sub_zk_request = std::dynamic_pointer_cast<Coordination::ZooKeeperRequest>(sub_request);
if (sub_zk_request->getOpNum() == Coordination::OpNum::Create)
switch (sub_zk_request->getOpNum())
{
concrete_requests.push_back(std::make_shared<KeeperStorageCreateRequestProcessor>(sub_zk_request));
case Coordination::OpNum::Create:
concrete_requests.push_back(std::make_shared<KeeperStorageCreateRequestProcessor>(sub_zk_request));
break;
case Coordination::OpNum::Remove:
concrete_requests.push_back(std::make_shared<KeeperStorageRemoveRequestProcessor>(sub_zk_request));
break;
case Coordination::OpNum::Set:
concrete_requests.push_back(std::make_shared<KeeperStorageSetRequestProcessor>(sub_zk_request));
break;
case Coordination::OpNum::Check:
concrete_requests.push_back(std::make_shared<KeeperStorageCheckRequestProcessor>(sub_zk_request));
break;
default:
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum());
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Remove)
{
concrete_requests.push_back(std::make_shared<KeeperStorageRemoveRequestProcessor>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Set)
{
concrete_requests.push_back(std::make_shared<KeeperStorageSetRequestProcessor>(sub_zk_request));
}
else if (sub_zk_request->getOpNum() == Coordination::OpNum::Check)
{
concrete_requests.push_back(std::make_shared<KeeperStorageCheckRequestProcessor>(sub_zk_request));
}
else
throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal command as part of multi ZooKeeper request {}", sub_zk_request->getOpNum());
}
}
@ -1092,8 +1109,7 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
--parent.stat.numChildren;
++parent.stat.cversion;
auto base_name = getBaseName(ephemeral_path);
parent.children.erase(base_name);
parent.size_bytes -= base_name.size;
parent.removeChild(base_name);
});
container.erase(ephemeral_path);

View File

@ -32,28 +32,38 @@ public:
struct Node
{
String data;
uint64_t acl_id = 0; /// 0 -- no ACL by default
bool is_sequental = false;
Coordination::Stat stat{};
int32_t seq_num = 0;
ChildrenSet children{};
uint64_t size_bytes; // save size to avoid calculating it every time
Node()
{
size_bytes = sizeof(size_bytes);
size_bytes += data.size();
size_bytes += sizeof(acl_id);
size_bytes += sizeof(is_sequental);
size_bytes += sizeof(stat);
size_bytes += sizeof(seq_num);
}
Node() : size_bytes(sizeof(Node)) { }
/// Object memory size
uint64_t sizeInBytes() const
{
return size_bytes;
}
void setData(String new_data);
const auto & getData() const noexcept
{
return data;
}
void addChild(StringRef child_path);
void removeChild(StringRef child_path);
const auto & getChildren() const noexcept
{
return children;
}
private:
String data;
ChildrenSet children{};
};
struct ResponseForSession
@ -104,7 +114,7 @@ public:
/// Mapping session_id -> set of ephemeral nodes paths
Ephemerals ephemerals;
/// Mapping sessuib_id -> set of watched nodes paths
/// Mapping session_id -> set of watched nodes paths
SessionAndWatcher sessions_and_watchers;
/// Expiration queue for sessions; allows getting dead sessions at some point in time
SessionExpiryQueue session_expiry_queue;

View File

@ -80,7 +80,7 @@ private:
approximate_data_size += value_size;
if (!snapshot_mode)
{
approximate_data_size += key_size;
approximate_data_size -= key_size;
approximate_data_size -= old_value_size;
}
}
@ -132,7 +132,6 @@ public:
if (!it)
{
ListElem elem{copyStringInArena(arena, key), value, current_version};
auto itr = list.insert(list.end(), std::move(elem));
bool inserted;
@ -228,7 +227,7 @@ public:
/// We are in snapshot mode but updating some node which is already fresher
/// than the snapshot distance. So it will not participate in
/// the snapshot and we don't need to copy it.
if (snapshot_mode && list_itr->version <= snapshot_up_to_version)
if (list_itr->version <= snapshot_up_to_version)
{
auto elem_copy = *(list_itr);
list_itr->active_in_map = false;

View File

@ -98,7 +98,9 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L
while (path != "/")
{
KeeperStorage::Node node{};
Coordination::read(node.data, in);
String data;
Coordination::read(data, in);
node.setData(std::move(data));
Coordination::read(node.acl_id, in);
/// Deserialize stat
@ -117,7 +119,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L
Coordination::read(node.stat.pzxid, in);
if (!path.empty())
{
node.stat.dataLength = node.data.length();
node.stat.dataLength = node.getData().length();
node.seq_num = node.stat.cversion;
storage.container.insertOrReplace(path, node);
@ -137,7 +139,7 @@ int64_t deserializeStorageData(KeeperStorage & storage, ReadBuffer & in, Poco::L
if (itr.key != "/")
{
auto parent_path = parentPath(itr.key);
storage.container.updateValue(parent_path, [path = itr.key] (KeeperStorage::Node & value) { value.children.insert(getBaseName(path)); value.stat.numChildren++; });
storage.container.updateValue(parent_path, [path = itr.key] (KeeperStorage::Node & value) { value.addChild(getBaseName(path)); value.stat.numChildren++; });
}
}

View File

@ -946,6 +946,8 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize)
EXPECT_EQ(hello.getApproximateDataSize(), 9);
hello.updateValue("hello", [](IntNode & value) { value = 2; });
EXPECT_EQ(hello.getApproximateDataSize(), 9);
hello.insertOrReplace("hello", 3);
EXPECT_EQ(hello.getApproximateDataSize(), 9);
hello.erase("hello");
EXPECT_EQ(hello.getApproximateDataSize(), 0);
@ -958,6 +960,8 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize)
EXPECT_EQ(hello.getApproximateDataSize(), 9);
hello.updateValue("hello", [](IntNode & value) { value = 2; });
EXPECT_EQ(hello.getApproximateDataSize(), 18);
hello.insertOrReplace("hello", 1);
EXPECT_EQ(hello.getApproximateDataSize(), 27);
hello.clearOutdatedNodes();
EXPECT_EQ(hello.getApproximateDataSize(), 9);
@ -972,31 +976,31 @@ TEST_P(CoordinationTest, SnapshotableHashMapDataSize)
using Node = DB::KeeperStorage::Node;
DB::SnapshotableHashTable<Node> world;
Node n1;
n1.data = "1234";
n1.setData("1234");
Node n2;
n2.data = "123456";
n2.children.insert("");
n2.setData("123456");
n2.addChild("");
world.disableSnapshotMode();
world.insert("world", n1);
EXPECT_EQ(world.getApproximateDataSize(), 98);
EXPECT_EQ(world.getApproximateDataSize(), 177);
world.updateValue("world", [&](Node & value) { value = n2; });
EXPECT_EQ(world.getApproximateDataSize(), 98);
EXPECT_EQ(world.getApproximateDataSize(), 195);
world.erase("world");
EXPECT_EQ(world.getApproximateDataSize(), 0);
world.enableSnapshotMode(100000);
world.insert("world", n1);
EXPECT_EQ(world.getApproximateDataSize(), 98);
EXPECT_EQ(world.getApproximateDataSize(), 177);
world.updateValue("world", [&](Node & value) { value = n2; });
EXPECT_EQ(world.getApproximateDataSize(), 196);
EXPECT_EQ(world.getApproximateDataSize(), 372);
world.clearOutdatedNodes();
EXPECT_EQ(world.getApproximateDataSize(), 98);
EXPECT_EQ(world.getApproximateDataSize(), 195);
world.erase("world");
EXPECT_EQ(world.getApproximateDataSize(), 98);
EXPECT_EQ(world.getApproximateDataSize(), 195);
world.clear();
EXPECT_EQ(world.getApproximateDataSize(), 0);
@ -1006,7 +1010,7 @@ void addNode(DB::KeeperStorage & storage, const std::string & path, const std::s
{
using Node = DB::KeeperStorage::Node;
Node node{};
node.data = data;
node.setData(data);
node.stat.ephemeralOwner = ephemeral_owner;
storage.container.insertOrReplace(path, node);
}
@ -1044,13 +1048,13 @@ TEST_P(CoordinationTest, TestStorageSnapshotSimple)
auto [restored_storage, snapshot_meta, _] = manager.deserializeSnapshotFromBuffer(debuf);
EXPECT_EQ(restored_storage->container.size(), 3);
EXPECT_EQ(restored_storage->container.getValue("/").children.size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello").children.size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").children.size(), 0);
EXPECT_EQ(restored_storage->container.getValue("/").getChildren().size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello").getChildren().size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").getChildren().size(), 0);
EXPECT_EQ(restored_storage->container.getValue("/").data, "");
EXPECT_EQ(restored_storage->container.getValue("/hello").data, "world");
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").data, "somedata");
EXPECT_EQ(restored_storage->container.getValue("/").getData(), "");
EXPECT_EQ(restored_storage->container.getValue("/hello").getData(), "world");
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").getData(), "somedata");
EXPECT_EQ(restored_storage->session_id_counter, 7);
EXPECT_EQ(restored_storage->zxid, 2);
EXPECT_EQ(restored_storage->ephemerals.size(), 2);
@ -1095,7 +1099,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMoreWrites)
EXPECT_EQ(restored_storage->container.size(), 51);
for (size_t i = 0; i < 50; ++i)
{
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).getData(), "world_" + std::to_string(i));
}
}
@ -1135,7 +1139,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotManySnapshots)
for (size_t i = 0; i < 250; ++i)
{
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).getData(), "world_" + std::to_string(i));
}
}
@ -1158,7 +1162,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode)
}
for (size_t i = 0; i < 50; ++i)
{
EXPECT_EQ(storage.container.getValue("/hello_" + std::to_string(i)).data, "wlrd_" + std::to_string(i));
EXPECT_EQ(storage.container.getValue("/hello_" + std::to_string(i)).getData(), "wlrd_" + std::to_string(i));
}
for (size_t i = 0; i < 50; ++i)
{
@ -1178,7 +1182,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode)
for (size_t i = 0; i < 50; ++i)
{
if (i % 2 != 0)
EXPECT_EQ(storage.container.getValue("/hello_" + std::to_string(i)).data, "wlrd_" + std::to_string(i));
EXPECT_EQ(storage.container.getValue("/hello_" + std::to_string(i)).getData(), "wlrd_" + std::to_string(i));
else
EXPECT_FALSE(storage.container.contains("/hello_" + std::to_string(i)));
}
@ -1187,7 +1191,7 @@ TEST_P(CoordinationTest, TestStorageSnapshotMode)
for (size_t i = 0; i < 50; ++i)
{
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).data, "world_" + std::to_string(i));
EXPECT_EQ(restored_storage->container.getValue("/hello_" + std::to_string(i)).getData(), "world_" + std::to_string(i));
}
}
@ -1310,7 +1314,7 @@ void testLogAndStateMachine(Coordination::CoordinationSettingsPtr settings, uint
for (size_t i = 1; i < total_logs + 1; ++i)
{
auto path = "/hello_" + std::to_string(i);
EXPECT_EQ(source_storage.container.getValue(path).data, restored_storage.container.getValue(path).data);
EXPECT_EQ(source_storage.container.getValue(path).getData(), restored_storage.container.getValue(path).getData());
}
}
@ -1585,13 +1589,13 @@ TEST_P(CoordinationTest, TestStorageSnapshotDifferentCompressions)
auto [restored_storage, snapshot_meta, _] = new_manager.deserializeSnapshotFromBuffer(debuf);
EXPECT_EQ(restored_storage->container.size(), 3);
EXPECT_EQ(restored_storage->container.getValue("/").children.size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello").children.size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").children.size(), 0);
EXPECT_EQ(restored_storage->container.getValue("/").getChildren().size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello").getChildren().size(), 1);
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").getChildren().size(), 0);
EXPECT_EQ(restored_storage->container.getValue("/").data, "");
EXPECT_EQ(restored_storage->container.getValue("/hello").data, "world");
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").data, "somedata");
EXPECT_EQ(restored_storage->container.getValue("/").getData(), "");
EXPECT_EQ(restored_storage->container.getValue("/hello").getData(), "world");
EXPECT_EQ(restored_storage->container.getValue("/hello/somepath").getData(), "somedata");
EXPECT_EQ(restored_storage->session_id_counter, 7);
EXPECT_EQ(restored_storage->zxid, 2);
EXPECT_EQ(restored_storage->ephemerals.size(), 2);

View File

@ -46,7 +46,8 @@ static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, con
return onError<ReturnType>("Block structure mismatch in " + std::string(context_description) + " stream: different names of columns:\n"
+ actual.dumpStructure() + "\n" + expected.dumpStructure(), code);
if (!actual.type->equals(*expected.type))
if ((actual.type && !expected.type) || (!actual.type && expected.type)
|| (actual.type && expected.type && !actual.type->equals(*expected.type)))
return onError<ReturnType>("Block structure mismatch in " + std::string(context_description) + " stream: different types:\n"
+ actual.dumpStructure() + "\n" + expected.dumpStructure(), code);
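
The strengthened condition treats a null type on exactly one side as a mismatch instead of dereferencing it (the old check would dereference a null type here). A tiny standalone sketch of the predicate, using shared_ptr<string> as a stand-in for DataTypePtr:

#include <cassert>
#include <memory>
#include <string>

bool typesMismatch(const std::shared_ptr<std::string> & actual, const std::shared_ptr<std::string> & expected)
{
    return (actual && !expected) || (!actual && expected)
        || (actual && expected && *actual != *expected);
}

int main()
{
    auto uint64 = std::make_shared<std::string>("UInt64");
    assert(!typesMismatch(uint64, std::make_shared<std::string>("UInt64")));
    assert(typesMismatch(uint64, nullptr));       /// one side has no type: now reported, not dereferenced
    assert(typesMismatch(nullptr, uint64));
    assert(!typesMismatch(nullptr, nullptr));
    return 0;
}
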

View File

@ -195,6 +195,7 @@ class IColumn;
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
M(Float, log_queries_probability, 1., "Log queries with the specified probability.", 0) \
\
M(Bool, log_processors_profiles, false, "Log Processors profile events.", 0) \
M(DistributedProductMode, distributed_product_mode, DistributedProductMode::DENY, "How are distributed subqueries performed inside IN or JOIN sections?", IMPORTANT) \
\
M(UInt64, max_concurrent_queries_for_all_users, 0, "The maximum number of concurrent requests for all users.", 0) \
@ -637,10 +638,13 @@ class IColumn;
M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. Used for automatic schema inference from data.", 0) \
M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \
M(UInt64, input_format_max_rows_to_read_for_schema_inference, 100, "The maximum number of rows of data to read for automatic schema inference", 0) \
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
\
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
\
M(Bool, input_format_ipv4_default_on_conversion_error, false, "Deserialization of IPv4 will use default values instead of throwing an exception on conversion error.", 0) \
M(Bool, input_format_ipv6_default_on_conversion_error, false, "Deserialization of IPv6 will use default values instead of throwing an exception on conversion error.", 0) \
M(String, bool_true_representation, "true", "Text to represent bool value in TSV/CSV formats.", 0) \
M(String, bool_false_representation, "false", "Text to represent bool value in TSV/CSV formats.", 0) \
\

View File

@ -15,10 +15,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/** Cursor allows to compare rows in different blocks (and parts).
* Cursor moves inside single block.
@ -61,25 +57,21 @@ struct SortCursorImpl
reset(block, perm);
}
SortCursorImpl(const Columns & columns, const SortDescription & desc_, size_t order_ = 0, IColumn::Permutation * perm = nullptr)
SortCursorImpl(
const Block & header,
const Columns & columns,
const SortDescription & desc_,
size_t order_ = 0,
IColumn::Permutation * perm = nullptr)
: desc(desc_), sort_columns_size(desc.size()), order(order_), need_collation(desc.size())
{
for (auto & column_desc : desc)
{
if (!column_desc.column_name.empty())
throw Exception("SortDescription should contain column position if SortCursor was used without header.",
ErrorCodes::LOGICAL_ERROR);
}
reset(columns, {}, perm);
reset(columns, header, perm);
}
bool empty() const { return rows == 0; }
/// Set the cursor to the beginning of the new block.
void reset(const Block & block, IColumn::Permutation * perm = nullptr)
{
reset(block.getColumns(), block, perm);
}
void reset(const Block & block, IColumn::Permutation * perm = nullptr) { reset(block.getColumns(), block, perm); }
/// Set the cursor to the beginning of the new block.
void reset(const Columns & columns, const Block & block, IColumn::Permutation * perm = nullptr)
@ -95,9 +87,7 @@ struct SortCursorImpl
for (size_t j = 0, size = desc.size(); j < size; ++j)
{
auto & column_desc = desc[j];
size_t column_number = !column_desc.column_name.empty()
? block.getPositionByName(column_desc.column_name)
: column_desc.column_number;
size_t column_number = block.getPositionByName(column_desc.column_name);
sort_columns.push_back(columns[column_number].get());
need_collation[j] = desc[j].collator != nullptr && sort_columns.back()->isCollationSupported();
@ -367,12 +357,12 @@ private:
};
template <typename TLeftColumns, typename TRightColumns>
bool less(const TLeftColumns & lhs, const TRightColumns & rhs, size_t i, size_t j, const SortDescription & descr)
bool less(const TLeftColumns & lhs, const TRightColumns & rhs, size_t i, size_t j, const SortDescriptionWithPositions & descr)
{
for (const auto & elem : descr)
{
size_t ind = elem.column_number;
int res = elem.direction * lhs[ind]->compareAt(i, j, *rhs[ind], elem.nulls_direction);
int res = elem.base.direction * lhs[ind]->compareAt(i, j, *rhs[ind], elem.base.nulls_direction);
if (res < 0)
return true;
else if (res > 0)

View File

@ -1,12 +1,12 @@
#include <Core/SortDescription.h>
#include <Core/Block.h>
#include <Core/SortDescription.h>
#include <IO/Operators.h>
#include <Common/JSONBuilder.h>
namespace DB
{
void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out)
void dumpSortDescription(const SortDescription & description, WriteBuffer & out)
{
bool first = true;
@ -16,17 +16,7 @@ void dumpSortDescription(const SortDescription & description, const Block & head
out << ", ";
first = false;
if (!desc.column_name.empty())
out << desc.column_name;
else
{
if (desc.column_number < header.columns())
out << header.getByPosition(desc.column_number).name;
else
out << "?";
out << " (pos " << desc.column_number << ")";
}
out << desc.column_name;
if (desc.direction > 0)
out << " ASC";
@ -38,18 +28,9 @@ void dumpSortDescription(const SortDescription & description, const Block & head
}
}
void SortColumnDescription::explain(JSONBuilder::JSONMap & map, const Block & header) const
void SortColumnDescription::explain(JSONBuilder::JSONMap & map) const
{
if (!column_name.empty())
map.add("Column", column_name);
else
{
if (column_number < header.columns())
map.add("Column", header.getByPosition(column_number).name);
map.add("Position", column_number);
}
map.add("Column", column_name);
map.add("Ascending", direction > 0);
map.add("With Fill", with_fill);
}
@ -57,17 +38,17 @@ void SortColumnDescription::explain(JSONBuilder::JSONMap & map, const Block & he
std::string dumpSortDescription(const SortDescription & description)
{
WriteBufferFromOwnString wb;
dumpSortDescription(description, Block{}, wb);
dumpSortDescription(description, wb);
return wb.str();
}
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description, const Block & header)
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description)
{
auto json_array = std::make_unique<JSONBuilder::JSONArray>();
for (const auto & descr : description)
{
auto json_map = std::make_unique<JSONBuilder::JSONMap>();
descr.explain(*json_map, header);
descr.explain(*json_map);
json_array->add(std::move(json_map));
}

View File

@ -39,7 +39,6 @@ struct FillColumnDescription
struct SortColumnDescription
{
std::string column_name; /// The name of the column.
size_t column_number; /// Column number (used if no name is given).
int direction; /// 1 - ascending, -1 - descending.
int nulls_direction; /// 1 - NULLs and NaNs are greater, -1 - less.
/// To achieve NULLS LAST, set it equal to direction, to achieve NULLS FIRST, set it opposite.
@ -48,23 +47,24 @@ struct SortColumnDescription
FillColumnDescription fill_description;
explicit SortColumnDescription(
size_t column_number_, int direction_ = 1, int nulls_direction_ = 1,
const std::shared_ptr<Collator> & collator_ = nullptr,
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
: column_number(column_number_), direction(direction_), nulls_direction(nulls_direction_), collator(collator_)
, with_fill(with_fill_), fill_description(fill_description_) {}
explicit SortColumnDescription(
const std::string & column_name_, int direction_ = 1, int nulls_direction_ = 1,
const std::shared_ptr<Collator> & collator_ = nullptr,
bool with_fill_ = false, const FillColumnDescription & fill_description_ = {})
: column_name(column_name_), column_number(0), direction(direction_), nulls_direction(nulls_direction_)
, collator(collator_), with_fill(with_fill_), fill_description(fill_description_) {}
const std::string & column_name_,
int direction_ = 1,
int nulls_direction_ = 1,
const std::shared_ptr<Collator> & collator_ = nullptr,
bool with_fill_ = false,
const FillColumnDescription & fill_description_ = {})
: column_name(column_name_)
, direction(direction_)
, nulls_direction(nulls_direction_)
, collator(collator_)
, with_fill(with_fill_)
, fill_description(fill_description_)
{
}
bool operator == (const SortColumnDescription & other) const
{
return column_name == other.column_name && column_number == other.column_number
&& direction == other.direction && nulls_direction == other.nulls_direction;
return column_name == other.column_name && direction == other.direction && nulls_direction == other.nulls_direction;
}
bool operator != (const SortColumnDescription & other) const
@ -72,22 +72,30 @@ struct SortColumnDescription
return !(*this == other);
}
std::string dump() const
{
return fmt::format("{}:{}:dir {}nulls ", column_name, column_number, direction, nulls_direction);
}
std::string dump() const { return fmt::format("{}:dir {}nulls {}", column_name, direction, nulls_direction); }
void explain(JSONBuilder::JSONMap & map, const Block & header) const;
void explain(JSONBuilder::JSONMap & map) const;
};
struct SortColumnDescriptionWithColumnIndex
{
SortColumnDescription base;
size_t column_number;
SortColumnDescriptionWithColumnIndex(SortColumnDescription description_, size_t column_number_)
: base(std::move(description_)), column_number(column_number_)
{
}
};
/// Description of the sorting rule for several columns.
using SortDescription = std::vector<SortColumnDescription>;
using SortDescriptionWithPositions = std::vector<SortColumnDescriptionWithColumnIndex>;
/// Outputs user-readable description into `out`.
void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out);
void dumpSortDescription(const SortDescription & description, WriteBuffer & out);
std::string dumpSortDescription(const SortDescription & description);
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description, const Block & header);
JSONBuilder::ItemPtr explainSortDescription(const SortDescription & description);
}
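
To make the split concrete, here is a toy standalone version (names are illustrative): the user-facing description now carries only column names, and positions are resolved against a concrete header right before they are needed, which is roughly the role of the new SortDescriptionWithPositions.

#include <iostream>
#include <string>
#include <vector>

struct SortColumn { std::string column_name; int direction = 1; };
struct SortColumnWithPosition { SortColumn base; size_t column_number; };

/// Resolve names against a header (a list of column names standing in for a Block).
std::vector<SortColumnWithPosition>
resolvePositions(const std::vector<SortColumn> & description, const std::vector<std::string> & header)
{
    std::vector<SortColumnWithPosition> result;
    for (const auto & column : description)
        for (size_t pos = 0; pos < header.size(); ++pos)
            if (header[pos] == column.column_name)
                result.push_back({column, pos});
    return result;
}

int main()
{
    std::vector<std::string> header{"id", "value", "ts"};
    for (const auto & c : resolvePositions({{"ts", -1}, {"id", 1}}, header))
        std::cout << c.base.column_name << " -> pos " << c.column_number << '\n';
    /// ts -> pos 2
    /// id -> pos 0
}
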

View File

@ -36,7 +36,7 @@ DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type)
element = recursiveRemoveLowCardinality(element);
if (tuple_type->haveExplicitNames())
return std::make_shared<DataTypeTuple>(elements, tuple_type->getElementNames(), tuple_type->serializeNames());
return std::make_shared<DataTypeTuple>(elements, tuple_type->getElementNames());
else
return std::make_shared<DataTypeTuple>(elements);
}

View File

@ -64,8 +64,8 @@ static std::optional<Exception> checkTupleNames(const Strings & names)
return {};
}
DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_, bool serialize_names_)
: elems(elems_), names(names_), have_explicit_names(true), serialize_names(serialize_names_)
DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_)
: elems(elems_), names(names_), have_explicit_names(true)
{
size_t size = elems.size();
if (names.size() != size)
@ -75,11 +75,6 @@ DataTypeTuple::DataTypeTuple(const DataTypes & elems_, const Strings & names_, b
throw std::move(*exception);
}
bool DataTypeTuple::canBeCreatedWithNames(const Strings & names)
{
return checkTupleNames(names) == std::nullopt;
}
std::string DataTypeTuple::doGetName() const
{
size_t size = elems.size();
@ -91,7 +86,7 @@ std::string DataTypeTuple::doGetName() const
if (i != 0)
s << ", ";
if (have_explicit_names && serialize_names)
if (have_explicit_names)
s << backQuoteIfNeed(names[i]) << ' ';
s << elems[i]->getName();
@ -206,7 +201,7 @@ bool DataTypeTuple::equals(const IDataType & rhs) const
return false;
for (size_t i = 0; i < size; ++i)
if (!elems[i]->equals(*rhs_tuple.elems[i]))
if (!elems[i]->equals(*rhs_tuple.elems[i]) || names[i] != rhs_tuple.names[i])
return false;
return true;
@ -265,31 +260,29 @@ size_t DataTypeTuple::getSizeOfValueInMemory() const
SerializationPtr DataTypeTuple::doGetDefaultSerialization() const
{
SerializationTuple::ElementSerializations serializations(elems.size());
bool use_explicit_names = have_explicit_names && serialize_names;
for (size_t i = 0; i < elems.size(); ++i)
{
String elem_name = use_explicit_names ? names[i] : toString(i + 1);
String elem_name = have_explicit_names ? names[i] : toString(i + 1);
auto serialization = elems[i]->getDefaultSerialization();
serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name);
}
return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names);
return std::make_shared<SerializationTuple>(std::move(serializations), have_explicit_names);
}
SerializationPtr DataTypeTuple::getSerialization(const SerializationInfo & info) const
{
SerializationTuple::ElementSerializations serializations(elems.size());
const auto & info_tuple = assert_cast<const SerializationInfoTuple &>(info);
bool use_explicit_names = have_explicit_names && serialize_names;
for (size_t i = 0; i < elems.size(); ++i)
{
String elem_name = use_explicit_names ? names[i] : toString(i + 1);
String elem_name = have_explicit_names ? names[i] : toString(i + 1);
auto serialization = elems[i]->getSerialization(*info_tuple.getElementInfo(i));
serializations[i] = std::make_shared<SerializationNamed>(serialization, elem_name);
}
return std::make_shared<SerializationTuple>(std::move(serializations), use_explicit_names);
return std::make_shared<SerializationTuple>(std::move(serializations), have_explicit_names);
}
MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const SerializationInfo::Settings & settings) const

View File

@ -22,14 +22,11 @@ private:
DataTypes elems;
Strings names;
bool have_explicit_names;
bool serialize_names = true;
public:
static constexpr bool is_parametric = true;
explicit DataTypeTuple(const DataTypes & elems);
DataTypeTuple(const DataTypes & elems, const Strings & names, bool serialize_names_ = true);
static bool canBeCreatedWithNames(const Strings & names);
DataTypeTuple(const DataTypes & elems, const Strings & names);
TypeIndex getTypeId() const override { return TypeIndex::Tuple; }
std::string doGetName() const override;
@ -66,7 +63,6 @@ public:
String getNameByPosition(size_t i) const;
bool haveExplicitNames() const { return have_explicit_names; }
bool serializeNames() const { return serialize_names; }
};
}

View File

@ -128,22 +128,21 @@ static auto extractVector(const std::vector<Tuple> & vec)
return res;
}
void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, const NamesAndTypesList & extended_storage_columns)
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns)
{
std::unordered_map<String, DataTypePtr> storage_columns_map;
for (const auto & [name, type] : extended_storage_columns)
storage_columns_map[name] = type;
for (auto & name_type : columns_list)
for (auto & column : block)
{
if (!isObject(name_type.type))
if (!isObject(column.type))
continue;
auto & column = block.getByName(name_type.name);
if (!isObject(column.type))
throw Exception(ErrorCodes::TYPE_MISMATCH,
"Type for column '{}' mismatch in columns list and in block. In list: {}, in block: {}",
name_type.name, name_type.type->getName(), column.type->getName());
column.name, column.type->getName(), column.type->getName());
const auto & column_object = assert_cast<const ColumnObject &>(*column.column);
const auto & subcolumns = column_object.getSubcolumns();
@ -151,7 +150,7 @@ void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, con
if (!column_object.isFinalized())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cannot convert to tuple column '{}' from type {}. Column should be finalized first",
name_type.name, name_type.type->getName());
column.name, column.type->getName());
PathsInData tuple_paths;
DataTypes tuple_types;
@ -164,12 +163,11 @@ void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, con
tuple_columns.emplace_back(entry->data.getFinalizedColumnPtr());
}
auto it = storage_columns_map.find(name_type.name);
auto it = storage_columns_map.find(column.name);
if (it == storage_columns_map.end())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", name_type.name);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Column '{}' not found in storage", column.name);
std::tie(column.column, column.type) = unflattenTuple(tuple_paths, tuple_types, tuple_columns);
name_type.type = column.type;
/// Check that constructed Tuple type and type in storage are compatible.
getLeastCommonTypeForObject({column.type, it->second}, true);

View File

@ -38,7 +38,7 @@ DataTypePtr getDataTypeByColumn(const IColumn & column);
/// Converts Object types and columns to Tuples in @block
/// and checks that types are consistent with types in @extended_storage_columns.
void convertObjectsToTuples(NamesAndTypesList & columns_list, Block & block, const NamesAndTypesList & extended_storage_columns);
void convertObjectsToTuples(Block & block, const NamesAndTypesList & extended_storage_columns);
/// Checks that each path is not the prefix of any other path.
void checkObjectHasNoAmbiguosPaths(const PathsInData & paths);

View File

@ -6,6 +6,8 @@
#include <Common/formatIPv6.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadBuffer.h>
#include <Formats/FormatSettings.h>
namespace DB
{
@ -47,9 +49,11 @@ void SerializationIPv4::deserializeText(IColumn & column, ReadBuffer & istr, con
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
istr.read(buffer, sizeof(buffer) - 1);
UInt32 ipv4_value = 0;
if (!parseIPv4(buffer, reinterpret_cast<unsigned char *>(&ipv4_value)))
bool parse_result = parseIPv4(buffer, reinterpret_cast<unsigned char *>(&ipv4_value));
if (!parse_result && !settings.input_format_ipv4_default_on_conversion_error)
{
throw Exception("Invalid IPv4 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
throw Exception("Invalid IPv4 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insert(ipv4_value);
@ -89,9 +93,11 @@ void SerializationIPv6::deserializeText(IColumn & column, ReadBuffer & istr, con
istr.read(buffer, sizeof(buffer) - 1);
std::string ipv6_value(IPV6_BINARY_LENGTH, '\0');
if (!parseIPv6(buffer, reinterpret_cast<unsigned char *>(ipv6_value.data())))
bool parse_result = parseIPv6(buffer, reinterpret_cast<unsigned char *>(ipv6_value.data()));
if (!parse_result && !settings.input_format_ipv6_default_on_conversion_error)
{
throw Exception("Invalid IPv6 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
throw Exception("Invalid IPv6 value", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insertString(ipv6_value);

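Both deserializers now keep the parse result in a flag and only throw when the corresponding input_format_ipv4/ipv6_default_on_conversion_error setting is off; otherwise the zero-initialized value prepared above is inserted as the default. A rough standalone sketch of that parse-or-default pattern, using POSIX inet_pton instead of ClickHouse's parseIPv4 (the function name below is illustrative, not the library's):

#include <arpa/inet.h>
#include <cstdint>
#include <stdexcept>
#include <string>

/// Parse an IPv4 string; on failure either throw or fall back to 0.0.0.0,
/// depending on a "default on conversion error" flag.
uint32_t parseIPv4OrDefault(const std::string & text, bool default_on_error)
{
    in_addr addr{};
    if (inet_pton(AF_INET, text.c_str(), &addr) != 1)
    {
        if (!default_on_error)
            throw std::runtime_error("Invalid IPv4 value");
        return 0; /// the prepared default, like ipv4_value = 0 above
    }
    return addr.s_addr; /// network byte order; the real code keeps its own representation
}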
View File

@ -43,7 +43,7 @@ void SerializationNumber<T>::serializeTextJSON(const IColumn & column, size_t ro
}
template <typename T>
void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
bool has_quote = false;
if (!istr.eof() && *istr.position() == '"') /// We understand the number both in quotes and without.
@ -67,7 +67,7 @@ void SerializationNumber<T>::deserializeTextJSON(IColumn & column, ReadBuffer &
static constexpr bool is_uint8 = std::is_same_v<T, UInt8>;
static constexpr bool is_int8 = std::is_same_v<T, Int8>;
if (is_uint8 || is_int8)
if (settings.json.read_bools_as_numbers || is_uint8 || is_int8)
{
// extra conditions to parse true/false strings into 1/0
if (istr.eof())

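Previously the true/false fallback in deserializeTextJSON was limited to UInt8/Int8 columns; gating it on settings.json.read_bools_as_numbers extends it to every numeric type. A self-contained sketch of the same idea, reading a token that may be either a JSON bool or a number into a double:

#include <stdexcept>
#include <string>
#include <string_view>

/// Read a numeric field that may also be written as a JSON bool.
double readNumberOrBool(std::string_view token, bool read_bools_as_numbers)
{
    if (read_bools_as_numbers)
    {
        if (token == "true")  return 1.0;
        if (token == "false") return 0.0;
    }
    std::size_t pos = 0;
    const double value = std::stod(std::string(token), &pos); /// throws on non-numeric input
    if (pos != token.size())
        throw std::runtime_error("Cannot parse number from '" + std::string(token) + "'");
    return value;
}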
View File

@ -674,7 +674,6 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
LOG_INFO(log, "Marked recovered {} as finished", entry_name);
}
}
current_zookeeper->set(replica_path + "/log_ptr", toString(max_log_ptr));
}
std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr)

View File

@ -66,9 +66,17 @@ void DatabaseReplicatedDDLWorker::initializeReplication()
UInt32 max_log_ptr = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
logs_to_keep = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/logs_to_keep"));
if (our_log_ptr == 0 || our_log_ptr + logs_to_keep < max_log_ptr)
{
database->recoverLostReplica(zookeeper, our_log_ptr, max_log_ptr);
zookeeper->set(database->replica_path + "/log_ptr", toString(max_log_ptr));
initializeLogPointer(DDLTaskBase::getLogEntryName(max_log_ptr));
}
else
last_skipped_entry_name.emplace(DDLTaskBase::getLogEntryName(our_log_ptr));
{
String log_entry_name = DDLTaskBase::getLogEntryName(our_log_ptr);
last_skipped_entry_name.emplace(log_entry_name);
initializeLogPointer(log_entry_name);
}
}
String DatabaseReplicatedDDLWorker::enqueueQuery(DDLLogEntry & entry)
@ -140,10 +148,10 @@ String DatabaseReplicatedDDLWorker::tryEnqueueAndExecuteEntry(DDLLogEntry & entr
/// but it requires more complex logic around /try node.
auto zookeeper = getAndSetZooKeeper();
UInt32 our_log_ptr = parse<UInt32>(zookeeper->get(database->replica_path + "/log_ptr"));
UInt32 our_log_ptr = getLogPointer();
UInt32 max_log_ptr = parse<UInt32>(zookeeper->get(database->zookeeper_path + "/max_log_ptr"));
assert(our_log_ptr <= max_log_ptr);
if (database->db_settings.max_replication_lag_to_enqueue < max_log_ptr - our_log_ptr)
if (our_log_ptr + database->db_settings.max_replication_lag_to_enqueue < max_log_ptr)
throw Exception(ErrorCodes::NOT_A_LEADER, "Cannot enqueue query on this replica, "
"because it has replication lag of {} queries. Try other replica.", max_log_ptr - our_log_ptr);
@ -203,7 +211,7 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na
}
}
UInt32 our_log_ptr = parse<UInt32>(zookeeper->get(fs::path(database->replica_path) / "log_ptr"));
UInt32 our_log_ptr = getLogPointer();
UInt32 entry_num = DatabaseReplicatedTask::getLogEntryNumber(entry_name);
if (entry_num <= our_log_ptr)
@ -308,4 +316,18 @@ bool DatabaseReplicatedDDLWorker::canRemoveQueueEntry(const String & entry_name,
return entry_number + logs_to_keep < max_log_ptr;
}
void DatabaseReplicatedDDLWorker::initializeLogPointer(const String & processed_entry_name)
{
updateMaxDDLEntryID(processed_entry_name);
assert(max_id.load() == parse<UInt32>(getAndSetZooKeeper()->get(fs::path(database->replica_path) / "log_ptr")));
}
UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const
{
/// NOTE it may not be equal to the log_ptr in zk:
/// - max_id can be equal to log_ptr - 1 due to race condition (when it's updated in zk, but not updated in memory yet)
/// - max_id can be greater than log_ptr, because log_ptr is not updated for failed and dummy entries
return max_id.load();
}
}

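getLogPointer turns the worker's in-memory max_id into the single source for the replica's log position instead of re-reading /log_ptr from ZooKeeper at each call site, and initializeLogPointer seeds it after recovery or on startup. The sketch below shows only the bare caching pattern with a std::atomic, under the same "may briefly disagree with the store" contract described in the comment above; it is not the worker's real interface.

#include <algorithm>
#include <atomic>
#include <cstdint>

/// Cached replication log pointer. Reads are cheap; the authoritative value
/// still lives in the coordination store and may briefly be ahead or behind.
class LogPointerCache
{
public:
    void initialize(uint32_t value_from_store) { max_id.store(value_from_store); }

    /// Single writer (the worker thread) assumed, so load-then-store is enough here.
    void advanceTo(uint32_t processed_entry) { max_id.store(std::max(max_id.load(), processed_entry)); }

    uint32_t get() const { return max_id.load(); }

private:
    std::atomic<uint32_t> max_id{0};
};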
View File

@ -32,9 +32,11 @@ public:
static String enqueueQueryImpl(const ZooKeeperPtr & zookeeper, DDLLogEntry & entry,
DatabaseReplicated * const database, bool committed = false); /// NOLINT
UInt32 getLogPointer() const;
private:
bool initializeMainThread() override;
void initializeReplication();
void initializeLogPointer(const String & processed_entry_name);
DDLTaskPtr initAndCheckTask(const String & entry_name, String & out_reason, const ZooKeeperPtr & zookeeper) override;
bool canRemoveQueueEntry(const String & entry_name, const Coordination::Stat & stat) override;

View File

@ -333,6 +333,7 @@ void DiskLocal::replaceFile(const String & from_path, const String & to_path)
{
fs::path from_file = fs::path(disk_path) / from_path;
fs::path to_file = fs::path(disk_path) / to_path;
fs::create_directories(to_file.parent_path());
fs::rename(from_file, to_file);
}

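The one-line fix simply makes sure the destination directory exists before fs::rename, which otherwise fails when to_path points into a directory that has not been created yet. The same pattern with std::filesystem directly, as a minimal sketch:

#include <filesystem>

namespace fs = std::filesystem;

/// Move a file into place, creating the destination directory first.
void replaceFile(const fs::path & from_file, const fs::path & to_file)
{
    fs::create_directories(to_file.parent_path()); /// no-op if it already exists
    fs::rename(from_file, to_file);                /// atomically replaces to_file on POSIX
}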
View File

@ -334,15 +334,17 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
read_buffer_for_file_segment->seek(file_offset_of_buffer_end, SEEK_SET);
}
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
auto download_offset = file_segment->getDownloadOffset();
if (download_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
{
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
throw Exception(
ErrorCodes::LOGICAL_ERROR,
"Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, "
"implementation buffer reading until: {}, file segment info: {}",
file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(),
impl_range.left, *impl_range.right, file_segment->getInfoForLog());
}
break;
}
@ -802,12 +804,14 @@ std::optional<size_t> CachedReadBufferFromRemoteFS::getLastNonDownloadedOffset()
String CachedReadBufferFromRemoteFS::getInfoForLog()
{
auto implementation_buffer_read_range_str =
implementation_buffer ?
std::to_string(implementation_buffer->getRemainingReadRange().left)
+ '-'
+ (implementation_buffer->getRemainingReadRange().right ? std::to_string(*implementation_buffer->getRemainingReadRange().right) : "None")
: "None";
String implementation_buffer_read_range_str;
if (implementation_buffer)
{
auto read_range = implementation_buffer->getRemainingReadRange();
implementation_buffer_read_range_str = std::to_string(read_range.left) + '-' + (read_range.right ? std::to_string(*read_range.right) : "None");
}
else
implementation_buffer_read_range_str = "None";
auto current_file_segment_info = current_file_segment_it == file_segments_holder->file_segments.end() ? "None" : (*current_file_segment_it)->getInfoForLog();

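Both hunks tighten the handling of getRemainingReadRange(): the range is queried (and its optional right bound dereferenced) only on the error path, and getInfoForLog now fetches it once instead of three times. Formatting such a half-open range is a small pattern of its own; a sketch with standard types only, names chosen for illustration:

#include <cstddef>
#include <optional>
#include <string>

struct ReadRange { std::size_t left = 0; std::optional<std::size_t> right; };

/// Render "left-right", or "left-None" when the right bound is unknown.
std::string toString(const ReadRange & range)
{
    return std::to_string(range.left) + '-'
        + (range.right ? std::to_string(*range.right) : "None");
}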
View File

@ -77,6 +77,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
format_settings.date_time_input_format = settings.date_time_input_format;
format_settings.date_time_output_format = settings.date_time_output_format;
format_settings.input_format_ipv4_default_on_conversion_error = settings.input_format_ipv4_default_on_conversion_error;
format_settings.input_format_ipv6_default_on_conversion_error = settings.input_format_ipv6_default_on_conversion_error;
format_settings.bool_true_representation = settings.bool_true_representation;
format_settings.bool_false_representation = settings.bool_false_representation;
format_settings.enable_streaming = settings.output_format_enable_streaming;
@ -88,6 +90,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.json.named_tuples_as_objects = settings.output_format_json_named_tuples_as_objects;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
format_settings.json.read_bools_as_numbers = settings.input_format_json_read_bools_as_numbers;
format_settings.null_as_default = settings.input_format_null_as_default;
format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;

View File

@ -65,6 +65,9 @@ struct FormatSettings
DateTimeOutputFormat date_time_output_format = DateTimeOutputFormat::Simple;
bool input_format_ipv4_default_on_conversion_error = false;
bool input_format_ipv6_default_on_conversion_error = false;
UInt64 input_allow_errors_num = 0;
Float32 input_allow_errors_ratio = 0;
@ -130,6 +133,7 @@ struct FormatSettings
bool escape_forward_slashes = true;
bool named_tuples_as_objects = false;
bool serialize_as_strings = false;
bool read_bools_as_numbers = true;
} json;
struct
@ -138,6 +142,7 @@ struct FormatSettings
bool import_nested = false;
bool allow_missing_columns = false;
bool case_insensitive_column_matching = false;
std::unordered_set<int> skip_row_groups = {};
} parquet;
struct Pretty
@ -219,6 +224,7 @@ struct FormatSettings
bool allow_missing_columns = false;
int64_t row_batch_size = 100'000;
bool case_insensitive_column_matching = false;
std::unordered_set<int> skip_stripes = {};
} orc;
/// For capnProto format we should determine how to

View File

@ -10,6 +10,7 @@
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeObject.h>
#include <DataTypes/DataTypeFactory.h>
#include <Common/JSONParsers/SimdJSONParser.h>
#include <Common/JSONParsers/RapidJSONParser.h>
#include <Common/JSONParsers/DummyJSONParser.h>
@ -118,7 +119,7 @@ DataTypePtr getDataTypeFromJSONFieldImpl(const Element & field)
return nullptr;
if (field.isBool())
return makeNullable(std::make_shared<DataTypeUInt8>());
return DataTypeFactory::instance().get("Nullable(Bool)");
if (field.isInt64() || field.isUInt64() || field.isDouble())
return makeNullable(std::make_shared<DataTypeFloat64>());
@ -270,13 +271,13 @@ struct JSONEachRowFieldsExtractor
std::vector<String> column_names;
};
std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings)
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings)
{
JSONEachRowFieldsExtractor extractor;
auto data_types = determineColumnDataTypesFromJSONEachRowDataImpl<JSONEachRowFieldsExtractor, '{', '}'>(in, json_strings, extractor);
std::unordered_map<String, DataTypePtr> result;
NamesAndTypesList result;
for (size_t i = 0; i != extractor.column_names.size(); ++i)
result[extractor.column_names[i]] = data_types[i];
result.emplace_back(extractor.column_names[i], data_types[i]);
return result;
}

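Returning NamesAndTypesList instead of std::unordered_map preserves the order in which the fields were seen in the row, so the inferred schema keeps the input's column order. A small restatement of that change in standard-library terms:

#include <cstddef>
#include <string>
#include <utility>
#include <vector>

/// Names and inferred types, in the order the fields were encountered.
using NamesAndTypes = std::vector<std::pair<std::string, std::string>>;

NamesAndTypes collectInOrder(const std::vector<std::string> & column_names,
                             const std::vector<std::string> & data_types)
{
    NamesAndTypes result;
    result.reserve(column_names.size());
    for (std::size_t i = 0; i != column_names.size(); ++i)
        result.emplace_back(column_names[i], data_types[i]); /// keeps insertion order, unlike unordered_map
    return result;
}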
View File

@ -20,9 +20,9 @@ std::pair<bool, size_t> fileSegmentationEngineJSONCompactEachRow(ReadBuffer & in
DataTypePtr getDataTypeFromJSONField(const String & field);
/// Read row in JSONEachRow format and try to determine type for each field.
/// Return map {column_name : type}.
/// Return list of names and types.
/// If cannot determine the type of some field, return nullptr for it.
std::unordered_map<String, DataTypePtr> readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings);
NamesAndTypesList readRowAndGetNamesAndDataTypesForJSONEachRow(ReadBuffer & in, bool json_strings);
/// Read row in JSONCompactEachRow format and try to determine type for each field.
/// If cannot determine the type of some field, return nullptr for it.

View File

@ -96,17 +96,6 @@ if (TARGET ch_contrib::rapidjson)
target_link_libraries(clickhouse_functions PRIVATE ch_contrib::rapidjson)
endif()
# ClickHouse developers may use platform-dependent code under some macro (e.g. `#ifdef ENABLE_MULTITARGET`).
# If turned ON, this option defines such macro.
# See `src/Functions/TargetSpecific.h`
option(ENABLE_MULTITARGET_CODE "Enable platform-dependent code" ON)
if (ENABLE_MULTITARGET_CODE)
add_definitions(-DENABLE_MULTITARGET_CODE=1)
else()
add_definitions(-DENABLE_MULTITARGET_CODE=0)
endif()
add_subdirectory(GatherUtils)
target_link_libraries(clickhouse_functions PRIVATE clickhouse_functions_gatherutils)

View File

@ -179,6 +179,30 @@ struct ToStartOfMonthImpl
using FactorTransform = ZeroTransform;
};
struct ToLastDayOfMonthImpl
{
static constexpr auto name = "toLastDayOfMonth";
static inline UInt16 execute(Int64 t, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfMonth(time_zone.toDayNum(t));
}
static inline UInt16 execute(UInt32 t, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfMonth(time_zone.toDayNum(t));
}
static inline UInt16 execute(Int32 d, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfMonth(ExtendedDayNum(d));
}
static inline UInt16 execute(UInt16 d, const DateLUTImpl & time_zone)
{
return time_zone.toLastDayNumOfMonth(DayNum(d));
}
using FactorTransform = ZeroTransform;
};
struct ToStartOfQuarterImpl
{
static constexpr auto name = "toStartOfQuarter";

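ToLastDayOfMonthImpl reuses DateLUTImpl::toLastDayNumOfMonth for every supported argument type, so the overloads only differ in how they obtain the day number. Outside of the date LUT, the same result can be computed with C++20 <chrono>; a rough standalone equivalent:

#include <chrono>

/// Last calendar day of the month that contains the given date.
std::chrono::year_month_day lastDayOfMonth(const std::chrono::year_month_day & date)
{
    using namespace std::chrono;
    const year_month_day_last ymdl = date.year() / date.month() / last;
    return year_month_day{ymdl};
}

/// e.g. lastDayOfMonth(year_month_day{2024y / February / 10d}) yields 2024-02-29.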
View File

@ -1,12 +1,12 @@
#pragma once
#include <base/map.h>
#include <Common/TargetSpecific.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/GatherUtils/GatherUtils.h>
#include <Functions/GatherUtils/Sources.h>
#include <Functions/IFunction.h>
#include <Functions/PerformanceAdaptors.h>
#include <Functions/TargetSpecific.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>

View File

@ -2958,8 +2958,7 @@ private:
/// For named tuples allow conversions for tuples with
/// different sets of elements. If element exists in @to_type
/// and doesn't exist in @from_type it will be filled by default values.
if (from_type->haveExplicitNames() && from_type->serializeNames()
&& to_type->haveExplicitNames() && to_type->serializeNames())
if (from_type->haveExplicitNames() && to_type->haveExplicitNames())
{
const auto & from_names = from_type->getElementNames();
std::unordered_map<String, size_t> from_positions;

View File

@ -38,8 +38,8 @@
#include <Columns/ColumnTuple.h>
#include <Functions/IFunction.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/TargetSpecific.h>
#include <Functions/PerformanceAdaptors.h>
#include <Common/TargetSpecific.h>
#include <base/range.h>
#include <base/bit_cast.h>

View File

@ -1,9 +1,9 @@
#pragma once
#include <Common/TargetSpecific.h>
#include <DataTypes/DataTypesNumber.h>
#include <Columns/ColumnVector.h>
#include <Functions/IFunction.h>
#include <Functions/TargetSpecific.h>
#include <Functions/PerformanceAdaptors.h>
#include <IO/WriteHelpers.h>

View File

@ -7,6 +7,8 @@
#include <Core/AccurateComparison.h>
#include <base/range.h>
#include "GatherUtils.h"
#include "sliceEqualElements.h"
#include "sliceHasImplAnyAll.h"
namespace DB::ErrorCodes
@ -461,39 +463,19 @@ void NO_INLINE conditional(SourceA && src_a, SourceB && src_b, Sink && sink, con
}
/// Methods to check if first array has elements from second array, overloaded for various combinations of types.
template <
ArraySearchType search_type,
typename FirstSliceType,
typename SecondSliceType,
bool (*isEqual)(const FirstSliceType &, const SecondSliceType &, size_t, size_t)>
bool sliceHasImplAnyAll(const FirstSliceType & first, const SecondSliceType & second, const UInt8 * first_null_map, const UInt8 * second_null_map)
template <typename T>
bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
size_t first_ind [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
const bool has_first_null_map = first_null_map != nullptr;
const bool has_second_null_map = second_null_map != nullptr;
for (size_t i = 0; i < second.size; ++i)
{
bool has = false;
for (size_t j = 0; j < first.size && !has; ++j)
{
const bool is_first_null = has_first_null_map && first_null_map[j];
const bool is_second_null = has_second_null_map && second_null_map[i];
if (is_first_null && is_second_null)
has = true;
if (!is_first_null && !is_second_null && isEqual(first, second, j, i))
has = true;
}
if (has && search_type == ArraySearchType::Any)
return true;
if (!has && search_type == ArraySearchType::All)
return false;
}
return search_type == ArraySearchType::All;
if constexpr (is_decimal<T>)
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
else
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
}
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;
}
template <
@ -620,55 +602,6 @@ bool sliceHasImpl(const FirstSliceType & first, const SecondSliceType & second,
return sliceHasImplAnyAll<search_type, FirstSliceType, SecondSliceType, isEqual>(first, second, first_null_map, second_null_map);
}
template <typename T, typename U>
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
const NumericArraySlice<U> & second [[maybe_unused]],
size_t first_ind [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
/// TODO: Decimal scale
if constexpr (is_decimal<T> && is_decimal<U>)
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
else if constexpr (is_decimal<T> || is_decimal<U>)
return false;
else
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
}
template <typename T>
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
{
return false;
}
template <typename U>
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
{
return false;
}
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + second.begin, *second.elements, -1) == 0;
}
template <typename T>
bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
size_t first_ind [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
if constexpr (is_decimal<T>)
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
else
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
}
inline ALWAYS_INLINE bool insliceEqualElements(const GenericArraySlice & first, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + first.begin, *first.elements, -1) == 0;
}
template <ArraySearchType search_type, typename T, typename U>
bool sliceHas(const NumericArraySlice<T> & first, const NumericArraySlice<U> & second)
{
@ -854,4 +787,3 @@ void resizeConstantSize(ArraySource && array_source, ValueSource && value_source
}
}

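The removed body shows what sliceHasImplAnyAll does: for "any" it succeeds as soon as one element of the second slice is found in the first, for "all" it fails as soon as one is missing, and two nulls compare equal. The function now lives in sliceHasImplAnyAll.h, while sliceEqualElements/insliceEqualElements move to the new sliceEqualElements.h so both headers can share them. A plain-vector restatement of the generic algorithm, with std::vector standing in for the slice types:

#include <cstddef>
#include <vector>

enum class ArraySearchType { Any, All };

/// Does `first` contain the elements of `second`? Nulls (flagged in the optional masks) match each other.
template <ArraySearchType search_type, typename T>
bool sliceHasAnyAll(const std::vector<T> & first, const std::vector<T> & second,
                    const std::vector<bool> * first_nulls = nullptr,
                    const std::vector<bool> * second_nulls = nullptr)
{
    for (std::size_t i = 0; i < second.size(); ++i)
    {
        bool has = false;
        for (std::size_t j = 0; j < first.size() && !has; ++j)
        {
            const bool first_null = first_nulls && (*first_nulls)[j];
            const bool second_null = second_nulls && (*second_nulls)[i];
            if ((first_null && second_null)
                || (!first_null && !second_null && first[j] == second[i]))
                has = true;
        }
        if (has && search_type == ArraySearchType::Any)
            return true;   /// one match is enough
        if (!has && search_type == ArraySearchType::All)
            return false;  /// one miss is fatal
    }
    return search_type == ArraySearchType::All;
}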
View File

@ -1,4 +1,5 @@
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(clickhouse_functions_gatherutils .)
add_library(clickhouse_functions_gatherutils ${clickhouse_functions_gatherutils_sources} ${clickhouse_functions_gatherutils_headers})
target_link_libraries(clickhouse_functions_gatherutils PRIVATE dbms)
@ -14,3 +15,5 @@ endif()
if (STRIP_DEBUG_SYMBOLS_FUNCTIONS)
target_compile_options(clickhouse_functions_gatherutils PRIVATE "-g0")
endif()
set_target_properties(clickhouse_functions_gatherutils PROPERTIES COMPILE_FLAGS "${X86_INTRINSICS_FLAGS}")

View File

@ -0,0 +1,41 @@
#pragma once
#include <Core/AccurateComparison.h>
#include "Slices.h"
namespace DB::GatherUtils
{
template <typename T, typename U>
bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
const NumericArraySlice<U> & second [[maybe_unused]],
size_t first_ind [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
/// TODO: Decimal scale
if constexpr (is_decimal<T> && is_decimal<U>)
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
else if constexpr (is_decimal<T> || is_decimal<U>)
return false;
else
return accurate::equalsOp(first.data[first_ind], second.data[second_ind]);
}
template <typename T>
bool sliceEqualElements(const NumericArraySlice<T> &, const GenericArraySlice &, size_t, size_t)
{
return false;
}
template <typename U>
bool sliceEqualElements(const GenericArraySlice &, const NumericArraySlice<U> &, size_t, size_t)
{
return false;
}
inline ALWAYS_INLINE bool sliceEqualElements(const GenericArraySlice & first, const GenericArraySlice & second, size_t first_ind, size_t second_ind)
{
return first.elements->compareAt(first_ind + first.begin, second_ind + second.begin, *second.elements, -1) == 0;
}
}

Some files were not shown because too many files have changed in this diff.