Commit f7b9059dad: Merged with master
(mirror of https://github.com/ClickHouse/ClickHouse.git)
.gitignore (vendored, 14 changes)
@@ -180,18 +180,10 @@ utils/zookeeper-create-entry-to-download-part/zookeeper-create-entry-to-download
utils/zookeeper-dump-tree/zookeeper-dump-tree
utils/zookeeper-remove-by-list/zookeeper-remove-by-list
dbms/src/Storages/tests/remove_symlink_directory
debian/control
debian/copyright
debian/tmp/
libs/libcommon/src/tests/json_test
utils/compressor/zstd_test
utils/wikistat-loader/wikistat-loader
dbms/src/Common/tests/pod_array
debian/clickhouse-benchmark/
debian/clickhouse-client/
debian/clickhouse-server-base/
debian/clickhouse-server-common/
debian/files

dbms/src/Server/data/*
dbms/src/Server/metadata/*
@@ -210,9 +202,6 @@ vgcore*
*.changes
build-stamp
configure-stamp
debian/*.debhelper.log
debian/*.debhelper
debian/*.substvars

*.bin
*.mrk
@@ -251,3 +240,6 @@ website/package-lock.json

# cquery cache
/.cquery-cache

# ccls cache
/.ccls-cache
.gitmodules (vendored, 3 changes)
@@ -61,3 +61,6 @@
[submodule "contrib/libgsasl"]
path = contrib/libgsasl
url = https://github.com/ClickHouse-Extras/libgsasl.git
[submodule "contrib/cppkafka"]
path = contrib/cppkafka
url = https://github.com/mfontanini/cppkafka.git
@@ -1,50 +0,0 @@
language: generic

matrix:
fast_finish: true
include:
# We need to have gcc7 headers to compile c++17 code on clang
# - os: linux
#
#   cache:
#     ccache: true
#     timeout: 1000
#     directories:
#       - /home/travis/.ccache
#
#   addons:
#     apt:
#       update: true
#       sources:
#         - ubuntu-toolchain-r-test
#         - llvm-toolchain-trusty-5.0
#       packages: [ ninja-build, g++-7, clang-5.0, lld-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl]
#
#   env:
#     - MATRIX_EVAL="export CC=clang-5.0 CXX=clang++-5.0"
#
#   script:
#     - utils/travis/normal.sh

- os: linux

sudo: required

cache:
timeout: 1000
directories:
- /var/cache/pbuilder/ccache

addons:
apt:
update: true
packages: [ pbuilder, fakeroot, debhelper ]

script:
- utils/travis/pbuilder.sh

allow_failures:
- os: osx

before_script:
- eval "${MATRIX_EVAL}"
@@ -81,9 +81,13 @@ option (ENABLE_TESTS "Enables tests" ON)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON)

if (OS_LINUX)
if (OS_LINUX AND NOT UNBUNDLED)
option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON)
endif()
if (GLIBC_COMPATIBILITY)
message (STATUS "Some symbols from glibc will be replaced for compatibility")
link_libraries(glibc-compatibility)
endif ()
endif ()
endif ()

if (GLIBC_COMPATIBILITY)
@@ -179,8 +183,8 @@ endif (TEST_COVERAGE)

if (ENABLE_TESTS)
message (STATUS "Tests are enabled")
enable_testing()
endif ()
enable_testing() # Enable for tests without binary

# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
@@ -251,7 +255,7 @@ include (libs/libdaemon/cmake/find_unwind.cmake)

include (cmake/print_flags.cmake)

add_subdirectory (contrib)
add_subdirectory (contrib EXCLUDE_FROM_ALL)
add_subdirectory (libs)
add_subdirectory (utils)
add_subdirectory (dbms)
@@ -1,7 +1,7 @@
# Use Ninja instead of Unix Makefiles by default.
# https://stackoverflow.com/questions/11269833/cmake-selecting-a-generator-within-cmakelists-txt
#
# Reason: it have better startup time than make and it parallelize jobs more uniformly.
# Reason: it has better startup time than make and it parallelizes jobs more uniformly.
# (when comparing to make with Makefiles that was generated by CMake)
#
# How to install Ninja on Ubuntu:
@@ -10,3 +10,7 @@ ClickHouse is an open-source column-oriented database management system that all
* [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announces and reports about events.
* [Contacts](https://clickhouse.yandex/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://forms.yandex.com/surveys/meet-yandex-clickhouse-team/) to meet Yandex ClickHouse team in person.

## Upcoming Events

* [C++ ClickHouse and CatBoost Sprints](https://events.yandex.ru/events/ClickHouse/2-feb-2019/) in Moscow on February 2.
@@ -25,9 +25,10 @@ Various possible options. We are not going to automate testing all of them.

#### CPU architectures:
- x86_64;
- AArch64.
- AArch64;
- PowerPC64LE.

x86_64 is the main CPU architecture. We also have minimal support for AArch64.
x86_64 is the main CPU architecture. We also have minimal support for AArch64 and PowerPC64LE.

#### Operating systems:
- Linux;
@@ -24,3 +24,10 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set (COMPILER_CLANG 1)
endif ()

if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
set (ARCH_PPC64LE 1)
if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))
message(FATAL_ERROR "Only gcc-8 is supported for powerpc architecture")
endif ()
endif ()
@@ -1,4 +1,4 @@
if (NOT ARCH_ARM AND NOT ARCH_32)
if (NOT ARCH_ARM AND NOT ARCH_32 AND NOT APPLE)
option (ENABLE_RDKAFKA "Enable kafka" ON)
endif ()

@@ -20,11 +20,13 @@ if (NOT USE_INTERNAL_RDKAFKA_LIBRARY)
if (USE_STATIC_LIBRARIES AND NOT OS_FREEBSD)
find_library (SASL2_LIBRARY sasl2)
endif ()
set (CPPKAFKA_LIBRARY cppkafka) # TODO: try to use unbundled version.
endif ()

if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR)
set (USE_RDKAFKA 1)
set (RDKAFKA_LIBRARY ${RDKAFKA_LIB} ${OPENSSL_LIBRARIES})
set (CPPKAFKA_LIBRARY cppkafka)
if (SASL2_LIBRARY)
list (APPEND RDKAFKA_LIBRARY ${SASL2_LIBRARY})
endif ()
@@ -35,9 +37,10 @@ elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY AND NOT ARCH_ARM)
set (USE_INTERNAL_RDKAFKA_LIBRARY 1)
set (RDKAFKA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src")
set (RDKAFKA_LIBRARY rdkafka)
set (CPPKAFKA_LIBRARY cppkafka)
set (USE_RDKAFKA 1)
endif ()

endif ()

message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY}")
message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY} ${CPPKAFKA_LIBRARY}")
@@ -27,6 +27,9 @@ if (HAVE_SSE41)
set (COMPILER_FLAGS "${COMPILER_FLAGS} ${TEST_FLAG}")
endif ()

if (ARCH_PPC64LE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -maltivec -D__SSE2__=1 -DNO_WARN_X86_INTRINSICS")
endif ()

# gcc -dM -E -msse4.2 - < /dev/null | sort > gcc-dump-sse42
#define __SSE4_2__ 1
contrib/CMakeLists.txt (vendored, 25 changes)
@@ -54,6 +54,8 @@ if (USE_INTERNAL_UNWIND_LIBRARY)
endif ()

if (USE_INTERNAL_ZLIB_LIBRARY)
set (ZLIB_ENABLE_TESTS 0 CACHE INTERNAL "")
set (SKIP_INSTALL_ALL 1 CACHE INTERNAL "")
set (ZLIB_COMPAT 1 CACHE INTERNAL "") # also enables WITH_GZFILEOP
set (WITH_NATIVE_INSTRUCTIONS ${ARCH_NATIVE} CACHE INTERNAL "")
if (OS_FREEBSD OR ARCH_I386)
@@ -74,15 +76,15 @@ if (USE_INTERNAL_ZLIB_LIBRARY)
target_compile_definitions (zlibstatic PUBLIC X86_64 UNALIGNED_OK)
endif ()

set_target_properties(example PROPERTIES EXCLUDE_FROM_ALL 1)
if (TARGET example64)
set_target_properties(example64 PROPERTIES EXCLUDE_FROM_ALL 1)
endif ()
#set_target_properties(example PROPERTIES EXCLUDE_FROM_ALL 1)
#if (TARGET example64)
#    set_target_properties(example64 PROPERTIES EXCLUDE_FROM_ALL 1)
#endif ()

set_target_properties(minigzip PROPERTIES EXCLUDE_FROM_ALL 1)
if (TARGET minigzip64)
set_target_properties(minigzip64 PROPERTIES EXCLUDE_FROM_ALL 1)
endif ()
#set_target_properties(minigzip PROPERTIES EXCLUDE_FROM_ALL 1)
#if (TARGET minigzip64)
#    set_target_properties(minigzip64 PROPERTIES EXCLUDE_FROM_ALL 1)
#endif ()
endif ()

if (USE_INTERNAL_CCTZ_LIBRARY)
@@ -105,8 +107,7 @@ if (USE_INTERNAL_SSL_LIBRARY)
if (NOT MAKE_STATIC_LIBRARIES)
set (BUILD_SHARED 1)
endif ()
set (USE_SHARED ${USE_STATIC_LIBRARIES})
set (LIBRESSL_SKIP_INSTALL 1)
set (LIBRESSL_SKIP_INSTALL 1 CACHE INTERNAL "")
add_subdirectory (ssl)
target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR})
target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR})
@@ -125,6 +126,10 @@ if (USE_INTERNAL_RDKAFKA_LIBRARY)
target_include_directories(rdkafka BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR})
endif ()

if (USE_RDKAFKA)
add_subdirectory (cppkafka-cmake)
endif()

if (ENABLE_ODBC AND USE_INTERNAL_ODBC_LIBRARY)
add_subdirectory (unixodbc-cmake)
endif ()
contrib/cppkafka (vendored submodule, 1 change)
@@ -0,0 +1 @@
Subproject commit 520465510efef7704346cf8d140967c4abb057c1
contrib/cppkafka-cmake/CMakeLists.txt (new file, 31 changes)
@@ -0,0 +1,31 @@
set(CPPKAFKA_DIR ${CMAKE_SOURCE_DIR}/contrib/cppkafka)

set(SRCS
${CPPKAFKA_DIR}/src/configuration.cpp
${CPPKAFKA_DIR}/src/topic_configuration.cpp
${CPPKAFKA_DIR}/src/configuration_option.cpp
${CPPKAFKA_DIR}/src/exceptions.cpp
${CPPKAFKA_DIR}/src/topic.cpp
${CPPKAFKA_DIR}/src/buffer.cpp
${CPPKAFKA_DIR}/src/queue.cpp
${CPPKAFKA_DIR}/src/message.cpp
${CPPKAFKA_DIR}/src/message_timestamp.cpp
${CPPKAFKA_DIR}/src/message_internal.cpp
${CPPKAFKA_DIR}/src/topic_partition.cpp
${CPPKAFKA_DIR}/src/topic_partition_list.cpp
${CPPKAFKA_DIR}/src/metadata.cpp
${CPPKAFKA_DIR}/src/group_information.cpp
${CPPKAFKA_DIR}/src/error.cpp
${CPPKAFKA_DIR}/src/event.cpp

${CPPKAFKA_DIR}/src/kafka_handle_base.cpp
${CPPKAFKA_DIR}/src/producer.cpp
${CPPKAFKA_DIR}/src/consumer.cpp
)

add_library(cppkafka ${LINK_MODE} ${SRCS})

target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY})
target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka)
target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS})
target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include)
@@ -341,6 +341,13 @@ static inline __m128i libdivide_get_0000FFFF(void) {
#pragma clang diagnostic pop
#endif

/// This is a bug in gcc-8, _MM_SHUFFLE was forgotten, though in trunk it is ok https://github.com/gcc-mirror/gcc/blob/master/gcc/config/rs6000/xmmintrin.h#L61
#if defined(__PPC__)
#ifndef _MM_SHUFFLE
#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z))
#endif
#endif

static inline __m128i libdivide_s64_signbits(__m128i v) {
//we want to compute v >> 63, that is, _mm_srai_epi64(v, 63). But there is no 64 bit shift right arithmetic instruction in SSE2. So we have to fake it by first duplicating the high 32 bit values, and then using a 32 bit shift. Another option would be to use _mm_srli_epi64(v, 63) and then subtract that from 0, but that approach appears to be substantially slower for unknown reasons
__m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1));
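Two things in this hunk are worth unpacking. `_MM_SHUFFLE(w,x,y,z)` packs four 2-bit lane selectors into one immediate byte, so `_MM_SHUFFLE(3, 3, 1, 1)` equals 0xF5 and duplicates the high 32-bit half of each 64-bit lane; combined with the 32-bit arithmetic shift that follows in the full function (truncated in this hunk), it fakes the missing 64-bit `v >> 63`. A hedged scalar sketch of the per-lane computation (hypothetical standalone code, assuming arithmetic right shift as libdivide does):

```cpp
#include <cassert>
#include <cstdint>

// Per 64-bit lane: take the high 32 bits, arithmetic-shift them by 31,
// and use the result for both halves -- every bit becomes the sign bit.
static int64_t signbitsScalar(int64_t v)
{
    int32_t hi = static_cast<int32_t>(static_cast<uint64_t>(v) >> 32);
    uint32_t s = static_cast<uint32_t>(hi >> 31); // 32-bit arithmetic shift
    return static_cast<int64_t>((static_cast<uint64_t>(s) << 32) | s);
}

int main()
{
    assert(signbitsScalar(-5) == -1); // negative: all ones, same as v >> 63
    assert(signbitsScalar(42) == 0);  // non-negative: zero
    return 0;
}
```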
@@ -7,9 +7,9 @@ CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")

IF(ENABLE_SSE STREQUAL ON)
IF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_ARM)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
ENDIF(ENABLE_SSE STREQUAL ON)
ENDIF(ENABLE_SSE STREQUAL ON AND NOT ARCH_PPC64LE AND NOT ARCH_AARCH64 AND NOT ARCH_ARM)

IF(NOT TEST_HDFS_PREFIX)
SET(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." FORCE)
contrib/librdkafka (vendored, 2 changes)
@@ -1 +1 @@
Subproject commit 7478b5ef16aadd6543fe38bc6a2deb895c70da98
Subproject commit 363dcad5a23dc29381cc626620e68ae418b3af19
@@ -1,60 +1,63 @@
set(RDKAFKA_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/librdkafka/src)

set(SRCS
${RDKAFKA_SOURCE_DIR}/crc32c.c
${RDKAFKA_SOURCE_DIR}/rdaddr.c
${RDKAFKA_SOURCE_DIR}/rdavl.c
${RDKAFKA_SOURCE_DIR}/rdbuf.c
${RDKAFKA_SOURCE_DIR}/rdcrc32.c
${RDKAFKA_SOURCE_DIR}/rdkafka.c
${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c
${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c
${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c
${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c
${RDKAFKA_SOURCE_DIR}/rdkafka_event.c
${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c
${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msg.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_reader.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_writer.c
${RDKAFKA_SOURCE_DIR}/rdkafka_offset.c
${RDKAFKA_SOURCE_DIR}/rdkafka_op.c
${RDKAFKA_SOURCE_DIR}/rdkafka_partition.c
${RDKAFKA_SOURCE_DIR}/rdkafka_pattern.c
${RDKAFKA_SOURCE_DIR}/rdkafka_queue.c
${RDKAFKA_SOURCE_DIR}/rdkafka_range_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_request.c
${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c
${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c
${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c
${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c
${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c
${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_header.c
${RDKAFKA_SOURCE_DIR}/rdlist.c
${RDKAFKA_SOURCE_DIR}/rdlog.c
${RDKAFKA_SOURCE_DIR}/rdmurmur2.c
${RDKAFKA_SOURCE_DIR}/rdports.c
${RDKAFKA_SOURCE_DIR}/rdrand.c
${RDKAFKA_SOURCE_DIR}/rdregex.c
${RDKAFKA_SOURCE_DIR}/rdstring.c
${RDKAFKA_SOURCE_DIR}/rdunittest.c
${RDKAFKA_SOURCE_DIR}/rdvarint.c
${RDKAFKA_SOURCE_DIR}/snappy.c
${RDKAFKA_SOURCE_DIR}/tinycthread.c
${RDKAFKA_SOURCE_DIR}/xxhash.c
${RDKAFKA_SOURCE_DIR}/lz4.c
${RDKAFKA_SOURCE_DIR}/lz4frame.c
${RDKAFKA_SOURCE_DIR}/lz4hc.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
${RDKAFKA_SOURCE_DIR}/crc32c.c
${RDKAFKA_SOURCE_DIR}/rdaddr.c
${RDKAFKA_SOURCE_DIR}/rdavl.c
${RDKAFKA_SOURCE_DIR}/rdbuf.c
${RDKAFKA_SOURCE_DIR}/rdcrc32.c
${RDKAFKA_SOURCE_DIR}/rdkafka.c
${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_background.c
${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c
${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c
${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c
${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c
${RDKAFKA_SOURCE_DIR}/rdkafka_event.c
${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c
${RDKAFKA_SOURCE_DIR}/rdkafka_idempotence.c
${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c
${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msg.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_reader.c
${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_writer.c
${RDKAFKA_SOURCE_DIR}/rdkafka_offset.c
${RDKAFKA_SOURCE_DIR}/rdkafka_op.c
${RDKAFKA_SOURCE_DIR}/rdkafka_partition.c
${RDKAFKA_SOURCE_DIR}/rdkafka_pattern.c
${RDKAFKA_SOURCE_DIR}/rdkafka_queue.c
${RDKAFKA_SOURCE_DIR}/rdkafka_range_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_request.c
${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c
${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c
${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c
${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c
${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c
${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c
${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c
${RDKAFKA_SOURCE_DIR}/rdkafka_header.c
${RDKAFKA_SOURCE_DIR}/rdlist.c
${RDKAFKA_SOURCE_DIR}/rdlog.c
${RDKAFKA_SOURCE_DIR}/rdmurmur2.c
${RDKAFKA_SOURCE_DIR}/rdports.c
${RDKAFKA_SOURCE_DIR}/rdrand.c
${RDKAFKA_SOURCE_DIR}/rdregex.c
${RDKAFKA_SOURCE_DIR}/rdstring.c
${RDKAFKA_SOURCE_DIR}/rdunittest.c
${RDKAFKA_SOURCE_DIR}/rdvarint.c
${RDKAFKA_SOURCE_DIR}/snappy.c
${RDKAFKA_SOURCE_DIR}/tinycthread.c
${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
${RDKAFKA_SOURCE_DIR}/xxhash.c
${RDKAFKA_SOURCE_DIR}/lz4.c
${RDKAFKA_SOURCE_DIR}/lz4frame.c
${RDKAFKA_SOURCE_DIR}/lz4hc.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
)

add_library(rdkafka ${LINK_MODE} ${SRCS})
target_include_directories(rdkafka PRIVATE include)
target_include_directories(rdkafka SYSTEM PUBLIC include)
target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR})
target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY})
@@ -60,7 +60,9 @@
// WITH_SASL_SCRAM
//#define WITH_SASL_SCRAM 1
// crc32chw
#if !defined(__PPC__)
#define WITH_CRC32C_HW 1
#endif
// regex
#define HAVE_REGEX 1
// strndup
@@ -71,4 +73,8 @@
#define HAVE_PTHREAD_SETNAME_GNU 1
// python
//#define HAVE_PYTHON 1
// C11 threads
#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__)
#  define WITH_C11THREADS 1
#endif
#endif /* _CONFIG_H_ */
contrib/librdkafka-cmake/include/librdkafka/rdkafka.h (new file, 5 changes)
@@ -0,0 +1,5 @@
#if __has_include(<rdkafka.h>) // maybe bundled
#    include_next <rdkafka.h> // Y_IGNORE
#else // system
#    include_next <librdkafka/rdkafka.h>
#endif
@@ -1,56 +1,110 @@

enable_language(ASM)

add_library(unwind
src/mi/init.c
src/mi/flush_cache.c
src/mi/mempool.c
src/mi/strerror.c
src/x86_64/is_fpreg.c
src/x86_64/regname.c
src/mi/_ReadULEB.c
src/mi/_ReadSLEB.c
src/mi/backtrace.c
src/mi/dyn-cancel.c
src/mi/dyn-info-list.c
src/mi/dyn-register.c
src/mi/Ldyn-extract.c
src/mi/Lfind_dynamic_proc_info.c
src/mi/Lget_accessors.c
src/mi/Lget_proc_info_by_ip.c
src/mi/Lget_proc_name.c
src/mi/Lput_dynamic_unwind_info.c
src/mi/Ldestroy_addr_space.c
src/mi/Lget_reg.c
src/mi/Lset_reg.c
src/mi/Lget_fpreg.c
src/mi/Lset_fpreg.c
src/mi/Lset_caching_policy.c
src/x86_64/setcontext.S
src/x86_64/Lcreate_addr_space.c
src/x86_64/Lget_save_loc.c
src/x86_64/Lglobal.c
src/x86_64/Linit.c
src/x86_64/Linit_local.c
src/x86_64/Linit_remote.c
src/x86_64/Lget_proc_info.c
src/x86_64/Lregs.c
src/x86_64/Lresume.c
src/x86_64/Lstash_frame.c
src/x86_64/Lstep.c
src/x86_64/Ltrace.c
src/x86_64/getcontext.S
src/dwarf/Lexpr.c
src/dwarf/Lfde.c
src/dwarf/Lfind_proc_info-lsb.c
src/dwarf/Lparser.c
src/dwarf/Lpe.c
src/dwarf/global.c
src/elf64.c
if (ARCH_PPC64LE)
add_library(unwind
src/mi/init.c
src/mi/flush_cache.c
src/mi/mempool.c
src/mi/strerror.c
src/mi/_ReadULEB.c
src/mi/_ReadSLEB.c
src/mi/backtrace.c
src/mi/dyn-cancel.c
src/mi/dyn-info-list.c
src/mi/dyn-register.c
src/mi/Ldyn-extract.c
src/mi/Lfind_dynamic_proc_info.c
src/mi/Lget_accessors.c
src/mi/Lget_proc_info_by_ip.c
src/mi/Lget_proc_name.c
src/mi/Lput_dynamic_unwind_info.c
src/mi/Ldestroy_addr_space.c
src/mi/Lget_reg.c
src/mi/Lset_reg.c
src/mi/Lget_fpreg.c
src/mi/Lset_fpreg.c
src/mi/Lset_caching_policy.c
src/dwarf/Lexpr.c
src/dwarf/Lfde.c
src/dwarf/Lfind_proc_info-lsb.c
src/dwarf/Lparser.c
src/dwarf/Lpe.c
src/dwarf/global.c
src/elf64.c
src/os-linux.c

src/os-linux.c
src/x86_64/Los-linux.c
)
src/ppc64/is_fpreg.c
src/ppc64/regname.c
src/ppc64/get_func_addr.c
src/ppc/Linit_local.c
src/ppc/Linit_remote.c
src/ppc/Lis_signal_frame.c
src/ppc/longjmp.S
src/ppc/Lreg_states_iterate.c
src/ppc/siglongjmp.S
src/ppc64/setcontext.S
src/ppc64/Lcreate_addr_space.c
src/ppc64/Lglobal.c
src/ppc64/Linit.c
src/ppc64/Lreg_states_iterate.c
src/ppc64/Lregs.c
src/ppc64/Lresume.c
src/ppc64/Lstep.c
src/ppc64/regname.c
src/ppc64/setcontext.S
)
else ()
add_library(unwind
src/mi/init.c
src/mi/flush_cache.c
src/mi/mempool.c
src/mi/strerror.c
src/mi/_ReadULEB.c
src/mi/_ReadSLEB.c
src/mi/backtrace.c
src/mi/dyn-cancel.c
src/mi/dyn-info-list.c
src/mi/dyn-register.c
src/mi/Ldyn-extract.c
src/mi/Lfind_dynamic_proc_info.c
src/mi/Lget_accessors.c
src/mi/Lget_proc_info_by_ip.c
src/mi/Lget_proc_name.c
src/mi/Lput_dynamic_unwind_info.c
src/mi/Ldestroy_addr_space.c
src/mi/Lget_reg.c
src/mi/Lset_reg.c
src/mi/Lget_fpreg.c
src/mi/Lset_fpreg.c
src/mi/Lset_caching_policy.c
src/dwarf/Lexpr.c
src/dwarf/Lfde.c
src/dwarf/Lfind_proc_info-lsb.c
src/dwarf/Lparser.c
src/dwarf/Lpe.c
src/dwarf/global.c
src/elf64.c
src/os-linux.c

src/x86_64/is_fpreg.c
src/x86_64/regname.c
src/x86_64/setcontext.S
src/x86_64/Lcreate_addr_space.c
src/x86_64/Lget_save_loc.c
src/x86_64/Lglobal.c
src/x86_64/Linit.c
src/x86_64/Linit_local.c
src/x86_64/Linit_remote.c
src/x86_64/Lget_proc_info.c
src/x86_64/Lregs.c
src/x86_64/Lresume.c
src/x86_64/Lstash_frame.c
src/x86_64/Lstep.c
src/x86_64/Ltrace.c
src/x86_64/getcontext.S
src/x86_64/Los-linux.c
)
endif()

find_file (HAVE_ATOMIC_OPS_H "atomic_ops.h")
configure_file (config/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config/config.h)
@@ -37,6 +37,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include <stdlib.h>
#include <libunwind.h>
#include <libunwind-ppc64.h>

#include "elf64.h"
#include "mempool.h"
@@ -25,7 +25,7 @@ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */

#include "unwind_i.h"
#include "../ppc64/unwind_i.h"

PROTECTED int
unw_reg_states_iterate (unw_cursor_t *cursor,
contrib/ssl (vendored, 2 changes)
@@ -1 +1 @@
Subproject commit dbbbcdbbd17785566f8f9c107b714f9e213d7293
Subproject commit ba8de796195ff9d8bb0249ce289b83226b848b77
@@ -287,6 +287,7 @@ endif ()

if (USE_RDKAFKA)
target_link_libraries (dbms PRIVATE ${RDKAFKA_LIBRARY})
target_link_libraries (dbms PRIVATE ${CPPKAFKA_LIBRARY})
if (NOT USE_INTERNAL_RDKAFKA_LIBRARY)
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR})
endif ()
@@ -298,8 +299,8 @@ target_include_directories (dbms SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR})
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})

if (USE_HDFS)
target_link_libraries (dbms PRIVATE ${HDFS3_LIBRARY})
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR})
target_link_libraries (clickhouse_common_io PRIVATE ${HDFS3_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR})
endif()

if (USE_JEMALLOC)
@@ -317,11 +318,6 @@ target_include_directories (clickhouse_common_io BEFORE PRIVATE ${COMMON_INCLUDE
add_subdirectory (programs)
add_subdirectory (tests)

if (GLIBC_COMPATIBILITY AND NOT CLICKHOUSE_SPLIT_BINARY)
MESSAGE(STATUS "Some symbols from glibc will be replaced for compatibility")
target_link_libraries(dbms PUBLIC glibc-compatibility)
endif()

if (ENABLE_TESTS)
macro (grep_gtest_sources BASE_DIR DST_VAR)
# Cold match files that are not in tests/ directories
@@ -2,10 +2,10 @@
set(VERSION_REVISION 54413)
set(VERSION_MAJOR 19)
set(VERSION_MINOR 1)
set(VERSION_PATCH 0)
set(VERSION_GITHASH 014e344a36bc19a58621e0add379984cf62b9067)
set(VERSION_DESCRIBE v19.1.0-testing)
set(VERSION_STRING 19.1.0)
set(VERSION_PATCH 6)
set(VERSION_GITHASH f73b337a93d534671b2187660398b8573fc1d464)
set(VERSION_DESCRIBE v19.1.6-testing)
set(VERSION_STRING 19.1.6)
# end of autochange

set(VERSION_EXTRA "" CACHE STRING "")
@@ -27,7 +27,7 @@ elseif (EXISTS ${INTERNAL_COMPILER_BIN_ROOT}${INTERNAL_COMPILER_EXECUTABLE})
endif ()

if (COPY_HEADERS_COMPILER AND OS_LINUX)
add_custom_target (copy-headers env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${ClickHouse_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES ${ClickHouse_SOURCE_DIR}/copy_headers.sh)
add_custom_target (copy-headers [ -f ${TMP_HEADERS_DIR}/dbms/src/Interpreters/SpecializedAggregator.h ] || env CLANG=${COPY_HEADERS_COMPILER} BUILD_PATH=${ClickHouse_BINARY_DIR} DESTDIR=${ClickHouse_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/copy_headers.sh ${ClickHouse_SOURCE_DIR} ${TMP_HEADERS_DIR} DEPENDS ${COPY_HEADERS_DEPENDS} WORKING_DIRECTORY ${ClickHouse_SOURCE_DIR} SOURCES copy_headers.sh)

if (USE_INTERNAL_LLVM_LIBRARY)
set (CLANG_HEADERS_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm/clang/lib/Headers")
@@ -19,7 +19,7 @@ set -e
#
# sudo ./copy_headers.sh . /usr/share/clickhouse/headers/

SOURCE_PATH=${1:-.}
SOURCE_PATH=${1:-../../..}
DST=${2:-$SOURCE_PATH/../headers}
BUILD_PATH=${BUILD_PATH=${3:-$SOURCE_PATH/build}}
@@ -860,7 +860,7 @@ private:
}


/// Process the query that doesn't require transfering data blocks to the server.
/// Process the query that doesn't require transferring data blocks to the server.
void processOrdinaryQuery()
{
connection->sendQuery(query, query_id, QueryProcessingStage::Complete, &context.getSettingsRef(), nullptr, true);
@@ -869,7 +869,7 @@ private:
}


/// Process the query that requires transfering data blocks to the server.
/// Process the query that requires transferring data blocks to the server.
void processInsertQuery()
{
/// Send part of query without data, because data will be sent separately.
@@ -1136,7 +1136,7 @@ private:
}


/// Process Log packets, exit when recieve Exception or EndOfStream
/// Process Log packets, exit when receive Exception or EndOfStream
bool receiveEndOfQuery()
{
while (true)
@@ -25,12 +25,12 @@ namespace ErrorCodes
struct ConnectionParameters
{
String host;
UInt16 port;
UInt16 port{};
String default_database;
String user;
String password;
Protocol::Secure security;
Protocol::Compression compression;
Protocol::Secure security = Protocol::Secure::Disable;
Protocol::Compression compression = Protocol::Compression::Enable;
ConnectionTimeouts timeouts;

ConnectionParameters() {}
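The hunk above replaces uninitialized members with default member initializers; before the change, reading `port` or `security` on a default-constructed object was undefined behavior. A minimal illustration of the idiom (hypothetical structs, not the patch's types):

```cpp
#include <cstdint>

struct Before { uint16_t port; };   // default-constructed: port holds indeterminate bits
struct After  { uint16_t port{}; }; // default-constructed: port is value-initialized to 0

// Before b; reading b.port is undefined behavior.
// After a;  a.port == 0 is guaranteed.
```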
@@ -93,11 +93,12 @@ private:
{
std::stringstream ss;
ss << hint;
String item;

while (!ss.eof())
{
String item;
ss >> item;
if (item.empty())
if (ss.eof())
break;

if (item == "serverError")
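The change above works around a classic istream pitfall: `eof()` only becomes true after a failed extraction, so a loop keyed on `eof()` before reading must re-check after the read, and the token variable is best declared inside the loop so a failed read cannot leave a previous value around. A hedged standalone illustration of the robust pattern (hypothetical helper, not the TestHint code):

```cpp
#include <sstream>
#include <string>

int countTokens(const std::string & text)
{
    std::stringstream ss(text);
    int n = 0;
    while (!ss.eof())
    {
        std::string item;  // fresh each iteration
        ss >> item;
        if (ss.eof() && item.empty())
            break;         // final extraction consumed only whitespace
        ++n;
    }
    return n;
}

// countTokens("a b c ") == 3; checking only before the extraction would
// run one extra iteration over an empty token for the trailing space.
```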
@@ -137,7 +137,7 @@ try
static KillingErrorHandler error_handler;
Poco::ErrorHandler::set(&error_handler);

/// Don't initilaize DateLUT
/// Don't initialize DateLUT

registerFunctions();
registerAggregateFunctions();
@@ -51,7 +51,7 @@ It is designed to retain the following properties of data:
- probability distributions of length of strings;
- probability of zero values of numbers; empty strings and arrays, NULLs;
- data compression ratio when compressed with LZ77 and entropy family of codecs;
- continuouty (magnitude of difference) of time values across table; continuouty of floating point values.
- continuity (magnitude of difference) of time values across table; continuity of floating point values.
- date component of DateTime values;
- UTF-8 validity of string values;
- string values continue to look somewhat natural.
@@ -246,7 +246,7 @@ Float transformFloatMantissa(Float x, UInt64 seed)


/// Transform difference from previous number by applying pseudorandom permutation to mantissa part of it.
/// It allows to retain some continuouty property of source data.
/// It allows to retain some continuity property of source data.
template <typename Float>
class FloatModel : public IModel
{
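To give a feel for why permuting only mantissa bits preserves continuity: sign, exponent, and the high mantissa bits stay intact, so the transformed value stays close to the original. A hypothetical simplification (this is not the FloatModel transform itself, just the general idea; the mask width and mixing constant are illustrative):

```cpp
#include <cstdint>
#include <cstring>

// Sketch: XOR a seed-derived pattern into the low 30 mantissa bits of a double.
// XOR with a fixed pattern is a bijection, and touching only low mantissa bits
// perturbs the value by at most ~2^-22 relative, so nearby inputs stay nearby.
static double obfuscateMantissa(double x, uint64_t seed)
{
    uint64_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    const uint64_t low_mask = (1ULL << 30) - 1;
    bits ^= (seed * 0x9E3779B97F4A7C15ULL) & low_mask;  // cheap keyed mixing
    std::memcpy(&x, &bits, sizeof(x));
    return x;
}
```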
@@ -22,7 +22,7 @@ std::string validateODBCConnectionString(const std::string & connection_string)
/// Connection string is a list of name, value pairs.
/// name and value are separated by '='.
/// names are case insensitive.
/// name=value pairs are sepated by ';'.
/// name=value pairs are separated by ';'.
/// ASCII whitespace characters are skipped before and after delimiters.
/// value may be optionally enclosed by {}
/// in enclosed value, } is escaped as }}.
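To make the format documented above concrete, here are hypothetical sample strings (the escaping rules are from the comment; the key names and values are purely illustrative):

```cpp
#include <string>

// name=value pairs separated by ';', names case-insensitive:
const std::string plain = "DSN=ClickHouse;UID=default;PWD=secret";

// values optionally enclosed in {}, with '}' escaped as '}}':
const std::string enclosed = "DSN={my dsn};PWD={pa}}ss}";  // stored password is "pa}ss"
```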
@@ -46,6 +46,8 @@
namespace fs = boost::filesystem;
using String = std::string;
const String FOUR_SPACES = "    ";
const std::regex QUOTE_REGEX{"\""};
const std::regex NEW_LINE{"\n"};

namespace DB
{
@@ -80,7 +82,7 @@ public:

bool reserved = (value[0] == '[' || value[0] == '{' || value == "null");
if (!reserved && wrap)
value = '"' + value + '"';
value = '"' + std::regex_replace(value, NEW_LINE, "\\n") + '"';

content[key] = value;
}
@@ -579,7 +581,8 @@ private:

using Paths = std::vector<String>;
using StringToVector = std::map<String, std::vector<String>>;
StringToVector substitutions;
using StringToMap = std::map<String, StringToVector>;
StringToMap substitutions;

using StringKeyValue = std::map<String, String>;
std::vector<StringKeyValue> substitutions_maps;
@@ -933,13 +936,13 @@ private:
{
/// Make "subconfig" of inner xml block
ConfigurationPtr substitutions_view(test_config->createView("substitutions"));
constructSubstitutions(substitutions_view, substitutions);
constructSubstitutions(substitutions_view, substitutions[test_name]);

auto queries_pre_format = queries;
queries.clear();
for (const auto & query : queries_pre_format)
{
auto formatted = formatQueries(query, substitutions);
auto formatted = formatQueries(query, substitutions[test_name]);
queries.insert(queries.end(), formatted.begin(), formatted.end());
}
}
@@ -994,6 +997,9 @@ private:
}
else
{
if (metrics.empty())
throw DB::Exception("You shoud specify at least one metric", DB::ErrorCodes::BAD_ARGUMENTS);
main_metric = metrics[0];
if (lite_output)
throw DB::Exception("Specify main_metric for lite output", DB::ErrorCodes::BAD_ARGUMENTS);
}
@@ -1219,11 +1225,11 @@ public:
json_output.set("test_name", test_name);
json_output.set("main_metric", main_metric);

if (substitutions.size())
if (substitutions[test_name].size())
{
JSONString json_parameters(2); /// here, 2 is the size of \t padding

for (auto it = substitutions.begin(); it != substitutions.end(); ++it)
for (auto it = substitutions[test_name].begin(); it != substitutions[test_name].end(); ++it)
{
String parameter = it->first;
std::vector<String> values = it->second;
@@ -1231,7 +1237,7 @@ public:
String array_string = "[";
for (size_t i = 0; i != values.size(); ++i)
{
array_string += '"' + values[i] + '"';
array_string += '"' + std::regex_replace(values[i], QUOTE_REGEX, "\\\"") + '"';
if (i != values.size() - 1)
{
array_string += ", ";
@@ -1257,7 +1263,7 @@ public:

JSONString runJSON;

runJSON.set("query", queries[query_index]);
runJSON.set("query", std::regex_replace(queries[query_index], QUOTE_REGEX, "\\\""));
if (!statistics.exception.empty())
runJSON.set("exception", statistics.exception);

@@ -25,7 +25,10 @@ endif ()

if (OS_LINUX AND MAKE_STATIC_LIBRARIES)
set (GLIBC_MAX_REQUIRED 2.4 CACHE INTERNAL "")
add_test(NAME GLIBC_required_version COMMAND bash -c "readelf -s ${CMAKE_CURRENT_BINARY_DIR}/../clickhouse-server | grep '@GLIBC' | grep -oP 'GLIBC_[\\d\\.]+' | sort | uniq | sort -r | perl -lnE 'exit 1 if $_ gt q{GLIBC_${GLIBC_MAX_REQUIRED}}'")
# temporary disabled. to enable - change 'exit 0' to 'exit $a'
add_test(NAME GLIBC_required_version COMMAND bash -c "readelf -s ${CMAKE_CURRENT_BINARY_DIR}/../clickhouse-server | perl -nE 'END {exit 0 if $a} ++$a, print if /\\x40GLIBC_(\\S+)/ and pack(q{C*}, split /\\./, \$1) gt pack q{C*}, split /\\./, q{${GLIBC_MAX_REQUIRED}}'")

#add_test(NAME GLIBC_required_version COMMAND bash -c "readelf -s ${CMAKE_CURRENT_BINARY_DIR}/../clickhouse-server | grep '@GLIBC' | grep -oP 'GLIBC_[\\d\\.]+' | sort | uniq | sort --version-sort --reverse | perl -lnE 'warn($_), exit 1 if $_ gt q{GLIBC_${GLIBC_MAX_REQUIRED}}'") # old
endif ()

install (
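The reworked test above stops comparing version strings lexically and instead packs dotted version components into byte strings before comparing, because plain string order gets multi-digit components wrong. An equivalent comparison written out in C++ (a hedged sketch, not code from the commit):

```cpp
#include <sstream>
#include <string>
#include <vector>

// Split "2.4" into {2, 4}; std::vector comparison is then component-wise,
// matching the pack(q{C*}, split /\./, ...) trick in the perl one-liner.
static std::vector<int> parseVersion(const std::string & v)
{
    std::vector<int> parts;
    std::istringstream ss(v);
    for (std::string item; std::getline(ss, item, '.');)
        parts.push_back(std::stoi(item));
    return parts;
}

// parseVersion("2.11") > parseVersion("2.4") is true, while the naive
// string comparison "2.11" > "2.4" is false -- the bug being avoided.
```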
@@ -31,7 +31,7 @@
#include <IO/MemoryReadWriteBuffer.h>
#include <IO/WriteBufferFromTemporaryFile.h>

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>

#include <Interpreters/executeQuery.h>
#include <Interpreters/Quota.h>
@@ -565,7 +565,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
auto address = socket_bind_listen(socket, listen_host, config().getInt("http_port"));
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
servers.emplace_back(new Poco::Net::HTTPServer(
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
new HTTPHandlerFactory(*this, "HTTPHandler-factory"),
server_pool,
socket,
@@ -582,7 +582,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
auto address = socket_bind_listen(socket, listen_host, config().getInt("https_port"), /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
servers.emplace_back(new Poco::Net::HTTPServer(
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
new HTTPHandlerFactory(*this, "HTTPSHandler-factory"),
server_pool,
socket,
@@ -602,7 +602,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
auto address = socket_bind_listen(socket, listen_host, config().getInt("tcp_port"));
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
servers.emplace_back(new Poco::Net::TCPServer(
servers.emplace_back(std::make_unique<Poco::Net::TCPServer>(
new TCPHandlerFactory(*this),
server_pool,
socket,
@@ -619,7 +619,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
auto address = socket_bind_listen(socket, listen_host, config().getInt("tcp_port_secure"), /* secure = */ true);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
servers.emplace_back(new Poco::Net::TCPServer(
servers.emplace_back(std::make_unique<Poco::Net::TCPServer>(
new TCPHandlerFactory(*this, /* secure= */ true),
server_pool,
socket,
@@ -642,7 +642,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
auto address = socket_bind_listen(socket, listen_host, config().getInt("interserver_http_port"));
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
servers.emplace_back(new Poco::Net::HTTPServer(
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
new InterserverIOHTTPHandlerFactory(*this, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
@@ -658,7 +658,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
auto address = socket_bind_listen(socket, listen_host, config().getInt("interserver_https_port"), /* secure = */ true);
socket.setReceiveTimeout(settings.http_receive_timeout);
socket.setSendTimeout(settings.http_send_timeout);
servers.emplace_back(new Poco::Net::HTTPServer(
servers.emplace_back(std::make_unique<Poco::Net::HTTPServer>(
new InterserverIOHTTPHandlerFactory(*this, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
@@ -485,20 +485,15 @@ void TCPHandler::processTablesStatusRequest()

void TCPHandler::sendProfileInfo()
{
if (const IProfilingBlockInputStream * input = dynamic_cast<const IProfilingBlockInputStream *>(state.io.in.get()))
{
writeVarUInt(Protocol::Server::ProfileInfo, *out);
input->getProfileInfo().write(*out);
state.io.in->getProfileInfo().write(*out);
out->next();
}
}


void TCPHandler::sendTotals()
{
if (IProfilingBlockInputStream * input = dynamic_cast<IProfilingBlockInputStream *>(state.io.in.get()))
{
const Block & totals = input->getTotals();
const Block & totals = state.io.in->getTotals();

if (totals)
{
@@ -511,15 +506,12 @@ void TCPHandler::sendTotals()
state.maybe_compressed_out->next();
out->next();
}
}
}


void TCPHandler::sendExtremes()
{
if (IProfilingBlockInputStream * input = dynamic_cast<IProfilingBlockInputStream *>(state.io.in.get()))
{
Block extremes = input->getExtremes();
Block extremes = state.io.in->getExtremes();

if (extremes)
{
@@ -532,7 +524,6 @@ void TCPHandler::sendExtremes()
state.maybe_compressed_out->next();
out->next();
}
}
}

@@ -13,9 +13,9 @@
<!-- How to choose between replicas during distributed query processing.
random - choose random replica from set of replicas with minimum number of errors
nearest_hostname - from set of replicas with minimum number of errors, choose replica
with minumum number of different symbols between replica's hostname and local hostname
with minimum number of different symbols between replica's hostname and local hostname
(Hamming distance).
in_order - first live replica is choosen in specified order.
in_order - first live replica is chosen in specified order.
-->
<load_balancing>random</load_balancing>
</default>
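As a sketch of the `nearest_hostname` rule described in the comment above: the replica whose hostname differs from the local hostname in the fewest character positions wins. A minimal illustration (hypothetical helper, not the server's implementation):

```cpp
#include <algorithm>
#include <cstddef>
#include <string>

// Count positions where the two names differ; extra length counts as difference.
static size_t hostnameDifference(const std::string & a, const std::string & b)
{
    size_t common = std::min(a.size(), b.size());
    size_t diff = std::max(a.size(), b.size()) - common;
    for (size_t i = 0; i < common; ++i)
        diff += (a[i] != b[i]);
    return diff;
}

// hostnameDifference("clickhouse-01", "clickhouse-02") == 1, so from host
// "clickhouse-01" that replica is preferred over, say, "dbserver-17".
```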
@@ -109,7 +109,7 @@ struct AggreagteFunctionGroupUniqArrayGenericData
};

/** Template parameter with true value should be used for columns that store their elements in memory continuously.
 * For such columns groupUniqArray() can be implemented more efficently (especially for small numeric arrays).
 * For such columns groupUniqArray() can be implemented more efficiently (especially for small numeric arrays).
 */
template <bool is_plain_column = false>
class AggreagteFunctionGroupUniqArrayGeneric
@@ -86,7 +86,7 @@ public:
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT};
}

events_size = arguments.size();
events_size = static_cast<UInt8>(arguments.size());
}


@@ -235,6 +235,11 @@ private:
actions.clear();
actions.emplace_back(PatternActionType::KleeneStar);

dfa_states.clear();
dfa_states.emplace_back(true);

pattern_has_time = false;

const char * pos = pattern.data();
const char * begin = pos;
const char * end = pos + pattern.size();
@@ -285,6 +290,7 @@ private:
actions.back().type != PatternActionType::KleeneStar)
throw Exception{"Temporal condition should be preceeded by an event condition", ErrorCodes::BAD_ARGUMENTS};

pattern_has_time = true;
actions.emplace_back(type, duration);
}
else
@@ -299,6 +305,9 @@ private:
throw Exception{"Event number " + toString(event_number) + " is out of range", ErrorCodes::BAD_ARGUMENTS};

actions.emplace_back(PatternActionType::SpecificEvent, event_number - 1);
dfa_states.back().transition = DFATransition::SpecificEvent;
dfa_states.back().event = event_number - 1;
dfa_states.emplace_back();
}

if (!match(")"))
@@ -306,17 +315,88 @@ private:

}
else if (match(".*"))
{
actions.emplace_back(PatternActionType::KleeneStar);
dfa_states.back().has_kleene = true;
}
else if (match("."))
{
actions.emplace_back(PatternActionType::AnyEvent);
dfa_states.back().transition = DFATransition::AnyEvent;
dfa_states.emplace_back();
}
else
throw_exception("Could not parse pattern, unexpected starting symbol");
}
}

protected:
/// Uses a DFA based approach in order to better handle patterns without
/// time assertions.
///
/// NOTE: This implementation relies on the assumption that the pattern are *small*.
///
/// This algorithm performs in O(mn) (with m the number of DFA states and N the number
/// of events) with a memory consumption and memory allocations in O(m). It means that
/// if n >>> m (which is expected to be the case), this algorithm can be considered linear.
template <typename T>
bool match(T & events_it, const T events_end) const
bool dfaMatch(T & events_it, const T events_end) const
{
using ActiveStates = std::vector<bool>;

/// Those two vectors keep track of which states should be considered for the current
/// event as well as the states which should be considered for the next event.
ActiveStates active_states(dfa_states.size(), false);
ActiveStates next_active_states(dfa_states.size(), false);
active_states[0] = true;

/// Keeps track of dead-ends in order not to iterate over all the events to realize that
/// the match failed.
size_t n_active = 1;

for (/* empty */; events_it != events_end && n_active > 0 && !active_states.back(); ++events_it)
{
n_active = 0;
next_active_states.assign(dfa_states.size(), false);

for (size_t state = 0; state < dfa_states.size(); ++state)
{
if (!active_states[state])
{
continue;
}

switch (dfa_states[state].transition)
{
case DFATransition::None:
break;
case DFATransition::AnyEvent:
next_active_states[state + 1] = true;
++n_active;
break;
case DFATransition::SpecificEvent:
if (events_it->second.test(dfa_states[state].event))
{
next_active_states[state + 1] = true;
++n_active;
}
break;
}

if (dfa_states[state].has_kleene)
{
next_active_states[state] = true;
++n_active;
}
}
swap(active_states, next_active_states);
}

return active_states.back();
}

template <typename T>
bool backtrackingMatch(T & events_it, const T events_end) const
{
const auto action_begin = std::begin(actions);
const auto action_end = std::end(actions);
@@ -445,10 +525,53 @@ protected:
return action_it == action_end;
}

private:
enum class DFATransition : char
{
/// .-------.
/// |       |
/// `-------'
None,
/// .-------. (?[0-9])
/// |       | ----------
/// `-------'
SpecificEvent,
/// .-------. .
/// |       | ----------
/// `-------'
AnyEvent,
};

struct DFAState
{
DFAState(bool has_kleene = false)
: has_kleene{has_kleene}, event{0}, transition{DFATransition::None}
{}

/// .-------.
/// |       | - - -
/// `-------'
/// |_^
bool has_kleene;
/// In the case of a state transitions with a `SpecificEvent`,
/// `event` contains the value of the event.
uint32_t event;
/// The kind of transition out of this state.
DFATransition transition;
};

using DFAStates = std::vector<DFAState>;

protected:
/// `True` if the parsed pattern contains time assertions (?t...), `false` otherwise.
bool pattern_has_time;

private:
std::string pattern;
size_t arg_count;
PatternActions actions;

DFAStates dfa_states;
};


@@ -471,7 +594,8 @@ public:
const auto events_end = std::end(data_ref.events_list);
auto events_it = events_begin;

static_cast<ColumnUInt8 &>(to).getData().push_back(match(events_it, events_end));
bool match = pattern_has_time ? backtrackingMatch(events_it, events_end) : dfaMatch(events_it, events_end);
static_cast<ColumnUInt8 &>(to).getData().push_back(match);
}
};

@@ -501,7 +625,7 @@ private:
auto events_it = events_begin;

size_t count = 0;
while (events_it != events_end && match(events_it, events_end))
while (events_it != events_end && backtrackingMatch(events_it, events_end))
++count;

return count;
|
||||
};
|
||||
|
||||
/** Template parameter with true value should be used for columns that store their elements in memory continuously.
|
||||
* For such columns topK() can be implemented more efficently (especially for small numeric arrays).
|
||||
* For such columns topK() can be implemented more efficiently (especially for small numeric arrays).
|
||||
*/
|
||||
template <bool is_plain_column = false>
|
||||
class AggregateFunctionTopKGeneric : public IAggregateFunctionDataHelper<AggregateFunctionTopKGenericData, AggregateFunctionTopKGeneric<is_plain_column>>
|
||||
|
@@ -12,7 +12,7 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
}

/** Calculates quantile by counting number of occurences for each value in a hash map.
/** Calculates quantile by counting number of occurrences for each value in a hash map.
 *
 * It use O(distinct(N)) memory. Can be naturally applied for values with weight.
 * In case of many identical values, it can be more efficient than QuantileExact even when weight is not used.
@@ -43,7 +43,7 @@ public:
virtual const char * getFamilyName() const = 0;

/** If column isn't constant, returns nullptr (or itself).
 * If column is constant, transforms constant to full column (if column type allows such tranform) and return it.
 * If column is constant, transforms constant to full column (if column type allows such transform) and return it.
 */
virtual Ptr convertToFullColumnIfConst() const { return getPtr(); }

@@ -149,7 +149,7 @@ public:
virtual void insertDefault() = 0;

/** Removes last n elements.
 * Is used to support exeption-safety of several operations.
 * Is used to support exception-safety of several operations.
 * For example, sometimes insertion should be reverted if we catch an exception during operation processing.
 * If column has less than n elements or n == 0 - undefined behavior.
 */
@@ -234,8 +234,8 @@ public:
virtual void gather(ColumnGathererStream & gatherer_stream) = 0;

/** Computes minimum and maximum element of the column.
 * In addition to numeric types, the funtion is completely implemented for Date and DateTime.
 * For strings and arrays function should retrurn default value.
 * In addition to numeric types, the function is completely implemented for Date and DateTime.
 * For strings and arrays function should return default value.
 * (except for constant columns; they should return value of the constant).
 * If column is empty function should return default value.
 */
@@ -64,7 +64,7 @@ namespace DB
 * During insertion, each key is locked - to avoid parallel initialization of regions for same key.
 *
 * On insertion, we search for free region of at least requested size.
 * If nothing was found, we evict oldest unused region; if not enogh size, we evict it neighbours; and try again.
 * If nothing was found, we evict oldest unused region; if not enough size, we evict it neighbours; and try again.
 *
 * Metadata is allocated by usual allocator and its memory usage is not accounted.
 *
@@ -23,7 +23,7 @@ using namespace Poco::XML;
namespace DB
{

/// For cutting prerpocessed path to this base
/// For cutting preprocessed path to this base
static std::string main_config_path;

/// Extracts from a string the first encountered number consisting of at least two digits.
@@ -12,7 +12,7 @@
 * - in typical implementation of standard library, hash function for integers is trivial and just use lower bits;
 * - traffic is non-uniformly distributed across a day;
 * - we are using open-addressing linear probing hash tables that are most critical to hash function quality,
 *   and trivial hash function gives disasterous results.
 *   and trivial hash function gives disastrous results.
 */

/** Taken from MurmurHash. This is Murmur finalizer.
@@ -160,7 +160,7 @@ struct TrivialHash
 * NOTE Salting is far from perfect, because it commutes with first steps of calculation.
 *
 * NOTE As mentioned, this function is slower than intHash64.
 * But occasionaly, it is faster, when written in a loop and loop is vectorized.
 * But occasionally, it is faster, when written in a loop and loop is vectorized.
 */
template <DB::UInt64 salt>
inline DB::UInt32 intHash32(DB::UInt64 key)
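For reference, the "Murmur finalizer" the comment above refers to is the 64-bit avalanche step of MurmurHash3 (these constants are well-known public-domain MurmurHash3 material; this block is background, not code from the commit):

```cpp
#include <cstdint>

// MurmurHash3 fmix64: xor-shift / multiply rounds that make every input bit
// affect every output bit, which is what open-addressing linear probing needs.
static inline uint64_t murmurFinalizer(uint64_t x)
{
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}
```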
@@ -165,7 +165,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
++pos;
break;

/// Quantifiers that allow a zero number of occurences.
/// Quantifiers that allow a zero number of occurrences.
case '{':
in_curly_braces = true;
[[fallthrough]];
@@ -40,7 +40,7 @@ struct SpaceSavingArena

/*
 * Specialized storage for StringRef with a freelist arena.
 * Keys of this type that are retained on insertion must be serialised into local storage,
 * Keys of this type that are retained on insertion must be serialized into local storage,
 * otherwise the reference would be invalid after the processed block is released.
 */
template <>
@ -1,15 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/StringSearcher.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Core/Types.h>
|
||||
#include <Poco/UTF8Encoding.h>
|
||||
#include <Poco/Unicode.h>
|
||||
#include <Common/StringSearcher.h>
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <common/StringRef.h>
|
||||
#include <common/unaligned.h>
|
||||
#include <ext/range.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
|
||||
/** Search for a substring in a string by Volnitsky's algorithm
|
||||
* http://volnitsky.com/project/str_search/
|
||||
@ -28,117 +30,38 @@
|
||||
* - if it did not match, we check the next cell of the hash table from the collision resolution chain;
|
||||
* - if not found, skip to haystack almost the size of the needle bytes;
|
||||
*
|
||||
* Unaligned memory access is used.
|
||||
* MultiVolnitsky - search for multiple substrings in a string:
|
||||
* - Add bigrams to hash table with string index. Then the usual Volnitsky search is used.
|
||||
* - We are adding while searching, limiting the number of fallback searchers and the total number of added bigrams
|
||||
*/
|
||||


namespace DB
{


/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
template <typename CRTP>
class VolnitskyBase
namespace VolnitskyTraits
{
protected:
using Offset = UInt8; /// Offset in the needle. For the basic algorithm, the length of the needle must not be greater than 255.
using Id = UInt8; /// Index of the string (within the array of multiple needles), must not be greater than 255.
using Ngram = UInt16; /// n-gram (2 bytes).

const UInt8 * const needle;
const size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
/// For how long we move, if the n-gram from haystack is not found in the hash table.
const size_t step = needle_size - sizeof(Ngram) + 1;

/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
static const size_t hash_size = 64 * 1024; /// Fits into the L2 cache (of common Intel CPUs).
Offset hash[hash_size]; /// Hash table.
/** Fits into the L2 cache (of common Intel CPUs).
* This number is extremely good for compilers as it is numeric_limits<Uint16>::max() and there are optimizations with movzwl and other instructions with 2 bytes
*/
static constexpr size_t hash_size = 64 * 1024;

/// min haystack size to use main algorithm instead of fallback
static constexpr auto min_haystack_size_for_algorithm = 20000;
const bool fallback; /// Do we need to use the fallback algorithm.
static constexpr size_t min_haystack_size_for_algorithm = 20000;

public:
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table.
*/
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
: needle{reinterpret_cast<const UInt8 *>(needle)}, needle_size{needle_size},
fallback{
needle_size < 2 * sizeof(Ngram)
|| needle_size >= std::numeric_limits<Offset>::max()
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm)}
static inline bool isFallbackNeedle(const size_t needle_size, size_t haystack_size_hint = 0)
{
if (fallback)
return;

memset(hash, 0, sizeof(hash));

/// int is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
for (auto i = static_cast<int>(needle_size - sizeof(Ngram)); i >= 0; --i)
self().putNGram(this->needle + i, i + 1, this->needle);
return needle_size < 2 * sizeof(Ngram) || needle_size >= std::numeric_limits<Offset>::max()
|| (haystack_size_hint && haystack_size_hint < min_haystack_size_for_algorithm);
}

static inline Ngram toNGram(const UInt8 * const pos) { return unalignedLoad<Ngram>(pos); }

/// If not found, the end of the haystack is returned.
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
{
if (needle_size == 0)
return haystack;

const auto haystack_end = haystack + haystack_size;

if (needle_size == 1 || fallback || haystack_size <= needle_size)
return self().search_fallback(haystack, haystack_end);

/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
const auto * pos = haystack + needle_size - sizeof(Ngram);
for (; pos <= haystack_end - needle_size; pos += step)
{
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
for (size_t cell_num = toNGram(pos) % hash_size; hash[cell_num];
cell_num = (cell_num + 1) % hash_size)
{
/// When found - compare bytewise, using the offset from the hash table.
const auto res = pos - (hash[cell_num] - 1);

if (self().compare(res))
return res;
}
}

/// The remaining tail.
return self().search_fallback(pos - step + 1, haystack_end);
}

const char * search(const char * haystack, size_t haystack_size) const
{
return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
}

protected:
CRTP & self() { return static_cast<CRTP &>(*this); }
const CRTP & self() const { return const_cast<VolnitskyBase *>(this)->self(); }

static Ngram toNGram(const UInt8 * const pos)
{
return unalignedLoad<Ngram>(pos);
}

void putNGramBase(const Ngram ngram, const int offset)
{
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
size_t cell_num = ngram % hash_size;

while (hash[cell_num])
cell_num = (cell_num + 1) % hash_size; /// Search for the next free cell.

hash[cell_num] = offset;
}

void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset)
template <typename Callback>
static inline void putNGramASCIICaseInsensitive(const UInt8 * const pos, const int offset, const Callback & putNGramBase)
{
struct Chars
{
@ -186,74 +109,21 @@ protected:
/// 1 combination: 01
putNGramBase(n, offset);
}
};


template <bool CaseSensitive, bool ASCII> struct VolnitskyImpl;

/// Case sensitive comparison
template <bool ASCII> struct VolnitskyImpl<true, ASCII> : VolnitskyBase<VolnitskyImpl<true, ASCII>>
{
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
: VolnitskyBase<VolnitskyImpl<true, ASCII>>{needle_, needle_size_, haystack_size_hint},
fallback_searcher{needle_, needle_size_}
template <bool CaseSensitive, bool ASCII, typename Callback>
static inline void putNGram(const UInt8 * const pos, const int offset, [[maybe_unused]] const UInt8 * const begin, const Callback & putNGramBase)
{
}

void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
if constexpr (CaseSensitive)
{
this->putNGramBase(this->toNGram(pos), offset);
putNGramBase(toNGram(pos), offset);
}

bool compare(const UInt8 * const pos) const
else
{
/// @todo: maybe just use memcmp for this case and rely on internal SSE optimization as in case with memcpy?
return fallback_searcher.compare(pos);
}

const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
if constexpr (ASCII)
{
return fallback_searcher.search(haystack, haystack_end);
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
}

ASCIICaseSensitiveStringSearcher fallback_searcher;
};

/// Case-insensitive ASCII
template <> struct VolnitskyImpl<false, true> : VolnitskyBase<VolnitskyImpl<false, true>>
{
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
: VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
{
}

void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const /*begin*/)
{
putNGramASCIICaseInsensitive(pos, offset);
}

bool compare(const UInt8 * const pos) const
{
return fallback_searcher.compare(pos);
}

const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const
{
return fallback_searcher.search(haystack, haystack_end);
}

ASCIICaseInsensitiveStringSearcher fallback_searcher;
};

/// Case-insensitive UTF-8
template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<false, false>>
{
VolnitskyImpl(const char * const needle_, const size_t needle_size_, const size_t haystack_size_hint = 0)
: VolnitskyBase{needle_, needle_size_, haystack_size_hint}, fallback_searcher{needle_, needle_size_}
{
}

void putNGram(const UInt8 * const pos, const int offset, const UInt8 * const begin)
else
{
struct Chars
{
@ -263,16 +133,14 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal

union
{
Ngram n;
VolnitskyTraits::Ngram n;
Chars chars;
};

n = toNGram(pos);

if (isascii(chars.c0) && isascii(chars.c1))
{
putNGramASCIICaseInsensitive(pos, offset);
}
putNGramASCIICaseInsensitive(pos, offset, putNGramBase);
else
{
/** n-gram (in the case of n = 2)
@ -435,25 +303,389 @@ template <> struct VolnitskyImpl<false, false> : VolnitskyBase<VolnitskyImpl<fal
}
}
}
}
}
}

bool compare(const UInt8 * const pos) const

/// @todo store lowercase needle to speed up in case there are numerous occurrences of bigrams from needle in haystack
template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
class VolnitskyBase
{
protected:
const UInt8 * const needle;
const size_t needle_size;
const UInt8 * const needle_end = needle + needle_size;
/// For how long we move, if the n-gram from haystack is not found in the hash table.
const size_t step = needle_size - sizeof(VolnitskyTraits::Ngram) + 1;

/** max needle length is 255, max distinct ngrams for case-sensitive is (255 - 1), case-insensitive is 4 * (255 - 1)
* storage of 64K ngrams (n = 2, 128 KB) should be large enough for both cases */
VolnitskyTraits::Offset hash[VolnitskyTraits::hash_size]; /// Hash table.

const bool fallback; /// Do we need to use the fallback algorithm.

FallbackSearcher fallback_searcher;

public:
/** haystack_size_hint - the expected total size of the haystack for `search` calls. Optional (zero means unspecified).
* If you specify it small enough, the fallback algorithm will be used,
* since it is considered that it's useless to waste time initializing the hash table.
*/
VolnitskyBase(const char * const needle, const size_t needle_size, size_t haystack_size_hint = 0)
: needle{reinterpret_cast<const UInt8 *>(needle)}
, needle_size{needle_size}
, fallback{VolnitskyTraits::isFallbackNeedle(needle_size, haystack_size_hint)}
, fallback_searcher{needle, needle_size}
{
return fallback_searcher.compare(pos);
if (fallback)
return;

memset(hash, 0, sizeof(hash));

auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset) { return this->putNGramBase(ngram, offset); };
/// ssize_t is used here because unsigned can't be used with condition like `i >= 0`, unsigned always >= 0
for (auto i = static_cast<ssize_t>(needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(this->needle + i, i + 1, this->needle, callback);
}

const UInt8 * search_fallback(const UInt8 * const haystack, const UInt8 * const haystack_end) const

/// If not found, the end of the haystack is returned.
const UInt8 * search(const UInt8 * const haystack, const size_t haystack_size) const
{
if (needle_size == 0)
return haystack;

const auto haystack_end = haystack + haystack_size;

if (fallback || haystack_size <= needle_size)
return fallback_searcher.search(haystack, haystack_end);

/// Let's "apply" the needle to the haystack and compare the n-gram from the end of the needle.
const auto * pos = haystack + needle_size - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - needle_size; pos += step)
{
/// We look at all the cells of the hash table that can correspond to the n-gram from haystack.
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num];
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
/// When found - compare bytewise, using the offset from the hash table.
const auto res = pos - (hash[cell_num] - 1);

/// The pointer always points into a padded array, so we can use page-safe semantics.
if (fallback_searcher.compare(res))
return res;
}
}

UTF8CaseInsensitiveStringSearcher fallback_searcher;
return fallback_searcher.search(pos - step + 1, haystack_end);
}

const char * search(const char * haystack, size_t haystack_size) const
{
return reinterpret_cast<const char *>(search(reinterpret_cast<const UInt8 *>(haystack), haystack_size));
}

protected:
void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset)
{
/// Put the offset for the n-gram in the corresponding cell or the nearest free cell.
size_t cell_num = ngram % VolnitskyTraits::hash_size;

while (hash[cell_num])
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size; /// Search for the next free cell.

hash[cell_num] = offset;
}
};


using Volnitsky = VolnitskyImpl<true, true>;
using VolnitskyUTF8 = VolnitskyImpl<true, false>; /// exactly same as Volnitsky
using VolnitskyCaseInsensitive = VolnitskyImpl<false, true>; /// ignores non-ASCII bytes
using VolnitskyCaseInsensitiveUTF8 = VolnitskyImpl<false, false>;
template <bool CaseSensitive, bool ASCII, typename FallbackSearcher>
class MultiVolnitskyBase
{
private:
/// needles and their offsets
const std::vector<StringRef> & needles;


/// fallback searchers
std::vector<size_t> fallback_needles;
std::vector<FallbackSearcher> fallback_searchers;

/// because std::pair<> is not POD
struct OffsetId
{
VolnitskyTraits::Id id;
VolnitskyTraits::Offset off;
};

OffsetId hash[VolnitskyTraits::hash_size];

/// step for each bunch of strings
size_t step;

/// last index of offsets that was not processed
size_t last;

/// limit for adding to the hash table. In the worst case with case-insensitive search, the table will be filled at most half.
static constexpr size_t small_limit = VolnitskyTraits::hash_size / 8;

public:
MultiVolnitskyBase(const std::vector<StringRef> & needles_) : needles{needles_}, step{0}, last{0}
{
fallback_searchers.reserve(needles.size());
}

template <typename ResultType, typename AnsCallback>
void searchAll(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const AnsCallback & ansCallback,
ResultType & ans)
{
const size_t haystack_string_size = haystack_offsets.size();
const size_t needles_size = needles.size();

/// zero-fill the result: cells that are not written below would otherwise stay uninitialized
std::fill(ans.begin(), ans.end(), 0);

while (!reset())
{
size_t fallback_size = fallback_needles.size();
size_t prev_offset = 0;
for (size_t j = 0, from = 0; j < haystack_string_size; ++j, from += needles_size)
{
const auto * haystack = &haystack_data[prev_offset];
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
for (size_t i = 0; i < fallback_size; ++i)
{
const UInt8 * ptr = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end);
if (ptr != haystack_end)
ans[from + fallback_needles[i]] = ansCallback(haystack, ptr);
}

/// check if we have at least one non-empty Volnitsky searcher
if (step != std::numeric_limits<size_t>::max())
{
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
{
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
if (pos >= haystack + hash[cell_num].off - 1)
{
const auto * res = pos - (hash[cell_num].off - 1);
const size_t ind = hash[cell_num].id;
if (ans[from + ind] == 0 && res + needles[ind].size <= haystack_end)
{
if (fallback_searchers[ind].compare(res))
{
ans[from + ind] = ansCallback(haystack, res);
}
}
}
}
}
}
prev_offset = haystack_offsets[j];
}
}
}

template <typename ResultType>
void search(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans)
{
auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> bool
{
return this->searchOne(haystack, haystack_end);
};
searchInternal(haystack_data, haystack_offsets, callback, ans);
}

template <typename ResultType>
void searchIndex(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, ResultType & ans)
{
auto callback = [this](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t
{
return this->searchOneIndex(haystack, haystack_end);
};
searchInternal(haystack_data, haystack_offsets, callback, ans);
}

private:
/**
* This function is needed to initialize hash table
* Returns `true` if there is nothing to initialize
* and `false` if we have something to initialize and initializes it.
* This function is a kind of fallback if there are many needles.
* We actually destroy the hash table and initialize it with uninitialized needles
* and search through the haystack again.
* The actual usage of this function is like this:
* while (!reset())
* {
* search inside the haystack with the known needles
* }
*/
bool reset()
{
if (last == needles.size())
return true;

memset(hash, 0, sizeof(hash));
fallback_needles.clear();
step = std::numeric_limits<size_t>::max();

size_t buf = 0;
size_t size = needles.size();

for (; last < size; ++last)
{
const char * cur_needle_data = needles[last].data;
const size_t cur_needle_size = needles[last].size;

/// save the indices of fallback searchers
if (VolnitskyTraits::isFallbackNeedle(cur_needle_size))
{
fallback_needles.push_back(last);
}
else
{
/// put all bigrams
auto callback = [this](const VolnitskyTraits::Ngram ngram, const int offset)
{
return this->putNGramBase(ngram, offset, this->last);
};

buf += cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1;

/// this is the condition when we actually need to stop and start searching with known needles
if (buf > small_limit)
break;

step = std::min(step, cur_needle_size - sizeof(VolnitskyTraits::Ngram) + 1);
for (auto i = static_cast<int>(cur_needle_size - sizeof(VolnitskyTraits::Ngram)); i >= 0; --i)
{
VolnitskyTraits::putNGram<CaseSensitive, ASCII>(
reinterpret_cast<const UInt8 *>(cur_needle_data) + i,
i + 1,
reinterpret_cast<const UInt8 *>(cur_needle_data),
callback);
}
}
fallback_searchers.emplace_back(cur_needle_data, cur_needle_size);
}
return false;
}

template <typename OneSearcher, typename ResultType>
inline void searchInternal(
const ColumnString::Chars & haystack_data,
const ColumnString::Offsets & haystack_offsets,
const OneSearcher & searchFallback,
ResultType & ans)
{
const size_t haystack_string_size = haystack_offsets.size();
while (!reset())
{
size_t prev_offset = 0;
for (size_t j = 0; j < haystack_string_size; ++j)
{
const auto * haystack = &haystack_data[prev_offset];
const auto * haystack_end = haystack + haystack_offsets[j] - prev_offset - 1;
ans[j] = searchFallback(haystack, haystack_end);
prev_offset = haystack_offsets[j];
}
}
}

inline bool searchOne(const UInt8 * haystack, const UInt8 * haystack_end) const
{
const size_t fallback_size = fallback_needles.size();
for (size_t i = 0; i < fallback_size; ++i)
if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
return true;

/// check if we have at least one non-empty Volnitsky searcher
if (step != std::numeric_limits<size_t>::max())
{
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
{
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
if (pos >= haystack + hash[cell_num].off - 1)
{
const auto res = pos - (hash[cell_num].off - 1);
const size_t ind = hash[cell_num].id;
if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
return true;
}
}
}
}
return false;
}

inline size_t searchOneIndex(const UInt8 * haystack, const UInt8 * haystack_end) const
{
const size_t fallback_size = fallback_needles.size();

size_t ans = std::numeric_limits<size_t>::max();

for (size_t i = 0; i < fallback_size; ++i)
if (fallback_searchers[fallback_needles[i]].search(haystack, haystack_end) != haystack_end)
ans = std::min(ans, fallback_needles[i]);

/// check if we have at least one non-empty Volnitsky searcher
if (step != std::numeric_limits<size_t>::max())
{
const auto * pos = haystack + step - sizeof(VolnitskyTraits::Ngram);
for (; pos <= haystack_end - sizeof(VolnitskyTraits::Ngram); pos += step)
{
for (size_t cell_num = VolnitskyTraits::toNGram(pos) % VolnitskyTraits::hash_size; hash[cell_num].off;
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size)
{
if (pos >= haystack + hash[cell_num].off - 1)
{
const auto res = pos - (hash[cell_num].off - 1);
const size_t ind = hash[cell_num].id;
if (res + needles[ind].size <= haystack_end && fallback_searchers[ind].compare(res))
ans = std::min(ans, ind);
}
}
}
}

/*
* if nothing was found, ans + 1 will be equal to zero and we can
* assign it into the result because we need to return the position starting with one
*/
return ans + 1;
}

void putNGramBase(const VolnitskyTraits::Ngram ngram, const int offset, const size_t num)
{
size_t cell_num = ngram % VolnitskyTraits::hash_size;

while (hash[cell_num].off)
cell_num = (cell_num + 1) % VolnitskyTraits::hash_size;

hash[cell_num] = {static_cast<VolnitskyTraits::Id>(num), static_cast<VolnitskyTraits::Offset>(offset)};
}
};


using Volnitsky = VolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using VolnitskyUTF8 = VolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>; /// exactly same as Volnitsky
using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;

using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, ASCIICaseSensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
using MultiVolnitskyCaseInsensitiveUTF8 = MultiVolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;


}
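
A minimal usage sketch for the aliases above (assuming the header is included and the inputs live in std::string):

    std::string haystack = "hello, world";
    std::string needle = "world";

    DB::Volnitsky searcher(needle.data(), needle.size(), haystack.size());
    const char * match = searcher.search(haystack.data(), haystack.size());
    // match points at "world"; when the needle is absent, search() returns
    // haystack.data() + haystack.size(), i.e. the end of the haystack.
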
@ -61,6 +61,7 @@ struct Request
{
Request() = default;
Request(const Request &) = default;
Request & operator=(const Request &) = default;
virtual ~Request() = default;
virtual String getPath() const = 0;
virtual void addRootPath(const String & /* root_path */) {}

@ -76,6 +77,7 @@ struct Response
int32_t error = 0;
Response() = default;
Response(const Response &) = default;
Response & operator=(const Response &) = default;
virtual ~Response() = default;
virtual void removeRootPath(const String & /* root_path */) {}
};

@ -41,7 +41,7 @@
* - extremely creepy code for implementation of "chroot" feature.
*
* As of 2018, there are no active maintainers of libzookeeper:
* - bugs in JIRA are fixed only occasionaly with ad-hoc patches by library users.
* - bugs in JIRA are fixed only occasionally with ad-hoc patches by library users.
*
* libzookeeper is a classical example of bad code written in C.
*

@ -5,7 +5,7 @@

/// A tool for reproducing https://issues.apache.org/jira/browse/ZOOKEEPER-706
/// Original libzookeeper can't reconnect the session if the length of SET_WATCHES message
/// exceeeds jute.maxbuffer (0xfffff by default).
/// exceeds jute.maxbuffer (0xfffff by default).
/// This happens when the number of watches exceeds ~29000.
///
/// Session reconnect can be caused by forbidding packets to the current zookeeper server, e.g.

@ -43,7 +43,7 @@ CompressedWriteBuffer::CompressedWriteBuffer(
WriteBuffer & out_,
CompressionCodecPtr codec_,
size_t buf_size)
: BufferWithOwnMemory<WriteBuffer>(buf_size), out(out_), codec(codec_)
: BufferWithOwnMemory<WriteBuffer>(buf_size), out(out_), codec(std::move(codec_))
{
}

156
dbms/src/Compression/CompressionCodecDelta.cpp
Normal file
@ -0,0 +1,156 @@
#include <Compression/CompressionCodecDelta.h>
#include <Compression/CompressionInfo.h>
#include <Compression/CompressionFactory.h>
#include <common/unaligned.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <IO/WriteHelpers.h>
#include <cstdlib>


namespace DB
{

namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int ILLEGAL_CODEC_PARAMETER;
}

CompressionCodecDelta::CompressionCodecDelta(UInt8 delta_bytes_size_)
: delta_bytes_size(delta_bytes_size_)
{
}

UInt8 CompressionCodecDelta::getMethodByte() const
{
return static_cast<UInt8>(CompressionMethodByte::Delta);
}

String CompressionCodecDelta::getCodecDesc() const
{
return "Delta(" + toString(delta_bytes_size) + ")";
}

namespace
{

template <typename T>
void compressDataForType(const char * source, UInt32 source_size, char * dest)
{
if (source_size % sizeof(T) != 0)
throw Exception("Cannot delta compress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_COMPRESS);

T prev_src{};
const char * source_end = source + source_size;
while (source < source_end)
{
T curr_src = unalignedLoad<T>(source);
unalignedStore(dest, curr_src - prev_src);
prev_src = curr_src;

source += sizeof(T);
dest += sizeof(T);
}
}

template <typename T>
void decompressDataForType(const char * source, UInt32 source_size, char * dest)
{
if (source_size % sizeof(T) != 0)
throw Exception("Cannot delta decompress, data size " + toString(source_size) + " is not aligned to " + toString(sizeof(T)), ErrorCodes::CANNOT_DECOMPRESS);

T accumulator{};
const char * source_end = source + source_size;
while (source < source_end)
{
accumulator += unalignedLoad<T>(source);
unalignedStore(dest, accumulator);

source += sizeof(T);
dest += sizeof(T);
}
}

}

UInt32 CompressionCodecDelta::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
UInt8 bytes_to_skip = source_size % delta_bytes_size;
dest[0] = delta_bytes_size;
dest[1] = bytes_to_skip;
memcpy(&dest[2], source, bytes_to_skip);
size_t start_pos = 2 + bytes_to_skip;
switch (delta_bytes_size)
{
case 1:
compressDataForType<UInt8>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 2:
compressDataForType<UInt16>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 4:
compressDataForType<UInt32>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
case 8:
compressDataForType<UInt64>(&source[bytes_to_skip], source_size - bytes_to_skip, &dest[start_pos]);
break;
}
return 1 + 1 + source_size;
}

void CompressionCodecDelta::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 /* uncompressed_size */) const
{
UInt8 bytes_size = source[0];
UInt8 bytes_to_skip = source[1];

memcpy(dest, &source[2], bytes_to_skip);
UInt32 source_size_no_header = source_size - bytes_to_skip - 2;
switch (bytes_size)
{
case 1:
decompressDataForType<UInt8>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 2:
decompressDataForType<UInt16>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 4:
decompressDataForType<UInt32>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
case 8:
decompressDataForType<UInt64>(&source[2 + bytes_to_skip], source_size_no_header, &dest[bytes_to_skip]);
break;
}
}

void registerCodecDelta(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::Delta);
factory.registerCompressionCodecWithType("Delta", method_code, [&](const ASTPtr & arguments, DataTypePtr column_type) -> CompressionCodecPtr
{
UInt8 delta_bytes_size = 1;
if (column_type && column_type->haveMaximumSizeOfValue())
{
size_t max_size = column_type->getSizeOfValueInMemory();
if (max_size == 1 || max_size == 2 || max_size == 4 || max_size == 8)
delta_bytes_size = static_cast<UInt8>(max_size);
}

if (arguments && !arguments->children.empty())
{
if (arguments->children.size() > 1)
throw Exception("Delta codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);

const auto children = arguments->children;
const ASTLiteral * literal = static_cast<const ASTLiteral *>(children[0].get());
size_t user_bytes_size = literal->value.safeGet<UInt64>();
if (user_bytes_size != 1 && user_bytes_size != 2 && user_bytes_size != 4 && user_bytes_size != 8)
throw Exception("Delta value for delta codec can be 1, 2, 4 or 8, given " + toString(user_bytes_size), ErrorCodes::ILLEGAL_CODEC_PARAMETER);
delta_bytes_size = static_cast<UInt8>(user_bytes_size);
}
return std::make_shared<CompressionCodecDelta>(delta_bytes_size);
});
}
}

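A usage sketch for the registration above, resolving the codec by its method byte (0x92, added to CompressionMethodByte further below); instance() from ext::singleton is an assumption based on the factory's declaration:

    // Looking up by method byte goes through the parameterless path
    // (creator({}, nullptr)), so this yields a Delta(1) codec.
    auto codec = DB::CompressionCodecFactory::instance().get(static_cast<DB::UInt8>(0x92));
    // codec->getCodecDesc() == "Delta(1)"
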
26
dbms/src/Compression/CompressionCodecDelta.h
Normal file
@ -0,0 +1,26 @@
#pragma once

#include <Compression/ICompressionCodec.h>
namespace DB
{

class CompressionCodecDelta : public ICompressionCodec
{
public:
CompressionCodecDelta(UInt8 delta_bytes_size_);

UInt8 getMethodByte() const override;

String getCodecDesc() const override;

protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;

void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;

UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override { return uncompressed_size + 2; }
private:
const UInt8 delta_bytes_size;
};
}

@ -8,6 +8,7 @@
#include "CompressionCodecLZ4.h"
#include <Parsers/IAST.h>
#include <Parsers/ASTLiteral.h>
#include <IO/WriteHelpers.h>

#ifdef __clang__
#pragma clang diagnostic ignored "-Wold-style-cast"
@ -35,7 +36,7 @@ String CompressionCodecLZ4::getCodecDesc() const
return "LZ4";
}

UInt32 CompressionCodecLZ4::getCompressedDataSize(UInt32 uncompressed_size) const
UInt32 CompressionCodecLZ4::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
return LZ4_COMPRESSBOUND(uncompressed_size);
}
@ -61,7 +62,7 @@ void registerCodecLZ4(CompressionCodecFactory & factory)

String CompressionCodecLZ4HC::getCodecDesc() const
{
return "LZ4HC";
return "LZ4HC(" + toString(level) + ")";
}

UInt32 CompressionCodecLZ4HC::doCompressData(const char * source, UInt32 source_size, char * dest) const

@ -24,7 +24,7 @@ protected:
private:
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;

UInt32 getCompressedDataSize(UInt32 uncompressed_size) const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

mutable LZ4::PerformanceStatistics lz4_stat;
};
@ -41,7 +41,7 @@ protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;

private:
int level;
const int level;
};

}

@ -5,6 +5,7 @@
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Common/hex.h>
#include <sstream>


namespace DB
@ -20,14 +21,16 @@ extern const int CORRUPTED_DATA;
CompressionCodecMultiple::CompressionCodecMultiple(Codecs codecs)
: codecs(codecs)
{
std::ostringstream ss;
for (size_t idx = 0; idx < codecs.size(); idx++)
{
if (idx != 0)
codec_desc = codec_desc + ',';
ss << ',' << ' ';

const auto codec = codecs[idx];
codec_desc = codec_desc + codec->getCodecDesc();
ss << codec->getCodecDesc();
}
codec_desc = ss.str();
}

UInt8 CompressionCodecMultiple::getMethodByte() const
@ -40,7 +43,7 @@ String CompressionCodecMultiple::getCodecDesc() const
return codec_desc;
}

UInt32 CompressionCodecMultiple::getCompressedDataSize(UInt32 uncompressed_size) const
UInt32 CompressionCodecMultiple::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
UInt32 compressed_size = uncompressed_size;
for (auto & codec : codecs)

@ -15,7 +15,7 @@ public:

String getCodecDesc() const override;

UInt32 getCompressedDataSize(UInt32 uncompressed_size) const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;

@ -28,10 +28,10 @@ UInt8 CompressionCodecZSTD::getMethodByte() const

String CompressionCodecZSTD::getCodecDesc() const
{
return "ZSTD";
return "ZSTD(" + toString(level) + ")";
}

UInt32 CompressionCodecZSTD::getCompressedDataSize(UInt32 uncompressed_size) const
UInt32 CompressionCodecZSTD::getMaxCompressedDataSize(UInt32 uncompressed_size) const
{
return ZSTD_compressBound(uncompressed_size);
}

@ -19,7 +19,7 @@ public:

String getCodecDesc() const override;

UInt32 getCompressedDataSize(UInt32 uncompressed_size) const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;

protected:
UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const override;
@ -27,7 +27,7 @@ protected:
void doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const override;

private:
int level;
const int level;
};

}

@ -10,7 +10,6 @@
#include <Parsers/queryToString.h>
#include <Compression/CompressionCodecMultiple.h>
#include <Compression/CompressionCodecLZ4.h>
#include <Compression/CompressionCodecNone.h>
#include <IO/WriteHelpers.h>


@ -56,7 +55,7 @@ CompressionCodecPtr CompressionCodecFactory::get(const std::vector<CodecNameWith
return std::make_shared<CompressionCodecMultiple>(result);
}

CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast) const
CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast, DataTypePtr column_type) const
{
if (const auto * func = typeid_cast<const ASTFunction *>(ast.get()))
{
@ -65,9 +64,9 @@ CompressionCodecPtr CompressionCodecFactory::get(const ASTPtr & ast) const
for (const auto & inner_codec_ast : func->arguments->children)
{
if (const auto * family_name = typeid_cast<const ASTIdentifier *>(inner_codec_ast.get()))
codecs.emplace_back(getImpl(family_name->name, {}));
codecs.emplace_back(getImpl(family_name->name, {}, column_type));
else if (const auto * ast_func = typeid_cast<const ASTFunction *>(inner_codec_ast.get()))
codecs.emplace_back(getImpl(ast_func->name, ast_func->arguments));
codecs.emplace_back(getImpl(ast_func->name, ast_func->arguments, column_type));
else
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
}
@ -88,10 +87,11 @@ CompressionCodecPtr CompressionCodecFactory::get(const UInt8 byte_code) const
if (family_code_and_creator == family_code_with_codec.end())
throw Exception("Unknown codec family code : " + toString(byte_code), ErrorCodes::UNKNOWN_CODEC);

return family_code_and_creator->second({});
return family_code_and_creator->second({}, nullptr);
}

CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments) const

CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const
{
if (family_name == "Multiple")
throw Exception("Codec MULTIPLE cannot be specified directly", ErrorCodes::UNKNOWN_CODEC);
@ -101,10 +101,13 @@ CompressionCodecPtr CompressionCodecFactory::getImpl(const String & family_name,
if (family_and_creator == family_name_with_codec.end())
throw Exception("Unknown codec family: " + family_name, ErrorCodes::UNKNOWN_CODEC);

return family_and_creator->second(arguments);
return family_and_creator->second(arguments, column_type);
}

void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<UInt8> byte_code, Creator creator)
void CompressionCodecFactory::registerCompressionCodecWithType(
const String & family_name,
std::optional<UInt8> byte_code,
CreatorWithType creator)
{
if (creator == nullptr)
throw Exception("CompressionCodecFactory: the codec family " + family_name + " has been provided a null constructor",
@ -118,8 +121,18 @@ void CompressionCodecFactory::registerCompressionCodec(const String & family_nam
throw Exception("CompressionCodecFactory: the codec family name '" + family_name + "' is not unique", ErrorCodes::LOGICAL_ERROR);
}

void CompressionCodecFactory::registerSimpleCompressionCodec(const String & family_name, std::optional<UInt8> byte_code,
std::function<CompressionCodecPtr()> creator)
void CompressionCodecFactory::registerCompressionCodec(const String & family_name, std::optional<UInt8> byte_code, Creator creator)
{
registerCompressionCodecWithType(family_name, byte_code, [family_name, creator](const ASTPtr & ast, DataTypePtr /* data_type */)
{
return creator(ast);
});
}

void CompressionCodecFactory::registerSimpleCompressionCodec(
const String & family_name,
std::optional<UInt8> byte_code,
SimpleCreator creator)
{
registerCompressionCodec(family_name, byte_code, [family_name, creator](const ASTPtr & ast)
{
@ -135,7 +148,7 @@ void registerCodecNone(CompressionCodecFactory & factory);
void registerCodecZSTD(CompressionCodecFactory & factory);
void registerCodecMultiple(CompressionCodecFactory & factory);
void registerCodecLZ4HC(CompressionCodecFactory & factory);
//void registerCodecDelta(CompressionCodecFactory & factory);
void registerCodecDelta(CompressionCodecFactory & factory);

CompressionCodecFactory::CompressionCodecFactory()
{
@ -145,7 +158,7 @@ CompressionCodecFactory::CompressionCodecFactory()
registerCodecZSTD(*this);
registerCodecMultiple(*this);
registerCodecLZ4HC(*this);
// registerCodecDelta(*this);
registerCodecDelta(*this);
}

}

@ -5,6 +5,7 @@
#include <optional>
#include <unordered_map>
#include <ext/singleton.h>
#include <DataTypes/IDataType.h>
#include <Common/IFactoryWithAliases.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionInfo.h>
@ -28,16 +29,19 @@ class CompressionCodecFactory final : public ext::singleton<CompressionCodecFact
{
protected:
using Creator = std::function<CompressionCodecPtr(const ASTPtr & parameters)>;
using CreatorWithType = std::function<CompressionCodecPtr(const ASTPtr & parameters, DataTypePtr column_type)>;
using SimpleCreator = std::function<CompressionCodecPtr()>;
using CompressionCodecsDictionary = std::unordered_map<String, Creator>;
using CompressionCodecsCodeDictionary = std::unordered_map<UInt8, Creator>;
using CompressionCodecsDictionary = std::unordered_map<String, CreatorWithType>;
using CompressionCodecsCodeDictionary = std::unordered_map<UInt8, CreatorWithType>;
public:

/// Return default codec (currently LZ4)
CompressionCodecPtr getDefaultCodec() const;

/// Get codec by AST
CompressionCodecPtr get(const ASTPtr & ast) const;
/// Get codec by AST and possible column_type
/// some codecs can use information about type to improve inner settings
/// but every codec should be able to work without information about type
CompressionCodecPtr get(const ASTPtr & ast, DataTypePtr column_type=nullptr) const;

/// Get codec by method byte (no params available)
CompressionCodecPtr get(const UInt8 byte_code) const;
@ -46,6 +50,9 @@ public:
CompressionCodecPtr get(const String & family_name, std::optional<int> level) const;

CompressionCodecPtr get(const std::vector<CodecNameWithLevel> & codecs) const;

/// Register codec with parameters and column type
void registerCompressionCodecWithType(const String & family_name, std::optional<UInt8> byte_code, CreatorWithType creator);
/// Register codec with parameters
void registerCompressionCodec(const String & family_name, std::optional<UInt8> byte_code, Creator creator);

@ -53,7 +60,7 @@ public:
void registerSimpleCompressionCodec(const String & family_name, std::optional<UInt8> byte_code, SimpleCreator creator);

protected:
CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments) const;
CompressionCodecPtr getImpl(const String & family_name, const ASTPtr & arguments, DataTypePtr column_type) const;

private:
CompressionCodecsDictionary family_name_with_codec;

@ -39,6 +39,7 @@ enum class CompressionMethodByte : uint8_t
LZ4 = 0x82,
ZSTD = 0x90,
Multiple = 0x91,
Delta = 0x92,
};

}

@ -41,7 +41,7 @@ public:
virtual UInt32 decompress(char * source, UInt32 source_size, char * dest) const;

/// Number of bytes, that will be used to compress uncompressed_size bytes with current codec
virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { return getHeaderSize() + getCompressedDataSize(uncompressed_size); }
virtual UInt32 getCompressedReserveSize(UInt32 uncompressed_size) const { return getHeaderSize() + getMaxCompressedDataSize(uncompressed_size); }

/// Some codecs (LZ4, for example) require additional bytes at end of buffer
virtual UInt32 getAdditionalSizeAtTheEndOfBuffer() const { return 0; }
@ -61,7 +61,7 @@ public:
protected:

/// Return size of compressed data without header
virtual UInt32 getCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size; }
virtual UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const { return uncompressed_size; }

/// Actually compress data, without header
virtual UInt32 doCompressData(const char * source, UInt32 source_size, char * dest) const = 0;

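For the Delta codec added above, this reserve works out as follows (a sketch; getHeaderSize() is defined elsewhere in the base class):

    // reserve = getHeaderSize() + getMaxCompressedDataSize(uncompressed_size)
    //         = getHeaderSize() + uncompressed_size + 2   // 2 prefix bytes: {delta_bytes_size, bytes_to_skip}
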
@ -70,7 +70,7 @@ inline void copyOverlap8(UInt8 * op, const UInt8 *& match, const size_t offset)
}


#ifdef __x86_64__
#if defined(__x86_64__) || defined(__PPC__)

/** We use 'xmm' (128bit SSE) registers here to shuffle 16 bytes.
*
@ -260,7 +260,7 @@ inline void copyOverlap16(UInt8 * op, const UInt8 *& match, const size_t offset)
}


#ifdef __x86_64__
#if defined(__x86_64__) || defined(__PPC__)

inline void copyOverlap16Shuffle(UInt8 * op, const UInt8 *& match, const size_t offset)
{

@ -82,9 +82,9 @@
#endif


#define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64 (work in progress)"
#define PLATFORM_NOT_SUPPORTED "The only supported platforms are x86_64 and AArch64, PowerPC (work in progress)"

#if !defined(__x86_64__) && !defined(__aarch64__)
#if !defined(__x86_64__) && !defined(__aarch64__) && !defined(__PPC__)
// #error PLATFORM_NOT_SUPPORTED
#endif


@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Core/ColumnWithTypeAndName.h>

namespace DB
@ -9,7 +9,7 @@ namespace DB
/** Adds a materialized const column to the block with a specified value.
*/
template <typename T>
class AddingConstColumnBlockInputStream : public IProfilingBlockInputStream
class AddingConstColumnBlockInputStream : public IBlockInputStream
{
public:
AddingConstColumnBlockInputStream(

@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Storages/ColumnDefault.h>
#include <Interpreters/Context.h>

@ -9,7 +9,7 @@ namespace DB
{

/// Adds defaults to columns using BlockDelayedDefaults bitmask attached to Block by child InputStream.
class AddingDefaultsBlockInputStream : public IProfilingBlockInputStream
class AddingDefaultsBlockInputStream : public IBlockInputStream
{
public:
AddingDefaultsBlockInputStream(

@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Storages/ColumnDefault.h>


@ -14,7 +14,7 @@ namespace DB
* 3. Columns that materialized from other columns (materialized columns)
* All three types of columns are materialized (not constants).
*/
class AddingMissedBlockInputStream : public IProfilingBlockInputStream
class AddingMissedBlockInputStream : public IBlockInputStream
{
public:
AddingMissedBlockInputStream(

@ -3,7 +3,7 @@
#include <Interpreters/Aggregator.h>
#include <IO/ReadBufferFromFile.h>
#include <Compression/CompressedReadBuffer.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>


namespace DB
@ -15,7 +15,7 @@ namespace DB
* If final = false, the aggregate functions are not finalized, that is, they are not replaced by their value, but contain an intermediate state of calculations.
* This is necessary so that aggregation can continue (for example, by combining streams of partially aggregated data).
*/
class AggregatingBlockInputStream : public IProfilingBlockInputStream
class AggregatingBlockInputStream : public IBlockInputStream
{
public:
/** keys are taken from the GROUP BY part of the query

@ -2,7 +2,7 @@

#include <Poco/Event.h>

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Common/setThreadName.h>
#include <Common/CurrentMetrics.h>
#include <common/ThreadPool.h>
@ -26,7 +26,7 @@ namespace DB
* has come over the network with a request to interrupt the execution of the query.
* It also allows you to execute multiple queries at the same time.
*/
class AsynchronousBlockInputStream : public IProfilingBlockInputStream
class AsynchronousBlockInputStream : public IBlockInputStream
{
public:
AsynchronousBlockInputStream(const BlockInputStreamPtr & in)

@ -1,5 +1,5 @@
#include <DataStreams/BlockStreamProfileInfo.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>

#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
@ -77,7 +77,7 @@ void BlockStreamProfileInfo::collectInfosForStreamsWithName(const char * name, B
return;
}

parent->forEachProfilingChild([&] (IProfilingBlockInputStream & child)
parent->forEachChild([&] (IBlockInputStream & child)
{
child.getProfileInfo().collectInfosForStreamsWithName(name, res);
return false;
@ -107,7 +107,7 @@ void BlockStreamProfileInfo::calculateRowsBeforeLimit() const

for (const BlockStreamProfileInfo * info_limit_or_sort : limits_or_sortings)
{
info_limit_or_sort->parent->forEachProfilingChild([&] (IProfilingBlockInputStream & child)
info_limit_or_sort->parent->forEachChild([&] (IBlockInputStream & child)
{
rows_before_limit += child.getProfileInfo().rows;
return false;

@ -10,13 +10,13 @@ namespace DB
|
||||
class Block;
|
||||
class ReadBuffer;
|
||||
class WriteBuffer;
|
||||
class IProfilingBlockInputStream;
|
||||
class IBlockInputStream;
|
||||
|
||||
/// Information for profiling. See IProfilingBlockInputStream.h
|
||||
/// Information for profiling. See IBlockInputStream.h
|
||||
struct BlockStreamProfileInfo
|
||||
{
|
||||
/// Info about stream object this profile info refers to.
|
||||
IProfilingBlockInputStream * parent = nullptr;
|
||||
IBlockInputStream * parent = nullptr;
|
||||
|
||||
bool started = false;
|
||||
Stopwatch total_stopwatch {CLOCK_MONOTONIC_COARSE}; /// Time with waiting time
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -9,7 +9,7 @@ namespace DB
|
||||
/** A stream of blocks from which you can read the next block from an explicitly provided list.
|
||||
* Also see OneBlockInputStream.
|
||||
*/
|
||||
class BlocksListBlockInputStream : public IProfilingBlockInputStream
|
||||
class BlocksListBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
/// Acquires the ownership of the block list.
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
#include <common/logger_useful.h>
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Core/SortDescription.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
@ -12,7 +12,7 @@ namespace DB
|
||||
/// Collapses the same rows with the opposite sign roughly like CollapsingSortedBlockInputStream.
|
||||
/// Outputs the rows in random order (the input streams must still be ordered).
|
||||
/// Outputs only rows with a positive sign.
|
||||
class CollapsingFinalBlockInputStream : public IProfilingBlockInputStream
|
||||
class CollapsingFinalBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
CollapsingFinalBlockInputStream(
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <IO/ReadBuffer.h>
|
||||
#include <Common/PODArray.h>
|
||||
|
||||
@ -51,9 +51,9 @@ using MergedRowSources = PODArray<RowSourcePart>;
|
||||
|
||||
/** Gather single stream from multiple streams according to streams mask.
|
||||
* Stream mask maps row number to index of source stream.
|
||||
* Streams should conatin exactly one column.
|
||||
* Streams should contain exactly one column.
|
||||
*/
|
||||
class ColumnGathererStream : public IProfilingBlockInputStream
|
||||
class ColumnGathererStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
ColumnGathererStream(
|
||||
|
@ -1,6 +1,6 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -11,7 +11,7 @@ namespace DB
|
||||
* Unlike UnionBlockInputStream, it does this sequentially.
|
||||
* Blocks of different sources are not interleaved with each other.
|
||||
*/
|
||||
class ConcatBlockInputStream : public IProfilingBlockInputStream
|
||||
class ConcatBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
ConcatBlockInputStream(BlockInputStreams inputs_)
|
||||
|
@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnConst.h>
@ -13,7 +13,7 @@ namespace DB
  * Unlike UnionBlockInputStream, it does this sequentially.
  * Blocks of different sources are not interleaved with each other.
  */
class ConvertColumnLowCardinalityToFullBlockInputStream : public IProfilingBlockInputStream
class ConvertColumnLowCardinalityToFullBlockInputStream : public IBlockInputStream
{
public:
    explicit ConvertColumnLowCardinalityToFullBlockInputStream(const BlockInputStreamPtr & input)

@ -1,7 +1,7 @@
#pragma once

#include <unordered_map>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>


namespace DB
@ -20,7 +20,7 @@ namespace DB
  * throw if they are const in result and non const in source,
  * or if they are const and have different values.
  */
class ConvertingBlockInputStream : public IProfilingBlockInputStream
class ConvertingBlockInputStream : public IBlockInputStream
{
public:
    enum class MatchColumnsMode
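For orientation, a sketch of typical use; the constructor shape (context, input, target header, match mode) is assumed from this code's era and should be treated as illustrative:

// Adapt `source` so each block matches `expected_header`, pairing columns by name.
BlockInputStreamPtr converted = std::make_shared<ConvertingBlockInputStream>(
    context, source, expected_header,
    ConvertingBlockInputStream::MatchColumnsMode::Name);
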
@ -1,6 +1,6 @@
#pragma once
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/ProcessList.h>


@ -59,12 +59,7 @@ void CreatingSetsBlockInputStream::readPrefixImpl()

Block CreatingSetsBlockInputStream::getTotals()
{
    auto input = dynamic_cast<IProfilingBlockInputStream *>(children.back().get());

    if (input)
        return input->getTotals();
    else
        return totals;
    return children.back()->getTotals();
}


@ -158,9 +153,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery)

        if (done_with_set && done_with_join && done_with_table)
        {
            if (IProfilingBlockInputStream * profiling_in = dynamic_cast<IProfilingBlockInputStream *>(&*subquery.source))
                profiling_in->cancel(false);

            subquery.source->cancel(false);
            break;
        }
    }
@ -171,15 +164,12 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery)
    watch.stop();

    size_t head_rows = 0;
    if (IProfilingBlockInputStream * profiling_in = dynamic_cast<IProfilingBlockInputStream *>(&*subquery.source))
    {
        const BlockStreamProfileInfo & profile_info = profiling_in->getProfileInfo();
        const BlockStreamProfileInfo & profile_info = subquery.source->getProfileInfo();

        head_rows = profile_info.rows;

        if (subquery.join)
            subquery.join->setTotals(profiling_in->getTotals());
    }
            subquery.join->setTotals(subquery.source->getTotals());

    if (head_rows != 0)
    {
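The hunks above show the refactor this commit repeats across the codebase: with profiling folded into IBlockInputStream, the downcast is gone. Schematically (names generic, for illustration):

// Before: profiling API reachable only through a dynamic_cast.
if (auto * profiling = dynamic_cast<IProfilingBlockInputStream *>(stream.get()))
    profiling->cancel(false);

// After: every IBlockInputStream carries the profiling API directly.
stream->cancel(false);
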
@ -1,7 +1,7 @@
#pragma once

#include <Poco/Logger.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/ExpressionAnalyzer.h> /// SubqueriesForSets


@ -14,7 +14,7 @@ namespace DB
  * in the `readPrefix` function or before reading the first block
  * initializes all the passed sets.
  */
class CreatingSetsBlockInputStream : public IProfilingBlockInputStream
class CreatingSetsBlockInputStream : public IBlockInputStream
{
public:
    CreatingSetsBlockInputStream(

@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/Aggregator.h>
#include <Core/ColumnNumbers.h>

@ -14,7 +14,7 @@ class ExpressionActions;
/** Takes blocks after grouping, with non-finalized aggregate functions.
  * Calculates all subsets of columns and aggregates over them.
  */
class CubeBlockInputStream : public IProfilingBlockInputStream
class CubeBlockInputStream : public IBlockInputStream
{
private:
    using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;

@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/SetVariants.h>

namespace DB
@ -13,7 +13,7 @@ namespace DB
  * set limit_hint to non zero value. So we stop emitting new rows after
  * count of already emitted rows will reach the limit_hint.
  */
class DistinctBlockInputStream : public IProfilingBlockInputStream
class DistinctBlockInputStream : public IBlockInputStream
{
public:
    /// Empty columns_ means all columns.

@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Interpreters/SetVariants.h>


@ -17,7 +17,7 @@ namespace DB
  * set limit_hint to non zero value. So we stop emitting new rows after
  * count of already emitted rows will reach the limit_hint.
  */
class DistinctSortedBlockInputStream : public IProfilingBlockInputStream
class DistinctSortedBlockInputStream : public IBlockInputStream
{
public:
    /// Empty columns_ means all columns.

@ -15,11 +15,8 @@ String ExpressionBlockInputStream::getName() const { return "Expression"; }

Block ExpressionBlockInputStream::getTotals()
{
    if (IProfilingBlockInputStream * child = dynamic_cast<IProfilingBlockInputStream *>(&*children.back()))
    {
        totals = child->getTotals();
    totals = children.back()->getTotals();
    expression->executeOnTotals(totals);
    }

    return totals;
}
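Assembled from the new lines of the hunk above, the resulting override reads as the canonical getTotals() pattern — take the child's totals, post-process, return (shown here for clarity only):

Block ExpressionBlockInputStream::getTotals()
{
    totals = children.back()->getTotals();  /// no downcast needed any more
    expression->executeOnTotals(totals);
    return totals;
}
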
@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>


namespace DB
@ -13,7 +13,7 @@ class ExpressionActions;
  * For example: hits * 2 + 3, url LIKE '%yandex%'
  * The expression processes each row independently of the others.
  */
class ExpressionBlockInputStream : public IProfilingBlockInputStream
class ExpressionBlockInputStream : public IBlockInputStream
{
private:
    using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;

@ -52,11 +52,8 @@ String FilterBlockInputStream::getName() const { return "Filter"; }

Block FilterBlockInputStream::getTotals()
{
    if (IProfilingBlockInputStream * child = dynamic_cast<IProfilingBlockInputStream *>(&*children.back()))
    {
        totals = child->getTotals();
    totals = children.back()->getTotals();
    expression->executeOnTotals(totals);
    }

    return totals;
}

@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <Columns/FilterDescription.h>


@ -14,7 +14,7 @@ class ExpressionActions;
  * A stream of blocks and an expression, which adds to the block one ColumnUInt8 column containing the filtering conditions, are passed as input.
  * The expression is evaluated and a stream of blocks is returned, which contains only the filtered rows.
  */
class FilterBlockInputStream : public IProfilingBlockInputStream
class FilterBlockInputStream : public IBlockInputStream
{
private:
    using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;

@ -1,6 +1,6 @@
#pragma once

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>
#include <iostream>

namespace DB
@ -9,7 +9,7 @@ namespace DB
/// Removes columns other than columns_to_save_ from block,
/// and reorders columns as in columns_to_save_.
/// Functionality is similar to ExpressionBlockInputStream with ExpressionActions containing PROJECT action.
class FilterColumnsBlockInputStream : public IProfilingBlockInputStream
class FilterColumnsBlockInputStream : public IBlockInputStream
{
public:
    FilterColumnsBlockInputStream(

@ -85,7 +85,7 @@ Block FinishSortingBlockInputStream::readImpl()
    {
        Block block = children.back()->read();

        /// End of input stream, but we can`t return immediatly, we need to merge already read blocks.
        /// End of input stream, but we can`t return immediately, we need to merge already read blocks.
        /// Check it later, when get end of stream from impl.
        if (!block)
        {
@ -102,7 +102,7 @@ Block FinishSortingBlockInputStream::readImpl()
        if (size == 0)
            continue;

        /// We need to sort each block separatly before merging.
        /// We need to sort each block separately before merging.
        sortBlock(block, description_to_sort);

        removeConstantsFromBlock(block);

@ -2,7 +2,7 @@

#include <Core/SortDescription.h>
#include <Interpreters/sortBlock.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>


namespace DB
@ -11,7 +11,7 @@ namespace DB
/** Takes stream already sorted by `x` and finishes sorting it by (`x`, `y`).
  * During sorting, only blocks with rows that are equal by `x` are kept in RAM.
  */
class FinishSortingBlockInputStream : public IProfilingBlockInputStream
class FinishSortingBlockInputStream : public IBlockInputStream
{
public:
    /// limit - if not 0, allowed to return just first 'limit' rows in sorted order.

@ -17,7 +17,7 @@ namespace DB
/** Intended for implementation of "rollup" - aggregation (rounding) of older data
  * for a table with Graphite data (Graphite is the system for time series monitoring).
  *
  * Table with graphite data has at least the folowing columns (accurate to the name):
  * Table with graphite data has at least the following columns (accurate to the name):
  * Path, Time, Value, Version
  *
  * Path - name of metric (sensor);

@ -1,9 +1,14 @@
#include <math.h>

#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/IBlockInputStream.h>

#include <IO/WriteHelpers.h>
#include <Interpreters/ProcessList.h>
#include <Interpreters/Quota.h>
#include <Common/CurrentThread.h>


namespace ProfileEvents
{
    extern const Event ThrottlerSleepMicroseconds;
}


namespace DB
@ -11,12 +16,414 @@ namespace DB

namespace ErrorCodes
{
    extern const int TOO_MANY_ROWS;
    extern const int TOO_MANY_BYTES;
    extern const int TOO_MANY_ROWS_OR_BYTES;
    extern const int TIMEOUT_EXCEEDED;
    extern const int TOO_SLOW;
    extern const int LOGICAL_ERROR;
    extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE;
    extern const int TOO_DEEP_PIPELINE;
}


/** It's safe to access children without mutex as long as these methods are called before first call to read, readPrefix.
/// It's safe to access children without mutex as long as these methods are called before first call to `read()` or `readPrefix()`.


Block IBlockInputStream::read()
{
    if (total_rows_approx)
    {
        progressImpl(Progress(0, 0, total_rows_approx));
        total_rows_approx = 0;
    }

    if (!info.started)
    {
        info.total_stopwatch.start();
        info.started = true;
    }

    Block res;

    if (isCancelledOrThrowIfKilled())
        return res;

    if (!checkTimeLimit())
        limit_exceeded_need_break = true;

    if (!limit_exceeded_need_break)
        res = readImpl();

    if (res)
    {
        info.update(res);

        if (enabled_extremes)
            updateExtremes(res);

        if (limits.mode == LIMITS_CURRENT && !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES))
            limit_exceeded_need_break = true;

        if (quota != nullptr)
            checkQuota(res);
    }
    else
    {
        /** If the thread is over, then we will ask all children to abort the execution.
          * This makes sense when running a query with LIMIT
          * - there is a situation when all the necessary data has already been read,
          * but children sources are still working,
          * herewith they can work in separate threads or even remotely.
          */
        cancel(false);
    }

    progress(Progress(res.rows(), res.bytes()));

#ifndef NDEBUG
    if (res)
    {
        Block header = getHeader();
        if (header)
            assertBlocksHaveEqualStructure(res, header, getName());
    }
#endif

    return res;
}
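/** Example (illustrative, not part of this commit): read() above is a template
  * method — it wraps the virtual readImpl() with profiling, limit checks, quota
  * accounting and cancellation. A minimal source therefore only supplies the
  * pure virtuals; the class below is hypothetical:
  *
  *     class RepeatingBlockInputStream : public IBlockInputStream
  *     {
  *     public:
  *         RepeatingBlockInputStream(Block block_, size_t count_)
  *             : block(std::move(block_)), count(count_) {}
  *
  *         String getName() const override { return "Repeating"; }
  *         Block getHeader() const override { return block.cloneEmpty(); }
  *
  *     private:
  *         Block readImpl() override
  *         {
  *             if (count == 0)
  *                 return {};    /// empty block signals end of stream
  *             --count;
  *             return block;
  *         }
  *
  *         Block block;
  *         size_t count;
  *     };
  */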


void IBlockInputStream::readPrefix()
{
    readPrefixImpl();

    forEachChild([&] (IBlockInputStream & child)
    {
        child.readPrefix();
        return false;
    });
}


void IBlockInputStream::readSuffix()
{
    forEachChild([&] (IBlockInputStream & child)
    {
        child.readSuffix();
        return false;
    });

    readSuffixImpl();
}


void IBlockInputStream::updateExtremes(Block & block)
{
    size_t num_columns = block.columns();

    if (!extremes)
    {
        MutableColumns extremes_columns(num_columns);

        for (size_t i = 0; i < num_columns; ++i)
        {
            const ColumnPtr & src = block.safeGetByPosition(i).column;

            if (src->isColumnConst())
            {
                /// Equal min and max.
                extremes_columns[i] = src->cloneResized(2);
            }
            else
            {
                Field min_value;
                Field max_value;

                src->getExtremes(min_value, max_value);

                extremes_columns[i] = src->cloneEmpty();

                extremes_columns[i]->insert(min_value);
                extremes_columns[i]->insert(max_value);
            }
        }

        extremes = block.cloneWithColumns(std::move(extremes_columns));
    }
    else
    {
        for (size_t i = 0; i < num_columns; ++i)
        {
            ColumnPtr & old_extremes = extremes.safeGetByPosition(i).column;

            if (old_extremes->isColumnConst())
                continue;

            Field min_value = (*old_extremes)[0];
            Field max_value = (*old_extremes)[1];

            Field cur_min_value;
            Field cur_max_value;

            block.safeGetByPosition(i).column->getExtremes(cur_min_value, cur_max_value);

            if (cur_min_value < min_value)
                min_value = cur_min_value;
            if (cur_max_value > max_value)
                max_value = cur_max_value;

            MutableColumnPtr new_extremes = old_extremes->cloneEmpty();

            new_extremes->insert(min_value);
            new_extremes->insert(max_value);

            old_extremes = std::move(new_extremes);
        }
    }
}
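/** Usage sketch (illustrative): extremes are opt-in. Row 0 of the resulting
  * block holds per-column minimums, row 1 the maximums, folded together
  * across all blocks read:
  *
  *     stream->enableExtremes();               /// before reading
  *     /// ... drain the stream with read() ...
  *     Block extremes = stream->getExtremes(); /// row 0: min, row 1: max
  */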


static bool handleOverflowMode(OverflowMode mode, const String & message, int code)
{
    switch (mode)
    {
        case OverflowMode::THROW:
            throw Exception(message, code);
        case OverflowMode::BREAK:
            return false;
        default:
            throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
    }
}


bool IBlockInputStream::checkTimeLimit()
{
    if (limits.max_execution_time != 0
        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.max_execution_time.totalMicroseconds()) * 1000)
        return handleOverflowMode(limits.timeout_overflow_mode,
            "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds())
                + " seconds, maximum: " + toString(limits.max_execution_time.totalMicroseconds() / 1000000.0),
            ErrorCodes::TIMEOUT_EXCEEDED);

    return true;
}


void IBlockInputStream::checkQuota(Block & block)
{
    switch (limits.mode)
    {
        case LIMITS_TOTAL:
            /// Checked in `progress` method.
            break;

        case LIMITS_CURRENT:
        {
            time_t current_time = time(nullptr);
            double total_elapsed = info.total_stopwatch.elapsedSeconds();

            quota->checkAndAddResultRowsBytes(current_time, block.rows(), block.bytes());
            quota->checkAndAddExecutionTime(current_time, Poco::Timespan((total_elapsed - prev_elapsed) * 1000000.0));

            prev_elapsed = total_elapsed;
            break;
        }
    }
}


void IBlockInputStream::progressImpl(const Progress & value)
{
    if (progress_callback)
        progress_callback(value);

    if (process_list_elem)
    {
        if (!process_list_elem->updateProgressIn(value))
            cancel(/* kill */ true);

        /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers.

        ProgressValues progress = process_list_elem->getProgressIn();
        size_t total_rows_estimate = std::max(progress.rows, progress.total_rows);

        /** Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read.
          * NOTE: Maybe it makes sense to have them checked directly in ProcessList?
          */

        if (limits.mode == LIMITS_TOTAL
            && ((limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows)
                || (limits.size_limits.max_bytes && progress.bytes > limits.size_limits.max_bytes)))
        {
            switch (limits.size_limits.overflow_mode)
            {
                case OverflowMode::THROW:
                {
                    if (limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows)
                        throw Exception("Limit for rows to read exceeded: " + toString(total_rows_estimate)
                            + " rows read (or to read), maximum: " + toString(limits.size_limits.max_rows),
                            ErrorCodes::TOO_MANY_ROWS);
                    else
                        throw Exception("Limit for (uncompressed) bytes to read exceeded: " + toString(progress.bytes)
                            + " bytes read, maximum: " + toString(limits.size_limits.max_bytes),
                            ErrorCodes::TOO_MANY_BYTES);
                }

                case OverflowMode::BREAK:
                {
                    /// For `break`, we will stop only if so many rows were actually read, and not just supposed to be read.
                    if ((limits.size_limits.max_rows && progress.rows > limits.size_limits.max_rows)
                        || (limits.size_limits.max_bytes && progress.bytes > limits.size_limits.max_bytes))
                    {
                        cancel(false);
                    }

                    break;
                }

                default:
                    throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
            }
        }

        size_t total_rows = progress.total_rows;

        constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
        UInt64 total_elapsed_microseconds = info.total_stopwatch.elapsedMicroseconds();

        if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
        {
            CurrentThread::updatePerformanceCounters();
            last_profile_events_update_time = total_elapsed_microseconds;
        }

        if ((limits.min_execution_speed || (total_rows && limits.timeout_before_checking_execution_speed != 0))
            && (static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
        {
            /// Do not count sleeps in throttlers
            UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds];
            double elapsed_seconds = (throttler_sleep_microseconds > total_elapsed_microseconds)
                ? 0.0 : (total_elapsed_microseconds - throttler_sleep_microseconds) / 1000000.0;

            if (elapsed_seconds > 0)
            {
                if (limits.min_execution_speed && progress.rows / elapsed_seconds < limits.min_execution_speed)
                    throw Exception("Query is executing too slow: " + toString(progress.rows / elapsed_seconds)
                        + " rows/sec., minimum: " + toString(limits.min_execution_speed),
                        ErrorCodes::TOO_SLOW);

                /// If the predicted execution time is longer than `max_execution_time`.
                if (limits.max_execution_time != 0 && total_rows)
                {
                    double estimated_execution_time_seconds = elapsed_seconds * (static_cast<double>(total_rows) / progress.rows);

                    if (estimated_execution_time_seconds > limits.max_execution_time.totalSeconds())
                        throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)"
                            + " is too long. Maximum: " + toString(limits.max_execution_time.totalSeconds())
                            + ". Estimated rows to process: " + toString(total_rows),
                            ErrorCodes::TOO_SLOW);
                }
            }
        }

        if (quota != nullptr && limits.mode == LIMITS_TOTAL)
        {
            quota->checkAndAddReadRowsBytes(time(nullptr), value.rows, value.bytes);
        }
    }
}


void IBlockInputStream::cancel(bool kill)
{
    if (kill)
        is_killed = true;

    bool old_val = false;
    if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
        return;

    forEachChild([&] (IBlockInputStream & child)
    {
        child.cancel(kill);
        return false;
    });
}
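/** Usage sketch (illustrative): cancel() is idempotent — the compare_exchange
  * lets only the first call fan out to children — and it has two strengths:
  *
  *     stream->cancel(false);  /// soft: stop quietly, keep data produced so far
  *     stream->cancel(true);   /// hard: also sets is_killed, so the next
  *                             /// isCancelledOrThrowIfKilled() inside read()
  *                             /// throws "Query was cancelled"
  */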


bool IBlockInputStream::isCancelled() const
{
    return is_cancelled;
}

bool IBlockInputStream::isCancelledOrThrowIfKilled() const
{
    if (!is_cancelled)
        return false;
    if (is_killed)
        throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED);
    return true;
}


void IBlockInputStream::setProgressCallback(const ProgressCallback & callback)
{
    progress_callback = callback;

    forEachChild([&] (IBlockInputStream & child)
    {
        child.setProgressCallback(callback);
        return false;
    });
}


void IBlockInputStream::setProcessListElement(QueryStatus * elem)
{
    process_list_elem = elem;

    forEachChild([&] (IBlockInputStream & child)
    {
        child.setProcessListElement(elem);
        return false;
    });
}


Block IBlockInputStream::getTotals()
{
    if (totals)
        return totals;

    Block res;
    forEachChild([&] (IBlockInputStream & child)
    {
        res = child.getTotals();
        if (res)
            return true;
        return false;
    });
    return res;
}


Block IBlockInputStream::getExtremes()
{
    if (extremes)
        return extremes;

    Block res;
    forEachChild([&] (IBlockInputStream & child)
    {
        res = child.getExtremes();
        if (res)
            return true;
        return false;
    });
    return res;
}


String IBlockInputStream::getTreeID() const
@ -40,11 +447,6 @@ String IBlockInputStream::getTreeID() const
}


size_t IBlockInputStream::checkDepth(size_t max_depth) const
{
    return checkDepthImpl(max_depth, max_depth);
}

size_t IBlockInputStream::checkDepthImpl(size_t max_depth, size_t level) const
{
    if (children.empty())
@ -94,4 +496,3 @@ void IBlockInputStream::dumpTree(std::ostream & ostr, size_t indent, size_t mult
}

}

@ -1,41 +1,38 @@
#pragma once

#include <vector>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <functional>
#include <boost/noncopyable.hpp>
#include <Core/Block.h>
#include <Core/SortDescription.h>
#include <DataStreams/BlockStreamProfileInfo.h>
#include <DataStreams/SizeLimits.h>
#include <IO/Progress.h>
#include <Interpreters/SettingsCommon.h>

#include <atomic>
#include <shared_mutex>


namespace DB
{


class IBlockInputStream;

using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
using BlockInputStreams = std::vector<BlockInputStreamPtr>;

class TableStructureReadLock;

using TableStructureReadLockPtr = std::shared_ptr<TableStructureReadLock>;
using TableStructureReadLocks = std::vector<TableStructureReadLockPtr>;
using TableStructureReadLocksList = std::list<TableStructureReadLockPtr>;

struct Progress;

namespace ErrorCodes
{
    extern const int OUTPUT_IS_NOT_SORTED;
    extern const int NOT_IMPLEMENTED;
    extern const int QUERY_WAS_CANCELLED;
}

class IBlockInputStream;
class ProcessListElement;
class QuotaForIntervals;
class QueryStatus;
class TableStructureReadLock;

using BlockInputStreamPtr = std::shared_ptr<IBlockInputStream>;
using BlockInputStreams = std::vector<BlockInputStreamPtr>;
using TableStructureReadLockPtr = std::shared_ptr<TableStructureReadLock>;
using TableStructureReadLocks = std::vector<TableStructureReadLockPtr>;

/** Callback to track the progress of the query.
  * Used in IProfilingBlockInputStream and Context.
  * Used in IBlockInputStream and Context.
  * The function takes the number of rows in the last block, the number of bytes in the last block.
  * Note that the callback can be called from different threads.
  */
@ -44,11 +41,23 @@ using ProgressCallback = std::function<void(const Progress & progress)>;

/** The stream interface for reading data by blocks from the database.
  * Relational operations are supposed to be done also as implementations of this interface.
  * Watches out at how the source of the blocks works.
  * Lets you get information for profiling: rows per second, blocks per second, megabytes per second, etc.
  * Allows you to stop reading data (in nested sources).
  */
class IBlockInputStream : private boost::noncopyable
class IBlockInputStream
{
    friend struct BlockStreamProfileInfo;

public:
    IBlockInputStream() {}
    IBlockInputStream() { info.parent = this; }
    virtual ~IBlockInputStream() {}

    IBlockInputStream(const IBlockInputStream &) = delete;
    IBlockInputStream & operator=(const IBlockInputStream &) = delete;

    /// To output the data stream transformation tree (query execution plan).
    virtual String getName() const = 0;

    /** Get data structure of the stream in a form of "header" block (it is also called "sample block").
      * Header block contains column names, data types, columns of size 0. Constant columns must have corresponding values.
@ -56,52 +65,244 @@ public:
      */
    virtual Block getHeader() const = 0;

    /** Read next block.
      * If there are no more blocks, return an empty block (for which operator `bool` returns false).
      * NOTE: Only one thread can read from one instance of IBlockInputStream simultaneously.
      * This also applies for readPrefix, readSuffix.
      */
    virtual Block read() = 0;

    virtual const BlockMissingValues & getMissingValues() const
    {
        static const BlockMissingValues none;
        return none;
    }

    /// If this stream generates data in order by some keys, return true.
    virtual bool isSortedOutput() const { return false; }

    /// In case of isSortedOutput, return corresponding SortDescription
    virtual const SortDescription & getSortDescription() const
    {
        throw Exception("Output of " + getName() + " is not sorted", ErrorCodes::OUTPUT_IS_NOT_SORTED);
    }

    /** Read next block.
      * If there are no more blocks, return an empty block (for which operator `bool` returns false).
      * NOTE: Only one thread can read from one instance of IBlockInputStream simultaneously.
      * This also applies for readPrefix, readSuffix.
      */
    Block read();

    /** Read something before starting all data or after the end of all data.
      * In the `readSuffix` function, you can implement a finalization that can lead to an exception.
      * readPrefix() must be called before the first call to read().
      * readSuffix() should be called after read() returns an empty block, or after a call to cancel(), but not during read() execution.
      */
    virtual void readPrefix() {}
    virtual void readSuffix() {}

    virtual ~IBlockInputStream() {}

    /** To output the data stream transformation tree (query execution plan).
    /** The default implementation calls readPrefixImpl() on itself, and then readPrefix() recursively for all children.
      * There are cases when you do not want `readPrefix` of children to be called synchronously, in this function,
      * but you want them to be called, for example, in separate threads (for parallel initialization of children).
      * Then overload `readPrefix` function.
      */
    virtual String getName() const = 0;
    virtual void readPrefix();

    /// If this stream generates data in order by some keys, return true.
    virtual bool isSortedOutput() const { return false; }
    /// In case of isSortedOutput, return corresponding SortDescription
    virtual const SortDescription & getSortDescription() const { throw Exception("Output of " + getName() + " is not sorted", ErrorCodes::OUTPUT_IS_NOT_SORTED); }

    /** Must be called before read, readPrefix.
    /** The default implementation calls recursively readSuffix() on all children, and then readSuffixImpl() on itself.
      * If this stream calls read() in children in a separate thread, this behavior is usually incorrect:
      * readSuffix() of the child can not be called at the moment when the same child's read() is executed in another thread.
      * In this case, you need to override this method so that readSuffix() in children is called, for example, after connecting streams.
      */
    virtual void readSuffix();
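    /** Illustrative sketch (not part of this commit) of the contract above —
      * one thread drives the whole stream:
      *
      *     stream->readPrefix();                 /// must precede the first read()
      *     while (Block block = stream->read())  /// empty block == end of data
      *         process(block);                   /// `process` is a placeholder
      *     stream->readSuffix();                 /// finalization; may throw
      */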

    /// Must be called before `read()` and `readPrefix()`.
    void dumpTree(std::ostream & ostr, size_t indent = 0, size_t multiplier = 1) const;

    /** Check the depth of the pipeline.
      * If max_depth is specified and the `depth` is greater - throw an exception.
      * Must be called before read, readPrefix.
      * Must be called before `read()` and `readPrefix()`.
      */
    size_t checkDepth(size_t max_depth) const;
    size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); }

    /** Do not allow to change the table while the blocks stream is alive.
      */
    /// Do not allow to change the table while the blocks stream is alive.
    void addTableLock(const TableStructureReadLockPtr & lock) { table_locks.push_back(lock); }

    /// Get information about execution speed.
    const BlockStreamProfileInfo & getProfileInfo() const { return info; }

    /** Get "total" values.
      * The default implementation takes them from itself or from the first child source in which they are.
      * The overridden method can perform some calculations. For example, apply an expression to the `totals` of the child source.
      * There can be no total values - then an empty block is returned.
      *
      * Call this method only after all the data has been retrieved with `read`,
      * otherwise there will be problems if any data at the same time is computed in another thread.
      */
    virtual Block getTotals();

    /// The same for minimums and maximums.
    Block getExtremes();


    /** Set the execution progress bar callback.
      * The callback is passed to all child sources.
      * By default, it is called for leaf sources, after each block.
      * (But this can be overridden in the progress() method)
      * The function takes the number of rows in the last block, the number of bytes in the last block.
      * Note that the callback can be called from different threads.
      */
    void setProgressCallback(const ProgressCallback & callback);
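    /** Illustrative sketch (assumes <iostream>; the lambda body is a placeholder):
      *
      *     stream->setProgressCallback([](const Progress & progress)
      *     {
      *         /// Called from leaf sources after each block, possibly concurrently.
      *         std::cerr << progress.rows << " rows, " << progress.bytes << " bytes\n";
      *     });
      */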


    /** In this method:
      * - the progress callback is called;
      * - the status of the query execution in ProcessList is updated;
      * - checks restrictions and quotas that should be checked not within the same source,
      * but over the total amount of resources spent in all sources at once (information in the ProcessList).
      */
    virtual void progress(const Progress & value)
    {
        /// The data for progress is taken from leaf sources.
        if (children.empty())
            progressImpl(value);
    }

    void progressImpl(const Progress & value);


    /** Set the pointer to the process list item.
      * It is passed to all child sources.
      * General information about the resources spent on the request will be written into it.
      * Based on this information, the quota and some restrictions will be checked.
      * This information will also be available in the SHOW PROCESSLIST request.
      */
    void setProcessListElement(QueryStatus * elem);

    /** Set the approximate total number of rows to read.
      */
    void addTotalRowsApprox(size_t value) { total_rows_approx += value; }


    /** Ask to abort the receipt of data as soon as possible.
      * By default - just sets the flag is_cancelled and asks that all children be interrupted.
      * This function can be called several times, including simultaneously from different threads.
      * Have two modes:
      * with kill = false only is_cancelled is set - streams will stop silently with returning some processed data.
      * with kill = true also is_killed set - queries will stop with exception.
      */
    virtual void cancel(bool kill);

    bool isCancelled() const;
    bool isCancelledOrThrowIfKilled() const;

    /** What limitations and quotas should be checked.
      * LIMITS_CURRENT - checks amount of data read by current stream only (BlockStreamProfileInfo is used for check).
      * Currently it is used in root streams to check max_result_{rows,bytes} limits.
      * LIMITS_TOTAL - checks total amount of read data from leaf streams (i.e. data read from disk and remote servers).
      * It is checks max_{rows,bytes}_to_read in progress handler and use info from ProcessListElement::progress_in for this.
      * Currently this check is performed only in leaf streams.
      */
    enum LimitsMode
    {
        LIMITS_CURRENT,
        LIMITS_TOTAL,
    };

    /// It is a subset of limitations from Limits.
    struct LocalLimits
    {
        LimitsMode mode = LIMITS_CURRENT;

        SizeLimits size_limits;

        Poco::Timespan max_execution_time = 0;
        OverflowMode timeout_overflow_mode = OverflowMode::THROW;

        /// in rows per second
        size_t min_execution_speed = 0;
        /// Verify that the speed is not too low after the specified time has elapsed.
        Poco::Timespan timeout_before_checking_execution_speed = 0;
    };

    /** Set limitations that checked on each block. */
    void setLimits(const LocalLimits & limits_)
    {
        limits = limits_;
    }

    const LocalLimits & getLimits() const
    {
        return limits;
    }
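    /** Illustrative sketch (values are placeholders): cap the root stream's
      * result and stop quietly instead of throwing on overflow:
      *
      *     IBlockInputStream::LocalLimits limits;
      *     limits.mode = IBlockInputStream::LIMITS_CURRENT;
      *     limits.size_limits = SizeLimits(1000000, 1024 * 1024 * 1024, OverflowMode::BREAK);
      *     limits.max_execution_time = Poco::Timespan(60, 0);  /// seconds, microseconds
      *     limits.timeout_overflow_mode = OverflowMode::BREAK;
      *     stream->setLimits(limits);
      */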

    /** Set the quota. If you set a quota on the amount of raw data,
      * then you should also set mode = LIMITS_TOTAL to LocalLimits with setLimits.
      */
    void setQuota(QuotaForIntervals & quota_)
    {
        quota = &quota_;
    }

    /// Enable calculation of minimums and maximums by the result columns.
    void enableExtremes() { enabled_extremes = true; }

protected:
    BlockInputStreams children;
    std::shared_mutex children_mutex;

    BlockStreamProfileInfo info;
    std::atomic<bool> is_cancelled{false};
    std::atomic<bool> is_killed{false};
    ProgressCallback progress_callback;
    QueryStatus * process_list_elem = nullptr;
    /// According to total_stopwatch in microseconds
    UInt64 last_profile_events_update_time = 0;

    /// Additional information that can be generated during the work process.

    /// Total values during aggregation.
    Block totals;
    /// Minimums and maximums. The first row of the block - minimums, the second - the maximums.
    Block extremes;


    void addChild(BlockInputStreamPtr & child)
    {
        std::unique_lock lock(children_mutex);
        children.push_back(child);
    }

private:
    TableStructureReadLocks table_locks;

    bool enabled_extremes = false;

    /// The limit on the number of rows/bytes has been exceeded, and you need to stop execution on the next `read` call, as if the thread has run out.
    bool limit_exceeded_need_break = false;

    /// Limitations and quotas.

    LocalLimits limits;

    QuotaForIntervals * quota = nullptr;    /// If nullptr - the quota is not used.
    double prev_elapsed = 0;

    /// The approximate total number of rows to read. For progress bar.
    size_t total_rows_approx = 0;

    /// The successors must implement this function.
    virtual Block readImpl() = 0;

    /// Here you can do a preliminary initialization.
    virtual void readPrefixImpl() {}

    /// Here you need to do a finalization, which can lead to an exception.
    virtual void readSuffixImpl() {}

    void updateExtremes(Block & block);

    /** Check limits and quotas.
      * But only those that can be checked within each separate stream.
      */
    bool checkTimeLimit();
    void checkQuota(Block & block);

    size_t checkDepthImpl(size_t max_depth, size_t level) const;

    /// Get text with names of this source and the entire subtree.
    String getTreeID() const;

    template <typename F>
    void forEachChild(F && f)
@ -113,20 +314,6 @@ public:
            if (f(*child))
                return;
    }

protected:
    BlockInputStreams children;
    std::shared_mutex children_mutex;

private:
    TableStructureReadLocks table_locks;

    size_t checkDepthImpl(size_t max_depth, size_t level) const;

    /// Get text with names of this source and the entire subtree.
    String getTreeID() const;
};


}

@ -1,427 +0,0 @@
#include <Interpreters/Quota.h>
#include <Interpreters/ProcessList.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <Common/CurrentThread.h>


namespace ProfileEvents
{
    extern const Event ThrottlerSleepMicroseconds;
}


namespace DB
{

namespace ErrorCodes
{
    extern const int TOO_MANY_ROWS;
    extern const int TOO_MANY_BYTES;
    extern const int TOO_MANY_ROWS_OR_BYTES;
    extern const int TIMEOUT_EXCEEDED;
    extern const int TOO_SLOW;
    extern const int LOGICAL_ERROR;
    extern const int BLOCKS_HAVE_DIFFERENT_STRUCTURE;
}


IProfilingBlockInputStream::IProfilingBlockInputStream()
{
    info.parent = this;
}

Block IProfilingBlockInputStream::read()
{
    if (total_rows_approx)
    {
        progressImpl(Progress(0, 0, total_rows_approx));
        total_rows_approx = 0;
    }

    if (!info.started)
    {
        info.total_stopwatch.start();
        info.started = true;
    }

    Block res;

    if (isCancelledOrThrowIfKilled())
        return res;

    if (!checkTimeLimit())
        limit_exceeded_need_break = true;

    if (!limit_exceeded_need_break)
        res = readImpl();

    if (res)
    {
        info.update(res);

        if (enabled_extremes)
            updateExtremes(res);

        if (limits.mode == LIMITS_CURRENT && !limits.size_limits.check(info.rows, info.bytes, "result", ErrorCodes::TOO_MANY_ROWS_OR_BYTES))
            limit_exceeded_need_break = true;

        if (quota != nullptr)
            checkQuota(res);
    }
    else
    {
        /** If the thread is over, then we will ask all children to abort the execution.
          * This makes sense when running a query with LIMIT
          * - there is a situation when all the necessary data has already been read,
          * but children sources are still working,
          * herewith they can work in separate threads or even remotely.
          */
        cancel(false);
    }

    progress(Progress(res.rows(), res.bytes()));

#ifndef NDEBUG
    if (res)
    {
        Block header = getHeader();
        if (header)
            assertBlocksHaveEqualStructure(res, header, getName());
    }
#endif

    return res;
}


void IProfilingBlockInputStream::readPrefix()
{
    readPrefixImpl();

    forEachChild([&] (IBlockInputStream & child)
    {
        child.readPrefix();
        return false;
    });
}


void IProfilingBlockInputStream::readSuffix()
{
    forEachChild([&] (IBlockInputStream & child)
    {
        child.readSuffix();
        return false;
    });

    readSuffixImpl();
}


void IProfilingBlockInputStream::updateExtremes(Block & block)
{
    size_t num_columns = block.columns();

    if (!extremes)
    {
        MutableColumns extremes_columns(num_columns);

        for (size_t i = 0; i < num_columns; ++i)
        {
            const ColumnPtr & src = block.safeGetByPosition(i).column;

            if (src->isColumnConst())
            {
                /// Equal min and max.
                extremes_columns[i] = src->cloneResized(2);
            }
            else
            {
                Field min_value;
                Field max_value;

                src->getExtremes(min_value, max_value);

                extremes_columns[i] = src->cloneEmpty();

                extremes_columns[i]->insert(min_value);
                extremes_columns[i]->insert(max_value);
            }
        }

        extremes = block.cloneWithColumns(std::move(extremes_columns));
    }
    else
    {
        for (size_t i = 0; i < num_columns; ++i)
        {
            ColumnPtr & old_extremes = extremes.safeGetByPosition(i).column;

            if (old_extremes->isColumnConst())
                continue;

            Field min_value = (*old_extremes)[0];
            Field max_value = (*old_extremes)[1];

            Field cur_min_value;
            Field cur_max_value;

            block.safeGetByPosition(i).column->getExtremes(cur_min_value, cur_max_value);

            if (cur_min_value < min_value)
                min_value = cur_min_value;
            if (cur_max_value > max_value)
                max_value = cur_max_value;

            MutableColumnPtr new_extremes = old_extremes->cloneEmpty();

            new_extremes->insert(min_value);
            new_extremes->insert(max_value);

            old_extremes = std::move(new_extremes);
        }
    }
}


static bool handleOverflowMode(OverflowMode mode, const String & message, int code)
{
    switch (mode)
    {
        case OverflowMode::THROW:
            throw Exception(message, code);
        case OverflowMode::BREAK:
            return false;
        default:
            throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
    }
}


bool IProfilingBlockInputStream::checkTimeLimit()
{
    if (limits.max_execution_time != 0
        && info.total_stopwatch.elapsed() > static_cast<UInt64>(limits.max_execution_time.totalMicroseconds()) * 1000)
        return handleOverflowMode(limits.timeout_overflow_mode,
            "Timeout exceeded: elapsed " + toString(info.total_stopwatch.elapsedSeconds())
                + " seconds, maximum: " + toString(limits.max_execution_time.totalMicroseconds() / 1000000.0),
            ErrorCodes::TIMEOUT_EXCEEDED);

    return true;
}


void IProfilingBlockInputStream::checkQuota(Block & block)
{
    switch (limits.mode)
    {
        case LIMITS_TOTAL:
            /// Checked in `progress` method.
            break;

        case LIMITS_CURRENT:
        {
            time_t current_time = time(nullptr);
            double total_elapsed = info.total_stopwatch.elapsedSeconds();

            quota->checkAndAddResultRowsBytes(current_time, block.rows(), block.bytes());
            quota->checkAndAddExecutionTime(current_time, Poco::Timespan((total_elapsed - prev_elapsed) * 1000000.0));

            prev_elapsed = total_elapsed;
            break;
        }
    }
}


void IProfilingBlockInputStream::progressImpl(const Progress & value)
{
    if (progress_callback)
        progress_callback(value);

    if (process_list_elem)
    {
        if (!process_list_elem->updateProgressIn(value))
            cancel(/* kill */ true);

        /// The total amount of data processed or intended for processing in all leaf sources, possibly on remote servers.

        ProgressValues progress = process_list_elem->getProgressIn();
        size_t total_rows_estimate = std::max(progress.rows, progress.total_rows);

        /** Check the restrictions on the amount of data to read, the speed of the query, the quota on the amount of data to read.
          * NOTE: Maybe it makes sense to have them checked directly in ProcessList?
          */

        if (limits.mode == LIMITS_TOTAL
            && ((limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows)
                || (limits.size_limits.max_bytes && progress.bytes > limits.size_limits.max_bytes)))
        {
            switch (limits.size_limits.overflow_mode)
            {
                case OverflowMode::THROW:
                {
                    if (limits.size_limits.max_rows && total_rows_estimate > limits.size_limits.max_rows)
                        throw Exception("Limit for rows to read exceeded: " + toString(total_rows_estimate)
                            + " rows read (or to read), maximum: " + toString(limits.size_limits.max_rows),
                            ErrorCodes::TOO_MANY_ROWS);
                    else
                        throw Exception("Limit for (uncompressed) bytes to read exceeded: " + toString(progress.bytes)
                            + " bytes read, maximum: " + toString(limits.size_limits.max_bytes),
                            ErrorCodes::TOO_MANY_BYTES);
                }

                case OverflowMode::BREAK:
                {
                    /// For `break`, we will stop only if so many rows were actually read, and not just supposed to be read.
                    if ((limits.size_limits.max_rows && progress.rows > limits.size_limits.max_rows)
                        || (limits.size_limits.max_bytes && progress.bytes > limits.size_limits.max_bytes))
                    {
                        cancel(false);
                    }

                    break;
                }

                default:
                    throw Exception("Logical error: unknown overflow mode", ErrorCodes::LOGICAL_ERROR);
            }
        }

        size_t total_rows = progress.total_rows;

        constexpr UInt64 profile_events_update_period_microseconds = 10 * 1000; // 10 milliseconds
        UInt64 total_elapsed_microseconds = info.total_stopwatch.elapsedMicroseconds();

        if (last_profile_events_update_time + profile_events_update_period_microseconds < total_elapsed_microseconds)
        {
            CurrentThread::updatePerformanceCounters();
            last_profile_events_update_time = total_elapsed_microseconds;
        }

        if ((limits.min_execution_speed || (total_rows && limits.timeout_before_checking_execution_speed != 0))
            && (static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
        {
            /// Do not count sleeps in throttlers
            UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds];
            double elapsed_seconds = (throttler_sleep_microseconds > total_elapsed_microseconds)
                ? 0.0 : (total_elapsed_microseconds - throttler_sleep_microseconds) / 1000000.0;

            if (elapsed_seconds > 0)
            {
                if (limits.min_execution_speed && progress.rows / elapsed_seconds < limits.min_execution_speed)
                    throw Exception("Query is executing too slow: " + toString(progress.rows / elapsed_seconds)
                        + " rows/sec., minimum: " + toString(limits.min_execution_speed),
                        ErrorCodes::TOO_SLOW);

                /// If the predicted execution time is longer than `max_execution_time`.
                if (limits.max_execution_time != 0 && total_rows)
                {
                    double estimated_execution_time_seconds = elapsed_seconds * (static_cast<double>(total_rows) / progress.rows);

                    if (estimated_execution_time_seconds > limits.max_execution_time.totalSeconds())
                        throw Exception("Estimated query execution time (" + toString(estimated_execution_time_seconds) + " seconds)"
                            + " is too long. Maximum: " + toString(limits.max_execution_time.totalSeconds())
                            + ". Estimated rows to process: " + toString(total_rows),
                            ErrorCodes::TOO_SLOW);
                }
            }
        }

        if (quota != nullptr && limits.mode == LIMITS_TOTAL)
        {
            quota->checkAndAddReadRowsBytes(time(nullptr), value.rows, value.bytes);
        }
    }
}


void IProfilingBlockInputStream::cancel(bool kill)
{
    if (kill)
        is_killed = true;

    bool old_val = false;
    if (!is_cancelled.compare_exchange_strong(old_val, true, std::memory_order_seq_cst, std::memory_order_relaxed))
        return;

    forEachProfilingChild([&] (IProfilingBlockInputStream & child)
    {
        child.cancel(kill);
        return false;
    });
}


bool IProfilingBlockInputStream::isCancelled() const
{
    return is_cancelled;
}

bool IProfilingBlockInputStream::isCancelledOrThrowIfKilled() const
{
    if (!is_cancelled)
        return false;
    if (is_killed)
        throw Exception("Query was cancelled", ErrorCodes::QUERY_WAS_CANCELLED);
    return true;
}


void IProfilingBlockInputStream::setProgressCallback(const ProgressCallback & callback)
{
    progress_callback = callback;

    forEachProfilingChild([&] (IProfilingBlockInputStream & child)
    {
        child.setProgressCallback(callback);
        return false;
    });
}


void IProfilingBlockInputStream::setProcessListElement(QueryStatus * elem)
{
    process_list_elem = elem;

    forEachProfilingChild([&] (IProfilingBlockInputStream & child)
    {
        child.setProcessListElement(elem);
        return false;
    });
}


Block IProfilingBlockInputStream::getTotals()
{
    if (totals)
        return totals;

    Block res;
    forEachProfilingChild([&] (IProfilingBlockInputStream & child)
    {
        res = child.getTotals();
        if (res)
            return true;
        return false;
    });
    return res;
}

Block IProfilingBlockInputStream::getExtremes()
{
    if (extremes)
        return extremes;

    Block res;
    forEachProfilingChild([&] (IProfilingBlockInputStream & child)
    {
        res = child.getExtremes();
        if (res)
            return true;
        return false;
    });
    return res;
}

}

@ -1,247 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <IO/Progress.h>
|
||||
|
||||
#include <DataStreams/BlockStreamProfileInfo.h>
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <DataStreams/SizeLimits.h>
|
||||
|
||||
#include <Interpreters/SettingsCommon.h>
|
||||
|
||||
#include <atomic>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int QUERY_WAS_CANCELLED;
|
||||
}
|
||||
|
||||
class QuotaForIntervals;
|
||||
class QueryStatus;
|
||||
class ProcessListElement;
|
||||
class IProfilingBlockInputStream;
|
||||
|
||||
using ProfilingBlockInputStreamPtr = std::shared_ptr<IProfilingBlockInputStream>;
|
||||
|
||||
|
||||
/** Watches out at how the source of the blocks works.
|
||||
* Lets you get information for profiling:
|
||||
* rows per second, blocks per second, megabytes per second, etc.
|
||||
* Allows you to stop reading data (in nested sources).
|
||||
*/
|
||||
class IProfilingBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
friend struct BlockStreamProfileInfo;
|
||||
|
||||
public:
|
||||
IProfilingBlockInputStream();
|
||||
|
||||
Block read() override final;
|
||||
|
||||
/** The default implementation calls readPrefixImpl() on itself, and then readPrefix() recursively for all children.
|
||||
* There are cases when you do not want `readPrefix` of children to be called synchronously, in this function,
|
||||
* but you want them to be called, for example, in separate threads (for parallel initialization of children).
|
||||
* Then overload `readPrefix` function.
|
||||
*/
|
||||
void readPrefix() override;
|
||||
|
||||
/** The default implementation calls recursively readSuffix() on all children, and then readSuffixImpl() on itself.
|
||||
* If this stream calls read() in children in a separate thread, this behavior is usually incorrect:
|
||||
* readSuffix() of the child can not be called at the moment when the same child's read() is executed in another thread.
|
||||
* In this case, you need to override this method so that readSuffix() in children is called, for example, after connecting streams.
|
||||
*/
|
||||
void readSuffix() override;
|
||||
|
||||
/// Get information about execution speed.
|
||||
const BlockStreamProfileInfo & getProfileInfo() const { return info; }
|
||||
|
||||
/** Get "total" values.
|
||||
* The default implementation takes them from itself or from the first child source in which they are.
|
||||
* The overridden method can perform some calculations. For example, apply an expression to the `totals` of the child source.
|
||||
* There can be no total values - then an empty block is returned.
|
||||
*
|
||||
* Call this method only after all the data has been retrieved with `read`,
|
||||
* otherwise there will be problems if any data at the same time is computed in another thread.
|
||||
*/
|
||||
virtual Block getTotals();
|
||||
|
||||
/// The same for minimums and maximums.
|
||||
Block getExtremes();
|
||||
|
||||
|
||||
/** Set the execution progress bar callback.
|
||||
* The callback is passed to all child sources.
|
||||
* By default, it is called for leaf sources, after each block.
|
||||
* (But this can be overridden in the progress() method)
|
||||
* The function takes the number of rows in the last block, the number of bytes in the last block.
|
||||
* Note that the callback can be called from different threads.
|
||||
*/
|
||||
void setProgressCallback(const ProgressCallback & callback);
    /** In this method:
      * - the progress callback is called;
      * - the status of the query execution in ProcessList is updated;
      * - restrictions and quotas are checked - those that should be checked not within the same source,
      *   but over the total amount of resources spent in all sources at once (information in the ProcessList).
      */
    virtual void progress(const Progress & value)
    {
        /// The data for progress is taken from leaf sources.
        if (children.empty())
            progressImpl(value);
    }

    void progressImpl(const Progress & value);

    /** Set the pointer to the process list item.
      * It is passed to all child sources.
      * General information about the resources spent on the query will be written into it.
      * Based on this information, the quota and some restrictions will be checked.
      * This information will also be available in the SHOW PROCESSLIST query.
      */
    void setProcessListElement(QueryStatus * elem);

    /** Set the approximate total number of rows to read.
      */
    void addTotalRowsApprox(size_t value) { total_rows_approx += value; }

    /** Ask to abort the receipt of data as soon as possible.
      * By default, it just sets the is_cancelled flag and asks all children to be interrupted.
      * This function can be called several times, including simultaneously from different threads.
      * It has two modes:
      *  with kill = false, only is_cancelled is set: streams stop silently, returning the data processed so far;
      *  with kill = true, is_killed is also set: the query stops with an exception.
      */
    virtual void cancel(bool kill);
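    /** Sketch of the two cancellation modes (hypothetical caller code):
      *
      *     stream.cancel(false);  /// soft stop: readers return the data processed so far
      *     stream.cancel(true);   /// hard stop: the query terminates with an exception
      */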
    bool isCancelled() const;
    bool isCancelledOrThrowIfKilled() const;

    /** Which limitations and quotas should be checked.
      * LIMITS_CURRENT - checks the amount of data read by the current stream only (BlockStreamProfileInfo is used for the check).
      *   Currently it is used in root streams to check the max_result_{rows,bytes} limits.
      * LIMITS_TOTAL - checks the total amount of data read from leaf streams (i.e. data read from disk and remote servers).
      *   It checks max_{rows,bytes}_to_read in the progress handler, using info from ProcessListElement::progress_in.
      *   Currently this check is performed only in leaf streams.
      */
    enum LimitsMode
    {
        LIMITS_CURRENT,
        LIMITS_TOTAL,
    };

    /// It is a subset of limitations from Limits.
    struct LocalLimits
    {
        LimitsMode mode = LIMITS_CURRENT;

        SizeLimits size_limits;

        Poco::Timespan max_execution_time = 0;
        OverflowMode timeout_overflow_mode = OverflowMode::THROW;

        /// In rows per second.
        size_t min_execution_speed = 0;
        /// Verify that the speed is not too low after the specified time has elapsed.
        Poco::Timespan timeout_before_checking_execution_speed = 0;
    };

    /** Set the limitations that are checked on each block. */
    void setLimits(const LocalLimits & limits_)
    {
        limits = limits_;
    }
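    /** Sketch of configuring limits (illustrative; it assumes SizeLimits can be
      *  constructed from max_rows, max_bytes and an overflow mode):
      *
      *     IProfilingBlockInputStream::LocalLimits local_limits;
      *     local_limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL;
      *     local_limits.size_limits = SizeLimits(1000000, 1000000000, OverflowMode::THROW);
      *     local_limits.max_execution_time = Poco::Timespan(60, 0);  /// 60 seconds
      *     stream.setLimits(local_limits);
      */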
    const LocalLimits & getLimits() const
    {
        return limits;
    }

    /** Set the quota. If you set a quota on the amount of raw data,
      *  then you should also set mode = LIMITS_TOTAL in LocalLimits with setLimits.
      */
    void setQuota(QuotaForIntervals & quota_)
    {
        quota = &quota_;
    }
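    /** Sketch: per the note above, a quota on raw data is paired with LIMITS_TOTAL
      *  (hypothetical caller code; `quota` is an existing QuotaForIntervals and
      *  `local_limits` is a LocalLimits as in the setLimits sketch):
      *
      *     local_limits.mode = IProfilingBlockInputStream::LIMITS_TOTAL;
      *     stream.setLimits(local_limits);
      *     stream.setQuota(quota);
      */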
    /// Enable calculation of minimums and maximums by the result columns.
    void enableExtremes() { enabled_extremes = true; }

protected:
    BlockStreamProfileInfo info;
    std::atomic<bool> is_cancelled{false};
    std::atomic<bool> is_killed{false};
    ProgressCallback progress_callback;
    QueryStatus * process_list_elem = nullptr;
    /// According to total_stopwatch, in microseconds.
    UInt64 last_profile_events_update_time = 0;

    /// Additional information that can be generated during the work process.

    /// Total values during aggregation.
    Block totals;
    /// Minimums and maximums. The first row of the block contains the minimums, the second - the maximums.
    Block extremes;

    void addChild(BlockInputStreamPtr & child)
    {
        std::unique_lock lock(children_mutex);
        children.push_back(child);
    }

private:
    bool enabled_extremes = false;

    /// The limit on the number of rows/bytes has been exceeded, and execution must stop on the next `read` call, as if the stream were exhausted.
    bool limit_exceeded_need_break = false;

    /// Limitations and quotas.

    LocalLimits limits;

    QuotaForIntervals * quota = nullptr; /// If nullptr, the quota is not used.
    double prev_elapsed = 0;

    /// The approximate total number of rows to read. For the progress bar.
    size_t total_rows_approx = 0;

    /// Derived classes must implement this function.
    virtual Block readImpl() = 0;

    /// Here you can do preliminary initialization.
    virtual void readPrefixImpl() {}

    /// Here you need to do finalization, which can throw an exception.
    virtual void readSuffixImpl() {}

    void updateExtremes(Block & block);

    /** Check limits and quotas.
      * But only those that can be checked within each separate stream.
      */
    bool checkTimeLimit();
    void checkQuota(Block & block);

    template <typename F>
    void forEachProfilingChild(F && f)
    {
        /// NOTE: Acquires a read lock, therefore f() should be thread-safe.
        std::shared_lock lock(children_mutex);

        for (auto & child : children)
            if (IProfilingBlockInputStream * p_child = dynamic_cast<IProfilingBlockInputStream *>(child.get()))
                if (f(*p_child))
                    return;
    }
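    /** Sketch of how a method of this class might use forEachProfilingChild
      *  (illustrative; this mirrors what a cancel-like propagation could look like,
      *  with f returning false so that every child is visited):
      *
      *     forEachProfilingChild([=](IProfilingBlockInputStream & child)
      *     {
      *         child.cancel(kill);
      *         return false;   /// keep iterating over all children
      *     });
      */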
};

}
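/** A minimal sketch of a concrete source built on this interface (hypothetical,
  *  for illustration only; real in-tree analogues such as OneBlockInputStream or
  *  BlocksListBlockInputStream differ in detail). It serves a fixed list of
  *  pre-built blocks and returns an empty block when exhausted, which is how
  *  the end of data is signalled to read():
  *
  *     class VectorBlocksInputStream : public IProfilingBlockInputStream
  *     {
  *     public:
  *         explicit VectorBlocksInputStream(std::vector<Block> blocks_) : blocks(std::move(blocks_)) {}
  *
  *         String getName() const override { return "VectorBlocks"; }
  *         Block getHeader() const override { return blocks.empty() ? Block() : blocks.front().cloneEmpty(); }
  *
  *     private:
  *         Block readImpl() override
  *         {
  *             if (pos >= blocks.size())
  *                 return {};          /// empty block => end of data
  *             return blocks[pos++];
  *         }
  *
  *         std::vector<Block> blocks;
  *         size_t pos = 0;
  *     };
  */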