diff --git a/.travis.yml b/.travis.yml index 705b6977114..d658b8d285c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,26 +3,6 @@ language: generic matrix: fast_finish: true include: -# - os: linux -# -# cache: -# ccache: true -# timeout: 1000 -# -# addons: -# apt: -# update: true -# sources: -# - ubuntu-toolchain-r-test -# packages: [ g++-7, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl ] -# -# env: -# - MATRIX_EVAL="export CC=gcc-7 && export CXX=g++-7" -# -# script: -# - env TEST_RUN= utils/travis/normal.sh - - # We need to have gcc7 headers to compile c++17 code on clang - os: linux @@ -41,33 +21,11 @@ matrix: packages: [ ninja-build, g++-7, clang-5.0, lld-5.0, libicu-dev, libreadline-dev, libmysqlclient-dev, unixodbc-dev, libltdl-dev, libssl-dev, libboost-dev, zlib1g-dev, libdouble-conversion-dev, libsparsehash-dev, librdkafka-dev, libcapnp-dev, libsparsehash-dev, libgoogle-perftools-dev, bash, expect, python, python-lxml, python-termcolor, curl, perl, sudo, openssl] env: - - MATRIX_EVAL="export CC=clang-5.0 && export CXX=clang++-5.0" + - MATRIX_EVAL="export CC=clang-5.0 CXX=clang++-5.0" script: - utils/travis/normal.sh - -# TODO: fix internal compiler -# - os: linux -# -# sudo: required -# -# cache: -# timeout: 1000 -# directories: -# - /var/cache/pbuilder/ccache -# -# addons: -# apt: -# packages: [ pbuilder, fakeroot, debhelper ] -# -# env: -# - MATRIX_EVAL="export DEB_CC=clang-5.0 && export DEB_CXX=clang++-5.0" -# -# script: -# - utils/travis/pbuilder.sh - - - os: linux sudo: required @@ -85,69 +43,6 @@ matrix: script: - utils/travis/pbuilder.sh - -# - os: linux -# -# sudo: required -# -# cache: -# timeout: 1000 -# directories: -# - /var/cache/pbuilder/ccache -# -# addons: -# apt: -# update: true 
-# packages: [ pbuilder, fakeroot, debhelper ] -# -# env: -# - MATRIX_EVAL="export ARCH=i386" -# -# script: -# - env PBUILDER_TIMEOUT=40m TEST_TRUE=true TEST_RUN= utils/travis/pbuilder.sh - - -# TODO: Can't bootstrap bionic on trusty host -# - os: linux -# -# sudo: required -# -# cache: -# timeout: 1000 -# directories: -# - /var/cache/pbuilder/ccache -# -# addons: -# apt: -# update: true -# packages: [ pbuilder, fakeroot, debhelper ] -# -# env: -# - MATRIX_EVAL="export DEB_CC=clang-6.0 && export DEB_CXX=clang++-6.0 && export DIST=bionic && export EXTRAPACKAGES='clang-6.0 lld-6.0'" -# -# script: -# - utils/travis/pbuilder.sh - - -# Cant fit to time limit (48min) -# - os: osx -# osx_image: xcode9.2 -# -# cache: -# ccache: true -# timeout: 1000 -# -# before_install: -# - brew install unixodbc gcc ccache libtool gettext zlib readline double-conversion gperftools google-sparsehash lz4 zstd || true -# - brew link --overwrite gcc || true -# -# env: -# - MATRIX_EVAL="export CC=gcc-8 && export CXX=g++-8" -# -# script: -# - env CMAKE_FLAGS="-DUSE_INTERNAL_BOOST_LIBRARY=1" utils/travis/normal.sh - - allow_failures: - os: osx diff --git a/CMakeLists.txt b/CMakeLists.txt index 68e293a5d7c..256f8a828a2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -61,10 +61,6 @@ if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") set (COMMON_WARNING_FLAGS "${COMMON_WARNING_FLAGS} -Wno-unused-command-line-argument") endif () -if (ARCH_LINUX) - set (CXX11_ABI "ENABLE" CACHE STRING "Use C++11 ABI: DEFAULT, ENABLE, DISABLE") -endif () - option (TEST_COVERAGE "Enables flags for test coverage" OFF) option (ENABLE_TESTS "Enables tests" ${NOT_MSVC}) @@ -86,7 +82,7 @@ endif () if (CMAKE_LIBRARY_ARCHITECTURE MATCHES "amd64.*|x86_64.*|AMD64.*") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON) - if (ARCH_LINUX) + if (OS_LINUX) option (GLIBC_COMPATIBILITY "Set to TRUE to enable compatibility with older glibc libraries. 
Only for x86_64, Linux. Implies USE_INTERNAL_MEMCPY." ON) endif() endif () @@ -95,15 +91,7 @@ if (GLIBC_COMPATIBILITY) set (USE_INTERNAL_MEMCPY ON) endif () -if (CXX11_ABI STREQUAL ENABLE) - set (CXX11_ABI_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=1") -elseif (CXX11_ABI STREQUAL DISABLE) - set (CXX11_ABI_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0") -else () - set (CXX11_ABI_FLAGS "") -endif () - -set (COMPILER_FLAGS "${COMPILER_FLAGS} ${CXX11_ABI_FLAGS}") +set (COMPILER_FLAGS "${COMPILER_FLAGS}") string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER}) @@ -150,26 +138,29 @@ else () endif () set (CMAKE_BUILD_COLOR_MAKEFILE ON) -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS}") +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${PLATFORM_EXTRA_CXX_FLAG} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CXX_WARNING_FLAGS}") #set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_CXX_FLAGS_ADD}") -set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${GLIBC_COMPATIBILITY_COMPILE_FLAGS} ${CMAKE_C_FLAGS_ADD}") +set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMPILER_FLAGS} -fno-omit-frame-pointer ${COMMON_WARNING_FLAGS} ${CMAKE_C_FLAGS_ADD}") #set (CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -O3 ${CMAKE_C_FLAGS_ADD}") set (CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -O0 -g3 -ggdb3 -fno-inline ${CMAKE_C_FLAGS_ADD}") -if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND ARCH_FREEBSD)) +if (MAKE_STATIC_LIBRARIES AND NOT APPLE AND NOT 
(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND OS_FREEBSD)) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++") + + # Along with executables, we also build example of shared library for "library dictionary source"; and it also should be self-contained. + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -static-libgcc -static-libstdc++") endif () set(THREADS_PREFER_PTHREAD_FLAG ON) include (cmake/test_compiler.cmake) -if (ARCH_LINUX AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${GLIBC_COMPATIBILITY_LINK_FLAGS} ${CXX11_ABI_FLAGS}") +if (OS_LINUX AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++ (only make sense on Linux with Clang)" ${HAVE_LIBCXX}) set (LIBCXX_PATH "" CACHE STRING "Use custom path for libc++. It should be used for MSan.") @@ -237,7 +228,7 @@ else () set(NOT_UNBUNDLED 1) endif () # Using system libs can cause lot of warnings in includes. -if (UNBUNDLED OR NOT (ARCH_LINUX OR APPLE) OR ARCH_32) +if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32) option (NO_WERROR "Disable -Werror compiler option" ON) endif () @@ -246,19 +237,15 @@ message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE include(GNUInstallDirs) include (cmake/find_ssl.cmake) -if (NOT OPENSSL_FOUND) - message (FATAL_ERROR "Need openssl for build. 
debian tip: sudo apt install libssl-dev") -endif () - include (cmake/lib_name.cmake) include (cmake/find_icu4c.cmake) include (cmake/find_boost.cmake) -# openssl, zlib before poco include (cmake/find_zlib.cmake) include (cmake/find_zstd.cmake) include (cmake/find_ltdl.cmake) # for odbc include (cmake/find_termcap.cmake) include (cmake/find_odbc.cmake) +# openssl, zlib, odbc before poco include (cmake/find_poco.cmake) include (cmake/find_lz4.cmake) include (cmake/find_sparsehash.cmake) @@ -278,7 +265,6 @@ include (cmake/find_contrib_lib.cmake) find_contrib_lib(cityhash) find_contrib_lib(farmhash) find_contrib_lib(metrohash) -find_contrib_lib(murmurhash2) find_contrib_lib(btrie) find_contrib_lib(double-conversion) diff --git a/ci/install-libraries.sh b/ci/install-libraries.sh index ee2e5bc4cb0..d7fb856dbed 100755 --- a/ci/install-libraries.sh +++ b/ci/install-libraries.sh @@ -3,7 +3,6 @@ set -e -x source default-config -./install-os-packages.sh libssl-dev ./install-os-packages.sh libicu-dev ./install-os-packages.sh libreadline-dev diff --git a/ci/install-os-packages.sh b/ci/install-os-packages.sh index 66644ba4616..fe5b4f84833 100755 --- a/ci/install-os-packages.sh +++ b/ci/install-os-packages.sh @@ -43,9 +43,6 @@ case $PACKAGE_MANAGER in jq) $SUDO apt-get install -y jq ;; - libssl-dev) - $SUDO apt-get install -y libssl-dev - ;; libicu-dev) $SUDO apt-get install -y libicu-dev ;; @@ -91,9 +88,6 @@ case $PACKAGE_MANAGER in jq) $SUDO yum install -y jq ;; - libssl-dev) - $SUDO yum install -y openssl-devel - ;; libicu-dev) $SUDO yum install -y libicu-devel ;; @@ -133,9 +127,6 @@ case $PACKAGE_MANAGER in jq) $SUDO pkg install -y jq ;; - libssl-dev) - $SUDO pkg install -y openssl - ;; libicu-dev) $SUDO pkg install -y icu ;; diff --git a/ci/jobs/quick-build/run.sh b/ci/jobs/quick-build/run.sh index 5fe57457645..0eefa72caa7 100755 --- a/ci/jobs/quick-build/run.sh +++ b/ci/jobs/quick-build/run.sh @@ -21,7 +21,7 @@ BUILD_TARGETS=clickhouse BUILD_TYPE=Debug 
ENABLE_EMBEDDED_COMPILER=0 -CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_TCMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_MYSQL=0" +CMAKE_FLAGS="-D CMAKE_C_FLAGS_ADD=-g0 -D CMAKE_CXX_FLAGS_ADD=-g0 -D ENABLE_TCMALLOC=0 -D ENABLE_CAPNP=0 -D ENABLE_RDKAFKA=0 -D ENABLE_UNWIND=0 -D ENABLE_ICU=0 -D ENABLE_POCO_MONGODB=0 -D ENABLE_POCO_NETSSL=0 -D ENABLE_POCO_ODBC=0 -D ENABLE_ODBC=0 -D ENABLE_MYSQL=0" [[ $(uname) == "FreeBSD" ]] && COMPILER_PACKAGE_VERSION=devel && export COMPILER_PATH=/usr/local/bin diff --git a/cmake/Modules/FindODBC.cmake b/cmake/Modules/FindODBC.cmake new file mode 100644 index 00000000000..66d43e93d2d --- /dev/null +++ b/cmake/Modules/FindODBC.cmake @@ -0,0 +1,88 @@ +# This file copied from contrib/poco/cmake/FindODBC.cmake to allow build without submodules + +# +# Find the ODBC driver manager includes and library. +# +# ODBC is an open standard for connecting to different databases in a +# semi-vendor-independent fashion. First you install the ODBC driver +# manager. Then you need a driver for each separate database you want +# to connect to (unless a generic one works). VTK includes neither +# the driver manager nor the vendor-specific drivers: you have to find +# those yourself. +# +# This module defines +# ODBC_INCLUDE_DIRECTORIES, where to find sql.h +# ODBC_LIBRARIES, the libraries to link against to use ODBC +# ODBC_FOUND. If false, you cannot build anything that requires ODBC. 
+ +option (ENABLE_ODBC "Enable ODBC" ${OS_LINUX}) +if (OS_LINUX) + option (USE_INTERNAL_ODBC_LIBRARY "Set to FALSE to use system odbc library instead of bundled" ${NOT_UNBUNDLED}) +else () + option (USE_INTERNAL_ODBC_LIBRARY "Set to FALSE to use system odbc library instead of bundled" OFF) +endif () + +if (USE_INTERNAL_ODBC_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/unixodbc/README") + message (WARNING "submodule contrib/unixodbc is missing. to fix try run: \n git submodule update --init --recursive") + set (USE_INTERNAL_ODBC_LIBRARY 0) +endif () + +if (ENABLE_ODBC) + if (USE_INTERNAL_ODBC_LIBRARY) + set (ODBC_LIBRARIES unixodbc) + set (ODBC_INCLUDE_DIRECTORIES ${CMAKE_SOURCE_DIR}/contrib/unixodbc/include) + set (ODBC_FOUND 1) + set (USE_ODBC 1) + else () + find_path(ODBC_INCLUDE_DIRECTORIES + NAMES sql.h + HINTS + /usr/include + /usr/include/iodbc + /usr/include/odbc + /usr/local/include + /usr/local/include/iodbc + /usr/local/include/odbc + /usr/local/iodbc/include + /usr/local/odbc/include + "C:/Program Files/ODBC/include" + "C:/Program Files/Microsoft SDKs/Windows/v7.0/include" + "C:/Program Files/Microsoft SDKs/Windows/v6.0a/include" + "C:/ODBC/include" + DOC "Specify the directory containing sql.h." + ) + + find_library(ODBC_LIBRARIES + NAMES iodbc odbc iodbcinst odbcinst odbc32 + HINTS + /usr/lib + /usr/lib/iodbc + /usr/lib/odbc + /usr/local/lib + /usr/local/lib/iodbc + /usr/local/lib/odbc + /usr/local/iodbc/lib + /usr/local/odbc/lib + "C:/Program Files/ODBC/lib" + "C:/ODBC/lib/debug" + "C:/Program Files (x86)/Microsoft SDKs/Windows/v7.0A/Lib" + DOC "Specify the ODBC driver manager library here." 
+ ) + + # MinGW find usually fails + if(MINGW) + set(ODBC_INCLUDE_DIRECTORIES ".") + set(ODBC_LIBRARIES odbc32) + endif() + + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(ODBC + DEFAULT_MSG + ODBC_INCLUDE_DIRECTORIES + ODBC_LIBRARIES) + + mark_as_advanced(ODBC_FOUND ODBC_LIBRARIES ODBC_INCLUDE_DIRECTORIES) + endif () +endif () + +message (STATUS "Using odbc: ${ODBC_INCLUDE_DIRECTORIES} : ${ODBC_LIBRARIES}") diff --git a/cmake/arch.cmake b/cmake/arch.cmake index ba446d95676..65361386035 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -11,19 +11,12 @@ if ( ( ARCH_ARM AND NOT ARCH_AARCH64 ) OR ARCH_I386) set (ARCH_32 1) message (WARNING "Support for 32bit platforms is highly experimental") endif () + if (CMAKE_SYSTEM MATCHES "Linux") - set (ARCH_LINUX 1) + set (OS_LINUX 1) endif () if (CMAKE_SYSTEM MATCHES "FreeBSD") - set (ARCH_FREEBSD 1) -endif () - -if (NOT MSVC) - set (NOT_MSVC 1) -endif () - -if (NOT APPLE) - set (NOT_APPLE 1) + set (OS_FREEBSD 1) endif () if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") diff --git a/cmake/find_cpuid.cmake b/cmake/find_cpuid.cmake index 6a4361dc42c..d02336021bb 100644 --- a/cmake/find_cpuid.cmake +++ b/cmake/find_cpuid.cmake @@ -2,7 +2,7 @@ # TODO: test new libcpuid - maybe already fixed if (NOT ARCH_ARM) - if (ARCH_FREEBSD) + if (OS_FREEBSD) set (DEFAULT_USE_INTERNAL_CPUID_LIBRARY 1) else () set (DEFAULT_USE_INTERNAL_CPUID_LIBRARY ${NOT_UNBUNDLED}) diff --git a/cmake/find_execinfo.cmake b/cmake/find_execinfo.cmake index 05dd72dbb3d..650d279983c 100644 --- a/cmake/find_execinfo.cmake +++ b/cmake/find_execinfo.cmake @@ -1,4 +1,4 @@ -if (ARCH_FREEBSD) +if (OS_FREEBSD) find_library (EXECINFO_LIBRARY execinfo) find_library (ELF_LIBRARY elf) message (STATUS "Using execinfo: ${EXECINFO_LIBRARY}") diff --git a/cmake/find_odbc.cmake b/cmake/find_odbc.cmake index 3d481d34f0d..95acf40b2b4 100644 --- a/cmake/find_odbc.cmake +++ b/cmake/find_odbc.cmake @@ -15,13 +15,20 @@ # ODBC_LIBRARIES, the libraries to link 
against to use ODBC # ODBC_FOUND. If false, you cannot build anything that requires ODBC. -option (ENABLE_ODBC "Enable ODBC" ON) -option (USE_INTERNAL_ODBC_LIBRARY "Set to FALSE to use system odbc library instead of bundled" ${NOT_UNBUNDLED}) +option (ENABLE_ODBC "Enable ODBC" ${OS_LINUX}) +if (OS_LINUX) + option (USE_INTERNAL_ODBC_LIBRARY "Set to FALSE to use system odbc library instead of bundled" ${NOT_UNBUNDLED}) +else () + option (USE_INTERNAL_ODBC_LIBRARY "Set to FALSE to use system odbc library instead of bundled" OFF) +endif () + if (USE_INTERNAL_ODBC_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/unixodbc/README") message (WARNING "submodule contrib/unixodbc is missing. to fix try run: \n git submodule update --init --recursive") set (USE_INTERNAL_ODBC_LIBRARY 0) endif () +set (ODBC_INCLUDE_DIRECTORIES ) # Include directories will be either used automatically by target_include_directories or set later. + if (ENABLE_ODBC) if (USE_INTERNAL_ODBC_LIBRARY) set (ODBC_LIBRARIES unixodbc) diff --git a/cmake/find_poco.cmake b/cmake/find_poco.cmake index 947d31951c9..f0bc535f614 100644 --- a/cmake/find_poco.cmake +++ b/cmake/find_poco.cmake @@ -92,8 +92,7 @@ elseif (NOT MISSING_INTERNAL_POCO_LIBRARY) endif () endif () - # TODO! 
fix internal ssl - if (OPENSSL_FOUND AND NOT USE_INTERNAL_SSL_LIBRARY AND (NOT DEFINED ENABLE_POCO_NETSSL OR ENABLE_POCO_NETSSL)) + if (OPENSSL_FOUND AND (NOT DEFINED ENABLE_POCO_NETSSL OR ENABLE_POCO_NETSSL)) set (Poco_NetSSL_LIBRARY PocoNetSSL) set (Poco_Crypto_LIBRARY PocoCrypto) endif () diff --git a/cmake/find_rdkafka.cmake b/cmake/find_rdkafka.cmake index 396be18cd1c..f05ced94707 100644 --- a/cmake/find_rdkafka.cmake +++ b/cmake/find_rdkafka.cmake @@ -13,7 +13,7 @@ endif () if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) find_library (RDKAFKA_LIB rdkafka) find_path (RDKAFKA_INCLUDE_DIR NAMES librdkafka/rdkafka.h PATHS ${RDKAFKA_INCLUDE_PATHS}) - if (USE_STATIC_LIBRARIES AND NOT ARCH_FREEBSD) + if (USE_STATIC_LIBRARIES AND NOT OS_FREEBSD) find_library (SASL2_LIBRARY sasl2) endif () endif () diff --git a/cmake/find_rt.cmake b/cmake/find_rt.cmake index 82ec314d195..25614fe55eb 100644 --- a/cmake/find_rt.cmake +++ b/cmake/find_rt.cmake @@ -1,7 +1,7 @@ if (APPLE) # lib from libs/libcommon set (RT_LIBRARY "apple_rt") -elseif (ARCH_FREEBSD) +elseif (OS_FREEBSD) find_library (RT_LIBRARY rt) else () set (RT_LIBRARY "") diff --git a/cmake/find_ssl.cmake b/cmake/find_ssl.cmake index ec40e498da1..51e869f86ea 100644 --- a/cmake/find_ssl.cmake +++ b/cmake/find_ssl.cmake @@ -1,4 +1,4 @@ -option (USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead of bundled" ${MSVC}) +option (USE_INTERNAL_SSL_LIBRARY "Set to FALSE to use system *ssl library instead of bundled" ${OS_LINUX}) set (OPENSSL_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES}) diff --git a/cmake/find_zlib.cmake b/cmake/find_zlib.cmake index 17350f9fd58..0e198c9bb0f 100644 --- a/cmake/find_zlib.cmake +++ b/cmake/find_zlib.cmake @@ -17,7 +17,7 @@ if (NOT ZLIB_FOUND) set (USE_INTERNAL_ZLIB_LIBRARY 1) set (ZLIB_COMPAT 1) # for zlib-ng, also enables WITH_GZFILEOP set (WITH_NATIVE_INSTRUCTIONS ${ARCHNATIVE}) - if (ARCH_FREEBSD OR ARCH_I386) + if (OS_FREEBSD OR ARCH_I386) set (WITH_OPTIM 0 CACHE INTERNAL "") # Bug in 
assembler endif () if (ARCH_AARCH64) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 62fe516bba0..050bbc56ede 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -43,6 +43,8 @@ if (USE_INTERNAL_METROHASH_LIBRARY) add_subdirectory (libmetrohash) endif () +add_subdirectory (murmurhash) + if (USE_INTERNAL_BTRIE_LIBRARY) add_subdirectory (libbtrie) endif () @@ -51,10 +53,6 @@ if (USE_INTERNAL_UNWIND_LIBRARY) add_subdirectory (libunwind) endif () -if (USE_INTERNAL_MURMURHASH2_LIBRARY) - add_subdirectory (libmurmurhash2) -endif () - if (USE_INTERNAL_ZLIB_LIBRARY) add_subdirectory (${INTERNAL_ZLIB_NAME}) # todo: make pull to Dead2/zlib-ng and remove: @@ -98,6 +96,7 @@ if (USE_INTERNAL_SSL_LIBRARY) set (BUILD_SHARED 1) endif () set (USE_SHARED ${USE_STATIC_LIBRARIES}) + set (LIBRESSL_SKIP_INSTALL 1) add_subdirectory (ssl) target_include_directories(${OPENSSL_CRYPTO_LIBRARY} PUBLIC ${OPENSSL_INCLUDE_DIR}) target_include_directories(${OPENSSL_SSL_LIBRARY} PUBLIC ${OPENSSL_INCLUDE_DIR}) @@ -152,11 +151,6 @@ if (USE_INTERNAL_POCO_LIBRARY) set (_save ${ENABLE_TESTS}) set (ENABLE_TESTS 0) set (CMAKE_DISABLE_FIND_PACKAGE_ZLIB 1) - if (USE_INTERNAL_SSL_LIBRARY OR (DEFINED ENABLE_POCO_NETSSL AND NOT ENABLE_POCO_NETSSL)) - set (DISABLE_INTERNAL_OPENSSL 1 CACHE INTERNAL "") - set (ENABLE_NETSSL 0 CACHE INTERNAL "") # TODO! - set (ENABLE_CRYPTO 0 CACHE INTERNAL "") # TODO! 
- endif () if (MSVC) set (ENABLE_DATA_ODBC 0 CACHE INTERNAL "") # TODO (build fail) endif () diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt index 65fda9cd0ce..d60d34604a9 100644 --- a/contrib/jemalloc-cmake/CMakeLists.txt +++ b/contrib/jemalloc-cmake/CMakeLists.txt @@ -37,7 +37,7 @@ ${JEMALLOC_SOURCE_DIR}/src/witness.c ) if(CMAKE_SYSTEM_NAME MATCHES "Darwin") - list(APPEND SRCS src/zone.c) + list(APPEND SRCS ${JEMALLOC_SOURCE_DIR}/src/zone.c) endif() add_library(jemalloc STATIC ${SRCS}) diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h index ab949914ecb..43936e8eba0 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h @@ -96,7 +96,8 @@ /* * Defined if secure_getenv(3) is available. */ -#define JEMALLOC_HAVE_SECURE_GETENV +// Don't want dependency on newer GLIBC +//#define JEMALLOC_HAVE_SECURE_GETENV /* * Defined if issetugid(2) is available. diff --git a/contrib/libmurmurhash2/CMakeLists.txt b/contrib/libmurmurhash2/CMakeLists.txt deleted file mode 100644 index fede35faa50..00000000000 --- a/contrib/libmurmurhash2/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_library(murmurhash2 - src/murmurhash2.cpp - include/murmurhash2.h) - -target_include_directories (murmurhash2 PUBLIC include) -target_include_directories (murmurhash2 PUBLIC src) diff --git a/contrib/libmurmurhash2/LICENSE b/contrib/libmurmurhash2/LICENSE deleted file mode 100644 index e4f5d0c2f40..00000000000 --- a/contrib/libmurmurhash2/LICENSE +++ /dev/null @@ -1 +0,0 @@ -MurmurHash2 was written by Austin Appleby, and is placed in the publicdomain. The author hereby disclaims copyright to this source code. 
diff --git a/contrib/mariadb-connector-c-cmake/CMakeLists.txt b/contrib/mariadb-connector-c-cmake/CMakeLists.txt index b4210b92123..4c1184b3edb 100644 --- a/contrib/mariadb-connector-c-cmake/CMakeLists.txt +++ b/contrib/mariadb-connector-c-cmake/CMakeLists.txt @@ -57,6 +57,8 @@ ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/libmariadb/ma_client_plugin.c add_library(mysqlclient STATIC ${SRCS}) +target_link_libraries(mysqlclient ${OPENSSL_LIBRARIES}) + target_include_directories(mysqlclient PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/include) target_include_directories(mysqlclient PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/common/include) target_include_directories(mysqlclient PUBLIC ${MARIADB_CLIENT_SOURCE_DIR}/include) diff --git a/contrib/murmurhash/CMakeLists.txt b/contrib/murmurhash/CMakeLists.txt new file mode 100644 index 00000000000..eb1d621e5cf --- /dev/null +++ b/contrib/murmurhash/CMakeLists.txt @@ -0,0 +1,5 @@ +add_library(murmurhash + src/murmurhash2.cpp + include/murmurhash2.h) + +target_include_directories (murmurhash PUBLIC include) diff --git a/contrib/murmurhash/LICENSE b/contrib/murmurhash/LICENSE new file mode 100644 index 00000000000..f6cdede60b8 --- /dev/null +++ b/contrib/murmurhash/LICENSE @@ -0,0 +1 @@ +MurmurHash was written by Austin Appleby, and is placed in the publicdomain. The author hereby disclaims copyright to this source code. 
diff --git a/contrib/libmurmurhash2/README b/contrib/murmurhash/README similarity index 100% rename from contrib/libmurmurhash2/README rename to contrib/murmurhash/README diff --git a/contrib/libmurmurhash2/include/murmurhash2.h b/contrib/murmurhash/include/murmurhash2.h similarity index 100% rename from contrib/libmurmurhash2/include/murmurhash2.h rename to contrib/murmurhash/include/murmurhash2.h diff --git a/contrib/libmurmurhash2/src/murmurhash2.cpp b/contrib/murmurhash/src/murmurhash2.cpp similarity index 100% rename from contrib/libmurmurhash2/src/murmurhash2.cpp rename to contrib/murmurhash/src/murmurhash2.cpp diff --git a/contrib/poco b/contrib/poco index 3a2d0a833a2..4ab45bc3bb0 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 3a2d0a833a22ef5e1164a9ada54e3253cb038904 +Subproject commit 4ab45bc3bb0d2c476ea5385ec2d398c6bfc9f089 diff --git a/contrib/ssl b/contrib/ssl index 6fbe1c6f404..4f9a7b87451 160000 --- a/contrib/ssl +++ b/contrib/ssl @@ -1 +1 @@ -Subproject commit 6fbe1c6f404193989c5f6a63115d80fbe34ce2a3 +Subproject commit 4f9a7b8745184410dc0b31ba548ce21ac64edd9c diff --git a/contrib/unixodbc-cmake/CMakeLists.txt b/contrib/unixodbc-cmake/CMakeLists.txt index fa147eb3861..4f9f6b41538 100644 --- a/contrib/unixodbc-cmake/CMakeLists.txt +++ b/contrib/unixodbc-cmake/CMakeLists.txt @@ -279,9 +279,10 @@ target_link_libraries(unixodbc ltdl) # SYSTEM_FILE_PATH was changed to /etc -target_include_directories(unixodbc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64) +target_include_directories(unixodbc PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/private) +target_include_directories(unixodbc PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64) target_include_directories(unixodbc PUBLIC ${ODBC_SOURCE_DIR}/include) target_compile_definitions(unixodbc PRIVATE -DHAVE_CONFIG_H) -target_compile_options(unixodbc PRIVATE -Wno-dangling-else -Wno-parentheses -Wno-unknown-warning-option -O2) +target_compile_options(unixodbc PRIVATE 
-Wno-dangling-else -Wno-parentheses -Wno-misleading-indentation -Wno-unknown-warning-option -O2) diff --git a/contrib/unixodbc-cmake/linux_x86_64/config.h b/contrib/unixodbc-cmake/linux_x86_64/private/config.h similarity index 100% rename from contrib/unixodbc-cmake/linux_x86_64/config.h rename to contrib/unixodbc-cmake/linux_x86_64/private/config.h diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index ba014767469..91d5b7676a2 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -53,6 +53,7 @@ add_headers_and_sources(dbms src/Interpreters/ClusterProxy) add_headers_and_sources(dbms src/Columns) add_headers_and_sources(dbms src/Storages) add_headers_and_sources(dbms src/Storages/Distributed) +add_headers_and_sources(dbms src/Storages/Kafka) add_headers_and_sources(dbms src/Storages/MergeTree) add_headers_and_sources(dbms src/Client) add_headers_and_sources(dbms src/Formats) @@ -84,7 +85,7 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctions add_library(clickhouse_common_io ${SPLIT_SHARED} ${clickhouse_common_io_headers} ${clickhouse_common_io_sources}) -if (ARCH_FREEBSD) +if (OS_FREEBSD) target_compile_definitions (clickhouse_common_io PUBLIC CLOCK_MONOTONIC_COARSE=CLOCK_MONOTONIC_FAST) endif () diff --git a/dbms/cmake/version.cmake b/dbms/cmake/version.cmake index 8ee16a16856..4e1fb44caa4 100644 --- a/dbms/cmake/version.cmake +++ b/dbms/cmake/version.cmake @@ -1,11 +1,11 @@ # This strings autochanged from release_lib.sh: -set(VERSION_REVISION 54403 CACHE STRING "") +set(VERSION_REVISION 54404 CACHE STRING "") set(VERSION_MAJOR 18 CACHE STRING "") -set(VERSION_MINOR 8 CACHE STRING "") +set(VERSION_MINOR 9 CACHE STRING "") set(VERSION_PATCH 0 CACHE STRING "") -set(VERSION_GITHASH e6be3df322f24ff3aa9ae9a97b9b01b2c88ab7b0 CACHE STRING "") -set(VERSION_DESCRIBE v18.8.0-testing CACHE STRING "") -set(VERSION_STRING 18.8.0 CACHE STRING "") +set(VERSION_GITHASH c83721a02db002eef7ff864f82d53ca89d47f9e6 CACHE STRING "") 
+set(VERSION_DESCRIBE v18.9.0-testing CACHE STRING "") +set(VERSION_STRING 18.9.0 CACHE STRING "") # end of autochange set(VERSION_EXTRA "" CACHE STRING "") @@ -14,18 +14,11 @@ set(VERSION_TWEAK "" CACHE STRING "") if (VERSION_TWEAK) string(CONCAT VERSION_STRING ${VERSION_STRING} "." ${VERSION_TWEAK}) endif () + if (VERSION_EXTRA) string(CONCAT VERSION_STRING ${VERSION_STRING} "." ${VERSION_EXTRA}) endif () -set (VERSION_NAME "${PROJECT_NAME}") -set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}") - -if (APPLE) - # dirty hack: ld: malformed 64-bit a.b.c.d.e version number: 1.1.54160 - math (EXPR VERSION_SO1 "${VERSION_REVISION}/255") - math (EXPR VERSION_SO2 "${VERSION_REVISION}%255") - set (VERSION_SO "${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_SO1}.${VERSION_SO2}") -else () - set (VERSION_SO "${VERSION_STRING}") -endif () +set (VERSION_NAME "${PROJECT_NAME}" CACHE STRING "") +set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}" CACHE STRING "") +set (VERSION_SO "${VERSION_STRING}" CACHE STRING "") diff --git a/dbms/programs/CMakeLists.txt b/dbms/programs/CMakeLists.txt index 20baa6b039c..a5692d81c09 100644 --- a/dbms/programs/CMakeLists.txt +++ b/dbms/programs/CMakeLists.txt @@ -152,6 +152,6 @@ else () endif () -if (USE_EMBEDDED_COMPILER AND ENABLE_CLICKHOUSE_SERVER) +if (TARGET clickhouse-server AND TARGET copy-headers) add_dependencies(clickhouse-server copy-headers) endif () diff --git a/dbms/programs/server/CMakeLists.txt b/dbms/programs/server/CMakeLists.txt index 74297d29864..c146f40d281 100644 --- a/dbms/programs/server/CMakeLists.txt +++ b/dbms/programs/server/CMakeLists.txt @@ -19,7 +19,7 @@ if (CLICKHOUSE_SPLIT_BINARY) install (TARGETS clickhouse-server ${CLICKHOUSE_ALL_TARGETS} RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif () -if (NOT APPLE AND NOT ARCH_FREEBSD) +if (OS_LINUX) set (GLIBC_MAX_REQUIRED 2.4) add_test(NAME GLIBC_required_version COMMAND bash -c "readelf -s ${CMAKE_CURRENT_BINARY_DIR}/../clickhouse-server | 
grep '@GLIBC' | grep -oP 'GLIBC_[\\d\\.]+' | sort | uniq | sort -r | perl -lnE 'exit 1 if $_ gt q{GLIBC_${GLIBC_MAX_REQUIRED}}'") endif () diff --git a/dbms/src/Common/FieldVisitors.cpp b/dbms/src/Common/FieldVisitors.cpp index 3132a7412ca..62b7667d936 100644 --- a/dbms/src/Common/FieldVisitors.cpp +++ b/dbms/src/Common/FieldVisitors.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,13 @@ String FieldVisitorDump::operator() (const UInt64 & x) const { return formatQuot String FieldVisitorDump::operator() (const Int64 & x) const { return formatQuotedWithPrefix(x, "Int64_"); } String FieldVisitorDump::operator() (const Float64 & x) const { return formatQuotedWithPrefix(x, "Float64_"); } +String FieldVisitorDump::operator() (const UInt128 & x) const +{ + WriteBufferFromOwnString wb; + wb << "UInt128_" << x.low << "_" << x.high; + return wb.str(); + +} String FieldVisitorDump::operator() (const String & x) const { @@ -47,14 +55,14 @@ String FieldVisitorDump::operator() (const Array & x) const { WriteBufferFromOwnString wb; - wb.write("Array_[", 7); + wb << "Array_["; for (auto it = x.begin(); it != x.end(); ++it) { if (it != x.begin()) - wb.write(", ", 2); - writeString(applyVisitor(*this, *it), wb); + wb << ", "; + wb << applyVisitor(*this, *it); } - writeChar(']', wb); + wb << ']'; return wb.str(); } @@ -64,14 +72,14 @@ String FieldVisitorDump::operator() (const Tuple & x_def) const auto & x = x_def.toUnderType(); WriteBufferFromOwnString wb; - wb.write("Tuple_(", 7); + wb << "Tuple_("; for (auto it = x.begin(); it != x.end(); ++it) { if (it != x.begin()) - wb.write(", ", 2); - writeString(applyVisitor(*this, *it), wb); + wb << ", "; + wb << applyVisitor(*this, *it); } - writeChar(')', wb); + wb << ')'; return wb.str(); } @@ -105,19 +113,24 @@ String FieldVisitorToString::operator() (const Int64 & x) const { return formatQ String FieldVisitorToString::operator() (const Float64 & x) const { return formatFloat(x); } String 
FieldVisitorToString::operator() (const String & x) const { return formatQuoted(x); } +String FieldVisitorToString::operator() (const UInt128 & x) const +{ + /// Dummy implementation. There is no UInt128 literals in SQL. + return FieldVisitorDump()(x); +} String FieldVisitorToString::operator() (const Array & x) const { WriteBufferFromOwnString wb; - writeChar('[', wb); + wb << '['; for (Array::const_iterator it = x.begin(); it != x.end(); ++it) { if (it != x.begin()) wb.write(", ", 2); - writeString(applyVisitor(*this, *it), wb); + wb << applyVisitor(*this, *it); } - writeChar(']', wb); + wb << ']'; return wb.str(); } @@ -127,14 +140,14 @@ String FieldVisitorToString::operator() (const Tuple & x_def) const auto & x = x_def.toUnderType(); WriteBufferFromOwnString wb; - writeChar('(', wb); + wb << '('; for (auto it = x.begin(); it != x.end(); ++it) { if (it != x.begin()) - wb.write(", ", 2); - writeString(applyVisitor(*this, *it), wb); + wb << ", "; + wb << applyVisitor(*this, *it); } - writeChar(')', wb); + wb << ')'; return wb.str(); } @@ -155,6 +168,13 @@ void FieldVisitorHash::operator() (const UInt64 & x) const hash.update(x); } +void FieldVisitorHash::operator() (const UInt128 & x) const +{ + UInt8 type = Field::Types::UInt128; + hash.update(type); + hash.update(x); +} + void FieldVisitorHash::operator() (const Int64 & x) const { UInt8 type = Field::Types::Int64; diff --git a/dbms/src/Common/FieldVisitors.h b/dbms/src/Common/FieldVisitors.h index b59c6a47aa7..8abf75dbc64 100644 --- a/dbms/src/Common/FieldVisitors.h +++ b/dbms/src/Common/FieldVisitors.h @@ -38,6 +38,7 @@ typename std::decay_t::ResultType applyVisitor(Visitor && visitor, F && { case Field::Types::Null: return visitor(field.template get()); case Field::Types::UInt64: return visitor(field.template get()); + case Field::Types::UInt128: return visitor(field.template get()); case Field::Types::Int64: return visitor(field.template get()); case Field::Types::Float64: return visitor(field.template 
get()); case Field::Types::String: return visitor(field.template get()); @@ -57,6 +58,7 @@ static typename std::decay_t::ResultType applyBinaryVisitorImpl(Visitor { case Field::Types::Null: return visitor(field1, field2.template get()); case Field::Types::UInt64: return visitor(field1, field2.template get()); + case Field::Types::UInt128: return visitor(field1, field2.template get()); case Field::Types::Int64: return visitor(field1, field2.template get()); case Field::Types::Float64: return visitor(field1, field2.template get()); case Field::Types::String: return visitor(field1, field2.template get()); @@ -79,6 +81,9 @@ typename std::decay_t::ResultType applyVisitor(Visitor && visitor, F1 & case Field::Types::UInt64: return applyBinaryVisitorImpl( std::forward(visitor), field1.template get(), std::forward(field2)); + case Field::Types::UInt128: + return applyBinaryVisitorImpl( + std::forward(visitor), field1.template get(), std::forward(field2)); case Field::Types::Int64: return applyBinaryVisitorImpl( std::forward(visitor), field1.template get(), std::forward(field2)); @@ -107,6 +112,7 @@ class FieldVisitorToString : public StaticVisitor public: String operator() (const Null & x) const; String operator() (const UInt64 & x) const; + String operator() (const UInt128 & x) const; String operator() (const Int64 & x) const; String operator() (const Float64 & x) const; String operator() (const String & x) const; @@ -121,6 +127,7 @@ class FieldVisitorDump : public StaticVisitor public: String operator() (const Null & x) const; String operator() (const UInt64 & x) const; + String operator() (const UInt128 & x) const; String operator() (const Int64 & x) const; String operator() (const Float64 & x) const; String operator() (const String & x) const; @@ -157,6 +164,11 @@ public: T operator() (const UInt64 & x) const { return x; } T operator() (const Int64 & x) const { return x; } T operator() (const Float64 & x) const { return x; } + + T operator() (const UInt128 &) const + { 
+ throw Exception("Cannot convert UInt128 to " + demangle(typeid(T).name()), ErrorCodes::CANNOT_CONVERT_TYPE); + } }; @@ -170,6 +182,7 @@ public: void operator() (const Null & x) const; void operator() (const UInt64 & x) const; + void operator() (const UInt128 & x) const; void operator() (const Int64 & x) const; void operator() (const Float64 & x) const; void operator() (const String & x) const; @@ -180,44 +193,60 @@ public: /** More precise comparison, used for index. * Differs from Field::operator< and Field::operator== in that it also compares values of different types. * Comparison rules are same as in FunctionsComparison (to be consistent with expression evaluation in query). + * + * TODO Comparisons of UInt128 with different type are incorrect. */ class FieldVisitorAccurateEquals : public StaticVisitor { public: bool operator() (const Null &, const Null &) const { return true; } bool operator() (const Null &, const UInt64 &) const { return false; } + bool operator() (const Null &, const UInt128 &) const { return false; } bool operator() (const Null &, const Int64 &) const { return false; } bool operator() (const Null &, const Float64 &) const { return false; } bool operator() (const Null &, const String &) const { return false; } bool operator() (const Null &, const Array &) const { return false; } bool operator() (const Null &, const Tuple &) const { return false; } - bool operator() (const UInt64 &, const Null &) const { return false; } + bool operator() (const UInt64 &, const Null &) const { return false; } bool operator() (const UInt64 & l, const UInt64 & r) const { return l == r; } + bool operator() (const UInt64 &, const UInt128) const { return true; } bool operator() (const UInt64 & l, const Int64 & r) const { return accurate::equalsOp(l, r); } bool operator() (const UInt64 & l, const Float64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const UInt64 &, const String &) const { return false; } - bool operator() (const UInt64 &, 
const Array &) const { return false; } - bool operator() (const UInt64 &, const Tuple &) const { return false; } + bool operator() (const UInt64 &, const String &) const { return false; } + bool operator() (const UInt64 &, const Array &) const { return false; } + bool operator() (const UInt64 &, const Tuple &) const { return false; } - bool operator() (const Int64 &, const Null &) const { return false; } + bool operator() (const UInt128 &, const Null &) const { return false; } + bool operator() (const UInt128 &, const UInt64) const { return false; } + bool operator() (const UInt128 & l, const UInt128 & r) const { return l == r; } + bool operator() (const UInt128 &, const Int64) const { return false; } + bool operator() (const UInt128 &, const Float64) const { return false; } + bool operator() (const UInt128 &, const String &) const { return false; } + bool operator() (const UInt128 &, const Array &) const { return false; } + bool operator() (const UInt128 &, const Tuple &) const { return false; } + + bool operator() (const Int64 &, const Null &) const { return false; } bool operator() (const Int64 & l, const UInt64 & r) const { return accurate::equalsOp(l, r); } + bool operator() (const Int64 &, const UInt128) const { return false; } bool operator() (const Int64 & l, const Int64 & r) const { return l == r; } bool operator() (const Int64 & l, const Float64 & r) const { return accurate::equalsOp(l, r); } - bool operator() (const Int64 &, const String &) const { return false; } - bool operator() (const Int64 &, const Array &) const { return false; } - bool operator() (const Int64 &, const Tuple &) const { return false; } + bool operator() (const Int64 &, const String &) const { return false; } + bool operator() (const Int64 &, const Array &) const { return false; } + bool operator() (const Int64 &, const Tuple &) const { return false; } - bool operator() (const Float64 &, const Null &) const { return false; } + bool operator() (const Float64 &, const Null &) const { 
return false; } bool operator() (const Float64 & l, const UInt64 & r) const { return accurate::equalsOp(l, r); } + bool operator() (const Float64 &, const UInt128) const { return false; } bool operator() (const Float64 & l, const Int64 & r) const { return accurate::equalsOp(l, r); } bool operator() (const Float64 & l, const Float64 & r) const { return l == r; } - bool operator() (const Float64 &, const String &) const { return false; } - bool operator() (const Float64 &, const Array &) const { return false; } - bool operator() (const Float64 &, const Tuple &) const { return false; } + bool operator() (const Float64 &, const String &) const { return false; } + bool operator() (const Float64 &, const Array &) const { return false; } + bool operator() (const Float64 &, const Tuple &) const { return false; } bool operator() (const String &, const Null &) const { return false; } bool operator() (const String &, const UInt64 &) const { return false; } + bool operator() (const String &, const UInt128 &) const { return false; } bool operator() (const String &, const Int64 &) const { return false; } bool operator() (const String &, const Float64 &) const { return false; } bool operator() (const String & l, const String & r) const { return l == r; } @@ -226,6 +255,7 @@ public: bool operator() (const Array &, const Null &) const { return false; } bool operator() (const Array &, const UInt64 &) const { return false; } + bool operator() (const Array &, const UInt128 &) const { return false; } bool operator() (const Array &, const Int64 &) const { return false; } bool operator() (const Array &, const Float64 &) const { return false; } bool operator() (const Array &, const String &) const { return false; } @@ -234,6 +264,7 @@ public: bool operator() (const Tuple &, const Null &) const { return false; } bool operator() (const Tuple &, const UInt64 &) const { return false; } + bool operator() (const Tuple &, const UInt128 &) const { return false; } bool operator() (const Tuple &, 
const Int64 &) const { return false; } bool operator() (const Tuple &, const Float64 &) const { return false; } bool operator() (const Tuple &, const String &) const { return false; } @@ -247,45 +278,60 @@ public: bool operator() (const Null &, const Null &) const { return false; } bool operator() (const Null &, const UInt64 &) const { return true; } bool operator() (const Null &, const Int64 &) const { return true; } + bool operator() (const Null &, const UInt128 &) const { return true; } bool operator() (const Null &, const Float64 &) const { return true; } bool operator() (const Null &, const String &) const { return true; } bool operator() (const Null &, const Array &) const { return true; } bool operator() (const Null &, const Tuple &) const { return true; } - bool operator() (const UInt64 &, const Null &) const { return false; } + bool operator() (const UInt64 &, const Null &) const { return false; } bool operator() (const UInt64 & l, const UInt64 & r) const { return l < r; } + bool operator() (const UInt64 &, const UInt128 &) const { return true; } bool operator() (const UInt64 & l, const Int64 & r) const { return accurate::lessOp(l, r); } bool operator() (const UInt64 & l, const Float64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const UInt64 &, const String &) const { return true; } - bool operator() (const UInt64 &, const Array &) const { return true; } - bool operator() (const UInt64 &, const Tuple &) const { return true; } + bool operator() (const UInt64 &, const String &) const { return true; } + bool operator() (const UInt64 &, const Array &) const { return true; } + bool operator() (const UInt64 &, const Tuple &) const { return true; } - bool operator() (const Int64 &, const Null &) const { return false; } + bool operator() (const UInt128 &, const Null &) const { return false; } + bool operator() (const UInt128 &, const UInt64) const { return false; } + bool operator() (const UInt128 & l, const UInt128 & r) const { return l < r; 
} + bool operator() (const UInt128 &, const Int64) const { return false; } + bool operator() (const UInt128 &, const Float64) const { return false; } + bool operator() (const UInt128 &, const String &) const { return false; } + bool operator() (const UInt128 &, const Array &) const { return false; } + bool operator() (const UInt128 &, const Tuple &) const { return false; } + + bool operator() (const Int64 &, const Null &) const { return false; } bool operator() (const Int64 & l, const UInt64 & r) const { return accurate::lessOp(l, r); } + bool operator() (const Int64 &, const UInt128 &) const { return false; } bool operator() (const Int64 & l, const Int64 & r) const { return l < r; } bool operator() (const Int64 & l, const Float64 & r) const { return accurate::lessOp(l, r); } - bool operator() (const Int64 &, const String &) const { return true; } - bool operator() (const Int64 &, const Array &) const { return true; } - bool operator() (const Int64 &, const Tuple &) const { return true; } + bool operator() (const Int64 &, const String &) const { return true; } + bool operator() (const Int64 &, const Array &) const { return true; } + bool operator() (const Int64 &, const Tuple &) const { return true; } - bool operator() (const Float64 &, const Null &) const { return false; } + bool operator() (const Float64 &, const Null &) const { return false; } bool operator() (const Float64 & l, const UInt64 & r) const { return accurate::lessOp(l, r); } + bool operator() (const Float64, const UInt128 &) const { return false; } bool operator() (const Float64 & l, const Int64 & r) const { return accurate::lessOp(l, r); } bool operator() (const Float64 & l, const Float64 & r) const { return l < r; } - bool operator() (const Float64 &, const String &) const { return true; } - bool operator() (const Float64 &, const Array &) const { return true; } - bool operator() (const Float64 &, const Tuple &) const { return true; } + bool operator() (const Float64 &, const String &) const { 
return true; } + bool operator() (const Float64 &, const Array &) const { return true; } + bool operator() (const Float64 &, const Tuple &) const { return true; } - bool operator() (const String &, const Null &) const { return false; } - bool operator() (const String &, const UInt64 &) const { return false; } - bool operator() (const String &, const Int64 &) const { return false; } - bool operator() (const String &, const Float64 &) const { return false; } + bool operator() (const String &, const Null &) const { return false; } + bool operator() (const String &, const UInt64 &) const { return false; } + bool operator() (const String &, const UInt128 &) const { return false; } + bool operator() (const String &, const Int64 &) const { return false; } + bool operator() (const String &, const Float64 &) const { return false; } bool operator() (const String & l, const String & r) const { return l < r; } - bool operator() (const String &, const Array &) const { return true; } - bool operator() (const String &, const Tuple &) const { return true; } + bool operator() (const String &, const Array &) const { return true; } + bool operator() (const String &, const Tuple &) const { return true; } bool operator() (const Array &, const Null &) const { return false; } bool operator() (const Array &, const UInt64 &) const { return false; } + bool operator() (const Array &, const UInt128 &) const { return false; } bool operator() (const Array &, const Int64 &) const { return false; } bool operator() (const Array &, const Float64 &) const { return false; } bool operator() (const Array &, const String &) const { return false; } @@ -294,6 +340,7 @@ public: bool operator() (const Tuple &, const Null &) const { return false; } bool operator() (const Tuple &, const UInt64 &) const { return false; } + bool operator() (const Tuple &, const UInt128 &) const { return false; } bool operator() (const Tuple &, const Int64 &) const { return false; } bool operator() (const Tuple &, const Float64 
&) const { return false; } bool operator() (const Tuple &, const String &) const { return false; } @@ -318,6 +365,7 @@ public: bool operator() (Null &) const { throw Exception("Cannot sum Nulls", ErrorCodes::LOGICAL_ERROR); } bool operator() (String &) const { throw Exception("Cannot sum Strings", ErrorCodes::LOGICAL_ERROR); } bool operator() (Array &) const { throw Exception("Cannot sum Arrays", ErrorCodes::LOGICAL_ERROR); } + bool operator() (UInt128 &) const { throw Exception("Cannot sum UUIDs", ErrorCodes::LOGICAL_ERROR); } }; } diff --git a/dbms/src/Core/Block.cpp b/dbms/src/Core/Block.cpp index 514289ee325..bbac13a1ca9 100644 --- a/dbms/src/Core/Block.cpp +++ b/dbms/src/Core/Block.cpp @@ -54,7 +54,7 @@ void Block::insert(size_t position, const ColumnWithTypeAndName & elem) if (name_pos.second >= position) ++name_pos.second; - index_by_name[elem.name] = position; + index_by_name.emplace(elem.name, position); data.emplace(data.begin() + position, elem); } @@ -68,20 +68,20 @@ void Block::insert(size_t position, ColumnWithTypeAndName && elem) if (name_pos.second >= position) ++name_pos.second; - index_by_name[elem.name] = position; + index_by_name.emplace(elem.name, position); data.emplace(data.begin() + position, std::move(elem)); } void Block::insert(const ColumnWithTypeAndName & elem) { - index_by_name[elem.name] = data.size(); + index_by_name.emplace(elem.name, data.size()); data.emplace_back(elem); } void Block::insert(ColumnWithTypeAndName && elem) { - index_by_name[elem.name] = data.size(); + index_by_name.emplace(elem.name, data.size()); data.emplace_back(std::move(elem)); } diff --git a/dbms/src/DataTypes/FieldToDataType.cpp b/dbms/src/DataTypes/FieldToDataType.cpp index 1b4fbd53c6b..3c2e78b4295 100644 --- a/dbms/src/DataTypes/FieldToDataType.cpp +++ b/dbms/src/DataTypes/FieldToDataType.cpp @@ -18,6 +18,7 @@ namespace DB namespace ErrorCodes { extern const int EMPTY_DATA_PASSED; + extern const int NOT_IMPLEMENTED; } @@ -34,6 +35,11 @@ DataTypePtr 
FieldToDataType::operator() (const UInt64 & x) const return std::make_shared(); } +DataTypePtr FieldToDataType::operator() (const UInt128 &) const +{ + throw Exception("There are no UInt128 literals in SQL", ErrorCodes::NOT_IMPLEMENTED); +} + DataTypePtr FieldToDataType::operator() (const Int64 & x) const { if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); diff --git a/dbms/src/DataTypes/FieldToDataType.h b/dbms/src/DataTypes/FieldToDataType.h index c6256a6f04b..a60c6a725d8 100644 --- a/dbms/src/DataTypes/FieldToDataType.h +++ b/dbms/src/DataTypes/FieldToDataType.h @@ -19,6 +19,7 @@ class FieldToDataType : public StaticVisitor public: DataTypePtr operator() (const Null & x) const; DataTypePtr operator() (const UInt64 & x) const; + DataTypePtr operator() (const UInt128 & x) const; DataTypePtr operator() (const Int64 & x) const; DataTypePtr operator() (const Float64 & x) const; DataTypePtr operator() (const String & x) const; diff --git a/dbms/src/Functions/CMakeLists.txt b/dbms/src/Functions/CMakeLists.txt index 81b6c46eb79..ef285659be2 100644 --- a/dbms/src/Functions/CMakeLists.txt +++ b/dbms/src/Functions/CMakeLists.txt @@ -41,7 +41,7 @@ generate_function_register(Array FunctionArrayEnumerate FunctionArrayEnumerateUniq FunctionArrayUniq - FunctionArrayDistinct + FunctionArrayDistinct FunctionEmptyArrayUInt8 FunctionEmptyArrayUInt16 FunctionEmptyArrayUInt32 @@ -91,7 +91,7 @@ list(REMOVE_ITEM clickhouse_functions_headers IFunction.h FunctionFactory.h Func add_library(clickhouse_functions ${clickhouse_functions_sources}) -target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} ${MURMURHASH2_LIBRARIES}) +target_link_libraries(clickhouse_functions PUBLIC dbms PRIVATE libconsistent-hashing ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES} murmurhash) target_include_directories (clickhouse_functions SYSTEM BEFORE PUBLIC ${DIVIDE_INCLUDE_DIR}) diff --git 
a/dbms/src/IO/tests/CMakeLists.txt b/dbms/src/IO/tests/CMakeLists.txt index 7b427a6ae00..324baa8278c 100644 --- a/dbms/src/IO/tests/CMakeLists.txt +++ b/dbms/src/IO/tests/CMakeLists.txt @@ -60,7 +60,7 @@ add_check (hashing_read_buffer) add_executable (io_operators operators.cpp) target_link_libraries (io_operators clickhouse_common_io) -if (NOT APPLE AND NOT ARCH_FREEBSD) +if (OS_LINUX) add_executable(write_buffer_aio write_buffer_aio.cpp) target_link_libraries (write_buffer_aio clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY}) diff --git a/dbms/src/Interpreters/CMakeLists.txt b/dbms/src/Interpreters/CMakeLists.txt index 6fb123d2677..a6c043ddf6c 100644 --- a/dbms/src/Interpreters/CMakeLists.txt +++ b/dbms/src/Interpreters/CMakeLists.txt @@ -1,5 +1,5 @@ -if (ARCH_FREEBSD) +if (OS_FREEBSD) set (PATH_SHARE "/usr/local/share" CACHE STRING "") else () set (PATH_SHARE "/usr/share" CACHE STRING "") diff --git a/dbms/src/Interpreters/Cluster.cpp b/dbms/src/Interpreters/Cluster.cpp index efa2ad60732..cd1a3a2da11 100644 --- a/dbms/src/Interpreters/Cluster.cpp +++ b/dbms/src/Interpreters/Cluster.cpp @@ -127,11 +127,7 @@ String Cluster::Address::toStringFull() const Clusters::Clusters(Poco::Util::AbstractConfiguration & config, const Settings & settings, const String & config_name) { - Poco::Util::AbstractConfiguration::Keys config_keys; - config.keys(config_name, config_keys); - - for (const auto & key : config_keys) - impl.emplace(key, std::make_shared(config, settings, config_name + "." + key)); + updateClusters(config, settings, config_name); } @@ -158,19 +154,9 @@ void Clusters::updateClusters(Poco::Util::AbstractConfiguration & config, const std::lock_guard lock(mutex); + impl.clear(); for (const auto & key : config_keys) - { - auto it = impl.find(key); - auto new_cluster = std::make_shared(config, settings, config_name + "." 
+ key); - - if (it == impl.end()) - impl.emplace(key, std::move(new_cluster)); - else - { - //TODO: Check that cluster update is necessarily - it->second = std::move(new_cluster); - } - } + impl.emplace(key, std::make_shared(config, settings, config_name + "." + key)); } Clusters::Impl Clusters::getContainer() const diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp deleted file mode 100644 index 2638399f8ff..00000000000 --- a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - -BlockExtraInfo toBlockExtraInfo(const Cluster::Address & address) -{ - BlockExtraInfo block_extra_info; - block_extra_info.host = address.host_name; - block_extra_info.resolved_address = address.getResolvedAddress().toString(); - block_extra_info.port = address.port; - block_extra_info.user = address.user; - block_extra_info.is_valid = true; - return block_extra_info; -} - -} - -namespace ClusterProxy -{ - -void DescribeStreamFactory::createForShard( - const Cluster::ShardInfo & shard_info, - const String & query, const ASTPtr & query_ast, - const Context & context, const ThrottlerPtr & throttler, - BlockInputStreams & res) -{ - for (const Cluster::Address & local_address : shard_info.local_addresses) - { - InterpreterDescribeQuery interpreter{query_ast, context}; - BlockInputStreamPtr stream = interpreter.execute().in; - - /** Materialization is needed, since from remote servers the constants come materialized. - * If you do not do this, different types (Const and non-Const) columns will be produced in different threads, - * And this is not allowed, since all code is based on the assumption that in the block stream all types are the same. 
- */ - BlockInputStreamPtr materialized_stream = std::make_shared(stream); - res.emplace_back(std::make_shared(materialized_stream, toBlockExtraInfo(local_address))); - } - - if (shard_info.hasRemoteConnections()) - { - auto remote_stream = std::make_shared( - shard_info.pool, query, InterpreterDescribeQuery::getSampleBlock(), context, nullptr, throttler); - remote_stream->setPoolMode(PoolMode::GET_ALL); - remote_stream->appendExtraInfo(); - res.emplace_back(std::move(remote_stream)); - } -} - -} -} diff --git a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h b/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h deleted file mode 100644 index 05befc59305..00000000000 --- a/dbms/src/Interpreters/ClusterProxy/DescribeStreamFactory.h +++ /dev/null @@ -1,23 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -namespace ClusterProxy -{ - -class DescribeStreamFactory final : public IStreamFactory -{ -public: - void createForShard( - const Cluster::ShardInfo & shard_info, - const String & query, const ASTPtr & query_ast, - const Context & context, const ThrottlerPtr & throttler, - BlockInputStreams & res) override; -}; - -} - -} diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 82287ec3878..c17fb435f88 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -203,7 +203,7 @@ ExpressionAnalyzer::ExpressionAnalyzer( const SubqueriesForSets & subqueries_for_set_) : ast(ast_), context(context_), settings(context.getSettings()), subquery_depth(subquery_depth_), - source_columns(source_columns_), required_result_columns(required_result_columns_.begin(), required_result_columns_.end()), + source_columns(source_columns_), required_result_columns(required_result_columns_), storage(storage_), do_global(do_global_), subqueries_for_sets(subqueries_for_set_) { @@ -2847,7 +2847,8 @@ void ExpressionAnalyzer::appendProjectResult(ExpressionActionsChain & 
chain) con for (size_t i = 0; i < asts.size(); ++i) { String result_name = asts[i]->getAliasOrColumnName(); - if (required_result_columns.empty() || required_result_columns.count(result_name)) + if (required_result_columns.empty() + || std::find(required_result_columns.begin(), required_result_columns.end(), result_name) != required_result_columns.end()) { result_columns.emplace_back(asts[i]->getColumnName(), result_name); step.required_output.push_back(result_columns.back().second); @@ -3393,15 +3394,37 @@ void ExpressionAnalyzer::removeUnneededColumnsFromSelectClause() if (!select_query) return; - if (required_result_columns.empty() || select_query->distinct) + if (required_result_columns.empty()) return; ASTs & elements = select_query->select_expression_list->children; - elements.erase(std::remove_if(elements.begin(), elements.end(), [this](const auto & node) + ASTs new_elements; + new_elements.reserve(elements.size()); + + /// Some columns may be queried multiple times, like SELECT x, y, y FROM table. + /// In that case we keep them exactly same number of times. 
+ std::map required_columns_with_duplicate_count; + for (const auto & name : required_result_columns) + ++required_columns_with_duplicate_count[name]; + + for (const auto & elem : elements) { - return !required_result_columns.count(node->getAliasOrColumnName()) && !hasArrayJoin(node); - }), elements.end()); + String name = elem->getAliasOrColumnName(); + + auto it = required_columns_with_duplicate_count.find(name); + if (required_columns_with_duplicate_count.end() != it && it->second) + { + new_elements.push_back(elem); + --it->second; + } + else if (select_query->distinct || hasArrayJoin(elem)) + { + new_elements.push_back(elem); + } + } + + elements = std::move(new_elements); } } diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index 084e91ce558..25a861a2123 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -204,7 +204,7 @@ private: /** If non-empty, ignore all expressions in not from this list. */ - NameSet required_result_columns; + Names required_result_columns; /// Columns after ARRAY JOIN, JOIN, and/or aggregation. 
NamesAndTypesList aggregated_columns; diff --git a/dbms/src/Interpreters/ExternalLoader.cpp b/dbms/src/Interpreters/ExternalLoader.cpp index fc25327558e..751af361d0f 100644 --- a/dbms/src/Interpreters/ExternalLoader.cpp +++ b/dbms/src/Interpreters/ExternalLoader.cpp @@ -228,6 +228,17 @@ void ExternalLoader::reloadFromConfigFiles(const bool throw_on_error, const bool throw; } } + + /// erase removed from config loadable objects + std::list removed_loadable_objects; + for (const auto & loadable : loadable_objects) + { + const auto & current_config = loadable_objects_defined_in_config[loadable.second.origin]; + if (current_config.find(loadable.first) == std::end(current_config)) + removed_loadable_objects.emplace_back(loadable.first); + } + for(const auto & name : removed_loadable_objects) + loadable_objects.erase(name); } void ExternalLoader::reloadFromConfigFile(const std::string & config_path, const bool throw_on_error, @@ -250,6 +261,8 @@ void ExternalLoader::reloadFromConfigFile(const std::string & config_path, const if (force_reload || last_modified > config_last_modified) { auto config = config_repository->load(config_path); + + loadable_objects_defined_in_config[config_path].clear(); /// Definitions of loadable objects may have changed, recreate all of them @@ -282,7 +295,8 @@ void ExternalLoader::reloadFromConfigFile(const std::string & config_path, const LOG_WARNING(log, config_path << ": " + config_settings.external_name + " name cannot be empty"); continue; } - + + loadable_objects_defined_in_config[config_path].emplace(name); if (!loadable_name.empty() && name != loadable_name) continue; diff --git a/dbms/src/Interpreters/ExternalLoader.h b/dbms/src/Interpreters/ExternalLoader.h index 47163ca60d6..76d6cfc6f96 100644 --- a/dbms/src/Interpreters/ExternalLoader.h +++ b/dbms/src/Interpreters/ExternalLoader.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -146,6 +147,8 @@ private: /// Both for loadable_objects and 
failed_loadable_objects. std::unordered_map update_times; + std::unordered_map> loadable_objects_defined_in_config; + pcg64 rnd_engine{randomSeed()}; const Configuration & config; @@ -166,8 +169,8 @@ private: /// Check objects definitions in config files and reload or/and add new ones if the definition is changed /// If loadable_name is not empty, load only loadable object with name loadable_name void reloadFromConfigFiles(bool throw_on_error, bool force_reload = false, const std::string & loadable_name = ""); - void reloadFromConfigFile(const std::string & config_path, bool throw_on_error, bool force_reload, - const std::string & loadable_name); + void reloadFromConfigFile(const std::string & config_path, const bool throw_on_error, + const bool force_reload, const std::string & loadable_name); /// Check config files and update expired loadable objects void reloadAndUpdate(bool throw_on_error = false); diff --git a/dbms/src/Interpreters/InterpreterCheckQuery.cpp b/dbms/src/Interpreters/InterpreterCheckQuery.cpp index 2657775919c..bb8a1d46143 100644 --- a/dbms/src/Interpreters/InterpreterCheckQuery.cpp +++ b/dbms/src/Interpreters/InterpreterCheckQuery.cpp @@ -1,92 +1,16 @@ #include #include +#include #include -#include #include -#include -#include #include -#include #include #include -#include -#include -#include namespace DB { -namespace ErrorCodes -{ - extern const int INVALID_BLOCK_EXTRA_INFO; - extern const int RECEIVED_EMPTY_DATA; -} - - -namespace -{ - -/// A helper structure for performing a response to a DESCRIBE TABLE query with a Distributed table. -/// Contains information about the local table that was retrieved from a single replica. 
-struct TableDescription -{ - TableDescription(const Block & block, const BlockExtraInfo & extra_info_) - : extra_info(extra_info_) - { - const auto & name_column = typeid_cast(*block.getByName("name").column); - const auto & type_column = typeid_cast(*block.getByName("type").column); - const auto & default_type_column = typeid_cast(*block.getByName("default_type").column); - const auto & default_expression_column = typeid_cast(*block.getByName("default_expression").column); - - size_t row_count = block.rows(); - - names_with_types.reserve(name_column.byteSize() + type_column.byteSize() + (3 * row_count)); - - SHA512_CTX ctx; - SHA512_Init(&ctx); - - bool is_first = true; - for (size_t i = 0; i < row_count; ++i) - { - const auto & name = name_column.getDataAt(i).toString(); - const auto & type = type_column.getDataAt(i).toString(); - const auto & default_type = default_type_column.getDataAt(i).toString(); - const auto & default_expression = default_expression_column.getDataAt(i).toString(); - - names_with_types.append(is_first ? 
"" : ", "); - names_with_types.append(name); - names_with_types.append(" "); - names_with_types.append(type); - - SHA512_Update(&ctx, reinterpret_cast(name.data()), name.size()); - SHA512_Update(&ctx, reinterpret_cast(type.data()), type.size()); - SHA512_Update(&ctx, reinterpret_cast(default_type.data()), default_type.size()); - SHA512_Update(&ctx, reinterpret_cast(default_expression.data()), default_expression.size()); - - is_first = false; - } - - SHA512_Final(hash.data(), &ctx); - } - - using Hash = std::array; - - BlockExtraInfo extra_info; - std::string names_with_types; - Hash hash; - UInt32 structure_class; -}; - -inline bool operator<(const TableDescription & lhs, const TableDescription & rhs) -{ - return lhs.hash < rhs.hash; -} - -using TableDescriptions = std::deque; - -} - InterpreterCheckQuery::InterpreterCheckQuery(const ASTPtr & query_ptr_, const Context & context_) : query_ptr(query_ptr_), context(context_) { @@ -101,120 +25,14 @@ BlockIO InterpreterCheckQuery::execute() StoragePtr table = context.getTable(database_name, table_name); - auto distributed_table = dynamic_cast(&*table); - if (distributed_table != nullptr) - { - /// For tables with the Distributed engine, the CHECK TABLE query sends a DESCRIBE TABLE request to all replicas. - /// The identity of the structures is checked (column names + column types + default types + expressions - /// by default) of the tables that the distributed table looks at. 
+ auto column = ColumnUInt8::create(); + column->insert(UInt64(table->checkData())); + result = Block{{ std::move(column), std::make_shared(), "result" }}; - const auto & settings = context.getSettingsRef(); + BlockIO res; + res.in = std::make_shared(result); - BlockInputStreams streams = distributed_table->describe(context, settings); - streams[0] = std::make_shared>( - streams, nullptr, settings.max_distributed_connections); - streams.resize(1); - - auto stream_ptr = dynamic_cast(&*streams[0]); - if (stream_ptr == nullptr) - throw Exception("InterpreterCheckQuery: Internal error", ErrorCodes::LOGICAL_ERROR); - auto & stream = *stream_ptr; - - /// Get all data from the DESCRIBE TABLE queries. - - TableDescriptions table_descriptions; - - while (true) - { - if (stream.isCancelledOrThrowIfKilled()) - { - BlockIO res; - res.in = std::make_shared(result); - return res; - } - - Block block = stream.read(); - if (!block) - break; - - BlockExtraInfo info = stream.getBlockExtraInfo(); - if (!info.is_valid) - throw Exception("Received invalid block extra info", ErrorCodes::INVALID_BLOCK_EXTRA_INFO); - - table_descriptions.emplace_back(block, info); - } - - if (table_descriptions.empty()) - throw Exception("Received empty data", ErrorCodes::RECEIVED_EMPTY_DATA); - - /// Define an equivalence class for each table structure. - - std::sort(table_descriptions.begin(), table_descriptions.end()); - - UInt32 structure_class = 0; - - auto it = table_descriptions.begin(); - it->structure_class = structure_class; - - auto prev = it; - for (++it; it != table_descriptions.end(); ++it) - { - if (*prev < *it) - ++structure_class; - it->structure_class = structure_class; - prev = it; - } - - /// Construct the result. 
- - MutableColumnPtr status_column = ColumnUInt8::create(); - MutableColumnPtr host_name_column = ColumnString::create(); - MutableColumnPtr host_address_column = ColumnString::create(); - MutableColumnPtr port_column = ColumnUInt16::create(); - MutableColumnPtr user_column = ColumnString::create(); - MutableColumnPtr structure_class_column = ColumnUInt32::create(); - MutableColumnPtr structure_column = ColumnString::create(); - - /// This value is 1 if the structure is not disposed of anywhere, but 0 otherwise. - UInt8 status_value = (structure_class == 0) ? 1 : 0; - - for (const auto & desc : table_descriptions) - { - status_column->insert(static_cast(status_value)); - structure_class_column->insert(static_cast(desc.structure_class)); - host_name_column->insert(desc.extra_info.host); - host_address_column->insert(desc.extra_info.resolved_address); - port_column->insert(static_cast(desc.extra_info.port)); - user_column->insert(desc.extra_info.user); - structure_column->insert(desc.names_with_types); - } - - Block block; - - block.insert(ColumnWithTypeAndName(std::move(status_column), std::make_shared(), "status")); - block.insert(ColumnWithTypeAndName(std::move(host_name_column), std::make_shared(), "host_name")); - block.insert(ColumnWithTypeAndName(std::move(host_address_column), std::make_shared(), "host_address")); - block.insert(ColumnWithTypeAndName(std::move(port_column), std::make_shared(), "port")); - block.insert(ColumnWithTypeAndName(std::move(user_column), std::make_shared(), "user")); - block.insert(ColumnWithTypeAndName(std::move(structure_class_column), std::make_shared(), "structure_class")); - block.insert(ColumnWithTypeAndName(std::move(structure_column), std::make_shared(), "structure")); - - BlockIO res; - res.in = std::make_shared(block); - - return res; - } - else - { - auto column = ColumnUInt8::create(); - column->insert(UInt64(table->checkData())); - result = Block{{ std::move(column), std::make_shared(), "result" }}; - - BlockIO res; - 
res.in = std::make_shared(result); - - return res; - } + return res; } } diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index a76f4f5ba21..fa8d8bc2c86 100644 --- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -11,6 +11,7 @@ #include #include + namespace DB { @@ -69,7 +70,11 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( ast.list_of_selects->children.at(query_num), context, Names(), to_stage, subquery_depth, true).getSampleBlock(); if (full_result_header_for_current_select.columns() != full_result_header.columns()) - throw Exception("Different number of columns in UNION ALL elements", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + throw Exception("Different number of columns in UNION ALL elements:\n" + + full_result_header.dumpNames() + + "\nand\n" + + full_result_header_for_current_select.dumpNames() + "\n", + ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); required_result_column_names_for_other_selects[query_num].reserve(required_result_column_names.size()); for (const auto & pos : positions_of_required_result_columns) @@ -87,7 +92,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth, only_analyze)); } - /// Determine structure of result. + /// Determine structure of the result. 
if (num_selects == 1) { @@ -104,7 +109,11 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( for (size_t query_num = 1; query_num < num_selects; ++query_num) if (headers[query_num].columns() != num_columns) - throw Exception("Different number of columns in UNION ALL elements", ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + throw Exception("Different number of columns in UNION ALL elements:\n" + + result_header.dumpNames() + + "\nand\n" + + headers[query_num].dumpNames() + "\n", + ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); for (size_t column_num = 0; column_num < num_columns; ++column_num) { diff --git a/dbms/src/Storages/Kafka/KafkaSettings.cpp b/dbms/src/Storages/Kafka/KafkaSettings.cpp new file mode 100644 index 00000000000..be6c3b11b05 --- /dev/null +++ b/dbms/src/Storages/Kafka/KafkaSettings.cpp @@ -0,0 +1,44 @@ +#include +#if USE_RDKAFKA + +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +void KafkaSettings::loadFromQuery(ASTStorage & storage_def) +{ + if (storage_def.settings) + { + for (const ASTSetQuery::Change & setting : storage_def.settings->changes) + { +#define SET(TYPE, NAME, DEFAULT, DESCRIPTION) \ + else if (setting.name == #NAME) NAME.set(setting.value); + + if (false) {} + APPLY_FOR_KAFKA_SETTINGS(SET) + else + throw Exception( + "Unknown setting " + setting.name + " for storage " + storage_def.engine->name, + ErrorCodes::BAD_ARGUMENTS); +#undef SET + } + } + else + { + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + storage_def.set(storage_def.settings, settings_ast); + } +} + +} +#endif diff --git a/dbms/src/Storages/Kafka/KafkaSettings.h b/dbms/src/Storages/Kafka/KafkaSettings.h new file mode 100644 index 00000000000..bd7a5cc0bbb --- /dev/null +++ b/dbms/src/Storages/Kafka/KafkaSettings.h @@ -0,0 +1,43 @@ +#pragma once +#include +#if USE_RDKAFKA + +#include +#include +#include +#include + + +namespace DB +{ + 
+class ASTStorage; + +/** Settings for the Kafka engine. + * Could be loaded from a CREATE TABLE query (SETTINGS clause). + */ +struct KafkaSettings +{ + +#define APPLY_FOR_KAFKA_SETTINGS(M) \ + M(SettingString, kafka_broker_list, "", "A comma-separated list of brokers for Kafka engine.") \ + M(SettingString, kafka_topic_list, "", "A list of Kafka topics.") \ + M(SettingString, kafka_group_name, "", "A group of Kafka consumers.") \ + M(SettingString, kafka_format, "", "Message format for Kafka engine.") \ + M(SettingChar, kafka_row_delimiter, '\0', "The character to be considered as a delimiter in Kafka message.") \ + M(SettingString, kafka_schema, "", "Schema identifier (used by schema-based formats) for Kafka engine") \ + M(SettingUInt64, kafka_num_consumers, 1, "The number of consumers per table for Kafka engine.") + +#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \ + TYPE NAME {DEFAULT}; + + APPLY_FOR_KAFKA_SETTINGS(DECLARE) + +#undef DECLARE + +public: + void loadFromQuery(ASTStorage & storage_def); +}; + +} +#endif diff --git a/dbms/src/Storages/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp similarity index 78% rename from dbms/src/Storages/StorageKafka.cpp rename to dbms/src/Storages/Kafka/StorageKafka.cpp index 43ed4e3b63d..d43996e65b6 100644 --- a/dbms/src/Storages/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -23,7 +23,9 @@ #include #include #include -#include // Y_IGNORE +#include +#include +#include // Y_IGNORE #include #include #include @@ -566,93 +568,200 @@ void registerStorageKafka(StorageFactory & factory) factory.registerStorage("Kafka", [](const StorageFactory::Arguments & args) { ASTs & engine_args = args.engine_args; + size_t args_count = engine_args.size(); + bool has_settings = args.storage_def->settings; + + KafkaSettings kafka_settings; + if (has_settings) + { + kafka_settings.loadFromQuery(*args.storage_def); + } /** Arguments of engine is following: * - Kafka broker list * - List of topics * - Group 
ID (may be a constaint expression with a string result) * - Message format (string) + * - Row delimiter * - Schema (optional, if the format supports it) + * - Number of consumers */ - if (engine_args.size() < 3 || engine_args.size() > 7) - throw Exception( - "Storage Kafka requires 3-7 parameters" - " - Kafka broker list, list of topics to consume, consumer group ID, message format, row delimiter, schema, number of consumers", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + // Check arguments and settings + #define CHECK_KAFKA_STORAGE_ARGUMENT(ARG_NUM, PAR_NAME) \ + /* One of the four required arguments is not specified */ \ + if (args_count < ARG_NUM && ARG_NUM <= 4 && \ + !kafka_settings.PAR_NAME.changed) \ + { \ + throw Exception( \ + "Required parameter '" #PAR_NAME "' " \ + "for storage Kafka not specified", \ + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); \ + } \ + /* The same argument is given in two places */ \ + if (has_settings && \ + kafka_settings.PAR_NAME.changed && \ + args_count >= ARG_NUM) \ + { \ + throw Exception( \ + "The argument №" #ARG_NUM " of storage Kafka " \ + "and the parameter '" #PAR_NAME "' " \ + "in SETTINGS cannot be specified at the same time", \ + ErrorCodes::BAD_ARGUMENTS); \ + } + CHECK_KAFKA_STORAGE_ARGUMENT(1, kafka_broker_list) + CHECK_KAFKA_STORAGE_ARGUMENT(2, kafka_topic_list) + CHECK_KAFKA_STORAGE_ARGUMENT(3, kafka_group_name) + CHECK_KAFKA_STORAGE_ARGUMENT(4, kafka_format) + CHECK_KAFKA_STORAGE_ARGUMENT(5, kafka_row_delimiter) + CHECK_KAFKA_STORAGE_ARGUMENT(6, kafka_schema) + CHECK_KAFKA_STORAGE_ARGUMENT(7, kafka_num_consumers) + #undef CHECK_KAFKA_STORAGE_ARGUMENT + + // Get and check broker list String brokers; - auto ast = typeid_cast(engine_args[0].get()); - if (ast && ast->value.getType() == Field::Types::String) - brokers = safeGet(ast->value); - else - throw Exception(String("Kafka broker list must be a string"), ErrorCodes::BAD_ARGUMENTS); + if (args_count >= 1) + { + auto ast = 
typeid_cast(engine_args[0].get()); + if (ast && ast->value.getType() == Field::Types::String) + { + brokers = safeGet(ast->value); + } + else + { + throw Exception(String("Kafka broker list must be a string"), ErrorCodes::BAD_ARGUMENTS); + } + } + else if (kafka_settings.kafka_broker_list.changed) + { + brokers = kafka_settings.kafka_broker_list.value; + } - engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.local_context); - engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context); - engine_args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[3], args.local_context); + // Get and check topic list + String topic_list; + if (args_count >= 2) + { + engine_args[1] = evaluateConstantExpressionAsLiteral(engine_args[1], args.local_context); + topic_list = static_cast(*engine_args[1]).value.safeGet(); + } + else if (kafka_settings.kafka_topic_list.changed) + { + topic_list = kafka_settings.kafka_topic_list.value; + } + Names topics; + boost::split(topics, topic_list , [](char c){ return c == ','; }); + for (String & topic : topics) + { + boost::trim(topic); + } + + // Get and check group name + String group; + if (args_count >= 3) + { + engine_args[2] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[2], args.local_context); + group = static_cast(*engine_args[2]).value.safeGet(); + } + else if (kafka_settings.kafka_group_name.changed) + { + group = kafka_settings.kafka_group_name.value; + } + + // Get and check message format name + String format; + if (args_count >= 4) + { + engine_args[3] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[3], args.local_context); + + auto ast = typeid_cast(engine_args[3].get()); + if (ast && ast->value.getType() == Field::Types::String) + { + format = safeGet(ast->value); + } + else + { + throw Exception("Format must be a string", ErrorCodes::BAD_ARGUMENTS); + } + } + else if (kafka_settings.kafka_format.changed) + { + format = 
kafka_settings.kafka_format.value; + } // Parse row delimiter (optional) char row_delimiter = '\0'; - if (engine_args.size() >= 5) + if (args_count >= 5) { engine_args[4] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], args.local_context); auto ast = typeid_cast(engine_args[4].get()); String arg; if (ast && ast->value.getType() == Field::Types::String) + { arg = safeGet(ast->value); + } else + { throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); + } if (arg.size() > 1) + { throw Exception("Row delimiter must be a char", ErrorCodes::BAD_ARGUMENTS); + } else if (arg.size() == 0) + { row_delimiter = '\0'; + } else + { row_delimiter = arg[0]; + } + } + else if (kafka_settings.kafka_row_delimiter.changed) + { + row_delimiter = kafka_settings.kafka_row_delimiter.value; } // Parse format schema if supported (optional) String schema; - if (engine_args.size() >= 6) + if (args_count >= 6) { engine_args[5] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[4], args.local_context); auto ast = typeid_cast(engine_args[5].get()); if (ast && ast->value.getType() == Field::Types::String) + { schema = safeGet(ast->value); + } else + { throw Exception("Format schema must be a string", ErrorCodes::BAD_ARGUMENTS); + } + } + else if (kafka_settings.kafka_schema.changed) + { + schema = kafka_settings.kafka_schema.value; } // Parse number of consumers (optional) UInt64 num_consumers = 1; - if (engine_args.size() >= 7) + if (args_count >= 7) { auto ast = typeid_cast(engine_args[6].get()); if (ast && ast->value.getType() == Field::Types::UInt64) + { num_consumers = safeGet(ast->value); + } else + { throw Exception("Number of consumers must be a positive integer", ErrorCodes::BAD_ARGUMENTS); + } + } + else if (kafka_settings.kafka_num_consumers.changed) + { + num_consumers = kafka_settings.kafka_num_consumers.value; } - - // Parse topic list - Names topics; - String topic_arg = static_cast(*engine_args[1]).value.safeGet(); - 
boost::split(topics, topic_arg , [](char c){ return c == ','; }); - for(String & topic : topics) - boost::trim(topic); - - // Parse consumer group - String group = static_cast(*engine_args[2]).value.safeGet(); - - // Parse format from string - String format; - ast = typeid_cast(engine_args[3].get()); - if (ast && ast->value.getType() == Field::Types::String) - format = safeGet(ast->value); - else - throw Exception("Format must be a string", ErrorCodes::BAD_ARGUMENTS); return StorageKafka::create( args.table_name, args.database_name, args.context, args.columns, diff --git a/dbms/src/Storages/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h similarity index 100% rename from dbms/src/Storages/StorageKafka.h rename to dbms/src/Storages/Kafka/StorageKafka.h diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 9c2edfb1ed5..e7447f07b09 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -1784,6 +1784,21 @@ size_t MergeTreeData::getMaxPartsCountForPartition() const } +std::optional MergeTreeData::getMinPartDataVersion() const +{ + std::lock_guard lock(data_parts_mutex); + + std::optional result; + for (const DataPartPtr & part : getDataPartsStateRange(DataPartState::Committed)) + { + if (!result || *result > part->info.getDataVersion()) + result = part->info.getDataVersion(); + } + + return result; +} + + void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event *until) const { const size_t parts_count = getMaxPartsCountForPartition(); diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.h b/dbms/src/Storages/MergeTree/MergeTreeData.h index 0681a1f317c..102378861fe 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.h +++ b/dbms/src/Storages/MergeTree/MergeTreeData.h @@ -384,6 +384,10 @@ public: size_t getMaxPartsCountForPartition() const; + /// Get min value of part->info.getDataVersion() for all active parts. 
+ /// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition. + std::optional getMinPartDataVersion() const; + /// If the table contains too many active parts, sleep for a while to give them time to merge. /// If until is non-null, wake up from the sleep earlier if the event happened. void delayInsertOrThrowIfNeeded(Poco::Event * until = nullptr) const; diff --git a/dbms/src/Storages/MergeTree/MergeTreeMutationEntry.h b/dbms/src/Storages/MergeTree/MergeTreeMutationEntry.h index 68d00b03e1d..95a6e32d204 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeMutationEntry.h +++ b/dbms/src/Storages/MergeTree/MergeTreeMutationEntry.h @@ -21,6 +21,8 @@ struct MergeTreeMutationEntry /// Create a new entry and write it to a temporary file. MergeTreeMutationEntry(MutationCommands commands_, const String & path_prefix_, Int64 tmp_number); + MergeTreeMutationEntry(const MergeTreeMutationEntry &) = delete; + MergeTreeMutationEntry(MergeTreeMutationEntry &&) = default; /// Commit entry and rename it to a permanent file. void commit(Int64 block_number_); diff --git a/dbms/src/Storages/MergeTree/MergeTreeSettings.h b/dbms/src/Storages/MergeTree/MergeTreeSettings.h index 43276a6dd34..fad54a9bb57 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeSettings.h +++ b/dbms/src/Storages/MergeTree/MergeTreeSettings.h @@ -139,7 +139,11 @@ struct MergeTreeSettings * instead of ordinary ones (dozens KB). \ * Before enabling check that all replicas support new format. \ */ \ - M(SettingBool, use_minimalistic_checksums_in_zookeeper, true) + M(SettingBool, use_minimalistic_checksums_in_zookeeper, true) \ + \ + /** How many records about mutations that are done to keep. \ + * If zero, then keep all of them */ \ + M(SettingUInt64, finished_mutations_to_keep, 100) /// Settings that should not change after the creation of a table. 
#define APPLY_FOR_IMMUTABLE_MERGE_TREE_SETTINGS(M) \ diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 0d1d7d9a51f..a4bc98df293 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -59,6 +59,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() { clearOldLogs(); clearOldBlocks(); + clearOldMutations(); } } @@ -236,4 +237,63 @@ void ReplicatedMergeTreeCleanupThread::getBlocksSortedByTime(zkutil::ZooKeeper & std::sort(timed_blocks.begin(), timed_blocks.end(), NodeWithStat::greaterByTime); } + +void ReplicatedMergeTreeCleanupThread::clearOldMutations() +{ + if (!storage.data.settings.finished_mutations_to_keep) + return; + + if (storage.queue.countFinishedMutations() <= storage.data.settings.finished_mutations_to_keep) + { + /// Not strictly necessary, but helps to avoid unnecessary ZooKeeper requests. + /// If even this replica hasn't finished enough mutations yet, then we don't need to clean anything. + return; + } + + auto zookeeper = storage.getZooKeeper(); + + zkutil::Stat replicas_stat; + Strings replicas = zookeeper->getChildren(storage.zookeeper_path + "/replicas", &replicas_stat); + + UInt64 min_pointer = std::numeric_limits::max(); + for (const String & replica : replicas) + { + String pointer; + zookeeper->tryGet(storage.zookeeper_path + "/replicas/" + replica + "/mutation_pointer", pointer); + if (pointer.empty()) + return; /// One replica hasn't done anything yet so we can't delete any mutations. + min_pointer = std::min(parse(pointer), min_pointer); + } + + Strings entries = zookeeper->getChildren(storage.zookeeper_path + "/mutations"); + std::sort(entries.begin(), entries.end()); + + /// Do not remove entries that are greater than `min_pointer` (they are not done yet). 
+ entries.erase(std::upper_bound(entries.begin(), entries.end(), padIndex(min_pointer)), entries.end()); + /// Do not remove last `storage.data.settings.finished_mutations_to_keep` entries. + if (entries.size() <= storage.data.settings.finished_mutations_to_keep) + return; + entries.erase(entries.end() - storage.data.settings.finished_mutations_to_keep, entries.end()); + + if (entries.empty()) + return; + + zkutil::Requests ops; + size_t batch_start_i = 0; + for (size_t i = 0; i < entries.size(); ++i) + { + ops.emplace_back(zkutil::makeRemoveRequest(storage.zookeeper_path + "/mutations/" + entries[i], -1)); + + if (ops.size() > 4 * zkutil::MULTI_BATCH_SIZE || i + 1 == entries.size()) + { + /// Simultaneously with clearing the log, we check to see if replica was added since we received replicas list. + ops.emplace_back(zkutil::makeCheckRequest(storage.zookeeper_path + "/replicas", replicas_stat.version)); + zookeeper->multi(ops); + LOG_DEBUG(log, "Removed " << (i + 1 - batch_start_i) << " old mutation entries: " << entries[batch_start_i] << " - " << entries[i]); + batch_start_i = i + 1; + ops.clear(); + } + } +} + } diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h index cdf87357e3b..2223a42717d 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.h @@ -50,6 +50,9 @@ private: /// Remove old block hashes from ZooKeeper. This is done by the leader replica. void clearOldBlocks(); + /// Remove old mutations that are done from ZooKeeper. This is done by the leader replica. 
+ void clearOldMutations(); + using NodeCTimeCache = std::map; NodeCTimeCache cached_block_stats; diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index f5c91b9c94d..31e52c042aa 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1031,6 +1031,45 @@ bool ReplicatedMergeTreeQueue::processEntry( } +size_t ReplicatedMergeTreeQueue::countMergesAndPartMutations() const +{ + std::lock_guard lock(state_mutex); + + size_t count = 0; + for (const auto & entry : queue) + if (entry->type == ReplicatedMergeTreeLogEntry::MERGE_PARTS + || entry->type == ReplicatedMergeTreeLogEntry::MUTATE_PART) + ++count; + + return count; +} + + +size_t ReplicatedMergeTreeQueue::countMutations() const +{ + std::lock_guard lock(state_mutex); + return mutations_by_znode.size(); +} + + +size_t ReplicatedMergeTreeQueue::countFinishedMutations() const +{ + std::lock_guard lock(state_mutex); + + size_t count = 0; + for (const auto & pair : mutations_by_znode) + { + const auto & mutation = pair.second; + if (!mutation.is_done) + break; + + ++count; + } + + return count; +} + + ReplicatedMergeTreeMergePredicate ReplicatedMergeTreeQueue::getMergePredicate(zkutil::ZooKeeperPtr & zookeeper) { return ReplicatedMergeTreeMergePredicate(*this, zookeeper); @@ -1124,6 +1163,8 @@ bool ReplicatedMergeTreeQueue::tryFinalizeMutations(zkutil::ZooKeeperPtr zookeep { std::lock_guard lock(state_mutex); + mutation_pointer = finished.back()->znode_name; + for (const ReplicatedMergeTreeMutationEntry * entry : finished) { auto it = mutations_by_znode.find(entry->znode_name); @@ -1476,27 +1517,6 @@ bool ReplicatedMergeTreeMergePredicate::operator()( } -size_t ReplicatedMergeTreeMergePredicate::countMergesAndPartMutations() const -{ - std::lock_guard lock(queue.state_mutex); - - size_t count = 0; - for (const auto & entry : queue.queue) - if (entry->type == 
ReplicatedMergeTreeLogEntry::MERGE_PARTS - || entry->type == ReplicatedMergeTreeLogEntry::MUTATE_PART) - ++count; - - return count; -} - - -size_t ReplicatedMergeTreeMergePredicate::countMutations() const -{ - std::lock_guard lock(queue.state_mutex); - return queue.mutations_by_znode.size(); -} - - std::optional ReplicatedMergeTreeMergePredicate::getDesiredMutationVersion(const MergeTreeData::DataPartPtr & part) const { /// Assigning mutations is easier than assigning merges because mutations appear in the same order as diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 2e642ad148c..be586fffe74 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -275,6 +275,15 @@ public: */ bool processEntry(std::function get_zookeeper, LogEntryPtr & entry, const std::function func); + /// Count the number of merges and mutations of single parts in the queue. + size_t countMergesAndPartMutations() const; + + /// Count the total number of active mutations. + size_t countMutations() const; + + /// Count the total number of active mutations that are finished (is_done = true). + size_t countFinishedMutations() const; + ReplicatedMergeTreeMergePredicate getMergePredicate(zkutil::ZooKeeperPtr & zookeeper); /// Return the version (block number) of the last mutation that we don't need to apply to the part @@ -345,12 +354,6 @@ public: const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, String * out_reason = nullptr) const; - /// Count the number of merges and mutations of single parts in the queue. - size_t countMergesAndPartMutations() const; - - /// Count the total number of active mutations. - size_t countMutations() const; - /// Return nonempty optional if the part can and should be mutated. /// Returned mutation version number is always the biggest possible. 
std::optional getDesiredMutationVersion(const MergeTreeData::DataPartPtr & part) const; diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp index a3b2e7f31f8..8d1887bb1bc 100644 --- a/dbms/src/Storages/StorageDistributed.cpp +++ b/dbms/src/Storages/StorageDistributed.cpp @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -245,7 +244,6 @@ BlockInputStreams StorageDistributed::read( ? QueryProcessingStage::Complete : QueryProcessingStage::WithMergeableState; - const auto & modified_query_ast = rewriteSelectQuery( query_info.query, remote_database, remote_table, remote_table_function_ptr); @@ -318,34 +316,6 @@ void StorageDistributed::shutdown() } -BlockInputStreams StorageDistributed::describe(const Context & context, const Settings & settings) -{ - /// Create DESCRIBE TABLE query. - auto cluster = getCluster(); - - auto describe_query = std::make_shared(); - - std::string name = remote_database + '.' + remote_table; - - auto id = std::make_shared(name); - - auto desc_database = std::make_shared(remote_database); - auto desc_table = std::make_shared(remote_table); - - id->children.push_back(desc_database); - id->children.push_back(desc_table); - - auto table_expression = std::make_shared(); - table_expression->database_and_table_name = id; - - describe_query->table_expression = table_expression; - - ClusterProxy::DescribeStreamFactory describe_stream_factory; - - return ClusterProxy::executeQuery( - describe_stream_factory, cluster, describe_query, context, settings); -} - void StorageDistributed::truncate(const ASTPtr &) { std::lock_guard lock(cluster_nodes_mutex); diff --git a/dbms/src/Storages/StorageDistributed.h b/dbms/src/Storages/StorageDistributed.h index c4367b5a064..fdb08c31c00 100644 --- a/dbms/src/Storages/StorageDistributed.h +++ b/dbms/src/Storages/StorageDistributed.h @@ -85,9 +85,6 @@ public: String getDataPath() const override { return path; } - /// From each replica, get a 
description of the corresponding local table. - BlockInputStreams describe(const Context & context, const Settings & settings); - const ExpressionActionsPtr & getShardingKeyExpr() const { return sharding_key_expr; } const String & getShardingKeyColumnName() const { return sharding_key_column_name; } size_t getShardCount() const; diff --git a/dbms/src/Storages/StorageFactory.cpp b/dbms/src/Storages/StorageFactory.cpp index d56e7ee4d80..9ceb59abbcb 100644 --- a/dbms/src/Storages/StorageFactory.cpp +++ b/dbms/src/Storages/StorageFactory.cpp @@ -87,11 +87,19 @@ StoragePtr StorageFactory::get( name = engine_def.name; - if ((storage_def->partition_by || storage_def->order_by || storage_def->sample_by || storage_def->settings) + if (storage_def->settings && !endsWith(name, "MergeTree") && name != "Kafka") + { + throw Exception( + "Engine " + name + " doesn't support SETTINGS clause. " + "Currently only the MergeTree family of engines and Kafka engine supports it", + ErrorCodes::BAD_ARGUMENTS); + } + + if ((storage_def->partition_by || storage_def->order_by || storage_def->sample_by) && !endsWith(name, "MergeTree")) { throw Exception( - "Engine " + name + " doesn't support PARTITION BY, ORDER BY, SAMPLE BY or SETTINGS clauses. " + "Engine " + name + " doesn't support PARTITION BY, ORDER BY or SAMPLE BY clauses. 
" "Currently only the MergeTree family of engines supports them", ErrorCodes::BAD_ARGUMENTS); } diff --git a/dbms/src/Storages/StorageMergeTree.cpp b/dbms/src/Storages/StorageMergeTree.cpp index 42774b06f1d..39678f168c1 100644 --- a/dbms/src/Storages/StorageMergeTree.cpp +++ b/dbms/src/Storages/StorageMergeTree.cpp @@ -599,6 +599,7 @@ bool StorageMergeTree::backgroundTask() { data.clearOldPartsFromFilesystem(); data.clearOldTemporaryDirectories(); + clearOldMutations(); } size_t aio_threshold = context.getSettings().min_bytes_to_use_direct_io; @@ -631,6 +632,46 @@ Int64 StorageMergeTree::getCurrentMutationVersion( return it->first; }; +void StorageMergeTree::clearOldMutations() +{ + if (!data.settings.finished_mutations_to_keep) + return; + + std::vector mutations_to_delete; + { + std::lock_guard lock(currently_merging_mutex); + + if (current_mutations_by_version.size() <= data.settings.finished_mutations_to_keep) + return; + + auto begin_it = current_mutations_by_version.begin(); + + std::optional min_version = data.getMinPartDataVersion(); + auto end_it = current_mutations_by_version.end(); + if (min_version) + end_it = current_mutations_by_version.upper_bound(*min_version); + + size_t done_count = std::distance(begin_it, end_it); + if (done_count <= data.settings.finished_mutations_to_keep) + return; + + size_t to_delete_count = done_count - data.settings.finished_mutations_to_keep; + + auto it = begin_it; + for (size_t i = 0; i < to_delete_count; ++i) + { + mutations_to_delete.push_back(std::move(it->second)); + it = current_mutations_by_version.erase(it); + } + } + + for (auto & mutation : mutations_to_delete) + { + LOG_TRACE(log, "Removing mutation: " << mutation.file_name); + mutation.removeFile(); + } +} + void StorageMergeTree::clearColumnInPartition(const ASTPtr & partition, const Field & column_name, const Context & context) { diff --git a/dbms/src/Storages/StorageMergeTree.h b/dbms/src/Storages/StorageMergeTree.h index 80bc7b421ac..9c9f591a9de 100644 
--- a/dbms/src/Storages/StorageMergeTree.h +++ b/dbms/src/Storages/StorageMergeTree.h @@ -140,6 +140,8 @@ private: const MergeTreeData::DataPartPtr & part, std::lock_guard & /* currently_merging_mutex_lock */) const; + void clearOldMutations(); + friend class MergeTreeBlockOutputStream; friend class MergeTreeData; friend struct CurrentlyMergingPartsTagger; diff --git a/dbms/src/Storages/StorageReplicatedMergeTree.cpp b/dbms/src/Storages/StorageReplicatedMergeTree.cpp index e5e6d41133c..f997fb27bde 100644 --- a/dbms/src/Storages/StorageReplicatedMergeTree.cpp +++ b/dbms/src/Storages/StorageReplicatedMergeTree.cpp @@ -2197,7 +2197,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() /// If many merges is already queued, then will queue only small enough merges. /// Otherwise merge queue could be filled with only large merges, /// and in the same time, many small parts could be created and won't be merged. - size_t merges_and_mutations_queued = merge_pred.countMergesAndPartMutations(); + size_t merges_and_mutations_queued = queue.countMergesAndPartMutations(); if (merges_and_mutations_queued >= data.settings.max_replicated_merges_in_queue) { LOG_TRACE(log, "Number of queued merges and part mutations (" << merges_and_mutations_queued @@ -2216,7 +2216,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() { success = createLogEntryToMergeParts(zookeeper, future_merged_part.parts, future_merged_part.name, deduplicate); } - else if (merge_pred.countMutations() > 0) + else if (queue.countMutations() > 0) { /// Choose a part to mutate. 
diff --git a/dbms/tests/integration/test_storage_kafka/test.py b/dbms/tests/integration/test_storage_kafka/test.py index 85d6f090d58..41076ac78c4 100644 --- a/dbms/tests/integration/test_storage_kafka/test.py +++ b/dbms/tests/integration/test_storage_kafka/test.py @@ -1,6 +1,5 @@ import os.path as p import time -import datetime import pytest from helpers.cluster import ClickHouseCluster @@ -10,9 +9,11 @@ import json import subprocess - cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance('instance', main_configs=['configs/kafka.xml'], with_kafka = True) +instance = cluster.add_instance('instance', + main_configs=['configs/kafka.xml'], + with_kafka=True) + @pytest.fixture(scope="module") def started_cluster(): @@ -25,23 +26,36 @@ def started_cluster(): finally: cluster.shutdown() + def kafka_is_available(started_cluster): - p = subprocess.Popen(('docker', 'exec', '-i', started_cluster.kafka_docker_id, '/usr/bin/kafka-broker-api-versions', '--bootstrap-server', 'PLAINTEXT://localhost:9092'), stdout=subprocess.PIPE) - streamdata = p.communicate()[0] + p = subprocess.Popen(('docker', + 'exec', + '-i', + started_cluster.kafka_docker_id, + '/usr/bin/kafka-broker-api-versions', + '--bootstrap-server', + 'PLAINTEXT://localhost:9092'), + stdout=subprocess.PIPE) + p.communicate()[0] return p.returncode == 0 + def kafka_produce(started_cluster, topic, messages): - p = subprocess.Popen(('docker', 'exec', '-i', started_cluster.kafka_docker_id, '/usr/bin/kafka-console-producer', '--broker-list', 'localhost:9092', '--topic', topic), stdin=subprocess.PIPE) + p = subprocess.Popen(('docker', + 'exec', + '-i', + started_cluster.kafka_docker_id, + '/usr/bin/kafka-console-producer', + '--broker-list', + 'localhost:9092', + '--topic', + topic), + stdin=subprocess.PIPE) p.communicate(messages) p.stdin.close() -def test_kafka_json(started_cluster): - instance.query(''' -DROP TABLE IF EXISTS test.kafka; -CREATE TABLE test.kafka (key UInt64, value UInt64) - ENGINE = 
Kafka('kafka1:9092', 'json', 'json', 'JSONEachRow', '\\n'); -''') +def kafka_check_json_numbers(instance, started_cluster): retries = 0 while True: if kafka_is_available(started_cluster): @@ -58,10 +72,38 @@ CREATE TABLE test.kafka (key UInt64, value UInt64) kafka_produce(started_cluster, 'json', messages) time.sleep(3) result = instance.query('SELECT * FROM test.kafka;') - with open(p.join(p.dirname(__file__), 'test_kafka_json.reference')) as reference: + file = p.join(p.dirname(__file__), 'test_kafka_json.reference') + with open(file) as reference: assert TSV(result) == TSV(reference) + + +def test_kafka_json(started_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.kafka; + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka('kafka1:9092', 'json', 'json', + 'JSONEachRow', '\\n'); + ''') + kafka_check_json_numbers(instance, started_cluster) instance.query('DROP TABLE test.kafka') + +def test_kafka_json_settings(started_cluster): + instance.query(''' + DROP TABLE IF EXISTS test.kafka; + CREATE TABLE test.kafka (key UInt64, value UInt64) + ENGINE = Kafka + SETTINGS + kafka_broker_list = 'kafka1:9092', + kafka_topic_list = 'json', + kafka_group_name = 'json', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\\n'; + ''') + kafka_check_json_numbers(instance, started_cluster) + instance.query('DROP TABLE test.kafka') + + if __name__ == '__main__': cluster.start() raw_input("Cluster created, press any key to destroy...") diff --git a/dbms/tests/queries/0_stateless/00098_1_union_all.reference b/dbms/tests/queries/0_stateless/00098_1_union_all.reference index 8fc0e85e8fb..5927fc33d20 100644 --- a/dbms/tests/queries/0_stateless/00098_1_union_all.reference +++ b/dbms/tests/queries/0_stateless/00098_1_union_all.reference @@ -1,2 +1,4 @@ 1000 2000 +1000 Alice +2000 Alice diff --git a/dbms/tests/queries/0_stateless/00098_1_union_all.sql b/dbms/tests/queries/0_stateless/00098_1_union_all.sql index 7c05af6de98..6f96b710985 100644 --- a/dbms/tests/queries/0_stateless/00098_1_union_all.sql +++ 
b/dbms/tests/queries/0_stateless/00098_1_union_all.sql @@ -23,3 +23,9 @@ UNION ALL SELECT value AS val FROM data2014 WHERE name = 'Alice') ORDER BY val ASC; +SELECT val, name FROM +(SELECT value AS val, value AS val_1, name FROM data2013 WHERE name = 'Alice' +UNION ALL +SELECT value AS val, value, name FROM data2014 WHERE name = 'Alice') +ORDER BY val ASC; + diff --git a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.lib b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.lib new file mode 100644 index 00000000000..0df275092fe --- /dev/null +++ b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.lib @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +function wait_for_mutation() +{ + local table=$1 + local mutation_id=$2 + + for i in {1..100} + do + sleep 0.1 + if [[ $(${CLICKHOUSE_CLIENT} --query="SELECT is_done FROM system.mutations WHERE table='$table' AND mutation_id='$mutation_id'") -eq 1 ]]; then + break + fi + + if [[ $i -eq 100 ]]; then + echo "Timed out while waiting for mutation to execute!" 
+ fi + + done +} diff --git a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference index ebddd8d85fd..5341d7b49aa 100644 --- a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference +++ b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.reference @@ -8,3 +8,6 @@ Query involving aliases should fail on submission mutation_1.txt DELETE WHERE x = 1 [''] [1] 0 1 mutation_5.txt DELETE WHERE (x % 2) = 1 [''] [5] 0 1 mutation_6.txt DELETE WHERE s = \'d\' [''] [6] 0 1 +*** Test mutations cleaner *** +mutation_3.txt DELETE WHERE x = 2 1 +mutation_4.txt DELETE WHERE x = 3 1 diff --git a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh index 90a3bb6b659..b7e3c657dd9 100755 --- a/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh +++ b/dbms/tests/queries/0_stateless/00652_mergetree_mutations.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/00652_mergetree_mutations.lib + ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations(d Date, x UInt32, s String, a UInt32 ALIAS x + 1) ENGINE MergeTree(d, intDiv(x, 10), 8192)" @@ -31,18 +33,8 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations DELETE WHERE s = 'd'" ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.mutations(d, x, s) VALUES \ ('2000-01-01', 5, 'e'), ('2000-02-01', 5, 'e')" -# Wait until all mutations are done. -for i in {1..100} -do - sleep 0.1 - if [[ $(${CLICKHOUSE_CLIENT} --query="SELECT sum(is_done) FROM system.mutations WHERE table='mutations'") -eq 3 ]]; then - break - fi - - if [[ $i -eq 100 ]]; then - echo "Timed out while waiting for mutations to execute!" - fi -done +# Wait until the last mutation is done. 
+wait_for_mutation "mutations" "mutation_6.txt" # Check that the table contains only the data that should not be deleted. ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations ORDER BY d, x" @@ -50,4 +42,31 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations ORDER BY d, x" ${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, block_numbers.partition_id, block_numbers.number, parts_to_do, is_done \ FROM system.mutations WHERE table = 'mutations' ORDER BY mutation_id" + +${CLICKHOUSE_CLIENT} --query="SELECT '*** Test mutations cleaner ***'" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations_cleaner" + +# Create a table with finished_mutations_to_keep = 2 +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations_cleaner(x UInt32) ENGINE MergeTree ORDER BY x SETTINGS finished_mutations_to_keep = 2" + +# Insert some data +${CLICKHOUSE_CLIENT} --query="INSERT INTO test.mutations_cleaner(x) VALUES (1), (2), (3), (4)" + +# Add some mutations and wait for their execution +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_cleaner DELETE WHERE x = 1" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_cleaner DELETE WHERE x = 2" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_cleaner DELETE WHERE x = 3" + +wait_for_mutation "mutations_cleaner" "mutation_4.txt" + +# Sleep and then do an INSERT to wakeup the background task that will clean up the old mutations +sleep 1 +${CLICKHOUSE_CLIENT} --query="INSERT INTO test.mutations_cleaner(x) VALUES (4)" +sleep 0.1 + +# Check that the first mutation is cleaned +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, is_done FROM system.mutations WHERE table = 'mutations_cleaner' ORDER BY mutation_id" + ${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations" +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations_cleaner" diff --git a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference 
b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference index a82c2571055..cb5a52cb905 100644 --- a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference +++ b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.reference @@ -7,3 +7,7 @@ Query should fail 2 0000000000 DELETE WHERE x = 1 [] [] 0 1 0000000001 DELETE WHERE (x % 2) = 1 ['200001','200002'] [2,1] 0 1 0000000002 DELETE WHERE s = \'d\' ['200001','200002'] [3,2] 0 1 +*** Test mutations cleaner *** +0000000001 DELETE WHERE x = 2 1 +0000000002 DELETE WHERE x = 3 1 +0000000003 DELETE WHERE x = 4 0 diff --git a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh index c652596cd1a..45618799c4f 100755 --- a/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh +++ b/dbms/tests/queries/0_stateless/00652_replicated_mutations_zookeeper.sh @@ -3,6 +3,8 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . $CURDIR/../shell_config.sh +. $CURDIR/00652_mergetree_mutations.lib + ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations_r1" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations_r2" @@ -31,18 +33,8 @@ ${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_r1 DELETE WHERE s = 'd' ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.mutations_r1(d, x, s) VALUES \ ('2000-01-01', 5, 'e'), ('2000-02-01', 5, 'e')" -# Wait until all mutations are done. -for i in {1..100} -do - sleep 0.1 - if [[ $(${CLICKHOUSE_CLIENT} --query="SELECT sum(is_done) FROM system.mutations WHERE table='mutations_r2'") -eq 3 ]]; then - break - fi - - if [[ $i -eq 100 ]]; then - echo "Timed out while waiting for mutations to execute!" - fi -done +# Wait until the last mutation is done. +wait_for_mutation "mutations_r2" "0000000002" # Check that the table contains only the data that should not be deleted. 
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations_r2 ORDER BY d, x" @@ -50,5 +42,44 @@ ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations_r2 ORDER BY d, x" ${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, block_numbers.partition_id, block_numbers.number, parts_to_do, is_done \ FROM system.mutations WHERE table = 'mutations_r2' ORDER BY mutation_id" + +${CLICKHOUSE_CLIENT} --query="SELECT '*** Test mutations cleaner ***'" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations_cleaner_r1" +${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.mutations_cleaner_r2" + +# Create 2 replicas with finished_mutations_to_keep = 2 +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations_cleaner_r1(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/mutations_cleaner', 'r1') ORDER BY x SETTINGS \ + finished_mutations_to_keep = 2, + cleanup_delay_period = 1, + cleanup_delay_period_random_add = 0" +${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.mutations_cleaner_r2(x UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/mutations_cleaner', 'r2') ORDER BY x SETTINGS \ + finished_mutations_to_keep = 2, + cleanup_delay_period = 1, + cleanup_delay_period_random_add = 0" + +# Insert some data +${CLICKHOUSE_CLIENT} --query="INSERT INTO test.mutations_cleaner_r1(x) VALUES (1), (2), (3), (4)" + +# Add some mutations and wait for their execution +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_cleaner_r1 DELETE WHERE x = 1" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_cleaner_r1 DELETE WHERE x = 2" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_cleaner_r1 DELETE WHERE x = 3" + +wait_for_mutation "mutations_cleaner_r2" "0000000002" + +# Add another mutation and prevent its execution on the second replica +${CLICKHOUSE_CLIENT} --query="SYSTEM STOP REPLICATION QUEUES test.mutations_cleaner_r2" +${CLICKHOUSE_CLIENT} --query="ALTER TABLE test.mutations_cleaner_r1 DELETE WHERE x = 4" + +# Sleep for 
more than cleanup_delay_period +sleep 1.5 + +# Check that the first mutation is cleaned +${CLICKHOUSE_CLIENT} --query="SELECT mutation_id, command, is_done FROM system.mutations WHERE table = 'mutations_cleaner_r2' ORDER BY mutation_id" + ${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations_r1" ${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations_r2" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations_cleaner_r1" +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations_cleaner_r2" diff --git a/dbms/tests/queries/0_stateless/00679_uuid_in_key.reference b/dbms/tests/queries/0_stateless/00679_uuid_in_key.reference new file mode 100644 index 00000000000..eb806b81202 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00679_uuid_in_key.reference @@ -0,0 +1,6 @@ +1 +0 +0 +0 +1 +1 diff --git a/dbms/tests/queries/0_stateless/00679_uuid_in_key.sql b/dbms/tests/queries/0_stateless/00679_uuid_in_key.sql new file mode 100644 index 00000000000..7a61132b4ff --- /dev/null +++ b/dbms/tests/queries/0_stateless/00679_uuid_in_key.sql @@ -0,0 +1,21 @@ +USE test; + +CREATE TABLE IF NOT EXISTS uuid +( + created_at DateTime, + id UUID +) +ENGINE = MergeTree +PARTITION BY toDate(created_at) +ORDER BY (created_at, id); + +INSERT INTO uuid (created_at, id) VALUES ('2018-01-01 01:02:03', '00000000-0000-03f8-9cb8-cb1b82fb3900'); + +SELECT count() FROM uuid WHERE id = '00000000-0000-03f8-9cb8-cb1b82fb3900'; +SELECT count() FROM uuid WHERE id != '00000000-0000-03f8-9cb8-cb1b82fb3900'; +SELECT count() FROM uuid WHERE id < '00000000-0000-03f8-9cb8-cb1b82fb3900'; +SELECT count() FROM uuid WHERE id > '00000000-0000-03f8-9cb8-cb1b82fb3900'; +SELECT count() FROM uuid WHERE id <= '00000000-0000-03f8-9cb8-cb1b82fb3900'; +SELECT count() FROM uuid WHERE id >= '00000000-0000-03f8-9cb8-cb1b82fb3900'; + +DROP TABLE uuid; diff --git a/dbms/tests/queries/0_stateless/00680_duplicate_columns_inside_union_all.reference 
b/dbms/tests/queries/0_stateless/00680_duplicate_columns_inside_union_all.reference new file mode 100644 index 00000000000..e2ec95f0464 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00680_duplicate_columns_inside_union_all.reference @@ -0,0 +1,8 @@ +1 2 +3 3 +1 2 +4 4 +1 2 +3 4 +1 2 +3 3 diff --git a/dbms/tests/queries/0_stateless/00680_duplicate_columns_inside_union_all.sql b/dbms/tests/queries/0_stateless/00680_duplicate_columns_inside_union_all.sql new file mode 100644 index 00000000000..c316df36803 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00680_duplicate_columns_inside_union_all.sql @@ -0,0 +1,4 @@ +SELECT x, y FROM (SELECT x, y FROM (SELECT 1 AS x, 2 AS y) UNION ALL SELECT x, x FROM (SELECT 3 AS x, 4 AS y)) ORDER BY x, y; +SELECT x, y FROM (SELECT x, y FROM (SELECT 1 AS x, 2 AS y) UNION ALL SELECT y, y FROM (SELECT 3 AS x, 4 AS y)) ORDER BY x, y; +SELECT x, y FROM (SELECT x, x, y FROM (SELECT 1 AS x, 2 AS y) UNION ALL SELECT x, y, y FROM (SELECT 3 AS x, 4 AS y)) ORDER BY x, y; +SELECT x, y FROM (SELECT x, y, y FROM (SELECT 1 AS x, 2 AS y) UNION ALL SELECT x, x, y FROM (SELECT 3 AS x, 4 AS y)) ORDER BY x, y; diff --git a/dbms/tests/queries/0_stateless/00681_duplicate_columns_inside_union_all_stas_sviridov.reference b/dbms/tests/queries/0_stateless/00681_duplicate_columns_inside_union_all_stas_sviridov.reference new file mode 100644 index 00000000000..3e67fe1ac4f --- /dev/null +++ b/dbms/tests/queries/0_stateless/00681_duplicate_columns_inside_union_all_stas_sviridov.reference @@ -0,0 +1,2 @@ +123 +123 diff --git a/dbms/tests/queries/0_stateless/00681_duplicate_columns_inside_union_all_stas_sviridov.sql b/dbms/tests/queries/0_stateless/00681_duplicate_columns_inside_union_all_stas_sviridov.sql new file mode 100644 index 00000000000..2c118f546c3 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00681_duplicate_columns_inside_union_all_stas_sviridov.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS test.test; + +CREATE TABLE test.test(x Int32) ENGINE = 
Log; +INSERT INTO test.test VALUES (123); + +SELECT a1 +FROM +( + SELECT x AS a1, x AS a2 FROM test.test + UNION ALL + SELECT x, x FROM test.test +); + +DROP TABLE test.test; diff --git a/debian/changelog b/debian/changelog index 2d3eb28d872..1b7b9165ed5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,5 @@ -clickhouse (18.8.0) unstable; urgency=low +clickhouse (18.9.0) unstable; urgency=low * Modified source code - -- Thu, 02 Aug 2018 11:35:38 +0300 + -- Alexey Milovidov Fri, 03 Aug 2018 19:17:05 +0300 diff --git a/debian/clickhouse-server.postinst b/debian/clickhouse-server.postinst index 54fc03f4a36..57882006d78 100644 --- a/debian/clickhouse-server.postinst +++ b/debian/clickhouse-server.postinst @@ -3,6 +3,7 @@ set -e CLICKHOUSE_USER=${CLICKHOUSE_USER=clickhouse} CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP=${CLICKHOUSE_USER}} +CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR=/etc/clickhouse-server} CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR=/var/lib/clickhouse} CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR=/var/log/clickhouse-server} OS=${OS=`lsb_release -is 2>/dev/null || uname -s || true`} @@ -64,6 +65,10 @@ Please fix this and reinstall this package." >&2 exit 1 fi + if [ -d ${CLICKHOUSE_CONFDIR} ]; then + su -s /bin/sh ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_CONFDIR}" || chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_CONFDIR} + fi + if [ ! -d ${CLICKHOUSE_DATADIR} ]; then mkdir -p ${CLICKHOUSE_DATADIR} chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_DATADIR} diff --git a/debian/rules b/debian/rules index edece55c89e..c9ff7635350 100755 --- a/debian/rules +++ b/debian/rules @@ -70,8 +70,8 @@ override_dh_auto_configure: override_dh_auto_build: # Fix for ninja. Do not add -O. - #cd $(BUILDDIR) && $(MAKE) -j$(THREADS_COUNT) - cd $(BUILDDIR) && cmake --build . -- -j$(THREADS_COUNT) + cd $(BUILDDIR) && $(MAKE) -j$(THREADS_COUNT) + #cd $(BUILDDIR) && cmake --build . 
-- -j$(THREADS_COUNT) # cmake return true on error override_dh_auto_test: #TODO, use ENABLE_TESTS=1 diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile index 3393dd0e697..5f3b6ad9d42 100644 --- a/docker/client/Dockerfile +++ b/docker/client/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=\* +ARG version=18.9.0 RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index e1ba52b0989..af1c42c85e7 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=\* +ARG version=18.9.0 RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docker/test/Dockerfile b/docker/test/Dockerfile index 3f7755c58dd..f03e21e082b 100644 --- a/docker/test/Dockerfile +++ b/docker/test/Dockerfile @@ -1,7 +1,7 @@ FROM ubuntu:18.04 ARG repository="deb http://repo.yandex.ru/clickhouse/deb/stable/ main/" -ARG version=\* +ARG version=18.9.0 RUN apt-get update && \ apt-get install -y apt-transport-https dirmngr && \ diff --git a/docs/en/development/build.md b/docs/en/development/build.md index e1b1cfe5dde..b810e42bd40 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -46,22 +46,19 @@ export CXX=g++-7 ## Install required libraries from packages ```bash -sudo apt-get install libicu-dev libreadline-dev libssl-dev +sudo apt-get install libicu-dev libreadline-dev ``` ## Checkout ClickHouse sources -To get the latest stable version: - ```bash -git clone -b stable --recursive git@github.com:yandex/ClickHouse.git -# or: git clone -b stable --recursive https://github.com/yandex/ClickHouse.git +git clone --recursive git@github.com:yandex/ClickHouse.git +# or: git clone --recursive https://github.com/yandex/ClickHouse.git cd ClickHouse ``` 
-For development, switch to the `master` branch. -For the latest release candidate, switch to the `testing` branch. +For the latest stable version, switch to the `stable` branch. ## Build ClickHouse diff --git a/docs/en/development/build_osx.md b/docs/en/development/build_osx.md index 3dfe503be8d..732e95ca55b 100644 --- a/docs/en/development/build_osx.md +++ b/docs/en/development/build_osx.md @@ -17,17 +17,14 @@ brew install cmake ninja gcc icu4c mariadb-connector-c openssl libtool gettext r ## Checkout ClickHouse sources -To get the latest stable version: - ```bash -git clone -b stable --recursive --depth=10 git@github.com:yandex/ClickHouse.git -# or: git clone -b stable --recursive --depth=10 https://github.com/yandex/ClickHouse.git +git clone --recursive --depth=10 git@github.com:yandex/ClickHouse.git +# or: git clone --recursive --depth=10 https://github.com/yandex/ClickHouse.git cd ClickHouse ``` -For development, switch to the `master` branch. -For the latest release candidate, switch to the `testing` branch. +For the latest stable version, switch to the `stable` branch. ## Build ClickHouse diff --git a/docs/en/operations/table_engines/kafka.md b/docs/en/operations/table_engines/kafka.md index 31616e77d25..f04c234dcd5 100644 --- a/docs/en/operations/table_engines/kafka.md +++ b/docs/en/operations/table_engines/kafka.md @@ -8,20 +8,41 @@ Kafka lets you: - Organize fault-tolerant storage. - Process streams as they become available. + +Old format: + ``` -Kafka(broker_list, topic_list, group_name, format[, schema, num_consumers]) +Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format + [, kafka_row_delimiter, kafka_schema, kafka_num_consumers]) ``` -Parameters: +New format: -- `broker_list` – A comma-separated list of brokers (`localhost:9092`). -- `topic_list` – A list of Kafka topics (`my_topic`). -- `group_name` – A group of Kafka consumers (`group1`). Reading margins are tracked for each group separately. 
If you don't want messages to be duplicated in the cluster, use the same group name everywhere. -- `--format` – Message format. Uses the same notation as the SQL ` FORMAT` function, such as ` JSONEachRow`. For more information, see the "Formats" section. -- `schema` – An optional parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object. -- `num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. +``` +Kafka SETTINGS + kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'topic1,topic2', + kafka_group_name = 'group1', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\n', + kafka_schema = '', + kafka_num_consumers = 2 +``` -Example: +Required parameters: + +- `kafka_broker_list` – A comma-separated list of brokers (`localhost:9092`). +- `kafka_topic_list` – A list of Kafka topics (`my_topic`). +- `kafka_group_name` – A group of Kafka consumers (`group1`). Reading margins are tracked for each group separately. If you don't want messages to be duplicated in the cluster, use the same group name everywhere. +- `kafka_format` – Message format. Uses the same notation as the SQL ` FORMAT` function, such as ` JSONEachRow`. For more information, see the "Formats" section. + +Optional parameters: + +- `kafka_row_delimiter` – The record (row) delimiter character that terminates each message. +- `kafka_schema` – An optional parameter that must be used if the format requires a schema definition. For example, [Cap'n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
+- `kafka_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition. + +Examples: ```sql CREATE TABLE queue ( @@ -31,6 +52,24 @@ Example: ) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow'); SELECT * FROM queue LIMIT 5; + + CREATE TABLE queue2 ( + timestamp UInt64, + level String, + message String + ) ENGINE = Kafka SETTINGS kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'topic', + kafka_group_name = 'group1', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 4; + + CREATE TABLE queue2 ( + timestamp UInt64, + level String, + message String + ) ENGINE = Kafka('localhost:9092', 'topic', 'group1') + SETTINGS kafka_format = 'JSONEachRow', + kafka_num_consumers = 4; ``` The delivered messages are tracked automatically, so each message in a group is only counted once. If you want to get the data twice, then create a copy of the table with another group name. @@ -59,7 +98,7 @@ Example: level String, total UInt64 ) ENGINE = SummingMergeTree(day, (day, level), 8192); - + CREATE MATERIALIZED VIEW consumer TO daily AS SELECT toDate(toDateTime(timestamp)) AS day, level, count() as total FROM queue GROUP BY day, level; diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index e428bf27bef..068dd9eb606 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -246,6 +246,8 @@ Mutations are totally ordered by their creation order and are applied to each pa A mutation query returns immediately after the mutation entry is added (in case of replicated tables to ZooKeeper, for nonreplicated tables - to the filesystem). The mutation itself executes asynchronously using the system profile settings. To track the progress of mutations you can use the `system.mutations` table. 
A mutation that was successfully submitted will continue to execute even if ClickHouse servers are restarted. There is no way to roll back the mutation once it is submitted. +Entries for finished mutations are not deleted right away (the number of preserved entries is determined by the `finished_mutations_to_keep` storage engine parameter). Older mutation entries are deleted. + #### system.mutations table The table contains information about mutations of MergeTree tables and their progress. Each mutation command is represented by a single row. The table has the following columns: diff --git a/docs/ru/operations/table_engines/kafka.md b/docs/ru/operations/table_engines/kafka.md index f368fae3860..a0f370df795 100644 --- a/docs/ru/operations/table_engines/kafka.md +++ b/docs/ru/operations/table_engines/kafka.md @@ -1,6 +1,6 @@ # Kafka -Движок работает с [Apache Kafka](http://kafka.apache.org/). +Движок работает с [Apache Kafka](http://kafka.apache.org/). Kafka позволяет: @@ -8,20 +8,40 @@ Kafka позволяет: - Организовать отказо-устойчивое хранилище. - Обрабатывать потоки по мере их появления. +Старый формат: + ``` -Kafka(broker_list, topic_list, group_name, format[, schema, num_consumers]) +Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format + [, kafka_row_delimiter, kafka_schema, kafka_num_consumers]) ``` -Параметры: +Новый формат: -- `broker_list` - Перечень брокеров, разделенный запятыми (`localhost:9092`). -- `topic_list` - Перечень необходимых топиков Kafka (`my_topic`). -- `group_name` - Группа потребителя Kafka (`group1`). Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы. -- `format` - Формат сообщений. Имеет те же обозначения, что выдает SQL-выражение `FORMAT`, например, `JSONEachRow`. Подробнее смотрите в разделе "Форматы". -- `schema` - Опциональный параметр, необходимый, если используется формат, требующий определения схемы. 
Например, [Cap'n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. -- `num_consumers` - Количество потребителей (consumer) на таблицу. По умолчанию `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя. +``` +Kafka SETTINGS + kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'topic1,topic2', + kafka_group_name = 'group1', + kafka_format = 'JSONEachRow', + kafka_row_delimiter = '\n', + kafka_schema = '', + kafka_num_consumers = 2 +``` -Пример: +Обязательные параметры: + +- `kafka_broker_list` - Перечень брокеров, разделенный запятыми (`localhost:9092`). +- `kafka_topic_list` - Перечень необходимых топиков Kafka (`my_topic`). +- `kafka_group_name` - Группа потребителя Kafka (`group1`). Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы. +- `kafka_format` - Формат сообщений. Имеет те же обозначения, что выдает SQL-выражение `FORMAT`, например, `JSONEachRow`. Подробнее смотрите в разделе "Форматы". + +Опциональные параметры: + +- `kafka_row_delimiter` - Символ-разделитель записей (строк), которым завершается сообщение. +- `kafka_schema` - Опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Cap'n Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`. +- `kafka_num_consumers` - Количество потребителей (consumer) на таблицу. По умолчанию `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя.
+ +Примеры: ```sql CREATE TABLE queue ( @@ -31,6 +51,24 @@ Kafka(broker_list, topic_list, group_name, format[, schema, num_consumers]) ) ENGINE = Kafka('localhost:9092', 'topic', 'group1', 'JSONEachRow'); SELECT * FROM queue LIMIT 5; + + CREATE TABLE queue2 ( + timestamp UInt64, + level String, + message String + ) ENGINE = Kafka SETTINGS kafka_broker_list = 'localhost:9092', + kafka_topic_list = 'topic', + kafka_group_name = 'group1', + kafka_format = 'JSONEachRow', + kafka_num_consumers = 4; + + CREATE TABLE queue2 ( + timestamp UInt64, + level String, + message String + ) ENGINE = Kafka('localhost:9092', 'topic', 'group1') + SETTINGS kafka_format = 'JSONEachRow', + kafka_num_consumers = 4; ``` Полученные сообщения отслеживаются автоматически, поэтому из одной группы каждое сообщение считывается только один раз. Если необходимо получить данные дважды, то создайте копию таблицы с другим именем группы. @@ -59,7 +97,7 @@ Kafka(broker_list, topic_list, group_name, format[, schema, num_consumers]) level String, total UInt64 ) ENGINE = SummingMergeTree(day, (day, level), 8192); - + CREATE MATERIALIZED VIEW consumer TO daily AS SELECT toDate(toDateTime(timestamp)) AS day, level, count() as total FROM queue GROUP BY day, level; diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md index 454114231f6..2478954f26e 100644 --- a/docs/ru/query_language/alter.md +++ b/docs/ru/query_language/alter.md @@ -245,6 +245,8 @@ ALTER TABLE [db.]table DELETE WHERE expr Запрос завершается немедленно после добавления информации о мутации (для реплицированных таблиц - в ZooKeeper, для нереплицированных - на файловую систему). Сама мутация выполняется асинхронно, используя настройки системного профиля. Следить за ходом её выполнения можно по таблице `system.mutations`. Добавленные мутации будут выполняться до конца даже в случае перезапуска серверов ClickHouse. Откатить мутацию после её добавления нельзя. 
+Записи о последних выполненных мутациях удаляются не сразу (количество сохраняемых мутаций определяется параметром движка таблиц `finished_mutations_to_keep`). Более старые записи удаляются. + #### Таблица system.mutations Таблица содержит информацию о ходе выполнения мутаций MergeTree-таблиц. Каждой команде мутации соответствует одна строка. В таблице есть следующие столбцы: diff --git a/libs/libcommon/cmake/find_gperftools.cmake b/libs/libcommon/cmake/find_gperftools.cmake index a3eba003800..f2a042a2cef 100644 --- a/libs/libcommon/cmake/find_gperftools.cmake +++ b/libs/libcommon/cmake/find_gperftools.cmake @@ -1,4 +1,4 @@ -if (ARCH_FREEBSD OR ARCH_32) +if (OS_FREEBSD OR ARCH_32) option (USE_INTERNAL_GPERFTOOLS_LIBRARY "Set to FALSE to use system gperftools (tcmalloc) library instead of bundled" OFF) else () option (USE_INTERNAL_GPERFTOOLS_LIBRARY "Set to FALSE to use system gperftools (tcmalloc) library instead of bundled" ${NOT_UNBUNDLED}) @@ -13,7 +13,7 @@ if (ENABLE_TCMALLOC) find_package (Gperftools) endif () - if (NOT (GPERFTOOLS_FOUND AND GPERFTOOLS_INCLUDE_DIR AND GPERFTOOLS_TCMALLOC_MINIMAL) AND NOT (ARCH_FREEBSD OR ARCH_32)) + if (NOT (GPERFTOOLS_FOUND AND GPERFTOOLS_INCLUDE_DIR AND GPERFTOOLS_TCMALLOC_MINIMAL) AND NOT (OS_FREEBSD OR ARCH_32)) set (USE_INTERNAL_GPERFTOOLS_LIBRARY 1) set (GPERFTOOLS_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libtcmalloc/include") set (GPERFTOOLS_TCMALLOC_MINIMAL tcmalloc_minimal_internal) diff --git a/libs/libcommon/cmake/find_jemalloc.cmake b/libs/libcommon/cmake/find_jemalloc.cmake index 33c1162b6bf..79898425de6 100644 --- a/libs/libcommon/cmake/find_jemalloc.cmake +++ b/libs/libcommon/cmake/find_jemalloc.cmake @@ -1,13 +1,20 @@ -option (ENABLE_JEMALLOC "Set to TRUE to use jemalloc" ON) -option (USE_INTERNAL_JEMALLOC_LIBRARY "Set to FALSE to use system jemalloc library instead of bundled" ${NOT_UNBUNDLED}) +option (ENABLE_JEMALLOC "Set to TRUE to use jemalloc" ${OS_LINUX}) +if (OS_LINUX) + option 
(USE_INTERNAL_JEMALLOC_LIBRARY "Set to FALSE to use system jemalloc library instead of bundled" ${NOT_UNBUNDLED}) +else () + option (USE_INTERNAL_JEMALLOC_LIBRARY "Set to FALSE to use system jemalloc library instead of bundled" OFF) +endif() if (ENABLE_JEMALLOC) - if (USE_INTERNAL_JEMALLOC_LIBRARY) - set (JEMALLOC_LIBRARIES "jemalloc") - else () + if (NOT USE_INTERNAL_JEMALLOC_LIBRARY) find_package (JeMalloc) endif () + if (NOT JEMALLOC_LIBRARIES) + set (JEMALLOC_LIBRARIES "jemalloc") + set (USE_INTERNAL_JEMALLOC_LIBRARY 1) + endif () + if (JEMALLOC_LIBRARIES) set (USE_JEMALLOC 1) else () diff --git a/libs/libglibc-compatibility/glibc-compatibility.c b/libs/libglibc-compatibility/glibc-compatibility.c index 08f6cc04162..ad88d3254bf 100644 --- a/libs/libglibc-compatibility/glibc-compatibility.c +++ b/libs/libglibc-compatibility/glibc-compatibility.c @@ -132,6 +132,33 @@ const char * __shm_directory(size_t * len) } +/// https://boringssl.googlesource.com/boringssl/+/ad1907fe73334d6c696c8539646c21b11178f20f%5E!/#F0 +/* Copyright (c) 2015, Google Inc. + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ +void explicit_bzero(void * buf, size_t len) +{ + memset(buf, 0, len); + __asm__ __volatile__("" :: "r"(buf) : "memory"); +} + +void __explicit_bzero_chk(void * buf, size_t len, size_t unused) +{ + explicit_bzero(buf, len); +} + + #if defined (__cplusplus) } #endif diff --git a/libs/libmysqlxx/cmake/find_mysqlclient.cmake b/libs/libmysqlxx/cmake/find_mysqlclient.cmake index 2a98b76d3a8..85a6275f1ff 100644 --- a/libs/libmysqlxx/cmake/find_mysqlclient.cmake +++ b/libs/libmysqlxx/cmake/find_mysqlclient.cmake @@ -1,5 +1,9 @@ -option (ENABLE_MYSQL "Enable MySQL" ON) -option (USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" ${NOT_UNBUNDLED}) +option (ENABLE_MYSQL "Enable MySQL" ${OS_LINUX}) +if (OS_LINUX) + option (USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" ${NOT_UNBUNDLED}) +else () + option (USE_INTERNAL_MYSQL_LIBRARY "Set to FALSE to use system mysqlclient library instead of bundled" OFF) +endif () if (USE_INTERNAL_MYSQL_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/mariadb-connector-c/README") message (WARNING "submodule contrib/mariadb-connector-c is missing. to fix try run: \n git submodule update --init --recursive") diff --git a/release b/release index 5fe9e4c94eb..8e0d749f6e0 100755 --- a/release +++ b/release @@ -52,10 +52,6 @@ do elif [[ $1 == '--version' ]]; then gen_revision_author $2 exit 0 - # who use this? 
- #elif [[ $1 == '--head' ]]; then - # VERSION_STRING=`git rev-parse HEAD` - # shift elif [[ $1 == '--pbuilder' ]]; then # Default shift @@ -64,7 +60,7 @@ do shift elif [[ $1 == '--fast' ]]; then # Wrong but fast pbuilder mode: create base package with all depends - EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libmariadbclient-dev libicu-dev libltdl-dev libreadline-dev libssl-dev unixodbc-dev psmisc bash expect python python-lxml python-termcolor python-requests curl perl sudo openssl netcat-openbsd" + EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libicu-dev libreadline-dev psmisc bash expect python python-lxml python-termcolor python-requests curl perl sudo openssl netcat-openbsd" shift else echo "Unknown option $1" @@ -77,9 +73,6 @@ if [ -n "$SANITIZER" ] then CMAKE_BUILD_TYPE=$SANITIZER VERSION_POSTFIX+=+${SANITIZER,,} - # todo: нужно ли отключить libtcmalloc? - LIBTCMALLOC_OPTS="-DENABLE_TCMALLOC=0" - # GLIBC_COMPATIBILITY отключен по умолчанию export DEB_CC=${DEB_CC=clang-6.0} export DEB_CXX=${DEB_CXX=clang++-6.0} EXTRAPACKAGES="$EXTRAPACKAGES clang-6.0 lld-6.0" diff --git a/release_lib.sh b/release_lib.sh index 11f196fdd1b..244f3619001 100644 --- a/release_lib.sh +++ b/release_lib.sh @@ -95,9 +95,12 @@ function gen_revision_author { dbms/cmake/version.cmake gen_changelog "$VERSION_STRING" "" "$AUTHOR" "" - git commit -m "$auto_message [$VERSION_STRING] [$VERSION_REVISION]" dbms/cmake/version.cmake debian/changelog + gen_dockerfiles "$VERSION_STRING" + git commit -m "$auto_message [$VERSION_STRING] [$VERSION_REVISION]" dbms/cmake/version.cmake debian/changelog docker/*/Dockerfile git push + echo "Generated version: ${VERSION_STRING}, revision: ${VERSION_REVISION}." + # Second tag for correct version information in version.cmake inside tag if git tag --force -a "$tag" -m "$tag" then @@ -111,11 +114,15 @@ function gen_revision_author { fi fi + # Reset testing branch to current commit. 
+ git checkout testing + git reset --hard "$tag" + git push + else get_version echo reusing old version $VERSION_STRING fi - fi AUTHOR=$(git config --get user.name || echo ${USER}) @@ -153,3 +160,9 @@ function gen_changelog { -e "s/[@]EMAIL[@]/$(whoami)@yandex-team.ru/g" \ < $CHLOG.in > $CHLOG } + +# Change package versions that are installed for Docker images. +function gen_dockerfiles { + VERSION_STRING="$1" + ls -1 docker/*/Dockerfile | xargs sed -i -r -e 's/ARG version=.+$/ARG version='$VERSION_STRING'/' +} diff --git a/utils/build/build_debian.sh b/utils/build/build_debian.sh index c58ac1c6f02..c8f8d32cc8b 100755 --- a/utils/build/build_debian.sh +++ b/utils/build/build_debian.sh @@ -6,7 +6,7 @@ # curl https://raw.githubusercontent.com/yandex/ClickHouse/master/utils/build/build_debian.sh | sh # install compiler and libs -sudo apt install -y git bash cmake gcc-7 g++-7 libicu-dev libreadline-dev libmysqlclient-dev unixodbc-dev libltdl-dev libssl-dev +sudo apt install -y git bash cmake gcc-7 g++-7 libicu-dev libreadline-dev # for -DUNBUNDLED=1 mode: #sudo apt install -y libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libzstd-dev libre2-dev libsparsehash-dev librdkafka-dev libcapnp-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libunwind-dev googletest libcctz-dev diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh index dc47c8fc3a3..443f532386e 100755 --- a/utils/build/build_debian_unbundled.sh +++ b/utils/build/build_debian_unbundled.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../.. && pwd) diff --git a/utils/travis/normal.sh b/utils/travis/normal.sh index 3ab23a1147f..2824a17c5c9 100755 --- a/utils/travis/normal.sh +++ b/utils/travis/normal.sh @@ -32,7 +32,7 @@ cmake $CUR_DIR/../.. 
-DCMAKE_CXX_COMPILER=`which $DEB_CXX $CXX` -DCMAKE_C_COMPIL `# Use all possible contrib libs from system` \ -DUNBUNDLED=1 \ `# Disable all features` \ - -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_TCMALLOC=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ + -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 $CMAKE_FLAGS \ && ninja clickhouse-bundle \ `# Skip tests:` \ `# 00281 requires internal compiler` \ diff --git a/utils/travis/pbuilder.sh b/utils/travis/pbuilder.sh index 17cd6629528..d993e8715b8 100755 --- a/utils/travis/pbuilder.sh +++ b/utils/travis/pbuilder.sh @@ -24,7 +24,7 @@ env TEST_RUN=${TEST_RUN=1} \ DEB_CC=${DEB_CC=$CC} DEB_CXX=${DEB_CXX=$CXX} \ CCACHE_SIZE=${CCACHE_SIZE:=4G} \ `# Disable all features` \ - CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Debug -DUNBUNDLED=1 -DENABLE_CAPNP=0 -DENABLE_RDKAFKA=0 -DENABLE_JEMALLOC=0 -DENABLE_UNWIND=0 -DENABLE_MYSQL=0 -DENABLE_POCO_ODBC=0 -DENABLE_ODBC=0 -DUSE_INTERNAL_LLVM_LIBRARY=0 -DCMAKE_C_FLAGS_ADD='-O0 -g0' -DCMAKE_CXX_FLAGS_ADD='-O0 -g0' $CMAKE_FLAGS" \ `# Use all possible contrib libs from system` \ `# psmisc - killall` \ EXTRAPACKAGES="psmisc clang-5.0 lld-5.0 liblld-5.0-dev libclang-5.0-dev liblld-5.0 libc++abi-dev libc++-dev libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libjemalloc-dev $EXTRAPACKAGES" \