Merge branch 'master' of github.com:yandex/ClickHouse

BayoNet 2019-09-24 16:59:38 +03:00
commit 776e1b63f0
410 changed files with 5794 additions and 19893 deletions

View File

@ -17,7 +17,7 @@ A clear and concise description of what works not as it is supposed to.
* Which interface to use, if it matters
* Non-default settings, if any
* `CREATE TABLE` statements for all tables involved
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/yandex/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
* Queries to run that lead to unexpected result
**Expected behavior**

View File

@ -17,7 +17,7 @@ What exactly works slower than expected?
* Which interface to use, if it matters
* Non-default settings, if any
* `CREATE TABLE` statements for all tables involved
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/yandex/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
* Sample data for all these tables, use [clickhouse-obfuscator](https://github.com/ClickHouse/ClickHouse/blob/master/dbms/programs/obfuscator/Obfuscator.cpp#L42-L80) if necessary
* Queries to run that lead to slow performance
**Expected performance**

File diff suppressed because it is too large.

View File

@ -13,6 +13,8 @@ foreach(policy
endif()
endforeach()
include (cmake/target.cmake)
# Ignore export() since we don't use it,
# but it gets broken with global targets via link_libraries()
macro (export)
@ -41,28 +43,12 @@ else()
message(STATUS "IPO/LTO not enabled.")
endif()
if (COMPILER_GCC)
# Require minimum version of gcc
set (GCC_MINIMUM_VERSION 8)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${GCC_MINIMUM_VERSION} AND NOT CMAKE_VERSION VERSION_LESS 2.8.9)
message (FATAL_ERROR "GCC version must be at least ${GCC_MINIMUM_VERSION}. For example, if GCC ${GCC_MINIMUM_VERSION} is available under gcc-${GCC_MINIMUM_VERSION}, g++-${GCC_MINIMUM_VERSION} names, do the following: export CC=gcc-${GCC_MINIMUM_VERSION} CXX=g++-${GCC_MINIMUM_VERSION}; rm -rf CMakeCache.txt CMakeFiles; and re run cmake or ./release.")
endif ()
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# Require minimum version of clang
set (CLANG_MINIMUM_VERSION 7)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION})
message (FATAL_ERROR "Clang version must be at least ${CLANG_MINIMUM_VERSION}.")
endif ()
else ()
message (WARNING "You are using an unsupported compiler. Compilation has only been tested with Clang 6+ and GCC 7+.")
endif ()
# Check that submodules are present only if source was downloaded with git
if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/boost/boost")
message (FATAL_ERROR "Submodules are not initialized. Run\n\tgit submodule update --init --recursive")
endif ()
include (cmake/find_ccache.cmake)
include (cmake/find/ccache.cmake)
if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "None")
message (STATUS "CMAKE_BUILD_TYPE is not set, set to default = RELWITHDEBINFO")
@ -144,23 +130,6 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
endif ()
endif ()
string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER})
find_program (LLD_PATH NAMES "lld${COMPILER_POSTFIX}" "lld")
find_program (GOLD_PATH NAMES "ld.gold" "gold")
if (COMPILER_CLANG AND LLD_PATH AND NOT LINKER_NAME)
set (LINKER_NAME "lld")
elseif (GOLD_PATH)
set (LINKER_NAME "gold")
endif ()
if (LINKER_NAME)
message(STATUS "Using linker: ${LINKER_NAME} (selected from: LLD_PATH=${LLD_PATH}; GOLD_PATH=${GOLD_PATH}; COMPILER_POSTFIX=${COMPILER_POSTFIX})")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
endif ()
# Make sure the final executable has symbols exported
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")
@ -195,7 +164,7 @@ else()
endif()
if(NOT DISABLE_CPU_OPTIMIZE)
include(cmake/test_cpu.cmake)
include(cmake/cpu_features.cmake)
endif()
if(NOT COMPILER_CLANG) # clang: error: the clang compiler does not support '-march=native'
@ -206,16 +175,9 @@ if (ARCH_NATIVE)
set (COMPILER_FLAGS "${COMPILER_FLAGS} -march=native")
endif ()
if (CMAKE_VERSION VERSION_LESS "3.8.0")
if (NOT MSVC)
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
endif ()
else ()
set (CMAKE_CXX_STANDARD 17)
set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS
set (CMAKE_CXX_STANDARD_REQUIRED ON)
set (CXX_FLAGS_INTERNAL_COMPILER "-std=c++17")
endif ()
set (CMAKE_CXX_STANDARD 17)
set (CMAKE_CXX_EXTENSIONS 0) # https://cmake.org/cmake/help/latest/prop_tgt/CXX_EXTENSIONS.html#prop_tgt:CXX_EXTENSIONS
set (CMAKE_CXX_STANDARD_REQUIRED ON)
if (COMPILER_GCC OR COMPILER_CLANG)
# Enable C++14 sized global deallocation functions. It should be enabled by setting -std=c++14 but I'm not sure.
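The comment above refers to C++14 sized deallocation. A minimal standalone sketch (not part of this commit) of what that feature means: when sized deallocation is enabled, the compiler may call the two-argument `operator delete`, so an allocator can skip looking up the size of the allocation.

```cpp
// Hedged sketch: global new/delete replacement showing the sized overload.
// With sized deallocation enabled (implied by -std=c++14 and later on most
// compilers, sometimes via -fsized-deallocation) the compiler may call
// operator delete(void *, std::size_t).
#include <cstdio>
#include <cstdlib>
#include <new>

void * operator new(std::size_t size)
{
    void * p = std::malloc(size);
    if (!p)
        throw std::bad_alloc{};
    return p;
}

void operator delete(void * ptr) noexcept
{
    std::puts("unsized delete");
    std::free(ptr);
}

void operator delete(void * ptr, std::size_t size) noexcept
{
    std::printf("sized delete of %zu bytes\n", size);  // the allocator can use the size directly
    std::free(ptr);
}

int main()
{
    delete new long(42);  // which overload runs depends on the compiler flags
}
```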
@ -257,13 +219,19 @@ if (UNBUNDLED OR NOT (OS_LINUX OR APPLE) OR ARCH_32)
endif ()
# Make these extra checks for correct library dependencies.
if (NOT SANITIZE)
if (OS_LINUX AND NOT SANITIZE)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-undefined")
endif ()
include(cmake/dbms_glob_sources.cmake)
include(cmake/default_libs.cmake)
if (OS_LINUX)
include(cmake/linux/default_libs.cmake)
elseif (OS_DARWIN)
include(cmake/darwin/sdk.cmake)
include(cmake/darwin/default_libs.cmake)
endif ()
######################################
### Add targets below this comment ###
@ -321,49 +289,49 @@ endif ()
message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
include(GNUInstallDirs)
include (cmake/find_contrib_lib.cmake)
include (cmake/contrib_finder.cmake)
include (cmake/lib_name.cmake)
find_contrib_lib(double-conversion) # Must be before parquet
include (cmake/find_ssl.cmake)
include (cmake/find_icu.cmake)
include (cmake/find_boost.cmake)
include (cmake/find_zlib.cmake)
include (cmake/find_zstd.cmake)
include (cmake/find_ltdl.cmake) # for odbc
include (cmake/find_termcap.cmake)
include (cmake/find_odbc.cmake)
include (cmake/find/ssl.cmake)
include (cmake/find/icu.cmake)
include (cmake/find/boost.cmake)
include (cmake/find/zlib.cmake)
include (cmake/find/zstd.cmake)
include (cmake/find/ltdl.cmake) # for odbc
include (cmake/find/termcap.cmake)
include (cmake/find/odbc.cmake)
# openssl, zlib, odbc before poco
include (cmake/find_poco.cmake)
include (cmake/find_lz4.cmake)
include (cmake/find_xxhash.cmake)
include (cmake/find_sparsehash.cmake)
include (cmake/find_rt.cmake)
include (cmake/find_execinfo.cmake)
include (cmake/find_readline_edit.cmake)
include (cmake/find_re2.cmake)
include (cmake/find_libgsasl.cmake)
include (cmake/find_rdkafka.cmake)
include (cmake/find_capnp.cmake)
include (cmake/find_llvm.cmake)
include (cmake/find_h3.cmake)
include (cmake/find_cpuid.cmake) # Freebsd, bundled
include (cmake/find/poco.cmake)
include (cmake/find/lz4.cmake)
include (cmake/find/xxhash.cmake)
include (cmake/find/sparsehash.cmake)
include (cmake/find/rt.cmake)
include (cmake/find/execinfo.cmake)
include (cmake/find/readline_edit.cmake)
include (cmake/find/re2.cmake)
include (cmake/find/libgsasl.cmake)
include (cmake/find/rdkafka.cmake)
include (cmake/find/capnp.cmake)
include (cmake/find/llvm.cmake)
include (cmake/find/h3.cmake)
include (cmake/find/cpuid.cmake) # Freebsd, bundled
if (NOT USE_CPUID)
include (cmake/find_cpuinfo.cmake) # Debian
include (cmake/find/cpuinfo.cmake) # Debian
endif()
include (cmake/find_libxml2.cmake)
include (cmake/find_brotli.cmake)
include (cmake/find_protobuf.cmake)
include (cmake/find_pdqsort.cmake)
include (cmake/find_hdfs3.cmake) # uses protobuf
include (cmake/find_consistent-hashing.cmake)
include (cmake/find_base64.cmake)
include (cmake/find_parquet.cmake)
include (cmake/find_hyperscan.cmake)
include (cmake/find_simdjson.cmake)
include (cmake/find_rapidjson.cmake)
include (cmake/find_fastops.cmake)
#include (cmake/find_orc.cmake)
include (cmake/find/libxml2.cmake)
include (cmake/find/brotli.cmake)
include (cmake/find/protobuf.cmake)
include (cmake/find/pdqsort.cmake)
include (cmake/find/hdfs3.cmake) # uses protobuf
include (cmake/find/consistent-hashing.cmake)
include (cmake/find/base64.cmake)
include (cmake/find/parquet.cmake)
include (cmake/find/hyperscan.cmake)
include (cmake/find/simdjson.cmake)
include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/orc.cmake)
find_contrib_lib(cityhash)
find_contrib_lib(farmhash)
@ -371,7 +339,7 @@ find_contrib_lib(metrohash)
find_contrib_lib(btrie)
if (ENABLE_TESTS)
include (cmake/find_gtest.cmake)
include (cmake/find/gtest.cmake)
endif ()
# Need to process before "contrib" dir:

View File

@ -1,4 +1,4 @@
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/yandex/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.yandex)
[![ClickHouse — open source distributed column-oriented DBMS](https://github.com/ClickHouse/ClickHouse/raw/master/website/images/logo-400x240.png)](https://clickhouse.yandex)
ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.

View File

@ -14,4 +14,4 @@ currently being supported with security updates:
## Reporting a Vulnerability
To report a potential vulnerability in ClickHouse please use the security advisory feature of GitHub:
https://github.com/yandex/ClickHouse/security/advisories
https://github.com/ClickHouse/ClickHouse/security/advisories

View File

@ -12,19 +12,6 @@ if ((ARCH_ARM AND NOT ARCH_AARCH64) OR ARCH_I386)
message (FATAL_ERROR "32bit platforms are not supported")
endif ()
if (CMAKE_SYSTEM MATCHES "Linux")
set (OS_LINUX 1)
endif ()
if (CMAKE_SYSTEM MATCHES "FreeBSD")
set (OS_FREEBSD 1)
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (COMPILER_GCC 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set (COMPILER_CLANG 1)
endif ()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(ppc64le.*|PPC64LE.*)")
set (ARCH_PPC64LE 1)
if (COMPILER_CLANG OR (COMPILER_GCC AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 8))

View File

@ -0,0 +1,37 @@
set (DEFAULT_LIBS "-nodefaultlibs")
if (NOT COMPILER_CLANG)
message (FATAL_ERROR "Darwin build is supported only for Clang")
endif ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${COVERAGE_OPTION} -lc -lm -lpthread -ldl")
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
# Global libraries
add_library(global-libs INTERFACE)
# Unfortunately '-pthread' doesn't work with '-nodefaultlibs'.
# Just make sure we have pthreads at all.
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
include (cmake/find/cxx.cmake)
add_library(global-group INTERFACE)
target_link_libraries(global-group INTERFACE
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
)
link_libraries(global-group)
# FIXME: remove when all contribs get custom cmake lists
install(
TARGETS global-group global-libs
EXPORT global
)

cmake/darwin/sdk.cmake (new file, 11 lines)
View File

@ -0,0 +1,11 @@
option (SDK_PATH "Path to the SDK to build with" "")
if (NOT EXISTS "${SDK_PATH}/SDKSettings.plist")
message (FATAL_ERROR "Wrong SDK path provided: ${SDK_PATH}")
endif ()
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -isysroot ${SDK_PATH} -mmacosx-version-min=10.14")

View File

@ -1,25 +1,11 @@
if (OS_LINUX AND COMPILER_CLANG)
if (COMPILER_CLANG)
option (USE_LIBCXX "Use libc++ and libc++abi instead of libstdc++" ON)
option (USE_INTERNAL_LIBCXX_LIBRARY "Set to FALSE to use system libcxx and libcxxabi libraries instead of bundled" ${NOT_UNBUNDLED})
endif()
if (USE_LIBCXX)
set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -D_LIBCPP_DEBUG=0") # More checks in debug build.
endif ()
# FIXME: make better check for submodule presence
if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxx/include/vector")
message (WARNING "submodule contrib/libcxx is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_LIBCXX_LIBRARY 0)
endif ()
# FIXME: make better check for submodule presence
if (USE_INTERNAL_LIBCXX_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi/src")
message (WARNING "submodule contrib/libcxxabi is missing. to fix try run: \n git submodule update --init --recursive")
set (USE_INTERNAL_LIBCXX_LIBRARY 0)
endif ()
if (USE_LIBCXX)
if (NOT USE_INTERNAL_LIBCXX_LIBRARY)
find_library (LIBCXX_LIBRARY c++)
find_library (LIBCXXFS_LIBRARY c++fs)

View File

@ -21,7 +21,7 @@ endif()
if(ARROW_INCLUDE_DIR AND PARQUET_INCLUDE_DIR)
elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
include(cmake/find_snappy.cmake)
include(cmake/find/snappy.cmake)
set(CAN_USE_INTERNAL_PARQUET_LIBRARY 1)
include(CheckCXXSourceCompiles)
if(NOT USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)

cmake/find/snappy.cmake (new file, 13 lines)
View File

@ -0,0 +1,13 @@
option(USE_SNAPPY "Enable support of snappy library" ON)
if (USE_SNAPPY)
option (USE_INTERNAL_SNAPPY_LIBRARY "Set to FALSE to use system snappy library instead of bundled" ${NOT_UNBUNDLED})
if(NOT USE_INTERNAL_SNAPPY_LIBRARY)
find_library(SNAPPY_LIBRARY snappy)
else ()
set(SNAPPY_LIBRARY snappy)
endif()
message (STATUS "Using snappy: ${SNAPPY_LIBRARY}")
endif ()

View File

@ -1,27 +0,0 @@
option(USE_INTERNAL_SNAPPY_LIBRARY "Set to FALSE to use system snappy library instead of bundled" ${NOT_UNBUNDLED})
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/snappy/snappy.h")
if(USE_INTERNAL_SNAPPY_LIBRARY)
message(WARNING "submodule contrib/snappy is missing. to fix try run: \n git submodule update --init --recursive")
set(USE_INTERNAL_SNAPPY_LIBRARY 0)
endif()
set(MISSING_INTERNAL_SNAPPY_LIBRARY 1)
endif()
if(NOT USE_INTERNAL_SNAPPY_LIBRARY)
find_library(SNAPPY_LIBRARY snappy)
find_path(SNAPPY_INCLUDE_DIR NAMES snappy.h PATHS ${SNAPPY_INCLUDE_PATHS})
endif()
if(SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR)
elseif(NOT MISSING_INTERNAL_SNAPPY_LIBRARY)
set(SNAPPY_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/snappy)
set(USE_INTERNAL_SNAPPY_LIBRARY 1)
set(SNAPPY_LIBRARY snappy)
endif()
if(SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR)
set(USE_SNAPPY 1)
endif()
message(STATUS "Using snappy=${USE_SNAPPY}: ${SNAPPY_INCLUDE_DIR} : ${SNAPPY_LIBRARY}")

View File

@ -3,20 +3,18 @@
set (DEFAULT_LIBS "-nodefaultlibs")
if (OS_LINUX)
# We need builtins from Clang's RT even without libcxx - for ubsan+int128.
# See https://bugs.llvm.org/show_bug.cgi?id=16404
if (COMPILER_CLANG)
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
set (BUILTINS_LIBRARY "-lgcc")
endif ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
# We need builtins from Clang's RT even without libcxx - for ubsan+int128.
# See https://bugs.llvm.org/show_bug.cgi?id=16404
if (COMPILER_CLANG)
execute_process (COMMAND ${CMAKE_CXX_COMPILER} --print-file-name=libclang_rt.builtins-${CMAKE_SYSTEM_PROCESSOR}.a OUTPUT_VARIABLE BUILTINS_LIBRARY OUTPUT_STRIP_TRAILING_WHITESPACE)
else ()
set (BUILTINS_LIBRARY "-lgcc")
endif ()
set (DEFAULT_LIBS "${DEFAULT_LIBS} ${BUILTINS_LIBRARY} ${COVERAGE_OPTION} -lc -lm -lrt -lpthread -ldl")
message(STATUS "Default libraries: ${DEFAULT_LIBS}")
set(CMAKE_CXX_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
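The comment in the hunk above explains why Clang's compiler-rt builtins are linked even without libc++: UBSan's overflow checks on 128-bit integers lower to runtime helpers (such as `__muloti4`) that live in `libclang_rt.builtins` rather than in libc. A hedged illustration; the build line is an assumption, not taken from this commit.

```cpp
// Compile sketch (assumption): clang++ -fsanitize=undefined int128_demo.cpp
// The checked 128-bit multiplication below lowers to a compiler-rt helper.
#include <iostream>

int main()
{
    __int128 x = 123456789012345678LL;
    __int128 y = 987654321098765432LL;
    __int128 z = x * y;  // signed-overflow check calls a builtins routine (e.g. __muloti4)

    std::cout << static_cast<long long>(z % 1000000007) << '\n';
}
```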
@ -30,8 +28,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
add_subdirectory(libs/libglibc-compatibility)
include (cmake/find_unwind.cmake)
include (cmake/find_cxx.cmake)
include (cmake/find/unwind.cmake)
include (cmake/find/cxx.cmake)
add_library(global-group INTERFACE)
target_link_libraries(global-group INTERFACE
@ -42,6 +40,7 @@ target_link_libraries(global-group INTERFACE
link_libraries(global-group)
# FIXME: remove when all contribs get custom cmake lists
install(
TARGETS global-group global-libs
EXPORT global

View File

@ -14,8 +14,11 @@ if (SANITIZE)
endif ()
elseif (SANITIZE STREQUAL "memory")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=memory -fsanitize-memory-track-origins")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=memory -fsanitize-memory-track-origins")
set (MSAN_FLAGS "-fsanitize=memory -fsanitize-memory-track-origins -fno-optimize-sibling-calls")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${MSAN_FLAGS}")
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=memory")
endif()
@ -23,6 +26,28 @@ if (SANITIZE)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libmsan")
endif ()
# Temporarily disable many external libraries that don't work under
# MemorySanitizer yet.
set (ENABLE_HDFS 0 CACHE BOOL "")
set (ENABLE_CAPNP 0 CACHE BOOL "")
set (ENABLE_RDKAFKA 0 CACHE BOOL "")
set (ENABLE_ICU 0 CACHE BOOL "")
set (ENABLE_POCO_MONGODB 0 CACHE BOOL "")
set (ENABLE_POCO_NETSSL 0 CACHE BOOL "")
set (ENABLE_POCO_ODBC 0 CACHE BOOL "")
set (ENABLE_ODBC 0 CACHE BOOL "")
set (ENABLE_MYSQL 0 CACHE BOOL "")
set (ENABLE_EMBEDDED_COMPILER 0 CACHE BOOL "")
set (USE_INTERNAL_CAPNP_LIBRARY 0 CACHE BOOL "")
set (USE_SIMDJSON 0 CACHE BOOL "")
set (ENABLE_READLINE 0 CACHE BOOL "")
set (ENABLE_ORC 0 CACHE BOOL "")
set (ENABLE_PARQUET 0 CACHE BOOL "")
set (USE_CAPNP 0 CACHE BOOL "")
set (USE_INTERNAL_ORC_LIBRARY 0 CACHE BOOL "")
set (USE_ORC 0 CACHE BOOL "")
set (ENABLE_SSL 0 CACHE BOOL "")
elseif (SANITIZE STREQUAL "thread")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=thread")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=thread")

cmake/target.cmake (new file, 71 lines)
View File

@ -0,0 +1,71 @@
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
set (OS_LINUX 1)
add_definitions(-D OS_LINUX)
elseif (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
set (OS_FREEBSD 1)
add_definitions(-D OS_FREEBSD)
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
set (OS_DARWIN 1)
add_definitions(-D OS_DARWIN)
endif ()
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
set (COMPILER_GCC 1)
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
set (COMPILER_CLANG 1)
endif ()
if (COMPILER_GCC)
# Require minimum version of gcc
set (GCC_MINIMUM_VERSION 8)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${GCC_MINIMUM_VERSION} AND NOT CMAKE_VERSION VERSION_LESS 2.8.9)
message (FATAL_ERROR "GCC version must be at least ${GCC_MINIMUM_VERSION}. For example, if GCC ${GCC_MINIMUM_VERSION} is available under gcc-${GCC_MINIMUM_VERSION}, g++-${GCC_MINIMUM_VERSION} names, do the following: export CC=gcc-${GCC_MINIMUM_VERSION} CXX=g++-${GCC_MINIMUM_VERSION}; rm -rf CMakeCache.txt CMakeFiles; and re run cmake or ./release.")
endif ()
elseif (COMPILER_CLANG)
# Require minimum version of clang
set (CLANG_MINIMUM_VERSION 7)
if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS ${CLANG_MINIMUM_VERSION})
message (FATAL_ERROR "Clang version must be at least ${CLANG_MINIMUM_VERSION}.")
endif ()
else ()
message (WARNING "You are using an unsupported compiler. Compilation has only been tested with Clang 6+ and GCC 7+.")
endif ()
string(REGEX MATCH "-?[0-9]+(.[0-9]+)?$" COMPILER_POSTFIX ${CMAKE_CXX_COMPILER})
if (OS_LINUX)
find_program (LLD_PATH NAMES "lld${COMPILER_POSTFIX}" "lld")
find_program (GOLD_PATH NAMES "ld.gold" "gold")
endif()
option (LINKER_NAME "Linker name or full path")
if (NOT LINKER_NAME)
if (COMPILER_CLANG AND LLD_PATH)
set (LINKER_NAME "lld")
elseif (GOLD_PATH)
set (LINKER_NAME "gold")
endif ()
endif ()
if (LINKER_NAME)
message(STATUS "Using linker: ${LINKER_NAME} (selected from: LLD_PATH=${LLD_PATH}; GOLD_PATH=${GOLD_PATH}; COMPILER_POSTFIX=${COMPILER_POSTFIX})")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
endif ()
if (CMAKE_CROSSCOMPILING)
if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
set (CMAKE_SYSTEM_PROCESSOR x86_64)
set (CMAKE_C_COMPILER_TARGET x86_64-apple-darwin)
set (CMAKE_CXX_COMPILER_TARGET x86_64-apple-darwin)
set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
set (HAS_POST_2038_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
endif ()
# Don't know why but CXX_STANDARD doesn't work for cross-compilation
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
endif ()

View File

@ -193,13 +193,15 @@ if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
endif()
else()
if(USE_INTERNAL_SNAPPY_LIBRARY)
set(SNAPPY_BUILD_TESTS 0 CACHE INTERNAL "")
if (NOT MAKE_STATIC_LIBRARIES)
set(BUILD_SHARED_LIBS 1) # TODO: set at root dir
endif()
add_subdirectory(snappy)
set (SNAPPY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/snappy")
if(SANITIZE STREQUAL "undefined")
target_compile_options(${SNAPPY_LIBRARY} PRIVATE -fno-sanitize=undefined)
endif()

View File

@ -42,7 +42,12 @@ add_library(cxx ${SRCS})
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
target_compile_options(cxx PUBLIC -nostdinc++ -Wno-reserved-id-macro)
if (OS_DARWIN)
target_compile_options(cxx PUBLIC -Wno-ctad-maybe-unsupported)
endif ()
target_link_libraries(cxx PUBLIC cxxabi)
install(

contrib/zlib-ng (vendored submodule, 2 lines)

@ -1 +1 @@
Subproject commit cb43e7fa08ec29fd76d84e3bb35258a0f0bf3df3
Subproject commit cff0f500d9399d7cd3b9461a693d211e4b86fcc9

View File

@ -132,6 +132,11 @@ list (APPEND dbms_headers src/TableFunctions/ITableFunction.h src/TableFunctio
list (APPEND dbms_sources src/Dictionaries/DictionaryFactory.cpp src/Dictionaries/DictionarySourceFactory.cpp src/Dictionaries/DictionaryStructure.cpp)
list (APPEND dbms_headers src/Dictionaries/DictionaryFactory.h src/Dictionaries/DictionarySourceFactory.h src/Dictionaries/DictionaryStructure.h)
if (NOT ENABLE_SSL)
list (REMOVE_ITEM clickhouse_common_io_sources src/Common/OpenSSLHelpers.cpp)
list (REMOVE_ITEM clickhouse_common_io_headers src/Common/OpenSSLHelpers.h)
endif ()
add_library(clickhouse_common_io ${clickhouse_common_io_headers} ${clickhouse_common_io_sources})
if (OS_FREEBSD)
@ -156,7 +161,7 @@ macro(add_object_library name common_path)
endmacro()
add_object_library(clickhouse_core src/Core)
add_object_library(clickhouse_compression src/Compression/)
add_object_library(clickhouse_compression src/Compression)
add_object_library(clickhouse_datastreams src/DataStreams)
add_object_library(clickhouse_datatypes src/DataTypes)
add_object_library(clickhouse_databases src/Databases)

View File

@ -1,11 +1,11 @@
# These strings are autochanged from release_lib.sh:
set(VERSION_REVISION 54426)
set(VERSION_REVISION 54427)
set(VERSION_MAJOR 19)
set(VERSION_MINOR 15)
set(VERSION_MINOR 16)
set(VERSION_PATCH 1)
set(VERSION_GITHASH 6f1a8c37abe6ee4e7ee74c0b5cb9c05a87417b61)
set(VERSION_DESCRIBE v19.15.1.1-prestable)
set(VERSION_STRING 19.15.1.1)
set(VERSION_GITHASH 38f65a6a2120d2e76bcf71131068f41195149dfc)
set(VERSION_DESCRIBE v19.16.1.1-prestable)
set(VERSION_STRING 19.16.1.1)
# end of autochange
set(VERSION_EXTRA "" CACHE STRING "")

View File

@ -46,7 +46,7 @@
# define _POSIX_VDISABLE VDISABLE
#endif
static volatile sig_atomic_t signo[_NSIG];
static volatile sig_atomic_t signo[NSIG];
static void handler(int);
@ -67,7 +67,7 @@ readpassphrase(const char *prompt, char *buf, size_t bufsiz, int flags)
}
restart:
for (i = 0; i < _NSIG; i++)
for (i = 0; i < NSIG; i++)
signo[i] = 0;
nr = -1;
save_errno = 0;
@ -173,7 +173,7 @@ restart:
* If we were interrupted by a signal, resend it to ourselves
* now that we have restored the signal handlers.
*/
for (i = 0; i < _NSIG; i++) {
for (i = 0; i < NSIG; i++) {
if (signo[i]) {
kill(getpid(), i);
switch (i) {

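The vendored readpassphrase port above records signals in a handler and re-raises them later; the `_NSIG` to `NSIG` change only switches to the portable macro. A simplified, hedged sketch of that record-then-resend pattern (POSIX assumed, not the readpassphrase code itself):

```cpp
// Assumes a POSIX system; NSIG comes from <signal.h>.
#include <signal.h>
#include <unistd.h>

static volatile sig_atomic_t signo[NSIG];

static void handler(int s)
{
    if (s > 0 && s < NSIG)
        signo[s] = 1;  // async-signal-safe: only record which signal arrived
}

int main()
{
    struct sigaction sa = {}, saved = {};
    sa.sa_handler = handler;
    sigaction(SIGINT, &sa, &saved);

    sleep(2);  // "critical section": an arriving SIGINT is recorded, not acted on

    sigaction(SIGINT, &saved, nullptr);  // restore the original handler
    for (int i = 0; i < NSIG; ++i)
        if (signo[i])
            kill(getpid(), i);  // resend the signal to ourselves, as the comment above describes
}
```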
View File

@ -33,9 +33,11 @@ endif ()
clickhouse_program_add_library(odbc-bridge)
# clickhouse-odbc-bridge is always a separate binary.
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
if (OS_LINUX)
# clickhouse-odbc-bridge is always a separate binary.
# Reason: it must not export symbols from SSL, mariadb-client, etc. to not break ABI compatibility with ODBC drivers.
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic")
endif ()
add_executable(clickhouse-odbc-bridge odbc-bridge.cpp)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)

View File

@ -54,16 +54,16 @@
#include <Common/StatusFile.h>
#include "TCPHandlerFactory.h"
#include "Common/config_version.h"
#include "MySQLHandlerFactory.h"
#include <Common/SensitiveDataMasker.h>
#if defined(__linux__)
#if defined(OS_LINUX)
#include <Common/hasLinuxCapability.h>
#include <sys/mman.h>
#endif
#if USE_POCO_NETSSL
#include "MySQLHandlerFactory.h"
#include <Poco/Net/Context.h>
#include <Poco/Net/SecureServerSocket.h>
#endif

View File

@ -179,12 +179,11 @@ protected:
// MAP_POPULATE to mmap(). This takes some time, but should be faster
// overall than having a hot loop interrupted by page faults.
// It is only supported on Linux.
#if defined(__linux__)
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
| (mmap_populate ? MAP_POPULATE : 0);
#else
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS;
#if defined(OS_LINUX)
| (mmap_populate ? MAP_POPULATE : 0)
#endif
;
private:
void * allocNoTrack(size_t size, size_t alignment)

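The Allocator hunk above folds the Linux-only `MAP_POPULATE` flag into a single constant instead of keeping two `#if` branches. A small standalone sketch of the same idea (POSIX headers assumed):

```cpp
#include <sys/mman.h>
#include <cstddef>
#include <cstring>

static constexpr bool mmap_populate = true;

// MAP_POPULATE pre-faults the pages at mmap() time; it exists only on Linux,
// so it is OR-ed in conditionally, mirroring the diff above.
static constexpr int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS
#if defined(__linux__)
    | (mmap_populate ? MAP_POPULATE : 0)
#endif
    ;

void * allocPrefaulted(std::size_t size)
{
    void * buf = mmap(nullptr, size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
    if (buf == MAP_FAILED)
        return nullptr;
    std::memset(buf, 0, size);  // on Linux this loop takes no minor page faults
    return buf;
}

int main()
{
    return allocPrefaulted(1 << 20) ? 0 : 1;
}
```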
View File

@ -1,10 +1,12 @@
#include <Common/DiskSpaceMonitor.h>
#include <Common/escapeForFileName.h>
#include <IO/WriteHelpers.h>
#include <set>
#include <Common/escapeForFileName.h>
#include <Poco/File.h>
namespace DB
{
@ -45,7 +47,7 @@ std::filesystem::path getMountPoint(std::filesystem::path absolute_path)
return absolute_path;
}
/// Returns name of filesystem mounted to mount_point
/// Returns name of filesystem mounted to mount_point
#if !defined(__linux__)
[[noreturn]]
#endif
@ -65,7 +67,7 @@ std::string getFilesystemName([[maybe_unused]] const std::string & mount_point)
throw DB::Exception("Cannot find name of filesystem by mount point " + mount_point, ErrorCodes::SYSTEM_ERROR);
return fs_info.mnt_fsname;
#else
throw DB::Exception("Supported on linux only", ErrorCodes::NOT_IMPLEMENTED);
throw DB::Exception("The function getFilesystemName is supported on Linux only", ErrorCodes::NOT_IMPLEMENTED);
#endif
}
@ -82,7 +84,7 @@ bool Disk::tryReserve(UInt64 bytes) const
std::lock_guard lock(mutex);
if (bytes == 0)
{
LOG_DEBUG(&Logger::get("DiskSpaceMonitor"), "Reserving 0 bytes on disk " << name);
LOG_DEBUG(&Logger::get("DiskSpaceMonitor"), "Reserving 0 bytes on disk " << backQuote(name));
++reservation_count;
return true;
}
@ -93,7 +95,8 @@ bool Disk::tryReserve(UInt64 bytes) const
{
LOG_DEBUG(
&Logger::get("DiskSpaceMonitor"),
"Reserving " << bytes << " bytes on disk " << name << " having unreserved " << unreserved_space << " bytes.");
"Reserving " << formatReadableSizeWithBinarySuffix(bytes) << " on disk " << backQuote(name)
<< ", having unreserved " << formatReadableSizeWithBinarySuffix(unreserved_space) << ".");
++reservation_count;
reserved_bytes += bytes;
return true;
@ -283,14 +286,14 @@ Volume::Volume(
max_data_part_size = static_cast<decltype(max_data_part_size)>(sum_size * ratio / disks.size());
for (size_t i = 0; i < disks.size(); ++i)
if (sizes[i] < max_data_part_size)
LOG_WARNING(logger, "Disk " << disks[i]->getName() << " on volume " << config_prefix <<
" have not enough space (" << sizes[i] <<
LOG_WARNING(logger, "Disk " << backQuote(disks[i]->getName()) << " on volume " << backQuote(config_prefix) <<
" have not enough space (" << formatReadableSizeWithBinarySuffix(sizes[i]) <<
") for containing part the size of max_data_part_size (" <<
max_data_part_size << ")");
formatReadableSizeWithBinarySuffix(max_data_part_size) << ")");
}
constexpr UInt64 MIN_PART_SIZE = 8u * 1024u * 1024u;
if (max_data_part_size < MIN_PART_SIZE)
LOG_WARNING(logger, "Volume '" << name << "' max_data_part_size is too low ("
LOG_WARNING(logger, "Volume " << backQuote(name) << " max_data_part_size is too low ("
<< formatReadableSizeWithBinarySuffix(max_data_part_size) << " < "
<< formatReadableSizeWithBinarySuffix(MIN_PART_SIZE) << ")");
}
@ -505,7 +508,7 @@ StoragePolicySelector::StoragePolicySelector(
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG);
policies.emplace(name, std::make_shared<StoragePolicy>(name, config, config_prefix + "." + name, disks));
LOG_INFO(&Logger::get("StoragePolicySelector"), "Storage policy " << name << " loaded");
LOG_INFO(&Logger::get("StoragePolicySelector"), "Storage policy " << backQuote(name) << " loaded");
}
constexpr auto default_storage_policy_name = "default";

View File

@ -457,6 +457,7 @@ namespace ErrorCodes
extern const int UNKNOWN_PROTOCOL = 480;
extern const int PATH_ACCESS_DENIED = 481;
extern const int DICTIONARY_ACCESS_DENIED = 482;
extern const int TOO_MANY_REDIRECTS = 483;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;

View File

@ -199,7 +199,7 @@ PoolWithFailoverBase<TNestedPool>::getMany(
for (const ShuffledPool & pool: shuffled_pools)
{
auto & pool_state = shared_pool_states[pool.index];
pool_state.error_count = std::min(max_error_cap, static_cast<size_t>(pool_state.error_count + pool.error_count));
pool_state.error_count = std::min<UInt64>(max_error_cap, pool_state.error_count + pool.error_count);
}
});

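The one-line change above replaces a `static_cast` with an explicit template argument to `std::min`. A hedged illustration of why: with operands of different integer types, template argument deduction for `std::min` fails, and the explicit `<UInt64>` avoids both the error and the narrowing cast.

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
    std::size_t max_error_cap = 50;   // size_t, as in PoolWithFailoverBase
    std::uint64_t error_count = 45;
    std::uint64_t new_errors = 20;

    // std::min(max_error_cap, error_count + new_errors);
    //     ^ fails to deduce a single T on platforms where size_t != uint64_t

    auto clamped = std::min<std::uint64_t>(max_error_cap, error_count + new_errors);
    std::cout << clamped << '\n';  // 50: the error count is capped
}
```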
View File

@ -30,13 +30,13 @@ namespace
/// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler.
constexpr size_t QUERY_ID_MAX_LEN = 1024;
# if !defined(__APPLE__)
#if defined(OS_LINUX)
thread_local size_t write_trace_iteration = 0;
#endif
void writeTraceInfo(TimerType timer_type, int /* sig */, siginfo_t * info, void * context)
{
# if !defined(__APPLE__)
#if defined(OS_LINUX)
/// Quickly drop if signal handler is called too frequently.
/// Otherwise we may end up infinitely processing signals instead of doing any useful work.
++write_trace_iteration;

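The QueryProfiler comment above is about dropping samples when the timer signal fires faster than the handler can do useful work. A simplified, hedged sketch of the thread_local counter idea; this is not the real handler, and the printing is for demonstration only (it is not async-signal-safe in general).

```cpp
#include <csignal>
#include <cstddef>
#include <cstdio>

static thread_local std::size_t write_trace_iteration = 0;

static void writeTraceInfo(int /* sig */)
{
    ++write_trace_iteration;
    if (write_trace_iteration % 2 != 0)
        return;  // too frequent: drop this sample instead of queueing more work

    // Placeholder for collecting and writing the stack trace.
    std::fprintf(stderr, "sample %zu collected\n", write_trace_iteration);
}

int main()
{
    std::signal(SIGPROF, writeTraceInfo);
    for (int i = 0; i < 6; ++i)
        std::raise(SIGPROF);  // in the real profiler this comes from a POSIX timer
}
```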
View File

@ -55,7 +55,7 @@ public:
size_t wipeSensitiveData(std::string & data) const;
/// setInstance is not thread-safe and should be called once in single-thread mode.
/// https://github.com/yandex/ClickHouse/pull/6810#discussion_r321183367
/// https://github.com/ClickHouse/ClickHouse/pull/6810#discussion_r321183367
static void setInstance(std::unique_ptr<SensitiveDataMasker> sensitive_data_masker_);
static SensitiveDataMasker * getInstance();

View File

@ -9,7 +9,8 @@
#ifdef __APPLE__
// ucontext is not available without _XOPEN_SOURCE
#define _XOPEN_SOURCE 700
# pragma clang diagnostic ignored "-Wreserved-id-macro"
# define _XOPEN_SOURCE 700
#endif
#include <ucontext.h>

View File

@ -4,7 +4,7 @@
#include <unistd.h>
#if defined(__linux__)
#if defined(OS_LINUX)
#include "hasLinuxCapability.h"
#include <common/unaligned.h>
@ -321,7 +321,7 @@ bool TaskStatsInfoGetter::checkPermissions()
TaskStatsInfoGetter::TaskStatsInfoGetter()
{
throw Exception("TaskStats are not implemented for this OS.", ErrorCodes::NOT_IMPLEMENTED);
// TODO: throw Exception("TaskStats are not implemented for this OS.", ErrorCodes::NOT_IMPLEMENTED);
}
void TaskStatsInfoGetter::getStat(::taskstats &, pid_t)

View File

@ -46,7 +46,7 @@ TraceCollector::TraceCollector(std::shared_ptr<TraceLog> & trace_log_)
if (-1 == fcntl(trace_pipe.fds_rw[1], F_SETFL, flags | O_NONBLOCK))
throwFromErrno("Cannot set non-blocking mode of pipe", ErrorCodes::CANNOT_FCNTL);
#if !defined(__FreeBSD__) && !defined(__APPLE__)
#if defined(OS_LINUX)
/** Increase pipe size to avoid slowdown during fine-grained trace collection.
*/
int pipe_size = fcntl(trace_pipe.fds_rw[1], F_GETPIPE_SZ);

View File

@ -158,7 +158,9 @@ struct ZooKeeperArgs
}
/// Shuffle the hosts to distribute the load among ZooKeeper nodes.
std::shuffle(hosts_strings.begin(), hosts_strings.end(), thread_local_rng);
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(hosts_strings.begin(), hosts_strings.end(), g);
for (auto & host : hosts_strings)
{

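A minimal, hedged sketch of the shuffle above: the ZooKeeper hosts are randomly reordered per client so that connection load spreads across the ensemble instead of every client trying the first configured node.

```cpp
#include <algorithm>
#include <iostream>
#include <random>
#include <string>
#include <vector>

int main()
{
    std::vector<std::string> hosts_strings = {"zk1:2181", "zk2:2181", "zk3:2181"};

    std::random_device rd;
    std::mt19937 g(rd());
    std::shuffle(hosts_strings.begin(), hosts_strings.end(), g);

    for (const auto & host : hosts_strings)
        std::cout << host << '\n';  // the first host is tried first, hence the shuffle
}
```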
View File

@ -17,7 +17,6 @@ namespace DB
}
}
static thread_local void * stack_address = nullptr;
static thread_local size_t max_stack_size = 0;
@ -27,7 +26,7 @@ void checkStackSize()
if (!stack_address)
{
#if defined(__APPLE__)
#if defined(OS_DARWIN)
// pthread_get_stacksize_np() returns a value too low for the main thread on
// OSX 10.9, http://mail.openjdk.java.net/pipermail/hotspot-dev/2013-October/011369.html
//
@ -36,23 +35,25 @@ void checkStackSize()
// Stack size for the main thread is 8MB on OSX excluding the guard page size.
pthread_t thread = pthread_self();
max_stack_size = pthread_main_np() ? (8 * 1024 * 1024) : pthread_get_stacksize_np(thread);
stack_address = pthread_get_stackaddr_np(thread);
// stack_address points to the start of the stack, not to the end that pthread_get_stackaddr_np returns
stack_address = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - max_stack_size);
#else
pthread_attr_t attr;
#if defined(__FreeBSD__)
# if defined(__FreeBSD__)
pthread_attr_init(&attr);
if (0 != pthread_attr_get_np(pthread_self(), &attr))
throwFromErrno("Cannot pthread_attr_get_np", ErrorCodes::CANNOT_PTHREAD_ATTR);
#else
# else
if (0 != pthread_getattr_np(pthread_self(), &attr))
throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR);
#endif
# endif
SCOPE_EXIT({ pthread_attr_destroy(&attr); });
if (0 != pthread_attr_getstack(&attr, &stack_address, &max_stack_size))
throwFromErrno("Cannot pthread_getattr_np", ErrorCodes::CANNOT_PTHREAD_ATTR);
#endif
#endif // OS_DARWIN
}
const void * frame_address = __builtin_frame_address(0);

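The Darwin fix above accounts for `pthread_get_stackaddr_np()` returning the high end of the stack (since stacks grow down), so the usable base is that address minus the stack size; Linux takes the `pthread_getattr_np` path instead. A hedged sketch of the two branches (compile with -pthread):

```cpp
#include <cstdint>
#include <cstdio>
#include <pthread.h>

int main()
{
#if defined(__APPLE__)
    pthread_t thread = pthread_self();
    size_t max_stack_size = pthread_get_stacksize_np(thread);
    // Subtract the size to get the lowest usable address of the stack.
    void * stack_address = reinterpret_cast<void *>(
        reinterpret_cast<uintptr_t>(pthread_get_stackaddr_np(thread)) - max_stack_size);
#else
    pthread_attr_t attr;
    if (pthread_getattr_np(pthread_self(), &attr) != 0)
        return 1;
    void * stack_address = nullptr;
    size_t max_stack_size = 0;
    pthread_attr_getstack(&attr, &stack_address, &max_stack_size);
    pthread_attr_destroy(&attr);
#endif
    std::printf("stack base %p, size %zu\n", stack_address, max_stack_size);
}
```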
View File

@ -1,7 +1,7 @@
#if defined(__MACH__)
#include <stdlib.h>
#else
#if defined(OS_LINUX)
#include <malloc.h>
#elif defined(OS_DARWIN)
#include <malloc/malloc.h>
#endif
#include <new>

View File

@ -36,7 +36,7 @@ target_include_directories (simple_cache PRIVATE ${DBMS_INCLUDE_DIR})
target_link_libraries (simple_cache PRIVATE common)
add_executable (compact_array compact_array.cpp)
target_link_libraries (compact_array PRIVATE clickhouse_common_io stdc++fs)
target_link_libraries (compact_array PRIVATE clickhouse_common_io)
add_executable (radix_sort radix_sort.cpp)
target_link_libraries (radix_sort PRIVATE clickhouse_common_io)

View File

@ -12,11 +12,11 @@ NO_INLINE const void * getAddress()
return __builtin_return_address(0);
}
using namespace DB;
int main(int argc, char ** argv)
{
#ifdef __ELF__
using namespace DB;
if (argc < 2)
{
std::cerr << "Usage: ./symbol_index address\n";

View File

@ -219,11 +219,14 @@ size_t Block::getPositionByName(const std::string & name) const
}
void Block::checkNumberOfRows() const
void Block::checkNumberOfRows(bool allow_null_columns) const
{
ssize_t rows = -1;
for (const auto & elem : data)
{
if (!elem.column && allow_null_columns)
continue;
if (!elem.column)
throw Exception("Column " + elem.name + " in block is nullptr, in method checkNumberOfRows."
, ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

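A hedged, heavily simplified sketch (not the real Block class) of the check the hunk above extends: every column in a block must have the same row count, and with `allow_null_columns` a missing (nullptr) column is simply skipped instead of raising an error.

```cpp
#include <cstddef>
#include <memory>
#include <stdexcept>
#include <string>
#include <vector>

// Stand-ins for IColumn / ColumnWithTypeAndName, purely for illustration.
struct ColumnWithName
{
    std::string name;
    std::shared_ptr<std::vector<int>> column;
};

void checkNumberOfRows(const std::vector<ColumnWithName> & data, bool allow_null_columns = false)
{
    std::ptrdiff_t rows = -1;
    for (const auto & elem : data)
    {
        if (!elem.column && allow_null_columns)
            continue;  // the new flag: a missing column is tolerated
        if (!elem.column)
            throw std::runtime_error("Column " + elem.name + " in block is nullptr");

        auto size = static_cast<std::ptrdiff_t>(elem.column->size());
        if (rows == -1)
            rows = size;
        else if (rows != size)
            throw std::runtime_error("Sizes of columns don't match: " + elem.name);
    }
}

int main()
{
    std::vector<ColumnWithName> block = {
        {"a", std::make_shared<std::vector<int>>(3)},
        {"b", nullptr},  // e.g. a column temporarily left empty by a rewrite
    };
    checkNumberOfRows(block, /* allow_null_columns = */ true);  // passes
}
```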
View File

@ -90,7 +90,7 @@ public:
size_t columns() const { return data.size(); }
/// Checks that every column in block is not nullptr and has same number of elements.
void checkNumberOfRows() const;
void checkNumberOfRows(bool allow_null_columns = false) const;
/// Approximate number of bytes in memory - for profiling and limits.
size_t bytes() const;

View File

@ -1,12 +1,14 @@
#include <Common/config.h>
#if USE_POCO_NETSSL
#include "MySQLProtocol.h"
#if USE_SSL
#include <IO/WriteBuffer.h>
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <common/logger_useful.h>
#include <random>
#include <sstream>
#include "MySQLProtocol.h"
namespace DB::MySQLProtocol
@ -102,4 +104,5 @@ size_t getLengthEncodedStringSize(const String & s)
}
}
#endif
#endif // USE_SSL

View File

@ -1,6 +1,8 @@
#pragma once
#include <Common/config.h>
#if USE_POCO_NETSSL
#include "config_core.h"
#if USE_SSL
#include <ext/scope_guard.h>
#include <openssl/pem.h>
@ -1077,4 +1079,5 @@ private:
}
}
#endif
#endif // USE_SSL

View File

@ -170,6 +170,8 @@ struct Settings : public SettingsCollection<Settings>
\
M(SettingBool, add_http_cors_header, false, "Write add http CORS header.") \
\
M(SettingUInt64, max_http_get_redirects, 0, "Max number of HTTP GET redirect hops allowed. Make sure additional security measures are in place to prevent a malicious server from redirecting your requests to unexpected services.") \
\
M(SettingBool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).") \
M(SettingBool, input_format_with_names_use_header, false, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.") \
M(SettingBool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).") \

View File

@ -261,7 +261,9 @@ void registerOutputFormatProcessorXML(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver(FormatFactory & factory);
void registerOutputFormatProcessorODBCDriver2(FormatFactory & factory);
void registerOutputFormatProcessorNull(FormatFactory & factory);
#if USE_SSL
void registerOutputFormatProcessorMySQLWrite(FormatFactory & factory);
#endif
/// Input only formats.
void registerInputFormatProcessorCapnProto(FormatFactory & factory);
@ -309,7 +311,7 @@ FormatFactory::FormatFactory()
registerOutputFormatProcessorODBCDriver(*this);
registerOutputFormatProcessorODBCDriver2(*this);
registerOutputFormatProcessorNull(*this);
#if USE_POCO_NETSSL
#if USE_SSL
registerOutputFormatProcessorMySQLWrite(*this);
#endif
}

View File

@ -946,9 +946,10 @@ public:
{
WhichDataType which(arguments[0]);
if (!which.isStringOrFixedString()
&& !which.isDateOrDateTime()
&& !which.isUInt())
if (!which.isStringOrFixedString() &&
!which.isDateOrDateTime() &&
!which.isUInt() &&
!which.isFloat())
throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
@ -1021,6 +1022,45 @@ public:
}
}
template <typename T>
bool tryExecuteFloat(const IColumn * col, ColumnPtr & col_res)
{
const ColumnVector<T> * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
static constexpr size_t FLOAT_HEX_LENGTH = sizeof(T) * 2 + 1; /// Including trailing zero byte.
if (col_vec)
{
auto col_str = ColumnString::create();
ColumnString::Chars & out_vec = col_str->getChars();
ColumnString::Offsets & out_offsets = col_str->getOffsets();
const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
size_t size = in_vec.size();
out_offsets.resize(size);
out_vec.resize(size * FLOAT_HEX_LENGTH);
size_t pos = 0;
char * out = reinterpret_cast<char *>(&out_vec[0]);
for (size_t i = 0; i < size; ++i)
{
const UInt8 * in_pos = reinterpret_cast<const UInt8 *>(&in_vec[i]);
executeOneString(in_pos, in_pos + sizeof(T), out);
pos += FLOAT_HEX_LENGTH;
out_offsets[i] = pos;
}
col_res = std::move(col_str);
return true;
}
else
{
return false;
}
}
void executeOneString(const UInt8 * pos, const UInt8 * end, char *& out)
{
while (pos < end)
@ -1135,7 +1175,9 @@ public:
tryExecuteUInt<UInt32>(column, res_column) ||
tryExecuteUInt<UInt64>(column, res_column) ||
tryExecuteString(column, res_column) ||
tryExecuteFixedString(column, res_column))
tryExecuteFixedString(column, res_column) ||
tryExecuteFloat<Float32>(column, res_column) ||
tryExecuteFloat<Float64>(column, res_column))
return;
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName()

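The `tryExecuteFloat` code added above extends the `hex` function to floating-point arguments: the result is simply the hex dump of the value's bytes in memory order. A hedged, standalone sketch of that behaviour:

```cpp
#include <cstring>
#include <iostream>
#include <string>

std::string hexBytes(double value)
{
    static const char digits[] = "0123456789ABCDEF";
    unsigned char bytes[sizeof(value)];
    std::memcpy(bytes, &value, sizeof(value));  // reinterpret the float's storage

    std::string out;
    for (unsigned char b : bytes)
    {
        out += digits[b >> 4];
        out += digits[b & 0x0F];
    }
    return out;
}

int main()
{
    std::cout << hexBytes(1.0) << '\n';  // byte order follows the machine (little-endian on x86)
}
```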
View File

@ -337,19 +337,43 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
size_t num_rows = input_rows_count;
ColumnPtr indexes;
/// Find the first LowCardinality column and replace it with its nested dictionary.
for (auto arg : args)
{
ColumnWithTypeAndName & column = block.getByPosition(arg);
if (auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
/// Only a single LowCardinality column is supported for now.
if (indexes)
throw Exception("Expected single dictionary argument for function.", ErrorCodes::LOGICAL_ERROR);
indexes = low_cardinality_column->getIndexesPtr();
num_rows = low_cardinality_column->getDictionary().size();
auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception("Incompatible type for low cardinality column: " + column.type->getName(),
ErrorCodes::LOGICAL_ERROR);
if (can_be_executed_on_default_arguments)
{
/// Normal case, when the function can be executed on the values' default.
column.column = low_cardinality_column->getDictionary().getNestedColumn();
indexes = low_cardinality_column->getIndexesPtr();
}
else
{
/// Special case when default value can't be used. Example: 1 % LowCardinality(Int).
/// LowCardinality always contains default, so 1 % 0 will throw exception in normal case.
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
num_rows = column.column->size();
column.type = low_cardinality_type->getDictionaryType();
}
}
/// Change size of constants.
for (auto arg : args)
{
ColumnWithTypeAndName & column = block.getByPosition(arg);
@ -358,26 +382,12 @@ static ColumnPtr replaceLowCardinalityColumnsByNestedAndGetDictionaryIndexes(
column.column = column_const->removeLowCardinality()->cloneResized(num_rows);
column.type = removeLowCardinality(column.type);
}
else if (auto * low_cardinality_column = checkAndGetColumn<ColumnLowCardinality>(column.column.get()))
{
auto * low_cardinality_type = checkAndGetDataType<DataTypeLowCardinality>(column.type.get());
if (!low_cardinality_type)
throw Exception("Incompatible type for low cardinality column: " + column.type->getName(),
ErrorCodes::LOGICAL_ERROR);
if (can_be_executed_on_default_arguments)
column.column = low_cardinality_column->getDictionary().getNestedColumn();
else
{
auto dict_encoded = low_cardinality_column->getMinimalDictionaryEncodedColumn(0, low_cardinality_column->size());
column.column = dict_encoded.dictionary;
indexes = dict_encoded.indexes;
}
column.type = low_cardinality_type->getDictionaryType();
}
}
#ifndef NDEBUG
block.checkNumberOfRows(true);
#endif
return indexes;
}

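A conceptual, hedged sketch (heavily simplified, not ClickHouse code) of why the rewrite above executes functions on the dictionary of a LowCardinality column: the function runs once per distinct value, and the per-row result is then recovered through the index column.

```cpp
#include <iostream>
#include <string>
#include <vector>

int main()
{
    // A LowCardinality column is roughly: dictionary of distinct values + per-row indexes.
    std::vector<std::string> dictionary = {"", "moscow", "berlin"};  // position 0 holds the default value
    std::vector<size_t> indexes = {1, 2, 1, 1, 2};

    // Run the "function" (here: string length) on the small dictionary only.
    std::vector<size_t> dict_result;
    for (const auto & value : dictionary)
        dict_result.push_back(value.size());

    // Materialize the per-row result via the indexes.
    for (size_t idx : indexes)
        std::cout << dict_result[idx] << '\n';
}
```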
View File

@ -1,9 +1,11 @@
#include <Core/Defines.h>
namespace DB
{
class FunctionFactory;
#ifdef __ELF__
#if defined(OS_LINUX)
void registerFunctionAddressToSymbol(FunctionFactory & factory);
void registerFunctionAddressToLine(FunctionFactory & factory);
#endif
@ -12,7 +14,7 @@ void registerFunctionTrap(FunctionFactory & factory);
void registerFunctionsIntrospection(FunctionFactory & factory)
{
#ifdef __ELF__
#if defined(OS_LINUX)
registerFunctionAddressToSymbol(factory);
registerFunctionAddressToLine(factory);
#endif
@ -21,4 +23,3 @@ void registerFunctionsIntrospection(FunctionFactory & factory)
}
}

View File

@ -4,6 +4,7 @@ namespace DB
{
class FunctionFactory;
void registerFunctionRepeat(FunctionFactory &);
void registerFunctionEmpty(FunctionFactory &);
void registerFunctionNotEmpty(FunctionFactory &);
void registerFunctionLength(FunctionFactory &);
@ -34,6 +35,7 @@ void registerFunctionTryBase64Decode(FunctionFactory &);
void registerFunctionsString(FunctionFactory & factory)
{
registerFunctionRepeat(factory);
registerFunctionEmpty(factory);
registerFunctionNotEmpty(factory);
registerFunctionLength(factory);

View File

@ -0,0 +1,212 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/castTypeToEither.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int TOO_LARGE_STRING_SIZE;
}
struct RepeatImpl
{
/// Safety threshold against DoS.
static inline void checkRepeatTime(UInt64 repeat_time)
{
static constexpr UInt64 max_repeat_times = 1000000;
if (repeat_time > max_repeat_times)
throw Exception("Too many times to repeat (" + std::to_string(repeat_time) + "), maximum is: " + std::to_string(max_repeat_times),
ErrorCodes::TOO_LARGE_STRING_SIZE);
}
static void vectorStrConstRepeat(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets,
UInt64 repeat_time)
{
checkRepeatTime(repeat_time);
UInt64 data_size = 0;
res_offsets.assign(offsets);
for (UInt64 i = 0; i < offsets.size(); ++i)
{
data_size += (offsets[i] - offsets[i - 1] - 1) * repeat_time + 1; /// Note that accessing -1th element is valid for PaddedPODArray.
res_offsets[i] = data_size;
}
res_data.resize(data_size);
for (UInt64 i = 0; i < res_offsets.size(); ++i)
{
process(data.data() + offsets[i - 1], res_data.data() + res_offsets[i - 1], offsets[i] - offsets[i - 1], repeat_time);
}
}
template <typename T>
static void vectorStrVectorRepeat(
const ColumnString::Chars & data,
const ColumnString::Offsets & offsets,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets,
const PaddedPODArray<T> & col_num)
{
UInt64 data_size = 0;
res_offsets.assign(offsets);
for (UInt64 i = 0; i < col_num.size(); ++i)
{
data_size += (offsets[i] - offsets[i - 1] - 1) * col_num[i] + 1;
res_offsets[i] = data_size;
}
res_data.resize(data_size);
for (UInt64 i = 0; i < col_num.size(); ++i)
{
T repeat_time = col_num[i];
checkRepeatTime(repeat_time);
process(data.data() + offsets[i - 1], res_data.data() + res_offsets[i - 1], offsets[i] - offsets[i - 1], repeat_time);
}
}
template <typename T>
static void constStrVectorRepeat(
const StringRef & copy_str,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets,
const PaddedPODArray<T> & col_num)
{
UInt64 data_size = 0;
res_offsets.resize(col_num.size());
UInt64 str_size = copy_str.size;
UInt64 col_size = col_num.size();
for (UInt64 i = 0; i < col_size; ++i)
{
data_size += str_size * col_num[i] + 1;
res_offsets[i] = data_size;
}
res_data.resize(data_size);
for (UInt64 i = 0; i < col_size; ++i)
{
T repeat_time = col_num[i];
checkRepeatTime(repeat_time);
process(
reinterpret_cast<UInt8 *>(const_cast<char *>(copy_str.data)),
res_data.data() + res_offsets[i - 1],
str_size + 1,
repeat_time);
}
}
private:
static void process(const UInt8 * src, UInt8 * dst, UInt64 size, UInt64 repeat_time)
{
for (UInt64 i = 0; i < repeat_time; ++i)
{
memcpy(dst, src, size - 1);
dst += size - 1;
}
*dst = 0;
}
};
class FunctionRepeat : public IFunction
{
template <typename F>
static bool castType(const IDataType * type, F && f)
{
return castTypeToEither<DataTypeUInt8, DataTypeUInt16, DataTypeUInt32, DataTypeUInt64>(type, std::forward<F>(f));
}
public:
static constexpr auto name = "repeat";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionRepeat>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 2; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception(
"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!isUnsignedInteger(arguments[1]))
throw Exception(
"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return arguments[0];
}
bool useDefaultImplementationForConstants() const override { return true; }
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t) override
{
const auto & strcolumn = block.getByPosition(arguments[0]).column;
const auto & numcolumn = block.getByPosition(arguments[1]).column;
if (const ColumnString * col = checkAndGetColumn<ColumnString>(strcolumn.get()))
{
if (const ColumnConst * scale_column_num = checkAndGetColumn<ColumnConst>(numcolumn.get()))
{
UInt64 repeat_time = scale_column_num->getValue<UInt64>();
auto col_res = ColumnString::create();
RepeatImpl::vectorStrConstRepeat(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), repeat_time);
block.getByPosition(result).column = std::move(col_res);
return;
}
else if (castType(block.getByPosition(arguments[1]).type.get(), [&](const auto & type)
{
using DataType = std::decay_t<decltype(type)>;
using T = typename DataType::FieldType;
const ColumnVector<T> * colnum = checkAndGetColumn<ColumnVector<T>>(numcolumn.get());
auto col_res = ColumnString::create();
RepeatImpl::vectorStrVectorRepeat(col->getChars(), col->getOffsets(), col_res->getChars(), col_res->getOffsets(), colnum->getData());
block.getByPosition(result).column = std::move(col_res);
return true;
}))
{
return;
}
}
else if (const ColumnConst * col_const = checkAndGetColumn<ColumnConst>(strcolumn.get()))
{
/// Note that const-const case is handled by useDefaultImplementationForConstants.
StringRef copy_str = col_const->getDataColumn().getDataAt(0);
if (castType(block.getByPosition(arguments[1]).type.get(), [&](const auto & type)
{
using DataType = std::decay_t<decltype(type)>;
using T = typename DataType::FieldType;
const ColumnVector<T> * colnum = checkAndGetColumn<ColumnVector<T>>(numcolumn.get());
auto col_res = ColumnString::create();
RepeatImpl::constStrVectorRepeat(copy_str, col_res->getChars(), col_res->getOffsets(), colnum->getData());
block.getByPosition(result).column = std::move(col_res);
return true;
}))
{
return;
}
}
throw Exception(
"Illegal column " + block.getByPosition(arguments[0]).column->getName() + " of argument of function " + getName(),
ErrorCodes::ILLEGAL_COLUMN);
}
};
void registerFunctionRepeat(FunctionFactory & factory)
{
factory.registerFunction<FunctionRepeat>();
}
}

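A compact, hedged sketch of the behaviour the new `repeat()` function implements, including the DoS guard on the repeat count; the column-based implementation above additionally sizes all offsets up front so the result is written with a single allocation.

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

static constexpr std::uint64_t max_repeat_times = 1000000;  // safety threshold against DoS

std::string repeat(const std::string & s, std::uint64_t n)
{
    if (n > max_repeat_times)
        throw std::runtime_error("Too many times to repeat (" + std::to_string(n) + ")");

    std::string result;
    result.reserve(s.size() * n);  // one allocation, like the precomputed offsets above
    for (std::uint64_t i = 0; i < n; ++i)
        result += s;
    return result;
}

int main()
{
    std::cout << repeat("ab", 3) << '\n';  // SELECT repeat('ab', 3) -> 'ababab'
}
```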
View File

@ -17,8 +17,8 @@ HDFSBuilderPtr createHDFSBuilder(const std::string & uri_str)
const Poco::URI uri(uri_str);
auto & host = uri.getHost();
auto port = uri.getPort();
auto & path = uri.getPath();
if (host.empty() || path.empty())
const std::string path = "//";
if (host.empty())
throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS);
HDFSBuilderPtr builder(hdfsNewBuilder());

View File

@ -40,6 +40,7 @@ namespace ErrorCodes
extern const int RECEIVED_ERROR_TOO_MANY_REQUESTS;
extern const int FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME;
extern const int UNSUPPORTED_URI_SCHEME;
extern const int TOO_MANY_REDIRECTS;
}
@ -216,20 +217,21 @@ PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const Connecti
return HTTPSessionPool::instance().getSession(uri, timeouts, per_endpoint_pool_size);
}
bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status) { return status == Poco::Net::HTTPResponse::HTTP_MOVED_PERMANENTLY || status == Poco::Net::HTTPResponse::HTTP_FOUND || status == Poco::Net::HTTPResponse::HTTP_SEE_OTHER || status == Poco::Net::HTTPResponse::HTTP_TEMPORARY_REDIRECT; }
std::istream * receiveResponse(
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response)
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, const bool allow_redirects)
{
auto & istr = session.receiveResponse(response);
assertResponseIsOk(request, response, istr);
assertResponseIsOk(request, response, istr, allow_redirects);
return &istr;
}
void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr)
void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects)
{
auto status = response.getStatus();
if (status != Poco::Net::HTTPResponse::HTTP_OK)
if (!(status == Poco::Net::HTTPResponse::HTTP_OK || (isRedirect(status) && allow_redirects)))
{
std::stringstream error_message;
error_message << "Received error from remote server " << request.getURI() << ". HTTP status code: " << status << " "

View File

@ -9,7 +9,7 @@
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
#include <Common/PoolBase.h>
#include <Poco/URIStreamFactory.h>
#include <IO/ConnectionTimeouts.h>
@ -50,13 +50,14 @@ HTTPSessionPtr makeHTTPSession(const Poco::URI & uri, const ConnectionTimeouts &
/// Like the previous method this creates a session, but takes it from the pool
PooledHTTPSessionPtr makePooledHTTPSession(const Poco::URI & uri, const ConnectionTimeouts & timeouts, size_t per_endpoint_pool_size);
bool isRedirect(const Poco::Net::HTTPResponse::HTTPStatus status);
/** Used to receive response (response headers and possibly body)
* after sending data (request headers and possibly body).
* Throws exception in case of non HTTP_OK (200) response code.
* Returned istream lives in 'session' object.
*/
std::istream * receiveResponse(
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response);
void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr);
Poco::Net::HTTPClientSession & session, const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, bool allow_redirects);
void assertResponseIsOk(const Poco::Net::HTTPRequest & request, Poco::Net::HTTPResponse & response, std::istream & istr, const bool allow_redirects = false);
}

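A hedged sketch of the redirect handling these hunks introduce: follow the Location header while the status is a redirect, but give up with TOO_MANY_REDIRECTS once `max_redirects` is exceeded. The `performRequest` stub below is hypothetical, standing in for sending the request on a session and receiving the response.

```cpp
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

struct Response
{
    int status = 200;
    std::string location;  // value of the Location header, if any
};

// Hypothetical stand-in for sendRequest()/receiveResponse(): pretends the
// first URI answers with one 302 and the mirror answers with 200.
Response performRequest(const std::string & uri)
{
    if (uri == "http://example.test/data")
        return {302, "http://mirror.example.test/data"};
    return {200, ""};
}

bool isRedirect(int status)
{
    return status == 301 || status == 302 || status == 303 || status == 307;
}

std::string fetchFollowingRedirects(std::string uri, std::uint64_t max_redirects)
{
    std::uint64_t redirects = 0;
    Response response = performRequest(uri);
    while (isRedirect(response.status))
    {
        if (++redirects > max_redirects)
            throw std::runtime_error("Too many redirects while trying to access " + uri);
        uri = response.location;  // like updateSession(): a new session is built for the new host
        response = performRequest(uri);
    }
    return uri;  // final, non-redirecting location
}

int main()
{
    std::cout << fetchFollowingRedirects("http://example.test/data", 10) << '\n';
}
```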
View File

@ -16,6 +16,7 @@
#include <Common/DNSResolver.h>
#include <Common/config.h>
#include <common/logger_useful.h>
#include <Poco/URIStreamFactory.h>
#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 1800
@ -26,41 +27,84 @@ namespace DB
/** Perform HTTP POST request and provide response to read.
*/
namespace ErrorCodes
{
extern const int TOO_MANY_REDIRECTS;
}
template <typename SessionPtr>
class UpdatableSessionBase
{
protected:
SessionPtr session;
UInt64 redirects { 0 };
Poco::URI initial_uri;
const ConnectionTimeouts & timeouts;
DB::SettingUInt64 max_redirects;
public:
virtual void buildNewSession(const Poco::URI & uri) = 0;
explicit UpdatableSessionBase(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
SettingUInt64 max_redirects_)
: initial_uri { uri }
, timeouts { timeouts_ }
, max_redirects { max_redirects_ }
{
}
SessionPtr getSession()
{
return session;
}
void updateSession(const Poco::URI & uri)
{
++redirects;
if (redirects <= max_redirects)
{
buildNewSession(uri);
}
else
{
std::stringstream error_message;
error_message << "Too many redirects while trying to access " << initial_uri.toString();
throw Exception(error_message.str(), ErrorCodes::TOO_MANY_REDIRECTS);
}
}
virtual ~UpdatableSessionBase()
{
}
};
namespace detail
{
template <typename SessionPtr>
template <typename UpdatableSessionPtr>
class ReadWriteBufferFromHTTPBase : public ReadBuffer
{
protected:
Poco::URI uri;
std::string method;
SessionPtr session;
UpdatableSessionPtr session;
std::istream * istr; /// owned by session
std::unique_ptr<ReadBuffer> impl;
std::function<void(std::ostream &)> out_stream_callback;
const Poco::Net::HTTPBasicCredentials & credentials;
std::vector<Poco::Net::HTTPCookie> cookies;
public:
using OutStreamCallback = std::function<void(std::ostream &)>;
explicit ReadWriteBufferFromHTTPBase(
SessionPtr session_,
Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback = {},
const Poco::Net::HTTPBasicCredentials & credentials = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE)
: ReadBuffer(nullptr, 0)
, uri{uri_}
, method{!method_.empty() ? method_ : out_stream_callback ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
, session{session_}
std::istream * call(const Poco::URI uri_, Poco::Net::HTTPResponse & response)
{
// With an empty path, Poco will send "POST HTTP/1.1"; this is a Poco bug.
if (uri.getPath().empty())
uri.setPath("/");
Poco::Net::HTTPRequest request(method, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
request.setHost(uri.getHost()); // use original, not resolved host name in header
Poco::Net::HTTPRequest request(method, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
request.setHost(uri_.getHost()); // use original, not resolved host name in header
if (out_stream_callback)
request.setChunkedTransferEncoding(true);
@ -68,27 +112,71 @@ namespace detail
if (!credentials.getUsername().empty())
credentials.authenticate(request);
Poco::Net::HTTPResponse response;
LOG_TRACE((&Logger::get("ReadWriteBufferFromHTTP")), "Sending request to " << uri.toString());
auto sess = session->getSession();
try
{
auto & stream_out = session->sendRequest(request);
auto & stream_out = sess->sendRequest(request);
if (out_stream_callback)
out_stream_callback(stream_out);
istr = receiveResponse(*session, request, response);
istr = receiveResponse(*sess, request, response, true);
response.getCookies(cookies);
return istr;
}
catch (const Poco::Exception & e)
{
/// We use the session data storage as storage for the exception text.
/// Depending on it we can decide whether to reconnect the session or to re-resolve the session host.
sess->attachSessionData(e.message());
throw;
}
}
public:
using OutStreamCallback = std::function<void(std::ostream &)>;
explicit ReadWriteBufferFromHTTPBase(UpdatableSessionPtr session_,
Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback_ = {},
const Poco::Net::HTTPBasicCredentials & credentials_ = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE)
: ReadBuffer(nullptr, 0)
, uri {uri_}
, method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
, session {session_}
, out_stream_callback {out_stream_callback_}
, credentials {credentials_}
{
Poco::Net::HTTPResponse response;
istr = call(uri, response);
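/// Follow redirects: rebuild the session for each Location header until a non-3xx response arrives;
/// updateSession() throws TOO_MANY_REDIRECTS once max_redirects is exceeded.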
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect(response.get("Location"));
session->updateSession(uri_redirect);
istr = call(uri_redirect, response);
}
try
{
impl = std::make_unique<ReadBufferFromIStream>(*istr, buffer_size_);
}
catch (const Poco::Exception & e)
{
/// We use the session data storage as storage for the exception text.
/// Depending on it we can decide whether to reconnect the session or to re-resolve the session host.
session->attachSessionData(e.message());
auto sess = session->getSession();
sess->attachSessionData(e.message());
throw;
}
}
@ -112,42 +200,88 @@ namespace detail
};
}
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<HTTPSessionPtr>
class UpdatableSession : public UpdatableSessionBase<HTTPSessionPtr>
{
using Parent = detail::ReadWriteBufferFromHTTPBase<HTTPSessionPtr>;
using Parent = UpdatableSessionBase<HTTPSessionPtr>;
public:
explicit UpdatableSession(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const SettingUInt64 max_redirects_)
: Parent(uri, timeouts_, max_redirects_)
{
session = makeHTTPSession(initial_uri, timeouts);
}
void buildNewSession(const Poco::URI & uri) override
{
session = makeHTTPSession(uri, timeouts);
}
};
class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
{
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>;
public:
explicit ReadWriteBufferFromHTTP(Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback = {},
OutStreamCallback out_stream_callback_ = {},
const ConnectionTimeouts & timeouts = {},
const Poco::Net::HTTPBasicCredentials & credentials = {},
const DB::SettingUInt64 max_redirects = 0,
const Poco::Net::HTTPBasicCredentials & credentials_ = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE)
: Parent(makeHTTPSession(uri_, timeouts), uri_, method_, out_stream_callback, credentials, buffer_size_)
: Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_)
{
}
};
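A hedged usage sketch of the new max_redirects parameter; the URI, query and redirect limit below are assumptions for illustration only:
#include <IO/ReadWriteBufferFromHTTP.h>
#include <IO/ReadHelpers.h>
std::string readWithRedirectsSketch()
{
    DB::ReadWriteBufferFromHTTP in(
        Poco::URI("http://localhost:8123/?query=SELECT+1"),    /// assumed endpoint
        Poco::Net::HTTPRequest::HTTP_GET,
        {},                                                    /// no request body callback
        DB::ConnectionTimeouts{},
        DB::SettingUInt64(2),                                  /// follow at most two redirects
        {},                                                    /// no credentials
        DBMS_DEFAULT_BUFFER_SIZE);
    std::string result;
    DB::readStringUntilEOF(result, in);
    return result;
}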
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<PooledHTTPSessionPtr>
class UpdatablePooledSession : public UpdatableSessionBase<PooledHTTPSessionPtr>
{
using Parent = detail::ReadWriteBufferFromHTTPBase<PooledHTTPSessionPtr>;
using Parent = UpdatableSessionBase<PooledHTTPSessionPtr>;
private:
size_t per_endpoint_pool_size;
public:
explicit UpdatablePooledSession(const Poco::URI uri,
const ConnectionTimeouts & timeouts_,
const SettingUInt64 max_redirects_,
size_t per_endpoint_pool_size_)
: Parent(uri, timeouts_, max_redirects_)
, per_endpoint_pool_size { per_endpoint_pool_size_ }
{
session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size);
}
void buildNewSession(const Poco::URI & uri) override
{
session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size);
}
};
class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>
{
using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>;
public:
explicit PooledReadWriteBufferFromHTTP(Poco::URI uri_,
const std::string & method_ = {},
OutStreamCallback out_stream_callback = {},
const ConnectionTimeouts & timeouts = {},
const Poco::Net::HTTPBasicCredentials & credentials = {},
OutStreamCallback out_stream_callback_ = {},
const ConnectionTimeouts & timeouts_ = {},
const Poco::Net::HTTPBasicCredentials & credentials_ = {},
size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
const DB::SettingUInt64 max_redirects = 0,
size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)
: Parent(makePooledHTTPSession(uri_, timeouts, max_connections_per_endpoint),
: Parent(std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
uri_,
method_,
out_stream_callback,
credentials,
out_stream_callback_,
credentials_,
buffer_size_)
{
}
};
}


@ -15,6 +15,7 @@ namespace ErrorCodes
extern const int NETWORK_ERROR;
extern const int CANNOT_OPEN_FILE;
extern const int CANNOT_FSYNC;
extern const int BAD_ARGUMENTS;
}
@ -32,10 +33,12 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl
{
const size_t begin_of_path = hdfs_uri.find('/', hdfs_uri.find("//") + 2);
const std::string path = hdfs_uri.substr(begin_of_path);
if (path.find("*?{") != std::string::npos)
if (path.find_first_of("*?{") != std::string::npos)
throw Exception("URI '" + hdfs_uri + "' contains globs, so the table is in readonly mode", ErrorCodes::CANNOT_OPEN_FILE);
fout = hdfsOpenFile(fs.get(), path.c_str(), O_WRONLY, 0, 0, 0);
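/// Note: hdfsExists() follows the libhdfs convention of returning 0 when the path exists, hence the negation below.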
if (!hdfsExists(fs.get(), path.c_str()))
throw Exception("File: " + path + " is already exists", ErrorCodes::BAD_ARGUMENTS);
fout = hdfsOpenFile(fs.get(), path.c_str(), O_WRONLY, 0, 0, 0); /// O_WRONLY means create or overwrite, i.e. it implies O_TRUNC here
if (fout == nullptr)
{


@ -22,7 +22,7 @@ WriteBufferFromHTTP::WriteBufferFromHTTP(
void WriteBufferFromHTTP::finalize()
{
receiveResponse(*session, request, response);
receiveResponse(*session, request, response, false);
/// TODO: Response body is ignored.
}


@ -15,6 +15,10 @@ namespace DB
{
const int DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT = 2;
// S3 protocol does not allow to have multipart upload with more than 10000 parts.
// In case the server does not return an error on exceeding that number, we print a warning,
// because a custom S3 implementation may have relaxed requirements on that.
const int S3_WARN_MAX_PARTS = 10000;
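// For illustration only: with 100 MiB parts (an assumed size) the 10000-part ceiling caps a single upload at roughly 1 TiB.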
@ -166,7 +170,7 @@ void WriteBufferFromS3::writePart(const String & data)
if (part_tags.size() == S3_WARN_MAX_PARTS)
{
// Don't throw exception here by ourselves but leave the decision to take by S3 server.
LOG_WARNING(&Logger::get("WriteBufferFromS3"), "Maximum part number in S3 protocol has reached (too much parts). Server may not accept this whole upload.");
LOG_WARNING(&Logger::get("WriteBufferFromS3"), "Maximum part number in S3 protocol has been reached (too many parts). Server may not accept this whole upload.");
}
for (int i = 0; i < DEFAULT_S3_MAX_FOLLOW_PUT_REDIRECT; ++i)


@ -1,25 +1,16 @@
#pragma once
#include <functional>
#include <memory>
#include <vector>
#include <Core/Types.h>
#include <IO/ConnectionTimeouts.h>
#include <IO/HTTPCommon.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadBufferFromIStream.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteBufferFromString.h>
#include <Poco/Net/HTTPBasicCredentials.h>
#include <Poco/Net/HTTPClientSession.h>
#include <Poco/Net/HTTPRequest.h>
#include <Poco/Net/HTTPResponse.h>
#include <Poco/URI.h>
#include <Poco/Version.h>
#include <Common/DNSResolver.h>
#include <Common/config.h>
#include <common/logger_useful.h>
namespace DB
@ -36,6 +27,9 @@ private:
String buffer_string;
std::unique_ptr<WriteBufferFromString> temporary_buffer;
size_t last_part_size;
/// Upload in S3 is made in parts.
/// We initiate the upload, then upload each part and get an ETag as a response, and finally finish the upload by listing all our parts.
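/// (In S3 API terms: CreateMultipartUpload, then one UploadPart per part, then CompleteMultipartUpload.)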
String upload_id;
std::vector<String> part_tags;


@ -67,4 +67,25 @@ void writeException(const Exception & e, WriteBuffer & buf, bool with_stack_trac
writeException(Exception(Exception::CreateFromPoco, *e.nested()), buf, with_stack_trace);
}
String backQuoteIfNeed(const String & x)
{
String res(x.size(), '\0');
{
WriteBufferFromString wb(res);
writeProbablyBackQuotedString(x, wb);
}
return res;
}
String backQuote(const String & x)
{
String res(x.size(), '\0');
{
WriteBufferFromString wb(res);
writeBackQuotedString(x, wb);
}
return res;
}
}
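A small sketch of the intended behaviour of the two helpers; the exact "needs quoting" rule is whatever writeProbablyBackQuotedString implements (here assumed to pass plain identifiers through untouched):
#include <IO/WriteHelpers.h>
#include <cassert>
void backquoteExamples()
{
    assert(DB::backQuote("hits_v1") == "`hits_v1`");            /// always quoted
    assert(DB::backQuoteIfNeed("hits_v1") == "hits_v1");        /// assumed: valid identifier kept as is
    assert(DB::backQuoteIfNeed("my column") == "`my column`");  /// contains a space, so quoted
}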


@ -906,4 +906,10 @@ inline String toString(const T & x)
return buf.str();
}
/// Quote the identifier with backquotes, if required.
String backQuoteIfNeed(const String & x);
/// Quote the identifier with backquotes.
String backQuote(const String & x);
}
