diff --git a/.gitmodules b/.gitmodules index a9e50ab8f6d..a7061ececc6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -31,9 +31,6 @@ [submodule "contrib/ssl"] path = contrib/ssl url = https://github.com/ClickHouse-Extras/ssl.git -[submodule "contrib/boost"] - path = contrib/boost - url = https://github.com/ClickHouse-Extras/boost.git [submodule "contrib/llvm"] path = contrib/llvm url = https://github.com/ClickHouse-Extras/llvm @@ -46,6 +43,21 @@ [submodule "contrib/unixodbc"] path = contrib/unixodbc url = https://github.com/ClickHouse-Extras/UnixODBC.git +[submodule "contrib/protobuf"] + path = contrib/protobuf + url = https://github.com/ClickHouse-Extras/protobuf.git +[submodule "contrib/boost"] + path = contrib/boost + url = https://github.com/ClickHouse-Extras/boost-extra.git [submodule "contrib/base64"] path = contrib/base64 url = https://github.com/aklomp/base64.git +[submodule "contrib/libhdfs3"] + path = contrib/libhdfs3 + url = https://github.com/ClickHouse-Extras/libhdfs3.git +[submodule "contrib/libxml2"] + path = contrib/libxml2 + url = https://github.com/GNOME/libxml2.git +[submodule "contrib/libgsasl"] + path = contrib/libgsasl + url = https://github.com/ClickHouse-Extras/libgsasl.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 3972cd66ca0..b9262c5c554 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -274,6 +274,9 @@ include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) include (cmake/find_cpuid.cmake) +include (cmake/find_libgsasl.cmake) +include (cmake/find_libxml2.cmake) +include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) include (cmake/find_base64.cmake) if (ENABLE_TESTS) diff --git a/LICENSE b/LICENSE index 75d3bda7798..1460730b3ed 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,192 @@ -Copyright 2016-2018 YANDEX LLC + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2016-2018 Yandex LLC Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/cmake/find_hdfs3.cmake b/cmake/find_hdfs3.cmake new file mode 100644 index 00000000000..a6fdec20291 --- /dev/null +++ b/cmake/find_hdfs3.cmake @@ -0,0 +1,26 @@ +if (NOT ARCH_ARM AND NOT OS_FREEBSD AND NOT APPLE) + option (ENABLE_HDFS "Enable HDFS" ${NOT_UNBUNDLED}) +endif () + +if (ENABLE_HDFS AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include/hdfs/hdfs.h") + message (WARNING "submodule contrib/libhdfs3 is missing. 
to fix try run: \n git submodule update --init --recursive")
+ set (ENABLE_HDFS 0)
+endif ()
+
+if (ENABLE_HDFS)
+option (USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ON)
+
+if (NOT USE_INTERNAL_HDFS3_LIBRARY)
+ find_package(hdfs3)
+endif ()
+
+if (HDFS3_LIBRARY AND HDFS3_INCLUDE_DIR)
+else ()
+ set(HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include")
+ set(HDFS3_LIBRARY hdfs3)
+endif()
+set (USE_HDFS 1)
+
+endif()
+
+message (STATUS "Using hdfs3: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}")
diff --git a/cmake/find_libgsasl.cmake b/cmake/find_libgsasl.cmake
new file mode 100644
index 00000000000..b686bb1df7f
--- /dev/null
+++ b/cmake/find_libgsasl.cmake
@@ -0,0 +1,22 @@
+if (NOT APPLE)
+ option (USE_INTERNAL_LIBGSASL_LIBRARY "Set to FALSE to use system libgsasl library instead of bundled" ${NOT_UNBUNDLED})
+endif ()
+
+if (USE_INTERNAL_LIBGSASL_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src/gsasl.h")
+ message (WARNING "submodule contrib/libgsasl is missing. to fix try run: \n git submodule update --init --recursive")
+ set (USE_INTERNAL_LIBGSASL_LIBRARY 0)
+endif ()
+
+if (NOT USE_INTERNAL_LIBGSASL_LIBRARY)
+ find_library (LIBGSASL_LIBRARY gsasl)
+ find_path (LIBGSASL_INCLUDE_DIR NAMES gsasl.h PATHS ${LIBGSASL_INCLUDE_PATHS})
+endif ()
+
+if (LIBGSASL_LIBRARY AND LIBGSASL_INCLUDE_DIR)
+else ()
+ set (LIBGSASL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libgsasl/src ${ClickHouse_SOURCE_DIR}/contrib/libgsasl/linux_x86_64/include)
+ set (USE_INTERNAL_LIBGSASL_LIBRARY 1)
+ set (LIBGSASL_LIBRARY libgsasl)
+endif ()
+
+message (STATUS "Using libgsasl: ${LIBGSASL_INCLUDE_DIR} : ${LIBGSASL_LIBRARY}")
diff --git a/cmake/find_libxml2.cmake b/cmake/find_libxml2.cmake
new file mode 100644
index 00000000000..cfababfbf63
--- /dev/null
+++ b/cmake/find_libxml2.cmake
@@ -0,0 +1,20 @@
+option (USE_INTERNAL_LIBXML2_LIBRARY "Set to FALSE to use system libxml2 library instead of bundled" ${NOT_UNBUNDLED})
+
+if (USE_INTERNAL_LIBXML2_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libxml2/libxml.h")
+ message (WARNING "submodule contrib/libxml2 is missing.
to fix try run: \n git submodule update --init --recursive")
+ set (USE_INTERNAL_LIBXML2_LIBRARY 0)
+endif ()
+
+if (NOT USE_INTERNAL_LIBXML2_LIBRARY)
+ find_library (LIBXML2_LIBRARY libxml2)
+ find_path (LIBXML2_INCLUDE_DIR NAMES libxml.h PATHS ${LIBXML2_INCLUDE_PATHS})
+endif ()
+
+if (LIBXML2_LIBRARY AND LIBXML2_INCLUDE_DIR)
+else ()
+ set (LIBXML2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libxml2/include ${ClickHouse_SOURCE_DIR}/contrib/libxml2-cmake/linux_x86_64/include)
+ set (USE_INTERNAL_LIBXML2_LIBRARY 1)
+ set (LIBXML2_LIBRARY libxml2)
+endif ()
+
+message (STATUS "Using libxml2: ${LIBXML2_INCLUDE_DIR} : ${LIBXML2_LIBRARY}")
diff --git a/cmake/find_protobuf.cmake b/cmake/find_protobuf.cmake
new file mode 100644
index 00000000000..5daf5a0c186
--- /dev/null
+++ b/cmake/find_protobuf.cmake
@@ -0,0 +1,80 @@
+option (USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ON)
+
+if (NOT USE_INTERNAL_PROTOBUF_LIBRARY)
+ find_package(Protobuf)
+endif ()
+
+if (Protobuf_LIBRARY AND Protobuf_INCLUDE_DIR)
+else ()
+ set(Protobuf_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/contrib/protobuf/src)
+
+ set(Protobuf_LIBRARY libprotobuf)
+ set(Protobuf_PROTOC_LIBRARY libprotoc)
+ set(Protobuf_LITE_LIBRARY libprotobuf-lite)
+
+ set(Protobuf_PROTOC_EXECUTABLE ${CMAKE_BINARY_DIR}/contrib/protobuf/cmake/protoc)
+
+ if(NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH)
+ set(PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE)
+ endif()
+
+ function(PROTOBUF_GENERATE_CPP SRCS HDRS)
+ if(NOT ARGN)
+ message(SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
+ return()
+ endif()
+
+ if(PROTOBUF_GENERATE_CPP_APPEND_PATH)
+ # Create an include path for each file specified
+ foreach(FIL ${ARGN})
+ get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+ get_filename_component(ABS_PATH ${ABS_FIL} PATH)
+ list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
+ if(${_contains_already} EQUAL -1)
+ list(APPEND _protobuf_include_path -I ${ABS_PATH})
+ endif()
+ endforeach()
+ else()
+ set(_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
+ endif()
+
+ if(DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
+ set(Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
+ endif()
+
+ if(DEFINED Protobuf_IMPORT_DIRS)
+ foreach(DIR ${Protobuf_IMPORT_DIRS})
+ get_filename_component(ABS_PATH ${DIR} ABSOLUTE)
+ list(FIND _protobuf_include_path ${ABS_PATH} _contains_already)
+ if(${_contains_already} EQUAL -1)
+ list(APPEND _protobuf_include_path -I ${ABS_PATH})
+ endif()
+ endforeach()
+ endif()
+
+ set(${SRCS})
+ set(${HDRS})
+ foreach(FIL ${ARGN})
+ get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
+ get_filename_component(FIL_WE ${FIL} NAME_WE)
+
+ list(APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
+ list(APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")
+
+ add_custom_command(
+ OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
+ "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"
+ COMMAND ${Protobuf_PROTOC_EXECUTABLE}
+ ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL}
+ DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
+ COMMENT "Running C++ protocol buffer compiler on ${FIL}"
+ VERBATIM )
+ endforeach()
+
+ set_source_files_properties(${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
+ set(${SRCS} ${${SRCS}} PARENT_SCOPE)
+ set(${HDRS} ${${HDRS}} PARENT_SCOPE)
+ endfunction()
+endif()
+
+message (STATUS "Using protobuf: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY}")
diff --git a/cmake/find_rdkafka.cmake
b/cmake/find_rdkafka.cmake
index a978ff026f7..1d2674ea1a3 100644
--- a/cmake/find_rdkafka.cmake
+++ b/cmake/find_rdkafka.cmake
@@ -4,7 +4,7 @@ endif ()
 if (ENABLE_RDKAFKA)
-if (OS_LINUX)
+if (OS_LINUX AND NOT ARCH_ARM)
 option (USE_INTERNAL_RDKAFKA_LIBRARY "Set to FALSE to use system librdkafka instead of the bundled" ${NOT_UNBUNDLED})
 endif ()
@@ -31,7 +31,7 @@ if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR)
 if (LZ4_LIBRARY)
 list (APPEND RDKAFKA_LIBRARY ${LZ4_LIBRARY})
 endif ()
-elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY)
+elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY AND NOT ARCH_ARM)
 set (USE_INTERNAL_RDKAFKA_LIBRARY 1)
 set (RDKAFKA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src")
 set (RDKAFKA_LIBRARY rdkafka)
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index efc975689c5..66173322659 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -110,6 +110,7 @@ if (USE_INTERNAL_SSL_LIBRARY)
 add_subdirectory (ssl)
 target_include_directories(${OPENSSL_CRYPTO_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR})
 target_include_directories(${OPENSSL_SSL_LIBRARY} SYSTEM PUBLIC ${OPENSSL_INCLUDE_DIR})
+ set (POCO_SKIP_OPENSSL_FIND 1)
 endif ()
@@ -192,6 +193,24 @@ if (USE_INTERNAL_LLVM_LIBRARY)
 add_subdirectory (llvm/llvm)
 endif ()
+if (USE_INTERNAL_LIBGSASL_LIBRARY)
+ add_subdirectory(libgsasl)
+endif()
+
+if (USE_INTERNAL_LIBXML2_LIBRARY)
+ add_subdirectory(libxml2-cmake)
+endif ()
+
+if (USE_INTERNAL_HDFS3_LIBRARY)
+ include(${ClickHouse_SOURCE_DIR}/cmake/find_protobuf.cmake)
+ if (USE_INTERNAL_PROTOBUF_LIBRARY)
+ set(protobuf_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
+ set(protobuf_BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE)
+ add_subdirectory(protobuf/cmake)
+ endif ()
+ add_subdirectory(libhdfs3-cmake)
+endif ()
+
 if (USE_BASE64)
 add_subdirectory (base64-cmake)
 endif()
diff --git a/contrib/boost b/contrib/boost
index 2d5cb2c86f6..6883b40449f 160000
--- a/contrib/boost
+++ b/contrib/boost
@@ -1 +1 @@
-Subproject commit 2d5cb2c86f61126f4e1efe9ab97332efd44e7dea
+Subproject commit 6883b40449f378019aec792f9983ce3afc7ff16e
diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt
index 49d50036e4b..acb76ad0328 100644
--- a/contrib/boost-cmake/CMakeLists.txt
+++ b/contrib/boost-cmake/CMakeLists.txt
@@ -42,12 +42,17 @@ ${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp)
 add_library(boost_system_internal ${LINK_MODE}
 ${LIBRARY_DIR}/libs/system/src/error_code.cpp)
+add_library(boost_random_internal ${LINK_MODE}
+${LIBRARY_DIR}/libs/random/src/random_device.cpp)
+
 target_link_libraries (boost_filesystem_internal PUBLIC boost_system_internal)
 target_include_directories (boost_program_options_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
 target_include_directories (boost_filesystem_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
 target_include_directories (boost_system_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
+target_include_directories (boost_random_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
 target_compile_definitions (boost_program_options_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
 target_compile_definitions (boost_filesystem_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
 target_compile_definitions (boost_system_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
+target_compile_definitions (boost_random_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
diff --git a/contrib/libgsasl b/contrib/libgsasl
new file mode 160000
index 00000000000..3b8948a4042
--- /dev/null
+++
b/contrib/libgsasl
@@ -0,0 +1 @@
+Subproject commit 3b8948a4042e34fb00b4fb987535dc9e02e39040
diff --git a/contrib/libhdfs3 b/contrib/libhdfs3
new file mode 160000
index 00000000000..bd6505cbb0c
--- /dev/null
+++ b/contrib/libhdfs3
@@ -0,0 +1 @@
+Subproject commit bd6505cbb0c130b0db695305b9a38546fa880e5a
diff --git a/contrib/libhdfs3-cmake/CMake/CMakeTestCompileNestedException.cpp b/contrib/libhdfs3-cmake/CMake/CMakeTestCompileNestedException.cpp
new file mode 100644
index 00000000000..66918ca516e
--- /dev/null
+++ b/contrib/libhdfs3-cmake/CMake/CMakeTestCompileNestedException.cpp
@@ -0,0 +1,10 @@
+#include <stdexcept>
+#include <exception>
+
+int main() {
+ try {
+ throw 2;
+ } catch (int) {
+ std::throw_with_nested(std::runtime_error("test"));
+ }
+}
diff --git a/contrib/libhdfs3-cmake/CMake/CMakeTestCompileSteadyClock.cpp b/contrib/libhdfs3-cmake/CMake/CMakeTestCompileSteadyClock.cpp
new file mode 100644
index 00000000000..afcbe1b83b2
--- /dev/null
+++ b/contrib/libhdfs3-cmake/CMake/CMakeTestCompileSteadyClock.cpp
@@ -0,0 +1,7 @@
+#include <chrono>
+
+using std::chrono::steady_clock;
+
+void foo(const steady_clock &clock) {
+ return;
+}
diff --git a/contrib/libhdfs3-cmake/CMake/CMakeTestCompileStrerror.cpp b/contrib/libhdfs3-cmake/CMake/CMakeTestCompileStrerror.cpp
new file mode 100644
index 00000000000..0ef4eda583e
--- /dev/null
+++ b/contrib/libhdfs3-cmake/CMake/CMakeTestCompileStrerror.cpp
@@ -0,0 +1,10 @@
+#include <string.h>
+
+int main()
+{
+ // We can't test "char *p = strerror_r()" because that only causes a
+ // compiler warning when strerror_r returns an integer.
+ char *buf = 0;
+ int i = strerror_r(0, buf, 100);
+ return i;
+}
diff --git a/contrib/libhdfs3-cmake/CMake/CodeCoverage.cmake b/contrib/libhdfs3-cmake/CMake/CodeCoverage.cmake
new file mode 100644
index 00000000000..ce997925fcc
--- /dev/null
+++ b/contrib/libhdfs3-cmake/CMake/CodeCoverage.cmake
@@ -0,0 +1,48 @@
+# Check prereqs
+FIND_PROGRAM(GCOV_PATH gcov)
+FIND_PROGRAM(LCOV_PATH lcov)
+FIND_PROGRAM(GENHTML_PATH genhtml)
+
+IF(NOT GCOV_PATH)
+ MESSAGE(FATAL_ERROR "gcov not found! Aborting...")
+ENDIF(NOT GCOV_PATH)
+
+IF(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
+ MESSAGE(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading")
+ENDIF(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
+
+#Setup compiler options
+ADD_DEFINITIONS(-fprofile-arcs -ftest-coverage)
+
+SET(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-arcs ")
+SET(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fprofile-arcs ")
+
+IF(NOT LCOV_PATH)
+ MESSAGE(FATAL_ERROR "lcov not found! Aborting...")
+ENDIF(NOT LCOV_PATH)
+
+IF(NOT GENHTML_PATH)
+ MESSAGE(FATAL_ERROR "genhtml not found! Aborting...")
+ENDIF(NOT GENHTML_PATH)
+
+#Setup target
+ADD_CUSTOM_TARGET(ShowCoverage
+ #Capturing lcov counters and generating report
+ COMMAND ${LCOV_PATH} --directory .
--capture --output-file CodeCoverage.info + COMMAND ${LCOV_PATH} --remove CodeCoverage.info '${CMAKE_CURRENT_BINARY_DIR}/*' 'test/*' 'mock/*' '/usr/*' '/opt/*' '*ext/rhel5_x86_64*' '*ext/osx*' --output-file CodeCoverage.info.cleaned + COMMAND ${GENHTML_PATH} -o CodeCoverageReport CodeCoverage.info.cleaned +) + + +ADD_CUSTOM_TARGET(ShowAllCoverage + #Capturing lcov counters and generating report + COMMAND ${LCOV_PATH} -a CodeCoverage.info.cleaned -a CodeCoverage.info.cleaned_withoutHA -o AllCodeCoverage.info + COMMAND sed -e 's|/.*/src|${CMAKE_SOURCE_DIR}/src|' -ig AllCodeCoverage.info + COMMAND ${GENHTML_PATH} -o AllCodeCoverageReport AllCodeCoverage.info +) + +ADD_CUSTOM_TARGET(ResetCoverage + #Cleanup lcov + COMMAND ${LCOV_PATH} --directory . --zerocounters +) + diff --git a/contrib/libhdfs3-cmake/CMake/FindBoost.cmake b/contrib/libhdfs3-cmake/CMake/FindBoost.cmake new file mode 100644 index 00000000000..914a0a5b5cd --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/FindBoost.cmake @@ -0,0 +1,1162 @@ +# - Find Boost include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(Boost +# [version] [EXACT] # Minimum or EXACT version e.g. 1.36.0 +# [REQUIRED] # Fail with error if Boost is not found +# [COMPONENTS ...] # Boost libraries by their canonical name +# ) # e.g. "date_time" for "libboost_date_time" +# This module finds headers and requested component libraries OR a CMake +# package configuration file provided by a "Boost CMake" build. For the +# latter case skip to the "Boost CMake" section below. For the former +# case results are reported in variables: +# Boost_FOUND - True if headers and requested libraries were found +# Boost_INCLUDE_DIRS - Boost include directories +# Boost_LIBRARY_DIRS - Link directories for Boost libraries +# Boost_LIBRARIES - Boost component libraries to be linked +# Boost__FOUND - True if component was found ( is upper-case) +# Boost__LIBRARY - Libraries to link for component (may include +# target_link_libraries debug/optimized keywords) +# Boost_VERSION - BOOST_VERSION value from boost/version.hpp +# Boost_LIB_VERSION - Version string appended to library filenames +# Boost_MAJOR_VERSION - Boost major version number (X in X.y.z) +# Boost_MINOR_VERSION - Boost minor version number (Y in x.Y.z) +# Boost_SUBMINOR_VERSION - Boost subminor version number (Z in x.y.Z) +# Boost_LIB_DIAGNOSTIC_DEFINITIONS (Windows) +# - Pass to add_definitions() to have diagnostic +# information about Boost's automatic linking +# displayed during compilation +# +# This module reads hints about search locations from variables: +# BOOST_ROOT - Preferred installation prefix +# (or BOOSTROOT) +# BOOST_INCLUDEDIR - Preferred include directory e.g. /include +# BOOST_LIBRARYDIR - Preferred library directory e.g. /lib +# Boost_NO_SYSTEM_PATHS - Set to ON to disable searching in locations not +# specified by these hint variables. Default is OFF. +# Boost_ADDITIONAL_VERSIONS +# - List of Boost versions not known to this module +# (Boost install locations may contain the version) +# and saves search results persistently in CMake cache entries: +# Boost_INCLUDE_DIR - Directory containing Boost headers +# Boost_LIBRARY_DIR - Directory containing Boost libraries +# Boost__LIBRARY_DEBUG - Component library debug variant +# Boost__LIBRARY_RELEASE - Component library release variant +# Users may set these hints or results as cache entries. Projects should +# not read these entries directly but instead use the above result variables. 
+# Note that some hint names start in upper-case "BOOST". One may specify +# these as environment variables if they are not specified as CMake variables +# or cache entries. +# +# This module first searches for the Boost header files using the above hint +# variables (excluding BOOST_LIBRARYDIR) and saves the result in +# Boost_INCLUDE_DIR. Then it searches for requested component libraries using +# the above hints (excluding BOOST_INCLUDEDIR and Boost_ADDITIONAL_VERSIONS), +# "lib" directories near Boost_INCLUDE_DIR, and the library name configuration +# settings below. It saves the library directory in Boost_LIBRARY_DIR and +# individual library locations in Boost__LIBRARY_DEBUG and +# Boost__LIBRARY_RELEASE. When one changes settings used by previous +# searches in the same build tree (excluding environment variables) this +# module discards previous search results affected by the changes and searches +# again. +# +# Boost libraries come in many variants encoded in their file name. Users or +# projects may tell this module which variant to find by setting variables: +# Boost_USE_MULTITHREADED - Set to OFF to use the non-multithreaded +# libraries ('mt' tag). Default is ON. +# Boost_USE_STATIC_LIBS - Set to ON to force the use of the static +# libraries. Default is OFF. +# Boost_USE_STATIC_RUNTIME - Set to ON or OFF to specify whether to use +# libraries linked statically to the C++ runtime +# ('s' tag). Default is platform dependent. +# Boost_USE_DEBUG_PYTHON - Set to ON to use libraries compiled with a +# debug Python build ('y' tag). Default is OFF. +# Boost_USE_STLPORT - Set to ON to use libraries compiled with +# STLPort ('p' tag). Default is OFF. +# Boost_USE_STLPORT_DEPRECATED_NATIVE_IOSTREAMS +# - Set to ON to use libraries compiled with +# STLPort deprecated "native iostreams" +# ('n' tag). Default is OFF. +# Boost_COMPILER - Set to the compiler-specific library suffix +# (e.g. "-gcc43"). Default is auto-computed +# for the C++ compiler in use. +# Boost_THREADAPI - Suffix for "thread" component library name, +# such as "pthread" or "win32". Names with +# and without this suffix will both be tried. +# Other variables one may set to control this module are: +# Boost_DEBUG - Set to ON to enable debug output from FindBoost. +# Please enable this before filing any bug report. +# Boost_DETAILED_FAILURE_MSG +# - Set to ON to add detailed information to the +# failure message even when the REQUIRED option +# is not given to the find_package call. +# Boost_REALPATH - Set to ON to resolve symlinks for discovered +# libraries to assist with packaging. For example, +# the "system" component library may be resolved to +# "/usr/lib/libboost_system.so.1.42.0" instead of +# "/usr/lib/libboost_system.so". This does not +# affect linking and should not be enabled unless +# the user needs this information. +# On Visual Studio and Borland compilers Boost headers request automatic +# linking to corresponding libraries. This requires matching libraries to be +# linked explicitly or available in the link library search path. In this +# case setting Boost_USE_STATIC_LIBS to OFF may not achieve dynamic linking. +# Boost automatic linking typically requests static libraries with a few +# exceptions (such as Boost.Python). Use +# add_definitions(${Boost_LIB_DIAGNOSTIC_DEFINITIONS}) +# to ask Boost to report information about automatic linking requests. 
+# +# Example to find Boost headers only: +# find_package(Boost 1.36.0) +# if(Boost_FOUND) +# include_directories(${Boost_INCLUDE_DIRS}) +# add_executable(foo foo.cc) +# endif() +# Example to find Boost headers and some libraries: +# set(Boost_USE_STATIC_LIBS ON) +# set(Boost_USE_MULTITHREADED ON) +# set(Boost_USE_STATIC_RUNTIME OFF) +# find_package(Boost 1.36.0 COMPONENTS date_time filesystem system ...) +# if(Boost_FOUND) +# include_directories(${Boost_INCLUDE_DIRS}) +# add_executable(foo foo.cc) +# target_link_libraries(foo ${Boost_LIBRARIES}) +# endif() +# +# Boost CMake ---------------------------------------------------------- +# +# If Boost was built using the boost-cmake project it provides a package +# configuration file for use with find_package's Config mode. This module +# looks for the package configuration file called BoostConfig.cmake or +# boost-config.cmake and stores the result in cache entry "Boost_DIR". If +# found, the package configuration file is loaded and this module returns with +# no further action. See documentation of the Boost CMake package +# configuration for details on what it provides. +# +# Set Boost_NO_BOOST_CMAKE to ON to disable the search for boost-cmake. + +#============================================================================= +# Copyright 2006-2012 Kitware, Inc. +# Copyright 2006-2008 Andreas Schneider +# Copyright 2007 Wengo +# Copyright 2007 Mike Jackson +# Copyright 2008 Andreas Pakulat +# Copyright 2008-2012 Philip Lowman +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. +#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + + +#------------------------------------------------------------------------------- +# Before we go searching, check whether boost-cmake is available, unless the +# user specifically asked NOT to search for boost-cmake. +# +# If Boost_DIR is set, this behaves as any find_package call would. If not, +# it looks at BOOST_ROOT and BOOSTROOT to find Boost. +# +if (NOT Boost_NO_BOOST_CMAKE) + # If Boost_DIR is not set, look for BOOSTROOT and BOOST_ROOT as alternatives, + # since these are more conventional for Boost. + if ("$ENV{Boost_DIR}" STREQUAL "") + if (NOT "$ENV{BOOST_ROOT}" STREQUAL "") + set(ENV{Boost_DIR} $ENV{BOOST_ROOT}) + elseif (NOT "$ENV{BOOSTROOT}" STREQUAL "") + set(ENV{Boost_DIR} $ENV{BOOSTROOT}) + endif() + endif() + + # Do the same find_package call but look specifically for the CMake version. + # Note that args are passed in the Boost_FIND_xxxxx variables, so there is no + # need to delegate them to this find_package call. + find_package(Boost QUIET NO_MODULE) + mark_as_advanced(Boost_DIR) + + # If we found boost-cmake, then we're done. Print out what we found. + # Otherwise let the rest of the module try to find it. 
+ if (Boost_FOUND) + message("Boost ${Boost_FIND_VERSION} found.") + if (Boost_FIND_COMPONENTS) + message("Found Boost components:") + message(" ${Boost_FIND_COMPONENTS}") + endif() + return() + endif() +endif() + + +#------------------------------------------------------------------------------- +# FindBoost functions & macros +# + +############################################ +# +# Check the existence of the libraries. +# +############################################ +# This macro was taken directly from the FindQt4.cmake file that is included +# with the CMake distribution. This is NOT my work. All work was done by the +# original authors of the FindQt4.cmake file. Only minor modifications were +# made to remove references to Qt and make this file more generally applicable +# And ELSE/ENDIF pairs were removed for readability. +######################################################################### + +macro(_Boost_ADJUST_LIB_VARS basename) + if(Boost_INCLUDE_DIR ) + if(Boost_${basename}_LIBRARY_DEBUG AND Boost_${basename}_LIBRARY_RELEASE) + # if the generator supports configuration types then set + # optimized and debug libraries, or if the CMAKE_BUILD_TYPE has a value + if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE) + set(Boost_${basename}_LIBRARY optimized ${Boost_${basename}_LIBRARY_RELEASE} debug ${Boost_${basename}_LIBRARY_DEBUG}) + else() + # if there are no configuration types and CMAKE_BUILD_TYPE has no value + # then just use the release libraries + set(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY_RELEASE} ) + endif() + # FIXME: This probably should be set for both cases + set(Boost_${basename}_LIBRARIES optimized ${Boost_${basename}_LIBRARY_RELEASE} debug ${Boost_${basename}_LIBRARY_DEBUG}) + endif() + + # if only the release version was found, set the debug variable also to the release version + if(Boost_${basename}_LIBRARY_RELEASE AND NOT Boost_${basename}_LIBRARY_DEBUG) + set(Boost_${basename}_LIBRARY_DEBUG ${Boost_${basename}_LIBRARY_RELEASE}) + set(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY_RELEASE}) + set(Boost_${basename}_LIBRARIES ${Boost_${basename}_LIBRARY_RELEASE}) + endif() + + # if only the debug version was found, set the release variable also to the debug version + if(Boost_${basename}_LIBRARY_DEBUG AND NOT Boost_${basename}_LIBRARY_RELEASE) + set(Boost_${basename}_LIBRARY_RELEASE ${Boost_${basename}_LIBRARY_DEBUG}) + set(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY_DEBUG}) + set(Boost_${basename}_LIBRARIES ${Boost_${basename}_LIBRARY_DEBUG}) + endif() + + # If the debug & release library ends up being the same, omit the keywords + if(${Boost_${basename}_LIBRARY_RELEASE} STREQUAL ${Boost_${basename}_LIBRARY_DEBUG}) + set(Boost_${basename}_LIBRARY ${Boost_${basename}_LIBRARY_RELEASE} ) + set(Boost_${basename}_LIBRARIES ${Boost_${basename}_LIBRARY_RELEASE} ) + endif() + + if(Boost_${basename}_LIBRARY) + set(Boost_${basename}_FOUND ON) + endif() + + endif() + # Make variables changeable to the advanced user + mark_as_advanced( + Boost_${basename}_LIBRARY_RELEASE + Boost_${basename}_LIBRARY_DEBUG + ) +endmacro() + +macro(_Boost_CHANGE_DETECT changed_var) + set(${changed_var} 0) + foreach(v ${ARGN}) + if(DEFINED _Boost_COMPONENTS_SEARCHED) + if(${v}) + if(_${v}_LAST) + string(COMPARE NOTEQUAL "${${v}}" "${_${v}_LAST}" _${v}_CHANGED) + else() + set(_${v}_CHANGED 1) + endif() + elseif(_${v}_LAST) + set(_${v}_CHANGED 1) + endif() + if(_${v}_CHANGED) + set(${changed_var} 1) + endif() + else() + set(_${v}_CHANGED 0) + endif() + 
endforeach() +endmacro() + +macro(_Boost_FIND_LIBRARY var) + find_library(${var} ${ARGN}) + + # If we found the first library save Boost_LIBRARY_DIR. + if(${var} AND NOT Boost_LIBRARY_DIR) + get_filename_component(_dir "${${var}}" PATH) + set(Boost_LIBRARY_DIR "${_dir}" CACHE PATH "Boost library directory" FORCE) + endif() + + # If Boost_LIBRARY_DIR is known then search only there. + if(Boost_LIBRARY_DIR) + set(_boost_LIBRARY_SEARCH_DIRS ${Boost_LIBRARY_DIR} NO_DEFAULT_PATH) + endif() +endmacro() + +#------------------------------------------------------------------------------- + +# +# Runs compiler with "-dumpversion" and parses major/minor +# version with a regex. +# +function(_Boost_COMPILER_DUMPVERSION _OUTPUT_VERSION) + + exec_program(${CMAKE_CXX_COMPILER} + ARGS ${CMAKE_CXX_COMPILER_ARG1} -dumpversion + OUTPUT_VARIABLE _boost_COMPILER_VERSION + ) + string(REGEX REPLACE "([0-9])\\.([0-9])(\\.[0-9])?" "\\1\\2" + _boost_COMPILER_VERSION ${_boost_COMPILER_VERSION}) + + set(${_OUTPUT_VERSION} ${_boost_COMPILER_VERSION} PARENT_SCOPE) +endfunction() + +# +# Take a list of libraries with "thread" in it +# and prepend duplicates with "thread_${Boost_THREADAPI}" +# at the front of the list +# +function(_Boost_PREPEND_LIST_WITH_THREADAPI _output) + set(_orig_libnames ${ARGN}) + string(REPLACE "thread" "thread_${Boost_THREADAPI}" _threadapi_libnames "${_orig_libnames}") + set(${_output} ${_threadapi_libnames} ${_orig_libnames} PARENT_SCOPE) +endfunction() + +# +# If a library is found, replace its cache entry with its REALPATH +# +function(_Boost_SWAP_WITH_REALPATH _library _docstring) + if(${_library}) + get_filename_component(_boost_filepathreal ${${_library}} REALPATH) + unset(${_library} CACHE) + set(${_library} ${_boost_filepathreal} CACHE FILEPATH "${_docstring}") + endif() +endfunction() + +function(_Boost_CHECK_SPELLING _var) + if(${_var}) + string(TOUPPER ${_var} _var_UC) + message(FATAL_ERROR "ERROR: ${_var} is not the correct spelling. The proper spelling is ${_var_UC}.") + endif() +endfunction() + +# Guesses Boost's compiler prefix used in built library names +# Returns the guess by setting the variable pointed to by _ret +function(_Boost_GUESS_COMPILER_PREFIX _ret) + if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel" + OR "${CMAKE_CXX_COMPILER}" MATCHES "icl" + OR "${CMAKE_CXX_COMPILER}" MATCHES "icpc") + if(WIN32) + set (_boost_COMPILER "-iw") + else() + set (_boost_COMPILER "-il") + endif() + elseif (MSVC12) + set(_boost_COMPILER "-vc120") + elseif (MSVC11) + set(_boost_COMPILER "-vc110") + elseif (MSVC10) + set(_boost_COMPILER "-vc100") + elseif (MSVC90) + set(_boost_COMPILER "-vc90") + elseif (MSVC80) + set(_boost_COMPILER "-vc80") + elseif (MSVC71) + set(_boost_COMPILER "-vc71") + elseif (MSVC70) # Good luck! + set(_boost_COMPILER "-vc7") # yes, this is correct + elseif (MSVC60) # Good luck! 
+ set(_boost_COMPILER "-vc6") # yes, this is correct + elseif (BORLAND) + set(_boost_COMPILER "-bcb") + elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "SunPro") + set(_boost_COMPILER "-sw") + elseif (MINGW) + if(${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION} VERSION_LESS 1.34) + set(_boost_COMPILER "-mgw") # no GCC version encoding prior to 1.34 + else() + _Boost_COMPILER_DUMPVERSION(_boost_COMPILER_VERSION) + set(_boost_COMPILER "-mgw${_boost_COMPILER_VERSION}") + endif() + elseif (UNIX) + if (CMAKE_COMPILER_IS_GNUCXX) + if(${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION} VERSION_LESS 1.34) + set(_boost_COMPILER "-gcc") # no GCC version encoding prior to 1.34 + else() + _Boost_COMPILER_DUMPVERSION(_boost_COMPILER_VERSION) + # Determine which version of GCC we have. + if(APPLE) + if(Boost_MINOR_VERSION) + if(${Boost_MINOR_VERSION} GREATER 35) + # In Boost 1.36.0 and newer, the mangled compiler name used + # on Mac OS X/Darwin is "xgcc". + set(_boost_COMPILER "-xgcc${_boost_COMPILER_VERSION}") + else() + # In Boost <= 1.35.0, there is no mangled compiler name for + # the Mac OS X/Darwin version of GCC. + set(_boost_COMPILER "") + endif() + else() + # We don't know the Boost version, so assume it's + # pre-1.36.0. + set(_boost_COMPILER "") + endif() + else() + set(_boost_COMPILER "-gcc${_boost_COMPILER_VERSION}") + endif() + endif() + endif () + else() + # TODO at least Boost_DEBUG here? + set(_boost_COMPILER "") + endif() + set(${_ret} ${_boost_COMPILER} PARENT_SCOPE) +endfunction() + +# +# End functions/macros +# +#------------------------------------------------------------------------------- + +#------------------------------------------------------------------------------- +# main. +#------------------------------------------------------------------------------- + +if(NOT DEFINED Boost_USE_MULTITHREADED) + set(Boost_USE_MULTITHREADED TRUE) +endif() + +# Check the version of Boost against the requested version. +if(Boost_FIND_VERSION AND NOT Boost_FIND_VERSION_MINOR) + message(SEND_ERROR "When requesting a specific version of Boost, you must provide at least the major and minor version numbers, e.g., 1.34") +endif() + +if(Boost_FIND_VERSION_EXACT) + # The version may appear in a directory with or without the patch + # level, even when the patch level is non-zero. + set(_boost_TEST_VERSIONS + "${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}.${Boost_FIND_VERSION_PATCH}" + "${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}") +else() + # The user has not requested an exact version. Among known + # versions, find those that are acceptable to the user request. + set(_Boost_KNOWN_VERSIONS ${Boost_ADDITIONAL_VERSIONS} + "1.56.0" "1.56" "1.55.0" "1.55" "1.54.0" "1.54" + "1.53.0" "1.53" "1.52.0" "1.52" "1.51.0" "1.51" + "1.50.0" "1.50" "1.49.0" "1.49" "1.48.0" "1.48" "1.47.0" "1.47" "1.46.1" + "1.46.0" "1.46" "1.45.0" "1.45" "1.44.0" "1.44" "1.43.0" "1.43" "1.42.0" "1.42" + "1.41.0" "1.41" "1.40.0" "1.40" "1.39.0" "1.39" "1.38.0" "1.38" "1.37.0" "1.37" + "1.36.1" "1.36.0" "1.36" "1.35.1" "1.35.0" "1.35" "1.34.1" "1.34.0" + "1.34" "1.33.1" "1.33.0" "1.33") + set(_boost_TEST_VERSIONS) + if(Boost_FIND_VERSION) + set(_Boost_FIND_VERSION_SHORT "${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}") + # Select acceptable versions. + foreach(version ${_Boost_KNOWN_VERSIONS}) + if(NOT "${version}" VERSION_LESS "${Boost_FIND_VERSION}") + # This version is high enough. 
+ list(APPEND _boost_TEST_VERSIONS "${version}") + elseif("${version}.99" VERSION_EQUAL "${_Boost_FIND_VERSION_SHORT}.99") + # This version is a short-form for the requested version with + # the patch level dropped. + list(APPEND _boost_TEST_VERSIONS "${version}") + endif() + endforeach() + else() + # Any version is acceptable. + set(_boost_TEST_VERSIONS "${_Boost_KNOWN_VERSIONS}") + endif() +endif() + +# The reason that we failed to find Boost. This will be set to a +# user-friendly message when we fail to find some necessary piece of +# Boost. +set(Boost_ERROR_REASON) + +if(Boost_DEBUG) + # Output some of their choices + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "_boost_TEST_VERSIONS = ${_boost_TEST_VERSIONS}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Boost_USE_MULTITHREADED = ${Boost_USE_MULTITHREADED}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Boost_USE_STATIC_LIBS = ${Boost_USE_STATIC_LIBS}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Boost_USE_STATIC_RUNTIME = ${Boost_USE_STATIC_RUNTIME}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Boost_ADDITIONAL_VERSIONS = ${Boost_ADDITIONAL_VERSIONS}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Boost_NO_SYSTEM_PATHS = ${Boost_NO_SYSTEM_PATHS}") +endif() + +if(WIN32) + # In windows, automatic linking is performed, so you do not have + # to specify the libraries. If you are linking to a dynamic + # runtime, then you can choose to link to either a static or a + # dynamic Boost library, the default is to do a static link. You + # can alter this for a specific library "whatever" by defining + # BOOST_WHATEVER_DYN_LINK to force Boost library "whatever" to be + # linked dynamically. Alternatively you can force all Boost + # libraries to dynamic link by defining BOOST_ALL_DYN_LINK. + + # This feature can be disabled for Boost library "whatever" by + # defining BOOST_WHATEVER_NO_LIB, or for all of Boost by defining + # BOOST_ALL_NO_LIB. + + # If you want to observe which libraries are being linked against + # then defining BOOST_LIB_DIAGNOSTIC will cause the auto-linking + # code to emit a #pragma message each time a library is selected + # for linking. + set(Boost_LIB_DIAGNOSTIC_DEFINITIONS "-DBOOST_LIB_DIAGNOSTIC") +endif() + +_Boost_CHECK_SPELLING(Boost_ROOT) +_Boost_CHECK_SPELLING(Boost_LIBRARYDIR) +_Boost_CHECK_SPELLING(Boost_INCLUDEDIR) + +# Collect environment variable inputs as hints. Do not consider changes. +foreach(v BOOSTROOT BOOST_ROOT BOOST_INCLUDEDIR BOOST_LIBRARYDIR) + set(_env $ENV{${v}}) + if(_env) + file(TO_CMAKE_PATH "${_env}" _ENV_${v}) + else() + set(_ENV_${v} "") + endif() +endforeach() +if(NOT _ENV_BOOST_ROOT AND _ENV_BOOSTROOT) + set(_ENV_BOOST_ROOT "${_ENV_BOOSTROOT}") +endif() + +# Collect inputs and cached results. Detect changes since the last run. 
+if(NOT BOOST_ROOT AND BOOSTROOT) + set(BOOST_ROOT "${BOOSTROOT}") +endif() +set(_Boost_VARS_DIR + BOOST_ROOT + Boost_NO_SYSTEM_PATHS + ) + +if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Declared as CMake or Environmental Variables:") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + " BOOST_ROOT = ${BOOST_ROOT}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + " BOOST_INCLUDEDIR = ${BOOST_INCLUDEDIR}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + " BOOST_LIBRARYDIR = ${BOOST_LIBRARYDIR}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "_boost_TEST_VERSIONS = ${_boost_TEST_VERSIONS}") +endif() + +# ------------------------------------------------------------------------ +# Search for Boost include DIR +# ------------------------------------------------------------------------ + +set(_Boost_VARS_INC BOOST_INCLUDEDIR Boost_INCLUDE_DIR Boost_ADDITIONAL_VERSIONS) +_Boost_CHANGE_DETECT(_Boost_CHANGE_INCDIR ${_Boost_VARS_DIR} ${_Boost_VARS_INC}) +# Clear Boost_INCLUDE_DIR if it did not change but other input affecting the +# location did. We will find a new one based on the new inputs. +if(_Boost_CHANGE_INCDIR AND NOT _Boost_INCLUDE_DIR_CHANGED) + unset(Boost_INCLUDE_DIR CACHE) +endif() + +if(NOT Boost_INCLUDE_DIR) + set(_boost_INCLUDE_SEARCH_DIRS "") + if(BOOST_INCLUDEDIR) + list(APPEND _boost_INCLUDE_SEARCH_DIRS ${BOOST_INCLUDEDIR}) + elseif(_ENV_BOOST_INCLUDEDIR) + list(APPEND _boost_INCLUDE_SEARCH_DIRS ${_ENV_BOOST_INCLUDEDIR}) + endif() + + if( BOOST_ROOT ) + list(APPEND _boost_INCLUDE_SEARCH_DIRS ${BOOST_ROOT}/include ${BOOST_ROOT}) + elseif( _ENV_BOOST_ROOT ) + list(APPEND _boost_INCLUDE_SEARCH_DIRS ${_ENV_BOOST_ROOT}/include ${_ENV_BOOST_ROOT}) + endif() + + if( Boost_NO_SYSTEM_PATHS) + list(APPEND _boost_INCLUDE_SEARCH_DIRS NO_CMAKE_SYSTEM_PATH) + else() + list(APPEND _boost_INCLUDE_SEARCH_DIRS PATHS + C:/boost/include + C:/boost + /sw/local/include + ) + endif() + + # Try to find Boost by stepping backwards through the Boost versions + # we know about. + # Build a list of path suffixes for each version. 
+ set(_boost_PATH_SUFFIXES) + foreach(_boost_VER ${_boost_TEST_VERSIONS}) + # Add in a path suffix, based on the required version, ideally + # we could read this from version.hpp, but for that to work we'd + # need to know the include dir already + set(_boost_BOOSTIFIED_VERSION) + + # Transform 1.35 => 1_35 and 1.36.0 => 1_36_0 + if(_boost_VER MATCHES "[0-9]+\\.[0-9]+\\.[0-9]+") + string(REGEX REPLACE "([0-9]+)\\.([0-9]+)\\.([0-9]+)" "\\1_\\2_\\3" + _boost_BOOSTIFIED_VERSION ${_boost_VER}) + elseif(_boost_VER MATCHES "[0-9]+\\.[0-9]+") + string(REGEX REPLACE "([0-9]+)\\.([0-9]+)" "\\1_\\2" + _boost_BOOSTIFIED_VERSION ${_boost_VER}) + endif() + + list(APPEND _boost_PATH_SUFFIXES + "boost-${_boost_BOOSTIFIED_VERSION}" + "boost_${_boost_BOOSTIFIED_VERSION}" + "boost/boost-${_boost_BOOSTIFIED_VERSION}" + "boost/boost_${_boost_BOOSTIFIED_VERSION}" + ) + + endforeach() + + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Include debugging info:") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + " _boost_INCLUDE_SEARCH_DIRS = ${_boost_INCLUDE_SEARCH_DIRS}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + " _boost_PATH_SUFFIXES = ${_boost_PATH_SUFFIXES}") + endif() + + # Look for a standard boost header file. + find_path(Boost_INCLUDE_DIR + NAMES boost/config.hpp + HINTS ${_boost_INCLUDE_SEARCH_DIRS} + PATH_SUFFIXES ${_boost_PATH_SUFFIXES} + ) +endif() + +# ------------------------------------------------------------------------ +# Extract version information from version.hpp +# ------------------------------------------------------------------------ + +# Set Boost_FOUND based only on header location and version. +# It will be updated below for component libraries. +if(Boost_INCLUDE_DIR) + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "location of version.hpp: ${Boost_INCLUDE_DIR}/boost/version.hpp") + endif() + + # Extract Boost_VERSION and Boost_LIB_VERSION from version.hpp + set(Boost_VERSION 0) + set(Boost_LIB_VERSION "") + file(STRINGS "${Boost_INCLUDE_DIR}/boost/version.hpp" _boost_VERSION_HPP_CONTENTS REGEX "#define BOOST_(LIB_)?VERSION ") + set(_Boost_VERSION_REGEX "([0-9]+)") + set(_Boost_LIB_VERSION_REGEX "\"([0-9_]+)\"") + foreach(v VERSION LIB_VERSION) + if("${_boost_VERSION_HPP_CONTENTS}" MATCHES ".*#define BOOST_${v} ${_Boost_${v}_REGEX}.*") + set(Boost_${v} "${CMAKE_MATCH_1}") + endif() + endforeach() + unset(_boost_VERSION_HPP_CONTENTS) + + math(EXPR Boost_MAJOR_VERSION "${Boost_VERSION} / 100000") + math(EXPR Boost_MINOR_VERSION "${Boost_VERSION} / 100 % 1000") + math(EXPR Boost_SUBMINOR_VERSION "${Boost_VERSION} % 100") + + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON}Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}\nBoost include path: ${Boost_INCLUDE_DIR}") + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "version.hpp reveals boost " + "${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}") + endif() + + if(Boost_FIND_VERSION) + # Set Boost_FOUND based on requested version. 
+ set(_Boost_VERSION "${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}") + if("${_Boost_VERSION}" VERSION_LESS "${Boost_FIND_VERSION}") + set(Boost_FOUND 0) + set(_Boost_VERSION_AGE "old") + elseif(Boost_FIND_VERSION_EXACT AND + NOT "${_Boost_VERSION}" VERSION_EQUAL "${Boost_FIND_VERSION}") + set(Boost_FOUND 0) + set(_Boost_VERSION_AGE "new") + else() + set(Boost_FOUND 1) + endif() + if(NOT Boost_FOUND) + # State that we found a version of Boost that is too new or too old. + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON}\nDetected version of Boost is too ${_Boost_VERSION_AGE}. Requested version was ${Boost_FIND_VERSION_MAJOR}.${Boost_FIND_VERSION_MINOR}") + if (Boost_FIND_VERSION_PATCH) + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON}.${Boost_FIND_VERSION_PATCH}") + endif () + if (NOT Boost_FIND_VERSION_EXACT) + set(Boost_ERROR_REASON "${Boost_ERROR_REASON} (or newer)") + endif () + set(Boost_ERROR_REASON "${Boost_ERROR_REASON}.") + endif () + else() + # Caller will accept any Boost version. + set(Boost_FOUND 1) + endif() +else() + set(Boost_FOUND 0) + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON}Unable to find the Boost header files. Please set BOOST_ROOT to the root directory containing Boost or BOOST_INCLUDEDIR to the directory containing Boost's headers.") +endif() + +# ------------------------------------------------------------------------ +# Suffix initialization and compiler suffix detection. +# ------------------------------------------------------------------------ + +set(_Boost_VARS_NAME + Boost_COMPILER + Boost_THREADAPI + Boost_USE_DEBUG_PYTHON + Boost_USE_MULTITHREADED + Boost_USE_STATIC_LIBS + Boost_USE_STATIC_RUNTIME + Boost_USE_STLPORT + Boost_USE_STLPORT_DEPRECATED_NATIVE_IOSTREAMS + ) +_Boost_CHANGE_DETECT(_Boost_CHANGE_LIBNAME ${_Boost_VARS_NAME}) + +# Setting some more suffixes for the library +set(Boost_LIB_PREFIX "") +if ( WIN32 AND Boost_USE_STATIC_LIBS AND NOT CYGWIN) + set(Boost_LIB_PREFIX "lib") +endif() + +if (Boost_COMPILER) + set(_boost_COMPILER ${Boost_COMPILER}) + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "using user-specified Boost_COMPILER = ${_boost_COMPILER}") + endif() +else() + # Attempt to guess the compiler suffix + # NOTE: this is not perfect yet, if you experience any issues + # please report them and use the Boost_COMPILER variable + # to work around the problems. + _Boost_GUESS_COMPILER_PREFIX(_boost_COMPILER) + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "guessed _boost_COMPILER = ${_boost_COMPILER}") + endif() +endif() + +set (_boost_MULTITHREADED "-mt") +if( NOT Boost_USE_MULTITHREADED ) + set (_boost_MULTITHREADED "") +endif() +if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "_boost_MULTITHREADED = ${_boost_MULTITHREADED}") +endif() + +#====================== +# Systematically build up the Boost ABI tag +# http://boost.org/doc/libs/1_41_0/more/getting_started/windows.html#library-naming +set( _boost_RELEASE_ABI_TAG "-") +set( _boost_DEBUG_ABI_TAG "-") +# Key Use this library when: +# s linking statically to the C++ standard library and +# compiler runtime support libraries. 
+if(Boost_USE_STATIC_RUNTIME) + set( _boost_RELEASE_ABI_TAG "${_boost_RELEASE_ABI_TAG}s") + set( _boost_DEBUG_ABI_TAG "${_boost_DEBUG_ABI_TAG}s") +endif() +# g using debug versions of the standard and runtime +# support libraries +if(WIN32) + if(MSVC OR "${CMAKE_CXX_COMPILER}" MATCHES "icl" + OR "${CMAKE_CXX_COMPILER}" MATCHES "icpc") + set(_boost_DEBUG_ABI_TAG "${_boost_DEBUG_ABI_TAG}g") + endif() +endif() +# y using special debug build of python +if(Boost_USE_DEBUG_PYTHON) + set(_boost_DEBUG_ABI_TAG "${_boost_DEBUG_ABI_TAG}y") +endif() +# d using a debug version of your code +set(_boost_DEBUG_ABI_TAG "${_boost_DEBUG_ABI_TAG}d") +# p using the STLport standard library rather than the +# default one supplied with your compiler +if(Boost_USE_STLPORT) + set( _boost_RELEASE_ABI_TAG "${_boost_RELEASE_ABI_TAG}p") + set( _boost_DEBUG_ABI_TAG "${_boost_DEBUG_ABI_TAG}p") +endif() +# n using the STLport deprecated "native iostreams" feature +if(Boost_USE_STLPORT_DEPRECATED_NATIVE_IOSTREAMS) + set( _boost_RELEASE_ABI_TAG "${_boost_RELEASE_ABI_TAG}n") + set( _boost_DEBUG_ABI_TAG "${_boost_DEBUG_ABI_TAG}n") +endif() + +if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "_boost_RELEASE_ABI_TAG = ${_boost_RELEASE_ABI_TAG}") + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "_boost_DEBUG_ABI_TAG = ${_boost_DEBUG_ABI_TAG}") +endif() + +# ------------------------------------------------------------------------ +# Begin finding boost libraries +# ------------------------------------------------------------------------ +set(_Boost_VARS_LIB BOOST_LIBRARYDIR Boost_LIBRARY_DIR) +_Boost_CHANGE_DETECT(_Boost_CHANGE_LIBDIR ${_Boost_VARS_DIR} ${_Boost_VARS_LIB} Boost_INCLUDE_DIR) +# Clear Boost_LIBRARY_DIR if it did not change but other input affecting the +# location did. We will find a new one based on the new inputs. 
+if(_Boost_CHANGE_LIBDIR AND NOT _Boost_LIBRARY_DIR_CHANGED) + unset(Boost_LIBRARY_DIR CACHE) +endif() + +if(Boost_LIBRARY_DIR) + set(_boost_LIBRARY_SEARCH_DIRS ${Boost_LIBRARY_DIR} NO_DEFAULT_PATH) +else() + set(_boost_LIBRARY_SEARCH_DIRS "") + if(BOOST_LIBRARYDIR) + list(APPEND _boost_LIBRARY_SEARCH_DIRS ${BOOST_LIBRARYDIR}) + elseif(_ENV_BOOST_LIBRARYDIR) + list(APPEND _boost_LIBRARY_SEARCH_DIRS ${_ENV_BOOST_LIBRARYDIR}) + endif() + + if(BOOST_ROOT) + list(APPEND _boost_LIBRARY_SEARCH_DIRS ${BOOST_ROOT}/lib ${BOOST_ROOT}/stage/lib) + elseif(_ENV_BOOST_ROOT) + list(APPEND _boost_LIBRARY_SEARCH_DIRS ${_ENV_BOOST_ROOT}/lib ${_ENV_BOOST_ROOT}/stage/lib) + endif() + + list(APPEND _boost_LIBRARY_SEARCH_DIRS + ${Boost_INCLUDE_DIR}/lib + ${Boost_INCLUDE_DIR}/../lib + ${Boost_INCLUDE_DIR}/stage/lib + ) + if( Boost_NO_SYSTEM_PATHS ) + list(APPEND _boost_LIBRARY_SEARCH_DIRS NO_CMAKE_SYSTEM_PATH) + else() + list(APPEND _boost_LIBRARY_SEARCH_DIRS PATHS + C:/boost/lib + C:/boost + /sw/local/lib + ) + endif() +endif() + +if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "_boost_LIBRARY_SEARCH_DIRS = ${_boost_LIBRARY_SEARCH_DIRS}") +endif() + +# Support preference of static libs by adjusting CMAKE_FIND_LIBRARY_SUFFIXES +if( Boost_USE_STATIC_LIBS ) + set( _boost_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES}) + if(WIN32) + set(CMAKE_FIND_LIBRARY_SUFFIXES .lib .a ${CMAKE_FIND_LIBRARY_SUFFIXES}) + else() + set(CMAKE_FIND_LIBRARY_SUFFIXES .a ) + endif() +endif() + +# We want to use the tag inline below without risking double dashes +if(_boost_RELEASE_ABI_TAG) + if(${_boost_RELEASE_ABI_TAG} STREQUAL "-") + set(_boost_RELEASE_ABI_TAG "") + endif() +endif() +if(_boost_DEBUG_ABI_TAG) + if(${_boost_DEBUG_ABI_TAG} STREQUAL "-") + set(_boost_DEBUG_ABI_TAG "") + endif() +endif() + +# The previous behavior of FindBoost when Boost_USE_STATIC_LIBS was enabled +# on WIN32 was to: +# 1. Search for static libs compiled against a SHARED C++ standard runtime library (use if found) +# 2. Search for static libs compiled against a STATIC C++ standard runtime library (use if found) +# We maintain this behavior since changing it could break people's builds. +# To disable the ambiguous behavior, the user need only +# set Boost_USE_STATIC_RUNTIME either ON or OFF. +set(_boost_STATIC_RUNTIME_WORKAROUND false) +if(WIN32 AND Boost_USE_STATIC_LIBS) + if(NOT DEFINED Boost_USE_STATIC_RUNTIME) + set(_boost_STATIC_RUNTIME_WORKAROUND true) + endif() +endif() + +# On versions < 1.35, remove the System library from the considered list +# since it wasn't added until 1.35. +if(Boost_VERSION AND Boost_FIND_COMPONENTS) + if(Boost_VERSION LESS 103500) + list(REMOVE_ITEM Boost_FIND_COMPONENTS system) + endif() +endif() + +# If the user changed any of our control inputs flush previous results. 
+if(_Boost_CHANGE_LIBDIR OR _Boost_CHANGE_LIBNAME) + foreach(COMPONENT ${_Boost_COMPONENTS_SEARCHED}) + string(TOUPPER ${COMPONENT} UPPERCOMPONENT) + foreach(c DEBUG RELEASE) + set(_var Boost_${UPPERCOMPONENT}_LIBRARY_${c}) + unset(${_var} CACHE) + set(${_var} "${_var}-NOTFOUND") + endforeach() + endforeach() + set(_Boost_COMPONENTS_SEARCHED "") +endif() + +foreach(COMPONENT ${Boost_FIND_COMPONENTS}) + string(TOUPPER ${COMPONENT} UPPERCOMPONENT) + + set( _boost_docstring_release "Boost ${COMPONENT} library (release)") + set( _boost_docstring_debug "Boost ${COMPONENT} library (debug)") + + # + # Find RELEASE libraries + # + set(_boost_RELEASE_NAMES + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_RELEASE_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_RELEASE_ABI_TAG} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_RELEASE_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_RELEASE_ABI_TAG} + ${Boost_LIB_PREFIX}boost_${COMPONENT} ) + if(_boost_STATIC_RUNTIME_WORKAROUND) + set(_boost_RELEASE_STATIC_ABI_TAG "-s${_boost_RELEASE_ABI_TAG}") + list(APPEND _boost_RELEASE_NAMES + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_RELEASE_STATIC_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_RELEASE_STATIC_ABI_TAG} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_RELEASE_STATIC_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_RELEASE_STATIC_ABI_TAG} ) + endif() + if(Boost_THREADAPI AND ${COMPONENT} STREQUAL "thread") + _Boost_PREPEND_LIST_WITH_THREADAPI(_boost_RELEASE_NAMES ${_boost_RELEASE_NAMES}) + endif() + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Searching for ${UPPERCOMPONENT}_LIBRARY_RELEASE: ${_boost_RELEASE_NAMES}") + endif() + + # Avoid passing backslashes to _Boost_FIND_LIBRARY due to macro re-parsing. 
+ string(REPLACE "\\" "/" _boost_LIBRARY_SEARCH_DIRS_tmp "${_boost_LIBRARY_SEARCH_DIRS}") + + _Boost_FIND_LIBRARY(Boost_${UPPERCOMPONENT}_LIBRARY_RELEASE + NAMES ${_boost_RELEASE_NAMES} + HINTS ${_boost_LIBRARY_SEARCH_DIRS_tmp} + NAMES_PER_DIR + DOC "${_boost_docstring_release}" + ) + + # + # Find DEBUG libraries + # + set(_boost_DEBUG_NAMES + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_DEBUG_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_DEBUG_ABI_TAG} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_DEBUG_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_DEBUG_ABI_TAG} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED} + ${Boost_LIB_PREFIX}boost_${COMPONENT} ) + if(_boost_STATIC_RUNTIME_WORKAROUND) + set(_boost_DEBUG_STATIC_ABI_TAG "-s${_boost_DEBUG_ABI_TAG}") + list(APPEND _boost_DEBUG_NAMES + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_DEBUG_STATIC_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_COMPILER}${_boost_MULTITHREADED}${_boost_DEBUG_STATIC_ABI_TAG} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_DEBUG_STATIC_ABI_TAG}-${Boost_LIB_VERSION} + ${Boost_LIB_PREFIX}boost_${COMPONENT}${_boost_MULTITHREADED}${_boost_DEBUG_STATIC_ABI_TAG} ) + endif() + if(Boost_THREADAPI AND ${COMPONENT} STREQUAL "thread") + _Boost_PREPEND_LIST_WITH_THREADAPI(_boost_DEBUG_NAMES ${_boost_DEBUG_NAMES}) + endif() + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] " + "Searching for ${UPPERCOMPONENT}_LIBRARY_DEBUG: ${_boost_DEBUG_NAMES}") + endif() + + # Avoid passing backslashes to _Boost_FIND_LIBRARY due to macro re-parsing. + string(REPLACE "\\" "/" _boost_LIBRARY_SEARCH_DIRS_tmp "${_boost_LIBRARY_SEARCH_DIRS}") + + _Boost_FIND_LIBRARY(Boost_${UPPERCOMPONENT}_LIBRARY_DEBUG + NAMES ${_boost_DEBUG_NAMES} + HINTS ${_boost_LIBRARY_SEARCH_DIRS_tmp} + NAMES_PER_DIR + DOC "${_boost_docstring_debug}" + ) + + if(Boost_REALPATH) + _Boost_SWAP_WITH_REALPATH(Boost_${UPPERCOMPONENT}_LIBRARY_RELEASE "${_boost_docstring_release}") + _Boost_SWAP_WITH_REALPATH(Boost_${UPPERCOMPONENT}_LIBRARY_DEBUG "${_boost_docstring_debug}" ) + endif() + + _Boost_ADJUST_LIB_VARS(${UPPERCOMPONENT}) + +endforeach() + +# Restore the original find library ordering +if( Boost_USE_STATIC_LIBS ) + set(CMAKE_FIND_LIBRARY_SUFFIXES ${_boost_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES}) +endif() + +# ------------------------------------------------------------------------ +# End finding boost libraries +# ------------------------------------------------------------------------ + +set(Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIR}) +set(Boost_LIBRARY_DIRS ${Boost_LIBRARY_DIR}) + +# The above setting of Boost_FOUND was based only on the header files. +# Update it for the requested component libraries. +if(Boost_FOUND) + # The headers were found. Check for requested component libs. 
+ set(_boost_CHECKED_COMPONENT FALSE) + set(_Boost_MISSING_COMPONENTS "") + foreach(COMPONENT ${Boost_FIND_COMPONENTS}) + string(TOUPPER ${COMPONENT} COMPONENT) + set(_boost_CHECKED_COMPONENT TRUE) + if(NOT Boost_${COMPONENT}_FOUND) + string(TOLOWER ${COMPONENT} COMPONENT) + list(APPEND _Boost_MISSING_COMPONENTS ${COMPONENT}) + endif() + endforeach() + + if(Boost_DEBUG) + message(STATUS "[ ${CMAKE_CURRENT_LIST_FILE}:${CMAKE_CURRENT_LIST_LINE} ] Boost_FOUND = ${Boost_FOUND}") + endif() + + if (_Boost_MISSING_COMPONENTS) + set(Boost_FOUND 0) + # We were unable to find some libraries, so generate a sensible + # error message that lists the libraries we were unable to find. + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON}\nCould not find the following") + if(Boost_USE_STATIC_LIBS) + set(Boost_ERROR_REASON "${Boost_ERROR_REASON} static") + endif() + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON} Boost libraries:\n") + foreach(COMPONENT ${_Boost_MISSING_COMPONENTS}) + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON} boost_${COMPONENT}\n") + endforeach() + + list(LENGTH Boost_FIND_COMPONENTS Boost_NUM_COMPONENTS_WANTED) + list(LENGTH _Boost_MISSING_COMPONENTS Boost_NUM_MISSING_COMPONENTS) + if (${Boost_NUM_COMPONENTS_WANTED} EQUAL ${Boost_NUM_MISSING_COMPONENTS}) + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON}No Boost libraries were found. You may need to set BOOST_LIBRARYDIR to the directory containing Boost libraries or BOOST_ROOT to the location of Boost.") + else () + set(Boost_ERROR_REASON + "${Boost_ERROR_REASON}Some (but not all) of the required Boost libraries were found. You may need to install these additional Boost libraries. Alternatively, set BOOST_LIBRARYDIR to the directory containing Boost libraries or BOOST_ROOT to the location of Boost.") + endif () + endif () + + if( NOT Boost_LIBRARY_DIRS AND NOT _boost_CHECKED_COMPONENT ) + # Compatibility Code for backwards compatibility with CMake + # 2.4's FindBoost module. + + # Look for the boost library path. + # Note that the user may not have installed any libraries + # so it is quite possible the Boost_LIBRARY_DIRS may not exist. + set(_boost_LIB_DIR ${Boost_INCLUDE_DIR}) + + if("${_boost_LIB_DIR}" MATCHES "boost-[0-9]+") + get_filename_component(_boost_LIB_DIR ${_boost_LIB_DIR} PATH) + endif() + + if("${_boost_LIB_DIR}" MATCHES "/include$") + # Strip off the trailing "/include" in the path. + get_filename_component(_boost_LIB_DIR ${_boost_LIB_DIR} PATH) + endif() + + if(EXISTS "${_boost_LIB_DIR}/lib") + set(_boost_LIB_DIR ${_boost_LIB_DIR}/lib) + else() + if(EXISTS "${_boost_LIB_DIR}/stage/lib") + set(_boost_LIB_DIR ${_boost_LIB_DIR}/stage/lib) + else() + set(_boost_LIB_DIR "") + endif() + endif() + + if(_boost_LIB_DIR AND EXISTS "${_boost_LIB_DIR}") + set(Boost_LIBRARY_DIRS ${_boost_LIB_DIR}) + endif() + + endif() +else() + # Boost headers were not found so no components were found. 
+ foreach(COMPONENT ${Boost_FIND_COMPONENTS}) + string(TOUPPER ${COMPONENT} UPPERCOMPONENT) + set(Boost_${UPPERCOMPONENT}_FOUND 0) + endforeach() +endif() + +# ------------------------------------------------------------------------ +# Notification to end user about what was found +# ------------------------------------------------------------------------ + +set(Boost_LIBRARIES "") +if(Boost_FOUND) + if(NOT Boost_FIND_QUIETLY) + message(STATUS "Boost version: ${Boost_MAJOR_VERSION}.${Boost_MINOR_VERSION}.${Boost_SUBMINOR_VERSION}") + if(Boost_FIND_COMPONENTS) + message(STATUS "Found the following Boost libraries:") + endif() + endif() + foreach( COMPONENT ${Boost_FIND_COMPONENTS} ) + string( TOUPPER ${COMPONENT} UPPERCOMPONENT ) + if( Boost_${UPPERCOMPONENT}_FOUND ) + if(NOT Boost_FIND_QUIETLY) + message (STATUS " ${COMPONENT}") + endif() + list(APPEND Boost_LIBRARIES ${Boost_${UPPERCOMPONENT}_LIBRARY}) + endif() + endforeach() +else() + if(Boost_FIND_REQUIRED) + message(SEND_ERROR "Unable to find the requested Boost libraries.\n${Boost_ERROR_REASON}") + else() + if(NOT Boost_FIND_QUIETLY) + # we opt not to automatically output Boost_ERROR_REASON here as + # it could be quite lengthy and somewhat imposing in its requests + # Since Boost is not always a required dependency we'll leave this + # up to the end-user. + if(Boost_DEBUG OR Boost_DETAILED_FAILURE_MSG) + message(STATUS "Could NOT find Boost\n${Boost_ERROR_REASON}") + else() + message(STATUS "Could NOT find Boost") + endif() + endif() + endif() +endif() + +# Configure display of cache entries in GUI. +foreach(v BOOSTROOT BOOST_ROOT ${_Boost_VARS_INC} ${_Boost_VARS_LIB}) + get_property(_type CACHE ${v} PROPERTY TYPE) + if(_type) + set_property(CACHE ${v} PROPERTY ADVANCED 1) + if("x${_type}" STREQUAL "xUNINITIALIZED") + if("x${v}" STREQUAL "xBoost_ADDITIONAL_VERSIONS") + set_property(CACHE ${v} PROPERTY TYPE STRING) + else() + set_property(CACHE ${v} PROPERTY TYPE PATH) + endif() + endif() + endif() +endforeach() + +# Record last used values of input variables so we can +# detect on the next run if the user changed them. +foreach(v + ${_Boost_VARS_INC} ${_Boost_VARS_LIB} + ${_Boost_VARS_DIR} ${_Boost_VARS_NAME} + ) + if(DEFINED ${v}) + set(_${v}_LAST "${${v}}" CACHE INTERNAL "Last used ${v} value.") + else() + unset(_${v}_LAST CACHE) + endif() +endforeach() + +# Maintain a persistent list of components requested anywhere since +# the last flush. 
+set(_Boost_COMPONENTS_SEARCHED "${_Boost_COMPONENTS_SEARCHED}") +list(APPEND _Boost_COMPONENTS_SEARCHED ${Boost_FIND_COMPONENTS}) +list(REMOVE_DUPLICATES _Boost_COMPONENTS_SEARCHED) +list(SORT _Boost_COMPONENTS_SEARCHED) +set(_Boost_COMPONENTS_SEARCHED "${_Boost_COMPONENTS_SEARCHED}" + CACHE INTERNAL "Components requested for this build tree.") diff --git a/contrib/libhdfs3-cmake/CMake/FindGSasl.cmake b/contrib/libhdfs3-cmake/CMake/FindGSasl.cmake new file mode 100644 index 00000000000..19ca7c30d1e --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/FindGSasl.cmake @@ -0,0 +1,26 @@ +# - Try to find the GNU sasl library (gsasl) +# +# Once done this will define +# +# GSASL_FOUND - System has gsasl +# GSASL_INCLUDE_DIR - The gsasl include directory +# GSASL_LIBRARIES - The libraries needed to use gsasl +# GSASL_DEFINITIONS - Compiler switches required for using gsasl + + +IF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES) + # in cache already + SET(GSasl_FIND_QUIETLY TRUE) +ENDIF (GSASL_INCLUDE_DIR AND GSASL_LIBRARIES) + +FIND_PATH(GSASL_INCLUDE_DIR gsasl.h) + +FIND_LIBRARY(GSASL_LIBRARIES gsasl) + +INCLUDE(FindPackageHandleStandardArgs) + +# handle the QUIETLY and REQUIRED arguments and set GSASL_FOUND to TRUE if +# all listed variables are TRUE +FIND_PACKAGE_HANDLE_STANDARD_ARGS(GSASL DEFAULT_MSG GSASL_LIBRARIES GSASL_INCLUDE_DIR) + +MARK_AS_ADVANCED(GSASL_INCLUDE_DIR GSASL_LIBRARIES) \ No newline at end of file diff --git a/contrib/libhdfs3-cmake/CMake/FindGoogleTest.cmake b/contrib/libhdfs3-cmake/CMake/FindGoogleTest.cmake new file mode 100644 index 00000000000..fd57c1e2abd --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/FindGoogleTest.cmake @@ -0,0 +1,65 @@ +include(CheckCXXSourceRuns) + +find_path(GTest_INCLUDE_DIR gtest/gtest.h + NO_DEFAULT_PATH + PATHS + "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/googletest/include" + "/usr/local/include" + "/usr/include") + +find_path(GMock_INCLUDE_DIR gmock/gmock.h + NO_DEFAULT_PATH + PATHS + "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/googlemock/include" + "/usr/local/include" + "/usr/include") + +find_library(Gtest_LIBRARY + NAMES libgtest.a + HINTS + "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/build/googlemock/gtest" + "/usr/local/lib" + "/usr/lib") + +find_library(Gmock_LIBRARY + NAMES libgmock.a + HINTS + "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/build/googlemock" + "/usr/local/lib" + "/usr/lib") + +message(STATUS "Find GoogleTest include path: ${GTest_INCLUDE_DIR}") +message(STATUS "Find GoogleMock include path: ${GMock_INCLUDE_DIR}") +message(STATUS "Find Gtest library path: ${Gtest_LIBRARY}") +message(STATUS "Find Gmock library path: ${Gmock_LIBRARY}") + +set(CMAKE_REQUIRED_INCLUDES ${GTest_INCLUDE_DIR} ${GMock_INCLUDE_DIR}) +set(CMAKE_REQUIRED_LIBRARIES ${Gtest_LIBRARY} ${Gmock_LIBRARY} -lpthread) +set(CMAKE_REQUIRED_FLAGS) +check_cxx_source_runs(" +#include <gtest/gtest.h> +#include <gmock/gmock.h> +int main(int argc, char *argv[]) +{ + double pi = 3.14; + EXPECT_EQ(pi, 3.14); + return 0; +} +" GoogleTest_CHECK_FINE) +message(STATUS "GoogleTest check: ${GoogleTest_CHECK_FINE}") + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + GoogleTest + REQUIRED_VARS + GTest_INCLUDE_DIR + GMock_INCLUDE_DIR + Gtest_LIBRARY + Gmock_LIBRARY + GoogleTest_CHECK_FINE) + +set(GoogleTest_INCLUDE_DIR ${GTest_INCLUDE_DIR} ${GMock_INCLUDE_DIR}) +set(GoogleTest_LIBRARIES ${Gtest_LIBRARY} ${Gmock_LIBRARY}) +mark_as_advanced( + GoogleTest_INCLUDE_DIR + GoogleTest_LIBRARIES) diff --git a/contrib/libhdfs3-cmake/CMake/FindKERBEROS.cmake
b/contrib/libhdfs3-cmake/CMake/FindKERBEROS.cmake new file mode 100644 index 00000000000..5fc58235a3f --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/FindKERBEROS.cmake @@ -0,0 +1,23 @@ +# - Find kerberos +# Find the native KERBEROS includes and library +# +# KERBEROS_INCLUDE_DIRS - where to find krb5.h, etc. +# KERBEROS_LIBRARIES - List of libraries when using krb5. +# KERBEROS_FOUND - True if krb5 found. + +IF (KERBEROS_INCLUDE_DIRS) + # Already in cache, be silent + SET(KERBEROS_FIND_QUIETLY TRUE) +ENDIF (KERBEROS_INCLUDE_DIRS) + +FIND_PATH(KERBEROS_INCLUDE_DIRS krb5.h) + +SET(KERBEROS_NAMES krb5 k5crypto com_err) +FIND_LIBRARY(KERBEROS_LIBRARIES NAMES ${KERBEROS_NAMES}) + +# handle the QUIETLY and REQUIRED arguments and set KERBEROS_FOUND to TRUE if +# all listed variables are TRUE +INCLUDE(FindPackageHandleStandardArgs) +FIND_PACKAGE_HANDLE_STANDARD_ARGS(KERBEROS DEFAULT_MSG KERBEROS_LIBRARIES KERBEROS_INCLUDE_DIRS) + +MARK_AS_ADVANCED(KERBEROS_LIBRARIES KERBEROS_INCLUDE_DIRS) diff --git a/contrib/libhdfs3-cmake/CMake/FindSSL.cmake b/contrib/libhdfs3-cmake/CMake/FindSSL.cmake new file mode 100644 index 00000000000..bcbc5d89653 --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/FindSSL.cmake @@ -0,0 +1,26 @@ +# - Try to find the OpenSSL library (ssl) +# +# Once done this will define +# +# SSL_FOUND - System has OpenSSL +# SSL_INCLUDE_DIR - The OpenSSL include directory +# SSL_LIBRARIES - The libraries needed to use OpenSSL +# SSL_DEFINITIONS - Compiler switches required for using OpenSSL + + +IF (SSL_INCLUDE_DIR AND SSL_LIBRARIES) + # in cache already + SET(SSL_FIND_QUIETLY TRUE) +ENDIF (SSL_INCLUDE_DIR AND SSL_LIBRARIES) + +FIND_PATH(SSL_INCLUDE_DIR openssl/opensslv.h) + +FIND_LIBRARY(SSL_LIBRARIES crypto) + +INCLUDE(FindPackageHandleStandardArgs) + +# handle the QUIETLY and REQUIRED arguments and set SSL_FOUND to TRUE if +# all listed variables are TRUE +FIND_PACKAGE_HANDLE_STANDARD_ARGS(SSL DEFAULT_MSG SSL_LIBRARIES SSL_INCLUDE_DIR) + +MARK_AS_ADVANCED(SSL_INCLUDE_DIR SSL_LIBRARIES) \ No newline at end of file diff --git a/contrib/libhdfs3-cmake/CMake/Functions.cmake b/contrib/libhdfs3-cmake/CMake/Functions.cmake new file mode 100644 index 00000000000..a771b6043fb --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/Functions.cmake @@ -0,0 +1,46 @@ +FUNCTION(AUTO_SOURCES RETURN_VALUE PATTERN SOURCE_SUBDIRS) + + IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE") + SET(PATH ".") + IF (${ARGC} EQUAL 4) + LIST(GET ARGV 3 PATH) + ENDIF () + ENDIF() + + IF ("${SOURCE_SUBDIRS}" STREQUAL "RECURSE") + UNSET(${RETURN_VALUE}) + FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}") + LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES}) + + FILE(GLOB SUBDIRS RELATIVE ${PATH} ${PATH}/*) + + FOREACH(DIR ${SUBDIRS}) + IF (IS_DIRECTORY ${PATH}/${DIR}) + IF (NOT "${DIR}" STREQUAL "CMAKEFILES") + FILE(GLOB_RECURSE SUBDIR_FILES "${PATH}/${DIR}/${PATTERN}") + LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES}) + ENDIF() + ENDIF() + ENDFOREACH() + ELSE () + FILE(GLOB ${RETURN_VALUE} "${PATTERN}") + + FOREACH (PATH ${SOURCE_SUBDIRS}) + FILE(GLOB SUBDIR_FILES "${PATH}/${PATTERN}") + LIST(APPEND ${RETURN_VALUE} ${SUBDIR_FILES}) + ENDFOREACH(PATH ${SOURCE_SUBDIRS}) + ENDIF () + + IF (${FILTER_OUT}) + LIST(REMOVE_ITEM ${RETURN_VALUE} ${FILTER_OUT}) + ENDIF() + + SET(${RETURN_VALUE} ${${RETURN_VALUE}} PARENT_SCOPE) +ENDFUNCTION(AUTO_SOURCES) + +FUNCTION(CONTAINS_STRING FILE SEARCH RETURN_VALUE) + FILE(STRINGS ${FILE} FILE_CONTENTS REGEX ".*${SEARCH}.*") + IF (FILE_CONTENTS) + SET(${RETURN_VALUE} TRUE PARENT_SCOPE) + ENDIF()
+ENDFUNCTION(CONTAINS_STRING) diff --git a/contrib/libhdfs3-cmake/CMake/Options.cmake b/contrib/libhdfs3-cmake/CMake/Options.cmake new file mode 100644 index 00000000000..5561f3ccc1e --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/Options.cmake @@ -0,0 +1,169 @@ +OPTION(ENABLE_COVERAGE "enable code coverage" OFF) +OPTION(ENABLE_DEBUG "enable debug build" OFF) +OPTION(ENABLE_SSE "enable SSE4.2 buildin function" ON) +OPTION(ENABLE_FRAME_POINTER "enable frame pointer on 64bit system with flag -fno-omit-frame-pointer, on 32bit system, it is always enabled" ON) +OPTION(ENABLE_LIBCPP "using libc++ instead of libstdc++, only valid for clang compiler" OFF) +OPTION(ENABLE_BOOST "using boost instead of native compiler c++0x support" OFF) + +INCLUDE (CheckFunctionExists) +CHECK_FUNCTION_EXISTS(dladdr HAVE_DLADDR) +CHECK_FUNCTION_EXISTS(nanosleep HAVE_NANOSLEEP) + +IF(ENABLE_DEBUG STREQUAL ON) + SET(CMAKE_BUILD_TYPE Debug CACHE + STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) + SET(CMAKE_CXX_FLAGS_DEBUG "-g -O0" CACHE STRING "compiler flags for debug" FORCE) + SET(CMAKE_C_FLAGS_DEBUG "-g -O0" CACHE STRING "compiler flags for debug" FORCE) +ELSE(ENABLE_DEBUG STREQUAL ON) + SET(CMAKE_BUILD_TYPE RelWithDebInfo CACHE + STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) +ENDIF(ENABLE_DEBUG STREQUAL ON) + +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing") +SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing") + +IF(ENABLE_COVERAGE STREQUAL ON) + INCLUDE(CodeCoverage) +ENDIF(ENABLE_COVERAGE STREQUAL ON) + +IF(ENABLE_FRAME_POINTER STREQUAL ON) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") +ENDIF(ENABLE_FRAME_POINTER STREQUAL ON) + +IF(ENABLE_SSE STREQUAL ON) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") +ENDIF(ENABLE_SSE STREQUAL ON) + +IF(NOT TEST_HDFS_PREFIX) +SET(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." 
FORCE) +ENDIF(NOT TEST_HDFS_PREFIX) + +ADD_DEFINITIONS(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}") +ADD_DEFINITIONS(-D__STDC_FORMAT_MACROS) +ADD_DEFINITIONS(-D_GNU_SOURCE) + +IF(OS_MACOSX AND CMAKE_COMPILER_IS_GNUCXX) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-bind_at_load") +ENDIF(OS_MACOSX AND CMAKE_COMPILER_IS_GNUCXX) + +IF(OS_LINUX) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--export-dynamic") +ENDIF(OS_LINUX) + +SET(BOOST_ROOT ${CMAKE_PREFIX_PATH}) +IF(ENABLE_BOOST STREQUAL ON) + MESSAGE(STATUS "using boost instead of native compiler c++0x support.") + FIND_PACKAGE(Boost 1.50 REQUIRED) + SET(NEED_BOOST true CACHE INTERNAL "boost is required") +ELSE(ENABLE_BOOST STREQUAL ON) + SET(NEED_BOOST false CACHE INTERNAL "boost is required") +ENDIF(ENABLE_BOOST STREQUAL ON) + +IF(CMAKE_COMPILER_IS_GNUCXX) + IF(ENABLE_LIBCPP STREQUAL ON) + MESSAGE(FATAL_ERROR "Unsupport using GCC compiler with libc++") + ENDIF(ENABLE_LIBCPP STREQUAL ON) + + IF((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR EQUAL 4) AND OS_MACOSX) + SET(NEED_GCCEH true CACHE INTERNAL "Explicitly link with gcc_eh") + MESSAGE(STATUS "link with -lgcc_eh for TLS") + ENDIF((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR EQUAL 4) AND OS_MACOSX) + + IF((GCC_COMPILER_VERSION_MAJOR LESS 4) OR ((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 4))) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") + IF(NOT ENABLE_BOOST STREQUAL ON) + MESSAGE(STATUS "gcc version is older than 4.6.0, boost is required.") + FIND_PACKAGE(Boost 1.50 REQUIRED) + SET(NEED_BOOST true CACHE INTERNAL "boost is required") + ENDIF(NOT ENABLE_BOOST STREQUAL ON) + ELSEIF((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 7)) + IF(NOT ENABLE_BOOST STREQUAL ON) + MESSAGE(STATUS "gcc version is older than 4.6.0, boost is required.") + FIND_PACKAGE(Boost 1.50 REQUIRED) + SET(NEED_BOOST true CACHE INTERNAL "boost is required") + ENDIF(NOT ENABLE_BOOST STREQUAL ON) + MESSAGE(STATUS "adding c++0x support for gcc compiler") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") + ELSE((GCC_COMPILER_VERSION_MAJOR LESS 4) OR ((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 4))) + MESSAGE(STATUS "adding c++0x support for gcc compiler") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") + ENDIF((GCC_COMPILER_VERSION_MAJOR LESS 4) OR ((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 4))) + + IF(NEED_BOOST) + IF((Boost_MAJOR_VERSION LESS 1) OR ((Boost_MAJOR_VERSION EQUAL 1) AND (Boost_MINOR_VERSION LESS 50))) + MESSAGE(FATAL_ERROR "boost 1.50+ is required") + ENDIF() + ELSE(NEED_BOOST) + IF(HAVE_NANOSLEEP) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_NANOSLEEP") + ELSE(HAVE_NANOSLEEP) + MESSAGE(FATAL_ERROR "nanosleep() is required") + ENDIF(HAVE_NANOSLEEP) + ENDIF(NEED_BOOST) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") +ELSEIF(CMAKE_COMPILER_IS_CLANG) + MESSAGE(STATUS "adding c++0x support for clang compiler") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x") + SET(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++0x") + IF(ENABLE_LIBCPP STREQUAL ON) + MESSAGE(STATUS "using libc++ instead of libstdc++") + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") + SET(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++") + ENDIF(ENABLE_LIBCPP STREQUAL ON) +ENDIF(CMAKE_COMPILER_IS_GNUCXX) + +TRY_COMPILE(STRERROR_R_RETURN_INT + ${CMAKE_CURRENT_BINARY_DIR} + ${HDFS3_ROOT_DIR}/CMake/CMakeTestCompileStrerror.cpp + 
CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" + OUTPUT_VARIABLE OUTPUT) + +MESSAGE(STATUS "Checking whether strerror_r returns an int") + +IF(STRERROR_R_RETURN_INT) + MESSAGE(STATUS "Checking whether strerror_r returns an int -- yes") +ELSE(STRERROR_R_RETURN_INT) + MESSAGE(STATUS "Checking whether strerror_r returns an int -- no") +ENDIF(STRERROR_R_RETURN_INT) + +TRY_COMPILE(HAVE_STEADY_CLOCK + ${CMAKE_CURRENT_BINARY_DIR} + ${HDFS3_ROOT_DIR}/CMake/CMakeTestCompileSteadyClock.cpp + CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" + OUTPUT_VARIABLE OUTPUT) + +TRY_COMPILE(HAVE_NESTED_EXCEPTION + ${CMAKE_CURRENT_BINARY_DIR} + ${HDFS3_ROOT_DIR}/CMake/CMakeTestCompileNestedException.cpp + CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" + OUTPUT_VARIABLE OUTPUT) + +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include ") +TRY_COMPILE(HAVE_BOOST_CHRONO + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_BINARY_DIR}/test.cpp + CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" + -DINCLUDE_DIRECTORIES=${Boost_INCLUDE_DIR} + OUTPUT_VARIABLE OUTPUT) + +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include ") +TRY_COMPILE(HAVE_STD_CHRONO + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_BINARY_DIR}/test.cpp + CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" + OUTPUT_VARIABLE OUTPUT) + +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include ") +TRY_COMPILE(HAVE_BOOST_ATOMIC + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_BINARY_DIR}/test.cpp + CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" + -DINCLUDE_DIRECTORIES=${Boost_INCLUDE_DIR} + OUTPUT_VARIABLE OUTPUT) + +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include ") +TRY_COMPILE(HAVE_STD_ATOMIC + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_CURRENT_BINARY_DIR}/test.cpp + CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'" + OUTPUT_VARIABLE OUTPUT) diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake new file mode 100644 index 00000000000..55fbf646589 --- /dev/null +++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake @@ -0,0 +1,33 @@ +IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + SET(OS_LINUX true CACHE INTERNAL "Linux operating system") +ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Darwin") + SET(OS_MACOSX true CACHE INTERNAL "Mac Darwin operating system") +ELSE(CMAKE_SYSTEM_NAME STREQUAL "Linux") + MESSAGE(FATAL_ERROR "Unsupported OS: \"${CMAKE_SYSTEM_NAME}\"") +ENDIF(CMAKE_SYSTEM_NAME STREQUAL "Linux") + +IF(CMAKE_COMPILER_IS_GNUCXX) + EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_COMPILER_VERSION) + + IF (NOT GCC_COMPILER_VERSION) + MESSAGE(FATAL_ERROR "Cannot get gcc version") + ENDIF (NOT GCC_COMPILER_VERSION) + + STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION}) + + LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) + LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MINOR) + + SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") + SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") + + MESSAGE(STATUS "checking compiler: GCC (${GCC_COMPILER_VERSION_MAJOR}.${GCC_COMPILER_VERSION_MINOR}.${GCC_COMPILER_VERSION_PATCH})") +ELSE(CMAKE_COMPILER_IS_GNUCXX) + EXECUTE_PROCESS(COMMAND ${CMAKE_C_COMPILER} --version OUTPUT_VARIABLE COMPILER_OUTPUT) + IF(COMPILER_OUTPUT MATCHES "clang") + SET(CMAKE_COMPILER_IS_CLANG true CACHE INTERNAL 
"using clang as compiler") + MESSAGE(STATUS "checking compiler: CLANG") + ELSE(COMPILER_OUTPUT MATCHES "clang") + MESSAGE(FATAL_ERROR "Unsupported compiler: \"${CMAKE_CXX_COMPILER}\"") + ENDIF(COMPILER_OUTPUT MATCHES "clang") +ENDIF(CMAKE_COMPILER_IS_GNUCXX) diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt new file mode 100644 index 00000000000..1e9e36ecd08 --- /dev/null +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -0,0 +1,212 @@ +if (NOT USE_INTERNAL_PROTOBUF_LIBRARY) + # compatiable with protobuf which was compiled old C++ ABI + set(CMAKE_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0") + set(CMAKE_C_FLAGS "") + if (NOT (CMAKE_VERSION VERSION_LESS "3.8.0")) + unset(CMAKE_CXX_STANDARD) + endif () +endif() + +SET(WITH_KERBEROS false) +# project and source dir +set(HDFS3_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/libhdfs3) +set(HDFS3_SOURCE_DIR ${HDFS3_ROOT_DIR}/src) +set(HDFS3_COMMON_DIR ${HDFS3_SOURCE_DIR}/common) + +# module +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH}) +include(Platform) +include(Options) + +# prefer shared libraries +if (WITH_KERBEROS) + find_package(KERBEROS REQUIRED) +endif() + +# source +set(PROTO_FILES + #${HDFS3_SOURCE_DIR}/proto/encryption.proto + ${HDFS3_SOURCE_DIR}/proto/ClientDatanodeProtocol.proto + ${HDFS3_SOURCE_DIR}/proto/hdfs.proto + ${HDFS3_SOURCE_DIR}/proto/Security.proto + ${HDFS3_SOURCE_DIR}/proto/ProtobufRpcEngine.proto + ${HDFS3_SOURCE_DIR}/proto/ClientNamenodeProtocol.proto + ${HDFS3_SOURCE_DIR}/proto/IpcConnectionContext.proto + ${HDFS3_SOURCE_DIR}/proto/RpcHeader.proto + ${HDFS3_SOURCE_DIR}/proto/datatransfer.proto + ) + +PROTOBUF_GENERATE_CPP(PROTO_SOURCES PROTO_HEADERS ${PROTO_FILES}) + +configure_file(${HDFS3_SOURCE_DIR}/platform.h.in ${CMAKE_CURRENT_BINARY_DIR}/platform.h) + +set(SRCS + ${HDFS3_SOURCE_DIR}/network/TcpSocket.cpp + ${HDFS3_SOURCE_DIR}/network/DomainSocket.cpp + ${HDFS3_SOURCE_DIR}/network/BufferedSocketReader.cpp + ${HDFS3_SOURCE_DIR}/client/ReadShortCircuitInfo.cpp + ${HDFS3_SOURCE_DIR}/client/Pipeline.cpp + ${HDFS3_SOURCE_DIR}/client/Hdfs.cpp + ${HDFS3_SOURCE_DIR}/client/Packet.cpp + ${HDFS3_SOURCE_DIR}/client/OutputStreamImpl.cpp + ${HDFS3_SOURCE_DIR}/client/KerberosName.cpp + ${HDFS3_SOURCE_DIR}/client/PacketHeader.cpp + ${HDFS3_SOURCE_DIR}/client/LocalBlockReader.cpp + ${HDFS3_SOURCE_DIR}/client/UserInfo.cpp + ${HDFS3_SOURCE_DIR}/client/RemoteBlockReader.cpp + ${HDFS3_SOURCE_DIR}/client/Permission.cpp + ${HDFS3_SOURCE_DIR}/client/FileSystemImpl.cpp + ${HDFS3_SOURCE_DIR}/client/DirectoryIterator.cpp + ${HDFS3_SOURCE_DIR}/client/FileSystemKey.cpp + ${HDFS3_SOURCE_DIR}/client/DataTransferProtocolSender.cpp + ${HDFS3_SOURCE_DIR}/client/LeaseRenewer.cpp + ${HDFS3_SOURCE_DIR}/client/PeerCache.cpp + ${HDFS3_SOURCE_DIR}/client/InputStream.cpp + ${HDFS3_SOURCE_DIR}/client/FileSystem.cpp + ${HDFS3_SOURCE_DIR}/client/InputStreamImpl.cpp + ${HDFS3_SOURCE_DIR}/client/Token.cpp + ${HDFS3_SOURCE_DIR}/client/PacketPool.cpp + ${HDFS3_SOURCE_DIR}/client/OutputStream.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcRemoteCall.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcChannel.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcAuth.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcContentWrapper.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcConfig.cpp + ${HDFS3_SOURCE_DIR}/rpc/RpcServerInfo.cpp + ${HDFS3_SOURCE_DIR}/rpc/SaslClient.cpp + ${HDFS3_SOURCE_DIR}/server/Datanode.cpp + ${HDFS3_SOURCE_DIR}/server/LocatedBlocks.cpp + 
${HDFS3_SOURCE_DIR}/server/NamenodeProxy.cpp + ${HDFS3_SOURCE_DIR}/server/NamenodeImpl.cpp + ${HDFS3_SOURCE_DIR}/server/NamenodeInfo.cpp + ${HDFS3_SOURCE_DIR}/common/WritableUtils.cpp + ${HDFS3_SOURCE_DIR}/common/ExceptionInternal.cpp + ${HDFS3_SOURCE_DIR}/common/SessionConfig.cpp + ${HDFS3_SOURCE_DIR}/common/StackPrinter.cpp + ${HDFS3_SOURCE_DIR}/common/Exception.cpp + ${HDFS3_SOURCE_DIR}/common/Logger.cpp + ${HDFS3_SOURCE_DIR}/common/CFileWrapper.cpp + ${HDFS3_SOURCE_DIR}/common/XmlConfig.cpp + ${HDFS3_SOURCE_DIR}/common/WriteBuffer.cpp + ${HDFS3_SOURCE_DIR}/common/HWCrc32c.cpp + ${HDFS3_SOURCE_DIR}/common/MappedFileWrapper.cpp + ${HDFS3_SOURCE_DIR}/common/Hash.cpp + ${HDFS3_SOURCE_DIR}/common/SWCrc32c.cpp + ${HDFS3_SOURCE_DIR}/common/Thread.cpp + + ${HDFS3_SOURCE_DIR}/network/TcpSocket.h + ${HDFS3_SOURCE_DIR}/network/BufferedSocketReader.h + ${HDFS3_SOURCE_DIR}/network/Socket.h + ${HDFS3_SOURCE_DIR}/network/DomainSocket.h + ${HDFS3_SOURCE_DIR}/network/Syscall.h + ${HDFS3_SOURCE_DIR}/client/InputStreamImpl.h + ${HDFS3_SOURCE_DIR}/client/FileSystem.h + ${HDFS3_SOURCE_DIR}/client/ReadShortCircuitInfo.h + ${HDFS3_SOURCE_DIR}/client/InputStreamInter.h + ${HDFS3_SOURCE_DIR}/client/FileSystemImpl.h + ${HDFS3_SOURCE_DIR}/client/PacketPool.h + ${HDFS3_SOURCE_DIR}/client/Pipeline.h + ${HDFS3_SOURCE_DIR}/client/OutputStreamInter.h + ${HDFS3_SOURCE_DIR}/client/RemoteBlockReader.h + ${HDFS3_SOURCE_DIR}/client/Token.h + ${HDFS3_SOURCE_DIR}/client/KerberosName.h + ${HDFS3_SOURCE_DIR}/client/DirectoryIterator.h + ${HDFS3_SOURCE_DIR}/client/hdfs.h + ${HDFS3_SOURCE_DIR}/client/FileSystemStats.h + ${HDFS3_SOURCE_DIR}/client/FileSystemKey.h + ${HDFS3_SOURCE_DIR}/client/DataTransferProtocolSender.h + ${HDFS3_SOURCE_DIR}/client/Packet.h + ${HDFS3_SOURCE_DIR}/client/PacketHeader.h + ${HDFS3_SOURCE_DIR}/client/FileSystemInter.h + ${HDFS3_SOURCE_DIR}/client/LocalBlockReader.h + ${HDFS3_SOURCE_DIR}/client/TokenInternal.h + ${HDFS3_SOURCE_DIR}/client/InputStream.h + ${HDFS3_SOURCE_DIR}/client/PipelineAck.h + ${HDFS3_SOURCE_DIR}/client/BlockReader.h + ${HDFS3_SOURCE_DIR}/client/Permission.h + ${HDFS3_SOURCE_DIR}/client/OutputStreamImpl.h + ${HDFS3_SOURCE_DIR}/client/LeaseRenewer.h + ${HDFS3_SOURCE_DIR}/client/UserInfo.h + ${HDFS3_SOURCE_DIR}/client/PeerCache.h + ${HDFS3_SOURCE_DIR}/client/OutputStream.h + ${HDFS3_SOURCE_DIR}/client/FileStatus.h + ${HDFS3_SOURCE_DIR}/client/DataTransferProtocol.h + ${HDFS3_SOURCE_DIR}/client/BlockLocation.h + ${HDFS3_SOURCE_DIR}/rpc/RpcConfig.h + ${HDFS3_SOURCE_DIR}/rpc/SaslClient.h + ${HDFS3_SOURCE_DIR}/rpc/RpcAuth.h + ${HDFS3_SOURCE_DIR}/rpc/RpcClient.h + ${HDFS3_SOURCE_DIR}/rpc/RpcCall.h + ${HDFS3_SOURCE_DIR}/rpc/RpcContentWrapper.h + ${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.h + ${HDFS3_SOURCE_DIR}/rpc/RpcRemoteCall.h + ${HDFS3_SOURCE_DIR}/rpc/RpcServerInfo.h + ${HDFS3_SOURCE_DIR}/rpc/RpcChannel.h + ${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.h + ${HDFS3_SOURCE_DIR}/server/BlockLocalPathInfo.h + ${HDFS3_SOURCE_DIR}/server/LocatedBlocks.h + ${HDFS3_SOURCE_DIR}/server/DatanodeInfo.h + ${HDFS3_SOURCE_DIR}/server/RpcHelper.h + ${HDFS3_SOURCE_DIR}/server/ExtendedBlock.h + ${HDFS3_SOURCE_DIR}/server/NamenodeInfo.h + ${HDFS3_SOURCE_DIR}/server/NamenodeImpl.h + ${HDFS3_SOURCE_DIR}/server/LocatedBlock.h + ${HDFS3_SOURCE_DIR}/server/NamenodeProxy.h + ${HDFS3_SOURCE_DIR}/server/Datanode.h + ${HDFS3_SOURCE_DIR}/server/Namenode.h + ${HDFS3_SOURCE_DIR}/common/XmlConfig.h + ${HDFS3_SOURCE_DIR}/common/Logger.h + ${HDFS3_SOURCE_DIR}/common/WriteBuffer.h + ${HDFS3_SOURCE_DIR}/common/HWCrc32c.h + 
${HDFS3_SOURCE_DIR}/common/Checksum.h + ${HDFS3_SOURCE_DIR}/common/SessionConfig.h + ${HDFS3_SOURCE_DIR}/common/Unordered.h + ${HDFS3_SOURCE_DIR}/common/BigEndian.h + ${HDFS3_SOURCE_DIR}/common/Thread.h + ${HDFS3_SOURCE_DIR}/common/StackPrinter.h + ${HDFS3_SOURCE_DIR}/common/Exception.h + ${HDFS3_SOURCE_DIR}/common/WritableUtils.h + ${HDFS3_SOURCE_DIR}/common/StringUtil.h + ${HDFS3_SOURCE_DIR}/common/LruMap.h + ${HDFS3_SOURCE_DIR}/common/Function.h + ${HDFS3_SOURCE_DIR}/common/DateTime.h + ${HDFS3_SOURCE_DIR}/common/Hash.h + ${HDFS3_SOURCE_DIR}/common/SWCrc32c.h + ${HDFS3_SOURCE_DIR}/common/ExceptionInternal.h + ${HDFS3_SOURCE_DIR}/common/Memory.h + ${HDFS3_SOURCE_DIR}/common/FileWrapper.h + ) + +# target +add_library(hdfs3 STATIC ${SRCS} ${PROTO_SOURCES} ${PROTO_HEADERS}) + +if (USE_INTERNAL_PROTOBUF_LIBRARY) + add_dependencies(hdfs3 protoc) +endif() + +target_include_directories(hdfs3 PRIVATE ${HDFS3_SOURCE_DIR}) +target_include_directories(hdfs3 PRIVATE ${HDFS3_COMMON_DIR}) +target_include_directories(hdfs3 PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) + +target_include_directories(hdfs3 PRIVATE ${LIBGSASL_INCLUDE_DIR}) +if (WITH_KERBEROS) + target_include_directories(hdfs3 PRIVATE ${KERBEROS_INCLUDE_DIRS}) +endif() +target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR}) + +target_link_libraries(hdfs3 ${LIBGSASL_LIBRARY}) +if (WITH_KERBEROS) + target_link_libraries(hdfs3 ${KERBEROS_LIBRARIES}) +endif() +target_link_libraries(hdfs3 ${LIBXML2_LIBRARY}) + +# inherit from parent cmake +target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS}) +target_include_directories(hdfs3 PRIVATE ${Protobuf_INCLUDE_DIR}) +target_include_directories(hdfs3 PRIVATE ${OPENSSL_INCLUDE_DIR}) + +target_link_libraries(hdfs3 ${Protobuf_LIBRARY}) +target_link_libraries(hdfs3 ${OPENSSL_LIBRARIES}) diff --git a/contrib/libxml2 b/contrib/libxml2 new file mode 160000 index 00000000000..18890f471c4 --- /dev/null +++ b/contrib/libxml2 @@ -0,0 +1 @@ +Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf diff --git a/contrib/libxml2-cmake/CMakeLists.txt b/contrib/libxml2-cmake/CMakeLists.txt new file mode 100644 index 00000000000..c4b7f39cc8f --- /dev/null +++ b/contrib/libxml2-cmake/CMakeLists.txt @@ -0,0 +1,63 @@ +set(LIBXML2_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/libxml2) +set(LIBXML2_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/libxml2) + + +set(SRCS + ${LIBXML2_SOURCE_DIR}/parser.c + ${LIBXML2_SOURCE_DIR}/HTMLparser.c + ${LIBXML2_SOURCE_DIR}/buf.c + ${LIBXML2_SOURCE_DIR}/xzlib.c + ${LIBXML2_SOURCE_DIR}/xmlregexp.c + ${LIBXML2_SOURCE_DIR}/entities.c + ${LIBXML2_SOURCE_DIR}/rngparser.c + ${LIBXML2_SOURCE_DIR}/encoding.c + ${LIBXML2_SOURCE_DIR}/legacy.c + ${LIBXML2_SOURCE_DIR}/error.c + ${LIBXML2_SOURCE_DIR}/debugXML.c + ${LIBXML2_SOURCE_DIR}/xpointer.c + ${LIBXML2_SOURCE_DIR}/DOCBparser.c + ${LIBXML2_SOURCE_DIR}/xmlcatalog.c + ${LIBXML2_SOURCE_DIR}/c14n.c + ${LIBXML2_SOURCE_DIR}/xmlreader.c + ${LIBXML2_SOURCE_DIR}/xmlstring.c + ${LIBXML2_SOURCE_DIR}/dict.c + ${LIBXML2_SOURCE_DIR}/xpath.c + ${LIBXML2_SOURCE_DIR}/tree.c + ${LIBXML2_SOURCE_DIR}/trionan.c + ${LIBXML2_SOURCE_DIR}/pattern.c + ${LIBXML2_SOURCE_DIR}/globals.c + ${LIBXML2_SOURCE_DIR}/xmllint.c + ${LIBXML2_SOURCE_DIR}/chvalid.c + ${LIBXML2_SOURCE_DIR}/relaxng.c + ${LIBXML2_SOURCE_DIR}/list.c + ${LIBXML2_SOURCE_DIR}/xinclude.c + ${LIBXML2_SOURCE_DIR}/xmlIO.c + ${LIBXML2_SOURCE_DIR}/triostr.c + ${LIBXML2_SOURCE_DIR}/hash.c + ${LIBXML2_SOURCE_DIR}/xmlsave.c + ${LIBXML2_SOURCE_DIR}/HTMLtree.c + ${LIBXML2_SOURCE_DIR}/SAX.c + ${LIBXML2_SOURCE_DIR}/xmlschemas.c 
+ ${LIBXML2_SOURCE_DIR}/SAX2.c + ${LIBXML2_SOURCE_DIR}/threads.c + ${LIBXML2_SOURCE_DIR}/runsuite.c + ${LIBXML2_SOURCE_DIR}/catalog.c + ${LIBXML2_SOURCE_DIR}/uri.c + ${LIBXML2_SOURCE_DIR}/xmlmodule.c + ${LIBXML2_SOURCE_DIR}/xlink.c + ${LIBXML2_SOURCE_DIR}/parserInternals.c + ${LIBXML2_SOURCE_DIR}/xmlwriter.c + ${LIBXML2_SOURCE_DIR}/xmlunicode.c + ${LIBXML2_SOURCE_DIR}/runxmlconf.c + ${LIBXML2_SOURCE_DIR}/xmlmemory.c + ${LIBXML2_SOURCE_DIR}/nanoftp.c + ${LIBXML2_SOURCE_DIR}/xmlschemastypes.c + ${LIBXML2_SOURCE_DIR}/valid.c + ${LIBXML2_SOURCE_DIR}/nanohttp.c + ${LIBXML2_SOURCE_DIR}/schematron.c +) +add_library(libxml2 STATIC ${SRCS}) + +target_include_directories(libxml2 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/include) + +target_include_directories(libxml2 PUBLIC ${LIBXML2_SOURCE_DIR}/include) diff --git a/contrib/libxml2-cmake/linux_x86_64/include/config.h b/contrib/libxml2-cmake/linux_x86_64/include/config.h new file mode 100644 index 00000000000..7969b377dc3 --- /dev/null +++ b/contrib/libxml2-cmake/linux_x86_64/include/config.h @@ -0,0 +1,285 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Type cast for the gethostbyname() argument */ +#define GETHOSTBYNAME_ARG_CAST /**/ + +/* Define to 1 if you have the <arpa/inet.h> header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the <arpa/nameser.h> header file. */ +#define HAVE_ARPA_NAMESER_H 1 + +/* Whether struct sockaddr::__ss_family exists */ +/* #undef HAVE_BROKEN_SS_FAMILY */ + +/* Define to 1 if you have the <ctype.h> header file. */ +#define HAVE_CTYPE_H 1 + +/* Define to 1 if you have the <dirent.h> header file. */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#define HAVE_DLFCN_H 1 + +/* Have dlopen based dso */ +#define HAVE_DLOPEN /**/ + +/* Define to 1 if you have the <dl.h> header file. */ +/* #undef HAVE_DL_H */ + +/* Define to 1 if you have the <errno.h> header file. */ +#define HAVE_ERRNO_H 1 + +/* Define to 1 if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the <float.h> header file. */ +#define HAVE_FLOAT_H 1 + +/* Define to 1 if you have the `fprintf' function. */ +#define HAVE_FPRINTF 1 + +/* Define to 1 if you have the `ftime' function. */ +#define HAVE_FTIME 1 + +/* Define if getaddrinfo is there */ +#define HAVE_GETADDRINFO /**/ + +/* Define to 1 if you have the `gettimeofday' function. */ +#define HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `isascii' function. */ +#define HAVE_ISASCII 1 + +/* Define if isinf is there */ +#define HAVE_ISINF /**/ + +/* Define if isnan is there */ +#define HAVE_ISNAN /**/ + +/* Define if history library is there (-lhistory) */ +/* #undef HAVE_LIBHISTORY */ + +/* Define if pthread library is there (-lpthread) */ +#define HAVE_LIBPTHREAD /**/ + +/* Define if readline library is there (-lreadline) */ +/* #undef HAVE_LIBREADLINE */ + +/* Define to 1 if you have the <limits.h> header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the `localtime' function. */ +#define HAVE_LOCALTIME 1 + +/* Define to 1 if you have the <lzma.h> header file. */ +/* #undef HAVE_LZMA_H */ + +/* Define to 1 if you have the <malloc.h> header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the <math.h> header file. */ +#define HAVE_MATH_H 1 + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mmap' function. */ +#define HAVE_MMAP 1 + +/* Define to 1 if you have the `munmap' function.
*/ +#define HAVE_MUNMAP 1 + +/* mmap() is no good without munmap() */ +#if defined(HAVE_MMAP) && !defined(HAVE_MUNMAP) +# undef /**/ HAVE_MMAP +#endif + +/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */ +/* #undef HAVE_NDIR_H */ + +/* Define to 1 if you have the <netdb.h> header file. */ +#define HAVE_NETDB_H 1 + +/* Define to 1 if you have the <netinet/in.h> header file. */ +#define HAVE_NETINET_IN_H 1 + +/* Define to 1 if you have the <poll.h> header file. */ +#define HAVE_POLL_H 1 + +/* Define to 1 if you have the `printf' function. */ +#define HAVE_PRINTF 1 + +/* Define if <pthread.h> is there */ +#define HAVE_PTHREAD_H /**/ + +/* Define to 1 if you have the `putenv' function. */ +#define HAVE_PUTENV 1 + +/* Define to 1 if you have the `rand' function. */ +#define HAVE_RAND 1 + +/* Define to 1 if you have the `rand_r' function. */ +#define HAVE_RAND_R 1 + +/* Define to 1 if you have the <resolv.h> header file. */ +#define HAVE_RESOLV_H 1 + +/* Have shl_load based dso */ +/* #undef HAVE_SHLLOAD */ + +/* Define to 1 if you have the `signal' function. */ +#define HAVE_SIGNAL 1 + +/* Define to 1 if you have the <signal.h> header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `sprintf' function. */ +#define HAVE_SPRINTF 1 + +/* Define to 1 if you have the `srand' function. */ +#define HAVE_SRAND 1 + +/* Define to 1 if you have the `sscanf' function. */ +#define HAVE_SSCANF 1 + +/* Define to 1 if you have the `stat' function. */ +#define HAVE_STAT 1 + +/* Define to 1 if you have the <stdarg.h> header file. */ +#define HAVE_STDARG_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strftime' function. */ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define to 1 if you have the <sys/mman.h> header file. */ +#define HAVE_SYS_MMAN_H 1 + +/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define to 1 if you have the <sys/select.h> header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the <sys/socket.h> header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/timeb.h> header file. */ +#define HAVE_SYS_TIMEB_H 1 + +/* Define to 1 if you have the <sys/time.h> header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the `time' function. */ +#define HAVE_TIME 1 + +/* Define to 1 if you have the <time.h> header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Whether va_copy() is available */ +#define HAVE_VA_COPY 1 + +/* Define to 1 if you have the `vfprintf' function. */ +#define HAVE_VFPRINTF 1 + +/* Define to 1 if you have the `vsnprintf' function. */ +#define HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `vsprintf' function. */ +#define HAVE_VSPRINTF 1 + +/* Define to 1 if you have the <zlib.h> header file. */ +/* #undef HAVE_ZLIB_H */ + +/* Whether __va_copy() is available */ +/* #undef HAVE___VA_COPY */ + +/* Define as const if the declaration of iconv() needs const.
*/ +#define ICONV_CONST + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "libxml2" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "" + +/* Type cast for the send() function 2nd arg */ +#define SEND_ARG2_CAST /**/ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Support for IPv6 */ +#define SUPPORT_IP6 /**/ + +/* Define if va_list is an array type */ +#define VA_LIST_IS_ARRAY 1 + +/* Version number of package */ +#define VERSION "2.9.8" + +/* Determine what socket length (socklen_t) data type is */ +#define XML_SOCKLEN_T socklen_t + +/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/types.h>, + <pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +/* #undef _UINT32_T */ + +/* ss_family is not defined here, use __ss_family instead */ +/* #undef ss_family */ + +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint32_t */ diff --git a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h new file mode 100644 index 00000000000..92d3414fdac --- /dev/null +++ b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h @@ -0,0 +1,481 @@ +/* + * Summary: compile-time version information + * Description: compile-time version information for the XML library + * + * Copy: See Copyright for the status of this software.
+ * + * Author: Daniel Veillard + */ + +#ifndef __XML_VERSION_H__ +#define __XML_VERSION_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * use those to be sure nothing nasty will happen if + * your library and includes mismatch + */ +#ifndef LIBXML2_COMPILING_MSCCDEF +XMLPUBFUN void XMLCALL xmlCheckVersion(int version); +#endif /* LIBXML2_COMPILING_MSCCDEF */ + +/** + * LIBXML_DOTTED_VERSION: + * + * the version string like "1.2.3" + */ +#define LIBXML_DOTTED_VERSION "2.9.8" + +/** + * LIBXML_VERSION: + * + * the version number: 1.2.3 value is 10203 + */ +#define LIBXML_VERSION 20908 + +/** + * LIBXML_VERSION_STRING: + * + * the version number string, 1.2.3 value is "10203" + */ +#define LIBXML_VERSION_STRING "20908" + +/** + * LIBXML_VERSION_EXTRA: + * + * extra version information, used to show a CVS compilation + */ +#define LIBXML_VERSION_EXTRA "-GITv2.9.9-rc2-1-g6fc04d71" + +/** + * LIBXML_TEST_VERSION: + * + * Macro to check that the libxml version in use is compatible with + * the version the software has been compiled against + */ +#define LIBXML_TEST_VERSION xmlCheckVersion(20908); + +#ifndef VMS +#if 0 +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO +#else +/** + * WITHOUT_TRIO: + * + * defined if the trio support should not be configured in + */ +#define WITHOUT_TRIO +#endif +#else /* VMS */ +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO 1 +#endif /* VMS */ + +/** + * LIBXML_THREAD_ENABLED: + * + * Whether the thread support is configured in + */ +#define LIBXML_THREAD_ENABLED 1 + +/** + * LIBXML_THREAD_ALLOC_ENABLED: + * + * Whether the allocation hooks are per-thread + */ +#if 0 +#define LIBXML_THREAD_ALLOC_ENABLED +#endif + +/** + * LIBXML_TREE_ENABLED: + * + * Whether the DOM like tree manipulation API support is configured in + */ +#if 1 +#define LIBXML_TREE_ENABLED +#endif + +/** + * LIBXML_OUTPUT_ENABLED: + * + * Whether the serialization/saving support is configured in + */ +#if 1 +#define LIBXML_OUTPUT_ENABLED +#endif + +/** + * LIBXML_PUSH_ENABLED: + * + * Whether the push parsing interfaces are configured in + */ +#if 1 +#define LIBXML_PUSH_ENABLED +#endif + +/** + * LIBXML_READER_ENABLED: + * + * Whether the xmlReader parsing interface is configured in + */ +#if 1 +#define LIBXML_READER_ENABLED +#endif + +/** + * LIBXML_PATTERN_ENABLED: + * + * Whether the xmlPattern node selection interface is configured in + */ +#if 1 +#define LIBXML_PATTERN_ENABLED +#endif + +/** + * LIBXML_WRITER_ENABLED: + * + * Whether the xmlWriter saving interface is configured in + */ +#if 1 +#define LIBXML_WRITER_ENABLED +#endif + +/** + * LIBXML_SAX1_ENABLED: + * + * Whether the older SAX1 interface is configured in + */ +#if 1 +#define LIBXML_SAX1_ENABLED +#endif + +/** + * LIBXML_FTP_ENABLED: + * + * Whether the FTP support is configured in + */ +#if 1 +#define LIBXML_FTP_ENABLED +#endif + +/** + * LIBXML_HTTP_ENABLED: + * + * Whether the HTTP support is configured in + */ +#if 1 +#define LIBXML_HTTP_ENABLED +#endif + +/** + * LIBXML_VALID_ENABLED: + * + * Whether the DTD validation support is configured in + */ +#if 1 +#define LIBXML_VALID_ENABLED +#endif + +/** + * LIBXML_HTML_ENABLED: + * + * Whether the HTML support is configured in + */ +#if 1 +#define LIBXML_HTML_ENABLED +#endif + +/** + * LIBXML_LEGACY_ENABLED: + * + * Whether the deprecated APIs are compiled in for compatibility + */ +#if 1 +#define LIBXML_LEGACY_ENABLED +#endif + +/** + * 
LIBXML_C14N_ENABLED: + * + * Whether the Canonicalization support is configured in + */ +#if 1 +#define LIBXML_C14N_ENABLED +#endif + +/** + * LIBXML_CATALOG_ENABLED: + * + * Whether the Catalog support is configured in + */ +#if 1 +#define LIBXML_CATALOG_ENABLED +#endif + +/** + * LIBXML_DOCB_ENABLED: + * + * Whether the SGML Docbook support is configured in + */ +#if 1 +#define LIBXML_DOCB_ENABLED +#endif + +/** + * LIBXML_XPATH_ENABLED: + * + * Whether XPath is configured in + */ +#if 1 +#define LIBXML_XPATH_ENABLED +#endif + +/** + * LIBXML_XPTR_ENABLED: + * + * Whether XPointer is configured in + */ +#if 1 +#define LIBXML_XPTR_ENABLED +#endif + +/** + * LIBXML_XINCLUDE_ENABLED: + * + * Whether XInclude is configured in + */ +#if 1 +#define LIBXML_XINCLUDE_ENABLED +#endif + +/** + * LIBXML_ICONV_ENABLED: + * + * Whether iconv support is available + */ +#if 1 +#define LIBXML_ICONV_ENABLED +#endif + +/** + * LIBXML_ICU_ENABLED: + * + * Whether icu support is available + */ +#if 0 +#define LIBXML_ICU_ENABLED +#endif + +/** + * LIBXML_ISO8859X_ENABLED: + * + * Whether ISO-8859-* support is made available in case iconv is not + */ +#if 1 +#define LIBXML_ISO8859X_ENABLED +#endif + +/** + * LIBXML_DEBUG_ENABLED: + * + * Whether Debugging module is configured in + */ +#if 1 +#define LIBXML_DEBUG_ENABLED +#endif + +/** + * DEBUG_MEMORY_LOCATION: + * + * Whether the memory debugging is configured in + */ +#if 0 +#define DEBUG_MEMORY_LOCATION +#endif + +/** + * LIBXML_DEBUG_RUNTIME: + * + * Whether the runtime debugging is configured in + */ +#if 0 +#define LIBXML_DEBUG_RUNTIME +#endif + +/** + * LIBXML_UNICODE_ENABLED: + * + * Whether the Unicode related interfaces are compiled in + */ +#if 1 +#define LIBXML_UNICODE_ENABLED +#endif + +/** + * LIBXML_REGEXP_ENABLED: + * + * Whether the regular expressions interfaces are compiled in + */ +#if 1 +#define LIBXML_REGEXP_ENABLED +#endif + +/** + * LIBXML_AUTOMATA_ENABLED: + * + * Whether the automata interfaces are compiled in + */ +#if 1 +#define LIBXML_AUTOMATA_ENABLED +#endif + +/** + * LIBXML_EXPR_ENABLED: + * + * Whether the formal expressions interfaces are compiled in + */ +#if 1 +#define LIBXML_EXPR_ENABLED +#endif + +/** + * LIBXML_SCHEMAS_ENABLED: + * + * Whether the Schemas validation interfaces are compiled in + */ +#if 1 +#define LIBXML_SCHEMAS_ENABLED +#endif + +/** + * LIBXML_SCHEMATRON_ENABLED: + * + * Whether the Schematron validation interfaces are compiled in + */ +#if 1 +#define LIBXML_SCHEMATRON_ENABLED +#endif + +/** + * LIBXML_MODULES_ENABLED: + * + * Whether the module interfaces are compiled in + */ +#if 1 +#define LIBXML_MODULES_ENABLED +/** + * LIBXML_MODULE_EXTENSION: + * + * the string suffix used by dynamic modules (usually shared libraries) + */ +#define LIBXML_MODULE_EXTENSION ".so" +#endif + +/** + * LIBXML_ZLIB_ENABLED: + * + * Whether the Zlib support is compiled in + */ +#if 1 +#define LIBXML_ZLIB_ENABLED +#endif + +/** + * LIBXML_LZMA_ENABLED: + * + * Whether the Lzma support is compiled in + */ +#if 0 +#define LIBXML_LZMA_ENABLED +#endif + +#ifdef __GNUC__ + +/** + * ATTRIBUTE_UNUSED: + * + * Macro used to signal to GCC unused function parameters + */ + +#ifndef ATTRIBUTE_UNUSED +# if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7))) +# define ATTRIBUTE_UNUSED __attribute__((unused)) +# else +# define ATTRIBUTE_UNUSED +# endif +#endif + +/** + * LIBXML_ATTR_ALLOC_SIZE: + * + * Macro used to indicate to GCC this is an allocator function + */ + +#ifndef LIBXML_ATTR_ALLOC_SIZE +# if (!defined(__clang__) 
&& ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)))) +# define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x))) +# else +# define LIBXML_ATTR_ALLOC_SIZE(x) +# endif +#else +# define LIBXML_ATTR_ALLOC_SIZE(x) +#endif + +/** + * LIBXML_ATTR_FORMAT: + * + * Macro used to indicate to GCC the parameter are printf like + */ + +#ifndef LIBXML_ATTR_FORMAT +# if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3))) +# define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args))) +# else +# define LIBXML_ATTR_FORMAT(fmt,args) +# endif +#else +# define LIBXML_ATTR_FORMAT(fmt,args) +#endif + +#else /* ! __GNUC__ */ +/** + * ATTRIBUTE_UNUSED: + * + * Macro used to signal to GCC unused function parameters + */ +#define ATTRIBUTE_UNUSED +/** + * LIBXML_ATTR_ALLOC_SIZE: + * + * Macro used to indicate to GCC this is an allocator function + */ +#define LIBXML_ATTR_ALLOC_SIZE(x) +/** + * LIBXML_ATTR_FORMAT: + * + * Macro used to indicate to GCC the parameter are printf like + */ +#define LIBXML_ATTR_FORMAT(fmt,args) +#endif /* __GNUC__ */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif + + diff --git a/contrib/poco b/contrib/poco index 20c1d877773..fe5505e56c2 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 20c1d877773b6a672f1bbfe3290dfea42a117ed5 +Subproject commit fe5505e56c27b6ecb0dcbc40c49dc2caf4e9637f diff --git a/contrib/protobuf b/contrib/protobuf new file mode 160000 index 00000000000..12735370922 --- /dev/null +++ b/contrib/protobuf @@ -0,0 +1 @@ +Subproject commit 12735370922a35f03999afff478e1c6d7aa917a4 diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index cce97e4a57e..f285a65356a 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -264,6 +264,11 @@ target_link_libraries(dbms PRIVATE ${OPENSSL_CRYPTO_LIBRARY} Threads::Threads) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR}) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) +if (USE_HDFS) + target_link_libraries (dbms PRIVATE ${HDFS3_LIBRARY}) + target_include_directories (dbms SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR}) +endif() + if (NOT USE_INTERNAL_LZ4_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR}) endif () diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 75c29435036..f965bf58eaa 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -376,13 +376,21 @@ int Server::main(const std::vector & /*args*/) format_schema_path.createDirectories(); LOG_INFO(log, "Loading metadata."); - loadMetadataSystem(*global_context); - /// After attaching system databases we can initialize system log. - global_context->initializeSystemLogs(); - /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) - attachSystemTablesServer(*global_context->getDatabase("system"), has_zookeeper); - /// Then, load remaining databases - loadMetadata(*global_context); + try + { + loadMetadataSystem(*global_context); + /// After attaching system databases we can initialize system log. + global_context->initializeSystemLogs(); + /// After the system database is created, attach virtual system tables (in addition to query_log and part_log) + attachSystemTablesServer(*global_context->getDatabase("system"), has_zookeeper); + /// Then, load remaining databases + loadMetadata(*global_context); + } + catch (...) 
+ { + tryLogCurrentException(log, "Caught exception while loading metadata"); + throw; + } LOG_DEBUG(log, "Loaded metadata."); global_context->setCurrentDatabase(default_database); diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 9159d317f29..8036c77e395 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include "TCPHandler.h" @@ -370,6 +371,17 @@ void TCPHandler::processInsertQuery(const Settings & global_settings) /// Send block to the client - table structure. Block block = state.io.out->getHeader(); + + /// Support insert from old clients without low cardinality type. + if (client_revision && client_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + { + for (auto & col : block) + { + col.type = recursiveRemoveLowCardinality(col.type); + col.column = recursiveRemoveLowCardinality(col.column); + } + } + sendData(block); readData(global_settings); @@ -752,8 +764,13 @@ void TCPHandler::initBlockInput() else state.maybe_compressed_in = in; + Block header; + if (state.io.out) + header = state.io.out->getHeader(); + state.block_in = std::make_shared( *state.maybe_compressed_in, + header, client_revision); } } diff --git a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 3a0940ace33..4bca50ee27a 100644 --- a/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/dbms/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -18,7 +18,7 @@ namespace DB namespace ErrorCodes { extern const int TOO_SLOW; - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION; extern const int SYNTAX_ERROR; extern const int BAD_ARGUMENTS; @@ -146,7 +146,7 @@ public: if (!sufficientArgs(arg_count)) throw Exception{"Aggregate function " + derived().getName() + " requires at least 3 arguments.", - ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION}; + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; if (arg_count - 1 > AggregateFunctionSequenceMatchData::max_events) throw Exception{"Aggregate function " + derived().getName() + " supports up to " + diff --git a/dbms/src/Columns/ColumnLowCardinality.cpp b/dbms/src/Columns/ColumnLowCardinality.cpp index fc52d03a91d..db1eb8f092b 100644 --- a/dbms/src/Columns/ColumnLowCardinality.cpp +++ b/dbms/src/Columns/ColumnLowCardinality.cpp @@ -163,7 +163,7 @@ void ColumnLowCardinality::insertRangeFrom(const IColumn & src, size_t start, si auto * low_cardinality_src = typeid_cast(&src); if (!low_cardinality_src) - throw Exception("Expected ColumnLowCardinality, got" + src.getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception("Expected ColumnLowCardinality, got " + src.getName(), ErrorCodes::ILLEGAL_COLUMN); if (&low_cardinality_src->getDictionary() == &getDictionary()) { diff --git a/dbms/src/Columns/ColumnUnique.h b/dbms/src/Columns/ColumnUnique.h index 7fb01620fbd..ec8dc9a047e 100644 --- a/dbms/src/Columns/ColumnUnique.h +++ b/dbms/src/Columns/ColumnUnique.h @@ -36,6 +36,7 @@ public: const ColumnPtr & getNestedColumn() const override; const ColumnPtr & getNestedNotNullableColumn() const override { return column_holder; } + bool nestedColumnIsNullable() const override { return is_nullable; } size_t uniqueInsert(const Field & x) override; size_t uniqueInsertFrom(const IColumn & src, size_t n) override; diff --git a/dbms/src/Columns/IColumnUnique.h 
b/dbms/src/Columns/IColumnUnique.h index 7e5f3976dce..a4646a055d5 100644 --- a/dbms/src/Columns/IColumnUnique.h +++ b/dbms/src/Columns/IColumnUnique.h @@ -18,6 +18,8 @@ public: /// The same as getNestedColumn, but removes null map if nested column is nullable. virtual const ColumnPtr & getNestedNotNullableColumn() const = 0; + virtual bool nestedColumnIsNullable() const = 0; + /// Returns array with StringRefHash calculated for each row of getNestedNotNullableColumn() column. /// Returns nullptr if nested column doesn't contain strings. Otherwise calculates hash (if it wasn't). /// Uses thread-safe cache. diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 669e7f9c36b..e5b6028594b 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -42,7 +42,7 @@ namespace ErrorCodes extern const int ATTEMPT_TO_READ_AFTER_EOF = 32; extern const int CANNOT_READ_ALL_DATA = 33; extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION = 34; - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION = 35; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION = 35; extern const int BAD_ARGUMENTS = 36; extern const int UNKNOWN_ELEMENT_IN_AST = 37; extern const int CANNOT_PARSE_DATE = 38; @@ -285,7 +285,7 @@ namespace ErrorCodes extern const int INCORRECT_INDEX = 282; extern const int UNKNOWN_DISTRIBUTED_PRODUCT_MODE = 283; extern const int UNKNOWN_GLOBAL_SUBQUERIES_METHOD = 284; - extern const int TOO_LESS_LIVE_REPLICAS = 285; + extern const int TOO_FEW_LIVE_REPLICAS = 285; extern const int UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE = 286; extern const int UNKNOWN_FORMAT_VERSION = 287; extern const int DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED = 288; diff --git a/dbms/src/Common/HashTable/TwoLevelHashMap.h b/dbms/src/Common/HashTable/TwoLevelHashMap.h index a6eab28b915..617774a0aa7 100644 --- a/dbms/src/Common/HashTable/TwoLevelHashMap.h +++ b/dbms/src/Common/HashTable/TwoLevelHashMap.h @@ -10,16 +10,17 @@ template typename Cell, typename Hash = DefaultHash, typename Grower = TwoLevelHashTableGrower<>, - typename Allocator = HashTableAllocator + typename Allocator = HashTableAllocator, + template typename ImplTable = HashMapTable > -class TwoLevelHashMapTable : public TwoLevelHashTable> +class TwoLevelHashMapTable : public TwoLevelHashTable> { public: using key_type = Key; using mapped_type = typename Cell::Mapped; using value_type = typename Cell::value_type; - using TwoLevelHashTable>::TwoLevelHashTable; + using TwoLevelHashTable>::TwoLevelHashTable; mapped_type & ALWAYS_INLINE operator[](Key x) { @@ -41,9 +42,10 @@ template typename Mapped, typename Hash = DefaultHash, typename Grower = TwoLevelHashTableGrower<>, - typename Allocator = HashTableAllocator + typename Allocator = HashTableAllocator, + template typename ImplTable = HashMapTable > -using TwoLevelHashMap = TwoLevelHashMapTable, Hash, Grower, Allocator>; +using TwoLevelHashMap = TwoLevelHashMapTable, Hash, Grower, Allocator, ImplTable>; template @@ -52,6 +54,7 @@ template typename Mapped, typename Hash = DefaultHash, typename Grower = TwoLevelHashTableGrower<>, - typename Allocator = HashTableAllocator + typename Allocator = HashTableAllocator, + template typename ImplTable = HashMapTable > -using TwoLevelHashMapWithSavedHash = TwoLevelHashMapTable, Hash, Grower, Allocator>; +using TwoLevelHashMapWithSavedHash = TwoLevelHashMapTable, Hash, Grower, Allocator, ImplTable>; diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index 44aa9e9773d..302fc33c6b4 100644 --- a/dbms/src/Common/config.h.in +++ 
b/dbms/src/Common/config.h.in @@ -16,3 +16,4 @@ #cmakedefine01 USE_POCO_NETSSL #cmakedefine01 CLICKHOUSE_SPLIT_BINARY #cmakedefine01 USE_BASE64 +#cmakedefine01 USE_HDFS diff --git a/dbms/src/Core/Defines.h b/dbms/src/Core/Defines.h index 9797218fe43..661dafeb0ee 100644 --- a/dbms/src/Core/Defines.h +++ b/dbms/src/Core/Defines.h @@ -53,6 +53,8 @@ #define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54408 #define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410 +#define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405 + /// Version of ClickHouse TCP protocol. Set to git tag with latest protocol change. #define DBMS_TCP_PROTOCOL_VERSION 54226 diff --git a/dbms/src/DataStreams/BlockExtraInfoInputStream.h b/dbms/src/DataStreams/BlockExtraInfoInputStream.h deleted file mode 100644 index 6c6bd793f56..00000000000 --- a/dbms/src/DataStreams/BlockExtraInfoInputStream.h +++ /dev/null @@ -1,39 +0,0 @@ -#pragma once - -#include - -namespace DB -{ - -/** Adds to one stream additional block information that is specified - * as the constructor parameter. - */ -class BlockExtraInfoInputStream : public IProfilingBlockInputStream -{ -public: - BlockExtraInfoInputStream(const BlockInputStreamPtr & input, const BlockExtraInfo & block_extra_info_) - : block_extra_info(block_extra_info_) - { - children.push_back(input); - } - - BlockExtraInfo getBlockExtraInfo() const override - { - return block_extra_info; - } - - String getName() const override { return "BlockExtraInfoInput"; } - - Block getHeader() const override { return children.back()->getHeader(); } - -protected: - Block readImpl() override - { - return children.back()->read(); - } - -private: - BlockExtraInfo block_extra_info; -}; - -} diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index fbb2bb6e08f..33afbb0aa9e 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB @@ -152,6 +153,12 @@ Block NativeBlockInputStream::readImpl() column.column = std::move(read_column); + if (server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE) + { + column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); + column.type = header.getByPosition(i).type; + } + res.insert(std::move(column)); if (use_index) diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.h b/dbms/src/DataTypes/DataTypeLowCardinality.h index 86698ded893..5744419bf01 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.h +++ b/dbms/src/DataTypes/DataTypeLowCardinality.h @@ -164,4 +164,13 @@ private: /// Returns dictionary type if type is DataTypeLowCardinality, type otherwise. DataTypePtr removeLowCardinality(const DataTypePtr & type); +/// Remove LowCardinality recursively from all nested types. +DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type); + +/// Remove LowCardinality recursively from all nested columns. +ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column); + +/// Convert column of type from_type to type to_type by converting nested LowCardinality columns. 
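Both the TCPHandler and NativeBlockInputStream hunks above gate the LowCardinality conversion on the peer's revision: if the other side predates DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE, data is converted back to the representation it understands. A minimal standalone sketch of that gating pattern follows; only the revision constant is taken from the Defines.h hunk, the struct and function names are purely illustrative.

#include <cstdint>
#include <iostream>

// Illustrative only: fall back to the old wire representation when the peer
// predates the revision that introduced the feature.
constexpr uint32_t MIN_REVISION_WITH_LOW_CARDINALITY = 54405; // value from the Defines.h hunk above

struct ColumnMeta { bool low_cardinality = true; };

ColumnMeta prepareForPeer(ColumnMeta col, uint32_t peer_revision)
{
    // A zero revision means "unknown"; the checks in the diff skip the conversion in that case.
    if (peer_revision && peer_revision < MIN_REVISION_WITH_LOW_CARDINALITY)
        col.low_cardinality = false; // a real column would go through recursiveRemoveLowCardinality
    return col;
}

int main()
{
    std::cout << prepareForPeer({}, 54300).low_cardinality << '\n'; // 0: downgraded for an old peer
    std::cout << prepareForPeer({}, 54410).low_cardinality << '\n'; // 1: kept for a new peer
}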
+ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type); + } diff --git a/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp new file mode 100644 index 00000000000..b212c8ebdaa --- /dev/null +++ b/dbms/src/DataTypes/DataTypeLowCardinalityHelpers.cpp @@ -0,0 +1,137 @@ +#include +#include +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int TYPE_MISMATCH; +} + +DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) +{ + if (!type) + return type; + + if (const auto * array_type = typeid_cast(type.get())) + return std::make_shared(recursiveRemoveLowCardinality(array_type->getNestedType())); + + if (const auto * tuple_type = typeid_cast(type.get())) + { + DataTypes elements = tuple_type->getElements(); + for (auto & element : elements) + element = recursiveRemoveLowCardinality(element); + + if (tuple_type->haveExplicitNames()) + return std::make_shared(elements, tuple_type->getElementNames()); + else + return std::make_shared(elements); + } + + if (const auto * low_cardinality_type = typeid_cast(type.get())) + return low_cardinality_type->getDictionaryType(); + + return type; +} + +ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) +{ + if (!column) + return column; + + if (const auto * column_array = typeid_cast(column.get())) + return ColumnArray::create(recursiveRemoveLowCardinality(column_array->getDataPtr()), column_array->getOffsetsPtr()); + + if (const auto * column_const = typeid_cast(column.get())) + return ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), column_const->size()); + + if (const auto * column_tuple = typeid_cast(column.get())) + { + Columns columns = column_tuple->getColumns(); + for (auto & element : columns) + element = recursiveRemoveLowCardinality(element); + return ColumnTuple::create(columns); + } + + if (const auto * column_low_cardinality = typeid_cast(column.get())) + return column_low_cardinality->convertToFullColumn(); + + return column; +} + +ColumnPtr recursiveLowCardinalityConversion(const ColumnPtr & column, const DataTypePtr & from_type, const DataTypePtr & to_type) +{ + if (from_type->equals(*to_type)) + return column; + + if (const auto * column_const = typeid_cast(column.get())) + return ColumnConst::create(recursiveLowCardinalityConversion(column_const->getDataColumnPtr(), from_type, to_type), + column_const->size()); + + if (const auto * low_cardinality_type = typeid_cast(from_type.get())) + { + if (to_type->equals(*low_cardinality_type->getDictionaryType())) + return column->convertToFullColumnIfLowCardinality(); + } + + if (const auto * low_cardinality_type = typeid_cast(to_type.get())) + { + if (from_type->equals(*low_cardinality_type->getDictionaryType())) + { + auto col = low_cardinality_type->createColumn(); + static_cast(*col).insertRangeFromFullColumn(*column, 0, column->size()); + return std::move(col); + } + } + + if (const auto * from_array_type = typeid_cast(from_type.get())) + { + if (const auto * to_array_type = typeid_cast(to_type.get())) + { + const auto * column_array = typeid_cast(column.get()); + if (!column_array) + throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + + auto & nested_from = from_array_type->getNestedType(); + auto & nested_to = 
to_array_type->getNestedType(); + + return ColumnArray::create( + recursiveLowCardinalityConversion(column_array->getDataPtr(), nested_from, nested_to), + column_array->getOffsetsPtr()); + } + } + + if (const auto * from_tuple_type = typeid_cast(from_type.get())) + { + if (const auto * to_tuple_type = typeid_cast(to_type.get())) + { + const auto * column_tuple = typeid_cast(column.get()); + if (!column_tuple) + throw Exception("Unexpected column " + column->getName() + " for type " + from_type->getName(), + ErrorCodes::ILLEGAL_COLUMN); + + Columns columns = column_tuple->getColumns(); + auto & from_elements = from_tuple_type->getElements(); + auto & to_elements = to_tuple_type->getElements(); + for (size_t i = 0; i < columns.size(); ++i) + { + auto & element = columns[i]; + element = recursiveLowCardinalityConversion(element, from_elements.at(i), to_elements.at(i)); + } + return ColumnTuple::create(columns); + } + } + + throw Exception("Cannot convert: " + from_type->getName() + " to " + to_type->getName(), ErrorCodes::TYPE_MISMATCH); +} + +} diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index e0fe4294e7d..7c4adbe9f67 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -21,6 +21,7 @@ #include #include #include +#include namespace DB @@ -164,9 +165,15 @@ void DatabaseOrdinary::loadTables( AtomicStopwatch watch; std::atomic tables_processed {0}; Poco::Event all_tables_processed; + ExceptionHandler exception_handler; auto task_function = [&](const String & table) { + SCOPE_EXIT( + if (++tables_processed == total_tables) + all_tables_processed.set() + ); + /// Messages, so that it's not boring to wait for the server to load for a long time. if ((tables_processed + 1) % PRINT_MESSAGE_EACH_N_TABLES == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) @@ -176,14 +183,11 @@ void DatabaseOrdinary::loadTables( } loadTable(context, metadata_path, *this, name, data_path, table, has_force_restore_data_flag); - - if (++tables_processed == total_tables) - all_tables_processed.set(); }; for (const auto & filename : file_names) { - auto task = std::bind(task_function, filename); + auto task = createExceptionHandledJob(std::bind(task_function, filename), exception_handler); if (thread_pool) thread_pool->schedule(task); @@ -194,6 +198,8 @@ void DatabaseOrdinary::loadTables( if (thread_pool) all_tables_processed.wait(); + exception_handler.throwIfException(); + /// After all tables was basically initialized, startup them. 
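The loadTables change above stops losing per-table failures on the thread pool: each job is wrapped so that its exception is recorded, and exception_handler.throwIfException() re-raises the first one after all jobs have finished. ExceptionHandler and createExceptionHandledJob are ClickHouse internals; the following standalone sketch only shows the shape of the pattern using plain std::thread.

#include <exception>
#include <iostream>
#include <mutex>
#include <stdexcept>
#include <string>
#include <thread>
#include <vector>

// Standalone sketch: collect the first exception thrown by worker jobs and
// rethrow it in the caller once every job has completed.
class ExceptionCollector
{
public:
    void setException(std::exception_ptr e)
    {
        std::lock_guard<std::mutex> lock(mutex);
        if (!first)
            first = e;
    }

    void throwIfException() const
    {
        if (first)
            std::rethrow_exception(first);
    }

private:
    std::mutex mutex;
    std::exception_ptr first;
};

int main()
{
    ExceptionCollector collector;
    std::vector<std::thread> workers;

    for (int i = 0; i < 4; ++i)
        workers.emplace_back([i, &collector]
        {
            try
            {
                if (i == 2) // pretend one "table" fails to load
                    throw std::runtime_error("table " + std::to_string(i) + " failed to load");
            }
            catch (...)
            {
                collector.setException(std::current_exception());
            }
        });

    for (auto & w : workers)
        w.join();

    try
    {
        collector.throwIfException(); // surfaces the worker failure to the caller
    }
    catch (const std::exception & e)
    {
        std::cout << "caught: " << e.what() << '\n';
    }
}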
startupTables(thread_pool); } @@ -207,12 +213,18 @@ void DatabaseOrdinary::startupTables(ThreadPool * thread_pool) std::atomic tables_processed {0}; size_t total_tables = tables.size(); Poco::Event all_tables_processed; + ExceptionHandler exception_handler; if (!total_tables) return; auto task_function = [&](const StoragePtr & table) { + SCOPE_EXIT( + if (++tables_processed == total_tables) + all_tables_processed.set() + ); + if ((tables_processed + 1) % PRINT_MESSAGE_EACH_N_TABLES == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)) { @@ -221,14 +233,11 @@ void DatabaseOrdinary::startupTables(ThreadPool * thread_pool) } table->startup(); - - if (++tables_processed == total_tables) - all_tables_processed.set(); }; for (const auto & name_storage : tables) { - auto task = std::bind(task_function, name_storage.second); + auto task = createExceptionHandledJob(std::bind(task_function, name_storage.second), exception_handler); if (thread_pool) thread_pool->schedule(task); @@ -238,6 +247,8 @@ void DatabaseOrdinary::startupTables(ThreadPool * thread_pool) if (thread_pool) all_tables_processed.wait(); + + exception_handler.throwIfException(); } diff --git a/dbms/src/Functions/FunctionBitTestMany.h b/dbms/src/Functions/FunctionBitTestMany.h index 4dc5dc0522a..21b7ebbbaad 100644 --- a/dbms/src/Functions/FunctionBitTestMany.h +++ b/dbms/src/Functions/FunctionBitTestMany.h @@ -16,7 +16,7 @@ namespace ErrorCodes extern const int ILLEGAL_DIVISION; extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } @@ -36,7 +36,7 @@ public: { if (arguments.size() < 2) throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 2.", ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION}; + + toString(arguments.size()) + ", should be at least 2.", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; const auto & first_arg = arguments.front(); diff --git a/dbms/src/Functions/FunctionsCoding.h b/dbms/src/Functions/FunctionsCoding.h index f505e97d423..62ae5ee5bd5 100644 --- a/dbms/src/Functions/FunctionsCoding.h +++ b/dbms/src/Functions/FunctionsCoding.h @@ -30,7 +30,7 @@ namespace DB namespace ErrorCodes { - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int LOGICAL_ERROR; } diff --git a/dbms/src/Functions/FunctionsConversion.h b/dbms/src/Functions/FunctionsConversion.h index 74428782f9a..1428fec4f48 100644 --- a/dbms/src/Functions/FunctionsConversion.h +++ b/dbms/src/Functions/FunctionsConversion.h @@ -57,7 +57,7 @@ namespace ErrorCodes extern const int CANNOT_PARSE_TEXT; extern const int CANNOT_PARSE_UUID; extern const int TOO_LARGE_STRING_SIZE; - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int LOGICAL_ERROR; extern const int TYPE_MISMATCH; extern const int CANNOT_CONVERT_TYPE; @@ -883,7 +883,7 @@ private: { if (!arguments.size()) throw Exception{"Function " + getName() + " expects at least 1 arguments", - ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION}; + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; const IDataType * from_type = block.getByPosition(arguments[0]).type.get(); @@ -897,7 +897,7 @@ private: { if (arguments.size() != 2) throw Exception{"Function " + getName() + " expects 2 arguments for Decimal.", - ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION}; + 
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; const ColumnWithTypeAndName & scale_column = block.getByPosition(arguments[1]); UInt32 scale = extractToDecimalScale(scale_column); diff --git a/dbms/src/Functions/FunctionsExternalModels.cpp b/dbms/src/Functions/FunctionsExternalModels.cpp index e862794a1da..6afbad31857 100644 --- a/dbms/src/Functions/FunctionsExternalModels.cpp +++ b/dbms/src/Functions/FunctionsExternalModels.cpp @@ -22,7 +22,7 @@ FunctionPtr FunctionModelEvaluate::create(const Context & context) namespace ErrorCodes { extern const int ILLEGAL_TYPE_OF_ARGUMENT; - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int ILLEGAL_COLUMN; } @@ -30,7 +30,7 @@ DataTypePtr FunctionModelEvaluate::getReturnTypeImpl(const DataTypes & arguments { if (arguments.size() < 2) throw Exception("Function " + getName() + " expects at least 2 arguments", - ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION); + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION); if (!isString(arguments[0])) throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() diff --git a/dbms/src/Functions/FunctionsGeo.cpp b/dbms/src/Functions/FunctionsGeo.cpp index 9841ba03e85..8e63b7387d2 100644 --- a/dbms/src/Functions/FunctionsGeo.cpp +++ b/dbms/src/Functions/FunctionsGeo.cpp @@ -29,7 +29,7 @@ namespace DB namespace ErrorCodes { - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; extern const int BAD_ARGUMENTS; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -111,7 +111,7 @@ public: { if (arguments.size() < 2) { - throw Exception("Too few arguments", ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION); + throw Exception("Too few arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION); } auto getMsgPrefix = [this](size_t i) { return "Argument " + toString(i + 1) + " for function " + getName(); }; diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp index 6b9136a6e77..337edbbc168 100644 --- a/dbms/src/Functions/FunctionsStringSearch.cpp +++ b/dbms/src/Functions/FunctionsStringSearch.cpp @@ -1088,5 +1088,7 @@ void registerFunctionsStringSearch(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerAlias("locate", NamePosition::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("replace", NameReplaceAll::name, FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/Functions/IFunction.cpp b/dbms/src/Functions/IFunction.cpp index 358fbfc425b..08376a94f78 100644 --- a/dbms/src/Functions/IFunction.cpp +++ b/dbms/src/Functions/IFunction.cpp @@ -103,58 +103,6 @@ void PreparedFunctionImpl::createLowCardinalityResultCache(size_t cache_size) } -static DataTypePtr recursiveRemoveLowCardinality(const DataTypePtr & type) -{ - if (!type) - return type; - - if (const auto * array_type = typeid_cast(type.get())) - return std::make_shared(recursiveRemoveLowCardinality(array_type->getNestedType())); - - if (const auto * tuple_type = typeid_cast(type.get())) - { - DataTypes elements = tuple_type->getElements(); - for (auto & element : elements) - element = recursiveRemoveLowCardinality(element); - - if (tuple_type->haveExplicitNames()) - return std::make_shared(elements, tuple_type->getElementNames()); - else - return std::make_shared(elements); - } - - if (const auto * low_cardinality_type = typeid_cast(type.get())) - return 
low_cardinality_type->getDictionaryType(); - - return type; -} - -static ColumnPtr recursiveRemoveLowCardinality(const ColumnPtr & column) -{ - if (!column) - return column; - - if (const auto * column_array = typeid_cast(column.get())) - return ColumnArray::create(recursiveRemoveLowCardinality(column_array->getDataPtr()), column_array->getOffsetsPtr()); - - if (const auto * column_const = typeid_cast(column.get())) - return ColumnConst::create(recursiveRemoveLowCardinality(column_const->getDataColumnPtr()), column_const->size()); - - if (const auto * column_tuple = typeid_cast(column.get())) - { - Columns columns = column_tuple->getColumns(); - for (auto & element : columns) - element = recursiveRemoveLowCardinality(element); - return ColumnTuple::create(columns); - } - - if (const auto * column_low_cardinality = typeid_cast(column.get())) - return column_low_cardinality->convertToFullColumn(); - - return column; -} - - ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count) { ColumnPtr result_null_map_column; diff --git a/dbms/src/Functions/caseWithExpression.cpp b/dbms/src/Functions/caseWithExpression.cpp index 4af0cdb5161..e97424e3b7e 100644 --- a/dbms/src/Functions/caseWithExpression.cpp +++ b/dbms/src/Functions/caseWithExpression.cpp @@ -9,7 +9,7 @@ namespace DB namespace ErrorCodes { - extern const int TOO_LESS_ARGUMENTS_FOR_FUNCTION; + extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION; } /// Implements the CASE construction when it is @@ -30,7 +30,7 @@ public: { if (!args.size()) throw Exception{"Function " + getName() + " expects at least 1 arguments", - ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION}; + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; /// See the comments in executeImpl() to understand why we actually have to /// get the return type of a transform function. @@ -48,7 +48,7 @@ public: { if (!args.size()) throw Exception{"Function " + getName() + " expects at least 1 argument", - ErrorCodes::TOO_LESS_ARGUMENTS_FOR_FUNCTION}; + ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; /// In the following code, we turn the construction: /// CASE expr WHEN val[0] THEN branch[0] ... 
WHEN val[N-1] then branch[N-1] ELSE branchN diff --git a/dbms/src/Functions/if.cpp b/dbms/src/Functions/if.cpp index 5879e45c810..42e8b65eb05 100644 --- a/dbms/src/Functions/if.cpp +++ b/dbms/src/Functions/if.cpp @@ -936,7 +936,7 @@ public: void registerFunctionIf(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/Functions/lower.cpp b/dbms/src/Functions/lower.cpp index 38379fa10cd..b3c939968cb 100644 --- a/dbms/src/Functions/lower.cpp +++ b/dbms/src/Functions/lower.cpp @@ -15,7 +15,8 @@ using FunctionLower = FunctionStringToString, NameLower void registerFunctionLower(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerAlias("lcase", NameLower::name, FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/Functions/substring.cpp b/dbms/src/Functions/substring.cpp index e0a6d707243..7263f3ec595 100644 --- a/dbms/src/Functions/substring.cpp +++ b/dbms/src/Functions/substring.cpp @@ -166,7 +166,9 @@ public: void registerFunctionSubstring(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerAlias("substr", FunctionSubstring::name, FunctionFactory::CaseInsensitive); + factory.registerAlias("mid", FunctionSubstring::name, FunctionFactory::CaseInsensitive); /// from MySQL dialect } } diff --git a/dbms/src/Functions/upper.cpp b/dbms/src/Functions/upper.cpp index 9a884ccedd8..e111827a87f 100644 --- a/dbms/src/Functions/upper.cpp +++ b/dbms/src/Functions/upper.cpp @@ -15,7 +15,8 @@ using FunctionUpper = FunctionStringToString, NameUpper void registerFunctionUpper(FunctionFactory & factory) { - factory.registerFunction(); + factory.registerFunction(FunctionFactory::CaseInsensitive); + factory.registerAlias("ucase", FunctionUpper::name, FunctionFactory::CaseInsensitive); } } diff --git a/dbms/src/IO/ReadBufferFromHDFS.h b/dbms/src/IO/ReadBufferFromHDFS.h new file mode 100644 index 00000000000..21705c1ef30 --- /dev/null +++ b/dbms/src/IO/ReadBufferFromHDFS.h @@ -0,0 +1,96 @@ +#pragma once + +#include + +#if USE_HDFS +#include +#include +#include +#include + +#ifndef O_DIRECT +#define O_DIRECT 00040000 +#endif + +namespace DB +{ + namespace ErrorCodes + { + extern const int BAD_ARGUMENTS; + extern const int NETWORK_ERROR; + } + /** Accepts path to file and opens it, or pre-opened file descriptor. + * Closes file by himself (thus "owns" a file descriptor). + */ + class ReadBufferFromHDFS : public BufferWithOwnMemory + { + protected: + std::string hdfs_uri; + struct hdfsBuilder *builder; + hdfsFS fs; + hdfsFile fin; + public: + ReadBufferFromHDFS(const std::string & hdfs_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE) + : BufferWithOwnMemory(buf_size), hdfs_uri(hdfs_name_) , builder(hdfsNewBuilder()) + { + Poco::URI uri(hdfs_name_); + auto & host = uri.getHost(); + auto port = uri.getPort(); + auto & path = uri.getPath(); + if (host.empty() || port == 0 || path.empty()) + { + throw Exception("Illegal HDFS URI: " + hdfs_uri, ErrorCodes::BAD_ARGUMENTS); + } + // set read/connect timeout, default value in libhdfs3 is about 1 hour, and too large + /// TODO Allow to tune from query Settings. 
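The FunctionFactory hunks above register if, lower, upper and substring case-insensitively and add the MySQL-style names (locate, replace, lcase, ucase, substr, mid) as aliases, so e.g. SUBSTR(s, 1, 2) now resolves to substring. The toy standalone registry below illustrates the lookup scheme only; it is not ClickHouse's FunctionFactory, just the case-folding idea.

#include <algorithm>
#include <cctype>
#include <iostream>
#include <map>
#include <string>

// Toy registry: names and aliases are stored lower-cased, so lookups are case-insensitive.
class Registry
{
public:
    void registerName(const std::string & name) { names[lower(name)] = name; }
    void registerAlias(const std::string & alias, const std::string & target) { names[lower(alias)] = names.at(lower(target)); }
    std::string resolve(const std::string & query_name) const { return names.at(lower(query_name)); }

private:
    static std::string lower(std::string s)
    {
        std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
        return s;
    }

    std::map<std::string, std::string> names;
};

int main()
{
    Registry r;
    r.registerName("substring");
    r.registerAlias("substr", "substring"); // MySQL dialect names, as in the diff
    r.registerAlias("mid", "substring");
    std::cout << r.resolve("SUBSTR") << '\n'; // prints "substring"
}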
+ hdfsBuilderConfSetStr(builder, "input.read.timeout", "60000"); // 1 min + hdfsBuilderConfSetStr(builder, "input.connect.timeout", "60000"); // 1 min + + hdfsBuilderSetNameNode(builder, host.c_str()); + hdfsBuilderSetNameNodePort(builder, port); + fs = hdfsBuilderConnect(builder); + + if (fs == nullptr) + { + throw Exception("Unable to connect to HDFS: " + String(hdfsGetLastError()), ErrorCodes::NETWORK_ERROR); + } + + fin = hdfsOpenFile(fs, path.c_str(), O_RDONLY, 0, 0, 0); + } + + ReadBufferFromHDFS(ReadBufferFromHDFS &&) = default; + + ~ReadBufferFromHDFS() override + { + close(); + hdfsFreeBuilder(builder); + } + + /// Close HDFS connection before destruction of object. + void close() + { + hdfsCloseFile(fs, fin); + } + + bool nextImpl() override + { + int bytes_read = hdfsRead(fs, fin, internal_buffer.begin(), internal_buffer.size()); + if (bytes_read < 0) + { + throw Exception("Fail to read HDFS file: " + hdfs_uri + " " + String(hdfsGetLastError()), ErrorCodes::NETWORK_ERROR); + } + + if (bytes_read) + working_buffer.resize(bytes_read); + else + return false; + return true; + } + + const std::string & getHDFSUri() const + { + return hdfs_uri; + } + }; +} +#endif diff --git a/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h b/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h index 238c50a34db..b444cb4230e 100644 --- a/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/dbms/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -8,6 +8,7 @@ #include #include #include +#include #include namespace DB @@ -38,8 +39,9 @@ public: void visit(ASTPtr & ast) const { if (!tryVisit(ast) && - !tryVisit(ast)) - visitChildren(ast); + !tryVisit(ast) && + !tryVisit(ast)) + visitChildren(*ast); } void visit(ASTSelectQuery & select) const @@ -70,10 +72,7 @@ private: if (select.tables) tryVisit(select.tables); - if (select.prewhere_expression) - visitChildren(select.prewhere_expression); - if (select.where_expression) - visitChildren(select.where_expression); + visitChildren(select); } void visit(ASTTablesInSelectQuery & tables, ASTPtr &) const @@ -112,9 +111,43 @@ private: tryVisit(subquery.children[0]); } - void visitChildren(ASTPtr & ast) const + void visit(ASTFunction & function, ASTPtr &) const { - for (auto & child : ast->children) + bool is_operator_in = false; + for (auto name : {"in", "notIn", "globalIn", "globalNotIn"}) + { + if (function.name == name) + { + is_operator_in = true; + break; + } + } + + for (auto & child : function.children) + { + if (child.get() == function.arguments.get()) + { + for (size_t i = 0; i < child->children.size(); ++i) + { + if (is_operator_in && i == 1) + { + /// Second argument of the "in" function (or similar) may be a table name or a subselect. + /// Rewrite the table name or descend into subselect. 
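Reading through the new buffer would look roughly like the sketch below. The URI is a placeholder, USE_HDFS has to be enabled, and it is an assumption that the usual readString helper from ReadHelpers is what one would pair it with here.

#include <IO/ReadBufferFromHDFS.h>
#include <IO/ReadHelpers.h>

#include <iostream>
#include <string>

int main()
{
    // Placeholder URI: needs a reachable namenode and an existing file.
    DB::ReadBufferFromHDFS in("hdfs://namenode:8020/data/events.tsv");

    std::string first_field;
    DB::readString(first_field, in); // reads up to the first tab or newline
    std::cout << first_field << '\n';
}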
+ if (!tryVisit(child->children[i])) + visit(child->children[i]); + } + else + visit(child->children[i]); + } + } + else + visit(child); + } + } + + void visitChildren(IAST & ast) const + { + for (auto & child : ast.children) visit(child); } diff --git a/dbms/src/Interpreters/Aggregator.cpp b/dbms/src/Interpreters/Aggregator.cpp index bfa8d51c9c3..1fd614affc4 100644 --- a/dbms/src/Interpreters/Aggregator.cpp +++ b/dbms/src/Interpreters/Aggregator.cpp @@ -453,6 +453,27 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::nullable_keys256; } + if (has_low_cardinality && params.keys_size == 1) + { + if (types_removed_nullable[0]->isValueRepresentedByNumber()) + { + size_t size_of_field = types_removed_nullable[0]->getSizeOfValueInMemory(); + + if (size_of_field == 1) + return AggregatedDataVariants::Type::low_cardinality_key8; + if (size_of_field == 2) + return AggregatedDataVariants::Type::low_cardinality_key16; + if (size_of_field == 4) + return AggregatedDataVariants::Type::low_cardinality_key32; + if (size_of_field == 8) + return AggregatedDataVariants::Type::low_cardinality_key64; + } + else if (isString(types_removed_nullable[0])) + return AggregatedDataVariants::Type::low_cardinality_key_string; + else if (isFixedString(types_removed_nullable[0])) + return AggregatedDataVariants::Type::low_cardinality_key_fixed_string; + } + /// Fallback case. return AggregatedDataVariants::Type::serialized; } @@ -1139,12 +1160,10 @@ void Aggregator::convertToBlockImpl( convertToBlockImplFinal(method, data, key_columns, final_aggregate_columns); else convertToBlockImplNotFinal(method, data, key_columns, aggregate_columns); - /// In order to release memory early. data.clearAndShrink(); } - template void NO_INLINE Aggregator::convertToBlockImplFinal( Method & method, @@ -1152,6 +1171,19 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( MutableColumns & key_columns, MutableColumns & final_aggregate_columns) const { + if constexpr (Method::low_cardinality_optimization) + { + if (data.hasNullKeyData()) + { + key_columns[0]->insert(Field()); /// Null + + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->insertResultInto( + data.getNullKeyData() + offsets_of_aggregate_states[i], + *final_aggregate_columns[i]); + } + } + for (const auto & value : data) { method.insertKeyIntoColumns(value, key_columns, key_sizes); @@ -1172,6 +1204,17 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal( MutableColumns & key_columns, AggregateColumnsData & aggregate_columns) const { + if constexpr (Method::low_cardinality_optimization) + { + if (data.hasNullKeyData()) + { + key_columns[0]->insert(Field()); /// Null + + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_columns[i]->push_back(data.getNullKeyData() + offsets_of_aggregate_states[i]); + } + } + for (auto & value : data) { method.insertKeyIntoColumns(value, key_columns, key_sizes); @@ -1470,12 +1513,50 @@ BlocksList Aggregator::convertToBlocks(AggregatedDataVariants & data_variants, b } +template +void NO_INLINE Aggregator::mergeDataNullKey( + Table & table_dst, + Table & table_src, + Arena * arena) const +{ + if constexpr (Method::low_cardinality_optimization) + { + if (table_src.hasNullKeyData()) + { + if (!table_dst.hasNullKeyData()) + { + table_dst.hasNullKeyData() = true; + table_dst.getNullKeyData() = table_src.getNullKeyData(); + } + else + { + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->merge( + table_dst.getNullKeyData() + 
offsets_of_aggregate_states[i], + table_src.getNullKeyData() + offsets_of_aggregate_states[i], + arena); + + for (size_t i = 0; i < params.aggregates_size; ++i) + aggregate_functions[i]->destroy( + table_src.getNullKeyData() + offsets_of_aggregate_states[i]); + } + + table_src.hasNullKeyData() = false; + table_src.getNullKeyData() = nullptr; + } + } +} + + template void NO_INLINE Aggregator::mergeDataImpl( Table & table_dst, Table & table_src, Arena * arena) const { + if constexpr (Method::low_cardinality_optimization) + mergeDataNullKey(table_dst, table_src, arena); + for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it) { typename Table::iterator res_it; @@ -1513,6 +1594,10 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl( Table & table_src, Arena * arena) const { + /// Note : will create data for NULL key if not exist + if constexpr (Method::low_cardinality_optimization) + mergeDataNullKey(table_dst, table_src, arena); + for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it) { typename Table::iterator res_it = table_dst.find(it->first, it.getHash()); @@ -1543,6 +1628,10 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl( Table & table_src, Arena * arena) const { + /// Note : will create data for NULL key if not exist + if constexpr (Method::low_cardinality_optimization) + mergeDataNullKey(table_dst, table_src, arena); + for (auto it = table_src.begin(); it != table_src.end(); ++it) { decltype(it) res_it = table_dst.find(it->first, it.getHash()); @@ -2341,6 +2430,15 @@ void NO_INLINE Aggregator::convertBlockToTwoLevelImpl( /// For every row. for (size_t i = 0; i < rows; ++i) { + if constexpr (Method::low_cardinality_optimization) + { + if (state.isNullAt(i)) + { + selector[i] = 0; + continue; + } + } + /// Obtain a key. Calculate bucket number from it. 
typename Method::Key key = state.getKey(key_columns, params.keys_size, i, key_sizes, keys, *pool); diff --git a/dbms/src/Interpreters/Aggregator.h b/dbms/src/Interpreters/Aggregator.h index 0dadf08b8a2..f96c6ced219 100644 --- a/dbms/src/Interpreters/Aggregator.h +++ b/dbms/src/Interpreters/Aggregator.h @@ -88,6 +88,56 @@ using AggregatedDataWithStringKeyHash64 = HashMapWithSavedHash; using AggregatedDataWithKeys256Hash64 = HashMap; +template +struct AggregationDataWithNullKey : public Base +{ + using Base::Base; + + bool & hasNullKeyData() { return has_null_key_data; } + AggregateDataPtr & getNullKeyData() { return null_key_data; } + bool hasNullKeyData() const { return has_null_key_data; } + const AggregateDataPtr & getNullKeyData() const { return null_key_data; } + +private: + bool has_null_key_data = false; + AggregateDataPtr null_key_data = nullptr; +}; + +template +struct AggregationDataWithNullKeyTwoLevel : public Base +{ + using Base::Base; + using Base::impls; + + template + explicit AggregationDataWithNullKeyTwoLevel(const Other & other) : Base(other) + { + impls[0].hasNullKeyData() = other.hasNullKeyData(); + impls[0].getNullKeyData() = other.getNullKeyData(); + } + + bool & hasNullKeyData() { return impls[0].hasNullKeyData(); } + AggregateDataPtr & getNullKeyData() { return impls[0].getNullKeyData(); } + bool hasNullKeyData() const { return impls[0].hasNullKeyData(); } + const AggregateDataPtr & getNullKeyData() const { return impls[0].getNullKeyData(); } +}; + +template +using HashTableWithNullKey = AggregationDataWithNullKey>; + +using AggregatedDataWithNullableUInt8Key = AggregationDataWithNullKey; +using AggregatedDataWithNullableUInt16Key = AggregationDataWithNullKey; + +using AggregatedDataWithNullableUInt64Key = AggregationDataWithNullKey; +using AggregatedDataWithNullableStringKey = AggregationDataWithNullKey; + +using AggregatedDataWithNullableUInt64KeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelHashMap, + TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; +using AggregatedDataWithNullableStringKeyTwoLevel = AggregationDataWithNullKeyTwoLevel< + TwoLevelHashMapWithSavedHash, + TwoLevelHashTableGrower<>, HashTableAllocator, HashTableWithNullKey>>; + /// Cache which can be used by aggregations method's states. Object is shared in all threads. struct AggregationStateCache { @@ -403,8 +453,10 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod ColumnPtr dictionary_holder; /// Cache AggregateDataPtr for current column in order to decrease the number of hash table usages. - PaddedPODArray aggregate_data; - PaddedPODArray * aggregate_data_cache; + PaddedPODArray aggregate_data_cache; + + /// If initialized column is nullable. 
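The AggregationDataWithNullKey wrappers above keep the aggregate state for the NULL key outside the hash table in a dedicated slot, so the table itself never hashes or compares a null key (and the two-level variant pins that slot to bucket 0, which is why convertBlockToTwoLevelImpl routes null rows to selector 0). A small standalone illustration of the idea, with std::unordered_map standing in for ClickHouse's hash tables:

#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

// Keep NULL out of the hash table: give it a dedicated side slot instead.
template <typename Map>
struct WithNullKey : Map
{
    bool has_null = false;
    typename Map::mapped_type null_data{};

    typename Map::mapped_type & operator[](const std::optional<typename Map::key_type> & key)
    {
        if (!key)
        {
            has_null = true;
            return null_data; // NULL never touches the underlying table
        }
        return Map::operator[](*key);
    }
};

int main()
{
    WithNullKey<std::unordered_map<std::string, int>> counts;
    ++counts[std::optional<std::string>{"x"}];
    ++counts[std::nullopt]; // aggregated into the side slot
    std::cout << counts.has_null << ' ' << counts.null_data << ' ' << counts.size() << '\n'; // 1 1 1
}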
+ bool is_nullable = false; void init(ColumnRawPtrs &) { @@ -429,7 +481,8 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod + demangle(typeid(cached_val).name()), ErrorCodes::LOGICAL_ERROR); } - auto * dict = column->getDictionary().getNestedColumn().get(); + auto * dict = column->getDictionary().getNestedNotNullableColumn().get(); + is_nullable = column->getDictionary().nestedColumnIsNullable(); key = {dict}; bool is_shared_dict = column->isSharedDictionary(); @@ -463,8 +516,7 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod } AggregateDataPtr default_data = nullptr; - aggregate_data.assign(key[0]->size(), default_data); - aggregate_data_cache = &aggregate_data; + aggregate_data_cache.assign(key[0]->size(), default_data); size_of_index_type = column->getSizeOfIndexType(); positions = column->getIndexesPtr().get(); @@ -507,10 +559,18 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod Arena & pool) { size_t row = getIndexAt(i); - if ((*aggregate_data_cache)[row]) + + if (is_nullable && row == 0) + { + inserted = !data.hasNullKeyData(); + data.hasNullKeyData() = true; + return &data.getNullKeyData(); + } + + if (aggregate_data_cache[row]) { inserted = false; - return &(*aggregate_data_cache)[row]; + return &aggregate_data_cache[row]; } else { @@ -527,23 +587,35 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod if (inserted) Base::onNewKey(*it, keys_size, keys, pool); else - (*aggregate_data_cache)[row] = Base::getAggregateData(it->second); + aggregate_data_cache[row] = Base::getAggregateData(it->second); return &Base::getAggregateData(it->second); } } + ALWAYS_INLINE bool isNullAt(size_t i) + { + if (!is_nullable) + return false; + + return getIndexAt(i) == 0; + } + ALWAYS_INLINE void cacheAggregateData(size_t i, AggregateDataPtr data) { size_t row = getIndexAt(i); - (*aggregate_data_cache)[row] = data; + aggregate_data_cache[row] = data; } template ALWAYS_INLINE AggregateDataPtr * findFromRow(D & data, size_t i) { size_t row = getIndexAt(i); - if (!(*aggregate_data_cache)[row]) + + if (is_nullable && row == 0) + return data.hasNullKeyData() ? &data.getNullKeyData() : nullptr; + + if (!aggregate_data_cache[row]) { ColumnRawPtrs key_columns; Sizes key_sizes; @@ -558,9 +630,9 @@ struct AggregationMethodSingleLowCardinalityColumn : public SingleColumnMethod it = data.find(key); if (it != data.end()) - (*aggregate_data_cache)[row] = Base::getAggregateData(it->second); + aggregate_data_cache[row] = Base::getAggregateData(it->second); } - return &(*aggregate_data_cache)[row]; + return &aggregate_data_cache[row]; } }; @@ -971,17 +1043,17 @@ struct AggregatedDataVariants : private boost::noncopyable std::unique_ptr> nullable_keys256_two_level; /// Support for low cardinality. 
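The aggregate_data_cache above exploits the structure of a LowCardinality column: each row is just an index into a small dictionary, so the result of the hash-table lookup for a given dictionary position can be cached and reused for every later row with the same index (with index 0 reserved for NULL when the dictionary is nullable). A self-contained toy version of that caching idea:

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

int main()
{
    // A LowCardinality column is a small dictionary plus per-row indices into it.
    std::vector<std::string> dictionary = {"red", "green", "blue"};
    std::vector<uint8_t> rows = {0, 1, 0, 2, 1, 0};

    std::unordered_map<std::string, int> counts;          // stands in for the aggregation hash table
    std::vector<int *> cache(dictionary.size(), nullptr); // one cached slot per dictionary entry

    for (uint8_t idx : rows)
    {
        if (!cache[idx])                           // first occurrence of this dictionary index:
            cache[idx] = &counts[dictionary[idx]]; // pay for the hash table lookup once
        ++*cache[idx];                             // later rows reuse the cached pointer
        // (pointers to unordered_map values stay valid across rehashes, so caching them is safe)
    }

    for (const auto & [key, value] : counts)
        std::cout << key << ": " << value << '\n'; // red: 3, green: 2, blue: 1
}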
- std::unique_ptr>> low_cardinality_key8; - std::unique_ptr>> low_cardinality_key16; - std::unique_ptr>> low_cardinality_key32; - std::unique_ptr>> low_cardinality_key64; - std::unique_ptr>> low_cardinality_key_string; - std::unique_ptr>> low_cardinality_key_fixed_string; + std::unique_ptr>> low_cardinality_key8; + std::unique_ptr>> low_cardinality_key16; + std::unique_ptr>> low_cardinality_key32; + std::unique_ptr>> low_cardinality_key64; + std::unique_ptr>> low_cardinality_key_string; + std::unique_ptr>> low_cardinality_key_fixed_string; - std::unique_ptr>> low_cardinality_key32_two_level; - std::unique_ptr>> low_cardinality_key64_two_level; - std::unique_ptr>> low_cardinality_key_string_two_level; - std::unique_ptr>> low_cardinality_key_fixed_string_two_level; + std::unique_ptr>> low_cardinality_key32_two_level; + std::unique_ptr>> low_cardinality_key64_two_level; + std::unique_ptr>> low_cardinality_key_string_two_level; + std::unique_ptr>> low_cardinality_key_fixed_string_two_level; std::unique_ptr> low_cardinality_keys128; std::unique_ptr> low_cardinality_keys256; @@ -1580,6 +1652,13 @@ public: Arena * arena) const; protected: + /// Merge NULL key data from hash table `src` into `dst`. + template + void mergeDataNullKey( + Table & table_dst, + Table & table_src, + Arena * arena) const; + /// Merge data from hash table `src` into `dst`. template void mergeDataImpl( diff --git a/dbms/src/Interpreters/ExpressionActions.cpp b/dbms/src/Interpreters/ExpressionActions.cpp index a33261ef385..b7535012907 100644 --- a/dbms/src/Interpreters/ExpressionActions.cpp +++ b/dbms/src/Interpreters/ExpressionActions.cpp @@ -590,7 +590,7 @@ void ExpressionActions::checkLimits(Block & block) const { std::stringstream list_of_non_const_columns; for (size_t i = 0, size = block.columns(); i < size; ++i) - if (!block.safeGetByPosition(i).column->isColumnConst()) + if (block.safeGetByPosition(i).column && !block.safeGetByPosition(i).column->isColumnConst()) list_of_non_const_columns << "\n" << block.safeGetByPosition(i).name; throw Exception("Too many temporary non-const columns:" + list_of_non_const_columns.str() diff --git a/dbms/src/Interpreters/InterpreterAlterQuery.cpp b/dbms/src/Interpreters/InterpreterAlterQuery.cpp index ab24d7e1164..db9ec6a354c 100644 --- a/dbms/src/Interpreters/InterpreterAlterQuery.cpp +++ b/dbms/src/Interpreters/InterpreterAlterQuery.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -33,6 +34,12 @@ BlockIO InterpreterAlterQuery::execute() String database_name = alter.database.empty() ? context.getCurrentDatabase() : alter.database; StoragePtr table = context.getTable(database_name, table_name); + /// Add default database to table identifiers that we can encounter in e.g. default expressions, + /// mutation expression, etc. + AddDefaultDatabaseVisitor visitor(database_name); + ASTPtr command_list_ptr = alter.command_list->ptr(); + visitor.visit(command_list_ptr); + AlterCommands alter_commands; PartitionCommands partition_commands; MutationCommands mutation_commands; diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp index 12cfc39c96d..a7f0c0dcc52 100644 --- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp +++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp @@ -743,9 +743,9 @@ void InterpreterSelectQuery::executeFetchColumns( } /// We will create an expression to return all the requested columns, with the calculation of the required ALIAS columns. 
- auto required_columns_expr_list = std::make_shared(); + ASTPtr required_columns_expr_list = std::make_shared(); /// Separate expression for columns used in prewhere. - auto required_prewhere_columns_expr_list = std::make_shared(); + ASTPtr required_prewhere_columns_expr_list = std::make_shared(); for (const auto & column : required_columns) { @@ -823,8 +823,10 @@ void InterpreterSelectQuery::executeFetchColumns( } prewhere_info->prewhere_actions = std::move(new_actions); + auto source_columns = storage->getColumns().getAllPhysical(); + auto analyzed_result = SyntaxAnalyzer(context, {}).analyze(required_prewhere_columns_expr_list, source_columns); prewhere_info->alias_actions = - ExpressionAnalyzer(required_prewhere_columns_expr_list, syntax_analyzer_result, context) + ExpressionAnalyzer(required_prewhere_columns_expr_list, analyzed_result, context) .getActions(true, false); /// Add columns required by alias actions. diff --git a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 319b4c89583..528c83167af 100644 --- a/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/dbms/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -406,9 +407,13 @@ IColumn::Selector DistributedBlockOutputStream::createSelector(const Block & sou const auto & key_column = current_block_with_sharding_key_expr.getByName(storage.getShardingKeyColumnName()); const auto & slot_to_shard = cluster->getSlotToShard(); +// If key_column.type is DataTypeLowCardinality, do shard according to its dictionaryType #define CREATE_FOR_TYPE(TYPE) \ if (typeid_cast(key_column.type.get())) \ - return createBlockSelector(*key_column.column, slot_to_shard); + return createBlockSelector(*key_column.column, slot_to_shard); \ + else if (auto * type_low_cardinality = typeid_cast(key_column.type.get())) \ + if (typeid_cast(type_low_cardinality->getDictionaryType().get())) \ + return createBlockSelector(*key_column.column->convertToFullColumnIfLowCardinality(), slot_to_shard); CREATE_FOR_TYPE(UInt8) CREATE_FOR_TYPE(UInt16) diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 5fb3fd2d457..4a7b649a25a 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -353,8 +353,8 @@ ASTPtr MergeTreeData::extractKeyExpressionList(const ASTPtr & node) if (expr_func && expr_func->name == "tuple") { - /// Primary key is specified in tuple. - return expr_func->children.at(0); + /// Primary key is specified in tuple, extract its arguments. 
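In the CREATE_FOR_TYPE macro of the DistributedBlockOutputStream hunk above, the angle-bracket template arguments are not visible in the text, so the following is a reconstruction under the assumption that each branch casts to DataType##TYPE and instantiates createBlockSelector<TYPE>; treat it as an approximation of the hunk rather than a verbatim copy. The point of the second branch is that a LowCardinality(T) sharding key is materialized to a full T column before the selector is built.

// Reconstruction (assumed template arguments) of the sharding-key dispatch:
#define CREATE_FOR_TYPE(TYPE) \
    if (typeid_cast<const DataType ## TYPE *>(key_column.type.get())) \
        return createBlockSelector<TYPE>(*key_column.column, slot_to_shard); \
    else if (auto * type_low_cardinality = typeid_cast<const DataTypeLowCardinality *>(key_column.type.get())) \
        if (typeid_cast<const DataType ## TYPE *>(type_low_cardinality->getDictionaryType().get())) \
            return createBlockSelector<TYPE>(*key_column.column->convertToFullColumnIfLowCardinality(), slot_to_shard);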
+ return expr_func->arguments->clone(); } else { diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index cdfb71b5726..831c2c36448 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -18,7 +18,7 @@ namespace DB namespace ErrorCodes { - extern const int TOO_LESS_LIVE_REPLICAS; + extern const int TOO_FEW_LIVE_REPLICAS; extern const int UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE; extern const int CHECKSUM_DOESNT_MATCH; extern const int UNEXPECTED_ZOOKEEPER_ERROR; @@ -76,7 +76,7 @@ void ReplicatedMergeTreeBlockOutputStream::checkQuorumPrecondition(zkutil::ZooKe if (leader_election_stat.numChildren < static_cast(quorum)) throw Exception("Number of alive replicas (" + toString(leader_election_stat.numChildren) + ") is less than requested quorum (" + toString(quorum) + ").", - ErrorCodes::TOO_LESS_LIVE_REPLICAS); + ErrorCodes::TOO_FEW_LIVE_REPLICAS); /** Is there a quorum for the last part for which a quorum is needed? * Write of all the parts with the included quorum is linearly ordered. diff --git a/dbms/src/Storages/StorageHDFS.cpp b/dbms/src/Storages/StorageHDFS.cpp new file mode 100644 index 00000000000..03c9626a582 --- /dev/null +++ b/dbms/src/Storages/StorageHDFS.cpp @@ -0,0 +1,179 @@ +#include + +#if USE_HDFS + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int NOT_IMPLEMENTED; + extern const int BAD_ARGUMENTS; +} + +StorageHDFS::StorageHDFS(const String & uri_, + const std::string & table_name_, + const String & format_name_, + const ColumnsDescription & columns_, + Context &) + : IStorage(columns_), uri(uri_), format_name(format_name_), table_name(table_name_) +{ +} + +namespace +{ + class StorageHDFSBlockInputStream : public IProfilingBlockInputStream + { + public: + StorageHDFSBlockInputStream(const String & uri, + const String & format, + const String & name_, + const Block & sample_block, + const Context & context, + size_t max_block_size) + : name(name_) + { + // Assume no query and fragment in uri, todo, add sanity check + String fuzzyFileNames; + String uriPrefix = uri.substr(0, uri.find_last_of('/')); + if (uriPrefix.length() == uri.length()) + { + fuzzyFileNames = uri; + uriPrefix.clear(); + } + else + { + uriPrefix += "/"; + fuzzyFileNames = uri.substr(uriPrefix.length()); + } + + std::vector fuzzyNameList = parseRemoteDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , 100/* hard coded max files */); + + BlockInputStreams inputs; + + for (auto & name: fuzzyNameList) + { + std::unique_ptr read_buf = std::make_unique(uriPrefix + name); + + inputs.emplace_back( + std::make_shared>( + FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size), + std::move(read_buf))); + } + + if (inputs.size() == 0) + throw Exception("StorageHDFS inputs interpreter error", ErrorCodes::BAD_ARGUMENTS); + + if (inputs.size() == 1) + { + reader = inputs[0]; + } + else + { + reader = std::make_shared(inputs, nullptr, context.getSettingsRef().max_distributed_connections); + } + } + + String getName() const override + { + return name; + } + + Block readImpl() override + { + return reader->read(); + } + + Block getHeader() const override + { + 
return reader->getHeader(); + } + + void readPrefixImpl() override + { + reader->readPrefix(); + } + + void readSuffixImpl() override + { + auto explicitReader = dynamic_cast(reader.get()); + if (explicitReader) explicitReader->cancel(false); // skip Union read suffix assertion + + reader->readSuffix(); + } + + private: + String name; + BlockInputStreamPtr reader; + }; + +} + + +BlockInputStreams StorageHDFS::read( + const Names & /*column_names*/, + const SelectQueryInfo & /*query_info*/, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned /*num_streams*/) +{ + return {std::make_shared( + uri, + format_name, + getName(), + getSampleBlock(), + context, + max_block_size)}; +} + +void StorageHDFS::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) {} + +BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const Settings & /*settings*/) +{ + throw Exception("StorageHDFS write is not supported yet", ErrorCodes::NOT_IMPLEMENTED); + return {}; +} + +void registerStorageHDFS(StorageFactory & factory) +{ + factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args) + { + ASTs & engine_args = args.engine_args; + + if (!(engine_args.size() == 1 || engine_args.size() == 2)) + throw Exception( + "Storage HDFS requires exactly 2 arguments: url and name of used format.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context); + + String url = static_cast(*engine_args[0]).value.safeGet(); + + engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.local_context); + + String format_name = static_cast(*engine_args[1]).value.safeGet(); + + return StorageHDFS::create(url, args.table_name, format_name, args.columns, args.context); + }); +} + +} + +#endif diff --git a/dbms/src/Storages/StorageHDFS.h b/dbms/src/Storages/StorageHDFS.h new file mode 100644 index 00000000000..44ff23c4d67 --- /dev/null +++ b/dbms/src/Storages/StorageHDFS.h @@ -0,0 +1,56 @@ +#pragma once +#include +#if USE_HDFS + +#include +#include +#include +#include + +namespace DB +{ +/** + * This class represents table engine for external hdfs files. + * Read method is supported for now. 
+ */ +class StorageHDFS : public ext::shared_ptr_helper, public IStorage +{ +public: + String getName() const override + { + return "HDFS"; + } + + String getTableName() const override + { + return table_name; + } + + BlockInputStreams read(const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; + + BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override; + + void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override; + +protected: + StorageHDFS(const String & uri_, + const String & table_name_, + const String & format_name_, + const ColumnsDescription & columns_, + Context & context_); + +private: + String uri; + String format_name; + String table_name; + + Logger * log = &Logger::get("StorageHDFS"); +}; +} + +#endif diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index ce831fbb758..4bd2d995104 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -24,6 +24,10 @@ void registerStorageJoin(StorageFactory & factory); void registerStorageView(StorageFactory & factory); void registerStorageMaterializedView(StorageFactory & factory); +#if USE_HDFS +void registerStorageHDFS(StorageFactory & factory); +#endif + #if USE_POCO_SQLODBC || USE_POCO_DATAODBC void registerStorageODBC(StorageFactory & factory); #endif @@ -60,6 +64,10 @@ void registerStorages() registerStorageView(factory); registerStorageMaterializedView(factory); + #if USE_HDFS + registerStorageHDFS(factory); + #endif + #if USE_POCO_SQLODBC || USE_POCO_DATAODBC registerStorageODBC(factory); #endif diff --git a/dbms/src/TableFunctions/ITableFunction.h b/dbms/src/TableFunctions/ITableFunction.h index a3885f16152..ddf900fa65c 100644 --- a/dbms/src/TableFunctions/ITableFunction.h +++ b/dbms/src/TableFunctions/ITableFunction.h @@ -3,7 +3,6 @@ #include #include - namespace DB { diff --git a/dbms/src/TableFunctions/TableFunctionHDFS.cpp b/dbms/src/TableFunctions/TableFunctionHDFS.cpp new file mode 100644 index 00000000000..9c09ad9313c --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionHDFS.cpp @@ -0,0 +1,25 @@ +#include + +#if USE_HDFS +#include +#include +#include + +namespace DB +{ +StoragePtr TableFunctionHDFS::getStorage( + const String & source, const String & format, const Block & sample_block, Context & global_context) const +{ + return StorageHDFS::create(source, + getName(), + format, + ColumnsDescription{sample_block.getNamesAndTypesList()}, + global_context); +} + +void registerTableFunctionHDFS(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} +} +#endif diff --git a/dbms/src/TableFunctions/TableFunctionHDFS.h b/dbms/src/TableFunctions/TableFunctionHDFS.h new file mode 100644 index 00000000000..8033034deb8 --- /dev/null +++ b/dbms/src/TableFunctions/TableFunctionHDFS.h @@ -0,0 +1,32 @@ +#pragma once + +#include + +#if USE_HDFS + +#include +#include +#include + + +namespace DB +{ +/* hdfs(name_node_ip:name_node_port, format, structure) - creates a temporary storage from hdfs file + * + */ +class TableFunctionHDFS : public ITableFunctionFileLike +{ +public: + static constexpr auto name = "hdfs"; + std::string getName() const override + { + return name; + } + +private: + StoragePtr getStorage( + const String & source, const String & format, const Block & sample_block, Context & global_context) 
const override; +}; +} + +#endif diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index 1478f8960a8..b93a1638d48 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -11,6 +11,7 @@ #include #include +#include namespace DB @@ -22,165 +23,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } - -/// The Cartesian product of two sets of rows, the result is written in place of the first argument -static void append(std::vector & to, const std::vector & what, size_t max_addresses) -{ - if (what.empty()) - return; - - if (to.empty()) - { - to = what; - return; - } - - if (what.size() * to.size() > max_addresses) - throw Exception("Table function 'remote': first argument generates too many result addresses", - ErrorCodes::BAD_ARGUMENTS); - std::vector res; - for (size_t i = 0; i < to.size(); ++i) - for (size_t j = 0; j < what.size(); ++j) - res.push_back(to[i] + what[j]); - - to.swap(res); -} - - -/// Parse number from substring -static bool parseNumber(const String & description, size_t l, size_t r, size_t & res) -{ - res = 0; - for (size_t pos = l; pos < r; pos ++) - { - if (!isNumericASCII(description[pos])) - return false; - res = res * 10 + description[pos] - '0'; - if (res > 1e15) - return false; - } - return true; -} - - - -/* Parse a string that generates shards and replicas. Separator - one of two characters | or , - * depending on whether shards or replicas are generated. - * For example: - * host1,host2,... - generates set of shards from host1, host2, ... - * host1|host2|... - generates set of replicas from host1, host2, ... - * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. - * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. - * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. - * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. - * abc{1..9}de{f,g,h} - is a direct product, 27 shards. - * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. - */ -static std::vector parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses) -{ - std::vector res; - std::vector cur; - - /// An empty substring means a set of an empty string - if (l >= r) - { - res.push_back(""); - return res; - } - - for (size_t i = l; i < r; ++i) - { - /// Either the numeric interval (8..10) or equivalent expression in brackets - if (description[i] == '{') - { - int cnt = 1; - int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two - size_t m; - std::vector buffer; - bool have_splitter = false; - - /// Look for the corresponding closing bracket - for (m = i + 1; m < r; ++m) - { - if (description[m] == '{') ++cnt; - if (description[m] == '}') --cnt; - if (description[m] == '.' 
&& description[m-1] == '.') last_dot = m; - if (description[m] == separator) have_splitter = true; - if (cnt == 0) break; - } - if (cnt != 0) - throw Exception("Table function 'remote': incorrect brace sequence in first argument", - ErrorCodes::BAD_ARGUMENTS); - /// The presence of a dot - numeric interval - if (last_dot != -1) - { - size_t left, right; - if (description[last_dot - 1] != '.') - throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - if (!parseNumber(description, i + 1, last_dot - 1, left)) - throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): " - + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - if (!parseNumber(description, last_dot + 1, m, right)) - throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): " - + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - if (left > right) - throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): " - + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - if (right - left + 1 > max_addresses) - throw Exception("Table function 'remote': first argument generates too many result addresses", - ErrorCodes::BAD_ARGUMENTS); - bool add_leading_zeroes = false; - size_t len = last_dot - 1 - (i + 1); - /// If the left and right borders have equal numbers, then you must add leading zeros. - if (last_dot - 1 - (i + 1) == m - (last_dot + 1)) - add_leading_zeroes = true; - for (size_t id = left; id <= right; ++id) - { - String cur = toString(id); - if (add_leading_zeroes) - { - while (cur.size() < len) - cur = "0" + cur; - } - buffer.push_back(cur); - } - } - else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows - buffer = parseDescription(description, i + 1, m, separator, max_addresses); - else /// Otherwise just copy, spawn will occur when you call with the correct delimiter - buffer.push_back(description.substr(i, m - i + 1)); - /// Add all possible received extensions to the current set of lines - append(cur, buffer, max_addresses); - i = m; - } - else if (description[i] == separator) - { - /// If the delimiter, then add found rows - res.insert(res.end(), cur.begin(), cur.end()); - cur.clear(); - } - else - { - /// Otherwise, simply append the character to current lines - std::vector buffer; - buffer.push_back(description.substr(i, 1)); - append(cur, buffer, max_addresses); - } - } - - res.insert(res.end(), cur.begin(), cur.end()); - if (res.size() > max_addresses) - throw Exception("Table function 'remote': first argument generates too many result addresses", - ErrorCodes::BAD_ARGUMENTS); - - return res; -} - - StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const Context & context) const { ASTs & args_func = typeid_cast(*ast_function).children; @@ -304,11 +146,11 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C { /// Create new cluster from the scratch size_t max_addresses = context.getSettingsRef().table_function_remote_max_addresses; - std::vector shards = parseDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses); + std::vector shards = parseRemoteDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses); std::vector> names; for (size_t i = 0; i < shards.size(); ++i) - 
names.push_back(parseDescription(shards[i], 0, shards[i].size(), '|', max_addresses)); + names.push_back(parseRemoteDescription(shards[i], 0, shards[i].size(), '|', max_addresses)); if (names.empty()) throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/src/TableFunctions/parseRemoteDescription.cpp b/dbms/src/TableFunctions/parseRemoteDescription.cpp new file mode 100644 index 00000000000..d903fe72f03 --- /dev/null +++ b/dbms/src/TableFunctions/parseRemoteDescription.cpp @@ -0,0 +1,171 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +/// The Cartesian product of two sets of rows, the result is written in place of the first argument +static void append(std::vector & to, const std::vector & what, size_t max_addresses) +{ + if (what.empty()) + return; + + if (to.empty()) + { + to = what; + return; + } + + if (what.size() * to.size() > max_addresses) + throw Exception("Table function 'remote': first argument generates too many result addresses", + ErrorCodes::BAD_ARGUMENTS); + std::vector res; + for (size_t i = 0; i < to.size(); ++i) + for (size_t j = 0; j < what.size(); ++j) + res.push_back(to[i] + what[j]); + + to.swap(res); +} + + +/// Parse number from substring +static bool parseNumber(const String & description, size_t l, size_t r, size_t & res) +{ + res = 0; + for (size_t pos = l; pos < r; pos ++) + { + if (!isNumericASCII(description[pos])) + return false; + res = res * 10 + description[pos] - '0'; + if (res > 1e15) + return false; + } + return true; +} + + + +/* Parse a string that generates shards and replicas. Separator - one of two characters | or , + * depending on whether shards or replicas are generated. + * For example: + * host1,host2,... - generates set of shards from host1, host2, ... + * host1|host2|... - generates set of replicas from host1, host2, ... + * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. + * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. + * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. + * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. + * abc{1..9}de{f,g,h} - is a direct product, 27 shards. + * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. + */ +std::vector parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses) +{ + std::vector res; + std::vector cur; + + /// An empty substring means a set of an empty string + if (l >= r) + { + res.push_back(""); + return res; + } + + for (size_t i = l; i < r; ++i) + { + /// Either the numeric interval (8..10) or equivalent expression in brackets + if (description[i] == '{') + { + int cnt = 1; + int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two + size_t m; + std::vector buffer; + bool have_splitter = false; + + /// Look for the corresponding closing bracket + for (m = i + 1; m < r; ++m) + { + if (description[m] == '{') ++cnt; + if (description[m] == '}') --cnt; + if (description[m] == '.' 
&& description[m-1] == '.') last_dot = m; + if (description[m] == separator) have_splitter = true; + if (cnt == 0) break; + } + if (cnt != 0) + throw Exception("Table function 'remote': incorrect brace sequence in first argument", + ErrorCodes::BAD_ARGUMENTS); + /// The presence of a dot - numeric interval + if (last_dot != -1) + { + size_t left, right; + if (description[last_dot - 1] != '.') + throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (!parseNumber(description, i + 1, last_dot - 1, left)) + throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): " + + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (!parseNumber(description, last_dot + 1, m, right)) + throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): " + + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (left > right) + throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): " + + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (right - left + 1 > max_addresses) + throw Exception("Table function 'remote': first argument generates too many result addresses", + ErrorCodes::BAD_ARGUMENTS); + bool add_leading_zeroes = false; + size_t len = last_dot - 1 - (i + 1); + /// If the left and right borders have equal numbers, then you must add leading zeros. + if (last_dot - 1 - (i + 1) == m - (last_dot + 1)) + add_leading_zeroes = true; + for (size_t id = left; id <= right; ++id) + { + String cur = toString(id); + if (add_leading_zeroes) + { + while (cur.size() < len) + cur = "0" + cur; + } + buffer.push_back(cur); + } + } + else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows + buffer = parseRemoteDescription(description, i + 1, m, separator, max_addresses); + else /// Otherwise just copy, spawn will occur when you call with the correct delimiter + buffer.push_back(description.substr(i, m - i + 1)); + /// Add all possible received extensions to the current set of lines + append(cur, buffer, max_addresses); + i = m; + } + else if (description[i] == separator) + { + /// If the delimiter, then add found rows + res.insert(res.end(), cur.begin(), cur.end()); + cur.clear(); + } + else + { + /// Otherwise, simply append the character to current lines + std::vector buffer; + buffer.push_back(description.substr(i, 1)); + append(cur, buffer, max_addresses); + } + } + + res.insert(res.end(), cur.begin(), cur.end()); + if (res.size() > max_addresses) + throw Exception("Table function 'remote': first argument generates too many result addresses", + ErrorCodes::BAD_ARGUMENTS); + + return res; +} + +} diff --git a/dbms/src/TableFunctions/parseRemoteDescription.h b/dbms/src/TableFunctions/parseRemoteDescription.h new file mode 100644 index 00000000000..cbc73380628 --- /dev/null +++ b/dbms/src/TableFunctions/parseRemoteDescription.h @@ -0,0 +1,20 @@ +#pragma once +#include +#include +namespace DB +{ +/* Parse a string that generates shards and replicas. Separator - one of two characters | or , + * depending on whether shards or replicas are generated. + * For example: + * host1,host2,... - generates set of shards from host1, host2, ... + * host1|host2|... - generates set of replicas from host1, host2, ... + * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. 
+ * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. + * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. + * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. + * abc{1..9}de{f,g,h} - is a direct product, 27 shards. + * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. + */ +std::vector parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses); + +} diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index d0afeff0b17..8974dcd53fe 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -14,6 +14,10 @@ void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctionURL(TableFunctionFactory & factory); +#if USE_HDFS +void registerTableFunctionHDFS(TableFunctionFactory & factory); +#endif + #if USE_POCO_SQLODBC || USE_POCO_DATAODBC void registerTableFunctionODBC(TableFunctionFactory & factory); #endif @@ -37,6 +41,10 @@ void registerTableFunctions() registerTableFunctionFile(factory); registerTableFunctionURL(factory); +#if USE_HDFS + registerTableFunctionHDFS(factory); +#endif + #if USE_POCO_SQLODBC || USE_POCO_DATAODBC registerTableFunctionODBC(factory); #endif diff --git a/dbms/tests/integration/README.md b/dbms/tests/integration/README.md index 8a808dd4300..a1482a7c7c1 100644 --- a/dbms/tests/integration/README.md +++ b/dbms/tests/integration/README.md @@ -33,7 +33,12 @@ set the following environment variables: ### Running with runner script -The only requirement is fresh docker. +The only requirement is a fresh, properly configured Docker installation. + +Notes: +* If you want to run integration tests without `sudo`, add your user to the docker group: `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about Docker configuration. +* If you have already run these tests without the `./runner` script, you may have problems with the pytest cache. It can be removed with `rm -r __pycache__ .pytest_cache/`. +* Some tests may require a lot of resources (CPU, RAM, etc.). It is better not to run large tests such as `test_cluster_copier` or `test_distributed_ddl*` on a laptop; a minimal test that uses the new HDFS helpers is sketched below.
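For illustration only (not part of the patch): a minimal sketch of what an integration test built on the HDFS helpers introduced in this change might look like. The module, table and file names are invented; it assumes the `helpers` package and the `hdfs1` service from `docker_compose_hdfs.yml` shown further below, and omits the custom image and log config used by the real `test_storage_hdfs` test.

```python
import pytest

from helpers.cluster import ClickHouseCluster
from helpers.hdfs_api import HDFSApi

cluster = ClickHouseCluster(__file__)
# with_hdfs=True brings up the hdfs1 container from docker_compose_hdfs.yml
node = cluster.add_instance('node', with_hdfs=True)


@pytest.fixture(scope="module")
def started_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()


def test_select_from_hdfs(started_cluster):
    # Put a TSV file into HDFS through the WebHDFS helper, then read it back
    # through the HDFS table engine added in this change.
    hdfs_api = HDFSApi("root")
    hdfs_api.write_data("/example_file", "1\tfoo\t1.5\n")

    node.query("CREATE TABLE hdfs_example (id UInt32, s String, v Float64) "
               "ENGINE = HDFS('hdfs://hdfs1:9000/example_file', 'TSV')")
    assert node.query("SELECT * FROM hdfs_example") == "1\tfoo\t1.5\n"
```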
You can run tests via `./runner` script and pass pytest arguments as last arg: ``` diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 39227369c2a..22d34d05844 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -14,11 +14,13 @@ import xml.dom.minidom from kazoo.client import KazooClient from kazoo.exceptions import KazooException import psycopg2 +import requests import docker from docker.errors import ContainerError from .client import Client, CommandRequest +from .hdfs_api import HDFSApi HELPERS_DIR = p.dirname(__file__) @@ -83,6 +85,7 @@ class ClickHouseCluster: self.with_postgres = False self.with_kafka = False self.with_odbc_drivers = False + self.with_hdfs = False self.docker_client = None self.is_up = False @@ -94,7 +97,7 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. @@ -148,13 +151,19 @@ class ClickHouseCluster: self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_postgres.yml')] - if with_kafka and not self.with_kafka: self.with_kafka = True self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_kafka.yml')]) self.base_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_kafka.yml')] + if with_hdfs and not self.with_hdfs: + self.with_hdfs = True + self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_hdfs.yml')]) + self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name', + self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_hdfs.yml')] + + return instance @@ -212,6 +221,20 @@ class ClickHouseCluster: raise Exception("Cannot wait ZooKeeper container") + def wait_hdfs_to_start(self, timeout=60): + hdfs_api = HDFSApi("root") + start = time.time() + while time.time() - start < timeout: + try: + hdfs_api.write_data("/somefilewithrandomname222", "1") + print "Connected to HDFS and SafeMode disabled! 
" + return + except Exception as ex: + print "Can't connect to HDFS " + str(ex) + time.sleep(1) + + raise Exception("Can't wait HDFS to start") + def start(self, destroy_dirs=True): if self.is_up: return @@ -250,7 +273,11 @@ class ClickHouseCluster: subprocess_check_call(self.base_kafka_cmd + ['up', '-d', '--force-recreate']) self.kafka_docker_id = self.get_instance_docker_id('kafka1') - subprocess_check_call(self.base_cmd + ['up', '-d', '--force-recreate']) + if self.with_hdfs and self.base_hdfs_cmd: + subprocess_check_call(self.base_hdfs_cmd + ['up', '-d', '--force-recreate']) + self.wait_hdfs_to_start(120) + + subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate']) start_deadline = time.time() + 20.0 # seconds for instance in self.instances.itervalues(): @@ -310,7 +337,6 @@ services: {name}: image: {image} hostname: {hostname} - user: '{uid}' volumes: - {binary_path}:/usr/bin/clickhouse:ro - {configs_dir}:/etc/clickhouse-server/ @@ -588,7 +614,6 @@ class ClickHouseInstance: image=self.image, name=self.name, hostname=self.hostname, - uid=os.getuid(), binary_path=self.server_bin_path, configs_dir=configs_dir, config_d_dir=config_d_dir, diff --git a/dbms/tests/integration/helpers/docker_compose_hdfs.yml b/dbms/tests/integration/helpers/docker_compose_hdfs.yml new file mode 100644 index 00000000000..ecfb0c329b3 --- /dev/null +++ b/dbms/tests/integration/helpers/docker_compose_hdfs.yml @@ -0,0 +1,9 @@ +version: '2' +services: + hdfs1: + image: sequenceiq/hadoop-docker:2.7.0 + restart: always + ports: + - 50075:50075 + - 50070:50070 + entrypoint: /etc/bootstrap.sh -d diff --git a/dbms/tests/integration/helpers/hdfs_api.py b/dbms/tests/integration/helpers/hdfs_api.py new file mode 100644 index 00000000000..989d66ee1e3 --- /dev/null +++ b/dbms/tests/integration/helpers/hdfs_api.py @@ -0,0 +1,46 @@ +#-*- coding: utf-8 -*- +import requests +import subprocess +from tempfile import NamedTemporaryFile + +class HDFSApi(object): + def __init__(self, user): + self.host = "localhost" + self.http_proxy_port = "50070" + self.http_data_port = "50075" + self.user = user + + def read_data(self, path): + response = requests.get("http://{host}:{port}/webhdfs/v1{path}?op=OPEN".format(host=self.host, port=self.http_proxy_port, path=path), allow_redirects=False) + if response.status_code != 307: + response.raise_for_status() + additional_params = '&'.join(response.headers['Location'].split('&')[1:2]) + response_data = requests.get("http://{host}:{port}/webhdfs/v1{path}?op=OPEN&{params}".format(host=self.host, port=self.http_data_port, path=path, params=additional_params)) + if response_data.status_code != 200: + response_data.raise_for_status() + + return response_data.text + + # Requests can't put file + def _curl_to_put(self, filename, path, params): + url = "http://{host}:{port}/webhdfs/v1{path}?op=CREATE&{params}".format(host=self.host, port=self.http_data_port, path=path, params=params) + cmd = "curl -s -i -X PUT -T {fname} '{url}'".format(fname=filename, url=url) + output = subprocess.check_output(cmd, shell=True) + return output + + def write_data(self, path, content): + named_file = NamedTemporaryFile() + fpath = named_file.name + named_file.write(content) + named_file.flush() + response = requests.put( + "http://{host}:{port}/webhdfs/v1{path}?op=CREATE".format(host=self.host, port=self.http_proxy_port, path=path, user=self.user), + allow_redirects=False + ) + if response.status_code != 307: + response.raise_for_status() + + additional_params = 
'&'.join(response.headers['Location'].split('&')[1:2] + ["user.name={}".format(self.user), "overwrite=true"]) + output = self._curl_to_put(fpath, path, additional_params) + if "201 Created" not in output: + raise Exception("Can't create file on hdfs:\n {}".format(output)) diff --git a/dbms/tests/integration/runner b/dbms/tests/integration/runner index 99d90102868..9d664065e64 100755 --- a/dbms/tests/integration/runner +++ b/dbms/tests/integration/runner @@ -2,54 +2,94 @@ #-*- coding: utf-8 -*- import subprocess import os +import getpass import argparse import logging +import signal +import subprocess -CUR_FILE_DIR_PATH = os.path.dirname(os.path.realpath(__file__)) -DEFAULT_CLICKHOUSE_ROOT = os.path.abspath(os.path.join(CUR_FILE_DIR_PATH, "../../../")) +CUR_FILE_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_CLICKHOUSE_ROOT = os.path.abspath(os.path.join(CUR_FILE_DIR, "../../../")) +CURRENT_WORK_DIR = os.getcwd() +CONTAINER_NAME = "clickhouse_integration_tests" DIND_INTEGRATION_TESTS_IMAGE_NAME = "yandex/clickhouse-integration-tests-runner" +def check_args_and_update_paths(args): + if not os.path.isabs(args.binary): + args.binary = os.path.abspath(os.path.join(CURRENT_WORK_DIR, args.binary)) + + if not os.path.isabs(args.configs_dir): + args.configs_dir = os.path.abspath(os.path.join(CURRENT_WORK_DIR, args.configs_dir)) + + if not os.path.isabs(args.clickhouse_root): + args.clickhouse_root = os.path.abspath(os.path.join(CURRENT_WORK_DIR, args.clickhouse_root)) + + for path in [args.binary, args.configs_dir, args.clickhouse_root]: + if not os.path.exists(path): + raise Exception("Path {} doesn't exists".format(path)) + +def try_rm_image(): + try: + subprocess.check_call('docker rm {name}'.format(name=CONTAINER_NAME), shell=True) + except: + pass + +def docker_kill_handler_handler(signum, frame): + subprocess.check_call('docker kill $(docker ps -a -q --filter name={name} --format="{{{{.ID}}}}")'.format(name=CONTAINER_NAME), shell=True) + try_rm_image() + raise KeyboardInterrupt("Killed by Ctrl+C") + +signal.signal(signal.SIGINT, docker_kill_handler_handler) + if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') parser = argparse.ArgumentParser(description="ClickHouse integration tests runner") + parser.add_argument( "--binary", default=os.environ.get("CLICKHOUSE_TESTS_SERVER_BIN_PATH", os.environ.get("CLICKHOUSE_TESTS_CLIENT_BIN_PATH", "/usr/bin/clickhouse")), help="Path to clickhouse binary") + parser.add_argument( "--configs-dir", - default=os.environ.get("CLICKHOUSE_TESTS_BASE_CONFIG_DIR", "/etc/clickhouse-server"), - help="Path to clickhouse configs directory" - ) + default=os.environ.get("CLICKHOUSE_TESTS_BASE_CONFIG_DIR", os.path.join(DEFAULT_CLICKHOUSE_ROOT, "dbms/programs/server")), + help="Path to clickhouse configs directory") + parser.add_argument( "--clickhouse-root", default=DEFAULT_CLICKHOUSE_ROOT, - help="Path to repository root folder" - ) + help="Path to repository root folder") + parser.add_argument( "--disable-net-host", action='store_true', default=False, - help="Don't use net host in parent docker container" - ) + help="Don't use net host in parent docker container") parser.add_argument('pytest_args', nargs='*', help="args for pytest command") args = parser.parse_args() + check_args_and_update_paths(args) + net = "" if not args.disable_net_host: net = "--net=host" - cmd = "docker run {net} --privileged --volume={bin}:/clickhouse \ - --volume={cfg}:/clickhouse-config --volume={pth}:/ClickHouse -e 
PYTEST_OPTS='{opts}' {img}".format( + cmd = "docker run {net} --name {name} --user={user} --privileged --volume={bin}:/clickhouse \ + --volume={cfg}:/clickhouse-config --volume={pth}:/ClickHouse -e PYTEST_OPTS='{opts}' {img} ".format( net=net, bin=args.binary, cfg=args.configs_dir, pth=args.clickhouse_root, opts=' '.join(args.pytest_args), img=DIND_INTEGRATION_TESTS_IMAGE_NAME, + user=getpass.getuser(), + name=CONTAINER_NAME, ) - subprocess.check_call(cmd, shell=True) + try: + subprocess.check_call(cmd, shell=True) + finally: + try_rm_image() diff --git a/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml b/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml index d596982ea97..320766c18ae 100644 --- a/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml +++ b/dbms/tests/integration/test_insert_into_distributed/configs/remote_servers.xml @@ -28,5 +28,19 @@ + + + + shard1 + 9000 + + + + + shard2 + 9000 + + + diff --git a/dbms/tests/integration/test_insert_into_distributed/test.py b/dbms/tests/integration/test_insert_into_distributed/test.py index d6700106929..7c6c45c5e07 100644 --- a/dbms/tests/integration/test_insert_into_distributed/test.py +++ b/dbms/tests/integration/test_insert_into_distributed/test.py @@ -21,6 +21,8 @@ instance_test_inserts_local_cluster = cluster.add_instance( node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) +shard1 = cluster.add_instance('shard1', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) +shard2 = cluster.add_instance('shard2', main_configs=['configs/remote_servers.xml'], with_zookeeper=True) @pytest.fixture(scope="module") def started_cluster(): @@ -56,6 +58,19 @@ CREATE TABLE distributed (date Date, id UInt32) ENGINE = Distributed('shard_with node2.query(''' CREATE TABLE distributed (date Date, id UInt32) ENGINE = Distributed('shard_with_local_replica', 'default', 'replicated') ''') + + shard1.query(''' +SET allow_experimental_low_cardinality_type = 1; +CREATE TABLE low_cardinality (d Date, x UInt32, s LowCardinality(String)) ENGINE = MergeTree(d, x, 8192)''') + + shard2.query(''' +SET allow_experimental_low_cardinality_type = 1; +CREATE TABLE low_cardinality (d Date, x UInt32, s LowCardinality(String)) ENGINE = MergeTree(d, x, 8192)''') + + shard1.query(''' +SET allow_experimental_low_cardinality_type = 1; +CREATE TABLE low_cardinality_all (d Date, x UInt32, s LowCardinality(String)) ENGINE = Distributed('shard_with_low_cardinality', 'default', 'low_cardinality', sipHash64(s))''') + yield cluster finally: @@ -170,3 +185,10 @@ def test_prefer_localhost_replica(started_cluster): ''' # Now query is sent to node1, as it higher in order assert TSV(node2.query("SET load_balancing='in_order'; SET prefer_localhost_replica=0;" + test_query)) == TSV(expected_from_node1) + +def test_inserts_low_cardinality(started_cluster): + instance = shard1 + instance.query("INSERT INTO low_cardinality_all (d,x,s) VALUES ('2018-11-12',1,'123')") + time.sleep(0.5) + assert instance.query("SELECT count(*) FROM low_cardinality_all").strip() == '1' + diff --git a/dbms/tests/integration/test_storage_hdfs/__init__.py b/dbms/tests/integration/test_storage_hdfs/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_storage_hdfs/configs/log_conf.xml 
b/dbms/tests/integration/test_storage_hdfs/configs/log_conf.xml new file mode 100644 index 00000000000..0de2745ca4c --- /dev/null +++ b/dbms/tests/integration/test_storage_hdfs/configs/log_conf.xml @@ -0,0 +1,11 @@ + + + trace + /var/log/clickhouse-server/log.log + /var/log/clickhouse-server/log.err.log + 1000M + 10 + /var/log/clickhouse-server/stderr.log + /var/log/clickhouse-server/stdout.log + + diff --git a/dbms/tests/integration/test_storage_hdfs/test.py b/dbms/tests/integration/test_storage_hdfs/test.py new file mode 100644 index 00000000000..76b35cc7bed --- /dev/null +++ b/dbms/tests/integration/test_storage_hdfs/test.py @@ -0,0 +1,47 @@ +import time +import pytest +import requests +from tempfile import NamedTemporaryFile +from helpers.hdfs_api import HDFSApi + +import os + +from helpers.cluster import ClickHouseCluster +import subprocess + + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', with_hdfs=True, image='withlibsimage', config_dir="configs", main_configs=['configs/log_conf.xml']) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + except Exception as ex: + print(ex) + raise ex + finally: + cluster.shutdown() + +def test_read_write_storage(started_cluster): + + hdfs_api = HDFSApi("root") + hdfs_api.write_data("/simple_storage", "1\tMark\t72.53\n") + + assert hdfs_api.read_data("/simple_storage") == "1\tMark\t72.53\n" + + node1.query("create table SimpleHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/simple_storage', 'TSV')") + assert node1.query("select * from SimpleHDFSStorage") == "1\tMark\t72.53\n" + +def test_read_write_table(started_cluster): + hdfs_api = HDFSApi("root") + data = "1\tSerialize\t555.222\n2\tData\t777.333\n" + hdfs_api.write_data("/simple_table_function", data) + + assert hdfs_api.read_data("/simple_table_function") == data + + assert node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')") == data diff --git a/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.re b/dbms/tests/queries/0_stateless/00281_compile_sizeof_packed.re new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00652_mutations_default_database.reference b/dbms/tests/queries/0_stateless/00652_mutations_default_database.reference new file mode 100644 index 00000000000..fd844f751f1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00652_mutations_default_database.reference @@ -0,0 +1,3 @@ +123 1 +234 4 +345 5 diff --git a/dbms/tests/queries/0_stateless/00652_mutations_default_database.sh b/dbms/tests/queries/0_stateless/00652_mutations_default_database.sh new file mode 100755 index 00000000000..bb142201e7c --- /dev/null +++ b/dbms/tests/queries/0_stateless/00652_mutations_default_database.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +. 
$CURDIR/mergetree_mutations.lib + +${CLICKHOUSE_CLIENT} --multiquery << EOF +DROP TABLE IF EXISTS test.mutations; +DROP TABLE IF EXISTS test.for_subquery; + +USE test; + +CREATE TABLE mutations(x UInt32, y UInt32) ENGINE MergeTree ORDER BY x; +INSERT INTO mutations VALUES (123, 1), (234, 2), (345, 3); + +CREATE TABLE for_subquery(x UInt32) ENGINE TinyLog; +INSERT INTO for_subquery VALUES (234), (345); + +ALTER TABLE mutations UPDATE y = y + 1 WHERE x IN for_subquery; +ALTER TABLE mutations UPDATE y = y + 1 WHERE x IN (SELECT x FROM for_subquery); +EOF + +wait_for_mutation "mutations" "mutation_3.txt" + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations" +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.for_subquery" diff --git a/dbms/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.reference b/dbms/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.reference new file mode 100644 index 00000000000..fd844f751f1 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.reference @@ -0,0 +1,3 @@ +123 1 +234 4 +345 5 diff --git a/dbms/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh b/dbms/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh new file mode 100755 index 00000000000..6d425e35d33 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00652_replicated_mutations_default_database_zookeeper.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + +. $CURDIR/mergetree_mutations.lib + +${CLICKHOUSE_CLIENT} --multiquery << EOF +DROP TABLE IF EXISTS test.mutations_r1; +DROP TABLE IF EXISTS test.for_subquery; + +USE test; + +CREATE TABLE mutations_r1(x UInt32, y UInt32) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/mutations', 'r1') ORDER BY x; +INSERT INTO mutations_r1 VALUES (123, 1), (234, 2), (345, 3); + +CREATE TABLE for_subquery(x UInt32) ENGINE TinyLog; +INSERT INTO for_subquery VALUES (234), (345); + +ALTER TABLE mutations_r1 UPDATE y = y + 1 WHERE x IN for_subquery; +ALTER TABLE mutations_r1 UPDATE y = y + 1 WHERE x IN (SELECT x FROM for_subquery); +EOF + +wait_for_mutation "mutations_r1" "0000000001" + +${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.mutations_r1" + +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.mutations_r1" +${CLICKHOUSE_CLIENT} --query="DROP TABLE test.for_subquery" diff --git a/dbms/tests/queries/0_stateless/00712_prewhere_with_alias_bug_2.reference b/dbms/tests/queries/0_stateless/00712_prewhere_with_alias_bug_2.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/00712_prewhere_with_alias_bug_2.sql b/dbms/tests/queries/0_stateless/00712_prewhere_with_alias_bug_2.sql new file mode 100644 index 00000000000..6c0df7cb94b --- /dev/null +++ b/dbms/tests/queries/0_stateless/00712_prewhere_with_alias_bug_2.sql @@ -0,0 +1,14 @@ +drop table if exists test.table; + +CREATE TABLE test.table (a UInt32, date Date, b UInt64, c UInt64, str String, d Int8, arr Array(UInt64), arr_alias Array(UInt64) ALIAS arr) ENGINE = MergeTree(date, intHash32(c), (a, date, intHash32(c), b), 8192); + +SELECT alias2 AS alias3 +FROM test.table +ARRAY JOIN + arr_alias AS alias2, + arrayEnumerateUniq(arr_alias) AS _uniq_Event +WHERE (date = toDate('2010-10-10')) AND (a IN (2, 3)) AND (str NOT IN ('z', 'x')) AND (d != -1) +LIMIT 1; + 
+drop table if exists test.table; + diff --git a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference index 044e2513f21..a6aa649ae54 100644 --- a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference +++ b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.reference @@ -1,4 +1,4 @@ -CREATE MATERIALIZED VIEW test.t_mv ( date Date, platform Enum8('a' = 0, 'b' = 1), app Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t WHERE (app = (SELECT min(app) FROM test.u )) AND (platform = (SELECT min(platform) FROM test.v )) +CREATE MATERIALIZED VIEW test.t_mv ( date Date, platform Enum8('a' = 0, 'b' = 1), app Enum8('a' = 0, 'b' = 1)) ENGINE = MergeTree ORDER BY date SETTINGS index_granularity = 8192 AS SELECT date, platform, app FROM test.t WHERE (app = (SELECT min(app) FROM test.u )) AND (platform = (SELECT (SELECT min(platform) FROM test.v ))) 2000-01-01 a a 2000-01-02 b b 2000-01-03 a a diff --git a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.sql b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.sql index 8065138d450..e2ce7b2a094 100644 --- a/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.sql +++ b/dbms/tests/queries/0_stateless/00751_default_databasename_for_view.sql @@ -20,19 +20,25 @@ INSERT INTO v VALUES ('b'); CREATE MATERIALIZED VIEW t_mv ENGINE = MergeTree ORDER BY date AS SELECT date, platform, app FROM t - WHERE app = (SELECT min(app) from u) AND platform = (SELECT min(platform) from v); + WHERE app = (SELECT min(app) from u) AND platform = (SELECT (SELECT min(platform) from v)); SHOW CREATE TABLE test.t_mv FORMAT TabSeparatedRaw; -INSERT INTO t VALUES ('2000-01-01', 'a', 'a') ('2000-01-02', 'b', 'b'); +USE default; +DETACH TABLE test.t_mv; +ATTACH TABLE test.t_mv; -INSERT INTO u VALUES ('a'); -INSERT INTO v VALUES ('a'); +INSERT INTO test.t VALUES ('2000-01-01', 'a', 'a') ('2000-01-02', 'b', 'b'); -INSERT INTO t VALUES ('2000-01-03', 'a', 'a') ('2000-01-04', 'b', 'b'); +INSERT INTO test.u VALUES ('a'); +INSERT INTO test.v VALUES ('a'); -SELECT * FROM t ORDER BY date; -SELECT * FROM t_mv ORDER BY date; +INSERT INTO test.t VALUES ('2000-01-03', 'a', 'a') ('2000-01-04', 'b', 'b'); -DROP TABLE IF EXISTS t; -DROP TABLE IF EXISTS t_mv; +SELECT * FROM test.t ORDER BY date; +SELECT * FROM test.t_mv ORDER BY date; + +DROP TABLE test.t; +DROP TABLE test.t_mv; +DROP TABLE test.u; +DROP TABLE test.v; diff --git a/dbms/tests/queries/0_stateless/00763_lock_buffer.sh b/dbms/tests/queries/0_stateless/00763_lock_buffer.sh index 4b8e2931902..fdf5996a699 100755 --- a/dbms/tests/queries/0_stateless/00763_lock_buffer.sh +++ b/dbms/tests/queries/0_stateless/00763_lock_buffer.sh @@ -17,7 +17,7 @@ function thread1() function thread2() { - seq 1 1000 | sed -r -e 's/.+/SELECT count() FROM test.buffer;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60' + seq 1 1000 | sed -r -e 's/.+/SELECT count() FROM test.buffer;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218' } thread1 & diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference new file 
mode 100644 index 00000000000..f774720f9ff --- /dev/null +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.reference @@ -0,0 +1,10 @@ +foo +FOO +foo +FOO +baz +2 +fo +oo +o +1 diff --git a/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql new file mode 100644 index 00000000000..c7ce18d2b45 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00765_sql_compatibility_aliases.sql @@ -0,0 +1,12 @@ +SET send_logs_level = 'none'; + +select lcase('FOO'); +select ucase('foo'); +select LOWER('Foo'); +select UPPER('Foo'); +select REPLACE('bar', 'r', 'z'); +select Locate('foo', 'o'); +select SUBSTRING('foo', 1, 2); +select Substr('foo', 2); +select mid('foo', 3); +select IF(3>2, 1, 0); diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.reference new file mode 100644 index 00000000000..190a18037c6 --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.reference @@ -0,0 +1 @@ +123 diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql b/dbms/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql new file mode 100644 index 00000000000..1c20058bc9d --- /dev/null +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_distributed_insert.sql @@ -0,0 +1,12 @@ +SET allow_experimental_low_cardinality_type = 1; +DROP TABLE IF EXISTS test.low_cardinality; +DROP TABLE IF EXISTS test.low_cardinality_all; + +CREATE TABLE test.low_cardinality (d Date, x UInt32, s LowCardinality(String)) ENGINE = MergeTree(d, x, 8192); +CREATE TABLE test.low_cardinality_all (d Date, x UInt32, s LowCardinality(String)) ENGINE = Distributed(test_shard_localhost, test, low_cardinality, sipHash64(s)); + +INSERT INTO test.low_cardinality_all (d,x,s) VALUES ('2018-11-12',1,'123'); +SELECT s FROM test.low_cardinality_all; + +DROP TABLE IF EXISTS test.low_cardinality; +DROP TABLE IF EXISTS test.low_cardinality_all; diff --git a/docker/server/Dockerfile b/docker/server/Dockerfile index b448e2a4c3a..f52eb61799e 100644 --- a/docker/server/Dockerfile +++ b/docker/server/Dockerfile @@ -24,6 +24,8 @@ RUN apt-get update \ /tmp/* \ && apt-get clean +RUN mkdir /docker-entrypoint-initdb.d + COPY docker_related_config.xml /etc/clickhouse-server/config.d/ COPY entrypoint.sh /entrypoint.sh ADD https://github.com/tianon/gosu/releases/download/1.10/gosu-amd64 /bin/gosu diff --git a/docker/server/README.md b/docker/server/README.md index dce230cd43d..ffcc4bc37a4 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -33,6 +33,22 @@ ClickHouse configuration represented with a file "config.xml" ([documentation](h $ docker run -d --name some-clickhouse-server --ulimit nofile=262144:262144 -v /path/to/your/config.xml:/etc/clickhouse-server/config.xml yandex/clickhouse-server ``` +## How to extend this image + +If you would like to do additional initialization in an image derived from this one, add one or more `*.sql`, `*.sql.gz`, or `*.sh` scripts under `/docker-entrypoint-initdb.d`. After the entrypoint calls `initdb` it will run any `*.sql` files, run any executable `*.sh` scripts, and source any non-executable `*.sh` scripts found in that directory to do further initialization before starting the service. 
+ +For example, to add an additional user and database, add the following to `/docker-entrypoint-initdb.d/init-db.sh`: + +```bash +#!/bin/bash +set -e + +clickhouse client -n <<-EOSQL + CREATE DATABASE docker; + CREATE TABLE docker.docker (x Int32) ENGINE = Log; +EOSQL +``` + ## License View [license information](https://github.com/yandex/ClickHouse/blob/master/LICENSE) for the software contained in this image. diff --git a/docker/server/entrypoint.sh b/docker/server/entrypoint.sh index 535fc4d2f37..1cd3a799c15 100644 --- a/docker/server/entrypoint.sh +++ b/docker/server/entrypoint.sh @@ -30,6 +30,36 @@ chown -R $USER:$GROUP \ "$TMP_DIR" \ "$USER_PATH" +if [ -n "$(ls /docker-entrypoint-initdb.d/)" ]; then + gosu clickhouse /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG & + pid="$!" + sleep 1 + + clickhouseclient=( clickhouse client --multiquery ) + echo + for f in /docker-entrypoint-initdb.d/*; do + case "$f" in + *.sh) + if [ -x "$f" ]; then + echo "$0: running $f" + "$f" + else + echo "$0: sourcing $f" + . "$f" + fi + ;; + *.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;; + *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;; + *) echo "$0: ignoring $f" ;; + esac + echo + done + + if ! kill -s TERM "$pid" || ! wait "$pid"; then + echo >&2 'ClickHouse init process failed.' + exit 1 + fi +fi # if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index f78cb5dc03a..77c626152e4 100644 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -39,6 +39,7 @@ You can also download and install packages manually from here: are generated from official `deb` packages by Yandex and have byte-identical binaries. * Packages from are built by independent company Altinity, but are used widely without any complaints. * Or you can use Docker (see below). diff --git a/docs/en/interfaces/third-party/gui.md b/docs/en/interfaces/third-party/gui.md index 855e9fffc7a..a424741efce 100644 --- a/docs/en/interfaces/third-party/gui.md +++ b/docs/en/interfaces/third-party/gui.md @@ -1,10 +1,12 @@ # Visual Interfaces from Third-party Developers -## Tabix +## Open-Source + +### Tabix Web interface for ClickHouse in the [Tabix](https://github.com/tabixio/tabix) project. -Main features: +Features: - Works with ClickHouse directly from the browser, without the need to install additional software. - Query editor with syntax highlighting. @@ -14,11 +16,11 @@ Main features: [Tabix documentation](https://tabix.io/doc/). -## HouseOps +### HouseOps [HouseOps](https://github.com/HouseOps/HouseOps) is a UI/IDE for OSX, Linux and Windows. -Main features: +Features: - Query builder with syntax highlighting. View the response in a table or JSON view. - Export query results as CSV or JSON. @@ -36,25 +38,27 @@ The following features are planned for development: - Cluster management. - Monitoring replicated and Kafka tables. -## DBeaver +## Commercial + +### DBeaver [DBeaver](https://dbeaver.io/) - universal desktop database client with ClickHouse support. -Key features: +Features: - Query development with syntax highlight. - Table preview. - Autocompletion. -## DataGrip +### DataGrip -[DataGrip](https://www.jetbrains.com/datagrip/) - Database IDE from JetBrains with dedicated support for ClickHouse. 
The same is embedded into other IntelliJ-based tools: PyCharm, IntelliJIDEA, GoLand, PhpStorm etc. +[DataGrip](https://www.jetbrains.com/datagrip/) is a database IDE from JetBrains with dedicated support for ClickHouse. It is also embedded into other IntelliJ-based tools: PyCharm, IntelliJ IDEA, GoLand, PhpStorm and others. Features: - Very fast code completion. -- Clickhouse synthax highlighting. -- Specific Clickhouse features support in SQL, i.e. nested columns, table engines. +- ClickHouse syntax highlighting. +- Support for features specific to ClickHouse, for example nested columns, table engines. - Data Editor. - Refactorings. - Search and Navigation. diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index ece64d9dcb3..a55d69d8bc2 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -3,6 +3,11 @@ !!! warning "Disclaimer" Yandex does **not** maintain the libraries listed below and haven't done any extensive testing to ensure their quality. +- Relational database management systems + - [MySQL](https://www.mysql.com) + - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) + - [PostgreSQL](https://www.postgresql.org) + - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - Python - [SQLAlchemy](https://www.sqlalchemy.org) - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) @@ -22,4 +27,4 @@ - [clickhouse_ecto](https://github.com/appodeal/clickhouse_ecto) -[Original article](https://clickhouse.yandex/docs/en/interfaces/third-party/integrations/) \ No newline at end of file +[Original article](https://clickhouse.yandex/docs/en/interfaces/third-party/integrations/) diff --git a/docs/en/introduction/distinctive_features.md b/docs/en/introduction/distinctive_features.md index 3354a75c54b..1dbc710659d 100644 --- a/docs/en/introduction/distinctive_features.md +++ b/docs/en/introduction/distinctive_features.md @@ -14,7 +14,7 @@ Some column-oriented DBMSs (InfiniDB CE and MonetDB) do not use data compression ## Disk Storage of Data -Mving a data physically sorted by primary key makes it possible to extract data for it's specific values or value ranges with low latency, less than few dozen milliseconds.any column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is actually necessary for real-time analysis. ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available. +Keeping data physically sorted by primary key makes it possible to extract data for its specific values or value ranges with low latency, less than a few dozen milliseconds. Some column-oriented DBMSs (such as SAP HANA and Google PowerDrill) can only work in RAM. This approach encourages the allocation of a larger hardware budget than is actually necessary for real-time analysis. ClickHouse is designed to work on regular hard drives, which means the cost per GB of data storage is low, but SSD and additional RAM are also fully used if available.
## Parallel Processing on Multiple Cores @@ -50,7 +50,7 @@ Low latency means that queries can be processed without delay and without trying ## Support for Approximated Calculations ClickHouse provides various ways to trade accuracy for performance: - + 1. Aggregate functions for approximated calculation of the number of distinct values, medians, and quantiles. 2. Running a query based on a part (sample) of data and getting an approximated result. In this case, proportionally less data is retrieved from the disk. 3. Running an aggregation for a limited number of random keys, instead of for all keys. Under certain conditions for key distribution in the data, this provides a reasonably accurate result while using fewer resources. diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md index 8fc159ac269..2f2b0253c49 100644 --- a/docs/en/operations/table_engines/mergetree.md +++ b/docs/en/operations/table_engines/mergetree.md @@ -51,18 +51,23 @@ For a description of request parameters, see [request description](../../query_l - `ENGINE` - Name and parameters of the engine. `ENGINE = MergeTree()`. `MergeTree` engine does not have parameters. -- `ORDER BY` — Primary key (or sorting key if the separate `PRIMARY KEY` clause is present). - - A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. -If a sampling expression is used, the primary key must contain it. Example: `ORDER BY (CounterID, EventDate, intHash32(UserID))`. - -- `PRIMARY KEY` - Primary key if it differs from the [sorting key](mergetree.md#table_engines-mergetree-sorting_key) (the sorting key in this case is specified by the `ORDER BY` clause). - - `PARTITION BY` — The [partitioning key](custom_partitioning_key.md#table_engines-custom_partitioning_key). For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../data_types/date.md#data_type-date). The partition names here have the `"YYYYMM"` format. -- `SAMPLE BY` — An expression for sampling. Example: `intHash32(UserID))`. +- `ORDER BY` — The sorting key. + + A tuple of columns or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. + +- `PRIMARY KEY` - The primary key if it [differs from the sorting key](mergetree.md#table_engines-mergetree-sorting_key). + + By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). + Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause. + +- `SAMPLE BY` — An expression for sampling. + + If a sampling expression is used, the primary key must contain it. Example: + `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. - `SETTINGS` — Additional parameters that control the behavior of the `MergeTree`: - `index_granularity` — The granularity of an index. The number of data rows between the "marks" of an index. By default, 8192. @@ -164,17 +169,17 @@ The number of columns in the primary key is not explicitly limited. Depending on - Provide additional logic when data parts merging in the [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) and [SummingMergeTree](summingmergetree.md#table_engine-summingmergetree) engines. - If you need this, it makes sense to specify the *sorting key* that is distinct from the primary key. + In this case it makes sense to specify the *sorting key* that is different from the primary key. 
A long primary key will negatively affect the insert performance and memory consumption, but extra columns in the primary key do not affect ClickHouse performance during `SELECT` queries. -### Choosing the Sorting Key that is distinct from the Primary Key +### Choosing the Primary Key that differs from the Sorting Key -It is possible to specify the sorting key (the expression for sorting the rows in data parts) that is distinct -from the primary key (the expression, values of which are written into the index file for each mark). In this -case primary key expression tuple must be a prefix of the sorting key expression tuple. +It is possible to specify the primary key (the expression, values of which are written into the index file +for each mark) that is different from the sorting key (the expression for sorting the rows in data parts). +In this case the primary key expression tuple must be a prefix of the sorting key expression tuple. This feature is helpful when using the [SummingMergeTree](summingmergetree.md) and [AggregatingMergeTree](aggregatingmergetree.md) table engines. In a common case when using these engines the diff --git a/docs/en/operations/table_engines/null.md b/docs/en/operations/table_engines/null.md index 58d3552d19d..68a2abdac81 100644 --- a/docs/en/operations/table_engines/null.md +++ b/docs/en/operations/table_engines/null.md @@ -1,3 +1,5 @@ + + # Null When writing to a Null table, data is ignored. When reading from a Null table, the response is empty. diff --git a/docs/en/operations/table_engines/view.md b/docs/en/operations/table_engines/view.md index c74eab262e4..204c0d9a8de 100644 --- a/docs/en/operations/table_engines/view.md +++ b/docs/en/operations/table_engines/view.md @@ -1,3 +1,5 @@ + + # View Used for implementing views (for more information, see the `CREATE VIEW query`). It does not store data, but only stores the specified `SELECT` query. When reading from a table, it runs this query (and deletes all unnecessary columns from the query). diff --git a/docs/en/query_language/alter.md b/docs/en/query_language/alter.md index 5e39aeef181..cdb46b9a15d 100644 --- a/docs/en/query_language/alter.md +++ b/docs/en/query_language/alter.md @@ -54,8 +54,6 @@ There are several processing stages: Only the first stage takes time. If there is a failure at this stage, the data is not changed. If there is a failure during one of the successive stages, data can be restored manually. The exception is if the old files were deleted from the file system but the data for the new files did not get written to the disk and was lost. -There is no support for changing the column type in arrays and nested data structures. - The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot. There is no support for deleting columns in the primary key or the sampling key (columns that are in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, it is allowed to add values to an Enum or change a type with `DateTime` to `UInt32`). 
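For illustration (not part of the patch): a sketch of the nested-column and key-column `ALTER` rules described in the paragraph above, written in the style of this repository's Python integration-test helpers. The table, columns and Enum values are invented, and the `node1` instance and `started_cluster` fixture are assumed to be set up as in the tests earlier in this diff.

```python
def test_alter_nested_and_key_columns(started_cluster):
    # Assumes `node1` is a ClickHouseCluster instance as in the tests above;
    # all names below are invented for the example.
    node1.query("CREATE TABLE test.visits (d Date, id UInt32, flag Enum8('a' = 0)) "
                "ENGINE = MergeTree(d, (id, flag), 8192)")

    # A nested data structure is added column by column:
    # a dotted name with the type Array(T).
    node1.query("ALTER TABLE test.visits ADD COLUMN `goals.id` Array(UInt32)")
    node1.query("ALTER TABLE test.visits ADD COLUMN `goals.price` Array(Float64)")

    # Changing the type of a primary-key column is allowed only when the stored
    # data is not modified, e.g. extending an Enum with new values.
    node1.query("ALTER TABLE test.visits MODIFY COLUMN flag Enum8('a' = 0, 'b' = 1)")
```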
@@ -78,17 +76,19 @@ The following command is supported: MODIFY ORDER BY new_expression ``` -It only works for tables in the `MergeTree` family (including replicated tables). The command changes the +It only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including +[replicated](../operations/table_engines/replication.md) tables). The command changes the [sorting key](../operations/table_engines/mergetree.md#table_engines-mergetree-sorting_key) of the table to `new_expression` (an expression or a tuple of expressions). Primary key remains the same. The command is lightweight in a sense that it only changes metadata. To keep the property that data part -rows are sorted by the sorting key expression you cannot add expressions containing existing columns +rows are ordered by the sorting key expression you cannot add expressions containing existing columns to the sorting key (only columns added by the `ADD COLUMN` command in the same `ALTER` query). ### Manipulations With Partitions and Parts -It only works for tables in the `MergeTree` family (including replicated tables). The following operations +It only works for tables in the [`MergeTree`](../operations/table_engines/mergetree.md) family (including +[replicated](../operations/table_engines/replication.md) tables). The following operations are available: - `DETACH PARTITION` – Move a partition to the 'detached' directory and forget it. diff --git a/docs/en/query_language/functions/in_functions.md b/docs/en/query_language/functions/in_functions.md index b9295cac7d1..3e267e55dd7 100644 --- a/docs/en/query_language/functions/in_functions.md +++ b/docs/en/query_language/functions/in_functions.md @@ -2,7 +2,7 @@ ## in, notIn, globalIn, globalNotIn -See the section "IN operators". +See the section [IN operators](../select.md/#query_language-in_operators). ## tuple(x, y, ...), operator (x, y, ...) diff --git a/docs/en/query_language/misc.md b/docs/en/query_language/misc.md index e81deca4b49..16b2b7a007c 100644 --- a/docs/en/query_language/misc.md +++ b/docs/en/query_language/misc.md @@ -16,6 +16,29 @@ ATTACH TABLE [IF NOT EXISTS] [db.]name [ON CLUSTER cluster] This query is used when starting the server. The server stores table metadata as files with `ATTACH` queries, which it simply runs at launch (with the exception of system tables, which are explicitly created on the server). +## DESCRIBE TABLE + +``` sql +DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] +``` + +Returns two `String`-type columns: `name` and `type`, which indicate the names and types of columns in the specified table. + +Nested data structures are output in "expanded" format. Each column is shown separately, with the name after a dot. + +## DETACH + +Deletes information about the 'name' table from the server. The server stops knowing about the table's existence. + +``` sql +DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +This does not delete the table's data or metadata. On the next server launch, the server will read the metadata and find out about the table again. +Similarly, a "detached" table can be re-attached using the `ATTACH` query (with the exception of system tables, which do not have metadata stored for them). + +There is no `DETACH DATABASE` query. + ## DROP This query has two types: `DROP DATABASE` and `DROP TABLE`. @@ -34,105 +57,6 @@ DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] Deletes the table. 
If `IF EXISTS` is specified, it doesn't return an error if the table doesn't exist or the database doesn't exist. -## DETACH - -Deletes information about the 'name' table from the server. The server stops knowing about the table's existence. - -``` sql -DETACH TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] -``` - -This does not delete the table's data or metadata. On the next server launch, the server will read the metadata and find out about the table again. -Similarly, a "detached" table can be re-attached using the `ATTACH` query (with the exception of system tables, which do not have metadata stored for them). - -There is no `DETACH DATABASE` query. - -## RENAME - -Renames one or more tables. - -``` sql -RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... [ON CLUSTER cluster] -``` - -All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). - -## SHOW DATABASES - -``` sql -SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] -``` - -Prints a list of all databases. -This query is identical to `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. - -See also the section "Formats". - -## SHOW TABLES - -``` sql -SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] -``` - -Displays a list of tables - -- tables from the current database, or from the 'db' database if "FROM db" is specified. -- all tables, or tables whose name matches the pattern, if "LIKE 'pattern'" is specified. - -This query is identical to: `SELECT name FROM system.tables WHERE database = 'db' [AND name LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]`. - -See also the section "LIKE operator". - -## SHOW PROCESSLIST - -``` sql -SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] -``` - -Outputs a list of queries currently being processed, other than `SHOW PROCESSLIST` queries. - -Prints a table containing the columns: - -**user** – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the 'default' user. SHOW PROCESSLIST shows the username for a specific query, not for a query that this query initiated. - -**address** – The name of the host that the query was sent from. For distributed processing, on remote servers, this is the name of the query requestor host. To track where a distributed query was originally made from, look at SHOW PROCESSLIST on the query requestor server. - -**elapsed** – The execution time, in seconds. Queries are output in order of decreasing execution time. - -**rows_read**, **bytes_read** – How many rows and bytes of uncompressed data were read when processing the query. For distributed processing, data is totaled from all the remote servers. This is the data used for restrictions and quotas. - -**memory_usage** – Current RAM usage in bytes. See the setting 'max_memory_usage'. - -**query** – The query itself. In INSERT queries, the data for insertion is not output. - -**query_id** – The query identifier. Non-empty only if it was explicitly defined by the user. For distributed processing, the query ID is not passed to remote servers. - -This query is identical to: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`. 
- -Tip (execute in the console): - -```bash -watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" -``` - -## SHOW CREATE TABLE - -``` sql -SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] -``` - -Returns a single `String`-type 'statement' column, which contains a single value – the `CREATE` query used for creating the specified table. - -## DESCRIBE TABLE - -``` sql -DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] -``` - -Returns two `String`-type columns: `name` and `type`, which indicate the names and types of columns in the specified table. - -Nested data structures are output in "expanded" format. Each column is shown separately, with the name after a dot. - ## EXISTS ``` sql @@ -141,43 +65,6 @@ EXISTS [TEMPORARY] TABLE [db.]name [INTO OUTFILE filename] [FORMAT format] Returns a single `UInt8`-type column, which contains the single value `0` if the table or database doesn't exist, or `1` if the table exists in the specified database. -## USE - -``` sql -USE db -``` - -Lets you set the current database for the session. -The current database is used for searching for tables if the database is not explicitly defined in the query with a dot before the table name. -This query can't be made when using the HTTP protocol, since there is no concept of a session. - -## SET - -``` sql -SET param = value -``` - -Allows you to set `param` to `value`. You can also make all the settings from the specified settings profile in a single query. To do this, specify 'profile' as the setting name. For more information, see the section "Settings". -The setting is made for the session, or for the server (globally) if `GLOBAL` is specified. -When making a global setting, the setting is not applied to sessions already running, including the current session. It will only be used for new sessions. - -When the server is restarted, global settings made using `SET` are lost. -To make settings that persist after a server restart, you can only use the server's config file. - -## OPTIMIZE - -``` sql -OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL] -``` - -Asks the table engine to do something for optimization. -Supported only by `*MergeTree` engines, in which this query initializes a non-scheduled merge of data parts. -If you specify a `PARTITION`, only the specified partition will be optimized. -If you specify `FINAL`, optimization will be performed even when all the data is already in one part. - -!!! warning - OPTIMIZE can't fix the "Too many parts" error. - ## KILL QUERY ``` sql @@ -214,3 +101,126 @@ The response contains the `kill_status` column, which can take the following val A test query (`TEST`) only checks the user's rights and displays a list of queries to stop. [Original article](https://clickhouse.yandex/docs/en/query_language/misc/) + +## OPTIMIZE + +``` sql +OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition] [FINAL] +``` + +Asks the table engine to do something for optimization. +Supported only by `*MergeTree` engines, in which this query initializes a non-scheduled merge of data parts. +If you specify a `PARTITION`, only the specified partition will be optimized. +If you specify `FINAL`, optimization will be performed even when all the data is already in one part. + +!!! warning + OPTIMIZE can't fix the "Too many parts" error. + +## RENAME + +Renames one or more tables. + +``` sql +RENAME TABLE [db11.]name11 TO [db12.]name12, [db21.]name21 TO [db22.]name22, ... 
[ON CLUSTER cluster] +``` + +All tables are renamed under global locking. Renaming tables is a light operation. If you indicated another database after TO, the table will be moved to this database. However, the directories with databases must reside in the same file system (otherwise, an error is returned). + +## SET + +``` sql +SET param = value +``` + +Allows you to set `param` to `value`. You can also make all the settings from the specified settings profile in a single query. To do this, specify 'profile' as the setting name. For more information, see the section "Settings". +The setting is made for the session, or for the server (globally) if `GLOBAL` is specified. +When making a global setting, the setting is not applied to sessions already running, including the current session. It will only be used for new sessions. + +When the server is restarted, global settings made using `SET` are lost. +To make settings that persist after a server restart, you can only use the server's config file. + +## SHOW CREATE TABLE + +``` sql +SHOW CREATE [TEMPORARY] TABLE [db.]table [INTO OUTFILE filename] [FORMAT format] +``` + +Returns a single `String`-type 'statement' column, which contains a single value – the `CREATE` query used for creating the specified table. + +## SHOW DATABASES + +``` sql +SHOW DATABASES [INTO OUTFILE filename] [FORMAT format] +``` + +Prints a list of all databases. +This query is identical to `SELECT name FROM system.databases [INTO OUTFILE filename] [FORMAT format]`. + +See also the section "Formats". + +## SHOW PROCESSLIST + +``` sql +SHOW PROCESSLIST [INTO OUTFILE filename] [FORMAT format] +``` + +Outputs a list of queries currently being processed, other than `SHOW PROCESSLIST` queries. + +Prints a table containing the columns: + +**user** – The user who made the query. Keep in mind that for distributed processing, queries are sent to remote servers under the 'default' user. SHOW PROCESSLIST shows the username for a specific query, not for a query that this query initiated. + +**address** – The name of the host that the query was sent from. For distributed processing, on remote servers, this is the name of the query requestor host. To track where a distributed query was originally made from, look at SHOW PROCESSLIST on the query requestor server. + +**elapsed** – The execution time, in seconds. Queries are output in order of decreasing execution time. + +**rows_read**, **bytes_read** – How many rows and bytes of uncompressed data were read when processing the query. For distributed processing, data is totaled from all the remote servers. This is the data used for restrictions and quotas. + +**memory_usage** – Current RAM usage in bytes. See the setting 'max_memory_usage'. + +**query** – The query itself. In INSERT queries, the data for insertion is not output. + +**query_id** – The query identifier. Non-empty only if it was explicitly defined by the user. For distributed processing, the query ID is not passed to remote servers. + +This query is identical to: `SELECT * FROM system.processes [INTO OUTFILE filename] [FORMAT format]`. + +Tip (execute in the console): + +```bash +watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'" +``` + +## SHOW TABLES + +``` sql +SHOW [TEMPORARY] TABLES [FROM db] [LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format] +``` + +Displays a list of tables + +- tables from the current database, or from the 'db' database if "FROM db" is specified. +- all tables, or tables whose name matches the pattern, if "LIKE 'pattern'" is specified. 
+ +This query is identical to: `SELECT name FROM system.tables WHERE database = 'db' [AND name LIKE 'pattern'] [INTO OUTFILE filename] [FORMAT format]`. + +See also the section "LIKE operator". + +## TRUNCATE + +```sql +TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +Removes all data from a table. When the clause `IF EXISTS` is omitted, the query returns an error if the table does not exist. + +The `TRUNCATE` query is not supported for [View](../operations/table_engines/view.md#table_engines-view), [File](../operations/table_engines/file.md#table_engines-file), [URL](../operations/table_engines/url.md#table_engines-url) and [Null](../operations/table_engines/null.md#table_engines-null) table engines. + +## USE + +``` sql +USE db +``` + +Lets you set the current database for the session. +The current database is used for searching for tables if the database is not explicitly defined in the query with a dot before the table name. +This query can't be made when using the HTTP protocol, since there is no concept of a session. diff --git a/docs/en/query_language/operators.md b/docs/en/query_language/operators.md index 3f8e0a3f6fe..35ce7ced71e 100644 --- a/docs/en/query_language/operators.md +++ b/docs/en/query_language/operators.md @@ -53,7 +53,7 @@ Groups of operators are listed in order of priority (the higher it is in the lis ## Operators for Working With Data Sets -*See the section "IN operators".* +*See the section [IN operators](select.md/#query_language-in_operators).* `a IN ...` – The `in(a, b) function` diff --git a/docs/fa/interfaces/third-party/gui.md b/docs/fa/interfaces/third-party/gui.md index 7f537b1c26e..b2c71664af2 100644 --- a/docs/fa/interfaces/third-party/gui.md +++ b/docs/fa/interfaces/third-party/gui.md @@ -2,11 +2,14 @@ # interface های visual توسعه دهندگان third-party -## Tabix +## متن باز + +### Tabix interface تحت وب برای ClickHouse در پروژه [Tabix](https://github.com/tabixio/tabix). -### ویژگی ها: +ویژگی ها: + - کار با ClickHouse به صورت مستقیم و از طریق مرورگر، بدون نیاز به نرم افزار اضافی. - ادیتور query به همراه syntax highlighting. - ویژگی Auto-completion برای دستورات. @@ -16,11 +19,12 @@ interface تحت وب برای ClickHouse در پروژه [Tabix](https://github [مستندات Tabix](https://tabix.io/doc/). -## HouseOps +### HouseOps [HouseOps](https://github.com/HouseOps/HouseOps) نرم افزار Desktop برای سیستم عامل های Linux و OSX و Windows می باشد.. -### ویژگی ها: +ویژگی ها: + - ابزار Query builder به همراه syntax highlighting. نمایش نتایج به صورت جدول و JSON Object. - خروجی نتایج به صورت csv و JSON Object. - Pنمایش Processes List ها به همراه توضیحات، ویژگی حالت record و kill کردن process ها. @@ -35,5 +39,30 @@ interface تحت وب برای ClickHouse در پروژه [Tabix](https://github - مانیتورینگ کافکا و جداول replicate (بزودی); - و بسیاری از ویژگی های دیگر برای شما. +## تجاری + +### DBeaver + +[DBeaver](https://dbeaver.io/) - مشتری دسکتاپ دسکتاپ دسکتاپ با پشتیبانی ClickHouse. + +امکانات: + +- توسعه پرس و جو با برجسته نحو +- پیش نمایش جدول +- تکمیل خودکار + +### DataGrip + +[DataGrip](https://www.jetbrains.com/datagrip/) IDE پایگاه داده از JetBrains با پشتیبانی اختصاصی برای ClickHouse است. این ابزار همچنین به سایر ابزارهای مبتنی بر IntelliJ تعبیه شده است: PyCharm، IntelliJ IDEA، GoLand، PhpStorm و دیگران. + +امکانات: + +- تکمیل کد بسیار سریع +- نحو برجسته ClickHouse. +- پشتیبانی از ویژگی های خاص برای ClickHouse، برای مثال ستون های توپی، موتورهای جدول. +- ویرایشگر داده. +- Refactorings. 
+- جستجو و ناوبری + [مقاله اصلی](https://clickhouse.yandex/docs/fa/interfaces/third-party_gui/) diff --git a/docs/fa/interfaces/third-party/integrations.md b/docs/fa/interfaces/third-party/integrations.md index 08055497848..c11b7237653 100644 --- a/docs/fa/interfaces/third-party/integrations.md +++ b/docs/fa/interfaces/third-party/integrations.md @@ -5,12 +5,17 @@ !!! warning "سلب مسئولیت" Yandex نه حفظ کتابخانه ها در زیر ذکر شده و نشده انجام هر آزمایش های گسترده ای برای اطمینان از کیفیت آنها. +- سیستم های مدیریت پایگاه داده رابطه ای + - [MySQL](https://www.mysql.com) + - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) + - [PostgreSQL](https://www.postgresql.org) + - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (استفاده می کند [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - Python - [SQLAlchemy](https://www.sqlalchemy.org) - - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) + - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (استفاده می کند [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - Java - [Hadoop](http://hadoop.apache.org) - - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../jdbc.md)) + - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (استفاده می کند [JDBC](../jdbc.md)) - Scala - [Akka](https://akka.io) - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) diff --git a/docs/ru/getting_started/index.md b/docs/ru/getting_started/index.md index 5ed654e1634..26da2a4654f 100644 --- a/docs/ru/getting_started/index.md +++ b/docs/ru/getting_started/index.md @@ -39,6 +39,7 @@ sudo apt-get install clickhouse-client clickhouse-server Яндекс не использует ClickHouse на поддерживающих `rpm` дистрибутивах Linux, а также `rpm` пакеты менее тщательно тестируются. Таким образом, использовать их стоит на свой страх и риск, но, тем не менее, многие другие компании успешно работают на них в production без каких-либо серьезных проблем. Для CentOS, RHEL и Fedora возможны следующие варианты: + * Пакеты из генерируются на основе официальных `deb` пакетов от Яндекса и содержат в точности тот же исполняемый файл. * Пакеты из собираются независимой компанией Altinity, но широко используются без каких-либо нареканий. * Либо можно использовать Docker (см. ниже). diff --git a/docs/ru/interfaces/third-party/gui.md b/docs/ru/interfaces/third-party/gui.md index 33b3e59ab8d..34598dd7119 100644 --- a/docs/ru/interfaces/third-party/gui.md +++ b/docs/ru/interfaces/third-party/gui.md @@ -1,6 +1,8 @@ # Визуальные интерфейсы от сторонних разработчиков -## Tabix +## С открытым исходным кодом + +### Tabix Веб-интерфейс для ClickHouse в проекте [Tabix](https://github.com/tabixio/tabix). @@ -14,7 +16,7 @@ [Документация Tabix](https://tabix.io/doc/). -## HouseOps +### HouseOps [HouseOps](https://github.com/HouseOps/HouseOps) — UI/IDE для OSX, Linux и Windows. @@ -39,7 +41,9 @@ - Управление кластером; - Мониторинг реплицированных и Kafka таблиц. -## DBeaver +## Коммерческие + +### DBeaver [DBeaver](https://dbeaver.io/) - универсальный desktop клиент баз данных с поддержкой ClickHouse. @@ -49,4 +53,17 @@ - Просмотр таблиц; - Автодополнение команд. +### DataGrip + +[DataGrip](https://www.jetbrains.com/datagrip/) — это IDE для баз данных о JetBrains с выделенной поддержкой ClickHouse. 
Он также встроен в другие инструменты на основе IntelliJ: PyCharm, IntelliJ IDEA, GoLand, PhpStorm и другие. + +Основные возможности: + +- Очень быстрое дополнение кода. +- Подсветка синтаксиса для SQL диалекта ClickHouse. +- Поддержка функций, специфичных для ClickHouse, например вложенных столбцов, движков таблиц. +- Редактор данных. +- Рефакторинги. +- Поиск и навигация. + [Оригинальная статья](https://clickhouse.yandex/docs/ru/interfaces/third-party_gui/) diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index e86a9fb20a8..ed7a20b5880 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -3,12 +3,17 @@ !!! warning "Disclaimer" Яндекс не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества. +- Реляционные системы управления базами данных + - [MySQL](https://www.mysql.com) + - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) + - [PostgreSQL](https://www.postgresql.org) + - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (использует [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - Python - [SQLAlchemy](https://www.sqlalchemy.org) - - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) + - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (использует [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - Java - [Hadoop](http://hadoop.apache.org) - - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../jdbc.md)) + - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (использует [JDBC](../jdbc.md)) - Scala - [Akka](https://akka.io) - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md index 1b3d3e61db7..7351968e95b 100644 --- a/docs/ru/operations/table_engines/mergetree.md +++ b/docs/ru/operations/table_engines/mergetree.md @@ -49,18 +49,23 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - `ENGINE` — Имя и параметры движка. `ENGINE = MergeTree()`. Движок `MergeTree` не имеет параметров. -- `ORDER BY` — первичный ключ (если не указана отдельная секция `PRIMARY KEY`). - - Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounerID, EventDate)`. - Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Пример: `ORDER BY (CounerID, EventDate, intHash32(UserID))`. - -- `PRIMARY KEY` - первичный ключ, если он отличается от [ключа сортировки](mergetree.md#table_engines-mergetree-sorting_key) (который в этом случае задаёт секция `ORDER BY`). - - `PARTITION BY` — [ключ партиционирования](custom_partitioning_key.md#table_engines-custom_partitioning_key). Для партиционирования по месяцам используйте выражение `toYYYYMM(date_column)`, где `date_column` — столбец с датой типа [Date](../../data_types/date.md#data_type-date). В этом случае имена партиций имеют формат `"YYYYMM"`. -- `SAMPLE BY` — выражение для сэмплирования. Пример: `intHash32(UserID))`. +- `ORDER BY` — ключ сортировки. + + Кортеж столбцов или произвольных выражений. Пример: `ORDER BY (CounerID, EventDate)`. 
+ +- `PRIMARY KEY` - первичный ключ, если он [отличается от ключа сортировки](mergetree.md#table_engines-mergetree-sorting_key). + + По умолчанию первичный ключ совпадает с ключом сортировки (который задаётся секцией `ORDER BY`). Поэтому + в большинстве случаев секцию `PRIMARY KEY` отдельно указывать не нужно. + +- `SAMPLE BY` — выражение для сэмплирования. + + Если используется выражение для сэмплирования, то первичный ключ должен содержать его. Пример: + `SAMPLE BY intHash32(UserID) ORDER BY (CounerID, EventDate, intHash32(UserID))`. - `SETTINGS` — дополнительные параметры, регулирующие поведение `MergeTree`: @@ -164,17 +169,17 @@ ClickHouse не требует уникального первичного кл - Обеспечить дополнительную логику при слиянии кусков данных в движках [CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) и [SummingMergeTree](summingmergetree.md#table_engine-summingmergetree). - Для этого имеет смысл задать отдельный *ключ сортировки*, отличающийся от первичного ключа. + В этом случае имеет смысл задать отдельный *ключ сортировки*, отличающийся от первичного ключа. Длинный первичный ключ будет негативно влиять на производительность вставки и потребление памяти, однако на производительность ClickHouse при запросах `SELECT` лишние столбцы в первичном ключе не влияют. -### Ключ сортировки, отличный от первичного ключа +### Первичный ключ, отличный от ключа сортировки -Существует возможность задать ключ сортировки (выражение, по которому будут упорядочены строки в кусках -данных), отличающийся от первичного ключа (выражения, значения которого будут записаны в индексный файл для -каждой засечки). Кортеж выражения первичного ключа при этом должен быть префиксом кортежа выражения ключа +Существует возможность задать первичный ключ (выражение, значения которого будут записаны в индексный файл для +каждой засечки), отличный от ключа сортировки (выражения, по которому будут упорядочены строки в кусках +данных). Кортеж выражения первичного ключа при этом должен быть префиксом кортежа выражения ключа сортировки. Данная возможность особенно полезна при использовании движков [SummingMergeTree](summingmergetree.md) diff --git a/docs/ru/query_language/alter.md b/docs/ru/query_language/alter.md index f4877415e8c..e55a0e1d665 100644 --- a/docs/ru/query_language/alter.md +++ b/docs/ru/query_language/alter.md @@ -76,7 +76,7 @@ MODIFY COLUMN name [type] [default_expr] MODIFY ORDER BY new_expression ``` -Работает только для таблиц семейства `MergeTree` (в том числе реплицированных). После выполнения запроса +Работает только для таблиц семейства [`MergeTree`](../operations/table_engines/mergetree.md) (в том числе [реплицированных](../operations/table_engines/replication.md)). После выполнения запроса [ключ сортировки](../operations/table_engines/mergetree.md#table_engines-mergetree-sorting_key) таблицы заменяется на `new_expression` (выражение или кортеж выражений). Первичный ключ при этом остаётся прежним. @@ -86,7 +86,7 @@ MODIFY ORDER BY new_expression ### Манипуляции с партициями и кусками -Работает только для таблиц семейства `MergeTree` (в том числе реплицированных). Существуют следующие виды +Работает только для таблиц семейства [`MergeTree`](../operations/table_engines/mergetree.md) (в том числе [реплицированных](../operations/table_engines/replication.md)). Существуют следующие виды операций: - `DETACH PARTITION` - перенести партицию в директорию detached и забыть про неё. 
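A minimal sketch of the `ALTER` operations covered by the hunks above, with hypothetical table, column and partition names:

``` sql
-- MODIFY ORDER BY may only append columns that are added in the same ALTER query (hypothetical names).
ALTER TABLE hits ADD COLUMN Browser String, MODIFY ORDER BY (CounterID, EventDate, Browser);

-- Move a partition to the 'detached' directory; a monthly partition key is assumed here.
ALTER TABLE hits DETACH PARTITION 201801;
```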
diff --git a/docs/ru/query_language/functions/in_functions.md b/docs/ru/query_language/functions/in_functions.md index 606867c85bb..427f332423d 100644 --- a/docs/ru/query_language/functions/in_functions.md +++ b/docs/ru/query_language/functions/in_functions.md @@ -1,7 +1,7 @@ # Функции для реализации оператора IN. ## in, notIn, globalIn, globalNotIn -Смотрите раздел "Операторы IN". +Смотрите раздел [Операторы IN](../select.md/#query_language-in_operators). ## tuple(x, y, ...), оператор (x, y, ...) Функция, позволяющая сгруппировать несколько столбцов. diff --git a/docs/ru/query_language/operators.md b/docs/ru/query_language/operators.md index 9d82d07feed..ba2f80c8a1d 100644 --- a/docs/ru/query_language/operators.md +++ b/docs/ru/query_language/operators.md @@ -53,7 +53,7 @@ ## Операторы для работы с множествами -*Смотрите раздел "Операторы IN".* +*Смотрите раздел [Операторы IN](select.md/#query_language-in_operators).* `a IN ...` - функция `in(a, b)` diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index c38d6ddcf16..7c5c592b28e 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -2,20 +2,46 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import markdown.inlinepatterns import markdown.extensions import markdown.util +class NofollowMixin(object): + def handleMatch(self, m): + try: + el = super(NofollowMixin, self).handleMatch(m) + except IndexError: + return + + if el is not None: + href = el.get('href') or '' + if href.startswith('http') and not href.startswith('https://clickhouse.yandex'): + el.set('rel', 'external nofollow') + return el + + +class NofollowAutolinkPattern(NofollowMixin, markdown.inlinepatterns.AutolinkPattern): + pass + + +class NofollowLinkPattern(NofollowMixin, markdown.inlinepatterns.LinkPattern): + pass + + class ClickHousePreprocessor(markdown.util.Processor): def run(self, lines): for line in lines: if '' not in line: yield line + class ClickHouseMarkdown(markdown.extensions.Extension): def extendMarkdown(self, md, md_globals): md.preprocessors['clickhouse'] = ClickHousePreprocessor() + md.inlinePatterns['link'] = NofollowLinkPattern(markdown.inlinepatterns.LINK_RE, md) + md.inlinePatterns['autolink'] = NofollowAutolinkPattern(markdown.inlinepatterns.AUTOLINK_RE, md) def makeExtension(**kwargs): diff --git a/docs/tools/mkdocs-material-theme/base.html b/docs/tools/mkdocs-material-theme/base.html index b3d3ae2d0e4..a8950c53c0a 100644 --- a/docs/tools/mkdocs-material-theme/base.html +++ b/docs/tools/mkdocs-material-theme/base.html @@ -136,10 +136,10 @@
{% block content %} {% if config.extra.single_page %} - + {% else %} {% if page.edit_url %} - + {% endif %} {% endif %} {% if not "\x3ch1" in page.content %} @@ -155,7 +155,7 @@

{{ lang.t("meta.source") }}

{% set path = page.meta.path | default([""]) %} {% set file = page.meta.source %} - + {{ file }} {% endif %} diff --git a/docs/tools/mkdocs-material-theme/partials/nav-item.html b/docs/tools/mkdocs-material-theme/partials/nav-item.html index 32b86583d7a..4b31a079912 100644 --- a/docs/tools/mkdocs-material-theme/partials/nav-item.html +++ b/docs/tools/mkdocs-material-theme/partials/nav-item.html @@ -40,7 +40,7 @@ {% endif %} - {{ nav_item.title }} + {{ nav_item.title }} {% if toc_ | first is defined %} {% include "partials/toc.html" %} diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index e78ba572591..dd44ae9717a 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -18,7 +18,7 @@ Pygments==2.2.0 pytz==2017.3 PyYAML==3.12 recommonmark==0.4.0 -requests==2.18.4 +requests==2.20.0 singledispatch==3.4.0.3 six==1.11.0 snowballstemmer==1.2.1 diff --git a/docs/zh/interfaces/third-party/gui.md b/docs/zh/interfaces/third-party/gui.md index e9f20405234..e0173b8b91d 100644 --- a/docs/zh/interfaces/third-party/gui.md +++ b/docs/zh/interfaces/third-party/gui.md @@ -1,6 +1,8 @@ # 第三方开发的可视化界面 -## Tabix +## 开源 + +### Tabix ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix). @@ -15,7 +17,7 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix). [Tabix 文档](https://tabix.io/doc/). -## HouseOps +### HouseOps [HouseOps](https://github.com/HouseOps/HouseOps) 是一个交互式 UI/IDE 工具,可以运行在 OSX, Linux and Windows 平台中。 @@ -36,5 +38,29 @@ ClickHouse Web 界面 [Tabix](https://github.com/tabixio/tabix). - 集群管理 - 监控副本情况以及 Kafka 引擎表 +## 商业 + +### DBeaver + +[DBeaver](https://dbeaver.io/) - 具有ClickHouse支持的通用桌面数据库客户端。 + +特征: + + - 使用语法高亮显示查询开发。 + - 表格预览。 + - 自动完成。 + +### DataGrip + +[DataGrip](https://www.jetbrains.com/datagrip/)是JetBrains的数据库IDE,专门支持ClickHouse。 它还嵌入到其他基于IntelliJ的工具中:PyCharm,IntelliJ IDEA,GoLand,PhpStorm等。 + +特征: + + - 非常快速的代码完成。 + - ClickHouse语法高亮显示。 + - 支持ClickHouse特有的功能,例如嵌套列,表引擎。 + - 数据编辑器。 + - 重构。 + - 搜索和导航。 [来源文章](https://clickhouse.yandex/docs/zh/interfaces/third-party_gui/) diff --git a/docs/zh/interfaces/third-party/integrations.md b/docs/zh/interfaces/third-party/integrations.md index 9b33f455b09..d4e9a6abdbe 100644 --- a/docs/zh/interfaces/third-party/integrations.md +++ b/docs/zh/interfaces/third-party/integrations.md @@ -1,14 +1,19 @@ # 第三方集成库 -!!! warning "放弃" +!!! 
warning "声明" Yandex不维护下面列出的库,也没有进行任何广泛的测试以确保其质量。 +- 关系数据库管理系统 + - [MySQL](https://www.mysql.com) + - [ProxySQL](https://github.com/sysown/proxysql/wiki/ClickHouse-Support) + - [PostgreSQL](https://www.postgresql.org) + - [infi.clickhouse_fdw](https://github.com/Infinidat/infi.clickhouse_fdw) (使用 [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - Python - [SQLAlchemy](https://www.sqlalchemy.org) - - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) + - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (使用 [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - Java - [Hadoop](http://hadoop.apache.org) - - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (uses [JDBC](../jdbc.md)) + - [clickhouse-hdfs-loader](https://github.com/jaykelin/clickhouse-hdfs-loader) (使用 [JDBC](../jdbc.md)) - Scala - [Akka](https://akka.io) - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) diff --git a/docs/zh/introduction/distinctive_features.md b/docs/zh/introduction/distinctive_features.md deleted file mode 120000 index 9cf00a2a00f..00000000000 --- a/docs/zh/introduction/distinctive_features.md +++ /dev/null @@ -1 +0,0 @@ -../../en/introduction/distinctive_features.md \ No newline at end of file diff --git a/docs/zh/introduction/distinctive_features.md b/docs/zh/introduction/distinctive_features.md new file mode 100644 index 00000000000..dffd855048d --- /dev/null +++ b/docs/zh/introduction/distinctive_features.md @@ -0,0 +1,64 @@ +# ClickHouse的独特功能 + +## 真正的列式数据库管理系统 + +在一个真正的列式数据库管理系统中,除了数据本身外不应该存在其他额外的数据。这意味着为了避免在值旁边存储它们的长度“number”,你必须支持固定长度数值类型。例如,10亿个UInt8类型的数据在未压缩的情况下大约消耗1GB左右的空间,如果不是这样的话,这将对CPU的使用产生强烈影响。即使是在未压缩的情况下,紧凑的存储数据也是非常重要的,因为解压缩的速度主要取决于未压缩数据的大小。 + +这是非常值得注意的,因为在一些其他系统中也可以将不同的列分别进行存储,但由于对其他场景进行的优化,使其无法有效的处理分析查询。例如: HBase,BigTable,Cassandra,HyperTable。在这些系统中,你可以得到每秒数十万的吞吐能力,但是无法得到每秒几亿行的吞吐能力。 + +需要说明的是,ClickHouse不单单是一个数据库, 它是一个数据库管理系统。因为它允许在运行时创建表和数据库、加载数据和运行查询,而无需重新配置或重启服务。 + +## 数据压缩 + +在一些列式数据库管理系统中(例如:InfiniDB CE and MonetDB) 不是用数据压缩。但是, 数据压缩在实现优异的存储系统中确实起着关键的作用。 + +## 数据的磁盘存储 + +许多的列式数据库(如 SAP HANA, Google PowerDrill)只能在内存中工作,这种方式会造成比实际更多的设备预算。ClickHouse被设计用于工作在传统磁盘上的系统,它提供每GB更低的存储成本,但如果有可以使用SSD和内存,它也会合理的利用这些资源。 + +## 多核心并行处理 + +大型查询可以以很自然的方式在ClickHouse中进行并行化处理,以此来使用当前服务器上可用的所有资源。 + +## 多服务器分布式处理 + +上面提到的列式数据库管理系统中,几乎没有一个支持分布式的查询处理。 +在ClickHouse中,数据可以保存在不同的shard上,每一个shard都由一组用于容错的replica组成,查询可以并行的在所有shard上进行处理。这些对用户来说是透明的 + +## 支持SQL + +ClickHouse支持基于SQL的查询语言,该语言大部分情况下是与SQL标准兼容的。 +支持的查询包括 GROUP BY,ORDER BY,IN,JOIN以及非相关子查询。 +不支持窗口函数和相关子查询。 + +## 向量引擎 + +为了高效的使用CPU,数据不仅仅按列存储,同时还按向量(列的一部分)进行处理。 + +## 实时的数据更新 + +ClickHouse支持在表中定义主键。为了使查询能够快速在主键中进行范围查找,数据总是以增量的方式有序的存储在MergeTree中。因此,数据可以持续不断高效的写入到表中,并且写入的过程中不会存在任何加锁的行为。 + +## 索引 + +按照主键对数据进行排序,这将帮助ClickHouse以几十毫秒的低延迟对数据进行特定值查找或范围查找。 + +## 适合在线查询 + +在线查询意味着在没有对数据做任何预处理的情况下以极低的延迟处理查询并将结果加载到用户的页面中。 + +## 支持近似计算 + +ClickHouse提供各种各样在允许牺牲数据精度的情况下对查询进行加速的方法: + +1. 用于近似计算的各类聚合函数,如:distinct values, medians, quantiles +2. 基于数据的部分样本进行近似查询。这时,仅会从磁盘检索少部分比例的数据。 +3. 
不使用全部的聚合条件,通过随机选择有限个数据聚合条件进行聚合。这在数据聚合条件满足某些分布条件下,在提供相当准确的聚合结果的同时降低了计算资源的使用。 + +## 支持数据复制和数据完整性 + +ClickHouse使用异步的多主复制技术。当数据被写入任何一个可用副本后,系统会在后台将数据分发给其他副本,以保证系统在不同副本上保持相同的数据。在大多数情况下ClickHouse能在故障后自动恢复,在一些复杂的情况下需要少量的手动恢复。 + +更多信息,参见[数据复制](../operations/table_engines/replication.md#table_engines-replication)。 + +[来源文章](https://clickhouse.yandex/docs/en/introduction/distinctive_features/) diff --git a/docs/zh/introduction/features_considered_disadvantages.md b/docs/zh/introduction/features_considered_disadvantages.md index e5cd51ebdcd..7a1c2721584 100644 --- a/docs/zh/introduction/features_considered_disadvantages.md +++ b/docs/zh/introduction/features_considered_disadvantages.md @@ -1,7 +1,7 @@ # ClickHouse可以考虑缺点的功能 -1. 没有完整的交易。 -2. 缺乏以高速率和低延迟修改或删除已插入数据的能力。 有批次删除和更新可用于清理或修改数据,例如符合[GDPR](https://gdpr-info.eu)。 +1. 没有完整的事物支持。 +2. 缺少高频率,低延迟的修改或删除已存在数据的能力。仅能用于批量删除或修改数据,但这符合[GDPR](https://gdpr-info.eu)。 3. 稀疏索引使得ClickHouse不适合通过其键检索单行的点查询。 [来源文章](https://clickhouse.yandex/docs/zh/introduction/features_considered_disadvantages/) diff --git a/docs/zh/introduction/performance.md b/docs/zh/introduction/performance.md deleted file mode 120000 index cb2912bcb81..00000000000 --- a/docs/zh/introduction/performance.md +++ /dev/null @@ -1 +0,0 @@ -../../en/introduction/performance.md \ No newline at end of file diff --git a/docs/zh/introduction/performance.md b/docs/zh/introduction/performance.md new file mode 100644 index 00000000000..1cd9f9f0692 --- /dev/null +++ b/docs/zh/introduction/performance.md @@ -0,0 +1,25 @@ +# Performance + +根据Yandex的内部测试结果,ClickHouse表现出了比同类可比较产品更优的性能。你可以在[这里](https://clickhouse.yandex/benchmark.html)查看具体的测试结果。 + +许多其他的测试也证实这一点。你可以使用互联网搜索到它们,或者你也可以从[我们收集的部分相关连接](https://clickhouse.yandex/#independent-bookmarks)中查看。 + +## 单个大查询的吞吐量 + +吞吐量可以使用每秒处理的行数或每秒处理的字节数来衡量。如果数据被放置在page cache中,则一个不太复杂的查询在单个服务器上大约能够以2-10GB/s(未压缩)的速度进行处理(对于简单的查询,速度可以达到30GB/s)。如果数据没有在page cache中的话,那么速度将取决于你的磁盘系统和数据的压缩率。例如,如果一个磁盘允许以400MB/s的速度读取数据,并且数据压缩率是3,则数据的处理速度为1.2GB/s。这意味着,如果你是在提取一个10字节的列,那么它的处理速度大约是1-2亿行每秒。 + +对于分布式处理,处理速度几乎是线性扩展的,但这受限于聚合或排序的结果不是那么大的情况下。 + +## 处理短查询的延迟时间 + +如果一个查询使用主键并且没有太多行(几十万)进行处理,并且没有查询太多的列,那么在数据被page cache缓存的情况下,它的延迟应该小于50毫秒(在最佳的情况下应该小于10毫秒)。 否则,延迟取决于数据的查找次数。如果你当前使用的是HDD,在数据没有加载的情况下,查询所需要的延迟可以通过以下公式计算得知: 查找时间(10 ms) \* 查询的列的数量 \* 查询的数据块的数量。 + +## 处理大量短查询的吞吐量 + +在相同的情况下,ClickHouse可以在单个服务器上每秒处理数百个查询(在最佳的情况下最多可以处理数千个)。但是由于这不适用于分析型场景。因此我们建议每秒最多查询100次。 + +## 数据的写入性能 + +我们建议每次写入不少于1000行的批量写入,或每秒不超过一个写入请求。当使用tab-separated格式将一份数据写入到MergeTree表中时,写入速度大约为50到200MB/s。如果您写入的数据每行为1Kb,那么写入的速度为50,000到200,000行每秒。如果您的行更小,那么写入速度将更高。为了提高写入性能,您可以使用多个INSERT进行并行写入,这将带来线性的性能提升。 + +[来源文章](https://clickhouse.yandex/docs/en/introduction/performance/) diff --git a/docs/zh/introduction/ya_metrika_task.md b/docs/zh/introduction/ya_metrika_task.md deleted file mode 120000 index ed316d32ddd..00000000000 --- a/docs/zh/introduction/ya_metrika_task.md +++ /dev/null @@ -1 +0,0 @@ -../../en/introduction/ya_metrika_task.md \ No newline at end of file diff --git a/docs/zh/introduction/ya_metrika_task.md b/docs/zh/introduction/ya_metrika_task.md new file mode 100644 index 00000000000..72d8f1bddd5 --- /dev/null +++ b/docs/zh/introduction/ya_metrika_task.md @@ -0,0 +1,50 @@ +# Yandex.Metrica的使用案例 + +ClickHouse最初是为[Yandex.Metrica](https://metrica.yandex.com/)[世界第二大Web分析平台](http://w3techs.com/technologies/overview/traffic_analysis/all)而开发的。多年来一直作为该系统的核心组件被该系统持续使用着。目前为止,该系统在ClickHouse中有超过13万亿条记录,并且每天超过200多亿个事件被处理。它允许直接从原始数据中动态查询并生成报告。本文简要介绍了ClickHouse在其早期发展阶段的目标。 + 
+Yandex.Metrica基于用户定义的字段,对实时访问、连接会话,生成实时的统计报表。这种需求往往需要复杂聚合方式,比如对访问用户进行去重。构建报表的数据,是实时接收存储的新数据。 + +截至2014年4月,Yandex.Metrica每天跟踪大约120亿个事件(用户的点击和浏览)。为了可以创建自定义的报表,我们必须存储全部这些事件。同时,这些查询可能需要在几百毫秒内扫描数百万行的数据,或在几秒内扫描数亿行的数据。 + +## Yandex.Metrica以及其他Yandex服务的使用案例 + +在Yandex.Metrica中,ClickHouse被用于多个场景中。 +它的主要任务是使用原始数据在线的提供各种数据报告。它使用374台服务器的集群,存储了20.3万亿行的数据。在去除重复与副本数据的情况下,压缩后的数据达到了2PB。未压缩前(TSV格式)它大概有17PB。 + +ClickHouse还被使用在: + +- 存储来自Yandex.Metrica回话重放数据。 +- 处理中间数据 +- 与Analytics一起构建全球报表。 +- 为调试Yandex.Metrica引擎运行查询 +- 分析来自API和用户界面的日志数据 + +ClickHouse在其他Yandex服务中至少有12个安装:search verticals, Market, Direct, business analytics, mobile development, AdFox, personal services等。 + +## 聚合与非聚合数据 + +有一种流行的观点认为,想要有效的计算统计数据,必须要聚合数据,因为聚合将降低数据量。 + +但是数据聚合是一个有诸多限制的解决方案,例如: + +- 你必须提前知道用户定义的报表的字段列表 +- 用户无法自定义报表 +- 当聚合条件过多时,可能不会减少数据,聚合是无用的。 +- 存在大量报表时,有太多的聚合变化(组合爆炸) +- 当聚合条件有非常大的基数时(如:url),数据量没有太大减少(少于两倍) +- 聚合的数据量可能会增长而不是收缩 +- 用户不会查看我们为他生成的所有报告,大部分计算将是无用的 +- 各种聚合可能违背了数据的逻辑完整性 + +如果我们直接使用非聚合数据而不尽兴任何聚合时,我们的计算量可能是减少的。 + +然而,相对于聚合中很大一部分工作被离线完成,在线计算需要尽快的完成计算,因为用户在等待结果。 + +Yandex.Metrica 有一个专门用于聚合数据的系统,称为Metrage,它可以用作大部分报表。 +从2009年开始,Yandex.Metrica还为非聚合数据使用专门的OLAP数据库,称为OLAPServer,它以前用于报表构建系统。 +OLAPServer可以很好的工作在非聚合数据上,但是它有诸多限制,导致无法根据需要将其用于所有报表中。如,缺少对数据类型的支持(只支持数据),无法实时增量的更新数据(只能通过每天重写数据完成)。OLAPServer不是一个数据库管理系统,它只是一个数据库。 + +为了消除OLAPServer的这些局限性,解决所有报表使用非聚合数据的问题,我们开发了ClickHouse数据库管理系统。 + + +[来源文章](https://clickhouse.yandex/docs/en/introduction/ya_metrika_task/) diff --git a/libs/libcommon/cmake/find_jemalloc.cmake b/libs/libcommon/cmake/find_jemalloc.cmake index eb8c9cb6ac7..f3fa138e5cc 100644 --- a/libs/libcommon/cmake/find_jemalloc.cmake +++ b/libs/libcommon/cmake/find_jemalloc.cmake @@ -5,14 +5,13 @@ else () endif () option (ENABLE_JEMALLOC "Set to TRUE to use jemalloc" ${ENABLE_JEMALLOC_DEFAULT}) -if (OS_LINUX) +if (OS_LINUX AND NOT ARCH_ARM) option (USE_INTERNAL_JEMALLOC_LIBRARY "Set to FALSE to use system jemalloc library instead of bundled" ${NOT_UNBUNDLED}) elseif () option (USE_INTERNAL_JEMALLOC_LIBRARY "Set to FALSE to use system jemalloc library instead of bundled" OFF) endif() if (ENABLE_JEMALLOC) - if (USE_INTERNAL_JEMALLOC_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/jemalloc/src/jemalloc.c") message (WARNING "submodule contrib/jemalloc is missing. to fix try run: \n git submodule update --init --recursive") set (USE_INTERNAL_JEMALLOC_LIBRARY 0) @@ -23,7 +22,7 @@ if (ENABLE_JEMALLOC) find_package (JeMalloc) endif () - if ((NOT JEMALLOC_LIBRARIES OR NOT JEMALLOC_INCLUDE_DIR) AND NOT MISSING_INTERNAL_JEMALLOC_LIBRARY) + if ((NOT JEMALLOC_LIBRARIES OR NOT JEMALLOC_INCLUDE_DIR) AND NOT MISSING_INTERNAL_JEMALLOC_LIBRARY AND NOT ARCH_ARM) set (JEMALLOC_LIBRARIES "jemalloc") set (JEMALLOC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/jemalloc-cmake/include" "${ClickHouse_SOURCE_DIR}/contrib/jemalloc-cmake/include_linux_x86_64") set (USE_INTERNAL_JEMALLOC_LIBRARY 1) diff --git a/libs/libcommon/include/common/ThreadPool.h b/libs/libcommon/include/common/ThreadPool.h index 15952042a04..980fdcba355 100644 --- a/libs/libcommon/include/common/ThreadPool.h +++ b/libs/libcommon/include/common/ThreadPool.h @@ -58,3 +58,17 @@ private: void worker(); }; + +/// Allows to save first catched exception in jobs and postpone its rethrow. 
+class ExceptionHandler +{ +public: + void setException(std::exception_ptr && exception); + void throwIfException(); + +private: + std::exception_ptr first_exception; + std::mutex mutex; +}; + +ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler); diff --git a/libs/libcommon/src/ThreadPool.cpp b/libs/libcommon/src/ThreadPool.cpp index 06f563bc55a..4da7c9689b8 100644 --- a/libs/libcommon/src/ThreadPool.cpp +++ b/libs/libcommon/src/ThreadPool.cpp @@ -112,3 +112,34 @@ void ThreadPool::worker() } } + +void ExceptionHandler::setException(std::exception_ptr && exception) +{ + std::unique_lock lock(mutex); + if (!first_exception) + first_exception = std::move(exception); +} + +void ExceptionHandler::throwIfException() +{ + std::unique_lock lock(mutex); + if (first_exception) + std::rethrow_exception(first_exception); +} + + +ThreadPool::Job createExceptionHandledJob(ThreadPool::Job job, ExceptionHandler & handler) +{ + return [job{std::move(job)}, &handler] () + { + try + { + job(); + } + catch (...) + { + handler.setException(std::current_exception()); + } + }; +} + diff --git a/website/index.css b/website/index.css index c5061595a9c..b303ac7ffbe 100644 --- a/website/index.css +++ b/website/index.css @@ -409,6 +409,7 @@ img { } #index_ul { + padding-bottom: 30px; padding-left: 0; margin: 0 0 30px -16px; font-size: 90%; diff --git a/website/index.html b/website/index.html index 048b46f60e4..dab2cbf30bb 100644 --- a/website/index.html +++ b/website/index.html @@ -3,6 +3,7 @@ + ClickHouse — open source distributed column-oriented DBMS