Merge remote-tracking branch 'hdfs/master'

This commit is contained in:
chenxing.xc 2018-11-20 19:28:40 +08:00
commit 939368cf08
41 changed files with 2713 additions and 189 deletions

12
.gitmodules vendored
View File

@ -31,9 +31,6 @@
[submodule "contrib/ssl"]
path = contrib/ssl
url = https://github.com/ClickHouse-Extras/ssl.git
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/ClickHouse-Extras/boost.git
[submodule "contrib/llvm"]
path = contrib/llvm
url = https://github.com/ClickHouse-Extras/llvm
@ -46,3 +43,12 @@
[submodule "contrib/unixodbc"]
path = contrib/unixodbc
url = https://github.com/ClickHouse-Extras/UnixODBC.git
[submodule "contrib/libhdfs3"]
path = contrib/libhdfs3
url = https://github.com/chenxing-xc/ClickHouse-Extras-libhdfs3.git
[submodule "contrib/protobuf"]
path = contrib/protobuf
url = https://github.com/chenxing-xc/ClickHouse-Extras-protobuf.git
[submodule "contrib/boost"]
path = contrib/boost
url = https://github.com/chenxing-xc/ClickHouse-Extras-boost.git

View File

@ -255,6 +255,7 @@ include (cmake/find_rdkafka.cmake)
include (cmake/find_capnp.cmake)
include (cmake/find_llvm.cmake)
include (cmake/find_cpuid.cmake)
include (cmake/find_hdfs3.cmake)
include (cmake/find_consistent-hashing.cmake)
if (ENABLE_TESTS)
include (cmake/find_gtest.cmake)

13
cmake/find_hdfs3.cmake Normal file
View File

@ -0,0 +1,13 @@
# Locate libhdfs3 (HDFS client library).
#
# Input:
#   USE_INTERNAL_HDFS3_LIBRARY - ON (default) uses the copy bundled in contrib/libhdfs3,
#                                OFF asks find_package for a system installation first.
# Output:
#   HDFS3_INCLUDE_DIR - directory with the libhdfs3 client headers
#   HDFS3_LIBRARY     - library (or target name) to link against
option (USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ON)

if (NOT USE_INTERNAL_HDFS3_LIBRARY)
    find_package (hdfs3)
endif ()

if (NOT (HDFS3_LIBRARY AND HDFS3_INCLUDE_DIR))
    if (NOT USE_INTERNAL_HDFS3_LIBRARY)
        # The bundled target "hdfs3" is only built when USE_INTERNAL_HDFS3_LIBRARY is true,
        # so the fallback has to switch it on; otherwise HDFS3_LIBRARY would name a target
        # that never gets created.
        message (STATUS "System hdfs3 not found, falling back to the bundled copy")
        set (USE_INTERNAL_HDFS3_LIBRARY ON)
    endif ()
    set (HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/src/client")
    set (HDFS3_LIBRARY hdfs3)
endif ()

message (STATUS "Using hdfs3: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}")

View File

@ -1,6 +1,6 @@
# Broken in macos. TODO: update clang, re-test, enable
if (NOT APPLE)
option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 1)
option (ENABLE_EMBEDDED_COMPILER "Set to TRUE to enable support for 'compile' option for query execution" 0)
option (USE_INTERNAL_LLVM_LIBRARY "Use bundled or system LLVM library. Default: system library for quicker developer builds." ${APPLE})
endif ()

80
cmake/find_protobuf.cmake Normal file
View File

@ -0,0 +1,80 @@
# Locate protobuf.
#
# Input:
#   USE_INTERNAL_PROTOBUF_LIBRARY - ON (default) uses the copy bundled in contrib/protobuf,
#                                   OFF asks find_package(Protobuf) for a system one first.
# Output:
#   Protobuf_INCLUDE_DIR, Protobuf_LIBRARY and, for the bundled copy, Protobuf_PROTOC_LIBRARY,
#   Protobuf_LITE_LIBRARY, Protobuf_PROTOC_EXECUTABLE and the PROTOBUF_GENERATE_CPP() function.
option (USE_INTERNAL_PROTOBUF_LIBRARY "Set to FALSE to use system protobuf instead of bundled" ON)

if (NOT USE_INTERNAL_PROTOBUF_LIBRARY)
    find_package (Protobuf)
endif ()

if (NOT (Protobuf_LIBRARY AND Protobuf_INCLUDE_DIR))
    if (NOT USE_INTERNAL_PROTOBUF_LIBRARY)
        # The includer only adds the bundled protobuf build when this variable is true,
        # so the fallback must enable it; otherwise "libprotobuf" and "protoc" below
        # would refer to targets that are never created.
        message (STATUS "System protobuf not found, falling back to the bundled copy")
        set (USE_INTERNAL_PROTOBUF_LIBRARY ON)
    endif ()

    set (Protobuf_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/contrib/protobuf/src)
    set (Protobuf_LIBRARY libprotobuf)
    set (Protobuf_PROTOC_LIBRARY libprotoc)
    set (Protobuf_LITE_LIBRARY libprotobuf-lite)
    # Location where the bundled protoc binary is expected to be built.
    set (Protobuf_PROTOC_EXECUTABLE ${CMAKE_BINARY_DIR}/contrib/protobuf/cmake/protoc)

    if (NOT DEFINED PROTOBUF_GENERATE_CPP_APPEND_PATH)
        set (PROTOBUF_GENERATE_CPP_APPEND_PATH TRUE)
    endif ()

    # PROTOBUF_GENERATE_CPP(<srcs-var> <hdrs-var> <proto files...>)
    #
    # Adds one custom command per .proto file that runs protoc with --cpp_out into
    # CMAKE_CURRENT_BINARY_DIR, and returns the generated .pb.cc / .pb.h paths in the
    # two output variables (set in the caller's scope).
    #
    # Honours PROTOBUF_GENERATE_CPP_APPEND_PATH (add each proto's directory as -I) and
    # Protobuf_IMPORT_DIRS / PROTOBUF_IMPORT_DIRS (extra -I directories).
    function (PROTOBUF_GENERATE_CPP SRCS HDRS)
        if (NOT ARGN)
            message (SEND_ERROR "Error: PROTOBUF_GENERATE_CPP() called without any proto files")
            return ()
        endif ()

        if (PROTOBUF_GENERATE_CPP_APPEND_PATH)
            # Create an include path for each file specified
            foreach (FIL ${ARGN})
                get_filename_component (ABS_FIL ${FIL} ABSOLUTE)
                get_filename_component (ABS_PATH ${ABS_FIL} PATH)
                list (FIND _protobuf_include_path ${ABS_PATH} _contains_already)
                if (${_contains_already} EQUAL -1)
                    list (APPEND _protobuf_include_path -I ${ABS_PATH})
                endif ()
            endforeach ()
        else ()
            set (_protobuf_include_path -I ${CMAKE_CURRENT_SOURCE_DIR})
        endif ()

        # Accept the legacy upper-case spelling of the import-dir variable.
        if (DEFINED PROTOBUF_IMPORT_DIRS AND NOT DEFINED Protobuf_IMPORT_DIRS)
            set (Protobuf_IMPORT_DIRS "${PROTOBUF_IMPORT_DIRS}")
        endif ()

        if (DEFINED Protobuf_IMPORT_DIRS)
            foreach (DIR ${Protobuf_IMPORT_DIRS})
                get_filename_component (ABS_PATH ${DIR} ABSOLUTE)
                list (FIND _protobuf_include_path ${ABS_PATH} _contains_already)
                if (${_contains_already} EQUAL -1)
                    list (APPEND _protobuf_include_path -I ${ABS_PATH})
                endif ()
            endforeach ()
        endif ()

        # Start from empty result lists.
        set (${SRCS})
        set (${HDRS})

        foreach (FIL ${ARGN})
            get_filename_component (ABS_FIL ${FIL} ABSOLUTE)
            get_filename_component (FIL_WE ${FIL} NAME_WE)

            list (APPEND ${SRCS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc")
            list (APPEND ${HDRS} "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h")

            add_custom_command (
                OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.cc"
                       "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.pb.h"
                COMMAND ${Protobuf_PROTOC_EXECUTABLE}
                ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} ${_protobuf_include_path} ${ABS_FIL}
                DEPENDS ${ABS_FIL} ${Protobuf_PROTOC_EXECUTABLE}
                COMMENT "Running C++ protocol buffer compiler on ${FIL}"
                VERBATIM )
        endforeach ()

        set_source_files_properties (${${SRCS}} ${${HDRS}} PROPERTIES GENERATED TRUE)
        set (${SRCS} ${${SRCS}} PARENT_SCOPE)
        set (${HDRS} ${${HDRS}} PARENT_SCOPE)
    endfunction ()
endif ()

message (STATUS "Using protobuf: ${Protobuf_INCLUDE_DIR} : ${Protobuf_LIBRARY}")

View File

@ -191,3 +191,12 @@ if (USE_INTERNAL_LLVM_LIBRARY)
add_subdirectory (llvm/llvm)
endif ()
# Bundled libhdfs3: it needs protobuf, so protobuf is resolved first and, when the
# bundled protobuf is selected, built as a static library without its tests.
if (USE_INTERNAL_HDFS3_LIBRARY)
    include (${CMAKE_SOURCE_DIR}/cmake/find_protobuf.cmake)

    if (USE_INTERNAL_PROTOBUF_LIBRARY)
        set (protobuf_BUILD_TESTS OFF CACHE INTERNAL "" FORCE)
        set (protobuf_BUILD_SHARED_LIBS OFF CACHE INTERNAL "" FORCE)
        add_subdirectory (protobuf/cmake)
    endif ()

    add_subdirectory (libhdfs3-cmake)
endif ()

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 2d5cb2c86f61126f4e1efe9ab97332efd44e7dea
Subproject commit 2e6b038865aec18a9af199d0898069e9fa53eabf

1
contrib/libhdfs3 vendored Submodule

@ -0,0 +1 @@
Subproject commit 695aefb67f416059a13c463374422404eb0fcd99

View File

@ -0,0 +1,10 @@
// Configure-time compile probe (presumably the source behind the
// HAVE_NESTED_EXCEPTION check): it builds only when the standard library
// provides std::throw_with_nested.
#include <exception>
#include <stdexcept>
int main() {
try {
throw 2;
} catch (int) {
// The call being probed for: rethrow with the active exception nested inside.
std::throw_with_nested(std::runtime_error("test"));
}
}

View File

@ -0,0 +1,7 @@
// Configure-time compile probe (presumably the source behind the
// HAVE_STEADY_CLOCK check): it builds only when <chrono> declares
// std::chrono::steady_clock.
#include <chrono>
using std::chrono::steady_clock;
// Never called; it exists only so that the type name is used in a declaration.
void foo(const steady_clock &clock) {
return;
}

View File

@ -0,0 +1,10 @@
// Configure-time compile probe (presumably the source behind the
// STRERROR_R_RETURN_INT check): it builds as C++ only when strerror_r
// returns int, because a char * result cannot initialise an int.
#include <string.h>
int main()
{
// We can't test "char *p = strerror_r()" because that only causes a
// compiler warning when strerror_r returns an integer.
char *buf = 0;
int i = strerror_r(0, buf, 100);
return i;
}

View File

@ -0,0 +1,48 @@
# Code coverage support: instruments the build for gcov and defines the helper
# targets ShowCoverage, ShowAllCoverage and ResetCoverage (lcov + genhtml).

# Tools that must be present
find_program(GCOV_PATH gcov)
find_program(LCOV_PATH lcov)
find_program(GENHTML_PATH genhtml)

if(NOT GCOV_PATH)
    message(FATAL_ERROR "gcov not found! Aborting...")
endif()

if(NOT CMAKE_BUILD_TYPE STREQUAL Debug)
    message(WARNING "Code coverage results with an optimised (non-Debug) build may be misleading")
endif()

# Instrumentation flags for compiling and linking
add_definitions(-fprofile-arcs -ftest-coverage)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fprofile-arcs ")
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fprofile-arcs ")

if(NOT LCOV_PATH)
    message(FATAL_ERROR "lcov not found! Aborting...")
endif()

if(NOT GENHTML_PATH)
    message(FATAL_ERROR "genhtml not found! Aborting...")
endif()

# Capture the counters, drop uninteresting paths, render an HTML report.
add_custom_target(ShowCoverage
    COMMAND ${LCOV_PATH} --directory . --capture --output-file CodeCoverage.info
    COMMAND ${LCOV_PATH} --remove CodeCoverage.info '${CMAKE_CURRENT_BINARY_DIR}/*' 'test/*' 'mock/*' '/usr/*' '/opt/*' '*ext/rhel5_x86_64*' '*ext/osx*' --output-file CodeCoverage.info.cleaned
    COMMAND ${GENHTML_PATH} -o CodeCoverageReport CodeCoverage.info.cleaned
)

# Merge two previously cleaned captures and render a combined report.
# NOTE(review): "-ig" looks like sed's in-place flag with backup suffix "g",
# not a global-replace flag - confirm that this is intended.
add_custom_target(ShowAllCoverage
    COMMAND ${LCOV_PATH} -a CodeCoverage.info.cleaned -a CodeCoverage.info.cleaned_withoutHA -o AllCodeCoverage.info
    COMMAND sed -e 's|/.*/src|${CMAKE_SOURCE_DIR}/src|' -ig AllCodeCoverage.info
    COMMAND ${GENHTML_PATH} -o AllCodeCoverageReport AllCodeCoverage.info
)

# Zero all lcov counters.
add_custom_target(ResetCoverage
    COMMAND ${LCOV_PATH} --directory . --zerocounters
)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,26 @@
# - Try to find the CURL library (curl)
#
# Once done this will define
#
#  CURL_FOUND - TRUE when both the header and the library were located
#  CURL_INCLUDE_DIR - directory that contains curl/curl.h
#  CURL_LIBRARIES - the curl library to link against

# Both results already known (e.g. from the cache): do not report again.
if(CURL_INCLUDE_DIR AND CURL_LIBRARIES)
    set(CURL_FIND_QUIETLY TRUE)
endif()

find_path(CURL_INCLUDE_DIR curl/curl.h)
find_library(CURL_LIBRARIES curl)

# Handles the QUIETLY and REQUIRED arguments and sets CURL_FOUND to TRUE
# when every listed variable is set.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(CURL DEFAULT_MSG CURL_LIBRARIES CURL_INCLUDE_DIR)

mark_as_advanced(CURL_INCLUDE_DIR CURL_LIBRARIES)

View File

@ -0,0 +1,26 @@
# - Try to find the GNU sasl library (gsasl)
#
# Once done this will define
#
#  GSASL_FOUND - TRUE when both the header and the library were located
#  GSASL_INCLUDE_DIR - directory that contains gsasl.h
#  GSASL_LIBRARIES - the gsasl library to link against

# Both results already known (e.g. from the cache): do not report again.
if(GSASL_INCLUDE_DIR AND GSASL_LIBRARIES)
    set(GSasl_FIND_QUIETLY TRUE)
endif()

find_path(GSASL_INCLUDE_DIR gsasl.h)
find_library(GSASL_LIBRARIES gsasl)

include(FindPackageHandleStandardArgs)

# The package name must match the spelling used by the caller
# (find_package(GSasl ...)): the helper reads <name>_FIND_REQUIRED and
# <name>_FIND_QUIETLY, so passing "GSASL" made it ignore both REQUIRED and the
# GSasl_FIND_QUIETLY flag set above. The upper-case GSASL_FOUND result is
# still set by the helper.
find_package_handle_standard_args(GSasl DEFAULT_MSG GSASL_LIBRARIES GSASL_INCLUDE_DIR)

mark_as_advanced(GSASL_INCLUDE_DIR GSASL_LIBRARIES)

View File

@ -0,0 +1,65 @@
# Locate GoogleTest / GoogleMock (headers plus static libraries) and verify
# that a small program using them compiles and runs.
#
# Results: GoogleTest_INCLUDE_DIR, GoogleTest_LIBRARIES, GoogleTest_CHECK_FINE
include(CheckCXXSourceRuns)

# Headers: only the listed directories are searched (NO_DEFAULT_PATH).
find_path(GTest_INCLUDE_DIR gtest/gtest.h
    NO_DEFAULT_PATH
    PATHS
        "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/googletest/include"
        "/usr/local/include"
        "/usr/include")

find_path(GMock_INCLUDE_DIR gmock/gmock.h
    NO_DEFAULT_PATH
    PATHS
        "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/googlemock/include"
        "/usr/local/include"
        "/usr/include")

# Static libraries
find_library(Gtest_LIBRARY
    NAMES libgtest.a
    HINTS
        "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/build/googlemock/gtest"
        "/usr/local/lib"
        "/usr/lib")

find_library(Gmock_LIBRARY
    NAMES libgmock.a
    HINTS
        "${PROJECT_SOURCE_DIR}/../thirdparty/googletest/build/googlemock"
        "/usr/local/lib"
        "/usr/lib")

message(STATUS "Find GoogleTest include path: ${GTest_INCLUDE_DIR}")
message(STATUS "Find GoogleMock include path: ${GMock_INCLUDE_DIR}")
message(STATUS "Find Gtest library path: ${Gtest_LIBRARY}")
message(STATUS "Find Gmock library path: ${Gmock_LIBRARY}")

# Build and run a tiny test program against what was found.
set(CMAKE_REQUIRED_INCLUDES ${GTest_INCLUDE_DIR} ${GMock_INCLUDE_DIR})
set(CMAKE_REQUIRED_LIBRARIES ${Gtest_LIBRARY} ${Gmock_LIBRARY} -lpthread)
set(CMAKE_REQUIRED_FLAGS)
check_cxx_source_runs("
#include <gtest/gtest.h>
#include <gmock/gmock.h>
int main(int argc, char *argv[])
{
double pi = 3.14;
EXPECT_EQ(pi, 3.14);
return 0;
}
" GoogleTest_CHECK_FINE)
message(STATUS "GoogleTest check: ${GoogleTest_CHECK_FINE}")

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
    GoogleTest
    REQUIRED_VARS
        GTest_INCLUDE_DIR
        GMock_INCLUDE_DIR
        Gtest_LIBRARY
        Gmock_LIBRARY
        GoogleTest_CHECK_FINE)

# Aggregated results for consumers
set(GoogleTest_INCLUDE_DIR ${GTest_INCLUDE_DIR} ${GMock_INCLUDE_DIR})
set(GoogleTest_LIBRARIES ${Gtest_LIBRARY} ${Gmock_LIBRARY})

mark_as_advanced(
    GoogleTest_INCLUDE_DIR
    GoogleTest_LIBRARIES)

View File

@ -0,0 +1,23 @@
# - Find kerberos
# Find the native KERBEROS includes and library
#
#  KERBEROS_INCLUDE_DIRS - directory that contains krb5.h
#  KERBEROS_LIBRARIES    - library to link when using krb5
#  KERBEROS_FOUND        - TRUE when both of the above were located

# Include directory already known (e.g. from the cache): be silent.
if(KERBEROS_INCLUDE_DIRS)
    set(KERBEROS_FIND_QUIETLY TRUE)
endif()

find_path(KERBEROS_INCLUDE_DIRS krb5.h)

# NOTE(review): find_library stores a single path, so only one of these names
# ends up in KERBEROS_LIBRARIES - confirm that linking just that one is enough.
set(KERBEROS_NAMES krb5 k5crypto com_err)
find_library(KERBEROS_LIBRARIES NAMES ${KERBEROS_NAMES})

# Handles the QUIETLY and REQUIRED arguments and sets KERBEROS_FOUND to TRUE
# when every listed variable is set.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(KERBEROS DEFAULT_MSG KERBEROS_LIBRARIES KERBEROS_INCLUDE_DIRS)

mark_as_advanced(KERBEROS_LIBRARIES KERBEROS_INCLUDE_DIRS)

View File

@ -0,0 +1,23 @@
# - Find libuuid
# Find the native LIBUUID includes and library
#
#  LIBUUID_INCLUDE_DIRS - directory that contains uuid/uuid.h
#  LIBUUID_LIBRARIES    - library to link when using uuid
#  LIBUUID_FOUND        - TRUE when both of the above were located

# Include directory already known (e.g. from the cache): be silent.
if(LIBUUID_INCLUDE_DIRS)
    set(LibUUID_FIND_QUIETLY TRUE)
endif()

find_path(LIBUUID_INCLUDE_DIRS uuid/uuid.h)

set(LIBUUID_NAMES uuid)
find_library(LIBUUID_LIBRARIES NAMES ${LIBUUID_NAMES})

include(FindPackageHandleStandardArgs)

# The package name must match the spelling used by the caller
# (find_package(LibUUID ...)): the helper reads <name>_FIND_REQUIRED and
# <name>_FIND_QUIETLY, so passing "LIBUUID" made it ignore REQUIRED. The
# upper-case LIBUUID_FOUND result is still set by the helper.
find_package_handle_standard_args(LibUUID DEFAULT_MSG LIBUUID_LIBRARIES LIBUUID_INCLUDE_DIRS)

mark_as_advanced(LIBUUID_LIBRARIES LIBUUID_INCLUDE_DIRS)

View File

@ -0,0 +1,26 @@
# - Try to find the OpenSSL library
#
# Once done this will define
#
#  SSL_FOUND - TRUE when both the header and the library were located
#  SSL_INCLUDE_DIR - directory that contains openssl/opensslv.h
#  SSL_LIBRARIES - the library found under the name "crypto"

# Both results already known (e.g. from the cache): do not report again.
if(SSL_INCLUDE_DIR AND SSL_LIBRARIES)
    set(SSL_FIND_QUIETLY TRUE)
endif()

find_path(SSL_INCLUDE_DIR openssl/opensslv.h)
find_library(SSL_LIBRARIES crypto)

# Handles the QUIETLY and REQUIRED arguments and sets SSL_FOUND to TRUE
# when every listed variable is set.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(SSL DEFAULT_MSG SSL_LIBRARIES SSL_INCLUDE_DIR)

mark_as_advanced(SSL_INCLUDE_DIR SSL_LIBRARIES)

View File

@ -0,0 +1,46 @@
# AUTO_SOURCES(<result-var> <glob-pattern> <subdirs>|RECURSE [<root>])
#
# Collects files matching <glob-pattern> into <result-var> (set in the caller's scope).
#   - <subdirs> is a list of directories: the pattern is globbed in the current
#     directory and in each listed directory (non-recursive).
#   - <subdirs> is the literal RECURSE: the pattern is globbed in <root> (default ".")
#     and recursively in every sub-directory of it except the CMakeFiles directory.
#
# If the variable FILTER_OUT is visible from the calling scope and non-empty, the
# entries it lists are removed from the result.
function(AUTO_SOURCES RETURN_VALUE PATTERN SOURCE_SUBDIRS)
    if("${SOURCE_SUBDIRS}" STREQUAL "RECURSE")
        # Optional 4th argument overrides the root directory.
        set(PATH ".")
        if(${ARGC} EQUAL 4)
            list(GET ARGV 3 PATH)
        endif()

        unset(${RETURN_VALUE})

        file(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
        list(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})

        file(GLOB SUBDIRS RELATIVE ${PATH} ${PATH}/*)

        foreach(DIR ${SUBDIRS})
            if(IS_DIRECTORY ${PATH}/${DIR})
                # Compare case-insensitively: the directory is spelled "CMakeFiles",
                # which the previous literal "CMAKEFILES" comparison never matched.
                string(TOUPPER "${DIR}" DIR_UPPER)
                if(NOT "${DIR_UPPER}" STREQUAL "CMAKEFILES")
                    file(GLOB_RECURSE SUBDIR_FILES "${PATH}/${DIR}/${PATTERN}")
                    list(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
                endif()
            endif()
        endforeach()
    else()
        file(GLOB ${RETURN_VALUE} "${PATTERN}")

        foreach(PATH ${SOURCE_SUBDIRS})
            file(GLOB SUBDIR_FILES "${PATH}/${PATTERN}")
            list(APPEND ${RETURN_VALUE} ${SUBDIR_FILES})
        endforeach()
    endif()

    # Test the variable itself, not its expansion: if(${FILTER_OUT}) treated the
    # file name as a variable name (always false) and broke on multi-entry lists.
    if(FILTER_OUT)
        list(REMOVE_ITEM ${RETURN_VALUE} ${FILTER_OUT})
    endif()

    set(${RETURN_VALUE} ${${RETURN_VALUE}} PARENT_SCOPE)
endfunction()
# CONTAINS_STRING(<file> <regex> <result-var>)
#
# Sets <result-var> to TRUE in the caller's scope when at least one line of <file>
# matches <regex>. When nothing matches, <result-var> is left untouched.
function(CONTAINS_STRING FILE SEARCH RETURN_VALUE)
    file(STRINGS ${FILE} matching_lines REGEX ".*${SEARCH}.*")
    if(NOT matching_lines)
        return()
    endif()
    set(${RETURN_VALUE} TRUE PARENT_SCOPE)
endfunction()

View File

@ -0,0 +1,169 @@
# User-visible build switches
option(ENABLE_COVERAGE "enable code coverage" OFF)
option(ENABLE_DEBUG "enable debug build" OFF)
option(ENABLE_SSE "enable SSE4.2 buildin function" ON)
option(ENABLE_FRAME_POINTER "enable frame pointer on 64bit system with flag -fno-omit-frame-pointer, on 32bit system, it is always enabled" ON)
option(ENABLE_LIBCPP "using libc++ instead of libstdc++, only valid for clang compiler" OFF)
option(ENABLE_BOOST "using boost instead of native compiler c++0x support" OFF)

# Probe for C library functions; results land in HAVE_DLADDR / HAVE_NANOSLEEP.
include(CheckFunctionExists)
check_function_exists(dladdr HAVE_DLADDR)
check_function_exists(nanosleep HAVE_NANOSLEEP)
# Select the build type.
#  - ENABLE_DEBUG on (any true value, not only the literal "ON"): force a Debug
#    build with "-g -O0".
#  - otherwise: default to RelWithDebInfo, but only when no build type has been
#    chosen yet, so that a CMAKE_BUILD_TYPE given by the user or by an enclosing
#    project is no longer overwritten in the cache.
if(ENABLE_DEBUG)
    set(CMAKE_BUILD_TYPE Debug CACHE
        STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
    set(CMAKE_CXX_FLAGS_DEBUG "-g -O0" CACHE STRING "compiler flags for debug" FORCE)
    set(CMAKE_C_FLAGS_DEBUG "-g -O0" CACHE STRING "compiler flags for debug" FORCE)
elseif(NOT CMAKE_BUILD_TYPE)
    # FORCE is needed here because the cache entry may already exist with an empty value.
    set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE
        STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()
# Flags applied regardless of compiler.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-strict-aliasing")

# The switches below are tested for truthiness rather than compared with the
# literal string "ON", so values such as 1, TRUE or YES are honoured as well.
if(ENABLE_COVERAGE)
    include(CodeCoverage)
endif()

if(ENABLE_FRAME_POINTER)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer")
endif()

if(ENABLE_SSE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2")
endif()
# Directory prefix used by the tests; defaults to "./".
if(NOT TEST_HDFS_PREFIX)
    set(TEST_HDFS_PREFIX "./" CACHE STRING "default directory prefix used for test." FORCE)
endif()

# Preprocessor definitions for every source in this directory.
add_definitions(-DTEST_HDFS_PREFIX="${TEST_HDFS_PREFIX}" -D__STDC_FORMAT_MACROS -D_GNU_SOURCE)

# OS-specific linker options passed through the compiler driver.
if(OS_MACOSX AND CMAKE_COMPILER_IS_GNUCXX)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,-bind_at_load")
endif()

if(OS_LINUX)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wl,--export-dynamic")
endif()

# Boost is looked up only when explicitly requested; NEED_BOOST records the decision.
set(BOOST_ROOT ${CMAKE_PREFIX_PATH})

if(NOT ENABLE_BOOST STREQUAL ON)
    set(NEED_BOOST false CACHE INTERNAL "boost is required")
else()
    message(STATUS "using boost instead of native compiler c++0x support.")
    find_package(Boost 1.50 REQUIRED)
    set(NEED_BOOST true CACHE INTERNAL "boost is required")
endif()
# Compiler-specific setup: chooses the C++ dialect flag, decides whether Boost is
# needed as a substitute for missing C++0x library support, and adds -Wall.
IF(CMAKE_COMPILER_IS_GNUCXX)
# libc++ can only be selected together with clang.
IF(ENABLE_LIBCPP STREQUAL ON)
MESSAGE(FATAL_ERROR "Unsupport using GCC compiler with libc++")
ENDIF(ENABLE_LIBCPP STREQUAL ON)
# GCC 4.4 on macOS: record that gcc_eh has to be linked explicitly.
IF((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR EQUAL 4) AND OS_MACOSX)
SET(NEED_GCCEH true CACHE INTERNAL "Explicitly link with gcc_eh")
MESSAGE(STATUS "link with -lgcc_eh for TLS")
ENDIF((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR EQUAL 4) AND OS_MACOSX)
# GCC older than 4.4: no -std flag is added and Boost is mandatory.
IF((GCC_COMPILER_VERSION_MAJOR LESS 4) OR ((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 4)))
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
IF(NOT ENABLE_BOOST STREQUAL ON)
MESSAGE(STATUS "gcc version is older than 4.6.0, boost is required.")
FIND_PACKAGE(Boost 1.50 REQUIRED)
SET(NEED_BOOST true CACHE INTERNAL "boost is required")
ENDIF(NOT ENABLE_BOOST STREQUAL ON)
# GCC 4.4 - 4.6 (the condition is minor < 7, although the status text says 4.6.0):
# Boost is mandatory and -std=c++0x is added.
ELSEIF((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 7))
IF(NOT ENABLE_BOOST STREQUAL ON)
MESSAGE(STATUS "gcc version is older than 4.6.0, boost is required.")
FIND_PACKAGE(Boost 1.50 REQUIRED)
SET(NEED_BOOST true CACHE INTERNAL "boost is required")
ENDIF(NOT ENABLE_BOOST STREQUAL ON)
MESSAGE(STATUS "adding c++0x support for gcc compiler")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
# Any newer GCC: only -std=c++0x is added.
ELSE((GCC_COMPILER_VERSION_MAJOR LESS 4) OR ((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 4)))
MESSAGE(STATUS "adding c++0x support for gcc compiler")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
ENDIF((GCC_COMPILER_VERSION_MAJOR LESS 4) OR ((GCC_COMPILER_VERSION_MAJOR EQUAL 4) AND (GCC_COMPILER_VERSION_MINOR LESS 4)))
# With Boost: enforce version 1.50 or newer. Without Boost: nanosleep() must exist
# and _GLIBCXX_USE_NANOSLEEP is defined.
IF(NEED_BOOST)
IF((Boost_MAJOR_VERSION LESS 1) OR ((Boost_MAJOR_VERSION EQUAL 1) AND (Boost_MINOR_VERSION LESS 50)))
MESSAGE(FATAL_ERROR "boost 1.50+ is required")
ENDIF()
ELSE(NEED_BOOST)
IF(HAVE_NANOSLEEP)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_NANOSLEEP")
ELSE(HAVE_NANOSLEEP)
MESSAGE(FATAL_ERROR "nanosleep() is required")
ENDIF(HAVE_NANOSLEEP)
ENDIF(NEED_BOOST)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
# clang: always -std=c++0x; libc++ is used only on request.
ELSEIF(CMAKE_COMPILER_IS_CLANG)
MESSAGE(STATUS "adding c++0x support for clang compiler")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++0x")
SET(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LANGUAGE_STANDARD "c++0x")
IF(ENABLE_LIBCPP STREQUAL ON)
MESSAGE(STATUS "using libc++ instead of libstdc++")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
SET(CMAKE_XCODE_ATTRIBUTE_CLANG_CXX_LIBRARY "libc++")
ENDIF(ENABLE_LIBCPP STREQUAL ON)
ENDIF(CMAKE_COMPILER_IS_GNUCXX)
# Compile-only probe (the link step is replaced by an echo): does strerror_r return int?
try_compile(STRERROR_R_RETURN_INT
    ${CMAKE_CURRENT_BINARY_DIR}
    ${HDFS3_ROOT_DIR}/CMake/CMakeTestCompileStrerror.cpp
    CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
    OUTPUT_VARIABLE OUTPUT)

message(STATUS "Checking whether strerror_r returns an int")

if(NOT STRERROR_R_RETURN_INT)
    message(STATUS "Checking whether strerror_r returns an int -- no")
else()
    message(STATUS "Checking whether strerror_r returns an int -- yes")
endif()
# Compile-only feature probes. Every probe replaces the link step with an echo,
# so only compilation has to succeed; the compiler output goes to OUTPUT.

# std::chrono::steady_clock
try_compile(HAVE_STEADY_CLOCK
    ${CMAKE_CURRENT_BINARY_DIR}
    ${HDFS3_ROOT_DIR}/CMake/CMakeTestCompileSteadyClock.cpp
    CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
    OUTPUT_VARIABLE OUTPUT)

# std::throw_with_nested
try_compile(HAVE_NESTED_EXCEPTION
    ${CMAKE_CURRENT_BINARY_DIR}
    ${HDFS3_ROOT_DIR}/CMake/CMakeTestCompileNestedException.cpp
    CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
    OUTPUT_VARIABLE OUTPUT)

# The header probes below reuse one scratch file, rewritten before each check.

# <boost/chrono.hpp>
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include <boost/chrono.hpp>")
try_compile(HAVE_BOOST_CHRONO
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_CURRENT_BINARY_DIR}/test.cpp
    CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
    -DINCLUDE_DIRECTORIES=${Boost_INCLUDE_DIR}
    OUTPUT_VARIABLE OUTPUT)

# <chrono>
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include <chrono>")
try_compile(HAVE_STD_CHRONO
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_CURRENT_BINARY_DIR}/test.cpp
    CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
    OUTPUT_VARIABLE OUTPUT)

# <boost/atomic.hpp>
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include <boost/atomic.hpp>")
try_compile(HAVE_BOOST_ATOMIC
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_CURRENT_BINARY_DIR}/test.cpp
    CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
    -DINCLUDE_DIRECTORIES=${Boost_INCLUDE_DIR}
    OUTPUT_VARIABLE OUTPUT)

# <atomic>
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test.cpp "#include <atomic>")
try_compile(HAVE_STD_ATOMIC
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_CURRENT_BINARY_DIR}/test.cpp
    CMAKE_FLAGS "-DCMAKE_CXX_LINK_EXECUTABLE='echo not linking now...'"
    OUTPUT_VARIABLE OUTPUT)

View File

@ -0,0 +1,33 @@
# Record the operating system in OS_LINUX / OS_MACOSX; anything else is rejected.
if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
    set(OS_MACOSX true CACHE INTERNAL "Mac Darwin operating system")
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    set(OS_LINUX true CACHE INTERNAL "Linux operating system")
else()
    message(FATAL_ERROR "Unsupported OS: \"${CMAKE_SYSTEM_NAME}\"")
endif()
# Identify the compiler.
#  - GCC: publishes GCC_COMPILER_VERSION_MAJOR / GCC_COMPILER_VERSION_MINOR in the cache.
#  - otherwise the compiler must identify itself as clang (CMAKE_COMPILER_IS_CLANG is set);
#    anything else is a fatal error.
if(CMAKE_COMPILER_IS_GNUCXX)
    execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_COMPILER_VERSION)

    if(NOT GCC_COMPILER_VERSION)
        message(FATAL_ERROR "Cannot get gcc version")
    endif()

    string(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION "${GCC_COMPILER_VERSION}")

    if(NOT GCC_COMPILER_VERSION)
        message(FATAL_ERROR "Cannot get gcc version")
    endif()

    # -dumpversion may print fewer than three components (for example only the
    # major version), so pad with zeros before indexing; list(GET ... 1) on a
    # one-element list would abort the configure step.
    set(gcc_version_parts ${GCC_COMPILER_VERSION} 0 0)
    list(GET gcc_version_parts 0 GCC_COMPILER_VERSION_MAJOR)
    list(GET gcc_version_parts 1 GCC_COMPILER_VERSION_MINOR)
    # The patch level is shown in the status line below; it was never set before.
    list(GET gcc_version_parts 2 GCC_COMPILER_VERSION_PATCH)

    set(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version")
    set(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version")

    message(STATUS "checking compiler: GCC (${GCC_COMPILER_VERSION_MAJOR}.${GCC_COMPILER_VERSION_MINOR}.${GCC_COMPILER_VERSION_PATCH})")
else()
    execute_process(COMMAND ${CMAKE_C_COMPILER} --version OUTPUT_VARIABLE COMPILER_OUTPUT)

    if(COMPILER_OUTPUT MATCHES "clang")
        set(CMAKE_COMPILER_IS_CLANG true CACHE INTERNAL "using clang as compiler")
        message(STATUS "checking compiler: CLANG")
    else()
        message(FATAL_ERROR "Unsupported compiler: \"${CMAKE_CXX_COMPILER}\"")
    endif()
endif()

View File

@ -0,0 +1,224 @@
if (NOT USE_INTERNAL_PROTOBUF_LIBRARY)
    # Stay compatible with a protobuf that was compiled with the old C++ ABI.
    # These assignments replace the inherited flags instead of appending to them.
    set (CMAKE_CXX_FLAGS "-D_GLIBCXX_USE_CXX11_ABI=0")
    set (CMAKE_C_FLAGS "")

    if (NOT CMAKE_VERSION VERSION_LESS "3.8.0")
        unset (CMAKE_CXX_STANDARD)
    endif ()
endif ()
# Locations inside the libhdfs3 submodule
set(HDFS3_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/libhdfs3)
set(HDFS3_SOURCE_DIR ${HDFS3_ROOT_DIR}/src)
set(HDFS3_COMMON_DIR ${HDFS3_SOURCE_DIR}/common)

# Helper modules shipped next to this file take precedence over the default ones.
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/CMake" ${CMAKE_MODULE_PATH})
include(Platform)
include(Options)

# Prefer shared libraries when searching for the dependencies.
set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX})

# External dependencies; all of them are mandatory.
foreach(required_package CURL GSasl KERBEROS LibXml2 LibUUID)
    find_package(${required_package} REQUIRED)
endforeach()
# Protocol definitions, listed in the order they are handed to protoc.
set(PROTO_FILES)
foreach(proto_name
        encryption
        ClientDatanodeProtocol
        hdfs
        Security
        ProtobufRpcEngine
        ClientNamenodeProtocol
        IpcConnectionContext
        RpcHeader
        datatransfer)
    list(APPEND PROTO_FILES ${HDFS3_SOURCE_DIR}/proto/${proto_name}.proto)
endforeach()

# Generated .pb.cc / .pb.h paths are returned in PROTO_SOURCES / PROTO_HEADERS.
PROTOBUF_GENERATE_CPP(PROTO_SOURCES PROTO_HEADERS ${PROTO_FILES})

# platform.h is produced from its template into the build directory.
configure_file(${HDFS3_SOURCE_DIR}/platform.h.in ${CMAKE_CURRENT_BINARY_DIR}/platform.h)
set(SRCS
${HDFS3_SOURCE_DIR}/network/TcpSocket.cpp
${HDFS3_SOURCE_DIR}/network/DomainSocket.cpp
${HDFS3_SOURCE_DIR}/network/BufferedSocketReader.cpp
${HDFS3_SOURCE_DIR}/client/EncryptionZoneIterator.cpp
${HDFS3_SOURCE_DIR}/client/ReadShortCircuitInfo.cpp
${HDFS3_SOURCE_DIR}/client/Pipeline.cpp
${HDFS3_SOURCE_DIR}/client/Hdfs.cpp
${HDFS3_SOURCE_DIR}/client/Packet.cpp
${HDFS3_SOURCE_DIR}/client/OutputStreamImpl.cpp
${HDFS3_SOURCE_DIR}/client/KerberosName.cpp
${HDFS3_SOURCE_DIR}/client/PacketHeader.cpp
${HDFS3_SOURCE_DIR}/client/LocalBlockReader.cpp
${HDFS3_SOURCE_DIR}/client/UserInfo.cpp
${HDFS3_SOURCE_DIR}/client/RemoteBlockReader.cpp
${HDFS3_SOURCE_DIR}/client/Permission.cpp
${HDFS3_SOURCE_DIR}/client/FileSystemImpl.cpp
${HDFS3_SOURCE_DIR}/client/CryptoCodec.cpp
${HDFS3_SOURCE_DIR}/client/DirectoryIterator.cpp
${HDFS3_SOURCE_DIR}/client/FileSystemKey.cpp
${HDFS3_SOURCE_DIR}/client/DataTransferProtocolSender.cpp
${HDFS3_SOURCE_DIR}/client/LeaseRenewer.cpp
${HDFS3_SOURCE_DIR}/client/HttpClient.cpp
${HDFS3_SOURCE_DIR}/client/PeerCache.cpp
${HDFS3_SOURCE_DIR}/client/InputStream.cpp
${HDFS3_SOURCE_DIR}/client/FileSystem.cpp
${HDFS3_SOURCE_DIR}/client/InputStreamImpl.cpp
${HDFS3_SOURCE_DIR}/client/Token.cpp
${HDFS3_SOURCE_DIR}/client/PacketPool.cpp
${HDFS3_SOURCE_DIR}/client/KmsClientProvider.cpp
${HDFS3_SOURCE_DIR}/client/OutputStream.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcClient.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcRemoteCall.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcChannel.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcAuth.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcContentWrapper.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcConfig.cpp
${HDFS3_SOURCE_DIR}/rpc/RpcServerInfo.cpp
${HDFS3_SOURCE_DIR}/rpc/SaslClient.cpp
${HDFS3_SOURCE_DIR}/server/Datanode.cpp
${HDFS3_SOURCE_DIR}/server/LocatedBlocks.cpp
${HDFS3_SOURCE_DIR}/server/NamenodeProxy.cpp
${HDFS3_SOURCE_DIR}/server/NamenodeImpl.cpp
${HDFS3_SOURCE_DIR}/server/NamenodeInfo.cpp
${HDFS3_SOURCE_DIR}/common/WritableUtils.cpp
${HDFS3_SOURCE_DIR}/common/ExceptionInternal.cpp
${HDFS3_SOURCE_DIR}/common/SessionConfig.cpp
${HDFS3_SOURCE_DIR}/common/StackPrinter.cpp
${HDFS3_SOURCE_DIR}/common/Exception.cpp
${HDFS3_SOURCE_DIR}/common/Logger.cpp
${HDFS3_SOURCE_DIR}/common/CFileWrapper.cpp
${HDFS3_SOURCE_DIR}/common/XmlConfig.cpp
${HDFS3_SOURCE_DIR}/common/WriteBuffer.cpp
${HDFS3_SOURCE_DIR}/common/HWCrc32c.cpp
${HDFS3_SOURCE_DIR}/common/MappedFileWrapper.cpp
${HDFS3_SOURCE_DIR}/common/Hash.cpp
${HDFS3_SOURCE_DIR}/common/SWCrc32c.cpp
${HDFS3_SOURCE_DIR}/common/Thread.cpp
${HDFS3_SOURCE_DIR}/network/TcpSocket.h
${HDFS3_SOURCE_DIR}/network/BufferedSocketReader.h
${HDFS3_SOURCE_DIR}/network/Socket.h
${HDFS3_SOURCE_DIR}/network/DomainSocket.h
${HDFS3_SOURCE_DIR}/network/Syscall.h
${HDFS3_SOURCE_DIR}/client/InputStreamImpl.h
${HDFS3_SOURCE_DIR}/client/FileSystem.h
${HDFS3_SOURCE_DIR}/client/ReadShortCircuitInfo.h
${HDFS3_SOURCE_DIR}/client/InputStreamInter.h
${HDFS3_SOURCE_DIR}/client/HttpClient.h
${HDFS3_SOURCE_DIR}/client/FileSystemImpl.h
${HDFS3_SOURCE_DIR}/client/FileEncryptionInfo.h
${HDFS3_SOURCE_DIR}/client/PacketPool.h
${HDFS3_SOURCE_DIR}/client/Pipeline.h
${HDFS3_SOURCE_DIR}/client/EncryptionZoneInfo.h
${HDFS3_SOURCE_DIR}/client/OutputStreamInter.h
${HDFS3_SOURCE_DIR}/client/RemoteBlockReader.h
${HDFS3_SOURCE_DIR}/client/CryptoCodec.h
${HDFS3_SOURCE_DIR}/client/Token.h
${HDFS3_SOURCE_DIR}/client/EncryptionZoneIterator.h
${HDFS3_SOURCE_DIR}/client/KerberosName.h
${HDFS3_SOURCE_DIR}/client/DirectoryIterator.h
${HDFS3_SOURCE_DIR}/client/hdfs.h
${HDFS3_SOURCE_DIR}/client/FileSystemStats.h
${HDFS3_SOURCE_DIR}/client/FileSystemKey.h
${HDFS3_SOURCE_DIR}/client/DataTransferProtocolSender.h
${HDFS3_SOURCE_DIR}/client/Packet.h
${HDFS3_SOURCE_DIR}/client/PacketHeader.h
${HDFS3_SOURCE_DIR}/client/FileSystemInter.h
${HDFS3_SOURCE_DIR}/client/LocalBlockReader.h
${HDFS3_SOURCE_DIR}/client/TokenInternal.h
${HDFS3_SOURCE_DIR}/client/InputStream.h
${HDFS3_SOURCE_DIR}/client/PipelineAck.h
${HDFS3_SOURCE_DIR}/client/BlockReader.h
${HDFS3_SOURCE_DIR}/client/Permission.h
${HDFS3_SOURCE_DIR}/client/OutputStreamImpl.h
${HDFS3_SOURCE_DIR}/client/LeaseRenewer.h
${HDFS3_SOURCE_DIR}/client/UserInfo.h
${HDFS3_SOURCE_DIR}/client/PeerCache.h
${HDFS3_SOURCE_DIR}/client/OutputStream.h
${HDFS3_SOURCE_DIR}/client/FileStatus.h
${HDFS3_SOURCE_DIR}/client/KmsClientProvider.h
${HDFS3_SOURCE_DIR}/client/DataTransferProtocol.h
${HDFS3_SOURCE_DIR}/client/BlockLocation.h
${HDFS3_SOURCE_DIR}/rpc/RpcConfig.h
${HDFS3_SOURCE_DIR}/rpc/SaslClient.h
${HDFS3_SOURCE_DIR}/rpc/RpcAuth.h
${HDFS3_SOURCE_DIR}/rpc/RpcClient.h
${HDFS3_SOURCE_DIR}/rpc/RpcCall.h
${HDFS3_SOURCE_DIR}/rpc/RpcContentWrapper.h
${HDFS3_SOURCE_DIR}/rpc/RpcProtocolInfo.h
${HDFS3_SOURCE_DIR}/rpc/RpcRemoteCall.h
${HDFS3_SOURCE_DIR}/rpc/RpcServerInfo.h
${HDFS3_SOURCE_DIR}/rpc/RpcChannel.h
${HDFS3_SOURCE_DIR}/rpc/RpcChannelKey.h
${HDFS3_SOURCE_DIR}/server/BlockLocalPathInfo.h
${HDFS3_SOURCE_DIR}/server/LocatedBlocks.h
${HDFS3_SOURCE_DIR}/server/DatanodeInfo.h
${HDFS3_SOURCE_DIR}/server/RpcHelper.h
${HDFS3_SOURCE_DIR}/server/ExtendedBlock.h
${HDFS3_SOURCE_DIR}/server/NamenodeInfo.h
${HDFS3_SOURCE_DIR}/server/NamenodeImpl.h
${HDFS3_SOURCE_DIR}/server/LocatedBlock.h
${HDFS3_SOURCE_DIR}/server/NamenodeProxy.h
${HDFS3_SOURCE_DIR}/server/Datanode.h
${HDFS3_SOURCE_DIR}/server/Namenode.h
${HDFS3_SOURCE_DIR}/common/XmlConfig.h
${HDFS3_SOURCE_DIR}/common/Logger.h
${HDFS3_SOURCE_DIR}/common/WriteBuffer.h
${HDFS3_SOURCE_DIR}/common/HWCrc32c.h
${HDFS3_SOURCE_DIR}/common/Checksum.h
${HDFS3_SOURCE_DIR}/common/SessionConfig.h
${HDFS3_SOURCE_DIR}/common/Unordered.h
${HDFS3_SOURCE_DIR}/common/BigEndian.h
${HDFS3_SOURCE_DIR}/common/Thread.h
${HDFS3_SOURCE_DIR}/common/StackPrinter.h
${HDFS3_SOURCE_DIR}/common/Exception.h
${HDFS3_SOURCE_DIR}/common/WritableUtils.h
${HDFS3_SOURCE_DIR}/common/StringUtil.h
${HDFS3_SOURCE_DIR}/common/LruMap.h
${HDFS3_SOURCE_DIR}/common/Function.h
${HDFS3_SOURCE_DIR}/common/DateTime.h
${HDFS3_SOURCE_DIR}/common/Hash.h
${HDFS3_SOURCE_DIR}/common/SWCrc32c.h
${HDFS3_SOURCE_DIR}/common/ExceptionInternal.h
${HDFS3_SOURCE_DIR}/common/Memory.h
${HDFS3_SOURCE_DIR}/common/FileWrapper.h
)
# Static library built from the listed sources and the generated protobuf code.
add_library(hdfs3 STATIC ${SRCS} ${PROTO_SOURCES} ${PROTO_HEADERS})

# The bundled protoc has to exist before the .proto files can be compiled.
if (USE_INTERNAL_PROTOBUF_LIBRARY)
    add_dependencies(hdfs3 protoc)
endif()

target_include_directories(hdfs3 PRIVATE
    ${HDFS3_SOURCE_DIR}
    ${HDFS3_COMMON_DIR}
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CURL_INCLUDE_DIR}
    ${GSASL_INCLUDE_DIR}
    ${KERBEROS_INCLUDE_DIRS}
    ${LIBXML2_INCLUDE_DIR}
    ${LIBUUID_INCLUDE_DIRS}
    # inherited from the parent cmake
    ${Boost_INCLUDE_DIRS}
    ${Protobuf_INCLUDE_DIR}
    ${OPENSSL_INCLUDE_DIR})

target_link_libraries(hdfs3
    ${CURL_LIBRARIES}
    ${GSASL_LIBRARIES}
    ${KERBEROS_LIBRARIES}
    ${LIBXML2_LIBRARIES}
    ${LIBUUID_LIBRARIES}
    # inherited from the parent cmake
    ${Protobuf_LIBRARY}
    ${OPENSSL_LIBRARIES})

1
contrib/protobuf vendored Submodule

@ -0,0 +1 @@
Subproject commit 12735370922a35f03999afff478e1c6d7aa917a4

View File

@ -166,6 +166,7 @@ target_link_libraries (clickhouse_common_io
${Boost_SYSTEM_LIBRARY}
apple_rt
${CMAKE_DL_LIBS}
${HDFS3_LIBRARY}
)
target_link_libraries (dbms

View File

@ -802,7 +802,7 @@ private:
connection->forceConnected();
if (insert && !insert->select)
if (insert && !insert->select && !insert->in_file)
processInsertQuery();
else
processOrdinaryQuery();

View File

@ -0,0 +1,87 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <Poco/URI.h>
#include <hdfs/hdfs.h>
#include <IO/BufferWithOwnMemory.h>
#ifndef O_DIRECT
#define O_DIRECT 00040000
#endif
namespace DB
{

namespace ErrorCodes
{
    extern const int BAD_ARGUMENTS;
    extern const int NETWORK_ERROR;
    extern const int CANNOT_OPEN_FILE;
    extern const int LOGICAL_ERROR;
}

/** Read buffer over a single file stored in HDFS.
  * The constructor parses an URI with host, port and path, connects to the name node
  * and opens the file for reading. The object owns the libhdfs3 builder, the filesystem
  * connection and the file handle, and releases all of them in the destructor.
  */
class ReadBufferFromHDFS : public BufferWithOwnMemory<ReadBuffer>
{
protected:
    std::string hdfs_uri;
    /// Raw libhdfs3 handles; nullptr means "not acquired" or "already released".
    struct hdfsBuilder * builder = nullptr;
    hdfsFS fs = nullptr;
    hdfsFile fin = nullptr;

public:
    /** hdfs_name_ - URI of the file; host, port and path must all be present.
      * buf_size - size of the internal read buffer.
      * Throws Exception if the URI is incomplete, the connection fails or the file cannot be opened.
      */
    ReadBufferFromHDFS(const std::string & hdfs_name_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE)
        : BufferWithOwnMemory<ReadBuffer>(buf_size), hdfs_uri(hdfs_name_)
    {
        /// Parse and validate the URI before acquiring any libhdfs3 resource, so these failures cannot leak.
        Poco::URI uri(hdfs_name_);
        const std::string & host = uri.getHost();
        const auto port = uri.getPort();
        const std::string & path = uri.getPath();

        if (host.empty() || port == 0 || path.empty())
            throw Exception("Illegal HDFS URI : " + hdfs_uri, ErrorCodes::BAD_ARGUMENTS);

        builder = hdfsNewBuilder();
        if (builder == nullptr)
            throw Exception("Unable to create HDFS builder for " + hdfs_uri, ErrorCodes::NETWORK_ERROR);

        /// The destructor does not run when a constructor throws, so release everything by hand on failure.
        try
        {
            /// set read/connect timeout, default value in libhdfs3 is about 1 hour, and too large
            hdfsBuilderConfSetStr(builder, "input.read.timeout", "60000"); // 1 min
            hdfsBuilderConfSetStr(builder, "input.connect.timeout", "60000"); // 1 min
            hdfsBuilderSetNameNode(builder, host.c_str());
            hdfsBuilderSetNameNodePort(builder, port);

            fs = hdfsBuilderConnect(builder);
            if (fs == nullptr)
                throw Exception("Unable to connect to HDFS:" + String(hdfsGetLastError()), ErrorCodes::NETWORK_ERROR);

            fin = hdfsOpenFile(fs, path.c_str(), O_RDONLY, 0, 0, 0);
            if (fin == nullptr)
                throw Exception("Unable to open HDFS file: " + hdfs_uri + " " + String(hdfsGetLastError()),
                    ErrorCodes::CANNOT_OPEN_FILE);
        }
        catch (...)
        {
            release();
            throw;
        }
    }

    /// Transfers ownership of the handles; the source is left empty so that its destructor releases nothing.
    ReadBufferFromHDFS(ReadBufferFromHDFS && other)
        : BufferWithOwnMemory<ReadBuffer>(std::move(other))
        , hdfs_uri(std::move(other.hdfs_uri))
        , builder(other.builder)
        , fs(other.fs)
        , fin(other.fin)
    {
        other.builder = nullptr;
        other.fs = nullptr;
        other.fin = nullptr;
    }

    ~ReadBufferFromHDFS() override
    {
        release();
    }

    /// Close the HDFS file before destruction of object. Safe to call more than once.
    void close()
    {
        if (fin != nullptr)
        {
            hdfsCloseFile(fs, fin);
            fin = nullptr;
        }
    }

    /// Reads the next portion of the file into the internal buffer; returns false at end of file.
    bool nextImpl() override
    {
        if (fin == nullptr)
            throw Exception("Cannot read from closed HDFS file: " + hdfs_uri, ErrorCodes::LOGICAL_ERROR);

        int done = hdfsRead(fs, fin, internal_buffer.begin(), internal_buffer.size());
        if (done < 0)
            throw Exception("Fail to read HDFS file: " + hdfs_uri + " " + String(hdfsGetLastError()),
                ErrorCodes::NETWORK_ERROR);

        if (done == 0)
            return false;

        working_buffer.resize(done);
        return true;
    }

    const std::string & getHDFSUri() const
    {
        return hdfs_uri;
    }

private:
    /// Releases the file handle, the filesystem connection and the builder, in that order.
    void release() noexcept
    {
        close();

        if (fs != nullptr)
        {
            hdfsDisconnect(fs);
            fs = nullptr;
        }

        if (builder != nullptr)
        {
            hdfsFreeBuilder(builder);
            builder = nullptr;
        }
    }
};

}

View File

@ -9,16 +9,23 @@
#include <DataStreams/PushingToViewsBlockOutputStream.h>
#include <DataStreams/SquashingBlockOutputStream.h>
#include <DataStreams/copyData.h>
#include <DataStreams/UnionBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Parsers/ASTFunction.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromHDFS.h>
#include<Poco/URI.h>
namespace DB
{
@ -141,6 +148,84 @@ BlockIO InterpreterInsertQuery::execute()
throw Exception("Cannot insert column " + name_type.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
}
}
else if (query.in_file)
{
// read data stream from in_file, and copy it to out
// Special handling in_file based on url type:
String uristr = typeid_cast<const ASTLiteral &>(*query.in_file).value.safeGet<String>();
// create Datastream based on Format:
String format = query.format;
if (format.empty()) format = "Values";
auto & settings = context.getSettingsRef();
// Assume no query and fragment in uri, todo, add sanity check
String fuzzyFileNames;
String uriPrefix = uristr.substr(0, uristr.find_last_of('/'));
if (uriPrefix.length() == uristr.length())
{
fuzzyFileNames = uristr;
uriPrefix.clear();
}
else
{
uriPrefix += "/";
fuzzyFileNames = uristr.substr(uriPrefix.length());
}
Poco::URI uri(uriPrefix);
String scheme = uri.getScheme();
std::vector<String> fuzzyNameList = parseDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , 100/* hard coded max files */);
std::vector<std::vector<String> > fileNames;
for(auto fuzzyName : fuzzyNameList)
fileNames.push_back(parseDescription(fuzzyName, 0, fuzzyName.length(), '|', 100));
BlockInputStreams inputs;
for (auto & vecNames : fileNames)
{
for (auto & name: vecNames)
{
std::unique_ptr<ReadBuffer> read_buf = nullptr;
if (scheme.empty() || scheme == "file")
{
read_buf = std::make_unique<ReadBufferFromFile>(Poco::URI(uriPrefix + name).getPath());
}
else if (scheme == "hdfs")
{
read_buf = std::make_unique<ReadBufferFromHDFS>(uriPrefix + name);
}
else
{
throw Exception("URI scheme " + scheme + " is not supported with insert statement yet");
}
inputs.emplace_back(
std::make_shared<OwningBlockInputStream<ReadBuffer>>(
context.getInputFormat(format, *read_buf,
res.out->getHeader(), // sample_block
settings.max_insert_block_size),
std::move(read_buf)));
}
}
if (inputs.size() == 0)
throw Exception("Inputs interpreter error");
auto stream = inputs[0];
if (inputs.size() > 1)
{
stream = std::make_shared<UnionBlockInputStream<> >(inputs, nullptr, settings.max_distributed_connections);
}
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(stream, res.out);
res.out = nullptr;
}
return res;
}

View File

@ -36,10 +36,23 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s
if (!format.empty())
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? hilite_none : "") << format;
if (in_file)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " INFILE " << (settings.hilite ? hilite_none : "");
in_file->formatImpl(settings, state, frame);
}
}
else
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : "");
if (in_file)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " INFILE " << (settings.hilite ? hilite_none : "");
in_file->formatImpl(settings, state, frame);
}
else
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : "");
}
}
}
}

View File

@ -18,6 +18,7 @@ public:
String format;
ASTPtr select;
ASTPtr table_function;
ASTPtr in_file;
// Set to true if the data should only be inserted into attached views
bool no_destination = false;
@ -41,6 +42,11 @@ public:
res->table_function = table_function->clone(); res->children.push_back(res->table_function);
}
if (in_file)
{
res->in_file = in_file->clone(); res->children.push_back(res->in_file);
}
return res;
}

View File

@ -31,6 +31,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserKeyword s_format("FORMAT");
ParserKeyword s_select("SELECT");
ParserKeyword s_with("WITH");
ParserKeyword s_infile("INFILE");
ParserToken s_lparen(TokenType::OpeningRoundBracket);
ParserToken s_rparen(TokenType::ClosingRoundBracket);
ParserIdentifier name_p;
@ -43,6 +44,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ASTPtr format;
ASTPtr select;
ASTPtr table_function;
ASTPtr in_file;
/// Insertion data
const char * data = nullptr;
@ -81,7 +83,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
Pos before_select = pos;
/// VALUES or FORMAT or SELECT
/// VALUES or FORMAT or SELECT or INFILE
if (s_values.ignore(pos, expected))
{
data = pos->begin;
@ -93,28 +95,42 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!name_p.parse(pos, format, expected))
return false;
data = name_pos->end;
// there are two case after FORMAT xx:
// case 1: data_set.
// case 2: INFILE xx clause.
if (s_infile.ignore(pos, expected))
{
ParserStringLiteral in_file_p;
if (data < end && *data == ';')
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
"Example:\n\n"
"INSERT INTO t (x, y) FORMAT TabSeparated\n"
";\tHello\n"
"2\tWorld\n"
"\n"
"Note that there is no ';' just after format name, "
"you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
if (!in_file_p.parse(pos, in_file, expected))
return false;
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
++data;
}
else
{
data = name_pos->end;
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
if (data < end && *data == ';')
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
"Example:\n\n"
"INSERT INTO t (x, y) FORMAT TabSeparated\n"
";\tHello\n"
"2\tWorld\n"
"\n"
"Note that there is no ';' just after format name, "
"you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
if (data < end && *data == '\r')
++data;
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
++data;
if (data < end && *data == '\n')
++data;
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
if (data < end && *data == '\r')
++data;
if (data < end && *data == '\n')
++data;
}
}
else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
{
@ -122,6 +138,12 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserSelectWithUnionQuery select_p;
select_p.parse(pos, select, expected);
}
else if (s_infile.ignore(pos, expected))
{
ParserStringLiteral in_file_p;
if (!in_file_p.parse(pos, in_file, expected))
return false;
}
else
{
return false;
@ -147,7 +169,12 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
query->columns = columns;
query->select = select;
query->data = data != end ? data : nullptr;
query->in_file = in_file;
if (query->in_file)
query->data = nullptr;
else
query->data = data != end ? data : nullptr;
query->end = end;
if (columns)
@ -155,6 +182,9 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (select)
query->children.push_back(select);
if (in_file)
query->children.push_back(in_file);
return true;
}

View File

@ -9,18 +9,21 @@ namespace DB
/** Cases:
*
* Normal case:
* #1 Normal case:
* INSERT INTO [db.]table (c1, c2, c3) VALUES (v11, v12, v13), (v21, v22, v23), ...
* INSERT INTO [db.]table VALUES (v11, v12, v13), (v21, v22, v23), ...
*
* Insert of data in an arbitrary format.
* #2 Insert of data in an arbitrary format.
* The data itself comes after LF(line feed), if it exists, or after all the whitespace characters, otherwise.
* INSERT INTO [db.]table (c1, c2, c3) FORMAT format \n ...
* INSERT INTO [db.]table FORMAT format \n ...
*
* Insert the result of the SELECT query.
* #3 Insert the result of the SELECT query.
* INSERT INTO [db.]table (c1, c2, c3) SELECT ...
* INSERT INTO [db.]table SELECT ...
* #4 Insert of data in an arbitrary format from a file (a variant of #2).
* INSERT INTO [db.]table (c1, c2, c3) FORMAT format INFILE 'url'
* INSERT INTO [db.]table INFILE 'url'
*/
class ParserInsertQuery : public IParserBase
{

View File

@ -0,0 +1,178 @@
#include <Storages/StorageFactory.h>
#include <Storages/StorageHDFS.h>
#include <Interpreters/Context.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <IO/ReadBufferFromHDFS.h>
#include <Formats/FormatFactory.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/UnionBlockInputStream.h>
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Poco/Path.h>
#include <TableFunctions/ITableFunction.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
/// Only remembers the source URI, the format name, the table name and the context; no I/O is performed here.
StorageHDFS::StorageHDFS(
    const String & uri_,
    const std::string & table_name_,
    const String & format_name_,
    const ColumnsDescription & columns_,
    Context & context_)
    : IStorage(columns_)
    , uri(uri_)
    , format_name(format_name_)
    , table_name(table_name_)
    , context_global(context_)
{
}
namespace
{
class StorageHDFSBlockInputStream : public IProfilingBlockInputStream
{
public:
StorageHDFSBlockInputStream(const String & uri,
const String & format,
const String & name_,
const Block & sample_block,
const Context & context,
size_t max_block_size)
: name(name_)
{
// Assume no query and fragment in uri, todo, add sanity check
String fuzzyFileNames;
String uriPrefix = uri.substr(0, uri.find_last_of('/'));
if (uriPrefix.length() == uri.length())
{
fuzzyFileNames = uri;
uriPrefix.clear();
}
else
{
uriPrefix += "/";
fuzzyFileNames = uri.substr(uriPrefix.length());
}
std::vector<String> fuzzyNameList = parseDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , 100/* hard coded max files */);
std::vector<std::vector<String> > fileNames;
for(auto fuzzyName : fuzzyNameList)
fileNames.push_back(parseDescription(fuzzyName, 0, fuzzyName.length(), '|', 100));
BlockInputStreams inputs;
for (auto & vecNames : fileNames)
{
for (auto & name: vecNames)
{
std::unique_ptr<ReadBuffer> read_buf = std::make_unique<ReadBufferFromHDFS>(uriPrefix + name);
inputs.emplace_back(
std::make_shared<OwningBlockInputStream<ReadBuffer>>(
FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size),
std::move(read_buf)));
}
}
if (inputs.size() == 0)
throw Exception("StorageHDFS inputs interpreter error");
if (inputs.size() == 1)
{
reader = inputs[0];
}
else
{
reader = std::make_shared<UnionBlockInputStream<> >(inputs, nullptr, context.getSettingsRef().max_distributed_connections);
}
}
String getName() const override
{
return name;
}
Block readImpl() override
{
return reader->read();
}
Block getHeader() const override
{
return reader->getHeader();
}
void readPrefixImpl() override
{
reader->readPrefix();
}
void readSuffixImpl() override
{
auto explicitReader = dynamic_cast<UnionBlockInputStream<> *>(reader.get());
if (explicitReader) explicitReader->cancel(false); // skip Union read suffix assertion
reader->readSuffix();
}
private:
String name;
BlockInputStreamPtr reader;
};
}
/// Returns a single stream that reads all files matched by the table's URI in the table's format.
/// The requested column names, query info, processing stage and stream count are ignored.
BlockInputStreams StorageHDFS::read(
    const Names & /*column_names*/,
    const SelectQueryInfo & /*query_info*/,
    const Context & context,
    QueryProcessingStage::Enum /*processed_stage*/,
    size_t max_block_size,
    unsigned /*num_streams*/)
{
    BlockInputStreams streams;
    streams.emplace_back(std::make_shared<StorageHDFSBlockInputStream>(
        uri, format_name, getName(), getSampleBlock(), context, max_block_size));
    return streams;
}
void StorageHDFS::rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & /*new_table_name*/) {}
/// Writing to HDFS is not implemented: always throws.
BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const Settings & /*settings*/)
{
    throw Exception("StorageHDFS write is not supported yet");
}
/// Registers the "HDFS" table engine. It takes exactly two arguments: the URL and the name of the format.
void registerStorageHDFS(StorageFactory & factory)
{
    factory.registerStorage("HDFS", [](const StorageFactory::Arguments & args)
    {
        ASTs & engine_args = args.engine_args;

        /// Both arguments are read unconditionally below, so anything other than two must be rejected here.
        if (engine_args.size() != 2)
            throw Exception(
                "Storage HDFS requires exactly 2 arguments: url and name of used format.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

        engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.local_context);
        String url = static_cast<const ASTLiteral &>(*engine_args[0]).value.safeGet<String>();

        engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.local_context);
        String format_name = static_cast<const ASTLiteral &>(*engine_args[1]).value.safeGet<String>();

        return StorageHDFS::create(url, args.table_name, format_name, args.columns, args.context);
    });
}
}

View File

@ -0,0 +1,53 @@
#pragma once
#include <Storages/IStorage.h>
#include <Poco/URI.h>
#include <common/logger_useful.h>
#include <ext/shared_ptr_helper.h>
namespace DB
{
/** Table engine for external files stored in HDFS.
  * Reading is implemented; write() currently throws.
  */
class StorageHDFS : public ext::shared_ptr_helper<StorageHDFS>, public IStorage
{
public:
    String getName() const override { return "HDFS"; }

    String getTableName() const override { return table_name; }

    BlockInputStreams read(
        const Names & column_names,
        const SelectQueryInfo & query_info,
        const Context & context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

    BlockOutputStreamPtr write(const ASTPtr & query, const Settings & settings) override;

    void rename(const String & new_path_to_db, const String & new_database_name, const String & new_table_name) override;

protected:
    /// Instances are created through create() of ext::shared_ptr_helper.
    StorageHDFS(
        const String & uri_,
        const String & table_name_,
        const String & format_name_,
        const ColumnsDescription & columns_,
        Context & context_);

private:
    /// Keep this declaration order: it is the order the constructor initializes the members in.
    String uri;         /// Source location passed to the read stream.
    String format_name; /// Name of the input format used to parse the files.
    String table_name;
    Context & context_global;

    Logger * log = &Logger::get("StorageHDFS");
};
}

View File

@ -1,5 +1,7 @@
#include <TableFunctions/ITableFunction.h>
#include <Common/ProfileEvents.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace ProfileEvents
@ -16,4 +18,168 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context &
return executeImpl(ast_function, context);
}
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
/// The Cartesian product of two sets of rows, the result is written in place of the first argument
static void append(std::vector<String> & to, const std::vector<String> & what, size_t max_addresses)
{
if (what.empty())
return;
if (to.empty())
{
to = what;
return;
}
if (what.size() * to.size() > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
std::vector<String> res;
for (size_t i = 0; i < to.size(); ++i)
for (size_t j = 0; j < what.size(); ++j)
res.push_back(to[i] + what[j]);
to.swap(res);
}
/// Parses the decimal number written in description[l, r) into `res`.
/// Returns false on a non-digit character or as soon as the value exceeds 1e15.
/// An empty range yields res = 0 and returns true.
static bool parseNumber(const String & description, size_t l, size_t r, size_t & res)
{
    res = 0;

    size_t pos = l;
    while (pos < r)
    {
        const char symbol = description[pos];
        if (!isNumericASCII(symbol))
            return false;

        res = res * 10 + symbol - '0';
        if (res > 1e15)
            return false;

        ++pos;
    }

    return true;
}
/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
* depending on whether shards or replicas are generated.
* Only the half-open range [l, r) of `description` is parsed.
* Throws an exception with code BAD_ARGUMENTS on unbalanced braces, on a malformed numeric interval,
* or when more than max_addresses strings would be generated.
* NOTE(review): the function is no longer file-local and is also used to expand file name patterns,
* while every error message below still mentions table function 'remote'.
* For example:
* host1,host2,... - generates set of shards from host1, host2, ...
* host1|host2|... - generates set of replicas from host1, host2, ...
* abc{8..10}def - generates set of shards abc8def, abc9def, abc10def.
* abc{08..10}def - generates set of shards abc08def, abc09def, abc10def.
* abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef.
* abc{x|yy|z}def - generates set of replicas abcxdef, abcyydef, abczdef.
* abc{1..9}de{f,g,h} - is a direct product, 27 shards.
* abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas.
*/
std::vector<String> parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses)
{
/// `res` collects the finished alternatives; `cur` holds the expansions of the alternative being built.
std::vector<String> res;
std::vector<String> cur;
/// An empty substring means a set of an empty string
if (l >= r)
{
res.push_back("");
return res;
}
for (size_t i = l; i < r; ++i)
{
/// Either the numeric interval (8..10) or equivalent expression in brackets
if (description[i] == '{')
{
int cnt = 1;
int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two
size_t m;
std::vector<String> buffer;
bool have_splitter = false;
/// Look for the corresponding closing bracket
for (m = i + 1; m < r; ++m)
{
if (description[m] == '{') ++cnt;
if (description[m] == '}') --cnt;
if (description[m] == '.' && description[m-1] == '.') last_dot = m;
if (description[m] == separator) have_splitter = true;
if (cnt == 0) break;
}
/// Here m is the index of the matching '}' unless the braces are unbalanced.
if (cnt != 0)
throw Exception("Table function 'remote': incorrect brace sequence in first argument",
ErrorCodes::BAD_ARGUMENTS);
/// The presence of a dot - numeric interval
if (last_dot != -1)
{
size_t left, right;
if (description[last_dot - 1] != '.')
throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
/// The left number occupies [i + 1, last_dot - 1), the right number occupies [last_dot + 1, m).
if (!parseNumber(description, i + 1, last_dot - 1, left))
throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (!parseNumber(description, last_dot + 1, m, right))
throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (left > right)
throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (right - left + 1 > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
bool add_leading_zeroes = false;
/// Number of characters in the left border as it was written.
size_t len = last_dot - 1 - (i + 1);
/// If the left and right borders are written with the same number of characters, then you must add leading zeros.
if (last_dot - 1 - (i + 1) == m - (last_dot + 1))
add_leading_zeroes = true;
for (size_t id = left; id <= right; ++id)
{
/// Note: this local `cur` shadows the outer vector `cur` within the loop body.
String cur = toString<UInt64>(id);
if (add_leading_zeroes)
{
while (cur.size() < len)
cur = "0" + cur;
}
buffer.push_back(cur);
}
}
else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows
buffer = parseDescription(description, i + 1, m, separator, max_addresses);
else /// Otherwise just copy, spawn will occur when you call with the correct delimiter
buffer.push_back(description.substr(i, m - i + 1));
/// Add all possible received extensions to the current set of lines
append(cur, buffer, max_addresses);
/// Continue after the closing brace.
i = m;
}
else if (description[i] == separator)
{
/// If the delimiter, then add found rows
res.insert(res.end(), cur.begin(), cur.end());
cur.clear();
}
else
{
/// Otherwise, simply append the character to current lines
std::vector<String> buffer;
buffer.push_back(description.substr(i, 1));
append(cur, buffer, max_addresses);
}
}
/// Flush the last alternative (the one after the final separator).
res.insert(res.end(), cur.begin(), cur.end());
if (res.size() > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
return res;
}
}

View File

@ -2,6 +2,8 @@
#include <string>
#include <memory>
#include <vector>
#include <Core/Types.h>
namespace DB
@ -42,5 +44,6 @@ private:
using TableFunctionPtr = std::shared_ptr<ITableFunction>;
std::vector<String> parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses);
}

View File

@ -0,0 +1,21 @@
#include <Storages/StorageHDFS.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionHDFS.h>
namespace DB
{
/// Creates a StorageHDFS over `source` whose columns are taken from `sample_block`;
/// the table function name is used as the table name.
StoragePtr TableFunctionHDFS::getStorage(
    const String & source, const String & format, const Block & sample_block, Context & global_context) const
{
    return StorageHDFS::create(
        source, getName(), format, ColumnsDescription{sample_block.getNamesAndTypesList()}, global_context);
}
/// Makes the `hdfs` table function available through the given factory.
void registerTableFunctionHDFS(TableFunctionFactory & factory)
{
    factory.registerFunction<TableFunctionHDFS>();
}
}

View File

@ -0,0 +1,26 @@
#pragma once
#include <TableFunctions/ITableFunctionFileLike.h>
#include <Interpreters/Context.h>
#include <Core/Block.h>
namespace DB
{
/** hdfs(source, format, structure) - creates a temporary storage from a file in HDFS.
  * NOTE(review): `source` is handed to StorageHDFS unchanged; presumably it must be a full
  * URI with name node host, port and file path rather than just host:port - confirm.
  */
class TableFunctionHDFS : public ITableFunctionFileLike
{
public:
    static constexpr auto name = "hdfs";

    std::string getName() const override { return name; }

private:
    /// Builds the StorageHDFS instance backing this table function.
    StoragePtr getStorage(
        const String & source, const String & format, const Block & sample_block, Context & global_context) const override;
};
}

View File

@ -22,165 +22,6 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
/// The Cartesian product of two sets of rows, the result is written in place of the first argument
static void append(std::vector<String> & to, const std::vector<String> & what, size_t max_addresses)
{
if (what.empty())
return;
if (to.empty())
{
to = what;
return;
}
if (what.size() * to.size() > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
std::vector<String> res;
for (size_t i = 0; i < to.size(); ++i)
for (size_t j = 0; j < what.size(); ++j)
res.push_back(to[i] + what[j]);
to.swap(res);
}
/// Parse number from substring
static bool parseNumber(const String & description, size_t l, size_t r, size_t & res)
{
res = 0;
for (size_t pos = l; pos < r; pos ++)
{
if (!isNumericASCII(description[pos]))
return false;
res = res * 10 + description[pos] - '0';
if (res > 1e15)
return false;
}
return true;
}
/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
* depending on whether shards or replicas are generated.
* For example:
* host1,host2,... - generates set of shards from host1, host2, ...
* host1|host2|... - generates set of replicas from host1, host2, ...
* abc{8..10}def - generates set of shards abc8def, abc9def, abc10def.
* abc{08..10}def - generates set of shards abc08def, abc09def, abc10def.
* abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef.
* abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef.
* abc{1..9}de{f,g,h} - is a direct product, 27 shards.
* abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas.
*/
static std::vector<String> parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses)
{
std::vector<String> res;
std::vector<String> cur;
/// An empty substring means a set of an empty string
if (l >= r)
{
res.push_back("");
return res;
}
for (size_t i = l; i < r; ++i)
{
/// Either the numeric interval (8..10) or equivalent expression in brackets
if (description[i] == '{')
{
int cnt = 1;
int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two
size_t m;
std::vector<String> buffer;
bool have_splitter = false;
/// Look for the corresponding closing bracket
for (m = i + 1; m < r; ++m)
{
if (description[m] == '{') ++cnt;
if (description[m] == '}') --cnt;
if (description[m] == '.' && description[m-1] == '.') last_dot = m;
if (description[m] == separator) have_splitter = true;
if (cnt == 0) break;
}
if (cnt != 0)
throw Exception("Table function 'remote': incorrect brace sequence in first argument",
ErrorCodes::BAD_ARGUMENTS);
/// The presence of a dot - numeric interval
if (last_dot != -1)
{
size_t left, right;
if (description[last_dot - 1] != '.')
throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (!parseNumber(description, i + 1, last_dot - 1, left))
throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (!parseNumber(description, last_dot + 1, m, right))
throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (left > right)
throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (right - left + 1 > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
bool add_leading_zeroes = false;
size_t len = last_dot - 1 - (i + 1);
/// If the left and right borders have equal numbers, then you must add leading zeros.
if (last_dot - 1 - (i + 1) == m - (last_dot + 1))
add_leading_zeroes = true;
for (size_t id = left; id <= right; ++id)
{
String cur = toString<UInt64>(id);
if (add_leading_zeroes)
{
while (cur.size() < len)
cur = "0" + cur;
}
buffer.push_back(cur);
}
}
else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows
buffer = parseDescription(description, i + 1, m, separator, max_addresses);
else /// Otherwise just copy, spawn will occur when you call with the correct delimiter
buffer.push_back(description.substr(i, m - i + 1));
/// Add all possible received extensions to the current set of lines
append(cur, buffer, max_addresses);
i = m;
}
else if (description[i] == separator)
{
/// If the delimiter, then add found rows
res.insert(res.end(), cur.begin(), cur.end());
cur.clear();
}
else
{
/// Otherwise, simply append the character to current lines
std::vector<String> buffer;
buffer.push_back(description.substr(i, 1));
append(cur, buffer, max_addresses);
}
}
res.insert(res.end(), cur.begin(), cur.end());
if (res.size() > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
return res;
}
StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const Context & context) const
{
ASTs & args_func = typeid_cast<ASTFunction &>(*ast_function).children;

View File

@ -13,6 +13,7 @@ void registerTableFunctionNumbers(TableFunctionFactory & factory);
void registerTableFunctionCatBoostPool(TableFunctionFactory & factory);
void registerTableFunctionFile(TableFunctionFactory & factory);
void registerTableFunctionURL(TableFunctionFactory & factory);
void registerTableFunctionHDFS(TableFunctionFactory & factory);
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
void registerTableFunctionODBC(TableFunctionFactory & factory);
@ -36,6 +37,7 @@ void registerTableFunctions()
registerTableFunctionCatBoostPool(factory);
registerTableFunctionFile(factory);
registerTableFunctionURL(factory);
registerTableFunctionHDFS(factory);
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
registerTableFunctionODBC(factory);

View File

@ -64,7 +64,7 @@ do
shift
elif [[ $1 == '--fast' ]]; then
# Wrong but fast pbuilder mode: create base package with all depends
EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libicu-dev libreadline-dev psmisc bash expect python python-lxml python-termcolor python-requests curl perl sudo openssl netcat-openbsd"
EXTRAPACKAGES="$EXTRAPACKAGES debhelper cmake ninja-build gcc-7 g++-7 libc6-dev libicu-dev libreadline-dev psmisc bash expect python python-lxml python-termcolor python-requests curl perl sudo openssl netcat-openbsd uuid xml2 krb5 gsasl"
shift
else
echo "Unknown option $1"