Add integration test, put dependent libs to contrib, slightly refine code.
This commit is contained in:
alesapin 2018-12-05 16:24:45 +03:00
parent 8256c19b29
commit 80b49e4c0a
39 changed files with 1300 additions and 400 deletions

6
.gitmodules vendored
View File

@ -55,3 +55,9 @@
[submodule "contrib/libhdfs3"]
path = contrib/libhdfs3
url = https://github.com/ClickHouse-Extras/libhdfs3.git
[submodule "contrib/libxml2"]
path = contrib/libxml2
url = https://github.com/GNOME/libxml2.git
[submodule "contrib/libgsasl"]
path = contrib/libgsasl
url = https://github.com/ClickHouse-Extras/libgsasl.git

View File

@ -89,7 +89,6 @@ endif ()
option (TEST_COVERAGE "Enables flags for test coverage" OFF)
option (ENABLE_TESTS "Enables tests" ON)
option (ENABLE_INSERT_INFILE "Enables INSERT INFILE syntax" OFF)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64")
option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." ON)
@ -232,10 +231,6 @@ if (ENABLE_TESTS)
enable_testing()
endif ()
if (ENABLE_INSERT_INFILE)
message (STATUS "INSERT INFILE SYNTAX support")
endif ()
# when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc
if (CMAKE_INSTALL_PREFIX STREQUAL "/usr")
set (CLICKHOUSE_ETC_DIR "/etc")
@ -279,6 +274,8 @@ include (cmake/find_rdkafka.cmake)
include (cmake/find_capnp.cmake)
include (cmake/find_llvm.cmake)
include (cmake/find_cpuid.cmake)
include (cmake/find_libgsasl.cmake)
include (cmake/find_libxml2.cmake)
include (cmake/find_hdfs3.cmake)
include (cmake/find_consistent-hashing.cmake)
include (cmake/find_base64.cmake)

View File

@ -1,3 +1,8 @@
if (NOT ARCH_ARM)
option (ENABLE_HDFS "Enable HDFS" ${NOT_UNBUNDLED})
endif ()
if (ENABLE_HDFS)
option (USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ON)
if (NOT USE_INTERNAL_HDFS3_LIBRARY)
@ -9,5 +14,8 @@ else ()
set(HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include")
set(HDFS3_LIBRARY hdfs3)
endif()
set (USE_HDFS 1)
endif()
message (STATUS "Using hdfs3: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}")

20
cmake/find_libxml2.cmake Normal file
View File

@ -0,0 +1,20 @@
option (USE_INTERNAL_LIBXML2_LIBRARY "Set to FALSE to use system libxml2 library instead of bundled" ${NOT_UNBUNDLED})

# The bundled copy lives in the contrib/libxml2 submodule; if the user asked for
# the internal library but the submodule was never checked out, warn and fall
# back to probing the system.
if (USE_INTERNAL_LIBXML2_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libxml2/libxml.h")
   message (WARNING "submodule contrib/libxml2 is missing. to fix try run: \n git submodule update --init --recursive")
   set (USE_INTERNAL_LIBXML2_LIBRARY 0)
endif ()

# Probe the system installation only when the bundled copy was not requested.
if (NOT USE_INTERNAL_LIBXML2_LIBRARY)
   find_library (LIBXML2_LIBRARY libxml2)
   find_path (LIBXML2_INCLUDE_DIR NAMES libxml.h PATHS ${LIBXML2_INCLUDE_PATHS})
endif ()

# If the system search did not yield both pieces, use the bundled build: the
# `libxml2` target defined in contrib/libxml2-cmake, plus its two include roots
# (upstream headers and the pre-generated linux_x86_64 config headers).
if (NOT (LIBXML2_LIBRARY AND LIBXML2_INCLUDE_DIR))
   set (USE_INTERNAL_LIBXML2_LIBRARY 1)
   set (LIBXML2_LIBRARY libxml2)
   set (LIBXML2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libxml2/include ${ClickHouse_SOURCE_DIR}/contrib/libxml2-cmake/linux_x86_64/include)
endif ()

message (STATUS "Using libxml2: ${LIBXML2_INCLUDE_DIR} : ${LIBXML2_LIBRARY}")

View File

@ -191,6 +191,14 @@ if (USE_INTERNAL_LLVM_LIBRARY)
add_subdirectory (llvm/llvm)
endif ()
if (USE_INTERNAL_LIBGSASL_LIBRARY)
add_subdirectory(libgsasl)
endif()
if (USE_INTERNAL_LIBXML2_LIBRARY)
add_subdirectory(libxml2-cmake)
endif ()
if (USE_INTERNAL_HDFS3_LIBRARY)
include(${CMAKE_SOURCE_DIR}/cmake/find_protobuf.cmake)
if (USE_INTERNAL_PROTOBUF_LIBRARY)

1
contrib/libgsasl vendored Submodule

@ -0,0 +1 @@
Subproject commit 3b8948a4042e34fb00b4fb987535dc9e02e39040

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit 4e9940eb82a8c02b23b4985ce5242778c8d5bc11
Subproject commit bd6505cbb0c130b0db695305b9a38546fa880e5a

View File

@ -16,7 +16,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION ${GCC_COMPILER_VERSION})
LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR)
LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR)
LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MINOR)
SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version")
SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version")

View File

@ -7,6 +7,7 @@ if (NOT USE_INTERNAL_PROTOBUF_LIBRARY)
endif ()
endif()
SET(WITH_KERBEROS false)
# project and source dir
set(HDFS3_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/libhdfs3)
set(HDFS3_SOURCE_DIR ${HDFS3_ROOT_DIR}/src)
@ -18,11 +19,9 @@ include(Platform)
include(Options)
# prefer shared libraries
set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX})
#find_package(CURL REQUIRED)
find_package(GSasl REQUIRED)
find_package(KERBEROS REQUIRED)
find_package(LibXml2 REQUIRED)
if (WITH_KERBEROS)
find_package(KERBEROS REQUIRED)
endif()
# source
set(PROTO_FILES
@ -192,17 +191,17 @@ target_include_directories(hdfs3 PRIVATE ${HDFS3_SOURCE_DIR})
target_include_directories(hdfs3 PRIVATE ${HDFS3_COMMON_DIR})
target_include_directories(hdfs3 PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
#target_include_directories(hdfs3 PRIVATE ${CURL_INCLUDE_DIR})
target_include_directories(hdfs3 PRIVATE ${GSASL_INCLUDE_DIR})
target_include_directories(hdfs3 PRIVATE ${KERBEROS_INCLUDE_DIRS})
target_include_directories(hdfs3 PRIVATE ${LIBGSASL_INCLUDE_DIR})
if (WITH_KERBEROS)
target_include_directories(hdfs3 PRIVATE ${KERBEROS_INCLUDE_DIRS})
endif()
target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR})
#target_include_directories(hdfs3 PRIVATE ${LIBUUID_INCLUDE_DIRS})
#target_link_libraries(hdfs3 ${CURL_LIBRARIES})
target_link_libraries(hdfs3 ${GSASL_LIBRARIES})
target_link_libraries(hdfs3 ${KERBEROS_LIBRARIES})
target_link_libraries(hdfs3 ${LIBXML2_LIBRARIES})
#target_link_libraries(hdfs3 ${LIBUUID_LIBRARIES})
target_link_libraries(hdfs3 ${LIBGSASL_LIBRARY})
if (WITH_KERBEROS)
target_link_libraries(hdfs3 ${KERBEROS_LIBRARIES})
endif()
target_link_libraries(hdfs3 ${LIBXML2_LIBRARY})
# inherit from parent cmake
target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS})

1
contrib/libxml2 vendored Submodule

@ -0,0 +1 @@
Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf

View File

@ -0,0 +1,63 @@
# Build the bundled libxml2 as a static library from the contrib/libxml2
# submodule, using the pre-generated config headers under linux_x86_64/.
set(LIBXML2_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/libxml2)
set(LIBXML2_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/libxml2)

# Library translation units only. The upstream tree also ships standalone
# programs (xmllint.c, xmlcatalog.c) and test drivers (runsuite.c,
# runxmlconf.c); each of those defines main() and must NOT be compiled into
# the library, or executables linking it can hit duplicate-main conflicts.
set(SRCS
    ${LIBXML2_SOURCE_DIR}/parser.c
    ${LIBXML2_SOURCE_DIR}/HTMLparser.c
    ${LIBXML2_SOURCE_DIR}/buf.c
    ${LIBXML2_SOURCE_DIR}/xzlib.c
    ${LIBXML2_SOURCE_DIR}/xmlregexp.c
    ${LIBXML2_SOURCE_DIR}/entities.c
    ${LIBXML2_SOURCE_DIR}/rngparser.c
    ${LIBXML2_SOURCE_DIR}/encoding.c
    ${LIBXML2_SOURCE_DIR}/legacy.c
    ${LIBXML2_SOURCE_DIR}/error.c
    ${LIBXML2_SOURCE_DIR}/debugXML.c
    ${LIBXML2_SOURCE_DIR}/xpointer.c
    ${LIBXML2_SOURCE_DIR}/DOCBparser.c
    ${LIBXML2_SOURCE_DIR}/c14n.c
    ${LIBXML2_SOURCE_DIR}/xmlreader.c
    ${LIBXML2_SOURCE_DIR}/xmlstring.c
    ${LIBXML2_SOURCE_DIR}/dict.c
    ${LIBXML2_SOURCE_DIR}/xpath.c
    ${LIBXML2_SOURCE_DIR}/tree.c
    ${LIBXML2_SOURCE_DIR}/trionan.c
    ${LIBXML2_SOURCE_DIR}/pattern.c
    ${LIBXML2_SOURCE_DIR}/globals.c
    ${LIBXML2_SOURCE_DIR}/chvalid.c
    ${LIBXML2_SOURCE_DIR}/relaxng.c
    ${LIBXML2_SOURCE_DIR}/list.c
    ${LIBXML2_SOURCE_DIR}/xinclude.c
    ${LIBXML2_SOURCE_DIR}/xmlIO.c
    ${LIBXML2_SOURCE_DIR}/triostr.c
    ${LIBXML2_SOURCE_DIR}/hash.c
    ${LIBXML2_SOURCE_DIR}/xmlsave.c
    ${LIBXML2_SOURCE_DIR}/HTMLtree.c
    ${LIBXML2_SOURCE_DIR}/SAX.c
    ${LIBXML2_SOURCE_DIR}/xmlschemas.c
    ${LIBXML2_SOURCE_DIR}/SAX2.c
    ${LIBXML2_SOURCE_DIR}/threads.c
    ${LIBXML2_SOURCE_DIR}/catalog.c
    ${LIBXML2_SOURCE_DIR}/uri.c
    ${LIBXML2_SOURCE_DIR}/xmlmodule.c
    ${LIBXML2_SOURCE_DIR}/xlink.c
    ${LIBXML2_SOURCE_DIR}/parserInternals.c
    ${LIBXML2_SOURCE_DIR}/xmlwriter.c
    ${LIBXML2_SOURCE_DIR}/xmlunicode.c
    ${LIBXML2_SOURCE_DIR}/xmlmemory.c
    ${LIBXML2_SOURCE_DIR}/nanoftp.c
    ${LIBXML2_SOURCE_DIR}/xmlschemastypes.c
    ${LIBXML2_SOURCE_DIR}/valid.c
    ${LIBXML2_SOURCE_DIR}/nanohttp.c
    ${LIBXML2_SOURCE_DIR}/schematron.c
)
add_library(libxml2 STATIC ${SRCS})

# Consumers need both the pre-generated config headers (libxml/xmlversion.h,
# config.h) and the upstream public headers.
target_include_directories(libxml2 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/include)
target_include_directories(libxml2 PUBLIC ${LIBXML2_SOURCE_DIR}/include)

View File

@ -0,0 +1,285 @@
/* config.h. Generated from config.h.in by configure. */
/* config.h.in. Generated from configure.ac by autoheader. */
/* Type cast for the gethostbyname() argument */
#define GETHOSTBYNAME_ARG_CAST /**/
/* Define to 1 if you have the <arpa/inet.h> header file. */
#define HAVE_ARPA_INET_H 1
/* Define to 1 if you have the <arpa/nameser.h> header file. */
#define HAVE_ARPA_NAMESER_H 1
/* Whether struct sockaddr::__ss_family exists */
/* #undef HAVE_BROKEN_SS_FAMILY */
/* Define to 1 if you have the <ctype.h> header file. */
#define HAVE_CTYPE_H 1
/* Define to 1 if you have the <dirent.h> header file. */
#define HAVE_DIRENT_H 1
/* Define to 1 if you have the <dlfcn.h> header file. */
#define HAVE_DLFCN_H 1
/* Have dlopen based dso */
#define HAVE_DLOPEN /**/
/* Define to 1 if you have the <dl.h> header file. */
/* #undef HAVE_DL_H */
/* Define to 1 if you have the <errno.h> header file. */
#define HAVE_ERRNO_H 1
/* Define to 1 if you have the <fcntl.h> header file. */
#define HAVE_FCNTL_H 1
/* Define to 1 if you have the <float.h> header file. */
#define HAVE_FLOAT_H 1
/* Define to 1 if you have the `fprintf' function. */
#define HAVE_FPRINTF 1
/* Define to 1 if you have the `ftime' function. */
#define HAVE_FTIME 1
/* Define if getaddrinfo is there */
#define HAVE_GETADDRINFO /**/
/* Define to 1 if you have the `gettimeofday' function. */
#define HAVE_GETTIMEOFDAY 1
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
/* Define to 1 if you have the `isascii' function. */
#define HAVE_ISASCII 1
/* Define if isinf is there */
#define HAVE_ISINF /**/
/* Define if isnan is there */
#define HAVE_ISNAN /**/
/* Define if history library is there (-lhistory) */
/* #undef HAVE_LIBHISTORY */
/* Define if pthread library is there (-lpthread) */
#define HAVE_LIBPTHREAD /**/
/* Define if readline library is there (-lreadline) */
/* #undef HAVE_LIBREADLINE */
/* Define to 1 if you have the <limits.h> header file. */
#define HAVE_LIMITS_H 1
/* Define to 1 if you have the `localtime' function. */
#define HAVE_LOCALTIME 1
/* Define to 1 if you have the <lzma.h> header file. */
/* #undef HAVE_LZMA_H */
/* Define to 1 if you have the <malloc.h> header file. */
#define HAVE_MALLOC_H 1
/* Define to 1 if you have the <math.h> header file. */
#define HAVE_MATH_H 1
/* Define to 1 if you have the <memory.h> header file. */
#define HAVE_MEMORY_H 1
/* Define to 1 if you have the `mmap' function. */
#define HAVE_MMAP 1
/* Define to 1 if you have the `munmap' function. */
#define HAVE_MUNMAP 1
/* mmap() is no good without munmap() */
#if defined(HAVE_MMAP) && !defined(HAVE_MUNMAP)
# undef /**/ HAVE_MMAP
#endif
/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */
/* #undef HAVE_NDIR_H */
/* Define to 1 if you have the <netdb.h> header file. */
#define HAVE_NETDB_H 1
/* Define to 1 if you have the <netinet/in.h> header file. */
#define HAVE_NETINET_IN_H 1
/* Define to 1 if you have the <poll.h> header file. */
#define HAVE_POLL_H 1
/* Define to 1 if you have the `printf' function. */
#define HAVE_PRINTF 1
/* Define if <pthread.h> is there */
#define HAVE_PTHREAD_H /**/
/* Define to 1 if you have the `putenv' function. */
#define HAVE_PUTENV 1
/* Define to 1 if you have the `rand' function. */
#define HAVE_RAND 1
/* Define to 1 if you have the `rand_r' function. */
#define HAVE_RAND_R 1
/* Define to 1 if you have the <resolv.h> header file. */
#define HAVE_RESOLV_H 1
/* Have shl_load based dso */
/* #undef HAVE_SHLLOAD */
/* Define to 1 if you have the `signal' function. */
#define HAVE_SIGNAL 1
/* Define to 1 if you have the <signal.h> header file. */
#define HAVE_SIGNAL_H 1
/* Define to 1 if you have the `snprintf' function. */
#define HAVE_SNPRINTF 1
/* Define to 1 if you have the `sprintf' function. */
#define HAVE_SPRINTF 1
/* Define to 1 if you have the `srand' function. */
#define HAVE_SRAND 1
/* Define to 1 if you have the `sscanf' function. */
#define HAVE_SSCANF 1
/* Define to 1 if you have the `stat' function. */
#define HAVE_STAT 1
/* Define to 1 if you have the <stdarg.h> header file. */
#define HAVE_STDARG_H 1
/* Define to 1 if you have the <stdint.h> header file. */
#define HAVE_STDINT_H 1
/* Define to 1 if you have the <stdlib.h> header file. */
#define HAVE_STDLIB_H 1
/* Define to 1 if you have the `strftime' function. */
#define HAVE_STRFTIME 1
/* Define to 1 if you have the <strings.h> header file. */
#define HAVE_STRINGS_H 1
/* Define to 1 if you have the <string.h> header file. */
#define HAVE_STRING_H 1
/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'.
*/
/* #undef HAVE_SYS_DIR_H */
/* Define to 1 if you have the <sys/mman.h> header file. */
#define HAVE_SYS_MMAN_H 1
/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'.
*/
/* #undef HAVE_SYS_NDIR_H */
/* Define to 1 if you have the <sys/select.h> header file. */
#define HAVE_SYS_SELECT_H 1
/* Define to 1 if you have the <sys/socket.h> header file. */
#define HAVE_SYS_SOCKET_H 1
/* Define to 1 if you have the <sys/stat.h> header file. */
#define HAVE_SYS_STAT_H 1
/* Define to 1 if you have the <sys/timeb.h> header file. */
#define HAVE_SYS_TIMEB_H 1
/* Define to 1 if you have the <sys/time.h> header file. */
#define HAVE_SYS_TIME_H 1
/* Define to 1 if you have the <sys/types.h> header file. */
#define HAVE_SYS_TYPES_H 1
/* Define to 1 if you have the `time' function. */
#define HAVE_TIME 1
/* Define to 1 if you have the <time.h> header file. */
#define HAVE_TIME_H 1
/* Define to 1 if you have the <unistd.h> header file. */
#define HAVE_UNISTD_H 1
/* Whether va_copy() is available */
#define HAVE_VA_COPY 1
/* Define to 1 if you have the `vfprintf' function. */
#define HAVE_VFPRINTF 1
/* Define to 1 if you have the `vsnprintf' function. */
#define HAVE_VSNPRINTF 1
/* Define to 1 if you have the `vsprintf' function. */
#define HAVE_VSPRINTF 1
/* Define to 1 if you have the <zlib.h> header file. */
/* #undef HAVE_ZLIB_H */
/* Whether __va_copy() is available */
/* #undef HAVE___VA_COPY */
/* Define as const if the declaration of iconv() needs const. */
#define ICONV_CONST
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#define LT_OBJDIR ".libs/"
/* Name of package */
#define PACKAGE "libxml2"
/* Define to the address where bug reports for this package should be sent. */
#define PACKAGE_BUGREPORT ""
/* Define to the full name of this package. */
#define PACKAGE_NAME ""
/* Define to the full name and version of this package. */
#define PACKAGE_STRING ""
/* Define to the one symbol short name of this package. */
#define PACKAGE_TARNAME ""
/* Define to the home page for this package. */
#define PACKAGE_URL ""
/* Define to the version of this package. */
#define PACKAGE_VERSION ""
/* Type cast for the send() function 2nd arg */
#define SEND_ARG2_CAST /**/
/* Define to 1 if you have the ANSI C header files. */
#define STDC_HEADERS 1
/* Support for IPv6 */
#define SUPPORT_IP6 /**/
/* Define if va_list is an array type */
#define VA_LIST_IS_ARRAY 1
/* Version number of package */
#define VERSION "2.9.8"
/* Determine what socket length (socklen_t) data type is */
#define XML_SOCKLEN_T socklen_t
/* Define for Solaris 2.5.1 so the uint32_t typedef from <sys/synch.h>,
<pthread.h>, or <semaphore.h> is not used. If the typedef were allowed, the
#define below would cause a syntax error. */
/* #undef _UINT32_T */
/* ss_family is not defined here, use __ss_family instead */
/* #undef ss_family */
/* Define to the type of an unsigned integer type of width exactly 32 bits if
such a type exists and the standard includes do not define it. */
/* #undef uint32_t */

View File

@ -0,0 +1,481 @@
/*
* Summary: compile-time version informations
* Description: compile-time version informations for the XML library
*
* Copy: See Copyright for the status of this software.
*
* Author: Daniel Veillard
*/
#ifndef __XML_VERSION_H__
#define __XML_VERSION_H__
#include <libxml/xmlexports.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* use those to be sure nothing nasty will happen if
* your library and includes mismatch
*/
#ifndef LIBXML2_COMPILING_MSCCDEF
XMLPUBFUN void XMLCALL xmlCheckVersion(int version);
#endif /* LIBXML2_COMPILING_MSCCDEF */
/**
* LIBXML_DOTTED_VERSION:
*
* the version string like "1.2.3"
*/
#define LIBXML_DOTTED_VERSION "2.9.8"
/**
* LIBXML_VERSION:
*
* the version number: 1.2.3 value is 10203
*/
#define LIBXML_VERSION 20908
/**
* LIBXML_VERSION_STRING:
*
* the version number string, 1.2.3 value is "10203"
*/
#define LIBXML_VERSION_STRING "20908"
/**
* LIBXML_VERSION_EXTRA:
*
* extra version information, used to show a CVS compilation
*/
#define LIBXML_VERSION_EXTRA "-GITv2.9.9-rc2-1-g6fc04d71"
/**
* LIBXML_TEST_VERSION:
*
* Macro to check that the libxml version in use is compatible with
* the version the software has been compiled against
*/
#define LIBXML_TEST_VERSION xmlCheckVersion(20908);
#ifndef VMS
#if 0
/**
* WITH_TRIO:
*
* defined if the trio support need to be configured in
*/
#define WITH_TRIO
#else
/**
* WITHOUT_TRIO:
*
* defined if the trio support should not be configured in
*/
#define WITHOUT_TRIO
#endif
#else /* VMS */
/**
* WITH_TRIO:
*
* defined if the trio support need to be configured in
*/
#define WITH_TRIO 1
#endif /* VMS */
/**
* LIBXML_THREAD_ENABLED:
*
* Whether the thread support is configured in
*/
#define LIBXML_THREAD_ENABLED 1
/**
* LIBXML_THREAD_ALLOC_ENABLED:
*
* Whether the allocation hooks are per-thread
*/
#if 0
#define LIBXML_THREAD_ALLOC_ENABLED
#endif
/**
* LIBXML_TREE_ENABLED:
*
* Whether the DOM like tree manipulation API support is configured in
*/
#if 1
#define LIBXML_TREE_ENABLED
#endif
/**
* LIBXML_OUTPUT_ENABLED:
*
* Whether the serialization/saving support is configured in
*/
#if 1
#define LIBXML_OUTPUT_ENABLED
#endif
/**
* LIBXML_PUSH_ENABLED:
*
* Whether the push parsing interfaces are configured in
*/
#if 1
#define LIBXML_PUSH_ENABLED
#endif
/**
* LIBXML_READER_ENABLED:
*
* Whether the xmlReader parsing interface is configured in
*/
#if 1
#define LIBXML_READER_ENABLED
#endif
/**
* LIBXML_PATTERN_ENABLED:
*
* Whether the xmlPattern node selection interface is configured in
*/
#if 1
#define LIBXML_PATTERN_ENABLED
#endif
/**
* LIBXML_WRITER_ENABLED:
*
* Whether the xmlWriter saving interface is configured in
*/
#if 1
#define LIBXML_WRITER_ENABLED
#endif
/**
* LIBXML_SAX1_ENABLED:
*
* Whether the older SAX1 interface is configured in
*/
#if 1
#define LIBXML_SAX1_ENABLED
#endif
/**
* LIBXML_FTP_ENABLED:
*
* Whether the FTP support is configured in
*/
#if 1
#define LIBXML_FTP_ENABLED
#endif
/**
* LIBXML_HTTP_ENABLED:
*
* Whether the HTTP support is configured in
*/
#if 1
#define LIBXML_HTTP_ENABLED
#endif
/**
* LIBXML_VALID_ENABLED:
*
* Whether the DTD validation support is configured in
*/
#if 1
#define LIBXML_VALID_ENABLED
#endif
/**
* LIBXML_HTML_ENABLED:
*
* Whether the HTML support is configured in
*/
#if 1
#define LIBXML_HTML_ENABLED
#endif
/**
* LIBXML_LEGACY_ENABLED:
*
* Whether the deprecated APIs are compiled in for compatibility
*/
#if 1
#define LIBXML_LEGACY_ENABLED
#endif
/**
* LIBXML_C14N_ENABLED:
*
* Whether the Canonicalization support is configured in
*/
#if 1
#define LIBXML_C14N_ENABLED
#endif
/**
* LIBXML_CATALOG_ENABLED:
*
* Whether the Catalog support is configured in
*/
#if 1
#define LIBXML_CATALOG_ENABLED
#endif
/**
* LIBXML_DOCB_ENABLED:
*
* Whether the SGML Docbook support is configured in
*/
#if 1
#define LIBXML_DOCB_ENABLED
#endif
/**
* LIBXML_XPATH_ENABLED:
*
* Whether XPath is configured in
*/
#if 1
#define LIBXML_XPATH_ENABLED
#endif
/**
* LIBXML_XPTR_ENABLED:
*
* Whether XPointer is configured in
*/
#if 1
#define LIBXML_XPTR_ENABLED
#endif
/**
* LIBXML_XINCLUDE_ENABLED:
*
* Whether XInclude is configured in
*/
#if 1
#define LIBXML_XINCLUDE_ENABLED
#endif
/**
* LIBXML_ICONV_ENABLED:
*
* Whether iconv support is available
*/
#if 1
#define LIBXML_ICONV_ENABLED
#endif
/**
* LIBXML_ICU_ENABLED:
*
* Whether icu support is available
*/
#if 0
#define LIBXML_ICU_ENABLED
#endif
/**
* LIBXML_ISO8859X_ENABLED:
*
* Whether ISO-8859-* support is made available in case iconv is not
*/
#if 1
#define LIBXML_ISO8859X_ENABLED
#endif
/**
* LIBXML_DEBUG_ENABLED:
*
* Whether Debugging module is configured in
*/
#if 1
#define LIBXML_DEBUG_ENABLED
#endif
/**
* DEBUG_MEMORY_LOCATION:
*
* Whether the memory debugging is configured in
*/
#if 0
#define DEBUG_MEMORY_LOCATION
#endif
/**
* LIBXML_DEBUG_RUNTIME:
*
* Whether the runtime debugging is configured in
*/
#if 0
#define LIBXML_DEBUG_RUNTIME
#endif
/**
* LIBXML_UNICODE_ENABLED:
*
* Whether the Unicode related interfaces are compiled in
*/
#if 1
#define LIBXML_UNICODE_ENABLED
#endif
/**
* LIBXML_REGEXP_ENABLED:
*
* Whether the regular expressions interfaces are compiled in
*/
#if 1
#define LIBXML_REGEXP_ENABLED
#endif
/**
* LIBXML_AUTOMATA_ENABLED:
*
* Whether the automata interfaces are compiled in
*/
#if 1
#define LIBXML_AUTOMATA_ENABLED
#endif
/**
* LIBXML_EXPR_ENABLED:
*
* Whether the formal expressions interfaces are compiled in
*/
#if 1
#define LIBXML_EXPR_ENABLED
#endif
/**
* LIBXML_SCHEMAS_ENABLED:
*
* Whether the Schemas validation interfaces are compiled in
*/
#if 1
#define LIBXML_SCHEMAS_ENABLED
#endif
/**
* LIBXML_SCHEMATRON_ENABLED:
*
* Whether the Schematron validation interfaces are compiled in
*/
#if 1
#define LIBXML_SCHEMATRON_ENABLED
#endif
/**
* LIBXML_MODULES_ENABLED:
*
* Whether the module interfaces are compiled in
*/
#if 1
#define LIBXML_MODULES_ENABLED
/**
* LIBXML_MODULE_EXTENSION:
*
* the string suffix used by dynamic modules (usually shared libraries)
*/
#define LIBXML_MODULE_EXTENSION ".so"
#endif
/**
* LIBXML_ZLIB_ENABLED:
*
* Whether the Zlib support is compiled in
*/
#if 1
#define LIBXML_ZLIB_ENABLED
#endif
/**
* LIBXML_LZMA_ENABLED:
*
* Whether the Lzma support is compiled in
*/
#if 0
#define LIBXML_LZMA_ENABLED
#endif
#ifdef __GNUC__
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
#ifndef ATTRIBUTE_UNUSED
# if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7)))
# define ATTRIBUTE_UNUSED __attribute__((unused))
# else
# define ATTRIBUTE_UNUSED
# endif
#endif
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#ifndef LIBXML_ATTR_ALLOC_SIZE
# if (!defined(__clang__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))))
# define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x)))
# else
# define LIBXML_ATTR_ALLOC_SIZE(x)
# endif
#else
# define LIBXML_ATTR_ALLOC_SIZE(x)
#endif
/**
* LIBXML_ATTR_FORMAT:
*
* Macro used to indicate to GCC the parameter are printf like
*/
#ifndef LIBXML_ATTR_FORMAT
# if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)))
# define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args)))
# else
# define LIBXML_ATTR_FORMAT(fmt,args)
# endif
#else
# define LIBXML_ATTR_FORMAT(fmt,args)
#endif
#else /* ! __GNUC__ */
/**
* ATTRIBUTE_UNUSED:
*
* Macro used to signal to GCC unused function parameters
*/
#define ATTRIBUTE_UNUSED
/**
* LIBXML_ATTR_ALLOC_SIZE:
*
* Macro used to indicate to GCC this is an allocator function
*/
#define LIBXML_ATTR_ALLOC_SIZE(x)
/**
* LIBXML_ATTR_FORMAT:
*
* Macro used to indicate to GCC the parameter are printf like
*/
#define LIBXML_ATTR_FORMAT(fmt,args)
#endif /* __GNUC__ */
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif

View File

@ -173,7 +173,6 @@ target_link_libraries (clickhouse_common_io
PRIVATE
apple_rt
${CMAKE_DL_LIBS}
${HDFS3_LIBRARY}
)
target_link_libraries (dbms
@ -264,7 +263,11 @@ target_link_libraries(dbms PRIVATE ${OPENSSL_CRYPTO_LIBRARY} Threads::Threads)
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR})
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR})
if (USE_HDFS)
target_link_libraries (dbms PRIVATE ${HDFS3_LIBRARY})
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR})
endif()
if (NOT USE_INTERNAL_LZ4_LIBRARY)
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR})

View File

@ -31,7 +31,6 @@
#include <Common/typeid_cast.h>
#include <Common/Config/ConfigProcessor.h>
#include <Common/config_version.h>
#include <Common/config.h>
#include <Core/Types.h>
#include <Core/QueryProcessingStage.h>
#include <Core/ExternalTable.h>
@ -785,11 +784,8 @@ private:
const ASTInsertQuery * insert = typeid_cast<const ASTInsertQuery *>(&*parsed_query);
connection->forceConnected();
#if ENABLE_INSERT_INFILE
if (insert && !insert->select && !insert->in_file)
#else
if (insert && !insert->select)
#endif
processInsertQuery();
else
processOrdinaryQuery();

View File

@ -15,5 +15,5 @@
#cmakedefine01 USE_POCO_MONGODB
#cmakedefine01 USE_POCO_NETSSL
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 ENABLE_INSERT_INFILE
#cmakedefine01 USE_BASE64
#cmakedefine01 USE_HDFS

View File

@ -1,4 +1,8 @@
#pragma once
#include <Common/config.h>
#if USE_HDFS
#include <IO/ReadBuffer.h>
#include <Poco/URI.h>
#include <hdfs/hdfs.h>
@ -89,3 +93,4 @@ namespace DB
}
};
}
#endif

View File

@ -1,7 +1,6 @@
#include <IO/ConcatReadBuffer.h>
#include <Common/typeid_cast.h>
#include <Common/config.h>
#include <DataStreams/AddingDefaultBlockOutputStream.h>
#include <DataStreams/CountingBlockOutputStream.h>
@ -10,23 +9,15 @@
#include <DataStreams/PushingToViewsBlockOutputStream.h>
#include <DataStreams/SquashingBlockOutputStream.h>
#include <DataStreams/copyData.h>
#include <DataStreams/UnionBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Parsers/ASTInsertQuery.h>
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Interpreters/InterpreterInsertQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <Parsers/ASTFunction.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadBufferFromHDFS.h>
#include <Poco/URI.h>
namespace DB
@ -37,7 +28,6 @@ namespace ErrorCodes
extern const int NO_SUCH_COLUMN_IN_TABLE;
extern const int READONLY;
extern const int ILLEGAL_COLUMN;
extern const int BAD_ARGUMENTS;
}
@ -151,84 +141,6 @@ BlockIO InterpreterInsertQuery::execute()
throw Exception("Cannot insert column " + name_type.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN);
}
}
#if ENABLE_INSERT_INFILE
else if (query.in_file)
{
// read data stream from in_file, and copy it to out
// Special handling in_file based on url type:
String uristr = typeid_cast<const ASTLiteral &>(*query.in_file).value.safeGet<String>();
// create Datastream based on Format:
String format = query.format;
if (format.empty())
format = context.getDefaultFormat();
auto & settings = context.getSettingsRef();
// Assume no query and fragment in uri, todo, add sanity check
String fuzzyFileNames;
String uriPrefix = uristr.substr(0, uristr.find_last_of('/'));
if (uriPrefix.length() == uristr.length())
{
fuzzyFileNames = uristr;
uriPrefix.clear();
}
else
{
uriPrefix += "/";
fuzzyFileNames = uristr.substr(uriPrefix.length());
}
Poco::URI uri(uriPrefix);
String scheme = uri.getScheme();
std::vector<String> fuzzyNameList = parseDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',', 100/* hard coded max files */);
//std::vector<std::vector<String> > fileNames;
//for (const auto & fuzzyName : fuzzyNameList)
// fileNames.push_back(parseDescription(fuzzyName, 0, fuzzyName.length(), '|', 100));
BlockInputStreams inputs;
for (const auto & name : fuzzyNameList)
{
std::unique_ptr<ReadBuffer> read_buf;
if (scheme.empty() || scheme == "file")
{
read_buf = std::make_unique<ReadBufferFromFile>(Poco::URI(uriPrefix + name).getPath());
}
else if (scheme == "hdfs")
{
read_buf = std::make_unique<ReadBufferFromHDFS>(uriPrefix + name);
}
else
{
throw Exception("URI scheme " + scheme + " is not supported with insert statement yet", ErrorCodes::BAD_ARGUMENTS);
}
inputs.emplace_back(
std::make_shared<OwningBlockInputStream<ReadBuffer>>(
context.getInputFormat(format, *read_buf,
res.out->getHeader(), // sample_block
settings.max_insert_block_size),
std::move(read_buf)));
}
if (inputs.size() == 0)
throw Exception("Inputs interpreter error", ErrorCodes::BAD_ARGUMENTS);
auto stream = inputs[0];
if (inputs.size() > 1)
{
stream = std::make_shared<UnionBlockInputStream<> >(inputs, nullptr, settings.max_distributed_connections);
}
res.in = std::make_shared<NullAndDoCopyBlockInputStream>(stream, res.out);
res.out = nullptr;
}
#endif
return res;
}

View File

@ -1,6 +1,5 @@
#include <iomanip>
#include <Parsers/ASTInsertQuery.h>
#include <Common/config.h>
namespace DB
@ -37,27 +36,10 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s
if (!format.empty())
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? hilite_none : "") << format;
#if ENABLE_INSERT_INFILE
if (in_file)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " INFILE " << (settings.hilite ? hilite_none : "");
in_file->formatImpl(settings, state, frame);
}
#endif
}
else
{
#if ENABLE_INSERT_INFILE
if (in_file)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " INFILE " << (settings.hilite ? hilite_none : "");
in_file->formatImpl(settings, state, frame);
}
else
#endif
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : "");
}
settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : "");
}
}
}

View File

@ -1,7 +1,6 @@
#pragma once
#include <Parsers/IAST.h>
#include <Common/config.h>
namespace DB
@ -20,10 +19,6 @@ public:
ASTPtr select;
ASTPtr table_function;
#if ENABLE_INSERT_INFILE
ASTPtr in_file;
#endif
// Set to true if the data should only be inserted into attached views
bool no_destination = false;
@ -45,12 +40,7 @@ public:
{
res->table_function = table_function->clone(); res->children.push_back(res->table_function);
}
#if ENABLE_INSERT_INFILE
if (in_file)
{
res->in_file = in_file->clone(); res->children.push_back(res->in_file);
}
#endif
return res;
}

View File

@ -10,7 +10,6 @@
#include <Parsers/ASTFunction.h>
#include <Common/typeid_cast.h>
#include <Common/config.h>
namespace DB
@ -44,11 +43,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ASTPtr format;
ASTPtr select;
ASTPtr table_function;
#if ENABLE_INSERT_INFILE
ParserKeyword s_infile("INFILE");
ASTPtr in_file;
#endif
/// Insertion data
const char * data = nullptr;
@ -87,7 +81,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
Pos before_select = pos;
/// VALUES or FORMAT or SELECT or INFILE
/// VALUES or FORMAT or SELECT
if (s_values.ignore(pos, expected))
{
data = pos->begin;
@ -99,45 +93,28 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!name_p.parse(pos, format, expected))
return false;
#if ENABLE_INSERT_INFILE
// there are two case after FORMAT xx:
// case 1: data_set.
// case 2: INFILE xx clause.
if (s_infile.ignore(pos, expected))
{
ParserStringLiteral in_file_p;
data = name_pos->end;
if (!in_file_p.parse(pos, in_file, expected))
return false;
if (data < end && *data == ';')
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
"Example:\n\n"
"INSERT INTO t (x, y) FORMAT TabSeparated\n"
";\tHello\n"
"2\tWorld\n"
"\n"
"Note that there is no ';' just after format name, "
"you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
}
else
{
#endif
data = name_pos->end;
if (data < end && *data == ';')
throw Exception("You have excessive ';' symbol before data for INSERT.\n"
"Example:\n\n"
"INSERT INTO t (x, y) FORMAT TabSeparated\n"
";\tHello\n"
"2\tWorld\n"
"\n"
"Note that there is no ';' just after format name, "
"you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR);
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
++data;
while (data < end && (*data == ' ' || *data == '\t' || *data == '\f'))
++data;
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
/// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise.
if (data < end && *data == '\r')
++data;
if (data < end && *data == '\r')
++data;
if (data < end && *data == '\n')
++data;
#if ENABLE_INSERT_INFILE
}
#endif
if (data < end && *data == '\n')
++data;
}
else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected))
{
@ -145,14 +122,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
ParserSelectWithUnionQuery select_p;
select_p.parse(pos, select, expected);
}
#if ENABLE_INSERT_INFILE
else if (s_infile.ignore(pos, expected))
{
ParserStringLiteral in_file_p;
if (!in_file_p.parse(pos, in_file, expected))
return false;
}
#endif
else
{
return false;
@ -178,24 +147,13 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
query->columns = columns;
query->select = select;
#if ENABLE_INSERT_INFILE
query->in_file = in_file;
if (query->in_file)
query->data = nullptr;
else
#endif
query->data = data != end ? data : nullptr;
query->data = data != end ? data : nullptr;
query->end = end;
if (columns)
query->children.push_back(columns);
if (select)
query->children.push_back(select);
#if ENABLE_INSERT_INFILE
if (in_file)
query->children.push_back(in_file);
#endif
return true;
}

View File

@ -9,22 +9,18 @@ namespace DB
/** Cases:
*
* #1 Normal case:
* Normal case:
* INSERT INTO [db.]table (c1, c2, c3) VALUES (v11, v12, v13), (v21, v22, v23), ...
* INSERT INTO [db.]table VALUES (v11, v12, v13), (v21, v22, v23), ...
*
* #2 Insert of data in an arbitrary format.
* Insert of data in an arbitrary format.
* The data itself comes after LF(line feed), if it exists, or after all the whitespace characters, otherwise.
* INSERT INTO [db.]table (c1, c2, c3) FORMAT format \n ...
* INSERT INTO [db.]table FORMAT format \n ...
*
* #3 Insert the result of the SELECT query.
* Insert the result of the SELECT query.
* INSERT INTO [db.]table (c1, c2, c3) SELECT ...
* INSERT INTO [db.]table SELECT ...
* This syntax is controversial, not open for now.
* #4 Insert of data in an arbitrary form from file(a bit variant of #2)
* INSERT INTO [db.]table (c1, c2, c3) FORMAT format INFILE 'url'
*/
class ParserInsertQuery : public IParserBase
{

View File

@ -1,3 +1,7 @@
#include <Common/config.h>
#if USE_HDFS
#include <Storages/StorageFactory.h>
#include <Storages/StorageHDFS.h>
#include <Interpreters/Context.h>
@ -11,9 +15,10 @@
#include <DataStreams/IProfilingBlockInputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Poco/Path.h>
#include <TableFunctions/ITableFunction.h>
#include <TableFunctions/parseRemoteDescription.h>
#include <Common/typeid_cast.h>
namespace DB
{
namespace ErrorCodes
@ -59,16 +64,10 @@ namespace
fuzzyFileNames = uri.substr(uriPrefix.length());
}
std::vector<String> fuzzyNameList = parseDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , 100/* hard coded max files */);
// Don't support | for globs compatible
//std::vector<std::vector<String> > fileNames;
//for(auto fuzzyName : fuzzyNameList)
// fileNames.push_back(parseDescription(fuzzyName, 0, fuzzyName.length(), '|', 100));
std::vector<String> fuzzyNameList = parseRemoteDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , 100/* hard coded max files */);
BlockInputStreams inputs;
//for (auto & vecNames : fileNames)
for (auto & name: fuzzyNameList)
{
std::unique_ptr<ReadBuffer> read_buf = std::make_unique<ReadBufferFromHDFS>(uriPrefix + name);
@ -176,3 +175,5 @@ void registerStorageHDFS(StorageFactory & factory)
}
}
#endif

View File

@ -1,4 +1,6 @@
#pragma once
#include <Common/config.h>
#if USE_HDFS
#include <Storages/IStorage.h>
#include <Poco/URI.h>
@ -50,3 +52,5 @@ private:
Logger * log = &Logger::get("StorageHDFS");
};
}
#endif

View File

@ -24,6 +24,10 @@ void registerStorageJoin(StorageFactory & factory);
void registerStorageView(StorageFactory & factory);
void registerStorageMaterializedView(StorageFactory & factory);
#if USE_HDFS
void registerStorageHDFS(StorageFactory & factory);
#endif
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
void registerStorageODBC(StorageFactory & factory);
#endif
@ -60,6 +64,10 @@ void registerStorages()
registerStorageView(factory);
registerStorageMaterializedView(factory);
#if USE_HDFS
registerStorageHDFS(factory);
#endif
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
registerStorageODBC(factory);
#endif

View File

@ -1,7 +1,5 @@
#include <TableFunctions/ITableFunction.h>
#include <Common/ProfileEvents.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace ProfileEvents
@ -18,168 +16,4 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context &
return executeImpl(ast_function, context);
}
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
}
/// The Cartesian product of two sets of rows, the result is written in place of the first argument
static void append(std::vector<String> & to, const std::vector<String> & what, size_t max_addresses)
{
if (what.empty())
return;
if (to.empty())
{
to = what;
return;
}
if (what.size() * to.size() > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
std::vector<String> res;
for (size_t i = 0; i < to.size(); ++i)
for (size_t j = 0; j < what.size(); ++j)
res.push_back(to[i] + what[j]);
to.swap(res);
}
/// Parse number from substring
static bool parseNumber(const String & description, size_t l, size_t r, size_t & res)
{
res = 0;
for (size_t pos = l; pos < r; pos ++)
{
if (!isNumericASCII(description[pos]))
return false;
res = res * 10 + description[pos] - '0';
if (res > 1e15)
return false;
}
return true;
}
/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
* depending on whether shards or replicas are generated.
* For example:
* host1,host2,... - generates set of shards from host1, host2, ...
* host1|host2|... - generates set of replicas from host1, host2, ...
* abc{8..10}def - generates set of shards abc8def, abc9def, abc10def.
* abc{08..10}def - generates set of shards abc08def, abc09def, abc10def.
* abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef.
* abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef.
* abc{1..9}de{f,g,h} - is a direct product, 27 shards.
* abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas.
*/
std::vector<String> parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses)
{
std::vector<String> res;
std::vector<String> cur;
/// An empty substring means a set of an empty string
if (l >= r)
{
res.push_back("");
return res;
}
for (size_t i = l; i < r; ++i)
{
/// Either the numeric interval (8..10) or equivalent expression in brackets
if (description[i] == '{')
{
int cnt = 1;
int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two
size_t m;
std::vector<String> buffer;
bool have_splitter = false;
/// Look for the corresponding closing bracket
for (m = i + 1; m < r; ++m)
{
if (description[m] == '{') ++cnt;
if (description[m] == '}') --cnt;
if (description[m] == '.' && description[m-1] == '.') last_dot = m;
if (description[m] == separator) have_splitter = true;
if (cnt == 0) break;
}
if (cnt != 0)
throw Exception("Table function 'remote': incorrect brace sequence in first argument",
ErrorCodes::BAD_ARGUMENTS);
/// The presence of a dot - numeric interval
if (last_dot != -1)
{
size_t left, right;
if (description[last_dot - 1] != '.')
throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (!parseNumber(description, i + 1, last_dot - 1, left))
throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (!parseNumber(description, last_dot + 1, m, right))
throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (left > right)
throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): "
+ description.substr(i, m - i + 1),
ErrorCodes::BAD_ARGUMENTS);
if (right - left + 1 > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
bool add_leading_zeroes = false;
size_t len = last_dot - 1 - (i + 1);
/// If the left and right borders have equal numbers, then you must add leading zeros.
if (last_dot - 1 - (i + 1) == m - (last_dot + 1))
add_leading_zeroes = true;
for (size_t id = left; id <= right; ++id)
{
String cur = toString<UInt64>(id);
if (add_leading_zeroes)
{
while (cur.size() < len)
cur = "0" + cur;
}
buffer.push_back(cur);
}
}
else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows
buffer = parseDescription(description, i + 1, m, separator, max_addresses);
else /// Otherwise just copy, spawn will occur when you call with the correct delimiter
buffer.push_back(description.substr(i, m - i + 1));
/// Add all possible received extensions to the current set of lines
append(cur, buffer, max_addresses);
i = m;
}
else if (description[i] == separator)
{
/// If the delimiter, then add found rows
res.insert(res.end(), cur.begin(), cur.end());
cur.clear();
}
else
{
/// Otherwise, simply append the character to current lines
std::vector<String> buffer;
buffer.push_back(description.substr(i, 1));
append(cur, buffer, max_addresses);
}
}
res.insert(res.end(), cur.begin(), cur.end());
if (res.size() > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
return res;
}
}

View File

@ -2,9 +2,6 @@
#include <string>
#include <memory>
#include <vector>
#include <Core/Types.h>
namespace DB
{
@ -44,6 +41,5 @@ private:
using TableFunctionPtr = std::shared_ptr<ITableFunction>;
std::vector<String> parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses);
}

View File

@ -1,3 +1,6 @@
#include <Common/config.h>
#if USE_HDFS
#include <Storages/StorageHDFS.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionHDFS.h>
@ -19,3 +22,4 @@ void registerTableFunctionHDFS(TableFunctionFactory & factory)
factory.registerFunction<TableFunctionHDFS>();
}
}
#endif

View File

@ -1,5 +1,9 @@
#pragma once
#include <Common/config.h>
#if USE_HDFS
#include <TableFunctions/ITableFunctionFileLike.h>
#include <Interpreters/Context.h>
#include <Core/Block.h>
@ -24,3 +28,5 @@ private:
const String & source, const String & format, const Block & sample_block, Context & global_context) const override;
};
}
#endif

View File

@ -11,6 +11,7 @@
#include <TableFunctions/TableFunctionRemote.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/parseRemoteDescription.h>
namespace DB
@ -145,11 +146,11 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
{
/// Create new cluster from the scratch
size_t max_addresses = context.getSettingsRef().table_function_remote_max_addresses;
std::vector<String> shards = parseDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses);
std::vector<String> shards = parseRemoteDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses);
std::vector<std::vector<String>> names;
for (size_t i = 0; i < shards.size(); ++i)
names.push_back(parseDescription(shards[i], 0, shards[i].size(), '|', max_addresses));
names.push_back(parseRemoteDescription(shards[i], 0, shards[i].size(), '|', max_addresses));
if (names.empty())
throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS);

View File

@ -0,0 +1,171 @@
#include <TableFunctions/parseRemoteDescription.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
    /// NOTE(review): NUMBER_OF_ARGUMENTS_DOESNT_MATCH is declared but not
    /// referenced anywhere in this translation unit — confirm whether it can be dropped.
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int BAD_ARGUMENTS;
}
/// The Cartesian product of two sets of rows, the result is written in place of the first argument
static void append(std::vector<String> & to, const std::vector<String> & what, size_t max_addresses)
{
if (what.empty())
return;
if (to.empty())
{
to = what;
return;
}
if (what.size() * to.size() > max_addresses)
throw Exception("Table function 'remote': first argument generates too many result addresses",
ErrorCodes::BAD_ARGUMENTS);
std::vector<String> res;
for (size_t i = 0; i < to.size(); ++i)
for (size_t j = 0; j < what.size(); ++j)
res.push_back(to[i] + what[j]);
to.swap(res);
}
/// Parse an unsigned decimal number from description[l, r).
/// Returns false (leaving `res` partially accumulated) on the first non-digit
/// character, or if the value grows beyond 1e15.
static bool parseNumber(const String & description, size_t l, size_t r, size_t & res)
{
    res = 0;
    for (size_t i = l; i < r; ++i)
    {
        char c = description[i];
        if (!isNumericASCII(c))
            return false;
        res = res * 10 + (c - '0');
        if (res > 1e15)
            return false;
    }
    return true;
}
/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
 * depending on whether shards or replicas are generated.
 * For example:
 * host1,host2,... - generates set of shards from host1, host2, ...
 * host1|host2|... - generates set of replicas from host1, host2, ...
 * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def.
 * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def.
 * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef.
 * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef.
 * abc{1..9}de{f,g,h} - is a direct product, 27 shards.
 * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas.
 *
 * Parses description[l, r) and returns the expanded list, splitting on `separator`
 * at the top level; nested {...} groups are handled by recursion. Throws
 * Exception(BAD_ARGUMENTS) on malformed braces/intervals or when more than
 * max_addresses strings would be produced.
 * NOTE(review): error messages hard-code "Table function 'remote'" even though
 * this function is also used for HDFS file-name expansion — confirm if intended.
 */
std::vector<String> parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses)
{
    std::vector<String> res;
    std::vector<String> cur;   /// Strings accumulated for the current top-level element (between separators).

    /// An empty substring means a set of an empty string
    if (l >= r)
    {
        res.push_back("");
        return res;
    }

    for (size_t i = l; i < r; ++i)
    {
        /// Either the numeric interval (8..10) or equivalent expression in brackets
        if (description[i] == '{')
        {
            int cnt = 1;
            int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two
            size_t m;
            std::vector<String> buffer;
            bool have_splitter = false;

            /// Look for the corresponding closing bracket
            for (m = i + 1; m < r; ++m)
            {
                if (description[m] == '{') ++cnt;
                if (description[m] == '}') --cnt;
                if (description[m] == '.' && description[m-1] == '.') last_dot = m;
                if (description[m] == separator) have_splitter = true;
                if (cnt == 0) break;
            }
            if (cnt != 0)
                throw Exception("Table function 'remote': incorrect brace sequence in first argument",
                                ErrorCodes::BAD_ARGUMENTS);
            /// The presence of a dot - numeric interval
            if (last_dot != -1)
            {
                size_t left, right;
                if (description[last_dot - 1] != '.')
                    throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1),
                                    ErrorCodes::BAD_ARGUMENTS);
                if (!parseNumber(description, i + 1, last_dot - 1, left))
                    throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): "
                                    + description.substr(i, m - i + 1),
                                    ErrorCodes::BAD_ARGUMENTS);
                if (!parseNumber(description, last_dot + 1, m, right))
                    throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): "
                                    + description.substr(i, m - i + 1),
                                    ErrorCodes::BAD_ARGUMENTS);
                if (left > right)
                    throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): "
                                    + description.substr(i, m - i + 1),
                                    ErrorCodes::BAD_ARGUMENTS);
                if (right - left + 1 > max_addresses)
                    throw Exception("Table function 'remote': first argument generates too many result addresses",
                                    ErrorCodes::BAD_ARGUMENTS);
                bool add_leading_zeroes = false;
                size_t len = last_dot - 1 - (i + 1);   /// Width in digits of the left bound.
                /// If the left and right bounds are written with the same number of digits
                /// (e.g. {08..10}), pad generated numbers with leading zeroes to that width.
                if (last_dot - 1 - (i + 1) == m - (last_dot + 1))
                    add_leading_zeroes = true;
                for (size_t id = left; id <= right; ++id)
                {
                    String cur = toString<UInt64>(id);   /// NOTE: deliberately(?) shadows the outer `cur`.
                    if (add_leading_zeroes)
                    {
                        while (cur.size() < len)
                            cur = "0" + cur;
                    }
                    buffer.push_back(cur);
                }
            }
            else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows
                buffer = parseRemoteDescription(description, i + 1, m, separator, max_addresses);
            else /// Otherwise just copy, spawn will occur when you call with the correct delimiter
                buffer.push_back(description.substr(i, m - i + 1));

            /// Add all possible received extensions to the current set of lines
            append(cur, buffer, max_addresses);
            i = m;   /// Skip past the closing brace.
        }
        else if (description[i] == separator)
        {
            /// If the delimiter, then add found rows
            res.insert(res.end(), cur.begin(), cur.end());
            cur.clear();
        }
        else
        {
            /// Otherwise, simply append the character to current lines
            std::vector<String> buffer;
            buffer.push_back(description.substr(i, 1));
            append(cur, buffer, max_addresses);
        }
    }

    res.insert(res.end(), cur.begin(), cur.end());
    if (res.size() > max_addresses)
        throw Exception("Table function 'remote': first argument generates too many result addresses",
                        ErrorCodes::BAD_ARGUMENTS);
    return res;
}
}

View File

@ -0,0 +1,20 @@
#pragma once
#include <Core/Types.h>
#include <vector>
namespace DB
{
/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
 * depending on whether shards or replicas are generated.
 * For example:
 * host1,host2,... - generates set of shards from host1, host2, ...
 * host1|host2|... - generates set of replicas from host1, host2, ...
 * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def.
 * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def.
 * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef.
 * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef.
 * abc{1..9}de{f,g,h} - is a direct product, 27 shards.
 * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas.
 *
 * Expands description[l, r) and returns the resulting list of strings.
 * Throws an Exception with BAD_ARGUMENTS on malformed input or when more than
 * max_addresses strings would be generated.
 */
std::vector<String> parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses);
}

View File

@ -13,7 +13,10 @@ void registerTableFunctionNumbers(TableFunctionFactory & factory);
void registerTableFunctionCatBoostPool(TableFunctionFactory & factory);
void registerTableFunctionFile(TableFunctionFactory & factory);
void registerTableFunctionURL(TableFunctionFactory & factory);
#if USE_HDFS
void registerTableFunctionHDFS(TableFunctionFactory & factory);
#endif
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
void registerTableFunctionODBC(TableFunctionFactory & factory);
@ -37,7 +40,10 @@ void registerTableFunctions()
registerTableFunctionCatBoostPool(factory);
registerTableFunctionFile(factory);
registerTableFunctionURL(factory);
#if USE_HDFS
registerTableFunctionHDFS(factory);
#endif
#if USE_POCO_SQLODBC || USE_POCO_DATAODBC
registerTableFunctionODBC(factory);

View File

@ -14,11 +14,13 @@ import xml.dom.minidom
from kazoo.client import KazooClient
from kazoo.exceptions import KazooException
import psycopg2
import requests
import docker
from docker.errors import ContainerError
from .client import Client, CommandRequest
from .hdfs_api import HDFSApi
HELPERS_DIR = p.dirname(__file__)
@ -83,6 +85,7 @@ class ClickHouseCluster:
self.with_postgres = False
self.with_kafka = False
self.with_odbc_drivers = False
self.with_hdfs = False
self.docker_client = None
self.is_up = False
@ -94,7 +97,7 @@ class ClickHouseCluster:
cmd += " client"
return cmd
def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False):
def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False):
"""Add an instance to the cluster.
name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
@ -148,13 +151,19 @@ class ClickHouseCluster:
self.base_postgres_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_postgres.yml')]
if with_kafka and not self.with_kafka:
self.with_kafka = True
self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_kafka.yml')])
self.base_kafka_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_kafka.yml')]
if with_hdfs and not self.with_hdfs:
self.with_hdfs = True
self.base_cmd.extend(['--file', p.join(HELPERS_DIR, 'docker_compose_hdfs.yml')])
self.base_hdfs_cmd = ['docker-compose', '--project-directory', self.base_dir, '--project-name',
self.project_name, '--file', p.join(HELPERS_DIR, 'docker_compose_hdfs.yml')]
return instance
@ -212,6 +221,20 @@ class ClickHouseCluster:
raise Exception("Cannot wait ZooKeeper container")
def wait_hdfs_to_start(self, timeout=60):
hdfs_api = HDFSApi("root")
start = time.time()
while time.time() - start < timeout:
try:
hdfs_api.write_data("/somefilewithrandomname222", "1")
print "Connected to HDFS and SafeMode disabled! "
return
except Exception as ex:
print "Can't connect to HDFS " + str(ex)
time.sleep(1)
raise Exception("Can't wait HDFS to start")
def start(self, destroy_dirs=True):
if self.is_up:
return
@ -250,7 +273,11 @@ class ClickHouseCluster:
subprocess_check_call(self.base_kafka_cmd + ['up', '-d', '--force-recreate'])
self.kafka_docker_id = self.get_instance_docker_id('kafka1')
subprocess_check_call(self.base_cmd + ['up', '-d', '--force-recreate'])
if self.with_hdfs and self.base_hdfs_cmd:
subprocess_check_call(self.base_hdfs_cmd + ['up', '-d', '--force-recreate'])
self.wait_hdfs_to_start(120)
subprocess_check_call(self.base_cmd + ['up', '-d', '--no-recreate'])
start_deadline = time.time() + 20.0 # seconds
for instance in self.instances.itervalues():
@ -310,7 +337,6 @@ services:
{name}:
image: {image}
hostname: {hostname}
user: '{uid}'
volumes:
- {binary_path}:/usr/bin/clickhouse:ro
- {configs_dir}:/etc/clickhouse-server/
@ -588,7 +614,6 @@ class ClickHouseInstance:
image=self.image,
name=self.name,
hostname=self.hostname,
uid=os.getuid(),
binary_path=self.server_bin_path,
configs_dir=configs_dir,
config_d_dir=config_d_dir,

View File

@ -0,0 +1,9 @@
version: '2'
services:
  # Single-node Hadoop/HDFS container used by the integration tests.
  hdfs1:
    image: sequenceiq/hadoop-docker:2.7.0
    restart: always
    ports:
      # 50075 (datanode HTTP) and 50070 (namenode HTTP) — the same ports
      # helpers/hdfs_api.py connects to for WebHDFS.
      - 50075:50075
      - 50070:50070
    # NOTE(review): presumably -d keeps the image's bootstrap script in the
    # foreground so the container stays up — confirm against the image docs.
    entrypoint: /etc/bootstrap.sh -d

View File

@ -0,0 +1,46 @@
#-*- coding: utf-8 -*-
import requests
import subprocess
from tempfile import NamedTemporaryFile
class HDFSApi(object):
    """Minimal WebHDFS client used by the integration tests.

    Metadata requests go to the namenode/proxy endpoint (port 50070); the data
    itself is transferred via the datanode endpoint (port 50075), following the
    two-step redirect protocol of WebHDFS.
    """

    def __init__(self, user):
        self.host = "localhost"
        self.http_proxy_port = "50070"  # namenode / proxy HTTP port
        self.http_data_port = "50075"   # datanode HTTP port
        self.user = user

    def read_data(self, path):
        """Fetch the whole file at `path` from HDFS and return it as text."""
        # Step 1: ask the namenode; it answers 307 with the datanode location.
        open_url = "http://{host}:{port}/webhdfs/v1{path}?op=OPEN".format(host=self.host, port=self.http_proxy_port, path=path)
        resp = requests.get(open_url, allow_redirects=False)
        if resp.status_code != 307:
            resp.raise_for_status()
        # Keep only the first extra query parameter from the redirect Location.
        extra = '&'.join(resp.headers['Location'].split('&')[1:2])
        # Step 2: read the data directly from the datanode.
        data_url = "http://{host}:{port}/webhdfs/v1{path}?op=OPEN&{params}".format(host=self.host, port=self.http_data_port, path=path, params=extra)
        data_resp = requests.get(data_url)
        if data_resp.status_code != 200:
            data_resp.raise_for_status()
        return data_resp.text

    # requests cannot stream this PUT the way HDFS expects, so shell out to curl.
    def _curl_to_put(self, filename, path, params):
        url = "http://{host}:{port}/webhdfs/v1{path}?op=CREATE&{params}".format(host=self.host, port=self.http_data_port, path=path, params=params)
        cmd = "curl -s -i -X PUT -T {fname} '{url}'".format(fname=filename, url=url)
        return subprocess.check_output(cmd, shell=True)

    def write_data(self, path, content):
        """Create (or overwrite) the file at `path` on HDFS with `content`."""
        tmp = NamedTemporaryFile()
        tmp.write(content)
        tmp.flush()
        # NOTE(review): `user` is passed to format() but the template has no
        # {user} placeholder, so it is ignored here; user.name is instead added
        # to the datanode request below — confirm this is intentional.
        create_url = "http://{host}:{port}/webhdfs/v1{path}?op=CREATE".format(host=self.host, port=self.http_proxy_port, path=path, user=self.user)
        resp = requests.put(create_url, allow_redirects=False)
        if resp.status_code != 307:
            resp.raise_for_status()
        extra = '&'.join(resp.headers['Location'].split('&')[1:2] + ["user.name={}".format(self.user), "overwrite=true"])
        output = self._curl_to_put(tmp.name, path, extra)
        if "201 Created" not in output:
            raise Exception("Can't create file on hdfs:\n {}".format(output))

View File

@ -0,0 +1,11 @@
<yandex>
    <!-- Logger settings for the ClickHouse server inside the test container:
         trace-level logging to files under /var/log/clickhouse-server/. -->
    <logger>
        <level>trace</level>
        <log>/var/log/clickhouse-server/log.log</log>
        <errorlog>/var/log/clickhouse-server/log.err.log</errorlog>
        <!-- size/count presumably control log rotation (max file size, number of
             rotated files kept) — confirm against the server config reference. -->
        <size>1000M</size>
        <count>10</count>
        <stderr>/var/log/clickhouse-server/stderr.log</stderr>
        <stdout>/var/log/clickhouse-server/stdout.log</stdout>
    </logger>
</yandex>

View File

@ -0,0 +1,47 @@
import time
import pytest
import requests
from tempfile import NamedTemporaryFile
from helpers.hdfs_api import HDFSApi
import os
from helpers.cluster import ClickHouseCluster
import subprocess
# Directory containing this test file.
# NOTE(review): SCRIPT_DIR appears unused in this file — confirm before removing.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))

# One ClickHouse node plus an HDFS container (see docker_compose_hdfs.yml).
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_hdfs=True, image='withlibsimage', config_dir="configs", main_configs=['configs/log_conf.xml'])
@pytest.fixture(scope="module")
def started_cluster():
    """Module-scoped fixture: start the cluster once, shut it down after all tests.

    Yields the running ClickHouseCluster; always shuts it down, even on a
    startup failure.
    """
    try:
        cluster.start()
        yield cluster
    except Exception as ex:
        print(ex)
        # Bare `raise` re-raises the in-flight exception with its original
        # traceback; `raise ex` (as before) discarded the traceback.
        raise
    finally:
        cluster.shutdown()
def test_read_write_storage(started_cluster):
    """Round-trip one row through the HDFS storage engine: write the raw TSV via
    WebHDFS, then read it back both via WebHDFS and via a ClickHouse table."""
    api = HDFSApi("root")
    row = "1\tMark\t72.53\n"
    api.write_data("/simple_storage", row)
    assert api.read_data("/simple_storage") == row
    node1.query("create table SimpleHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/simple_storage', 'TSV')")
    assert node1.query("select * from SimpleHDFSStorage") == row
def test_read_write_table(started_cluster):
    """Write TSV rows via WebHDFS and read them back through the hdfs() table function."""
    api = HDFSApi("root")
    data = "1\tSerialize\t555.222\n2\tData\t777.333\n"
    api.write_data("/simple_table_function", data)
    assert api.read_data("/simple_table_function") == data
    result = node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')")
    assert result == data