From 80b49e4c0a05ec2b7beb94e7c45e07a3bf1463f1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 5 Dec 2018 16:24:45 +0300 Subject: [PATCH] Add integration test, put dependent libs to contrib, slightly refine code. --- .gitmodules | 6 + CMakeLists.txt | 7 +- cmake/find_hdfs3.cmake | 8 + cmake/find_libxml2.cmake | 20 + contrib/CMakeLists.txt | 8 + contrib/libgsasl | 1 + contrib/libhdfs3 | 2 +- contrib/libhdfs3-cmake/CMake/Platform.cmake | 2 +- contrib/libhdfs3-cmake/CMakeLists.txt | 27 +- contrib/libxml2 | 1 + contrib/libxml2-cmake/CMakeLists.txt | 63 +++ .../linux_x86_64/include/config.h | 285 +++++++++++ .../linux_x86_64/include/libxml/xmlversion.h | 481 ++++++++++++++++++ dbms/CMakeLists.txt | 7 +- dbms/programs/client/Client.cpp | 6 +- dbms/src/Common/config.h.in | 2 +- dbms/src/IO/ReadBufferFromHDFS.h | 5 + .../Interpreters/InterpreterInsertQuery.cpp | 88 ---- dbms/src/Parsers/ASTInsertQuery.cpp | 20 +- dbms/src/Parsers/ASTInsertQuery.h | 12 +- dbms/src/Parsers/ParserInsertQuery.cpp | 80 +-- dbms/src/Parsers/ParserInsertQuery.h | 10 +- dbms/src/Storages/StorageHDFS.cpp | 17 +- dbms/src/Storages/StorageHDFS.h | 4 + dbms/src/Storages/registerStorages.cpp | 8 + dbms/src/TableFunctions/ITableFunction.cpp | 166 ------ dbms/src/TableFunctions/ITableFunction.h | 4 - dbms/src/TableFunctions/TableFunctionHDFS.cpp | 4 + dbms/src/TableFunctions/TableFunctionHDFS.h | 6 + .../TableFunctions/TableFunctionRemote.cpp | 5 +- .../TableFunctions/parseRemoteDescription.cpp | 171 +++++++ .../TableFunctions/parseRemoteDescription.h | 20 + .../TableFunctions/registerTableFunctions.cpp | 6 + dbms/tests/integration/helpers/cluster.py | 35 +- .../helpers/docker_compose_hdfs.yml | 9 + dbms/tests/integration/helpers/hdfs_api.py | 46 ++ .../integration/test_storage_hdfs/__init__.py | 0 .../test_storage_hdfs/configs/log_conf.xml | 11 + .../integration/test_storage_hdfs/test.py | 47 ++ 39 files changed, 1300 insertions(+), 400 deletions(-) create mode 100644 cmake/find_libxml2.cmake create 
mode 160000 contrib/libgsasl create mode 160000 contrib/libxml2 create mode 100644 contrib/libxml2-cmake/CMakeLists.txt create mode 100644 contrib/libxml2-cmake/linux_x86_64/include/config.h create mode 100644 contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h create mode 100644 dbms/src/TableFunctions/parseRemoteDescription.cpp create mode 100644 dbms/src/TableFunctions/parseRemoteDescription.h create mode 100644 dbms/tests/integration/helpers/docker_compose_hdfs.yml create mode 100644 dbms/tests/integration/helpers/hdfs_api.py create mode 100644 dbms/tests/integration/test_storage_hdfs/__init__.py create mode 100644 dbms/tests/integration/test_storage_hdfs/configs/log_conf.xml create mode 100644 dbms/tests/integration/test_storage_hdfs/test.py diff --git a/.gitmodules b/.gitmodules index b48eb5f9e3e..20b5cb88ced 100644 --- a/.gitmodules +++ b/.gitmodules @@ -55,3 +55,9 @@ [submodule "contrib/libhdfs3"] path = contrib/libhdfs3 url = https://github.com/ClickHouse-Extras/libhdfs3.git +[submodule "contrib/libxml2"] + path = contrib/libxml2 + url = https://github.com/GNOME/libxml2.git +[submodule "contrib/libgsasl"] + path = contrib/libgsasl + url = git@github.com:ClickHouse-Extras/libgsasl.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 06ff4fea1d3..b9262c5c554 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -89,7 +89,6 @@ endif () option (TEST_COVERAGE "Enables flags for test coverage" OFF) option (ENABLE_TESTS "Enables tests" ON) -option (ENABLE_INSERT_INFILE "Enables INSERT INFILE syntax" OFF) if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64|x86_64") option (USE_INTERNAL_MEMCPY "Use internal implementation of 'memcpy' function instead of provided by libc. Only for x86_64." 
ON) @@ -232,10 +231,6 @@ if (ENABLE_TESTS) enable_testing() endif () -if (ENABLE_INSERT_INFILE) - message (STATUS "INSERT INFILE SYNTAX support") -endif () - # when installing to /usr - place configs to /etc but for /usr/local place to /usr/local/etc if (CMAKE_INSTALL_PREFIX STREQUAL "/usr") set (CLICKHOUSE_ETC_DIR "/etc") @@ -279,6 +274,8 @@ include (cmake/find_rdkafka.cmake) include (cmake/find_capnp.cmake) include (cmake/find_llvm.cmake) include (cmake/find_cpuid.cmake) +include (cmake/find_libgsasl.cmake) +include (cmake/find_libxml2.cmake) include (cmake/find_hdfs3.cmake) include (cmake/find_consistent-hashing.cmake) include (cmake/find_base64.cmake) diff --git a/cmake/find_hdfs3.cmake b/cmake/find_hdfs3.cmake index 401613915a4..7620ad1ed46 100644 --- a/cmake/find_hdfs3.cmake +++ b/cmake/find_hdfs3.cmake @@ -1,3 +1,8 @@ +if (NOT ARCH_ARM) + option (ENABLE_HDFS "Enable HDFS" ${NOT_UNBUNDLED}) +endif () + +if (ENABLE_HDFS) option (USE_INTERNAL_HDFS3_LIBRARY "Set to FALSE to use system HDFS3 instead of bundled" ON) if (NOT USE_INTERNAL_HDFS3_LIBRARY) @@ -9,5 +14,8 @@ else () set(HDFS3_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libhdfs3/include") set(HDFS3_LIBRARY hdfs3) endif() +set (USE_HDFS 1) + +endif() message (STATUS "Using hdfs3: ${HDFS3_INCLUDE_DIR} : ${HDFS3_LIBRARY}") diff --git a/cmake/find_libxml2.cmake b/cmake/find_libxml2.cmake new file mode 100644 index 00000000000..cfababfbf63 --- /dev/null +++ b/cmake/find_libxml2.cmake @@ -0,0 +1,20 @@ +option (USE_INTERNAL_LIBXML2_LIBRARY "Set to FALSE to use system libxml2 library instead of bundled" ${NOT_UNBUNDLED}) + +if (USE_INTERNAL_LIBXML2_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libxml2/libxml.h") + message (WARNING "submodule contrib/libxml2 is missing. 
to fix try run: \n git submodule update --init --recursive") + set (USE_INTERNAL_LIBXML2_LIBRARY 0) +endif () + +if (NOT USE_INTERNAL_LIBXML2_LIBRARY) + find_library (LIBXML2_LIBRARY libxml2) + find_path (LIBXML2_INCLUDE_DIR NAMES libxml.h PATHS ${LIBXML2_INCLUDE_PATHS}) +endif () + +if (LIBXML2_LIBRARY AND LIBXML2_INCLUDE_DIR) +else () + set (LIBXML2_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libxml2/include ${ClickHouse_SOURCE_DIR}/contrib/libxml2-cmake/linux_x86_64/include) + set (USE_INTERNAL_LIBXML2_LIBRARY 1) + set (LIBXML2_LIBRARY libxml2) +endif () + +message (STATUS "Using libxml2: ${LIBXML2_INCLUDE_DIR} : ${LIBXML2_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index b7085f992c3..e63cd5f0ea9 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -191,6 +191,14 @@ if (USE_INTERNAL_LLVM_LIBRARY) add_subdirectory (llvm/llvm) endif () +if (USE_INTERNAL_LIBGSASL_LIBRARY) + add_subdirectory(libgsasl) +endif() + +if (USE_INTERNAL_LIBXML2_LIBRARY) + add_subdirectory(libxml2-cmake) +endif () + if (USE_INTERNAL_HDFS3_LIBRARY) include(${CMAKE_SOURCE_DIR}/cmake/find_protobuf.cmake) if (USE_INTERNAL_PROTOBUF_LIBRARY) diff --git a/contrib/libgsasl b/contrib/libgsasl new file mode 160000 index 00000000000..3b8948a4042 --- /dev/null +++ b/contrib/libgsasl @@ -0,0 +1 @@ +Subproject commit 3b8948a4042e34fb00b4fb987535dc9e02e39040 diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index 4e9940eb82a..bd6505cbb0c 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit 4e9940eb82a8c02b23b4985ce5242778c8d5bc11 +Subproject commit bd6505cbb0c130b0db695305b9a38546fa880e5a diff --git a/contrib/libhdfs3-cmake/CMake/Platform.cmake b/contrib/libhdfs3-cmake/CMake/Platform.cmake index ea00fa3f401..55fbf646589 100644 --- a/contrib/libhdfs3-cmake/CMake/Platform.cmake +++ b/contrib/libhdfs3-cmake/CMake/Platform.cmake @@ -16,7 +16,7 @@ IF(CMAKE_COMPILER_IS_GNUCXX) STRING(REGEX MATCHALL "[0-9]+" GCC_COMPILER_VERSION 
${GCC_COMPILER_VERSION}) LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MAJOR) - LIST(GET GCC_COMPILER_VERSION 1 GCC_COMPILER_VERSION_MINOR) + LIST(GET GCC_COMPILER_VERSION 0 GCC_COMPILER_VERSION_MINOR) SET(GCC_COMPILER_VERSION_MAJOR ${GCC_COMPILER_VERSION_MAJOR} CACHE INTERNAL "gcc major version") SET(GCC_COMPILER_VERSION_MINOR ${GCC_COMPILER_VERSION_MINOR} CACHE INTERNAL "gcc minor version") diff --git a/contrib/libhdfs3-cmake/CMakeLists.txt b/contrib/libhdfs3-cmake/CMakeLists.txt index 5592af5e15b..1e9e36ecd08 100644 --- a/contrib/libhdfs3-cmake/CMakeLists.txt +++ b/contrib/libhdfs3-cmake/CMakeLists.txt @@ -7,6 +7,7 @@ if (NOT USE_INTERNAL_PROTOBUF_LIBRARY) endif () endif() +SET(WITH_KERBEROS false) # project and source dir set(HDFS3_ROOT_DIR ${CMAKE_SOURCE_DIR}/contrib/libhdfs3) set(HDFS3_SOURCE_DIR ${HDFS3_ROOT_DIR}/src) @@ -18,11 +19,9 @@ include(Platform) include(Options) # prefer shared libraries -set(CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_SHARED_LIBRARY_SUFFIX}) -#find_package(CURL REQUIRED) -find_package(GSasl REQUIRED) -find_package(KERBEROS REQUIRED) -find_package(LibXml2 REQUIRED) +if (WITH_KERBEROS) + find_package(KERBEROS REQUIRED) +endif() # source set(PROTO_FILES @@ -192,17 +191,17 @@ target_include_directories(hdfs3 PRIVATE ${HDFS3_SOURCE_DIR}) target_include_directories(hdfs3 PRIVATE ${HDFS3_COMMON_DIR}) target_include_directories(hdfs3 PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) -#target_include_directories(hdfs3 PRIVATE ${CURL_INCLUDE_DIR}) -target_include_directories(hdfs3 PRIVATE ${GSASL_INCLUDE_DIR}) -target_include_directories(hdfs3 PRIVATE ${KERBEROS_INCLUDE_DIRS}) +target_include_directories(hdfs3 PRIVATE ${LIBGSASL_INCLUDE_DIR}) +if (WITH_KERBEROS) + target_include_directories(hdfs3 PRIVATE ${KERBEROS_INCLUDE_DIRS}) +endif() target_include_directories(hdfs3 PRIVATE ${LIBXML2_INCLUDE_DIR}) -#target_include_directories(hdfs3 PRIVATE ${LIBUUID_INCLUDE_DIRS}) -#target_link_libraries(hdfs3 ${CURL_LIBRARIES}) -target_link_libraries(hdfs3 
${GSASL_LIBRARIES}) -target_link_libraries(hdfs3 ${KERBEROS_LIBRARIES}) -target_link_libraries(hdfs3 ${LIBXML2_LIBRARIES}) -#target_link_libraries(hdfs3 ${LIBUUID_LIBRARIES}) +target_link_libraries(hdfs3 ${LIBGSASL_LIBRARY}) +if (WITH_KERBEROS) + target_link_libraries(hdfs3 ${KERBEROS_LIBRARIES}) +endif() +target_link_libraries(hdfs3 ${LIBXML2_LIBRARY}) # inherit from parent cmake target_include_directories(hdfs3 PRIVATE ${Boost_INCLUDE_DIRS}) diff --git a/contrib/libxml2 b/contrib/libxml2 new file mode 160000 index 00000000000..18890f471c4 --- /dev/null +++ b/contrib/libxml2 @@ -0,0 +1 @@ +Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf diff --git a/contrib/libxml2-cmake/CMakeLists.txt b/contrib/libxml2-cmake/CMakeLists.txt new file mode 100644 index 00000000000..c4b7f39cc8f --- /dev/null +++ b/contrib/libxml2-cmake/CMakeLists.txt @@ -0,0 +1,63 @@ +set(LIBXML2_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/libxml2) +set(LIBXML2_BINARY_DIR ${CMAKE_BINARY_DIR}/contrib/libxml2) + + +set(SRCS + ${LIBXML2_SOURCE_DIR}/parser.c + ${LIBXML2_SOURCE_DIR}/HTMLparser.c + ${LIBXML2_SOURCE_DIR}/buf.c + ${LIBXML2_SOURCE_DIR}/xzlib.c + ${LIBXML2_SOURCE_DIR}/xmlregexp.c + ${LIBXML2_SOURCE_DIR}/entities.c + ${LIBXML2_SOURCE_DIR}/rngparser.c + ${LIBXML2_SOURCE_DIR}/encoding.c + ${LIBXML2_SOURCE_DIR}/legacy.c + ${LIBXML2_SOURCE_DIR}/error.c + ${LIBXML2_SOURCE_DIR}/debugXML.c + ${LIBXML2_SOURCE_DIR}/xpointer.c + ${LIBXML2_SOURCE_DIR}/DOCBparser.c + ${LIBXML2_SOURCE_DIR}/xmlcatalog.c + ${LIBXML2_SOURCE_DIR}/c14n.c + ${LIBXML2_SOURCE_DIR}/xmlreader.c + ${LIBXML2_SOURCE_DIR}/xmlstring.c + ${LIBXML2_SOURCE_DIR}/dict.c + ${LIBXML2_SOURCE_DIR}/xpath.c + ${LIBXML2_SOURCE_DIR}/tree.c + ${LIBXML2_SOURCE_DIR}/trionan.c + ${LIBXML2_SOURCE_DIR}/pattern.c + ${LIBXML2_SOURCE_DIR}/globals.c + ${LIBXML2_SOURCE_DIR}/xmllint.c + ${LIBXML2_SOURCE_DIR}/chvalid.c + ${LIBXML2_SOURCE_DIR}/relaxng.c + ${LIBXML2_SOURCE_DIR}/list.c + ${LIBXML2_SOURCE_DIR}/xinclude.c + ${LIBXML2_SOURCE_DIR}/xmlIO.c + 
${LIBXML2_SOURCE_DIR}/triostr.c + ${LIBXML2_SOURCE_DIR}/hash.c + ${LIBXML2_SOURCE_DIR}/xmlsave.c + ${LIBXML2_SOURCE_DIR}/HTMLtree.c + ${LIBXML2_SOURCE_DIR}/SAX.c + ${LIBXML2_SOURCE_DIR}/xmlschemas.c + ${LIBXML2_SOURCE_DIR}/SAX2.c + ${LIBXML2_SOURCE_DIR}/threads.c + ${LIBXML2_SOURCE_DIR}/runsuite.c + ${LIBXML2_SOURCE_DIR}/catalog.c + ${LIBXML2_SOURCE_DIR}/uri.c + ${LIBXML2_SOURCE_DIR}/xmlmodule.c + ${LIBXML2_SOURCE_DIR}/xlink.c + ${LIBXML2_SOURCE_DIR}/parserInternals.c + ${LIBXML2_SOURCE_DIR}/xmlwriter.c + ${LIBXML2_SOURCE_DIR}/xmlunicode.c + ${LIBXML2_SOURCE_DIR}/runxmlconf.c + ${LIBXML2_SOURCE_DIR}/xmlmemory.c + ${LIBXML2_SOURCE_DIR}/nanoftp.c + ${LIBXML2_SOURCE_DIR}/xmlschemastypes.c + ${LIBXML2_SOURCE_DIR}/valid.c + ${LIBXML2_SOURCE_DIR}/nanohttp.c + ${LIBXML2_SOURCE_DIR}/schematron.c +) +add_library(libxml2 STATIC ${SRCS}) + +target_include_directories(libxml2 PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/linux_x86_64/include) + +target_include_directories(libxml2 PUBLIC ${LIBXML2_SOURCE_DIR}/include) diff --git a/contrib/libxml2-cmake/linux_x86_64/include/config.h b/contrib/libxml2-cmake/linux_x86_64/include/config.h new file mode 100644 index 00000000000..7969b377dc3 --- /dev/null +++ b/contrib/libxml2-cmake/linux_x86_64/include/config.h @@ -0,0 +1,285 @@ +/* config.h. Generated from config.h.in by configure. */ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Type cast for the gethostbyname() argument */ +#define GETHOSTBYNAME_ARG_CAST /**/ + +/* Define to 1 if you have the header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ARPA_NAMESER_H 1 + +/* Whether struct sockaddr::__ss_family exists */ +/* #undef HAVE_BROKEN_SS_FAMILY */ + +/* Define to 1 if you have the header file. */ +#define HAVE_CTYPE_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_DLFCN_H 1 + +/* Have dlopen based dso */ +#define HAVE_DLOPEN /**/ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ERRNO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FLOAT_H 1 + +/* Define to 1 if you have the `fprintf' function. */ +#define HAVE_FPRINTF 1 + +/* Define to 1 if you have the `ftime' function. */ +#define HAVE_FTIME 1 + +/* Define if getaddrinfo is there */ +#define HAVE_GETADDRINFO /**/ + +/* Define to 1 if you have the `gettimeofday' function. */ +#define HAVE_GETTIMEOFDAY 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `isascii' function. */ +#define HAVE_ISASCII 1 + +/* Define if isinf is there */ +#define HAVE_ISINF /**/ + +/* Define if isnan is there */ +#define HAVE_ISNAN /**/ + +/* Define if history library is there (-lhistory) */ +/* #undef HAVE_LIBHISTORY */ + +/* Define if pthread library is there (-lpthread) */ +#define HAVE_LIBPTHREAD /**/ + +/* Define if readline library is there (-lreadline) */ +/* #undef HAVE_LIBREADLINE */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the `localtime' function. */ +#define HAVE_LOCALTIME 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMA_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_MALLOC_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MATH_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mmap' function. */ +#define HAVE_MMAP 1 + +/* Define to 1 if you have the `munmap' function. 
*/ +#define HAVE_MUNMAP 1 + +/* mmap() is no good without munmap() */ +#if defined(HAVE_MMAP) && !defined(HAVE_MUNMAP) +# undef /**/ HAVE_MMAP +#endif + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +/* #undef HAVE_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_NETDB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_NETINET_IN_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POLL_H 1 + +/* Define to 1 if you have the `printf' function. */ +#define HAVE_PRINTF 1 + +/* Define if is there */ +#define HAVE_PTHREAD_H /**/ + +/* Define to 1 if you have the `putenv' function. */ +#define HAVE_PUTENV 1 + +/* Define to 1 if you have the `rand' function. */ +#define HAVE_RAND 1 + +/* Define to 1 if you have the `rand_r' function. */ +#define HAVE_RAND_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_RESOLV_H 1 + +/* Have shl_load based dso */ +/* #undef HAVE_SHLLOAD */ + +/* Define to 1 if you have the `signal' function. */ +#define HAVE_SIGNAL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `sprintf' function. */ +#define HAVE_SPRINTF 1 + +/* Define to 1 if you have the `srand' function. */ +#define HAVE_SRAND 1 + +/* Define to 1 if you have the `sscanf' function. */ +#define HAVE_SSCANF 1 + +/* Define to 1 if you have the `stat' function. */ +#define HAVE_STAT 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strftime' function. */ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_MMAN_H 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SOCKET_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIMEB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the `time' function. */ +#define HAVE_TIME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Whether va_copy() is available */ +#define HAVE_VA_COPY 1 + +/* Define to 1 if you have the `vfprintf' function. */ +#define HAVE_VFPRINTF 1 + +/* Define to 1 if you have the `vsnprintf' function. */ +#define HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `vsprintf' function. */ +#define HAVE_VSPRINTF 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZLIB_H */ + +/* Whether __va_copy() is available */ +/* #undef HAVE___VA_COPY */ + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#define LT_OBJDIR ".libs/" + +/* Name of package */ +#define PACKAGE "libxml2" + +/* Define to the address where bug reports for this package should be sent. */ +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "" + +/* Define to the full name and version of this package. 
*/ +#define PACKAGE_STRING "" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "" + +/* Type cast for the send() function 2nd arg */ +#define SEND_ARG2_CAST /**/ + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Support for IPv6 */ +#define SUPPORT_IP6 /**/ + +/* Define if va_list is an array type */ +#define VA_LIST_IS_ARRAY 1 + +/* Version number of package */ +#define VERSION "2.9.8" + +/* Determine what socket length (socklen_t) data type is */ +#define XML_SOCKLEN_T socklen_t + +/* Define for Solaris 2.5.1 so the uint32_t typedef from , + , or is not used. If the typedef were allowed, the + #define below would cause a syntax error. */ +/* #undef _UINT32_T */ + +/* ss_family is not defined here, use __ss_family instead */ +/* #undef ss_family */ + +/* Define to the type of an unsigned integer type of width exactly 32 bits if + such a type exists and the standard includes do not define it. */ +/* #undef uint32_t */ diff --git a/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h new file mode 100644 index 00000000000..92d3414fdac --- /dev/null +++ b/contrib/libxml2-cmake/linux_x86_64/include/libxml/xmlversion.h @@ -0,0 +1,481 @@ +/* + * Summary: compile-time version informations + * Description: compile-time version informations for the XML library + * + * Copy: See Copyright for the status of this software. 
+ * + * Author: Daniel Veillard + */ + +#ifndef __XML_VERSION_H__ +#define __XML_VERSION_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * use those to be sure nothing nasty will happen if + * your library and includes mismatch + */ +#ifndef LIBXML2_COMPILING_MSCCDEF +XMLPUBFUN void XMLCALL xmlCheckVersion(int version); +#endif /* LIBXML2_COMPILING_MSCCDEF */ + +/** + * LIBXML_DOTTED_VERSION: + * + * the version string like "1.2.3" + */ +#define LIBXML_DOTTED_VERSION "2.9.8" + +/** + * LIBXML_VERSION: + * + * the version number: 1.2.3 value is 10203 + */ +#define LIBXML_VERSION 20908 + +/** + * LIBXML_VERSION_STRING: + * + * the version number string, 1.2.3 value is "10203" + */ +#define LIBXML_VERSION_STRING "20908" + +/** + * LIBXML_VERSION_EXTRA: + * + * extra version information, used to show a CVS compilation + */ +#define LIBXML_VERSION_EXTRA "-GITv2.9.9-rc2-1-g6fc04d71" + +/** + * LIBXML_TEST_VERSION: + * + * Macro to check that the libxml version in use is compatible with + * the version the software has been compiled against + */ +#define LIBXML_TEST_VERSION xmlCheckVersion(20908); + +#ifndef VMS +#if 0 +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO +#else +/** + * WITHOUT_TRIO: + * + * defined if the trio support should not be configured in + */ +#define WITHOUT_TRIO +#endif +#else /* VMS */ +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO 1 +#endif /* VMS */ + +/** + * LIBXML_THREAD_ENABLED: + * + * Whether the thread support is configured in + */ +#define LIBXML_THREAD_ENABLED 1 + +/** + * LIBXML_THREAD_ALLOC_ENABLED: + * + * Whether the allocation hooks are per-thread + */ +#if 0 +#define LIBXML_THREAD_ALLOC_ENABLED +#endif + +/** + * LIBXML_TREE_ENABLED: + * + * Whether the DOM like tree manipulation API support is configured in + */ +#if 1 +#define LIBXML_TREE_ENABLED +#endif + +/** + * LIBXML_OUTPUT_ENABLED: + * + * 
Whether the serialization/saving support is configured in + */ +#if 1 +#define LIBXML_OUTPUT_ENABLED +#endif + +/** + * LIBXML_PUSH_ENABLED: + * + * Whether the push parsing interfaces are configured in + */ +#if 1 +#define LIBXML_PUSH_ENABLED +#endif + +/** + * LIBXML_READER_ENABLED: + * + * Whether the xmlReader parsing interface is configured in + */ +#if 1 +#define LIBXML_READER_ENABLED +#endif + +/** + * LIBXML_PATTERN_ENABLED: + * + * Whether the xmlPattern node selection interface is configured in + */ +#if 1 +#define LIBXML_PATTERN_ENABLED +#endif + +/** + * LIBXML_WRITER_ENABLED: + * + * Whether the xmlWriter saving interface is configured in + */ +#if 1 +#define LIBXML_WRITER_ENABLED +#endif + +/** + * LIBXML_SAX1_ENABLED: + * + * Whether the older SAX1 interface is configured in + */ +#if 1 +#define LIBXML_SAX1_ENABLED +#endif + +/** + * LIBXML_FTP_ENABLED: + * + * Whether the FTP support is configured in + */ +#if 1 +#define LIBXML_FTP_ENABLED +#endif + +/** + * LIBXML_HTTP_ENABLED: + * + * Whether the HTTP support is configured in + */ +#if 1 +#define LIBXML_HTTP_ENABLED +#endif + +/** + * LIBXML_VALID_ENABLED: + * + * Whether the DTD validation support is configured in + */ +#if 1 +#define LIBXML_VALID_ENABLED +#endif + +/** + * LIBXML_HTML_ENABLED: + * + * Whether the HTML support is configured in + */ +#if 1 +#define LIBXML_HTML_ENABLED +#endif + +/** + * LIBXML_LEGACY_ENABLED: + * + * Whether the deprecated APIs are compiled in for compatibility + */ +#if 1 +#define LIBXML_LEGACY_ENABLED +#endif + +/** + * LIBXML_C14N_ENABLED: + * + * Whether the Canonicalization support is configured in + */ +#if 1 +#define LIBXML_C14N_ENABLED +#endif + +/** + * LIBXML_CATALOG_ENABLED: + * + * Whether the Catalog support is configured in + */ +#if 1 +#define LIBXML_CATALOG_ENABLED +#endif + +/** + * LIBXML_DOCB_ENABLED: + * + * Whether the SGML Docbook support is configured in + */ +#if 1 +#define LIBXML_DOCB_ENABLED +#endif + +/** + * LIBXML_XPATH_ENABLED: + * + 
* Whether XPath is configured in + */ +#if 1 +#define LIBXML_XPATH_ENABLED +#endif + +/** + * LIBXML_XPTR_ENABLED: + * + * Whether XPointer is configured in + */ +#if 1 +#define LIBXML_XPTR_ENABLED +#endif + +/** + * LIBXML_XINCLUDE_ENABLED: + * + * Whether XInclude is configured in + */ +#if 1 +#define LIBXML_XINCLUDE_ENABLED +#endif + +/** + * LIBXML_ICONV_ENABLED: + * + * Whether iconv support is available + */ +#if 1 +#define LIBXML_ICONV_ENABLED +#endif + +/** + * LIBXML_ICU_ENABLED: + * + * Whether icu support is available + */ +#if 0 +#define LIBXML_ICU_ENABLED +#endif + +/** + * LIBXML_ISO8859X_ENABLED: + * + * Whether ISO-8859-* support is made available in case iconv is not + */ +#if 1 +#define LIBXML_ISO8859X_ENABLED +#endif + +/** + * LIBXML_DEBUG_ENABLED: + * + * Whether Debugging module is configured in + */ +#if 1 +#define LIBXML_DEBUG_ENABLED +#endif + +/** + * DEBUG_MEMORY_LOCATION: + * + * Whether the memory debugging is configured in + */ +#if 0 +#define DEBUG_MEMORY_LOCATION +#endif + +/** + * LIBXML_DEBUG_RUNTIME: + * + * Whether the runtime debugging is configured in + */ +#if 0 +#define LIBXML_DEBUG_RUNTIME +#endif + +/** + * LIBXML_UNICODE_ENABLED: + * + * Whether the Unicode related interfaces are compiled in + */ +#if 1 +#define LIBXML_UNICODE_ENABLED +#endif + +/** + * LIBXML_REGEXP_ENABLED: + * + * Whether the regular expressions interfaces are compiled in + */ +#if 1 +#define LIBXML_REGEXP_ENABLED +#endif + +/** + * LIBXML_AUTOMATA_ENABLED: + * + * Whether the automata interfaces are compiled in + */ +#if 1 +#define LIBXML_AUTOMATA_ENABLED +#endif + +/** + * LIBXML_EXPR_ENABLED: + * + * Whether the formal expressions interfaces are compiled in + */ +#if 1 +#define LIBXML_EXPR_ENABLED +#endif + +/** + * LIBXML_SCHEMAS_ENABLED: + * + * Whether the Schemas validation interfaces are compiled in + */ +#if 1 +#define LIBXML_SCHEMAS_ENABLED +#endif + +/** + * LIBXML_SCHEMATRON_ENABLED: + * + * Whether the Schematron validation interfaces are 
compiled in + */ +#if 1 +#define LIBXML_SCHEMATRON_ENABLED +#endif + +/** + * LIBXML_MODULES_ENABLED: + * + * Whether the module interfaces are compiled in + */ +#if 1 +#define LIBXML_MODULES_ENABLED +/** + * LIBXML_MODULE_EXTENSION: + * + * the string suffix used by dynamic modules (usually shared libraries) + */ +#define LIBXML_MODULE_EXTENSION ".so" +#endif + +/** + * LIBXML_ZLIB_ENABLED: + * + * Whether the Zlib support is compiled in + */ +#if 1 +#define LIBXML_ZLIB_ENABLED +#endif + +/** + * LIBXML_LZMA_ENABLED: + * + * Whether the Lzma support is compiled in + */ +#if 0 +#define LIBXML_LZMA_ENABLED +#endif + +#ifdef __GNUC__ + +/** + * ATTRIBUTE_UNUSED: + * + * Macro used to signal to GCC unused function parameters + */ + +#ifndef ATTRIBUTE_UNUSED +# if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7))) +# define ATTRIBUTE_UNUSED __attribute__((unused)) +# else +# define ATTRIBUTE_UNUSED +# endif +#endif + +/** + * LIBXML_ATTR_ALLOC_SIZE: + * + * Macro used to indicate to GCC this is an allocator function + */ + +#ifndef LIBXML_ATTR_ALLOC_SIZE +# if (!defined(__clang__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)))) +# define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x))) +# else +# define LIBXML_ATTR_ALLOC_SIZE(x) +# endif +#else +# define LIBXML_ATTR_ALLOC_SIZE(x) +#endif + +/** + * LIBXML_ATTR_FORMAT: + * + * Macro used to indicate to GCC the parameter are printf like + */ + +#ifndef LIBXML_ATTR_FORMAT +# if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3))) +# define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args))) +# else +# define LIBXML_ATTR_FORMAT(fmt,args) +# endif +#else +# define LIBXML_ATTR_FORMAT(fmt,args) +#endif + +#else /* ! 
__GNUC__ */ +/** + * ATTRIBUTE_UNUSED: + * + * Macro used to signal to GCC unused function parameters + */ +#define ATTRIBUTE_UNUSED +/** + * LIBXML_ATTR_ALLOC_SIZE: + * + * Macro used to indicate to GCC this is an allocator function + */ +#define LIBXML_ATTR_ALLOC_SIZE(x) +/** + * LIBXML_ATTR_FORMAT: + * + * Macro used to indicate to GCC the parameter are printf like + */ +#define LIBXML_ATTR_FORMAT(fmt,args) +#endif /* __GNUC__ */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif + + diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 101b27d5357..f285a65356a 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -173,7 +173,6 @@ target_link_libraries (clickhouse_common_io PRIVATE apple_rt ${CMAKE_DL_LIBS} - ${HDFS3_LIBRARY} ) target_link_libraries (dbms @@ -264,7 +263,11 @@ target_link_libraries(dbms PRIVATE ${OPENSSL_CRYPTO_LIBRARY} Threads::Threads) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${DIVIDE_INCLUDE_DIR}) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR}) -target_include_directories (dbms SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR}) + +if (USE_HDFS) + target_link_libraries (dbms PRIVATE ${HDFS3_LIBRARY}) + target_include_directories (dbms SYSTEM BEFORE PRIVATE ${HDFS3_INCLUDE_DIR}) +endif() if (NOT USE_INTERNAL_LZ4_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${LZ4_INCLUDE_DIR}) diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp index 37887d78c71..1b7e48ac55f 100644 --- a/dbms/programs/client/Client.cpp +++ b/dbms/programs/client/Client.cpp @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -785,11 +784,8 @@ private: const ASTInsertQuery * insert = typeid_cast(&*parsed_query); connection->forceConnected(); -#if ENABLE_INSERT_INFILE - if (insert && !insert->select && !insert->in_file) -#else + if (insert && !insert->select) -#endif processInsertQuery(); else processOrdinaryQuery(); diff --git 
a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in index a06970384e4..302fc33c6b4 100644 --- a/dbms/src/Common/config.h.in +++ b/dbms/src/Common/config.h.in @@ -15,5 +15,5 @@ #cmakedefine01 USE_POCO_MONGODB #cmakedefine01 USE_POCO_NETSSL #cmakedefine01 CLICKHOUSE_SPLIT_BINARY -#cmakedefine01 ENABLE_INSERT_INFILE #cmakedefine01 USE_BASE64 +#cmakedefine01 USE_HDFS diff --git a/dbms/src/IO/ReadBufferFromHDFS.h b/dbms/src/IO/ReadBufferFromHDFS.h index 8010e60ff26..21705c1ef30 100644 --- a/dbms/src/IO/ReadBufferFromHDFS.h +++ b/dbms/src/IO/ReadBufferFromHDFS.h @@ -1,4 +1,8 @@ #pragma once + +#include + +#if USE_HDFS #include #include #include @@ -89,3 +93,4 @@ namespace DB } }; } +#endif diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 6425fe2e579..444e4eed7aa 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -1,7 +1,6 @@ #include #include -#include #include #include @@ -10,23 +9,15 @@ #include #include #include -#include -#include - #include #include -#include #include #include #include #include -#include -#include - -#include namespace DB @@ -37,7 +28,6 @@ namespace ErrorCodes extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int READONLY; extern const int ILLEGAL_COLUMN; - extern const int BAD_ARGUMETNS; } @@ -151,84 +141,6 @@ BlockIO InterpreterInsertQuery::execute() throw Exception("Cannot insert column " + name_type.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); } } -#if ENABLE_INSERT_INFILE - else if (query.in_file) - { - // read data stream from in_file, and copy it to out - // Special handling in_file based on url type: - String uristr = typeid_cast(*query.in_file).value.safeGet(); - // create Datastream based on Format: - String format = query.format; - if (format.empty()) - format = context.getDefaultFormat(); - - auto & settings = context.getSettingsRef(); - - // Assume no query 
and fragment in uri, todo, add sanity check - String fuzzyFileNames; - String uriPrefix = uristr.substr(0, uristr.find_last_of('/')); - if (uriPrefix.length() == uristr.length()) - { - fuzzyFileNames = uristr; - uriPrefix.clear(); - } - else - { - uriPrefix += "/"; - fuzzyFileNames = uristr.substr(uriPrefix.length()); - } - - Poco::URI uri(uriPrefix); - String scheme = uri.getScheme(); - - std::vector fuzzyNameList = parseDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',', 100/* hard coded max files */); - - //std::vector > fileNames; - - //for (const auto & fuzzyName : fuzzyNameList) - // fileNames.push_back(parseDescription(fuzzyName, 0, fuzzyName.length(), '|', 100)); - - BlockInputStreams inputs; - - for (const auto & name : fuzzyNameList) - { - std::unique_ptr read_buf; - - if (scheme.empty() || scheme == "file") - { - read_buf = std::make_unique(Poco::URI(uriPrefix + name).getPath()); - } - else if (scheme == "hdfs") - { - read_buf = std::make_unique(uriPrefix + name); - } - else - { - throw Exception("URI scheme " + scheme + " is not supported with insert statement yet", ErrorCodes::BAD_ARGUMENTS); - } - - inputs.emplace_back( - std::make_shared>( - context.getInputFormat(format, *read_buf, - res.out->getHeader(), // sample_block - settings.max_insert_block_size), - std::move(read_buf))); - } - - if (inputs.size() == 0) - throw Exception("Inputs interpreter error", ErrorCodes::BAD_ARGUMENTS); - - auto stream = inputs[0]; - if (inputs.size() > 1) - { - stream = std::make_shared >(inputs, nullptr, settings.max_distributed_connections); - } - - res.in = std::make_shared(stream, res.out); - - res.out = nullptr; - } -#endif return res; } diff --git a/dbms/src/Parsers/ASTInsertQuery.cpp b/dbms/src/Parsers/ASTInsertQuery.cpp index 0f5b6a727c9..1ecf4f9daef 100644 --- a/dbms/src/Parsers/ASTInsertQuery.cpp +++ b/dbms/src/Parsers/ASTInsertQuery.cpp @@ -1,6 +1,5 @@ #include #include -#include namespace DB @@ -37,27 +36,10 @@ void 
ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s if (!format.empty()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " FORMAT " << (settings.hilite ? hilite_none : "") << format; -#if ENABLE_INSERT_INFILE - if (in_file) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " INFILE " << (settings.hilite ? hilite_none : ""); - in_file->formatImpl(settings, state, frame); - } -#endif } else { -#if ENABLE_INSERT_INFILE - if (in_file) - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " INFILE " << (settings.hilite ? hilite_none : ""); - in_file->formatImpl(settings, state, frame); - } - else -#endif - { - settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : ""); - } + settings.ostr << (settings.hilite ? hilite_keyword : "") << " VALUES" << (settings.hilite ? hilite_none : ""); } } } diff --git a/dbms/src/Parsers/ASTInsertQuery.h b/dbms/src/Parsers/ASTInsertQuery.h index 6803da0c0a8..9da68ca21c8 100644 --- a/dbms/src/Parsers/ASTInsertQuery.h +++ b/dbms/src/Parsers/ASTInsertQuery.h @@ -1,7 +1,6 @@ #pragma once #include -#include namespace DB @@ -20,10 +19,6 @@ public: ASTPtr select; ASTPtr table_function; -#if ENABLE_INSERT_INFILE - ASTPtr in_file; -#endif - // Set to true if the data should only be inserted into attached views bool no_destination = false; @@ -45,12 +40,7 @@ public: { res->table_function = table_function->clone(); res->children.push_back(res->table_function); } -#if ENABLE_INSERT_INFILE - if (in_file) - { - res->in_file = in_file->clone(); res->children.push_back(res->in_file); - } -#endif + return res; } diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp index df0d2ab6afa..73aca09c210 100644 --- a/dbms/src/Parsers/ParserInsertQuery.cpp +++ b/dbms/src/Parsers/ParserInsertQuery.cpp @@ -10,7 +10,6 @@ #include #include -#include namespace DB @@ -44,11 +43,6 @@ bool 
ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr format; ASTPtr select; ASTPtr table_function; - -#if ENABLE_INSERT_INFILE - ParserKeyword s_infile("INFILE"); - ASTPtr in_file; -#endif /// Insertion data const char * data = nullptr; @@ -87,7 +81,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) Pos before_select = pos; - /// VALUES or FORMAT or SELECT or INFILE + /// VALUES or FORMAT or SELECT if (s_values.ignore(pos, expected)) { data = pos->begin; @@ -99,45 +93,28 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!name_p.parse(pos, format, expected)) return false; -#if ENABLE_INSERT_INFILE - // there are two case after FORMAT xx: - // case 1: data_set. - // case 2: INFILE xx clause. - if (s_infile.ignore(pos, expected)) - { - ParserStringLiteral in_file_p; + data = name_pos->end; - if (!in_file_p.parse(pos, in_file, expected)) - return false; + if (data < end && *data == ';') + throw Exception("You have excessive ';' symbol before data for INSERT.\n" + "Example:\n\n" + "INSERT INTO t (x, y) FORMAT TabSeparated\n" + ";\tHello\n" + "2\tWorld\n" + "\n" + "Note that there is no ';' just after format name, " + "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR); - } - else - { -#endif - data = name_pos->end; - if (data < end && *data == ';') - throw Exception("You have excessive ';' symbol before data for INSERT.\n" - "Example:\n\n" - "INSERT INTO t (x, y) FORMAT TabSeparated\n" - ";\tHello\n" - "2\tWorld\n" - "\n" - "Note that there is no ';' just after format name, " - "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR); + while (data < end && (*data == ' ' || *data == '\t' || *data == '\f')) + ++data; - while (data < end && (*data == ' ' || *data == '\t' || *data == '\f')) - ++data; + /// Data starts after the first newline, if there is one, or after all the whitespace 
characters, otherwise. - /// Data starts after the first newline, if there is one, or after all the whitespace characters, otherwise. + if (data < end && *data == '\r') + ++data; - if (data < end && *data == '\r') - ++data; - - if (data < end && *data == '\n') - ++data; -#if ENABLE_INSERT_INFILE - } -#endif + if (data < end && *data == '\n') + ++data; } else if (s_select.ignore(pos, expected) || s_with.ignore(pos,expected)) { @@ -145,14 +122,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserSelectWithUnionQuery select_p; select_p.parse(pos, select, expected); } -#if ENABLE_INSERT_INFILE - else if (s_infile.ignore(pos, expected)) - { - ParserStringLiteral in_file_p; - if (!in_file_p.parse(pos, in_file, expected)) - return false; - } -#endif else { return false; @@ -178,24 +147,13 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->columns = columns; query->select = select; - -#if ENABLE_INSERT_INFILE - query->in_file = in_file; - if (query->in_file) - query->data = nullptr; - else -#endif - query->data = data != end ? data : nullptr; + query->data = data != end ? data : nullptr; query->end = end; if (columns) query->children.push_back(columns); if (select) query->children.push_back(select); -#if ENABLE_INSERT_INFILE - if (in_file) - query->children.push_back(in_file); -#endif return true; } diff --git a/dbms/src/Parsers/ParserInsertQuery.h b/dbms/src/Parsers/ParserInsertQuery.h index 913649210b9..86198365edc 100644 --- a/dbms/src/Parsers/ParserInsertQuery.h +++ b/dbms/src/Parsers/ParserInsertQuery.h @@ -9,22 +9,18 @@ namespace DB /** Cases: * - * #1 Normal case: + * Normal case: * INSERT INTO [db.]table (c1, c2, c3) VALUES (v11, v12, v13), (v21, v22, v23), ... * INSERT INTO [db.]table VALUES (v11, v12, v13), (v21, v22, v23), ... * - * #2 Insert of data in an arbitrary format. + * Insert of data in an arbitrary format. 
* The data itself comes after LF(line feed), if it exists, or after all the whitespace characters, otherwise. * INSERT INTO [db.]table (c1, c2, c3) FORMAT format \n ... * INSERT INTO [db.]table FORMAT format \n ... * - * #3 Insert the result of the SELECT query. + * Insert the result of the SELECT query. * INSERT INTO [db.]table (c1, c2, c3) SELECT ... * INSERT INTO [db.]table SELECT ... - - * This syntax is controversial, not open for now. - * #4 Insert of data in an arbitrary form from file(a bit variant of #2) - * INSERT INTO [db.]table (c1, c2, c3) FORMAT format INFILE 'url' */ class ParserInsertQuery : public IParserBase { diff --git a/dbms/src/Storages/StorageHDFS.cpp b/dbms/src/Storages/StorageHDFS.cpp index 4eb45fd4e76..03c9626a582 100644 --- a/dbms/src/Storages/StorageHDFS.cpp +++ b/dbms/src/Storages/StorageHDFS.cpp @@ -1,3 +1,7 @@ +#include + +#if USE_HDFS + #include #include #include @@ -11,9 +15,10 @@ #include #include #include -#include +#include #include + namespace DB { namespace ErrorCodes @@ -59,16 +64,10 @@ namespace fuzzyFileNames = uri.substr(uriPrefix.length()); } - std::vector fuzzyNameList = parseDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , 100/* hard coded max files */); - - // Don't support | for globs compatible - //std::vector > fileNames; - //for(auto fuzzyName : fuzzyNameList) - // fileNames.push_back(parseDescription(fuzzyName, 0, fuzzyName.length(), '|', 100)); + std::vector fuzzyNameList = parseRemoteDescription(fuzzyFileNames, 0, fuzzyFileNames.length(), ',' , 100/* hard coded max files */); BlockInputStreams inputs; - //for (auto & vecNames : fileNames) for (auto & name: fuzzyNameList) { std::unique_ptr read_buf = std::make_unique(uriPrefix + name); @@ -176,3 +175,5 @@ void registerStorageHDFS(StorageFactory & factory) } } + +#endif diff --git a/dbms/src/Storages/StorageHDFS.h b/dbms/src/Storages/StorageHDFS.h index 81c0f5c93a1..44ff23c4d67 100644 --- a/dbms/src/Storages/StorageHDFS.h +++ 
b/dbms/src/Storages/StorageHDFS.h @@ -1,4 +1,6 @@ #pragma once +#include +#if USE_HDFS #include #include @@ -50,3 +52,5 @@ private: Logger * log = &Logger::get("StorageHDFS"); }; } + +#endif diff --git a/dbms/src/Storages/registerStorages.cpp b/dbms/src/Storages/registerStorages.cpp index ce831fbb758..4bd2d995104 100644 --- a/dbms/src/Storages/registerStorages.cpp +++ b/dbms/src/Storages/registerStorages.cpp @@ -24,6 +24,10 @@ void registerStorageJoin(StorageFactory & factory); void registerStorageView(StorageFactory & factory); void registerStorageMaterializedView(StorageFactory & factory); +#if USE_HDFS +void registerStorageHDFS(StorageFactory & factory); +#endif + #if USE_POCO_SQLODBC || USE_POCO_DATAODBC void registerStorageODBC(StorageFactory & factory); #endif @@ -60,6 +64,10 @@ void registerStorages() registerStorageView(factory); registerStorageMaterializedView(factory); + #if USE_HDFS + registerStorageHDFS(factory); + #endif + #if USE_POCO_SQLODBC || USE_POCO_DATAODBC registerStorageODBC(factory); #endif diff --git a/dbms/src/TableFunctions/ITableFunction.cpp b/dbms/src/TableFunctions/ITableFunction.cpp index b1d84a5ecac..b15cbbc9fd9 100644 --- a/dbms/src/TableFunctions/ITableFunction.cpp +++ b/dbms/src/TableFunctions/ITableFunction.cpp @@ -1,7 +1,5 @@ #include #include -#include -#include namespace ProfileEvents @@ -18,168 +16,4 @@ StoragePtr ITableFunction::execute(const ASTPtr & ast_function, const Context & return executeImpl(ast_function, context); } -namespace ErrorCodes -{ - extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; - extern const int BAD_ARGUMENTS; -} - -/// The Cartesian product of two sets of rows, the result is written in place of the first argument -static void append(std::vector & to, const std::vector & what, size_t max_addresses) -{ - if (what.empty()) - return; - - if (to.empty()) - { - to = what; - return; - } - - if (what.size() * to.size() > max_addresses) - throw Exception("Table function 'remote': first argument generates too 
many result addresses", - ErrorCodes::BAD_ARGUMENTS); - std::vector res; - for (size_t i = 0; i < to.size(); ++i) - for (size_t j = 0; j < what.size(); ++j) - res.push_back(to[i] + what[j]); - - to.swap(res); -} - - -/// Parse number from substring -static bool parseNumber(const String & description, size_t l, size_t r, size_t & res) -{ - res = 0; - for (size_t pos = l; pos < r; pos ++) - { - if (!isNumericASCII(description[pos])) - return false; - res = res * 10 + description[pos] - '0'; - if (res > 1e15) - return false; - } - return true; -} - - - -/* Parse a string that generates shards and replicas. Separator - one of two characters | or , - * depending on whether shards or replicas are generated. - * For example: - * host1,host2,... - generates set of shards from host1, host2, ... - * host1|host2|... - generates set of replicas from host1, host2, ... - * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. - * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. - * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. - * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. - * abc{1..9}de{f,g,h} - is a direct product, 27 shards. - * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. 
- */ -std::vector parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses) -{ - std::vector res; - std::vector cur; - - /// An empty substring means a set of an empty string - if (l >= r) - { - res.push_back(""); - return res; - } - - for (size_t i = l; i < r; ++i) - { - /// Either the numeric interval (8..10) or equivalent expression in brackets - if (description[i] == '{') - { - int cnt = 1; - int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two - size_t m; - std::vector buffer; - bool have_splitter = false; - - /// Look for the corresponding closing bracket - for (m = i + 1; m < r; ++m) - { - if (description[m] == '{') ++cnt; - if (description[m] == '}') --cnt; - if (description[m] == '.' && description[m-1] == '.') last_dot = m; - if (description[m] == separator) have_splitter = true; - if (cnt == 0) break; - } - if (cnt != 0) - throw Exception("Table function 'remote': incorrect brace sequence in first argument", - ErrorCodes::BAD_ARGUMENTS); - /// The presence of a dot - numeric interval - if (last_dot != -1) - { - size_t left, right; - if (description[last_dot - 1] != '.') - throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - if (!parseNumber(description, i + 1, last_dot - 1, left)) - throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): " - + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - if (!parseNumber(description, last_dot + 1, m, right)) - throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): " - + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - if (left > right) - throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): " - + description.substr(i, m - i + 1), - ErrorCodes::BAD_ARGUMENTS); - 
if (right - left + 1 > max_addresses) - throw Exception("Table function 'remote': first argument generates too many result addresses", - ErrorCodes::BAD_ARGUMENTS); - bool add_leading_zeroes = false; - size_t len = last_dot - 1 - (i + 1); - /// If the left and right borders have equal numbers, then you must add leading zeros. - if (last_dot - 1 - (i + 1) == m - (last_dot + 1)) - add_leading_zeroes = true; - for (size_t id = left; id <= right; ++id) - { - String cur = toString(id); - if (add_leading_zeroes) - { - while (cur.size() < len) - cur = "0" + cur; - } - buffer.push_back(cur); - } - } - else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows - buffer = parseDescription(description, i + 1, m, separator, max_addresses); - else /// Otherwise just copy, spawn will occur when you call with the correct delimiter - buffer.push_back(description.substr(i, m - i + 1)); - /// Add all possible received extensions to the current set of lines - append(cur, buffer, max_addresses); - i = m; - } - else if (description[i] == separator) - { - /// If the delimiter, then add found rows - res.insert(res.end(), cur.begin(), cur.end()); - cur.clear(); - } - else - { - /// Otherwise, simply append the character to current lines - std::vector buffer; - buffer.push_back(description.substr(i, 1)); - append(cur, buffer, max_addresses); - } - } - - res.insert(res.end(), cur.begin(), cur.end()); - if (res.size() > max_addresses) - throw Exception("Table function 'remote': first argument generates too many result addresses", - ErrorCodes::BAD_ARGUMENTS); - - return res; -} - - } diff --git a/dbms/src/TableFunctions/ITableFunction.h b/dbms/src/TableFunctions/ITableFunction.h index 52f72ec74e8..ddf900fa65c 100644 --- a/dbms/src/TableFunctions/ITableFunction.h +++ b/dbms/src/TableFunctions/ITableFunction.h @@ -2,9 +2,6 @@ #include #include -#include -#include - namespace DB { @@ -44,6 +41,5 @@ private: using TableFunctionPtr = std::shared_ptr; 
-std::vector parseDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses); } diff --git a/dbms/src/TableFunctions/TableFunctionHDFS.cpp b/dbms/src/TableFunctions/TableFunctionHDFS.cpp index 5f2efd3b8fc..9c09ad9313c 100644 --- a/dbms/src/TableFunctions/TableFunctionHDFS.cpp +++ b/dbms/src/TableFunctions/TableFunctionHDFS.cpp @@ -1,3 +1,6 @@ +#include + +#if USE_HDFS #include #include #include @@ -19,3 +22,4 @@ void registerTableFunctionHDFS(TableFunctionFactory & factory) factory.registerFunction(); } } +#endif diff --git a/dbms/src/TableFunctions/TableFunctionHDFS.h b/dbms/src/TableFunctions/TableFunctionHDFS.h index 79e8c74ccfc..8033034deb8 100644 --- a/dbms/src/TableFunctions/TableFunctionHDFS.h +++ b/dbms/src/TableFunctions/TableFunctionHDFS.h @@ -1,5 +1,9 @@ #pragma once +#include + +#if USE_HDFS + #include #include #include @@ -24,3 +28,5 @@ private: const String & source, const String & format, const Block & sample_block, Context & global_context) const override; }; } + +#endif diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index d29ffcc7b4f..b93a1638d48 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -11,6 +11,7 @@ #include #include +#include namespace DB @@ -145,11 +146,11 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C { /// Create new cluster from the scratch size_t max_addresses = context.getSettingsRef().table_function_remote_max_addresses; - std::vector shards = parseDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses); + std::vector shards = parseRemoteDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses); std::vector> names; for (size_t i = 0; i < shards.size(); ++i) - names.push_back(parseDescription(shards[i], 0, shards[i].size(), '|', max_addresses)); + 
names.push_back(parseRemoteDescription(shards[i], 0, shards[i].size(), '|', max_addresses)); if (names.empty()) throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/src/TableFunctions/parseRemoteDescription.cpp b/dbms/src/TableFunctions/parseRemoteDescription.cpp new file mode 100644 index 00000000000..d903fe72f03 --- /dev/null +++ b/dbms/src/TableFunctions/parseRemoteDescription.cpp @@ -0,0 +1,171 @@ +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int BAD_ARGUMENTS; +} + +/// The Cartesian product of two sets of rows, the result is written in place of the first argument +static void append(std::vector & to, const std::vector & what, size_t max_addresses) +{ + if (what.empty()) + return; + + if (to.empty()) + { + to = what; + return; + } + + if (what.size() * to.size() > max_addresses) + throw Exception("Table function 'remote': first argument generates too many result addresses", + ErrorCodes::BAD_ARGUMENTS); + std::vector res; + for (size_t i = 0; i < to.size(); ++i) + for (size_t j = 0; j < what.size(); ++j) + res.push_back(to[i] + what[j]); + + to.swap(res); +} + + +/// Parse number from substring +static bool parseNumber(const String & description, size_t l, size_t r, size_t & res) +{ + res = 0; + for (size_t pos = l; pos < r; pos ++) + { + if (!isNumericASCII(description[pos])) + return false; + res = res * 10 + description[pos] - '0'; + if (res > 1e15) + return false; + } + return true; +} + + + +/* Parse a string that generates shards and replicas. Separator - one of two characters | or , + * depending on whether shards or replicas are generated. + * For example: + * host1,host2,... - generates set of shards from host1, host2, ... + * host1|host2|... - generates set of replicas from host1, host2, ... + * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. 
+ * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. + * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. + * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. + * abc{1..9}de{f,g,h} - is a direct product, 27 shards. + * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. + */ +std::vector parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses) +{ + std::vector res; + std::vector cur; + + /// An empty substring means a set of an empty string + if (l >= r) + { + res.push_back(""); + return res; + } + + for (size_t i = l; i < r; ++i) + { + /// Either the numeric interval (8..10) or equivalent expression in brackets + if (description[i] == '{') + { + int cnt = 1; + int last_dot = -1; /// The rightmost pair of points, remember the index of the right of the two + size_t m; + std::vector buffer; + bool have_splitter = false; + + /// Look for the corresponding closing bracket + for (m = i + 1; m < r; ++m) + { + if (description[m] == '{') ++cnt; + if (description[m] == '}') --cnt; + if (description[m] == '.' 
&& description[m-1] == '.') last_dot = m; + if (description[m] == separator) have_splitter = true; + if (cnt == 0) break; + } + if (cnt != 0) + throw Exception("Table function 'remote': incorrect brace sequence in first argument", + ErrorCodes::BAD_ARGUMENTS); + /// The presence of a dot - numeric interval + if (last_dot != -1) + { + size_t left, right; + if (description[last_dot - 1] != '.') + throw Exception("Table function 'remote': incorrect argument in braces (only one dot): " + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (!parseNumber(description, i + 1, last_dot - 1, left)) + throw Exception("Table function 'remote': incorrect argument in braces (Incorrect left number): " + + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (!parseNumber(description, last_dot + 1, m, right)) + throw Exception("Table function 'remote': incorrect argument in braces (Incorrect right number): " + + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (left > right) + throw Exception("Table function 'remote': incorrect argument in braces (left number is greater then right): " + + description.substr(i, m - i + 1), + ErrorCodes::BAD_ARGUMENTS); + if (right - left + 1 > max_addresses) + throw Exception("Table function 'remote': first argument generates too many result addresses", + ErrorCodes::BAD_ARGUMENTS); + bool add_leading_zeroes = false; + size_t len = last_dot - 1 - (i + 1); + /// If the left and right borders have equal numbers, then you must add leading zeros. 
+ if (last_dot - 1 - (i + 1) == m - (last_dot + 1)) + add_leading_zeroes = true; + for (size_t id = left; id <= right; ++id) + { + String cur = toString(id); + if (add_leading_zeroes) + { + while (cur.size() < len) + cur = "0" + cur; + } + buffer.push_back(cur); + } + } + else if (have_splitter) /// If there is a current delimiter inside, then generate a set of resulting rows + buffer = parseRemoteDescription(description, i + 1, m, separator, max_addresses); + else /// Otherwise just copy, spawn will occur when you call with the correct delimiter + buffer.push_back(description.substr(i, m - i + 1)); + /// Add all possible received extensions to the current set of lines + append(cur, buffer, max_addresses); + i = m; + } + else if (description[i] == separator) + { + /// If the delimiter, then add found rows + res.insert(res.end(), cur.begin(), cur.end()); + cur.clear(); + } + else + { + /// Otherwise, simply append the character to current lines + std::vector buffer; + buffer.push_back(description.substr(i, 1)); + append(cur, buffer, max_addresses); + } + } + + res.insert(res.end(), cur.begin(), cur.end()); + if (res.size() > max_addresses) + throw Exception("Table function 'remote': first argument generates too many result addresses", + ErrorCodes::BAD_ARGUMENTS); + + return res; +} + +} diff --git a/dbms/src/TableFunctions/parseRemoteDescription.h b/dbms/src/TableFunctions/parseRemoteDescription.h new file mode 100644 index 00000000000..cbc73380628 --- /dev/null +++ b/dbms/src/TableFunctions/parseRemoteDescription.h @@ -0,0 +1,20 @@ +#pragma once +#include +#include +namespace DB +{ +/* Parse a string that generates shards and replicas. Separator - one of two characters | or , + * depending on whether shards or replicas are generated. + * For example: + * host1,host2,... - generates set of shards from host1, host2, ... + * host1|host2|... - generates set of replicas from host1, host2, ... + * abc{8..10}def - generates set of shards abc8def, abc9def, abc10def. 
+ * abc{08..10}def - generates set of shards abc08def, abc09def, abc10def. + * abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef. + * abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef. + * abc{1..9}de{f,g,h} - is a direct product, 27 shards. + * abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas. + */ +std::vector parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses); + +} diff --git a/dbms/src/TableFunctions/registerTableFunctions.cpp b/dbms/src/TableFunctions/registerTableFunctions.cpp index e6bc80e48d2..8974dcd53fe 100644 --- a/dbms/src/TableFunctions/registerTableFunctions.cpp +++ b/dbms/src/TableFunctions/registerTableFunctions.cpp @@ -13,7 +13,10 @@ void registerTableFunctionNumbers(TableFunctionFactory & factory); void registerTableFunctionCatBoostPool(TableFunctionFactory & factory); void registerTableFunctionFile(TableFunctionFactory & factory); void registerTableFunctionURL(TableFunctionFactory & factory); + +#if USE_HDFS void registerTableFunctionHDFS(TableFunctionFactory & factory); +#endif #if USE_POCO_SQLODBC || USE_POCO_DATAODBC void registerTableFunctionODBC(TableFunctionFactory & factory); @@ -37,7 +40,10 @@ void registerTableFunctions() registerTableFunctionCatBoostPool(factory); registerTableFunctionFile(factory); registerTableFunctionURL(factory); + +#if USE_HDFS registerTableFunctionHDFS(factory); +#endif #if USE_POCO_SQLODBC || USE_POCO_DATAODBC registerTableFunctionODBC(factory); diff --git a/dbms/tests/integration/helpers/cluster.py b/dbms/tests/integration/helpers/cluster.py index 39227369c2a..22d34d05844 100644 --- a/dbms/tests/integration/helpers/cluster.py +++ b/dbms/tests/integration/helpers/cluster.py @@ -14,11 +14,13 @@ import xml.dom.minidom from kazoo.client import KazooClient from kazoo.exceptions import KazooException import psycopg2 +import requests import docker from docker.errors import ContainerError from .client 
import Client, CommandRequest +from .hdfs_api import HDFSApi HELPERS_DIR = p.dirname(__file__) @@ -83,6 +85,7 @@ class ClickHouseCluster: self.with_postgres = False self.with_kafka = False self.with_odbc_drivers = False + self.with_hdfs = False self.docker_client = None self.is_up = False @@ -94,7 +97,7 @@ class ClickHouseCluster: cmd += " client" return cmd - def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False): + def add_instance(self, name, config_dir=None, main_configs=[], user_configs=[], macros={}, with_zookeeper=False, with_mysql=False, with_kafka=False, clickhouse_path_dir=None, with_odbc_drivers=False, with_postgres=False, with_hdfs=False, hostname=None, env_variables={}, image="yandex/clickhouse-integration-test", stay_alive=False): """Add an instance to the cluster. name - the name of the instance directory and the value of the 'instance' macro in ClickHouse. 
def wait_hdfs_to_start(self, timeout=60):
    """Block until HDFS accepts writes, or raise after *timeout* seconds.

    Readiness is probed by writing a throwaway file through the WebHDFS
    API; a successful write also proves SafeMode is off.
    """
    hdfs_api = HDFSApi("root")
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            hdfs_api.write_data("/somefilewithrandomname222", "1")
            print("Connected to HDFS and SafeMode disabled! ")
            return
        except Exception as ex:
            print("Can't connect to HDFS " + str(ex))
            time.sleep(1)
    raise Exception("Can't wait HDFS to start")
class HDFSApi(object):
    """Minimal WebHDFS client used by the integration tests.

    Talks to the namenode on ``http_proxy_port`` (50070) and follows its
    redirect to the datanode on ``http_data_port`` (50075).
    """

    def __init__(self, user):
        # Hosts/ports match the port mappings in docker_compose_hdfs.yml.
        self.host = "localhost"
        self.http_proxy_port = "50070"
        self.http_data_port = "50075"
        self.user = user

    def read_data(self, path):
        """Return the text contents of *path* via the WebHDFS OPEN op.

        Raises requests.HTTPError if either HTTP round-trip fails.
        """
        # Ask the namenode first; it answers with a 307 redirect to a datanode.
        response = requests.get(
            "http://{host}:{port}/webhdfs/v1{path}?op=OPEN".format(
                host=self.host, port=self.http_proxy_port, path=path),
            allow_redirects=False)
        if response.status_code != 307:
            response.raise_for_status()
        # Keep only the datanode-specific query parameter from the redirect URL.
        additional_params = '&'.join(response.headers['Location'].split('&')[1:2])
        response_data = requests.get(
            "http://{host}:{port}/webhdfs/v1{path}?op=OPEN&{params}".format(
                host=self.host, port=self.http_data_port, path=path,
                params=additional_params))
        if response_data.status_code != 200:
            response_data.raise_for_status()
        return response_data.text

    # Requests can't PUT a file upload the way WebHDFS expects, so shell out to curl.
    def _curl_to_put(self, filename, path, params):
        """Upload local file *filename* to HDFS *path* on the datanode; return curl output."""
        url = "http://{host}:{port}/webhdfs/v1{path}?op=CREATE&{params}".format(
            host=self.host, port=self.http_data_port, path=path, params=params)
        cmd = "curl -s -i -X PUT -T {fname} '{url}'".format(fname=filename, url=url)
        return subprocess.check_output(cmd, shell=True)

    def write_data(self, path, content):
        """Create *path* on HDFS holding *content* via the WebHDFS CREATE op."""
        # FIX: the original formatted user=self.user into a URL that had no
        # {user} placeholder, so the CREATE request carried no user name at
        # all; the intended &user.name={user} parameter is restored here.
        response = requests.put(
            "http://{host}:{port}/webhdfs/v1{path}?op=CREATE&user.name={user}".format(
                host=self.host, port=self.http_proxy_port, path=path,
                user=self.user),
            allow_redirects=False)
        if response.status_code != 307:
            response.raise_for_status()
        additional_params = '&'.join(
            response.headers['Location'].split('&')[1:2]
            + ["user.name={}".format(self.user), "overwrite=true"])
        # FIX: the temporary file was leaked (closed only on GC); the context
        # manager keeps it alive while curl uploads it, then removes it.
        with NamedTemporaryFile() as named_file:
            named_file.write(content)
            named_file.flush()
            output = self._curl_to_put(named_file.name, path, additional_params)
        if "201 Created" not in output:
            raise Exception("Can't create file on hdfs:\n {}".format(output))
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', with_hdfs=True, image='withlibsimage',
                             config_dir="configs",
                             main_configs=['configs/log_conf.xml'])


@pytest.fixture(scope="module")
def started_cluster():
    """Start the dockerized cluster (with HDFS) once per module; always shut it down."""
    try:
        cluster.start()
        yield cluster
    except Exception as ex:
        print(ex)
        # FIX: bare `raise` re-raises the active exception with its original
        # traceback; the original `raise ex` discarded the traceback.
        raise
    finally:
        cluster.shutdown()


def test_read_write_storage(started_cluster):
    """A table with ENGINE = HDFS reads back data written through WebHDFS."""
    hdfs_api = HDFSApi("root")
    hdfs_api.write_data("/simple_storage", "1\tMark\t72.53\n")
    assert hdfs_api.read_data("/simple_storage") == "1\tMark\t72.53\n"

    node1.query("create table SimpleHDFSStorage (id UInt32, name String, weight Float64) ENGINE = HDFS('hdfs://hdfs1:9000/simple_storage', 'TSV')")
    assert node1.query("select * from SimpleHDFSStorage") == "1\tMark\t72.53\n"


def test_read_write_table(started_cluster):
    """The hdfs() table function reads back data written through WebHDFS."""
    hdfs_api = HDFSApi("root")
    data = "1\tSerialize\t555.222\n2\tData\t777.333\n"
    hdfs_api.write_data("/simple_table_function", data)
    assert hdfs_api.read_data("/simple_table_function") == data

    assert node1.query("select * from hdfs('hdfs://hdfs1:9000/simple_table_function', 'TSV', 'id UInt64, text String, number Float64')") == data