mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-11 17:02:25 +00:00
Add parquet support. Fixes, tests, ...
This commit is contained in:
parent
1def68be6f
commit
e20c77e4c6
14
.gitmodules
vendored
14
.gitmodules
vendored
@ -48,10 +48,16 @@
|
||||
url = https://github.com/ClickHouse-Extras/protobuf.git
|
||||
[submodule "contrib/boost"]
|
||||
path = contrib/boost
|
||||
url = https://github.com/ClickHouse-Extras/boost-extra.git
|
||||
url = https://github.com/ClickHouse-Extras/boost.git
|
||||
[submodule "contrib/base64"]
|
||||
path = contrib/base64
|
||||
url = https://github.com/aklomp/base64.git
|
||||
[submodule "contrib/arrow"]
|
||||
path = contrib/arrow
|
||||
url = https://github.com/apache/arrow
|
||||
[submodule "contrib/thrift"]
|
||||
path = contrib/thrift
|
||||
url = https://github.com/apache/thrift.git
|
||||
[submodule "contrib/libhdfs3"]
|
||||
path = contrib/libhdfs3
|
||||
url = https://github.com/ClickHouse-Extras/libhdfs3.git
|
||||
@ -61,12 +67,12 @@
|
||||
[submodule "contrib/libgsasl"]
|
||||
path = contrib/libgsasl
|
||||
url = https://github.com/ClickHouse-Extras/libgsasl.git
|
||||
[submodule "contrib/snappy"]
|
||||
path = contrib/snappy
|
||||
url = https://github.com/google/snappy
|
||||
[submodule "contrib/cppkafka"]
|
||||
path = contrib/cppkafka
|
||||
url = https://github.com/ClickHouse-Extras/cppkafka.git
|
||||
[submodule "contrib/brotli"]
|
||||
path = contrib/brotli
|
||||
url = https://github.com/google/brotli.git
|
||||
[submodule "contrib/arrow"]
|
||||
path = contrib/arrow
|
||||
url = https://github.com/apache/arrow
|
||||
|
@ -224,6 +224,7 @@ endif ()
|
||||
message (STATUS "Building for: ${CMAKE_SYSTEM} ${CMAKE_SYSTEM_PROCESSOR} ${CMAKE_LIBRARY_ARCHITECTURE} ; USE_STATIC_LIBRARIES=${USE_STATIC_LIBRARIES} MAKE_STATIC_LIBRARIES=${MAKE_STATIC_LIBRARIES} SPLIT_SHARED=${SPLIT_SHARED_LIBRARIES} UNBUNDLED=${UNBUNDLED} CCACHE=${CCACHE_FOUND} ${CCACHE_VERSION}")
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include (cmake/find_contrib_lib.cmake)
|
||||
|
||||
include (cmake/find_ssl.cmake)
|
||||
include (cmake/lib_name.cmake)
|
||||
@ -258,17 +259,15 @@ include (cmake/find_pdqsort.cmake)
|
||||
include (cmake/find_hdfs3.cmake) # uses protobuf
|
||||
include (cmake/find_consistent-hashing.cmake)
|
||||
include (cmake/find_base64.cmake)
|
||||
if (ENABLE_TESTS)
|
||||
include (cmake/find_gtest.cmake)
|
||||
endif ()
|
||||
include (cmake/find_parquet.cmake)
|
||||
|
||||
include (cmake/find_contrib_lib.cmake)
|
||||
find_contrib_lib(cityhash)
|
||||
find_contrib_lib(farmhash)
|
||||
find_contrib_lib(metrohash)
|
||||
find_contrib_lib(btrie)
|
||||
find_contrib_lib(double-conversion)
|
||||
include (cmake/find_parquet.cmake)
|
||||
if (ENABLE_TESTS)
|
||||
include (cmake/find_gtest.cmake)
|
||||
endif ()
|
||||
|
||||
# Need to process before "contrib" dir:
|
||||
include (libs/libcommon/cmake/find_gperftools.cmake)
|
||||
|
@ -9,7 +9,7 @@ endif ()
|
||||
if (NOT USE_INTERNAL_BOOST_LIBRARY)
|
||||
set (Boost_USE_STATIC_LIBS ${USE_STATIC_LIBRARIES})
|
||||
set (BOOST_ROOT "/usr/local")
|
||||
find_package (Boost 1.60 COMPONENTS program_options system filesystem thread)
|
||||
find_package (Boost 1.60 COMPONENTS program_options system filesystem thread regex)
|
||||
# Incomplete: no include-directory search is done here. Who uses it?
|
||||
if (NOT Boost_FOUND)
|
||||
# # Try to find manually.
|
||||
@ -29,9 +29,12 @@ if (NOT Boost_SYSTEM_LIBRARY)
|
||||
set (Boost_SYSTEM_LIBRARY boost_system_internal)
|
||||
set (Boost_PROGRAM_OPTIONS_LIBRARY boost_program_options_internal)
|
||||
set (Boost_FILESYSTEM_LIBRARY boost_filesystem_internal ${Boost_SYSTEM_LIBRARY})
|
||||
set (Boost_REGEX_LIBRARY boost_regex_internal)
|
||||
|
||||
set (Boost_INCLUDE_DIRS)
|
||||
|
||||
set (BOOST_ROOT "${ClickHouse_SOURCE_DIR}/contrib/boost")
|
||||
|
||||
# For boost from github:
|
||||
file (GLOB Boost_INCLUDE_DIRS_ "${ClickHouse_SOURCE_DIR}/contrib/boost/libs/*/include")
|
||||
list (APPEND Boost_INCLUDE_DIRS ${Boost_INCLUDE_DIRS_})
|
||||
@ -44,4 +47,4 @@ if (NOT Boost_SYSTEM_LIBRARY)
|
||||
|
||||
endif ()
|
||||
|
||||
message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY}")
|
||||
message (STATUS "Using Boost: ${Boost_INCLUDE_DIRS} : ${Boost_PROGRAM_OPTIONS_LIBRARY},${Boost_SYSTEM_LIBRARY},${Boost_FILESYSTEM_LIBRARY},${Boost_REGEX_LIBRARY}")
|
||||
|
@ -9,8 +9,9 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libxml2/libxml.h")
|
||||
endif ()
|
||||
|
||||
if (NOT USE_INTERNAL_LIBXML2_LIBRARY)
|
||||
find_library (LIBXML2_LIBRARY libxml2)
|
||||
find_path (LIBXML2_INCLUDE_DIR NAMES libxml.h PATHS ${LIBXML2_INCLUDE_PATHS})
|
||||
find_package (LibXml2)
|
||||
#find_library (LIBXML2_LIBRARY libxml2)
|
||||
#find_path (LIBXML2_INCLUDE_DIR NAMES libxml.h PATHS ${LIBXML2_INCLUDE_PATHS})
|
||||
endif ()
|
||||
|
||||
if (LIBXML2_LIBRARY AND LIBXML2_INCLUDE_DIR)
|
||||
|
@ -1,8 +1,11 @@
|
||||
option (USE_INTERNAL_LZ4_LIBRARY "Set to FALSE to use system lz4 library instead of bundled" ${NOT_UNBUNDLED})
|
||||
|
||||
if (USE_INTERNAL_LZ4_LIBRARY AND NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lz4/lib/lz4.h")
|
||||
message (WARNING "submodule contrib/lz4 is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
set (USE_INTERNAL_LZ4_LIBRARY 0)
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/lz4/lib/lz4.h")
|
||||
if (USE_INTERNAL_LZ4_LIBRARY)
|
||||
message (WARNING "submodule contrib/lz4 is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
set (USE_INTERNAL_LZ4_LIBRARY 0)
|
||||
endif ()
|
||||
set (MISSING_INTERNAL_LZ4_LIBRARY 1)
|
||||
endif ()
|
||||
|
||||
if (NOT USE_INTERNAL_LZ4_LIBRARY)
|
||||
@ -11,7 +14,7 @@ if (NOT USE_INTERNAL_LZ4_LIBRARY)
|
||||
endif ()
|
||||
|
||||
if (LZ4_LIBRARY AND LZ4_INCLUDE_DIR)
|
||||
else ()
|
||||
elseif (NOT MISSING_INTERNAL_LZ4_LIBRARY)
|
||||
set (LZ4_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
|
||||
set (USE_INTERNAL_LZ4_LIBRARY 1)
|
||||
set (LZ4_LIBRARY lz4)
|
||||
|
@ -1,31 +1,68 @@
|
||||
option (USE_INTERNAL_PARQUET_LIBRARY "Set to FALSE to use system parquet library instead of bundled" ${NOT_UNBUNDLED})
|
||||
if (NOT OS_FREEBSD) # Freebsd: ../contrib/arrow/cpp/src/arrow/util/bit-util.h:27:10: fatal error: endian.h: No such file or directory
|
||||
option(USE_INTERNAL_PARQUET_LIBRARY "Set to FALSE to use system parquet library instead of bundled" ${NOT_UNBUNDLED})
|
||||
endif()
|
||||
|
||||
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/CMakeLists.txt")
|
||||
if (USE_INTERNAL_PARQUET_LIBRARY)
|
||||
message (WARNING "submodule contrib/arrow (required for Parquet) is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
endif ()
|
||||
set (USE_INTERNAL_PARQUET_LIBRARY 0)
|
||||
set (MISSING_INTERNAL_PARQUET_LIBRARY 1)
|
||||
endif ()
|
||||
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/CMakeLists.txt")
|
||||
if(USE_INTERNAL_PARQUET_LIBRARY)
|
||||
message(WARNING "submodule contrib/arrow (required for Parquet) is missing. to fix try run: \n git submodule update --init --recursive")
|
||||
endif()
|
||||
set(USE_INTERNAL_PARQUET_LIBRARY 0)
|
||||
set(MISSING_INTERNAL_PARQUET_LIBRARY 1)
|
||||
endif()
|
||||
|
||||
if (NOT USE_INTERNAL_PARQUET_LIBRARY)
|
||||
find_package (Arrow)
|
||||
find_package (Parquet)
|
||||
endif ()
|
||||
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
|
||||
find_package(Arrow)
|
||||
find_package(Parquet)
|
||||
endif()
|
||||
|
||||
if (ARROW_INCLUDE_DIR AND PARQUET_INCLUDE_DIR)
|
||||
elseif (NOT MISSING_INTERNAL_PARQUET_LIBRARY)
|
||||
set (USE_INTERNAL_PARQUET_LIBRARY 1)
|
||||
# TODO: is it required?
|
||||
# set (ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow")
|
||||
# set (PARQUET_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet")
|
||||
set (ARROW_LIBRARY arrow_static)
|
||||
set (PARQUET_LIBRARY parquet_static)
|
||||
set (USE_PARQUET 1)
|
||||
endif ()
|
||||
if(ARROW_INCLUDE_DIR AND PARQUET_INCLUDE_DIR)
|
||||
elseif(NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT OS_FREEBSD)
|
||||
include(cmake/find_snappy.cmake)
|
||||
set(CAN_USE_INTERNAL_PARQUET_LIBRARY 1)
|
||||
include(CheckCXXSourceCompiles)
|
||||
if(NOT USE_INTERNAL_DOUBLE_CONVERSION_LIBRARY)
|
||||
set(CMAKE_REQUIRED_LIBRARIES ${DOUBLE_CONVERSION_LIBRARIES})
|
||||
set(CMAKE_REQUIRED_INCLUDES ${DOUBLE_CONVERSION_INCLUDE_DIR})
|
||||
check_cxx_source_compiles("
|
||||
#include <double-conversion/double-conversion.h>
|
||||
int main() { static const int flags_ = double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY; return 0;}
|
||||
" HAVE_DOUBLE_CONVERSION_ALLOW_CASE_INSENSIBILITY)
|
||||
|
||||
if (USE_PARQUET)
|
||||
message (STATUS "Using Parquet: ${ARROW_INCLUDE_DIR} ${PARQUET_INCLUDE_DIR}")
|
||||
else ()
|
||||
message (STATUS "Building without Parquet support")
|
||||
endif ()
|
||||
if(NOT HAVE_DOUBLE_CONVERSION_ALLOW_CASE_INSENSIBILITY) # HAVE_STD_RANDOM_SHUFFLE
|
||||
message(STATUS "Disabling internal parquet library because arrow is broken (can't use old double_conversion)")
|
||||
set(CAN_USE_INTERNAL_PARQUET_LIBRARY 0)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT CAN_USE_INTERNAL_PARQUET_LIBRARY)
|
||||
set(USE_INTERNAL_PARQUET_LIBRARY 0)
|
||||
else()
|
||||
set(USE_INTERNAL_PARQUET_LIBRARY 1)
|
||||
|
||||
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
|
||||
set(ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src")
|
||||
set(PARQUET_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" ${ClickHouse_BINARY_DIR}/contrib/arrow/cpp/src)
|
||||
endif()
|
||||
|
||||
if(${USE_STATIC_LIBRARIES})
|
||||
set(ARROW_LIBRARY arrow_static)
|
||||
set(PARQUET_LIBRARY parquet_static)
|
||||
set(THRIFT_LIBRARY thrift_static)
|
||||
else()
|
||||
set(ARROW_LIBRARY arrow_shared)
|
||||
set(PARQUET_LIBRARY parquet_shared)
|
||||
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
|
||||
list(APPEND PARQUET_LIBRARY ${Boost_REGEX_LIBRARY})
|
||||
endif()
|
||||
set(THRIFT_LIBRARY thrift)
|
||||
endif()
|
||||
|
||||
set(USE_PARQUET 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(USE_PARQUET)
|
||||
message(STATUS "Using Parquet: ${ARROW_LIBRARY}:${ARROW_INCLUDE_DIR} ; ${PARQUET_LIBRARY}:${PARQUET_INCLUDE_DIR} ; ${THRIFT_LIBRARY}")
|
||||
else()
|
||||
message(STATUS "Building without Parquet support")
|
||||
endif()
|
||||
|
27
cmake/find_snappy.cmake
Normal file
27
cmake/find_snappy.cmake
Normal file
@ -0,0 +1,27 @@
|
||||
# Locate the snappy compression library.
#
# Inputs:
#   USE_INTERNAL_SNAPPY_LIBRARY - prefer the bundled contrib/snappy submodule.
#   SNAPPY_INCLUDE_PATHS        - optional extra search paths for snappy.h.
# Outputs:
#   SNAPPY_LIBRARY, SNAPPY_INCLUDE_DIR, USE_INTERNAL_SNAPPY_LIBRARY, USE_SNAPPY.
option(USE_INTERNAL_SNAPPY_LIBRARY "Set to FALSE to use system snappy library instead of bundled" ${NOT_UNBUNDLED})

# Without the submodule checkout the bundled copy cannot be used at all.
if(NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/snappy/snappy.h")
    set(MISSING_INTERNAL_SNAPPY_LIBRARY 1)
    if(USE_INTERNAL_SNAPPY_LIBRARY)
        message(WARNING "submodule contrib/snappy is missing. to fix try run: \n git submodule update --init --recursive")
        set(USE_INTERNAL_SNAPPY_LIBRARY 0)
    endif()
endif()

# Look for a system-wide installation only when the bundled copy is not requested.
if(NOT USE_INTERNAL_SNAPPY_LIBRARY)
    find_library(SNAPPY_LIBRARY snappy)
    find_path(SNAPPY_INCLUDE_DIR NAMES snappy.h PATHS ${SNAPPY_INCLUDE_PATHS})
endif()

# Nothing usable was found so far: fall back to the bundled copy when it is present.
if(NOT (SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR) AND NOT MISSING_INTERNAL_SNAPPY_LIBRARY)
    set(SNAPPY_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/snappy)
    set(USE_INTERNAL_SNAPPY_LIBRARY 1)
    set(SNAPPY_LIBRARY snappy)
endif()

# snappy is enabled only when both the library and its headers are known.
if(SNAPPY_LIBRARY AND SNAPPY_INCLUDE_DIR)
    set(USE_SNAPPY 1)
endif()

message(STATUS "Using snappy=${USE_SNAPPY}: ${SNAPPY_INCLUDE_DIR} : ${SNAPPY_LIBRARY}")
|
56
contrib/CMakeLists.txt
vendored
56
contrib/CMakeLists.txt
vendored
@ -151,17 +151,61 @@ if (USE_INTERNAL_CAPNP_LIBRARY)
|
||||
endif ()
|
||||
|
||||
if (USE_INTERNAL_PARQUET_LIBRARY)
|
||||
set (ARROW_COMPUTE ON)
|
||||
set (ARROW_PARQUET ON)
|
||||
set (ARROW_VERBOSE_THIRDPARTY_BUILD ON)
|
||||
set (PARQUET_ARROW_LINKAGE "static")
|
||||
set (ARROW_BUILD_STATIC ON)
|
||||
if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
|
||||
# We don't use Arrow's own CMake files because they pull in too many dependencies and download some libraries at build time.
|
||||
# But this mode can be used for updating auto-generated parquet files:
|
||||
# cmake -DUSE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE=1 -DUSE_STATIC_LIBRARIES=0
|
||||
# copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> /contrib/arrow-cmake/cpp/src/parquet/
|
||||
|
||||
# Also useful parquet reader:
|
||||
# cd contrib/arrow/cpp/build && mkdir -p build && cmake .. -DPARQUET_BUILD_EXECUTABLES=1 && make -j8
|
||||
# contrib/arrow/cpp/build/debug/parquet-reader some_file.parquet
|
||||
|
||||
set (ARROW_COMPUTE ON CACHE INTERNAL "")
|
||||
set (ARROW_PARQUET ON CACHE INTERNAL "")
|
||||
set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "")
|
||||
set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "")
|
||||
set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "")
|
||||
#set (BOOST_INCLUDEDIR Boost_INCLUDE_DIRS)
|
||||
set (Boost_FOUND 1 CACHE INTERNAL "")
|
||||
#set (ZLIB_HOME ${ZLIB_INCLUDE_DIR})
|
||||
#set (ZLIB_FOUND 1)
|
||||
if (MAKE_STATIC_LIBRARIES)
|
||||
set (PARQUET_ARROW_LINKAGE "static" CACHE INTERNAL "")
|
||||
set (ARROW_TEST_LINKAGE "static" CACHE INTERNAL "")
|
||||
set (ARROW_BUILD_STATIC ${MAKE_STATIC_LIBRARIES} CACHE INTERNAL "")
|
||||
else()
|
||||
set (PARQUET_ARROW_LINKAGE "shared" CACHE INTERNAL "")
|
||||
set (ARROW_TEST_LINKAGE "shared" CACHE INTERNAL "")
|
||||
endif()
|
||||
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "RELWITHDEBINFO")
|
||||
set(_save_build_type ${CMAKE_BUILD_TYPE})
|
||||
set(CMAKE_BUILD_TYPE RELEASE)
|
||||
endif()
|
||||
|
||||
# Because Arrow uses CMAKE_SOURCE_DIR as a project path
|
||||
# Hopefully will be fixed in https://github.com/apache/arrow/pull/2676
|
||||
set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/cmake_modules")
|
||||
add_subdirectory (arrow/cpp)
|
||||
endif ()
|
||||
|
||||
if(_save_build_type)
|
||||
set(CMAKE_BUILD_TYPE ${_save_build_type})
|
||||
endif()
|
||||
|
||||
else()
|
||||
|
||||
if(USE_INTERNAL_SNAPPY_LIBRARY)
|
||||
set(SNAPPY_BUILD_TESTS 0 CACHE INTERNAL "")
|
||||
if (NOT MAKE_STATIC_LIBRARIES)
|
||||
set(BUILD_SHARED_LIBS 1) # TODO: set at root dir
|
||||
endif()
|
||||
add_subdirectory(snappy)
|
||||
endif()
|
||||
|
||||
add_subdirectory(arrow-cmake)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (USE_INTERNAL_POCO_LIBRARY)
|
||||
set (POCO_VERBOSE_MESSAGES 0 CACHE INTERNAL "")
|
||||
|
2
contrib/arrow
vendored
2
contrib/arrow
vendored
@ -1 +1 @@
|
||||
Subproject commit af20905877fb353367d7ee5a808f759532a5ca0f
|
||||
Subproject commit 87ac6fddaf21d0b4ee8b8090533ff293db0da1b4
|
212
contrib/arrow-cmake/CMakeLists.txt
Normal file
212
contrib/arrow-cmake/CMakeLists.txt
Normal file
@ -0,0 +1,212 @@
|
||||
# === thrift
|
||||
|
||||
# Thrift C++ runtime, compiled directly from the contrib/thrift submodule.
# The file lists below follow contrib/thrift/lib/cpp/CMakeLists.txt.
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp)

# Core runtime files, given relative to ${LIBRARY_DIR}/src/thrift.
set(thriftcpp_SOURCES "")
foreach(thrift_rel_path
        TApplicationException.cpp
        TOutput.cpp
        async/TAsyncChannel.cpp
        async/TAsyncProtocolProcessor.cpp
        async/TConcurrentClientSyncInfo.h
        async/TConcurrentClientSyncInfo.cpp
        concurrency/ThreadManager.cpp
        concurrency/TimerManager.cpp
        concurrency/Util.cpp
        processor/PeekProcessor.cpp
        protocol/TBase64Utils.cpp
        protocol/TDebugProtocol.cpp
        protocol/TJSONProtocol.cpp
        protocol/TMultiplexedProtocol.cpp
        protocol/TProtocol.cpp
        transport/TTransportException.cpp
        transport/TFDTransport.cpp
        transport/TSimpleFileTransport.cpp
        transport/THttpTransport.cpp
        transport/THttpClient.cpp
        transport/THttpServer.cpp
        transport/TSocket.cpp
        transport/TSocketPool.cpp
        transport/TServerSocket.cpp
        transport/TTransportUtils.cpp
        transport/TBufferTransports.cpp
        server/TConnectedClient.cpp
        server/TServerFramework.cpp
        server/TSimpleServer.cpp
        server/TThreadPoolServer.cpp
        server/TThreadedServer.cpp)
    list(APPEND thriftcpp_SOURCES ${LIBRARY_DIR}/src/thrift/${thrift_rel_path})
endforeach()

# Threading primitives, kept in their own list as in the upstream build file.
set(thriftcpp_threads_SOURCES "")
foreach(thrift_rel_path
        concurrency/ThreadFactory.cpp
        concurrency/Thread.cpp
        concurrency/Monitor.cpp
        concurrency/Mutex.cpp)
    list(APPEND thriftcpp_threads_SOURCES ${LIBRARY_DIR}/src/thrift/${thrift_rel_path})
endforeach()

add_library(${THRIFT_LIBRARY} ${LINK_MODE}
    ${thriftcpp_SOURCES}
    ${thriftcpp_threads_SOURCES})

# REMOVE after https://github.com/apache/thrift/pull/1641
set_property(TARGET ${THRIFT_LIBRARY} PROPERTY CXX_STANDARD 14)

# Thrift's own headers are exported to consumers; Boost is only needed to build it.
target_include_directories(${THRIFT_LIBRARY}
    SYSTEM
    PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/thrift/lib/cpp/src
    PRIVATE ${Boost_INCLUDE_DIRS})
|
||||
|
||||
|
||||
|
||||
# === arrow
|
||||
|
||||
# Arrow core sources, taken from the contrib/arrow submodule.
# The list follows arrow/cpp/src/arrow/CMakeLists.txt; entries are relative to LIBRARY_DIR.
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow)

set(ARROW_SRCS "")
foreach(arrow_rel_path
        array.cc

        builder.cc
        array/builder_adaptive.cc
        array/builder_base.cc
        array/builder_binary.cc
        array/builder_decimal.cc
        array/builder_dict.cc
        array/builder_nested.cc
        array/builder_primitive.cc

        buffer.cc
        compare.cc
        memory_pool.cc
        pretty_print.cc
        record_batch.cc
        status.cc
        table.cc
        table_builder.cc
        tensor.cc
        sparse_tensor.cc
        type.cc
        visitor.cc

        csv/converter.cc
        csv/chunker.cc
        csv/column-builder.cc
        csv/options.cc
        csv/parser.cc
        csv/reader.cc

        io/buffered.cc
        io/compressed.cc
        io/file.cc
        io/interfaces.cc
        io/memory.cc
        io/readahead.cc

        util/bit-util.cc
        util/compression.cc
        util/cpu-info.cc
        util/decimal.cc
        util/int-util.cc
        util/io-util.cc
        util/logging.cc
        util/key_value_metadata.cc
        util/task-group.cc
        util/thread-pool.cc
        util/trie.cc
        util/utf8.cc

        # Compute kernels (appended after the core files, as before).
        compute/context.cc
        compute/kernels/boolean.cc
        compute/kernels/cast.cc
        compute/kernels/hash.cc
        compute/kernels/util-internal.cc)
    list(APPEND ARROW_SRCS ${LIBRARY_DIR}/${arrow_rel_path})
endforeach()
|
||||
|
||||
# Arrow library target with optional compression codecs.
#
# A codec is enabled when both its include directory and its library variable
# are set by the surrounding build (or when ARROW_WITH_<CODEC> is already set).
# For every enabled codec:
#   * the matching util/compression_<codec>.cc file is added to ARROW_SRCS,
#   * ARROW_WITH_<CODEC> is defined on the arrow target,
#   * the codec library is linked privately.
#
# The ARROW_WITH_* macros are attached to the arrow target instead of being
# added with the directory-scoped add_definitions(), so they no longer leak
# onto unrelated targets declared in this directory. They are PUBLIC so that
# targets linking ${ARROW_LIBRARY} directly still compile with them.
# NOTE(review): presumably only arrow itself and its direct dependents need
# these macros - confirm that no other target in this directory relies on them.

if(LZ4_INCLUDE_DIR AND LZ4_LIBRARY)
    set(ARROW_WITH_LZ4 1)
endif()

if(SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
    set(ARROW_WITH_SNAPPY 1)
endif()

if(ZLIB_INCLUDE_DIR AND ZLIB_LIBRARIES)
    set(ARROW_WITH_ZLIB 1)
endif()

if(ZSTD_INCLUDE_DIR AND ZSTD_LIBRARY)
    set(ARROW_WITH_ZSTD 1)
endif()

# Codec implementation files are prepended, keeping the original source order.
if(ARROW_WITH_LZ4)
    list(INSERT ARROW_SRCS 0 ${LIBRARY_DIR}/util/compression_lz4.cc)
endif()

if(ARROW_WITH_SNAPPY)
    list(INSERT ARROW_SRCS 0 ${LIBRARY_DIR}/util/compression_snappy.cc)
endif()

if(ARROW_WITH_ZLIB)
    list(INSERT ARROW_SRCS 0 ${LIBRARY_DIR}/util/compression_zlib.cc)
endif()

if(ARROW_WITH_ZSTD)
    list(INSERT ARROW_SRCS 0 ${LIBRARY_DIR}/util/compression_zstd.cc)
endif()

add_library(${ARROW_LIBRARY} ${LINK_MODE} ${ARROW_SRCS})
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src ${Boost_INCLUDE_DIRS})
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} Threads::Threads)

if(ARROW_WITH_LZ4)
    target_compile_definitions(${ARROW_LIBRARY} PUBLIC ARROW_WITH_LZ4)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${LZ4_LIBRARY})
endif()

if(ARROW_WITH_SNAPPY)
    target_compile_definitions(${ARROW_LIBRARY} PUBLIC ARROW_WITH_SNAPPY)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${SNAPPY_LIBRARY})
endif()

if(ARROW_WITH_ZLIB)
    target_compile_definitions(${ARROW_LIBRARY} PUBLIC ARROW_WITH_ZLIB)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZLIB_LIBRARIES})
endif()

if(ARROW_WITH_ZSTD)
    target_compile_definitions(${ARROW_LIBRARY} PUBLIC ARROW_WITH_ZSTD)
    target_link_libraries(${ARROW_LIBRARY} PRIVATE ${ZSTD_LIBRARY})
endif()
|
||||
|
||||
|
||||
# === parquet
|
||||
|
||||
# Parquet library, built from the sources inside the contrib/arrow submodule plus
# the pre-generated Thrift files stored next to this CMakeLists.txt.
# The list follows arrow/cpp/src/parquet/CMakeLists.txt; entries are relative to LIBRARY_DIR.
# (With CMake >= 3.12 the prefixing loop could be a single list(TRANSFORM ... PREPEND).)
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet)

set(PARQUET_SRCS "")
foreach(parquet_rel_path
        arrow/reader.cc
        arrow/record_reader.cc
        arrow/schema.cc
        arrow/writer.cc
        bloom_filter.cc
        column_reader.cc
        column_scanner.cc
        column_writer.cc
        file_reader.cc
        file_writer.cc
        metadata.cc
        murmur3.cc
        printer.cc
        schema.cc
        statistics.cc
        types.cc
        util/comparison.cc
        util/memory.cc)
    list(APPEND PARQUET_SRCS ${LIBRARY_DIR}/${parquet_rel_path})
endforeach()

# Thrift-generated sources that are checked in under this directory.
foreach(parquet_generated_file parquet_constants.cpp parquet_types.cpp)
    list(APPEND PARQUET_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/${parquet_generated_file})
endforeach()

add_library(${PARQUET_LIBRARY} ${LINK_MODE} ${PARQUET_SRCS})

# Consumers see both the upstream headers and the generated ones.
target_include_directories(${PARQUET_LIBRARY}
    SYSTEM PUBLIC
    ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src
    ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)

include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h

target_link_libraries(${PARQUET_LIBRARY}
    PRIVATE
    ${ARROW_LIBRARY}
    ${THRIFT_LIBRARY}
    ${Boost_REGEX_LIBRARY})
target_include_directories(${PARQUET_LIBRARY} PRIVATE ${Boost_INCLUDE_DIRS})
|
||||
|
||||
|
||||
# === tools
|
||||
|
||||
# Command-line helpers shipped with parquet-cpp. Each tool is a single source
# file named <tool>.cc under TOOLS_DIR and is linked against the parquet library.
set(TOOLS_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/tools/parquet)
set(PARQUET_TOOLS parquet-dump-schema parquet-reader parquet-scan)
foreach(TOOL ${PARQUET_TOOLS})
    add_executable(${TOOL} ${TOOLS_DIR}/${TOOL}.cc)
    # Explicit visibility keyword: the keyword-less signature has legacy semantics
    # and cannot be mixed with keyword calls on the same target.
    target_link_libraries(${TOOL} PRIVATE ${PARQUET_LIBRARY})
endforeach()
|
1
contrib/arrow-cmake/build/cmake/config.h.in
Symbolic link
1
contrib/arrow-cmake/build/cmake/config.h.in
Symbolic link
@ -0,0 +1 @@
|
||||
../../../thrift/build/cmake/config.h.in
|
17
contrib/arrow-cmake/cpp/src/parquet/parquet_constants.cpp
Normal file
17
contrib/arrow-cmake/cpp/src/parquet/parquet_constants.cpp
Normal file
@ -0,0 +1,17 @@
|
||||
/**
|
||||
* Autogenerated by Thrift Compiler (0.11.0)
|
||||
*
|
||||
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
||||
* @generated
|
||||
*/
|
||||
#include "parquet_constants.h"
|
||||
|
||||
namespace parquet { namespace format {
|
||||
|
||||
const parquetConstants g_parquet_constants;
|
||||
|
||||
parquetConstants::parquetConstants() {
|
||||
}
|
||||
|
||||
}} // namespace
|
||||
|
24
contrib/arrow-cmake/cpp/src/parquet/parquet_constants.h
Normal file
24
contrib/arrow-cmake/cpp/src/parquet/parquet_constants.h
Normal file
@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Autogenerated by Thrift Compiler (0.11.0)
|
||||
*
|
||||
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
|
||||
* @generated
|
||||
*/
|
||||
#ifndef parquet_CONSTANTS_H
|
||||
#define parquet_CONSTANTS_H
|
||||
|
||||
#include "parquet_types.h"
|
||||
|
||||
namespace parquet { namespace format {
|
||||
|
||||
class parquetConstants {
|
||||
public:
|
||||
parquetConstants();
|
||||
|
||||
};
|
||||
|
||||
extern const parquetConstants g_parquet_constants;
|
||||
|
||||
}} // namespace
|
||||
|
||||
#endif
|
6501
contrib/arrow-cmake/cpp/src/parquet/parquet_types.cpp
Normal file
6501
contrib/arrow-cmake/cpp/src/parquet/parquet_types.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2523
contrib/arrow-cmake/cpp/src/parquet/parquet_types.h
Normal file
2523
contrib/arrow-cmake/cpp/src/parquet/parquet_types.h
Normal file
File diff suppressed because it is too large
Load Diff
24
contrib/arrow-cmake/cpp/src/parquet/parquet_version.h
Normal file
24
contrib/arrow-cmake/cpp/src/parquet/parquet_version.h
Normal file
@ -0,0 +1,24 @@
|
||||
// Licensed to the Apache Software Foundation (ASF) under one
|
||||
// or more contributor license agreements. See the NOTICE file
|
||||
// distributed with this work for additional information
|
||||
// regarding copyright ownership. The ASF licenses this file
|
||||
// to you under the Apache License, Version 2.0 (the
|
||||
// "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing,
|
||||
// software distributed under the License is distributed on an
|
||||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
// KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations
|
||||
// under the License.
|
||||
|
||||
#ifndef PARQUET_VERSION_H
|
||||
#define PARQUET_VERSION_H
|
||||
|
||||
// define the parquet created by version
|
||||
#define CREATED_BY_VERSION "parquet-cpp version 1.5.1-SNAPSHOT"
|
||||
|
||||
#endif // PARQUET_VERSION_H
|
11
contrib/arrow-cmake/cpp/src/thrift/stdcxx.h
Normal file
11
contrib/arrow-cmake/cpp/src/thrift/stdcxx.h
Normal file
@ -0,0 +1,11 @@
|
||||
/*
|
||||
|
||||
Temporary hack caused by 17355425 - THRIFT-4735: Remove Qt4 build support
|
||||
|
||||
Fixes
|
||||
../contrib/arrow-cmake/cpp/src/parquet/parquet_types.h:18:10: fatal error: thrift/stdcxx.h: No such file or directory
|
||||
#include <thrift/stdcxx.h>
|
||||
|
||||
Delete me.
|
||||
|
||||
*/
|
2
contrib/boost
vendored
2
contrib/boost
vendored
@ -1 +1 @@
|
||||
Subproject commit 6883b40449f378019aec792f9983ce3afc7ff16e
|
||||
Subproject commit 6a96e8b59f76148eb8ad54a9d15259f8ce84c606
|
@ -10,49 +10,30 @@
|
||||
# Important boost patch: 094c18b
|
||||
#
|
||||
|
||||
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)
|
||||
include(${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake)
|
||||
|
||||
if (NOT MSVC)
|
||||
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/boost)
|
||||
|
||||
if(NOT MSVC)
|
||||
add_definitions(-Wno-unused-variable -Wno-deprecated-declarations)
|
||||
endif ()
|
||||
endif()
|
||||
|
||||
add_library(boost_program_options_internal ${LINK_MODE}
|
||||
${LIBRARY_DIR}/libs/program_options/src/cmdline.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/config_file.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/convert.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/options_description.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/parsers.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/positional_options.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/split.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/utf8_codecvt_facet.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/value_semantic.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/variables_map.cpp
|
||||
${LIBRARY_DIR}/libs/program_options/src/winmain.cpp)
|
||||
macro(add_boost_lib lib_name)
|
||||
add_headers_and_sources(boost_${lib_name} ${LIBRARY_DIR}/libs/${lib_name}/src)
|
||||
add_library(boost_${lib_name}_internal ${LINK_MODE} ${boost_${lib_name}_sources})
|
||||
target_include_directories(boost_${lib_name}_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
|
||||
target_compile_definitions(boost_${lib_name}_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
|
||||
endmacro()
|
||||
|
||||
add_library(boost_filesystem_internal ${LINK_MODE}
|
||||
${LIBRARY_DIR}/libs/filesystem/src/codecvt_error_category.cpp
|
||||
${LIBRARY_DIR}/libs/filesystem/src/operations.cpp
|
||||
${LIBRARY_DIR}/libs/filesystem/src/path.cpp
|
||||
${LIBRARY_DIR}/libs/filesystem/src/path_traits.cpp
|
||||
${LIBRARY_DIR}/libs/filesystem/src/portability.cpp
|
||||
${LIBRARY_DIR}/libs/filesystem/src/unique_path.cpp
|
||||
${LIBRARY_DIR}/libs/filesystem/src/utf8_codecvt_facet.cpp
|
||||
${LIBRARY_DIR}/libs/filesystem/src/windows_file_codecvt.cpp)
|
||||
add_boost_lib(system)
|
||||
|
||||
add_library(boost_system_internal ${LINK_MODE}
|
||||
${LIBRARY_DIR}/libs/system/src/error_code.cpp)
|
||||
add_boost_lib(program_options)
|
||||
|
||||
add_library(boost_random_internal ${LINK_MODE}
|
||||
${LIBRARY_DIR}/libs/random/src/random_device.cpp)
|
||||
add_boost_lib(filesystem)
|
||||
target_link_libraries(boost_filesystem_internal PRIVATE boost_system_internal)
|
||||
|
||||
target_link_libraries (boost_filesystem_internal PUBLIC boost_system_internal)
|
||||
#add_boost_lib(random)
|
||||
|
||||
target_include_directories (boost_program_options_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
|
||||
target_include_directories (boost_filesystem_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
|
||||
target_include_directories (boost_system_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
|
||||
target_include_directories (boost_random_internal SYSTEM BEFORE PUBLIC ${Boost_INCLUDE_DIRS})
|
||||
|
||||
target_compile_definitions (boost_program_options_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
|
||||
target_compile_definitions (boost_filesystem_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
|
||||
target_compile_definitions (boost_system_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
|
||||
target_compile_definitions (boost_random_internal PUBLIC BOOST_SYSTEM_NO_DEPRECATED)
|
||||
if (USE_INTERNAL_PARQUET_LIBRARY)
|
||||
add_boost_lib(regex)
|
||||
endif()
|
||||
|
@ -51,10 +51,10 @@ set(SRCS
|
||||
${RDKAFKA_SOURCE_DIR}/snappy.c
|
||||
${RDKAFKA_SOURCE_DIR}/tinycthread.c
|
||||
${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
|
||||
${RDKAFKA_SOURCE_DIR}/xxhash.c
|
||||
${RDKAFKA_SOURCE_DIR}/lz4.c
|
||||
${RDKAFKA_SOURCE_DIR}/lz4frame.c
|
||||
${RDKAFKA_SOURCE_DIR}/lz4hc.c
|
||||
#${RDKAFKA_SOURCE_DIR}/xxhash.c
|
||||
#${RDKAFKA_SOURCE_DIR}/lz4.c
|
||||
#${RDKAFKA_SOURCE_DIR}/lz4frame.c
|
||||
#${RDKAFKA_SOURCE_DIR}/lz4hc.c
|
||||
${RDKAFKA_SOURCE_DIR}/rdgz.c
|
||||
)
|
||||
|
||||
|
@ -3,6 +3,10 @@ SET(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/lz4/lib)
|
||||
add_library (lz4
|
||||
${LIBRARY_DIR}/lz4.c
|
||||
${LIBRARY_DIR}/lz4hc.c
|
||||
${LIBRARY_DIR}/lz4frame.c
|
||||
${LIBRARY_DIR}/lz4frame.h
|
||||
${LIBRARY_DIR}/xxhash.c
|
||||
${LIBRARY_DIR}/xxhash.h
|
||||
|
||||
${LIBRARY_DIR}/lz4.h
|
||||
${LIBRARY_DIR}/lz4hc.h
|
||||
|
1
contrib/snappy
vendored
Submodule
1
contrib/snappy
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 3f194acb57e0487531c96b97af61dcbd025a78a3
|
1
contrib/thrift
vendored
Submodule
1
contrib/thrift
vendored
Submodule
@ -0,0 +1 @@
|
||||
Subproject commit 010ccf0a0c7023fea0f6bf4e4078ebdff7e61982
|
@ -295,9 +295,9 @@ if (USE_RDKAFKA)
|
||||
endif ()
|
||||
|
||||
if (USE_PARQUET)
|
||||
target_link_libraries(dbms ${PARQUET_LIBRARY} ${ARROW_LIBRARY})
|
||||
if (NOT USE_INTERNAL_PARQUET_LIBRARY)
|
||||
target_include_directories (dbms BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR})
|
||||
target_link_libraries(dbms PRIVATE ${PARQUET_LIBRARY})
|
||||
if (NOT USE_INTERNAL_PARQUET_LIBRARY OR USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
|
||||
target_include_directories (dbms SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR})
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
|
@ -478,7 +478,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
global_context->setFormatSchemaPath(format_schema_path.path());
|
||||
format_schema_path.createDirectories();
|
||||
|
||||
LOG_INFO(log, "Loading metadata.");
|
||||
LOG_INFO(log, "Loading metadata from " + path);
|
||||
try
|
||||
{
|
||||
loadMetadataSystem(*global_context);
|
||||
|
@ -14,6 +14,8 @@
|
||||
#cmakedefine01 USE_POCO_MONGODB
|
||||
#cmakedefine01 USE_POCO_NETSSL
|
||||
#cmakedefine01 USE_BASE64
|
||||
#cmakedefine01 USE_SNAPPY
|
||||
#cmakedefine01 USE_PARQUET
|
||||
#cmakedefine01 USE_HDFS
|
||||
#cmakedefine01 USE_XXHASH
|
||||
#cmakedefine01 USE_INTERNAL_LLVM_LIBRARY
|
||||
|
@ -1,336 +0,0 @@
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
#include <vector>
|
||||
|
||||
// TODO: clear includes
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <common/DateLUTImpl.h>
|
||||
#include <DataStreams/ParquetBlockInputStream.h>
|
||||
#include <DataTypes/DataTypeFactory.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <IO/BufferBase.h>
|
||||
#include <IO/ReadBufferFromMemory.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
#include <arrow/buffer.h>
|
||||
#include <arrow/api.h>
|
||||
#include <arrow/io/api.h>
|
||||
#include <parquet/arrow/reader.h>
|
||||
#include <parquet/arrow/writer.h>
|
||||
#include <parquet/exception.h>
|
||||
|
||||
|
||||
#include <IO/copyData.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ParquetBlockInputStream::ParquetBlockInputStream(ReadBuffer & istr_, const Block & header_)
|
||||
: istr(istr_)
|
||||
, header(header_)
|
||||
{
|
||||
}
|
||||
|
||||
Block ParquetBlockInputStream::getHeader() const
|
||||
{
|
||||
return header;
|
||||
}
|
||||
|
||||
/// Inserts numeric data right into internal column data to reduce an overhead
|
||||
template <typename NumericType>
|
||||
void ParquetBlockInputStream::fillColumnWithNumericData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
|
||||
{
|
||||
PaddedPODArray<NumericType> & column_data = static_cast<ColumnVector<NumericType> &>(*internal_column).getData();
|
||||
column_data.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0; chunk_i != static_cast<size_t>(arrow_column->data()->num_chunks()); ++chunk_i)
|
||||
{
|
||||
std::shared_ptr<arrow::Array> chunk = arrow_column->data()->chunk(chunk_i);
|
||||
/// buffers[0] is a null bitmap and buffers[1] are actual values
|
||||
std::shared_ptr<arrow::Buffer> buffer = chunk->data()->buffers[1];
|
||||
|
||||
const NumericType * raw_data = reinterpret_cast<const NumericType *>(buffer->data());
|
||||
column_data.insert_assume_reserved(raw_data, raw_data + chunk->length());
|
||||
}
|
||||
}
|
||||
|
||||
/// Inserts chars and offsets right into internal column data to reduce an overhead.
|
||||
/// Internal offsets are shifted by one to the right in comparison with Arrow ones. So the last offset should map to the end of all chars.
|
||||
/// Also internal strings are null terminated.
|
||||
void ParquetBlockInputStream::fillColumnWithStringData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
|
||||
{
|
||||
PaddedPODArray<UInt8> & column_chars_t = static_cast<ColumnString &>(*internal_column).getChars();
|
||||
PaddedPODArray<UInt64> & column_offsets = static_cast<ColumnString &>(*internal_column).getOffsets();
|
||||
|
||||
size_t chars_t_size = 0;
|
||||
for (size_t chunk_i = 0; chunk_i != static_cast<size_t>(arrow_column->data()->num_chunks()); ++chunk_i)
|
||||
{
|
||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->data()->chunk(chunk_i)));
|
||||
const size_t chunk_length = chunk.length();
|
||||
|
||||
chars_t_size += chunk.value_offset(chunk_length - 1) + chunk.value_length(chunk_length - 1);
|
||||
chars_t_size += chunk_length; /// additional space for null bytes
|
||||
}
|
||||
|
||||
column_chars_t.reserve(chars_t_size);
|
||||
column_offsets.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0; chunk_i != static_cast<size_t>(arrow_column->data()->num_chunks()); ++chunk_i)
|
||||
{
|
||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->data()->chunk(chunk_i)));
|
||||
std::shared_ptr<arrow::Buffer> buffer = chunk.value_data();
|
||||
const size_t chunk_length = chunk.length();
|
||||
|
||||
for (size_t offset_i = 0; offset_i != chunk_length; ++offset_i)
|
||||
{
|
||||
const UInt8 * raw_data = buffer->data() + chunk.value_offset(offset_i);
|
||||
column_chars_t.insert_assume_reserved(raw_data, raw_data + chunk.value_length(offset_i));
|
||||
column_chars_t.emplace_back('\0');
|
||||
|
||||
column_offsets.emplace_back(column_chars_t.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unpacks an Arrow boolean column into a UInt8 column, one byte per value.
/// The values are read through BooleanArray::Value() rather than copied from the raw buffer.
///
/// `internal_column` is cast to ColumnVector<UInt8> without a check; the caller must pass such a column.
void ParquetBlockInputStream::fillColumnWithBooleanData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
{
    PaddedPODArray<UInt8> & column_data = static_cast<ColumnVector<UInt8> &>(*internal_column).getData();
    column_data.resize(arrow_column->length());

    /// Position in the destination column. It has to keep growing across chunks:
    /// restarting from zero for every chunk would overwrite the rows of the previous ones.
    size_t row_i = 0;

    const size_t num_chunks = static_cast<size_t>(arrow_column->data()->num_chunks());
    for (size_t chunk_i = 0; chunk_i != num_chunks; ++chunk_i)
    {
        arrow::BooleanArray & chunk = static_cast<arrow::BooleanArray &>(*(arrow_column->data()->chunk(chunk_i)));
        const size_t chunk_length = static_cast<size_t>(chunk.length());

        for (size_t bool_i = 0; bool_i != chunk_length; ++bool_i)
            column_data[row_i++] = chunk.Value(bool_i);
    }
}
|
||||
|
||||
/// Arrow stores Parquet::DATE in Int32, while ClickHouse stores Date in UInt16. Therefore, it should be checked before saving
|
||||
void ParquetBlockInputStream::fillColumnWithDate32Data(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
|
||||
{
|
||||
PaddedPODArray<UInt16> & column_data = static_cast<ColumnVector<UInt16> &>(*internal_column).getData();
|
||||
column_data.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0; chunk_i != static_cast<size_t>(arrow_column->data()->num_chunks()); ++chunk_i)
|
||||
{
|
||||
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->data()->chunk(chunk_i)));
|
||||
|
||||
for (size_t value_i = 0; value_i != static_cast<size_t>(chunk.length()); ++value_i)
|
||||
{
|
||||
UInt32 days_num = static_cast<UInt32>(chunk.Value(value_i));
|
||||
if (days_num > DATE_LUT_MAX_DAY_NUM)
|
||||
{
|
||||
// TODO: will it rollback correctly?
|
||||
throw Exception(
|
||||
"Input value " + std::to_string(days_num) + " of a column \"" + arrow_column->name() + "\" is greater than "
|
||||
"max allowed Date value, which is " + std::to_string(DATE_LUT_MAX_DAY_NUM)
|
||||
);
|
||||
}
|
||||
|
||||
column_data.emplace_back(days_num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a null bytemap from arrow's null bitmap
|
||||
void ParquetBlockInputStream::fillByteMapFromArrowColumn(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & bytemap)
|
||||
{
|
||||
PaddedPODArray<UInt8> & bytemap_data = static_cast<ColumnVector<UInt8> &>(*bytemap).getData();
|
||||
bytemap_data.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0; chunk_i != static_cast<size_t>(arrow_column->data()->num_chunks()); ++chunk_i)
|
||||
{
|
||||
std::shared_ptr<arrow::Array> chunk = arrow_column->data()->chunk(chunk_i);
|
||||
|
||||
for (size_t value_i = 0; value_i != static_cast<size_t>(chunk->length()); ++value_i)
|
||||
bytemap_data.emplace_back(chunk->IsNull(value_i));
|
||||
}
|
||||
}
|
||||
|
||||
#define FOR_ARROW_NUMERIC_TYPES(M) \
|
||||
M(arrow::Type::UINT8, UInt8) \
|
||||
M(arrow::Type::INT8, Int8) \
|
||||
M(arrow::Type::UINT16, UInt16) \
|
||||
M(arrow::Type::INT16, Int16) \
|
||||
M(arrow::Type::UINT32, UInt32) \
|
||||
M(arrow::Type::INT32, Int32) \
|
||||
M(arrow::Type::UINT64, UInt64) \
|
||||
M(arrow::Type::INT64, Int64) \
|
||||
M(arrow::Type::FLOAT, Float32) \
|
||||
M(arrow::Type::DOUBLE, Float64)
|
||||
|
||||
|
||||
using NameToColumnPtr = std::unordered_map<std::string, std::shared_ptr<arrow::Column>>;
|
||||
|
||||
const std::unordered_map<arrow::Type::type, std::shared_ptr<IDataType>> ParquetBlockInputStream::arrow_type_to_internal_type = {
|
||||
{arrow::Type::UINT8, std::make_shared<DataTypeUInt8>()},
|
||||
{arrow::Type::INT8, std::make_shared<DataTypeInt8>()},
|
||||
{arrow::Type::UINT16, std::make_shared<DataTypeUInt16>()},
|
||||
{arrow::Type::INT16, std::make_shared<DataTypeInt16>()},
|
||||
{arrow::Type::UINT32, std::make_shared<DataTypeUInt32>()},
|
||||
{arrow::Type::INT32, std::make_shared<DataTypeInt32>()},
|
||||
{arrow::Type::UINT64, std::make_shared<DataTypeUInt64>()},
|
||||
{arrow::Type::INT64, std::make_shared<DataTypeInt64>()},
|
||||
{arrow::Type::FLOAT, std::make_shared<DataTypeFloat32>()},
|
||||
{arrow::Type::DOUBLE, std::make_shared<DataTypeFloat64>()},
|
||||
|
||||
{arrow::Type::BOOL, std::make_shared<DataTypeUInt8>()},
|
||||
{arrow::Type::DATE32, std::make_shared<DataTypeDate>()},
|
||||
|
||||
{arrow::Type::STRING, std::make_shared<DataTypeString>()}//,
|
||||
// TODO: add other types that are convertable to internal ones:
|
||||
// 0. ENUM?
|
||||
// 1. UUID -> String
|
||||
// 2. JSON -> String
|
||||
};
|
||||
|
||||
|
||||
/// Reads everything that is left in the input buffer as one Parquet file and converts it into a single block.
///
/// Returns an empty block when the buffer is already exhausted. For every column of the header the input
/// must contain a column with the same name, a supported Arrow type and a matching nested type; NULLs are
/// accepted only for Nullable header columns. Any violation is reported with an Exception.
Block ParquetBlockInputStream::readImpl()
{
    Block result;

    if (istr.eof())
        return result;

    /// The Parquet reader needs the whole file in memory, so drain the buffer into a string first.
    std::string raw_file;
    {
        WriteBufferFromString raw_file_writer(raw_file);
        copyData(istr, raw_file_writer);
    }

    arrow::Buffer arrow_buffer(raw_file);
    // TODO: maybe use parquet::RandomAccessSource?
    auto parquet_reader = parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(arrow_buffer));
    parquet::arrow::FileReader arrow_reader(::arrow::default_memory_pool(), std::move(parquet_reader));

    // TODO: also catch a ParquetException thrown by filereader?
    std::shared_ptr<arrow::Table> table;
    const arrow::Status read_status = arrow_reader.ReadTable(&table);
    if (!read_status.ok())
        throw Exception("Error while reading parquet data: " + read_status.ToString()/*, ErrorCodes::TODO*/);

    if (0 == table->num_rows())
        throw Exception("Empty table in input data"/*, ErrorCodes::TODO*/);

    const size_t input_columns_count = static_cast<size_t>(table->num_columns());
    // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
    if (header.columns() > input_columns_count)
        throw Exception("Number of columns is less than the table has" /*, ErrorCodes::TODO*/);

    /// Index the input columns by name: the header columns are looked up by name, not by position.
    NameToColumnPtr columns_by_name;
    for (size_t input_i = 0; input_i < input_columns_count; ++input_i)
    {
        std::shared_ptr<arrow::Column> input_column = table->column(input_i);
        columns_by_name[input_column->name()] = input_column;
    }

    const size_t header_columns_count = header.columns();
    for (size_t header_i = 0; header_i < header_columns_count; ++header_i)
    {
        const ColumnWithTypeAndName & header_column = header.getByPosition(header_i);
        const bool header_is_nullable = header_column.type->isNullable();

        const auto found_column = columns_by_name.find(header_column.name);
        // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
        if (found_column == columns_by_name.end())
            throw Exception("Column \"" + header_column.name + "\" is not presented in input data" /*, ErrorCodes::TODO*/);

        std::shared_ptr<arrow::Column> arrow_column = found_column->second;
        const arrow::Type::type arrow_type = arrow_column->type()->id();

        const auto found_type = arrow_type_to_internal_type.find(arrow_type);
        if (found_type == arrow_type_to_internal_type.end())
        {
            throw Exception(
                "The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name() + "\""
                " is not supported for conversion from a Parquet data format"
                /*, ErrorCodes::TODO*/
            );
        }

        // TODO: check if a column is const?
        if (!header_is_nullable && arrow_column->null_count())
            throw Exception("Can not insert NULL data into non-nullable column \"" + header_column.name + "\""/*, ErrorCodes::TODO*/);

        const bool result_is_nullable = header_is_nullable || arrow_column->null_count();

        /// Type implied by the input data (with and without the Nullable wrapper).
        const DataTypePtr input_nested_type = found_type->second;
        const DataTypePtr input_type = result_is_nullable ? makeNullable(input_nested_type) : input_nested_type;
        const std::string input_nested_type_name = input_nested_type->getName();

        /// Type requested by the header, with Nullable stripped.
        DataTypePtr expected_nested_type = header_column.type;
        if (header_is_nullable)
            expected_nested_type = static_cast<const DataTypeNullable *>(header_column.type.get())->getNestedType();
        const std::string expected_nested_type_name = expected_nested_type->getName();

        // TODO: can it be done with typeid_cast?
        if (input_nested_type_name != expected_nested_type_name)
        {
            throw Exception(
                "Input data type \"" + input_nested_type_name + "\" for a column \"" + header_column.name + "\""
                " is not compatible with a column type \"" + expected_nested_type_name + "\""/*, ErrorCodes::TODO*/
            );
        }

        /// Data
        MutableColumnPtr values = input_nested_type->createColumn();

        switch (arrow_type)
        {
            case arrow::Type::STRING:
                fillColumnWithStringData(arrow_column, values);
                break;
            case arrow::Type::BOOL:
                fillColumnWithBooleanData(arrow_column, values);
                break;
            case arrow::Type::DATE32:
                fillColumnWithDate32Data(arrow_column, values);
                break;
#define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \
            case ARROW_NUMERIC_TYPE: \
                fillColumnWithNumericData<CPP_NUMERIC_TYPE>(arrow_column, values); \
                break;

            FOR_ARROW_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
            // TODO: support TIMESTAMP_MICROS and TIMESTAMP_MILLIS with truncated micro- and milliseconds?
            // TODO: read JSON as a string?
            // TODO: read UUID as a string?
            default:
                throw Exception("Unsupported parquet type \"" + arrow_column->type()->name() + "\""/*, ErrorCodes::TODO*/);
        }

        ColumnWithTypeAndName result_column;
        result_column.name = header_column.name;
        result_column.type = input_type;

        if (!result_column.type->isNullable())
        {
            result_column.column = std::move(values);
        }
        else
        {
            /// Nullable columns additionally carry a bytemap telling which rows are NULL.
            MutableColumnPtr null_bytemap = DataTypeUInt8().createColumn();
            fillByteMapFromArrowColumn(arrow_column, null_bytemap);
            result_column.column = ColumnNullable::create(std::move(values), std::move(null_bytemap));
        }

        result.insert(std::move(result_column));
    }

    return result;
}
|
||||
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <DataStreams/IProfilingBlockInputStream.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
// TODO: refine includes
|
||||
#include <arrow/api.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ParquetBlockInputStream : public IProfilingBlockInputStream
|
||||
{
|
||||
public:
|
||||
ParquetBlockInputStream(ReadBuffer & istr_, const Block & header_);
|
||||
|
||||
String getName() const override { return "Parquet"; }
|
||||
Block getHeader() const override;
|
||||
|
||||
protected:
|
||||
Block readImpl() override;
|
||||
|
||||
private:
|
||||
ReadBuffer & istr;
|
||||
Block header;
|
||||
|
||||
static void fillColumnWithStringData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column);
|
||||
static void fillColumnWithBooleanData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column);
|
||||
static void fillColumnWithDate32Data(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column);
|
||||
template <typename NumericType>
|
||||
static void fillColumnWithNumericData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column);
|
||||
|
||||
static void fillByteMapFromArrowColumn(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & bytemap);
|
||||
|
||||
static const std::unordered_map<arrow::Type::type, std::shared_ptr<IDataType>> arrow_type_to_internal_type;
|
||||
|
||||
// TODO: check that this class implements every part of its parent
|
||||
};
|
||||
|
||||
}
|
@ -1,271 +0,0 @@
|
||||
// TODO: clean includes
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnVector.h>
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Core/ColumnWithTypeAndName.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
#include <arrow/api.h>
|
||||
#include <arrow/io/api.h>
|
||||
#include <parquet/arrow/writer.h>
|
||||
#include <parquet/util/memory.h>
|
||||
#include <parquet/exception.h>
|
||||
|
||||
#include <DataStreams/ParquetBlockOutputStream.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
ParquetBlockOutputStream::ParquetBlockOutputStream(WriteBuffer & ostr_, const Block & header_)
|
||||
: ostr(ostr_)
|
||||
, header(header_)
|
||||
{
|
||||
}
|
||||
|
||||
void ParquetBlockOutputStream::flush()
|
||||
{
|
||||
ostr.next();
|
||||
}
|
||||
|
||||
void checkAppendStatus(arrow::Status & append_status, const std::string & column_name)
|
||||
{
|
||||
if (!append_status.ok())
|
||||
{
|
||||
throw Exception(
|
||||
"Error while building a parquet column \"" + column_name + "\": " + append_status.ToString()/*,
|
||||
ErrorCodes::TODO*/
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
void checkFinishStatus(arrow::Status & finish_status, const std::string & column_name)
|
||||
{
|
||||
if (!finish_status.ok())
|
||||
{
|
||||
throw Exception(
|
||||
"Error while writing a parquet column \"" + column_name + "\": " + finish_status.ToString()/*,
|
||||
ErrorCodes::TODO*/
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename NumericType, typename ArrowBuilderType>
|
||||
void ParquetBlockOutputStream::fillArrowArrayWithNumericColumnData(
|
||||
ColumnPtr write_column,
|
||||
std::shared_ptr<arrow::Array> & arrow_array,
|
||||
const PaddedPODArray<UInt8> * null_bytemap
|
||||
) {
|
||||
const PaddedPODArray<NumericType> & internal_data = static_cast<const ColumnVector<NumericType> &>(*write_column).getData();
|
||||
ArrowBuilderType numeric_builder;
|
||||
arrow::Status append_status;
|
||||
|
||||
const UInt8 * arrow_null_bytemap_raw_ptr = nullptr;
|
||||
PaddedPODArray<UInt8> arrow_null_bytemap;
|
||||
if (null_bytemap)
|
||||
{
|
||||
/// Invert values since Arrow interprets 1 as a non-null value, while CH as a null
|
||||
arrow_null_bytemap.reserve(null_bytemap->size());
|
||||
for (size_t i = 0; i != null_bytemap->size(); ++i)
|
||||
arrow_null_bytemap.emplace_back(1 ^ (*null_bytemap)[i]);
|
||||
|
||||
arrow_null_bytemap_raw_ptr = arrow_null_bytemap.data();
|
||||
}
|
||||
|
||||
append_status = numeric_builder.AppendValues(internal_data.data(), internal_data.size(), arrow_null_bytemap_raw_ptr);
|
||||
checkAppendStatus(append_status, write_column->getName());
|
||||
|
||||
arrow::Status finish_status = numeric_builder.Finish(&arrow_array);
|
||||
checkFinishStatus(finish_status, write_column->getName());
|
||||
}
|
||||
|
||||
/// Converts a string column into an Arrow string array, one value at a time.
/// `write_column` is cast to ColumnString without a check; rows flagged in `null_bytemap` (if given)
/// are appended as NULLs and their stored value is ignored.
void ParquetBlockOutputStream::fillArrowArrayWithStringColumnData(
    ColumnPtr write_column,
    std::shared_ptr<arrow::Array> & arrow_array,
    const PaddedPODArray<UInt8> * null_bytemap)
{
    const auto & strings = static_cast<const ColumnString &>(*write_column);
    arrow::StringBuilder builder;

    const size_t rows = strings.size();
    for (size_t row = 0; row < rows; ++row)
    {
        const bool is_null = null_bytemap && (*null_bytemap)[row];

        arrow::Status append_status;
        if (is_null)
        {
            append_status = builder.AppendNull();
        }
        else
        {
            const StringRef value = strings.getDataAt(row);
            append_status = builder.Append(value.data, value.size);
        }

        checkAppendStatus(append_status, write_column->getName());
    }

    arrow::Status finish_status = builder.Finish(&arrow_array);
    checkFinishStatus(finish_status, write_column->getName());
}
|
||||
|
||||
/// Converts a Date column (stored as UInt16 values) into an Arrow date32 array, one value at a time.
/// `write_column` is cast to ColumnVector<UInt16> without a check; rows flagged in `null_bytemap`
/// (if given) are appended as NULLs.
void ParquetBlockOutputStream::fillArrowArrayWithDateColumnData(
    ColumnPtr write_column,
    std::shared_ptr<arrow::Array> & arrow_array,
    const PaddedPODArray<UInt8> * null_bytemap)
{
    const auto & day_numbers = static_cast<const ColumnVector<UInt16> &>(*write_column).getData();
    arrow::Date32Builder builder;

    const size_t rows = day_numbers.size();
    for (size_t row = 0; row < rows; ++row)
    {
        const bool is_null = null_bytemap && (*null_bytemap)[row];

        /// Append() takes Int32, the UInt16 day number is widened implicitly.
        arrow::Status append_status = is_null ? builder.AppendNull() : builder.Append(day_numbers[row]);
        checkAppendStatus(append_status, write_column->getName());
    }

    arrow::Status finish_status = builder.Finish(&arrow_array);
    checkFinishStatus(finish_status, write_column->getName());
}
|
||||
|
||||
/// X-macro: invokes M(ClickHouse numeric type, Arrow builder class) once for every numeric type
/// that is written with fillArrowArrayWithNumericColumnData().
#define FOR_INTERNAL_NUMERIC_TYPES(M)  \
    M(UInt8,   arrow::UInt8Builder)    \
    M(Int8,    arrow::Int8Builder)     \
    M(UInt16,  arrow::UInt16Builder)   \
    M(Int16,   arrow::Int16Builder)    \
    M(UInt32,  arrow::UInt32Builder)   \
    M(Int32,   arrow::Int32Builder)    \
    M(UInt64,  arrow::UInt64Builder)   \
    M(Int64,   arrow::Int64Builder)    \
    M(Float32, arrow::FloatBuilder)    \
    M(Float64, arrow::DoubleBuilder)
|
||||
|
||||
/// Name of a ClickHouse data type -> Arrow data type it is written as.
/// A type whose name is missing here is rejected by write() as unsupported.
const std::unordered_map<String, std::shared_ptr<arrow::DataType>> ParquetBlockOutputStream::internal_type_to_arrow_type = {
    /// Plain numbers
    {"UInt8",   arrow::uint8()},
    {"Int8",    arrow::int8()},
    {"UInt16",  arrow::uint16()},
    {"Int16",   arrow::int16()},
    {"UInt32",  arrow::uint32()},
    {"Int32",   arrow::int32()},
    {"UInt64",  arrow::uint64()},
    {"Int64",   arrow::int64()},
    {"Float32", arrow::float32()},
    {"Float64", arrow::float64()},

    {"Date",    arrow::date32()},

    // TODO: ClickHouse can actually store non-utf8 strings!
    {"String",  arrow::utf8()}
    // TODO: add other types:
    // 1. FixedString
    // 2. DateTime
};
|
||||
|
||||
/// Returns a pointer to the null map data stored inside a Nullable column.
/// `column` is cast to ColumnNullable without a check. The returned pointer refers to data held by
/// the null map column, so it is presumably valid only while the Nullable column is alive.
const PaddedPODArray<UInt8> * extractNullBytemapPtr(ColumnPtr column)
{
    const auto & nullable_column = static_cast<const ColumnNullable &>(*column);
    ColumnPtr null_map_column = nullable_column.getNullMapColumnPtr();
    return &static_cast<const ColumnVector<UInt8> &>(*null_map_column).getData();
}
|
||||
|
||||
void ParquetBlockOutputStream::write(const Block & block)
|
||||
{
|
||||
block.checkNumberOfRows();
|
||||
|
||||
const size_t columns_num = block.columns();
|
||||
|
||||
/// For arrow::Schema and arrow::Table creation
|
||||
std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
|
||||
std::vector<std::shared_ptr<arrow::Array>> arrow_arrays;
|
||||
arrow_fields.reserve(columns_num);
|
||||
arrow_arrays.reserve(columns_num);
|
||||
|
||||
for (size_t column_i = 0; column_i < columns_num; ++column_i)
|
||||
{
|
||||
// TODO: constructed every iteration
|
||||
const ColumnWithTypeAndName & column = block.safeGetByPosition(column_i);
|
||||
|
||||
const bool is_column_nullable = column.type->isNullable();
|
||||
const DataTypePtr column_nested_type =
|
||||
is_column_nullable
|
||||
? static_cast<const DataTypeNullable *>(column.type.get())->getNestedType()
|
||||
: column.type;
|
||||
const DataTypePtr column_type = column.type;
|
||||
// TODO: do not mix std::string and String
|
||||
const std::string column_nested_type_name = column_nested_type->getName();
|
||||
|
||||
if (internal_type_to_arrow_type.find(column_nested_type_name) == internal_type_to_arrow_type.end())
|
||||
{
|
||||
throw Exception(
|
||||
"The type \"" + column_nested_type_name + "\" of a column \"" + column.name + "\""
|
||||
" is not supported for conversion into a Parquet data format"
|
||||
/*, ErrorCodes::TODO*/
|
||||
);
|
||||
}
|
||||
|
||||
arrow_fields.emplace_back(new arrow::Field(
|
||||
column.name,
|
||||
internal_type_to_arrow_type.at(column_nested_type_name),
|
||||
is_column_nullable
|
||||
));
|
||||
std::shared_ptr<arrow::Array> arrow_array;
|
||||
|
||||
ColumnPtr nested_column = is_column_nullable ? static_cast<const ColumnNullable &>(*column.column).getNestedColumnPtr() : column.column;
|
||||
const PaddedPODArray<UInt8> * null_bytemap = is_column_nullable ? extractNullBytemapPtr(column.column) : nullptr;
|
||||
|
||||
// TODO: use typeid_cast
|
||||
if ("String" == column_nested_type_name)
|
||||
{
|
||||
fillArrowArrayWithStringColumnData(nested_column, arrow_array, null_bytemap);
|
||||
}
|
||||
else if ("Date" == column_nested_type_name)
|
||||
{
|
||||
fillArrowArrayWithDateColumnData(nested_column, arrow_array, null_bytemap);
|
||||
}
|
||||
#define DISPATCH(CPP_NUMERIC_TYPE, ARROW_BUILDER_TYPE) \
|
||||
else if (#CPP_NUMERIC_TYPE == column_nested_type_name) \
|
||||
{ \
|
||||
fillArrowArrayWithNumericColumnData<CPP_NUMERIC_TYPE, ARROW_BUILDER_TYPE>(nested_column, arrow_array, null_bytemap); \
|
||||
}
|
||||
|
||||
FOR_INTERNAL_NUMERIC_TYPES(DISPATCH)
|
||||
#undef DISPATCH
|
||||
// TODO: there are also internal types that are convertable to parquet/arrow once:
|
||||
// 1. FixedString(N)
|
||||
// 2. DateTime
|
||||
else
|
||||
{
|
||||
throw Exception(
|
||||
"Internal type \"" + column_nested_type_name + "\" of a column \"" + column.name + "\""
|
||||
" is not supported for conversion into a Parquet data format"/*, ErrorCodes::TODO*/
|
||||
);
|
||||
}
|
||||
|
||||
arrow_arrays.emplace_back(std::move(arrow_array));
|
||||
}
|
||||
|
||||
std::shared_ptr<arrow::Schema> arrow_schema = std::make_shared<arrow::Schema>(std::move(arrow_fields));
|
||||
std::shared_ptr<arrow::Table> arrow_table = arrow::Table::Make(arrow_schema, arrow_arrays);
|
||||
|
||||
// TODO: get rid of extra copying
|
||||
std::shared_ptr<parquet::InMemoryOutputStream> sink = std::make_shared<parquet::InMemoryOutputStream>();
|
||||
|
||||
// TODO: calculate row_group_size depending on a number of rows and table size
|
||||
|
||||
arrow::Status write_status = parquet::arrow::WriteTable(
|
||||
*arrow_table, arrow::default_memory_pool(), sink,
|
||||
/* row_group_size = */arrow_table->num_rows(), parquet::default_writer_properties(),
|
||||
parquet::arrow::default_arrow_writer_properties()
|
||||
);
|
||||
if (!write_status.ok())
|
||||
throw Exception("Error while writing a table: " + write_status.ToString()/*, ErrorCodes::TODO*/);
|
||||
|
||||
std::shared_ptr<arrow::Buffer> table_buffer = sink->GetBuffer();
|
||||
writeString(reinterpret_cast<const char *>(table_buffer->data()), table_buffer->size(), ostr);
|
||||
}
|
||||
|
||||
};
|
@ -1,36 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
class ParquetBlockOutputStream : public IBlockOutputStream
|
||||
{
|
||||
public:
|
||||
ParquetBlockOutputStream(WriteBuffer & ostr_, const Block & header_);
|
||||
|
||||
Block getHeader() const override { return header; }
|
||||
void write(const Block & block) override;
|
||||
void flush() override;
|
||||
|
||||
String getContentType() const override { return "application/octet-stream"; }
|
||||
|
||||
private:
|
||||
WriteBuffer & ostr;
|
||||
Block header;
|
||||
|
||||
static void fillArrowArrayWithDateColumnData(ColumnPtr write_column, std::shared_ptr<arrow::Array> & arrow_array,
|
||||
const PaddedPODArray<UInt8> * null_bytemap);
|
||||
static void fillArrowArrayWithStringColumnData(ColumnPtr write_column, std::shared_ptr<arrow::Array> & arrow_array,
|
||||
const PaddedPODArray<UInt8> * null_bytemap);
|
||||
template <typename NumericType, typename ArrowBuilderType>
|
||||
static void fillArrowArrayWithNumericColumnData(ColumnPtr write_column, std::shared_ptr<arrow::Array> & arrow_array,
|
||||
const PaddedPODArray<UInt8> * null_bytemap);
|
||||
|
||||
static const std::unordered_map<String, std::shared_ptr<arrow::DataType>> internal_type_to_arrow_type;
|
||||
};
|
||||
|
||||
}
|
@ -80,9 +80,9 @@ public:
|
||||
scale(scale_)
|
||||
{
|
||||
if (unlikely(precision < 1 || precision > maxPrecision()))
|
||||
throw Exception("Precision is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
throw Exception("Precision " + std::to_string(precision) + " is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
if (unlikely(scale < 0 || static_cast<UInt32>(scale) > maxPrecision()))
|
||||
throw Exception("Scale is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
throw Exception("Scale " + std::to_string(scale) + " is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
}
|
||||
|
||||
const char * getFamilyName() const override { return "Decimal"; }
|
||||
|
@ -69,6 +69,7 @@ BlockOutputStreamPtr FormatFactory::getOutput(const String & name, WriteBuffer &
|
||||
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
|
||||
format_settings.pretty.color = settings.output_format_pretty_color;
|
||||
format_settings.write_statistics = settings.output_format_write_statistics;
|
||||
format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
|
||||
|
||||
/** Materialization is needed, because formats can use the functions `IDataType`,
|
||||
* which only work with full columns.
|
||||
@ -111,6 +112,8 @@ void registerInputFormatTSKV(FormatFactory & factory);
|
||||
void registerOutputFormatTSKV(FormatFactory & factory);
|
||||
void registerInputFormatJSONEachRow(FormatFactory & factory);
|
||||
void registerOutputFormatJSONEachRow(FormatFactory & factory);
|
||||
void registerInputFormatParquet(FormatFactory & factory);
|
||||
void registerOutputFormatParquet(FormatFactory & factory);
|
||||
void registerOutputFormatProtobuf(FormatFactory & factory);
|
||||
|
||||
/// Output only (presentational) formats.
|
||||
@ -149,6 +152,8 @@ FormatFactory::FormatFactory()
|
||||
registerOutputFormatJSONEachRow(*this);
|
||||
registerOutputFormatProtobuf(*this);
|
||||
registerInputFormatCapnProto(*this);
|
||||
registerInputFormatParquet(*this);
|
||||
registerOutputFormatParquet(*this);
|
||||
|
||||
registerOutputFormatPretty(*this);
|
||||
registerOutputFormatPrettyCompact(*this);
|
||||
|
@ -61,6 +61,12 @@ struct FormatSettings
|
||||
|
||||
UInt64 input_allow_errors_num = 0;
|
||||
Float32 input_allow_errors_ratio = 0;
|
||||
|
||||
struct Parquet
|
||||
{
|
||||
UInt64 row_group_size = 1000000;
|
||||
} parquet;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -4,10 +4,6 @@
|
||||
#include <IO/WriteBuffer.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
#include <Core/iostream_debug_helpers.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
ODBCDriver2BlockOutputStream::ODBCDriver2BlockOutputStream(
|
||||
|
497
dbms/src/Formats/ParquetBlockInputStream.cpp
Normal file
497
dbms/src/Formats/ParquetBlockInputStream.cpp
Normal file
@ -0,0 +1,497 @@
|
||||
#include <Common/config.h>
|
||||
|
||||
#if USE_PARQUET
|
||||
# include "ParquetBlockInputStream.h"
|
||||
|
||||
# include <algorithm>
|
||||
# include <iterator>
|
||||
# include <vector>
|
||||
// TODO: clear includes
|
||||
# include <Columns/ColumnNullable.h>
|
||||
# include <Columns/ColumnString.h>
|
||||
# include <Columns/ColumnsNumber.h>
|
||||
# include <Columns/IColumn.h>
|
||||
# include <Core/ColumnWithTypeAndName.h>
|
||||
# include <DataTypes/DataTypeDate.h>
|
||||
# include <DataTypes/DataTypeDateTime.h>
|
||||
# include <DataTypes/DataTypeFactory.h>
|
||||
# include <DataTypes/DataTypeNullable.h>
|
||||
# include <DataTypes/DataTypeString.h>
|
||||
# include <DataTypes/DataTypesDecimal.h>
|
||||
# include <DataTypes/DataTypesNumber.h>
|
||||
# include <Formats/FormatFactory.h>
|
||||
# include <IO/BufferBase.h>
|
||||
# include <IO/ReadBufferFromMemory.h>
|
||||
# include <IO/WriteBufferFromString.h>
|
||||
# include <IO/WriteHelpers.h>
|
||||
# include <IO/copyData.h>
|
||||
# include <Interpreters/castColumn.h>
|
||||
# include <common/DateLUTImpl.h>
|
||||
# include <ext/range.h>
|
||||
# include <arrow/api.h>
|
||||
//# include <arrow/buffer.h>
|
||||
//# include <arrow/io/api.h>
|
||||
# include <parquet/arrow/reader.h>
|
||||
//# include <parquet/arrow/writer.h>
|
||||
//# include <parquet/exception.h>
|
||||
# include <parquet/file_reader.h>
|
||||
|
||||
# include <Core/iostream_debug_helpers.h> // REMOVE ME
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_TYPE;
|
||||
extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE;
|
||||
extern const int CANNOT_READ_ALL_DATA;
|
||||
extern const int EMPTY_DATA_PASSED;
|
||||
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
|
||||
extern const int CANNOT_CONVERT_TYPE;
|
||||
extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN;
|
||||
extern const int THERE_IS_NO_COLUMN;
|
||||
}
|
||||
|
||||
ParquetBlockInputStream::ParquetBlockInputStream(ReadBuffer & istr_, const Block & header_, const Context & context_)
|
||||
: istr{istr_}, header{header_}, context{context_}
|
||||
{
|
||||
}
|
||||
|
||||
/// Returns a copy of the header block that was passed to the constructor.
Block ParquetBlockInputStream::getHeader() const
{
    return header;
}
|
||||
|
||||
/// Inserts numeric data right into internal column data to reduce an overhead
|
||||
template <typename NumericType, typename VectorType = ColumnVector<NumericType>>
|
||||
void fillColumnWithNumericData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
|
||||
{
|
||||
auto & column_data = static_cast<VectorType &>(*internal_column).getData();
|
||||
column_data.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->data()->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
std::shared_ptr<arrow::Array> chunk = arrow_column->data()->chunk(chunk_i);
|
||||
/// buffers[0] is a null bitmap and buffers[1] are actual values
|
||||
std::shared_ptr<arrow::Buffer> buffer = chunk->data()->buffers[1];
|
||||
|
||||
const auto * raw_data = reinterpret_cast<const NumericType *>(buffer->data());
|
||||
column_data.insert_assume_reserved(raw_data, raw_data + chunk->length());
|
||||
}
|
||||
}
|
||||
|
||||
/// Inserts chars and offsets right into internal column data to reduce an overhead.
|
||||
/// Internal offsets are shifted by one to the right in comparison with Arrow ones. So the last offset should map to the end of all chars.
|
||||
/// Also internal strings are null terminated.
|
||||
void fillColumnWithStringData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
|
||||
{
|
||||
PaddedPODArray<UInt8> & column_chars_t = static_cast<ColumnString &>(*internal_column).getChars();
|
||||
PaddedPODArray<UInt64> & column_offsets = static_cast<ColumnString &>(*internal_column).getOffsets();
|
||||
|
||||
size_t chars_t_size = 0;
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->data()->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->data()->chunk(chunk_i)));
|
||||
const size_t chunk_length = chunk.length();
|
||||
|
||||
chars_t_size += chunk.value_offset(chunk_length - 1) + chunk.value_length(chunk_length - 1);
|
||||
chars_t_size += chunk_length; /// additional space for null bytes
|
||||
}
|
||||
|
||||
column_chars_t.reserve(chars_t_size);
|
||||
column_offsets.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->data()->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::BinaryArray & chunk = static_cast<arrow::BinaryArray &>(*(arrow_column->data()->chunk(chunk_i)));
|
||||
std::shared_ptr<arrow::Buffer> buffer = chunk.value_data();
|
||||
const size_t chunk_length = chunk.length();
|
||||
|
||||
for (size_t offset_i = 0; offset_i != chunk_length; ++offset_i)
|
||||
{
|
||||
if (!chunk.IsNull(offset_i) && buffer)
|
||||
{
|
||||
const UInt8 * raw_data = buffer->data() + chunk.value_offset(offset_i);
|
||||
column_chars_t.insert_assume_reserved(raw_data, raw_data + chunk.value_length(offset_i));
|
||||
}
|
||||
column_chars_t.emplace_back('\0');
|
||||
|
||||
column_offsets.emplace_back(column_chars_t.size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts an Arrow boolean column (bit-packed) into an internal UInt8 column, one byte per value.
/// Values of all chunks are appended one after another, so the result has arrow_column->length() rows.
void fillColumnWithBooleanData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
{
    auto & column_data = static_cast<ColumnVector<UInt8> &>(*internal_column).getData();
    column_data.reserve(arrow_column->length());

    for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->data()->num_chunks()); chunk_i < num_chunks; ++chunk_i)
    {
        arrow::BooleanArray & chunk = static_cast<arrow::BooleanArray &>(*(arrow_column->data()->chunk(chunk_i)));

        /// Append instead of writing by the index inside the chunk:
        /// otherwise every chunk after the first one would overwrite the beginning of the column.
        for (size_t bool_i = 0, length = static_cast<size_t>(chunk.length()); bool_i < length; ++bool_i)
            column_data.emplace_back(chunk.Value(bool_i));
    }
}
|
||||
|
||||
/// Arrow stores Parquet::DATE in Int32, while ClickHouse stores Date in UInt16. Therefore, it should be checked before saving
|
||||
void fillColumnWithDate32Data(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
|
||||
{
|
||||
PaddedPODArray<UInt16> & column_data = static_cast<ColumnVector<UInt16> &>(*internal_column).getData();
|
||||
column_data.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->data()->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
arrow::Date32Array & chunk = static_cast<arrow::Date32Array &>(*(arrow_column->data()->chunk(chunk_i)));
|
||||
|
||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||
{
|
||||
UInt32 days_num = static_cast<UInt32>(chunk.Value(value_i));
|
||||
if (days_num > DATE_LUT_MAX_DAY_NUM)
|
||||
{
|
||||
// TODO: will it rollback correctly?
|
||||
throw Exception{"Input value " + std::to_string(days_num) + " of a column \"" + arrow_column->name()
|
||||
+ "\" is greater than "
|
||||
"max allowed Date value, which is "
|
||||
+ std::to_string(DATE_LUT_MAX_DAY_NUM),
|
||||
ErrorCodes::VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE};
|
||||
}
|
||||
|
||||
column_data.emplace_back(days_num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Arrow stores Parquet::DATETIME in Int64, while ClickHouse stores DateTime in UInt32. Therefore, it should be checked before saving
|
||||
void fillColumnWithDate64Data(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
|
||||
{
|
||||
auto & column_data = static_cast<ColumnVector<UInt32> &>(*internal_column).getData();
|
||||
column_data.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0, num_chunks = static_cast<size_t>(arrow_column->data()->num_chunks()); chunk_i < num_chunks; ++chunk_i)
|
||||
{
|
||||
auto & chunk = static_cast<arrow::Date64Array &>(*(arrow_column->data()->chunk(chunk_i)));
|
||||
for (size_t value_i = 0, length = static_cast<size_t>(chunk.length()); value_i < length; ++value_i)
|
||||
{
|
||||
auto timestamp = static_cast<UInt32>(chunk.Value(value_i) / 1000); // Always? in ms
|
||||
column_data.emplace_back(timestamp);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts an Arrow TIMESTAMP column into an internal DateTime column (UInt32).
/// The time unit of every chunk is read from its type and the values are divided accordingly,
/// so that the result is expressed in whole units of TimeUnit::SECOND. The sub-second part is dropped.
/// NOTE: no range check is performed; the quotient is narrowed to UInt32 by a plain cast.
void fillColumnWithTimestampData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
{
    auto & destination = static_cast<ColumnVector<UInt32> &>(*internal_column).getData();
    destination.reserve(arrow_column->length());

    const size_t chunk_count = static_cast<size_t>(arrow_column->data()->num_chunks());
    for (size_t chunk_index = 0; chunk_index < chunk_count; ++chunk_index)
    {
        auto & timestamps = static_cast<arrow::TimestampArray &>(*(arrow_column->data()->chunk(chunk_index)));
        const auto & timestamp_type = static_cast<const ::arrow::TimestampType &>(*timestamps.type());

        /// How many source ticks make one second. Stays 1 for a unit that is not listed below.
        UInt32 ticks_per_second = 1;
        switch (timestamp_type.unit())
        {
            case arrow::TimeUnit::SECOND:
                ticks_per_second = 1;
                break;
            case arrow::TimeUnit::MILLI:
                ticks_per_second = 1000;
                break;
            case arrow::TimeUnit::MICRO:
                ticks_per_second = 1000000;
                break;
            case arrow::TimeUnit::NANO:
                ticks_per_second = 1000000000;
                break;
        }

        const size_t row_count = static_cast<size_t>(timestamps.length());
        for (size_t row = 0; row < row_count; ++row)
        {
            const auto seconds = timestamps.Value(row) / ticks_per_second;
            destination.emplace_back(static_cast<UInt32>(seconds));
        }
    }
}
|
||||
|
||||
/// Converts an Arrow DECIMAL column into an internal Decimal128 column.
/// The bytes of every non-NULL value are reinterpreted as Decimal128; a NULL value is stored as zero,
/// so no garbage ends up under NULL positions.
void fillColumnWithDecimalData(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & internal_column)
{
    auto & destination = static_cast<ColumnDecimal<Decimal128> &>(*internal_column).getData();
    destination.reserve(arrow_column->length());

    const size_t chunk_count = static_cast<size_t>(arrow_column->data()->num_chunks());
    for (size_t chunk_index = 0; chunk_index < chunk_count; ++chunk_index)
    {
        auto & decimals = static_cast<arrow::DecimalArray &>(*(arrow_column->data()->chunk(chunk_index)));
        const size_t row_count = static_cast<size_t>(decimals.length());

        // TODO: copy column
        for (size_t row = 0; row < row_count; ++row)
        {
            if (decimals.IsNull(row))
                destination.emplace_back(Decimal128(0));
            else
                destination.emplace_back(*reinterpret_cast<const Decimal128 *>(decimals.Value(row)));
        }
    }
}
|
||||
|
||||
/// Creates a null bytemap from arrow's null bitmap
|
||||
void fillByteMapFromArrowColumn(std::shared_ptr<arrow::Column> & arrow_column, MutableColumnPtr & bytemap)
|
||||
{
|
||||
PaddedPODArray<UInt8> & bytemap_data = static_cast<ColumnVector<UInt8> &>(*bytemap).getData();
|
||||
bytemap_data.reserve(arrow_column->length());
|
||||
|
||||
for (size_t chunk_i = 0; chunk_i != static_cast<size_t>(arrow_column->data()->num_chunks()); ++chunk_i)
|
||||
{
|
||||
std::shared_ptr<arrow::Array> chunk = arrow_column->data()->chunk(chunk_i);
|
||||
|
||||
for (size_t value_i = 0; value_i != static_cast<size_t>(chunk->length()); ++value_i)
|
||||
bytemap_data.emplace_back(chunk->IsNull(value_i));
|
||||
}
|
||||
}
|
||||
|
||||
/// X-macro: invokes M(arrow type id, internal C++ type) for every Arrow numeric type
/// whose values can be copied into an internal column without conversion.
// TODO: arrow::Type::HALF_FLOAT (as Float32) is not supported yet.
# define FOR_ARROW_NUMERIC_TYPES(M) \
        M(arrow::Type::UINT8,  UInt8)   \
        M(arrow::Type::INT8,   Int8)    \
        M(arrow::Type::UINT16, UInt16)  \
        M(arrow::Type::INT16,  Int16)   \
        M(arrow::Type::UINT32, UInt32)  \
        M(arrow::Type::INT32,  Int32)   \
        M(arrow::Type::UINT64, UInt64)  \
        M(arrow::Type::INT64,  Int64)   \
        M(arrow::Type::FLOAT,  Float32) \
        M(arrow::Type::DOUBLE, Float64)
|
||||
|
||||
|
||||
using NameToColumnPtr = std::unordered_map<std::string, std::shared_ptr<arrow::Column>>;
|
||||
|
||||
const std::unordered_map<arrow::Type::type, std::shared_ptr<IDataType>> arrow_type_to_internal_type = {
|
||||
//{arrow::Type::DECIMAL, std::make_shared<DataTypeDecimal>()},
|
||||
{arrow::Type::UINT8, std::make_shared<DataTypeUInt8>()},
|
||||
{arrow::Type::INT8, std::make_shared<DataTypeInt8>()},
|
||||
{arrow::Type::UINT16, std::make_shared<DataTypeUInt16>()},
|
||||
{arrow::Type::INT16, std::make_shared<DataTypeInt16>()},
|
||||
{arrow::Type::UINT32, std::make_shared<DataTypeUInt32>()},
|
||||
{arrow::Type::INT32, std::make_shared<DataTypeInt32>()},
|
||||
{arrow::Type::UINT64, std::make_shared<DataTypeUInt64>()},
|
||||
{arrow::Type::INT64, std::make_shared<DataTypeInt64>()},
|
||||
{arrow::Type::HALF_FLOAT, std::make_shared<DataTypeFloat32>()},
|
||||
{arrow::Type::FLOAT, std::make_shared<DataTypeFloat32>()},
|
||||
{arrow::Type::DOUBLE, std::make_shared<DataTypeFloat64>()},
|
||||
|
||||
{arrow::Type::BOOL, std::make_shared<DataTypeUInt8>()},
|
||||
//{arrow::Type::DATE32, std::make_shared<DataTypeDate>()},
|
||||
{arrow::Type::DATE32, std::make_shared<DataTypeDate>()},
|
||||
//{arrow::Type::DATE32, std::make_shared<DataTypeDateTime>()},
|
||||
{arrow::Type::DATE64, std::make_shared<DataTypeDateTime>()},
|
||||
{arrow::Type::TIMESTAMP, std::make_shared<DataTypeDateTime>()},
|
||||
//{arrow::Type::TIME32, std::make_shared<DataTypeDateTime>()},
|
||||
|
||||
|
||||
{arrow::Type::STRING, std::make_shared<DataTypeString>()},
|
||||
{arrow::Type::BINARY, std::make_shared<DataTypeString>()},
|
||||
//{arrow::Type::FIXED_SIZE_BINARY, std::make_shared<DataTypeString>()},
|
||||
//{arrow::Type::UUID, std::make_shared<DataTypeString>()},
|
||||
|
||||
|
||||
// TODO: add other types that are convertable to internal ones:
|
||||
// 0. ENUM?
|
||||
// 1. UUID -> String
|
||||
// 2. JSON -> String
|
||||
// Full list of types: contrib/arrow/cpp/src/arrow/type.h
|
||||
};
|
||||
|
||||
|
||||
/// Returns the next block: one block per Parquet row group, with columns in the header's order and types.
///
/// Whenever the input buffer has unread data, the whole input is loaded into memory and a Parquet reader
/// is opened over it. After that every call decodes one row group. An empty block is returned
/// when all row groups have been returned.
///
/// Throws:
///  - CANNOT_READ_ALL_DATA if new data arrives before all row groups were returned, or if the row group cannot be read;
///  - EMPTY_DATA_PASSED if the row group has no rows;
///  - SIZES_OF_COLUMNS_DOESNT_MATCH if the file has fewer columns than the header;
///  - THERE_IS_NO_COLUMN if a header column is missing in the file;
///  - CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN if a non-nullable header column has NULLs in the file;
///  - CANNOT_CONVERT_TYPE / UNKNOWN_TYPE if the Arrow type of a column is not supported.
Block ParquetBlockInputStream::readImpl()
{
    Block res;

    if (!istr.eof())
    {
        /*
           First we load whole stream into string (its very bad and limiting .parquet file size to half? of RAM)
           Then producing blocks for every row_group (dont load big .parquet files with one row_group - it can eat x10+ RAM from .parquet file size)
        */

        if (row_group_current < row_group_total)
            throw Exception{"Got new data, but data from previous chunks not readed " + std::to_string(row_group_current) + "/" + std::to_string(row_group_total), ErrorCodes::CANNOT_READ_ALL_DATA};

        file_data.clear();
        {
            /// The scope guarantees that the write buffer is finalized before file_data is used.
            WriteBufferFromString file_buffer(file_data);
            copyData(istr, file_buffer);
        }

        buffer = std::make_unique<arrow::Buffer>(file_data);
        // TODO: maybe use parquet::RandomAccessSource?
        auto reader = parquet::ParquetFileReader::Open(std::make_shared<::arrow::io::BufferReader>(*buffer));
        file_reader = std::make_unique<parquet::arrow::FileReader>(::arrow::default_memory_pool(), std::move(reader));
        row_group_total = file_reader->num_row_groups();
        row_group_current = 0;
    }

    if (row_group_current >= row_group_total)
        return res;

    // TODO: also catch a ParquetException thrown by filereader?
    std::shared_ptr<arrow::Table> table;
    arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, &table);

    if (!read_status.ok())
        throw Exception{"Error while reading parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA};

    if (0 == table->num_rows())
        throw Exception{"Empty table in input data", ErrorCodes::EMPTY_DATA_PASSED};

    if (header.columns() > static_cast<size_t>(table->num_columns()))
        // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
        throw Exception{"Number of columns is less than the table has", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH};

    ++row_group_current;

    /// Index the columns of the row group by name: the header defines which of them are needed and in which order.
    NameToColumnPtr name_to_column_ptr;
    for (size_t i = 0, num_columns = static_cast<size_t>(table->num_columns()); i < num_columns; ++i)
    {
        std::shared_ptr<arrow::Column> arrow_column = table->column(i);
        name_to_column_ptr[arrow_column->name()] = arrow_column;
    }

    for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i)
    {
        const ColumnWithTypeAndName & header_column = header.getByPosition(column_i);

        const auto found_column = name_to_column_ptr.find(header_column.name);
        if (found_column == name_to_column_ptr.end())
            // TODO: What if some columns were not presented? Insert NULLs? What if a column is not nullable?
            throw Exception{"Column \"" + header_column.name + "\" is not presented in input data", ErrorCodes::THERE_IS_NO_COLUMN};

        std::shared_ptr<arrow::Column> arrow_column = found_column->second;
        arrow::Type::type arrow_type = arrow_column->type()->id();

        // TODO: check if a column is const?
        if (!header_column.type->isNullable() && arrow_column->null_count())
        {
            throw Exception{"Can not insert NULL data into non-nullable column \"" + header_column.name + "\"",
                            ErrorCodes::CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN};
        }

        const bool target_column_is_nullable = header_column.type->isNullable() || arrow_column->null_count();

        /// Type of the values as they are stored in the file (without Nullable).
        DataTypePtr internal_nested_type;

        if (arrow_type == arrow::Type::DECIMAL)
        {
            /// Decimal needs precision and scale, so it cannot be taken from the static map.
            const auto decimal_type = static_cast<arrow::DecimalType *>(arrow_column->type().get());
            internal_nested_type = std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type->precision(), decimal_type->scale());
        }
        else
        {
            const auto found_type = arrow_type_to_internal_type.find(arrow_type);
            if (found_type == arrow_type_to_internal_type.end())
            {
                throw Exception{"The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name()
                                    + "\" is not supported for conversion from a Parquet data format",
                                ErrorCodes::CANNOT_CONVERT_TYPE};
            }
            internal_nested_type = found_type->second;
        }

        const DataTypePtr internal_type = target_column_is_nullable ? makeNullable(internal_nested_type) : internal_nested_type;
        const DataTypePtr column_type = header_column.type;

        ColumnWithTypeAndName column;
        column.name = header_column.name;
        column.type = internal_type;

        /// Data
        MutableColumnPtr read_column = internal_nested_type->createColumn();
        switch (arrow_type)
        {
            case arrow::Type::STRING:
            case arrow::Type::BINARY:
                fillColumnWithStringData(arrow_column, read_column);
                break;
            case arrow::Type::BOOL:
                fillColumnWithBooleanData(arrow_column, read_column);
                break;
            case arrow::Type::DATE32:
                fillColumnWithDate32Data(arrow_column, read_column);
                break;
            case arrow::Type::DATE64:
                fillColumnWithDate64Data(arrow_column, read_column);
                break;
            case arrow::Type::TIMESTAMP:
                fillColumnWithTimestampData(arrow_column, read_column);
                break;
            case arrow::Type::DECIMAL:
                /// fillColumnWithNumericData would be faster, but it leaves trash values under NULLs.
                fillColumnWithDecimalData(arrow_column, read_column);
                break;
# define DISPATCH(ARROW_NUMERIC_TYPE, CPP_NUMERIC_TYPE) \
            case ARROW_NUMERIC_TYPE: \
                fillColumnWithNumericData<CPP_NUMERIC_TYPE>(arrow_column, read_column); \
                break;

                FOR_ARROW_NUMERIC_TYPES(DISPATCH)
# undef DISPATCH
            // TODO: support TIMESTAMP_MICROS and TIMESTAMP_MILLIS with truncated micro- and milliseconds?
            // TODO: read JSON as a string?
            // TODO: read UUID as a string?
            default:
                throw Exception{"Unsupported parquet type \"" + arrow_column->type()->name() + "\" of an input column \""
                                    + arrow_column->name() + "\"",
                                ErrorCodes::UNKNOWN_TYPE};
        }

        if (column.type->isNullable())
        {
            MutableColumnPtr null_bytemap = DataTypeUInt8().createColumn();
            fillByteMapFromArrowColumn(arrow_column, null_bytemap);
            column.column = ColumnNullable::create(std::move(read_column), std::move(null_bytemap));
        }
        else
        {
            column.column = std::move(read_column);
        }

        /// Convert from the type found in the file to the type requested by the header.
        column.column = castColumn(column, column_type, context);
        column.type = column_type;

        res.insert(std::move(column));
    }

    return res;
}
|
||||
|
||||
/// Registers the "Parquet" input format in the factory.
/// The creator ignores max_block_size and the format settings: block boundaries follow Parquet row groups.
void registerInputFormatParquet(FormatFactory & factory)
{
    auto create_stream = [](ReadBuffer & buf,
                            const Block & sample,
                            const Context & context,
                            size_t /*max_block_size */,
                            const FormatSettings & /* settings */)
    {
        return std::make_shared<ParquetBlockInputStream>(buf, sample, context);
    };

    factory.registerInputFormat("Parquet", create_stream);
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
namespace DB
{

class FormatFactory;

/// Built without Parquet support (USE_PARQUET is off): nothing to register.
/// The function still exists so that FormatFactory can call it unconditionally.
void registerInputFormatParquet(FormatFactory &)
{
}

}
|
||||
|
||||
#endif
|
46
dbms/src/Formats/ParquetBlockInputStream.h
Normal file
46
dbms/src/Formats/ParquetBlockInputStream.h
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_PARQUET
|
||||
# include <DataStreams/IBlockInputStream.h>
|
||||
//# include <parquet/file_reader.h>
|
||||
//# include <parquet/arrow/reader.h>
|
||||
//# include <arrow/buffer.h>
|
||||
|
||||
|
||||
namespace parquet { namespace arrow { class FileReader; } }
|
||||
namespace arrow { class Buffer; }
|
||||
|
||||
namespace DB
|
||||
{
|
||||
class Context;
|
||||
|
||||
class ParquetBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
ParquetBlockInputStream(ReadBuffer & istr_, const Block & header_, const Context & context_);
|
||||
|
||||
String getName() const override { return "Parquet"; }
|
||||
Block getHeader() const override;
|
||||
|
||||
protected:
|
||||
Block readImpl() override;
|
||||
|
||||
private:
|
||||
ReadBuffer & istr;
|
||||
Block header;
|
||||
|
||||
// TODO: check that this class implements every part of its parent
|
||||
|
||||
const Context & context;
|
||||
|
||||
std::unique_ptr<parquet::arrow::FileReader> file_reader;
|
||||
std::string file_data;
|
||||
std::unique_ptr<arrow::Buffer> buffer;
|
||||
int row_group_total = 0;
|
||||
int row_group_current = 0;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
453
dbms/src/Formats/ParquetBlockOutputStream.cpp
Normal file
453
dbms/src/Formats/ParquetBlockOutputStream.cpp
Normal file
@ -0,0 +1,453 @@
|
||||
#include <Common/config.h>
|
||||
#if USE_PARQUET
|
||||
# include "ParquetBlockOutputStream.h"
|
||||
|
||||
// TODO: clean includes
|
||||
# include <Columns/ColumnDecimal.h>
|
||||
# include <Columns/ColumnFixedString.h>
|
||||
# include <Columns/ColumnNullable.h>
|
||||
# include <Columns/ColumnString.h>
|
||||
# include <Columns/ColumnVector.h>
|
||||
# include <Columns/ColumnsNumber.h>
|
||||
# include <Core/ColumnWithTypeAndName.h>
|
||||
# include <Core/callOnTypeIndex.h>
|
||||
# include <DataTypes/DataTypeDateTime.h>
|
||||
# include <DataTypes/DataTypeNullable.h>
|
||||
# include <DataTypes/DataTypesDecimal.h>
|
||||
# include <DataStreams/SquashingBlockOutputStream.h>
|
||||
# include <Formats/FormatFactory.h>
|
||||
# include <IO/WriteHelpers.h>
|
||||
# include <arrow/api.h>
|
||||
# include <arrow/io/api.h>
|
||||
# include <arrow/util/decimal.h>
|
||||
# include <parquet/arrow/writer.h>
|
||||
# include <parquet/exception.h>
|
||||
# include <parquet/util/memory.h>
|
||||
|
||||
# include <Core/iostream_debug_helpers.h> // REMOVE ME
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int UNKNOWN_EXCEPTION;
|
||||
extern const int UNKNOWN_TYPE;
|
||||
}
|
||||
|
||||
ParquetBlockOutputStream::ParquetBlockOutputStream(WriteBuffer & ostr, const Block & header, const FormatSettings & format_settings) : ostr{ostr}, header{header}, format_settings{format_settings}
|
||||
{
|
||||
}
|
||||
|
||||
void ParquetBlockOutputStream::flush()
|
||||
{
|
||||
ostr.next();
|
||||
}
|
||||
|
||||
/// Does nothing if the Arrow status is OK; otherwise throws UNKNOWN_EXCEPTION
/// with a message that contains the column name and the text of the status.
void checkStatus(arrow::Status & status, const std::string & column_name)
{
    if (status.ok())
        return;

    throw Exception{"Error with a parquet column \"" + column_name + "\": " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
}
|
||||
|
||||
template <typename NumericType, typename ArrowBuilderType>
|
||||
void fillArrowArrayWithNumericColumnData(
|
||||
ColumnPtr write_column, std::shared_ptr<arrow::Array> & arrow_array, const PaddedPODArray<UInt8> * null_bytemap)
|
||||
{
|
||||
const PaddedPODArray<NumericType> & internal_data = static_cast<const ColumnVector<NumericType> &>(*write_column).getData();
|
||||
ArrowBuilderType builder;
|
||||
arrow::Status status;
|
||||
|
||||
const UInt8 * arrow_null_bytemap_raw_ptr = nullptr;
|
||||
PaddedPODArray<UInt8> arrow_null_bytemap;
|
||||
if (null_bytemap)
|
||||
{
|
||||
/// Invert values since Arrow interprets 1 as a non-null value, while CH as a null
|
||||
arrow_null_bytemap.reserve(null_bytemap->size());
|
||||
for (size_t i = 0, size = null_bytemap->size(); i < size; ++i)
|
||||
arrow_null_bytemap.emplace_back(1 ^ (*null_bytemap)[i]);
|
||||
|
||||
arrow_null_bytemap_raw_ptr = arrow_null_bytemap.data();
|
||||
}
|
||||
|
||||
status = builder.AppendValues(internal_data.data(), internal_data.size(), arrow_null_bytemap_raw_ptr);
|
||||
checkStatus(status, write_column->getName());
|
||||
|
||||
status = builder.Finish(&arrow_array);
|
||||
checkStatus(status, write_column->getName());
|
||||
}
|
||||
|
||||
template <typename ColumnType>
|
||||
void fillArrowArrayWithStringColumnData(
|
||||
ColumnPtr write_column, std::shared_ptr<arrow::Array> & arrow_array, const PaddedPODArray<UInt8> * null_bytemap)
|
||||
{
|
||||
const auto & internal_column = static_cast<const ColumnType &>(*write_column);
|
||||
arrow::StringBuilder builder;
|
||||
arrow::Status status;
|
||||
|
||||
for (size_t string_i = 0, size = internal_column.size(); string_i < size; ++string_i)
|
||||
{
|
||||
if (null_bytemap && (*null_bytemap)[string_i])
|
||||
{
|
||||
status = builder.AppendNull();
|
||||
}
|
||||
else
|
||||
{
|
||||
StringRef string_ref = internal_column.getDataAt(string_i);
|
||||
status = builder.Append(string_ref.data, string_ref.size);
|
||||
}
|
||||
|
||||
checkStatus(status, write_column->getName());
|
||||
}
|
||||
|
||||
status = builder.Finish(&arrow_array);
|
||||
checkStatus(status, write_column->getName());
|
||||
}
|
||||
|
||||
/// Builds an Arrow array from an internal Date column.
/// The day numbers are written as plain UInt16 values (arrow::UInt16Builder), without any conversion;
/// Arrow's Date32 type is not used here.
/// Rows marked in null_bytemap (may be nullptr) are appended as NULLs.
void fillArrowArrayWithDateColumnData(
    ColumnPtr write_column, std::shared_ptr<arrow::Array> & arrow_array, const PaddedPODArray<UInt8> * null_bytemap)
{
    const PaddedPODArray<UInt16> & day_numbers = static_cast<const ColumnVector<UInt16> &>(*write_column).getData();
    arrow::UInt16Builder builder;
    arrow::Status status;

    const size_t rows = day_numbers.size();
    for (size_t row = 0; row < rows; ++row)
    {
        const bool is_null = null_bytemap && (*null_bytemap)[row];
        status = is_null ? builder.AppendNull() : builder.Append(day_numbers[row]);
        checkStatus(status, write_column->getName());
    }

    status = builder.Finish(&arrow_array);
    checkStatus(status, write_column->getName());
}
|
||||
|
||||
/// Builds an Arrow array from an internal DateTime column.
/// The values are written as plain UInt32 values (arrow::UInt32Builder), without any unit conversion;
/// Arrow's Date64 type is not used here.
/// Rows marked in null_bytemap (may be nullptr) are appended as NULLs.
// TODO: consider writing Date64 (milliseconds) and check other time units.
void fillArrowArrayWithDateTimeColumnData(
    ColumnPtr write_column, std::shared_ptr<arrow::Array> & arrow_array, const PaddedPODArray<UInt8> * null_bytemap)
{
    auto & timestamps = static_cast<const ColumnVector<UInt32> &>(*write_column).getData();
    arrow::UInt32Builder builder;
    arrow::Status status;

    const size_t rows = timestamps.size();
    for (size_t row = 0; row < rows; ++row)
    {
        const bool is_null = null_bytemap && (*null_bytemap)[row];
        status = is_null ? builder.AppendNull() : builder.Append(timestamps[row]);
        checkStatus(status, write_column->getName());
    }

    status = builder.Finish(&arrow_array);
    checkStatus(status, write_column->getName());
}
|
||||
|
||||
template <typename DataType>
|
||||
void fillArrowArrayWithDecimalColumnData(
|
||||
ColumnPtr write_column,
|
||||
std::shared_ptr<arrow::Array> & arrow_array,
|
||||
const PaddedPODArray<UInt8> * null_bytemap,
|
||||
const DataType * decimal_type)
|
||||
{
|
||||
const auto & column = static_cast<const typename DataType::ColumnType &>(*write_column);
|
||||
arrow::DecimalBuilder builder(arrow::decimal(decimal_type->getPrecision(), decimal_type->getScale()));
|
||||
arrow::Status status;
|
||||
|
||||
for (size_t value_i = 0, size = column.size(); value_i < size; ++value_i)
|
||||
{
|
||||
if (null_bytemap && (*null_bytemap)[value_i])
|
||||
status = builder.AppendNull();
|
||||
else
|
||||
status = builder.Append(
|
||||
arrow::Decimal128(reinterpret_cast<const uint8_t *>(&column.getElement(value_i).value))); // TODO: try copy column
|
||||
|
||||
checkStatus(status, write_column->getName());
|
||||
}
|
||||
status = builder.Finish(&arrow_array);
|
||||
checkStatus(status, write_column->getName());
|
||||
|
||||
/* TODO column copy
|
||||
const auto & internal_data = static_cast<const typename DataType::ColumnType &>(*write_column).getData();
|
||||
//ArrowBuilderType numeric_builder;
|
||||
arrow::DecimalBuilder builder(arrow::decimal(decimal_type->getPrecision(), decimal_type->getScale()));
|
||||
arrow::Status status;
|
||||
|
||||
const uint8_t * arrow_null_bytemap_raw_ptr = nullptr;
|
||||
PaddedPODArray<UInt8> arrow_null_bytemap;
|
||||
if (null_bytemap)
|
||||
{
|
||||
/// Invert values since Arrow interprets 1 as a non-null value, while CH as a null
|
||||
arrow_null_bytemap.reserve(null_bytemap->size());
|
||||
for (size_t i = 0, size = null_bytemap->size(); i < size; ++i)
|
||||
arrow_null_bytemap.emplace_back(1 ^ (*null_bytemap)[i]);
|
||||
|
||||
arrow_null_bytemap_raw_ptr = arrow_null_bytemap.data();
|
||||
}
|
||||
|
||||
status = builder.AppendValues(reinterpret_cast<const uint8_t*>(internal_data.data()), internal_data.size(), arrow_null_bytemap_raw_ptr);
|
||||
checkStatus(status, write_column->getName());
|
||||
|
||||
status = builder.Finish(&arrow_array);
|
||||
checkStatus(status, write_column->getName());
|
||||
*/
|
||||
}
|
||||
|
||||
/// X-macro: invokes M(ClickHouseNumericType, ArrowBuilderType) once per supported numeric type.
/// Expanded with DISPATCH inside ParquetBlockOutputStream::write() to build the numeric branch chain.
# define FOR_INTERNAL_NUMERIC_TYPES(M) \
|
||||
M(UInt8, arrow::UInt8Builder) \
|
||||
M(Int8, arrow::Int8Builder) \
|
||||
M(UInt16, arrow::UInt16Builder) \
|
||||
M(Int16, arrow::Int16Builder) \
|
||||
M(UInt32, arrow::UInt32Builder) \
|
||||
M(Int32, arrow::Int32Builder) \
|
||||
M(UInt64, arrow::UInt64Builder) \
|
||||
M(Int64, arrow::Int64Builder) \
|
||||
M(Float32, arrow::FloatBuilder) \
|
||||
M(Float64, arrow::DoubleBuilder)
|
||||
|
||||
/// Maps a ClickHouse type family name (as returned by getFamilyName()) to the Arrow type used for
/// the schema field. Decimal types are not listed here: write() builds their Arrow type from
/// precision and scale. A name missing from this table makes write() throw UNKNOWN_TYPE.
const std::unordered_map<String, std::shared_ptr<arrow::DataType>> internal_type_to_arrow_type = {
|
||||
{"UInt8", arrow::uint8()},
|
||||
{"Int8", arrow::int8()},
|
||||
{"UInt16", arrow::uint16()},
|
||||
{"Int16", arrow::int16()},
|
||||
{"UInt32", arrow::uint32()},
|
||||
{"Int32", arrow::int32()},
|
||||
{"UInt64", arrow::uint64()},
|
||||
{"Int64", arrow::int64()},
|
||||
{"Float32", arrow::float32()},
|
||||
{"Float64", arrow::float64()},
|
||||
|
||||
//{"Date", arrow::date64()},
|
||||
//{"Date", arrow::date32()},
|
||||
/// Date is currently written as a plain 16-bit unsigned integer rather than an Arrow date type.
{"Date", arrow::uint16()}, // CHECK
|
||||
//{"DateTime", arrow::date64()}, // BUG! saves as date32
|
||||
/// DateTime is currently written as a plain 32-bit unsigned integer.
{"DateTime", arrow::uint32()},
|
||||
|
||||
// TODO: ClickHouse can actually store non-utf8 strings!
|
||||
{"String", arrow::utf8()},
|
||||
{"FixedString", arrow::utf8()},
|
||||
};
|
||||
|
||||
/// Returns a pointer to the null-map bytes of a Nullable column.
/// The caller must pass a column whose dynamic type is ColumnNullable (the cast is unchecked).
/// NOTE(review): the pointer refers to data owned by the column - presumably valid only while
/// the column is alive; confirm against ColumnNullable.
const PaddedPODArray<UInt8> * extractNullBytemapPtr(ColumnPtr column)
{
    const auto & nullable_column = static_cast<const ColumnNullable &>(*column);
    ColumnPtr null_map_column = nullable_column.getNullMapColumnPtr();

    const auto & null_map_vector = static_cast<const ColumnVector<UInt8> &>(*null_map_column);
    return &null_map_vector.getData();
}
|
||||
|
||||
|
||||
class OstreamOutputStream : public parquet::OutputStream
|
||||
{
|
||||
public:
|
||||
explicit OstreamOutputStream(WriteBuffer & ostr_) : ostr(ostr_) {}
|
||||
virtual ~OstreamOutputStream() {}
|
||||
virtual void Close() {}
|
||||
virtual int64_t Tell() { return total_length; }
|
||||
virtual void Write(const uint8_t * data, int64_t length)
|
||||
{
|
||||
ostr.write(reinterpret_cast<const char *>(data), length);
|
||||
total_length += length;
|
||||
}
|
||||
|
||||
private:
|
||||
WriteBuffer & ostr;
|
||||
int64_t total_length = 0;
|
||||
|
||||
PARQUET_DISALLOW_COPY_AND_ASSIGN(OstreamOutputStream);
|
||||
};
|
||||
|
||||
|
||||
void ParquetBlockOutputStream::write(const Block & block)
|
||||
{
|
||||
block.checkNumberOfRows();
|
||||
|
||||
const size_t columns_num = block.columns();
|
||||
|
||||
/// For arrow::Schema and arrow::Table creation
|
||||
std::vector<std::shared_ptr<arrow::Field>> arrow_fields;
|
||||
std::vector<std::shared_ptr<arrow::Array>> arrow_arrays;
|
||||
arrow_fields.reserve(columns_num);
|
||||
arrow_arrays.reserve(columns_num);
|
||||
|
||||
for (size_t column_i = 0; column_i < columns_num; ++column_i)
|
||||
{
|
||||
// TODO: constructed every iteration
|
||||
const ColumnWithTypeAndName & column = block.safeGetByPosition(column_i);
|
||||
|
||||
const bool is_column_nullable = column.type->isNullable();
|
||||
const auto & column_nested_type
|
||||
= is_column_nullable ? static_cast<const DataTypeNullable *>(column.type.get())->getNestedType() : column.type;
|
||||
const std::string column_nested_type_name = column_nested_type->getFamilyName();
|
||||
|
||||
if (isDecimal(column_nested_type))
|
||||
{
|
||||
const auto add_decimal_field = [&](const auto & types) -> bool {
|
||||
using Types = std::decay_t<decltype(types)>;
|
||||
using ToDataType = typename Types::LeftType;
|
||||
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
ToDataType,
|
||||
DataTypeDecimal<
|
||||
Decimal32>> || std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> || std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
|
||||
{
|
||||
const auto & decimal_type = static_cast<const ToDataType *>(column_nested_type.get());
|
||||
arrow_fields.emplace_back(std::make_shared<arrow::Field>(
|
||||
column.name, arrow::decimal(decimal_type->getPrecision(), decimal_type->getScale()), is_column_nullable));
|
||||
}
|
||||
|
||||
return false;
|
||||
};
|
||||
callOnIndexAndDataType<void>(column_nested_type->getTypeId(), add_decimal_field);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (internal_type_to_arrow_type.find(column_nested_type_name) == internal_type_to_arrow_type.end())
|
||||
{
|
||||
throw Exception{"The type \"" + column_nested_type_name + "\" of a column \"" + column.name
|
||||
+ "\""
|
||||
" is not supported for conversion into a Parquet data format",
|
||||
ErrorCodes::UNKNOWN_TYPE};
|
||||
}
|
||||
|
||||
arrow_fields.emplace_back(std::make_shared<arrow::Field>(column.name, internal_type_to_arrow_type.at(column_nested_type_name), is_column_nullable));
|
||||
}
|
||||
|
||||
std::shared_ptr<arrow::Array> arrow_array;
|
||||
|
||||
ColumnPtr nested_column
|
||||
= is_column_nullable ? static_cast<const ColumnNullable &>(*column.column).getNestedColumnPtr() : column.column;
|
||||
const PaddedPODArray<UInt8> * null_bytemap = is_column_nullable ? extractNullBytemapPtr(column.column) : nullptr;
|
||||
|
||||
if ("String" == column_nested_type_name)
|
||||
{
|
||||
fillArrowArrayWithStringColumnData<ColumnString>(nested_column, arrow_array, null_bytemap);
|
||||
}
|
||||
else if ("FixedString" == column_nested_type_name)
|
||||
{
|
||||
fillArrowArrayWithStringColumnData<ColumnFixedString>(nested_column, arrow_array, null_bytemap);
|
||||
}
|
||||
else if ("Date" == column_nested_type_name)
|
||||
{
|
||||
fillArrowArrayWithDateColumnData(nested_column, arrow_array, null_bytemap);
|
||||
}
|
||||
else if ("DateTime" == column_nested_type_name)
|
||||
{
|
||||
fillArrowArrayWithDateTimeColumnData(nested_column, arrow_array, null_bytemap);
|
||||
}
|
||||
|
||||
else if (isDecimal(column_nested_type))
|
||||
{
|
||||
auto fill_decimal = [&](const auto & types) -> bool
|
||||
{
|
||||
using Types = std::decay_t<decltype(types)>;
|
||||
using ToDataType = typename Types::LeftType;
|
||||
if constexpr (
|
||||
std::is_same_v<
|
||||
ToDataType,
|
||||
DataTypeDecimal<
|
||||
Decimal32>> || std::is_same_v<ToDataType, DataTypeDecimal<Decimal64>> || std::is_same_v<ToDataType, DataTypeDecimal<Decimal128>>)
|
||||
{
|
||||
const auto & decimal_type = static_cast<const ToDataType *>(column_nested_type.get());
|
||||
fillArrowArrayWithDecimalColumnData(nested_column, arrow_array, null_bytemap, decimal_type);
|
||||
}
|
||||
return false;
|
||||
};
|
||||
callOnIndexAndDataType<void>(column_nested_type->getTypeId(), fill_decimal);
|
||||
}
|
||||
# define DISPATCH(CPP_NUMERIC_TYPE, ARROW_BUILDER_TYPE) \
|
||||
else if (#CPP_NUMERIC_TYPE == column_nested_type_name) \
|
||||
{ \
|
||||
fillArrowArrayWithNumericColumnData<CPP_NUMERIC_TYPE, ARROW_BUILDER_TYPE>(nested_column, arrow_array, null_bytemap); \
|
||||
}
|
||||
|
||||
FOR_INTERNAL_NUMERIC_TYPES(DISPATCH)
|
||||
# undef DISPATCH
|
||||
else
|
||||
{
|
||||
throw Exception{"Internal type \"" + column_nested_type_name + "\" of a column \"" + column.name
|
||||
+ "\""
|
||||
" is not supported for conversion into a Parquet data format",
|
||||
ErrorCodes::UNKNOWN_TYPE};
|
||||
}
|
||||
|
||||
|
||||
arrow_arrays.emplace_back(std::move(arrow_array));
|
||||
}
|
||||
|
||||
std::shared_ptr<arrow::Schema> arrow_schema = std::make_shared<arrow::Schema>(std::move(arrow_fields));
|
||||
|
||||
std::shared_ptr<arrow::Table> arrow_table = arrow::Table::Make(arrow_schema, arrow_arrays);
|
||||
|
||||
auto sink = std::make_shared<OstreamOutputStream>(ostr);
|
||||
|
||||
if (!file_writer)
|
||||
{
|
||||
|
||||
parquet::WriterProperties::Builder builder;
|
||||
#if USE_SNAPPY
|
||||
builder.compression(parquet::Compression::SNAPPY);
|
||||
#endif
|
||||
auto props = builder.build();
|
||||
auto status = parquet::arrow::FileWriter::Open(
|
||||
*arrow_table->schema(),
|
||||
arrow::default_memory_pool(),
|
||||
sink,
|
||||
props, /*parquet::default_writer_properties(),*/
|
||||
parquet::arrow::default_arrow_writer_properties(),
|
||||
&file_writer);
|
||||
if (!status.ok())
|
||||
throw Exception{"Error while opening a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
|
||||
}
|
||||
|
||||
// TODO: calculate row_group_size depending on a number of rows and table size
|
||||
auto status = file_writer->WriteTable(*arrow_table, format_settings.parquet.row_group_size);
|
||||
|
||||
if (!status.ok())
|
||||
throw Exception{"Error while writing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
|
||||
}
|
||||
|
||||
void ParquetBlockOutputStream::writeSuffix()
|
||||
{
|
||||
if (file_writer)
|
||||
{
|
||||
auto status = file_writer->Close();
|
||||
if (!status.ok())
|
||||
throw Exception{"Error while closing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION};
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Registers the "Parquet" output format in the factory.
/// The Parquet stream is wrapped in a SquashingBlockOutputStream configured with
/// format_settings.parquet.row_group_size, and flushing is disabled on the wrapper.
void registerOutputFormatParquet(FormatFactory & factory)
{
    auto create_stream = [](WriteBuffer & buf, const Block & sample, const Context & /*context*/, const FormatSettings & format_settings)
    {
        BlockOutputStreamPtr parquet_stream = std::make_shared<ParquetBlockOutputStream>(buf, sample, format_settings);
        auto squashing_stream = std::make_shared<SquashingBlockOutputStream>(
            parquet_stream, parquet_stream->getHeader(), format_settings.parquet.row_group_size, 0);
        squashing_stream->disableFlush();
        return squashing_stream;
    };

    factory.registerOutputFormat("Parquet", create_stream);
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
|
||||
/// Fallback for the #else branch of the surrounding conditional (presumably builds
/// without USE_PARQUET - confirm against the #if at the top of the file).
namespace DB
|
||||
{
|
||||
class FormatFactory;
|
||||
/// Keeps the symbol defined so callers still link; registers nothing.
void registerOutputFormatParquet(FormatFactory &)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif
|
46
dbms/src/Formats/ParquetBlockOutputStream.h
Normal file
46
dbms/src/Formats/ParquetBlockOutputStream.h
Normal file
@ -0,0 +1,46 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/config.h>
|
||||
#if USE_PARQUET
|
||||
# include <DataStreams/IBlockOutputStream.h>
|
||||
# include <Formats/FormatSettings.h>
|
||||
|
||||
namespace arrow
|
||||
{
|
||||
class Array;
|
||||
class DataType;
|
||||
}
|
||||
|
||||
namespace parquet
|
||||
{
|
||||
namespace arrow
|
||||
{
|
||||
class FileWriter;
|
||||
}
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
/// Block output stream that serializes blocks into the Parquet format via Apache Arrow.
class ParquetBlockOutputStream : public IBlockOutputStream
|
||||
{
|
||||
public:
|
||||
/// ostr_ - destination buffer; held by reference.
/// header_ - sample block returned from getHeader().
/// format_settings - stored as a const member; write() reads parquet.row_group_size from it.
ParquetBlockOutputStream(WriteBuffer & ostr_, const Block & header_, const FormatSettings & format_settings);
|
||||
|
||||
Block getHeader() const override { return header; }
|
||||
/// Converts the block to an Arrow table and writes it; opens the file writer on first use.
void write(const Block & block) override;
|
||||
/// Closes the file writer if one was opened.
void writeSuffix() override;
|
||||
void flush() override;
|
||||
|
||||
/// Parquet is a binary format, hence the generic binary content type.
String getContentType() const override { return "application/octet-stream"; }
|
||||
|
||||
private:
|
||||
WriteBuffer & ostr;
|
||||
Block header;
|
||||
const FormatSettings format_settings;
|
||||
|
||||
/// Null until the first write(); FileWriter is only forward-declared in this header.
std::unique_ptr<parquet::arrow::FileWriter> file_writer;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,11 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
#include <Formats/FormatSettings.h>
|
||||
#include <Formats/TabSeparatedRowOutputStream.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
struct FormatSettings;
|
||||
|
||||
/** A stream for outputting data in tsv format, but without escaping individual values.
|
||||
* (That is, the output is irreversible.)
|
||||
|
@ -618,7 +618,7 @@ inline void readDigits(ReadBuffer & buf, T & x, unsigned int & digits, int & exp
|
||||
|
||||
++places; // num zeroes before + current digit
|
||||
if (digits + places > max_digits)
|
||||
throw Exception("Too many digits in decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
throw Exception("Too many digits (" + std::to_string(digits + places) + " > " + std::to_string(max_digits) + ") in decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
|
||||
digits += places;
|
||||
if (after_point)
|
||||
|
@ -165,6 +165,7 @@ struct Settings
|
||||
M(SettingUInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.") \
|
||||
M(SettingUInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.") \
|
||||
M(SettingBool, output_format_pretty_color, true, "Use ANSI escape sequences to paint colors in Pretty formats") \
|
||||
M(SettingUInt64, output_format_parquet_row_group_size, 1000000, "Row group size in rows.") \
|
||||
\
|
||||
M(SettingBool, use_client_time_zone, false, "Use client timezone for interpreting DateTime string values, instead of adopting server timezone.") \
|
||||
\
|
||||
|
@ -31,6 +31,7 @@ const char * auto_config_build[]
|
||||
"BUILD_COMPILE_DEFINITIONS", "@BUILD_COMPILE_DEFINITIONS@",
|
||||
"BUILD_INCLUDE_DIRECTORIES", "@BUILD_INCLUDE_DIRECTORIES@",
|
||||
"STATIC", "@USE_STATIC_LIBRARIES@",
|
||||
"SPLIT_BINARY", "@CLICKHOUSE_SPLIT_BINARY@",
|
||||
"USE_EMBEDDED_COMPILER", "@USE_EMBEDDED_COMPILER@",
|
||||
"USE_INTERNAL_MEMCPY", "@USE_INTERNAL_MEMCPY@",
|
||||
"USE_GLIBC_COMPATIBILITY", "@GLIBC_COMPATIBILITY@",
|
||||
@ -48,6 +49,10 @@ const char * auto_config_build[]
|
||||
"USE_POCO_MONGODB", "@USE_POCO_MONGODB@",
|
||||
"USE_POCO_NETSSL", "@USE_POCO_NETSSL@",
|
||||
"USE_BASE64", "@USE_BASE64@",
|
||||
"USE_XXHASH", "@USE_XXHASH@",
|
||||
"USE_HDFS", "@USE_HDFS@",
|
||||
"USE_SNAPPY", "@USE_SNAPPY@",
|
||||
"USE_PARQUET", "@USE_PARQUET@",
|
||||
"USE_PROTOBUF", "@USE_PROTOBUF@",
|
||||
"USE_BROTLI", "@USE_BROTLI@",
|
||||
|
||||
|
@ -240,5 +240,3 @@ SELECT toUInt64('9223372036854775809') AS x, toDecimal64(x, 0); -- { serverError
|
||||
SELECT toDecimal32(0, rowNumberInBlock()); -- { serverError 44 }
|
||||
SELECT toDecimal64(0, rowNumberInBlock()); -- { serverError 44 }
|
||||
SELECT toDecimal128(0, rowNumberInBlock()); -- { serverError 44 }
|
||||
|
||||
DROP TABLE IF EXISTS test.decimal;
|
||||
|
62
dbms/tests/queries/0_stateless/00900_parquet.reference
Normal file
62
dbms/tests/queries/0_stateless/00900_parquet.reference
Normal file
@ -0,0 +1,62 @@
|
||||
9999
|
||||
9998
|
||||
9997
|
||||
9996
|
||||
9995
|
||||
9994
|
||||
9993
|
||||
9992
|
||||
9991
|
||||
9990
|
||||
99999
|
||||
99998
|
||||
99997
|
||||
99996
|
||||
99995
|
||||
99994
|
||||
99993
|
||||
99992
|
||||
99991
|
||||
99990
|
||||
2
|
||||
1
|
||||
0
|
||||
999
|
||||
998
|
||||
997
|
||||
996
|
||||
995
|
||||
994
|
||||
993
|
||||
992
|
||||
991
|
||||
990
|
||||
ContextLock Number of times the lock of Context was acquired or tried to acquire. This is global lock.
|
||||
Query Number of queries started to be interpreted and maybe executed. Does not include queries that are failed to parse, that are rejected due to AST size limits; rejected due to quota limits or limits on number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries.
|
||||
original:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06
|
||||
converted:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06
|
||||
diff:
|
||||
dest:
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 1970-01-01 06:29:04
|
||||
80 81 82 83 84 85 86 87 88 89 str02 fstr2\0\0\0\0\0\0\0\0\0\0 2005-03-04 2006-08-09 10:11:12
|
||||
min:
|
||||
-128 0 0 0 0 0 0 0 -1 -1 string-1\0\0\0\0\0\0\0 fixedstring-1\0\0 2003-04-05 2003-02-03
|
||||
-108 108 8 92 -8 108 -40 -116 -1 -1 string-0\0\0\0\0\0\0\0 fixedstring\0\0\0\0 2001-02-03 2002-02-03
|
||||
79 81 82 83 84 85 86 87 88 89 str01\0\0\0\0\0\0\0\0\0\0 fstr1\0\0\0\0\0\0\0\0\0\0 2003-03-04 2004-05-06
|
||||
127 -1 -1 -1 -1 -1 -1 -1 -1 -1 string-2\0\0\0\0\0\0\0 fixedstring-2\0\0 2004-06-07 2004-02-03
|
||||
max:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1 -1 string-1 fixedstring-1\0\0 1970-01-01 06:22:27 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1 -1 string-0 fixedstring\0\0\0\0 1970-01-01 06:09:16 2002-02-03 04:05:06
|
||||
80 81 82 83 84 85 86 87 88 89 str02 fstr2 2005-03-04 05:06:07 2006-08-09 10:11:12
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1 -1 string-2 fixedstring-2\0\0 1970-01-01 06:29:36 2004-02-03 04:05:06
|
||||
dest from null:
|
||||
-128 0 -32768 0 -2147483648 0 -9223372036854775808 0 -1.032 -1.064 string-1 fixedstring-1\0\0 2003-04-05 2003-02-03 04:05:06
|
||||
-108 108 -1016 1116 -1032 1132 -1064 1164 -1.032 -1.064 string-0 fixedstring\0\0\0\0 2001-02-03 2002-02-03 04:05:06
|
||||
127 255 32767 65535 2147483647 4294967295 9223372036854775807 9223372036854775807 -1.032 -1.064 string-2 fixedstring-2\0\0 2004-06-07 2004-02-03 04:05:06
|
||||
\N \N \N \N \N \N \N \N \N \N \N \N \N \N
|
136
dbms/tests/queries/0_stateless/00900_parquet.sh
Executable file
136
dbms/tests/queries/0_stateless/00900_parquet.sh
Executable file
@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env bash

# Abort the test on the first failing command.
set -e

# Absolute directory of this script, so the shared config is found regardless of the caller's cwd.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Quoted so that a checkout path containing spaces or glob characters is not word-split.
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
#${CLICKHOUSE_CLIENT} --max_block_size=1 --query="SELECT * FROM system.numbers LIMIT 10 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t1.pq
|
||||
#${CLICKHOUSE_CLIENT} --max_block_size=5 --query="SELECT * FROM system.numbers LIMIT 10 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t5.pq
|
||||
#${CLICKHOUSE_CLIENT} --max_block_size=15 --query="SELECT * FROM system.numbers LIMIT 10 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t15.pq
|
||||
#${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 100000 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t100000.pq
|
||||
#${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 1000000000 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t1g.pq
|
||||
#${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 100000000 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t100m.pq
|
||||
#${CLICKHOUSE_CLIENT} --max_block_size=100000000 --query="SELECT * FROM system.numbers LIMIT 100000000 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t100m-100mbs.pq
|
||||
#valgrind --tool=massif ${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 1000000 FORMAT Parquet" > ${CLICKHOUSE_TMP}/t1g.pq
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.contributors"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.contributors (name String) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.contributors ORDER BY name DESC FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.contributors FORMAT Parquet"
|
||||
# random results
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.contributors LIMIT 10" > /dev/null
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.contributors"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_numbers"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_numbers (number UInt64) ENGINE = Memory"
|
||||
# less than default block size (65k)
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 10000 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_numbers FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_numbers ORDER BY number DESC LIMIT 10"
|
||||
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.parquet_numbers"
|
||||
|
||||
# More than default block size
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 100000 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_numbers FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_numbers ORDER BY number DESC LIMIT 10"
|
||||
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.parquet_numbers"
|
||||
|
||||
#${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.numbers LIMIT 10000000 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_numbers FORMAT Parquet"
|
||||
#${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_numbers ORDER BY number DESC LIMIT 10"
|
||||
#${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.parquet_numbers"
|
||||
|
||||
#${CLICKHOUSE_CLIENT} --max_block_size=2 --query="SELECT * FROM system.numbers LIMIT 3 FORMAT Parquet" > ${CLICKHOUSE_TMP}/bs2.pq
|
||||
${CLICKHOUSE_CLIENT} --max_block_size=2 --query="SELECT * FROM system.numbers LIMIT 3 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_numbers FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_numbers ORDER BY number DESC LIMIT 10"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.parquet_numbers"
|
||||
${CLICKHOUSE_CLIENT} --max_block_size=1 --query="SELECT * FROM system.numbers LIMIT 1000 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_numbers FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_numbers ORDER BY number DESC LIMIT 10"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_numbers"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_events"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_events (event String, value UInt64, description String) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM system.events FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_events FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT event, description FROM test.parquet_events WHERE event IN ('ContextLock', 'Query') ORDER BY event"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_events"
|
||||
|
||||
|
||||
#${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types1"
|
||||
#${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types2"
|
||||
#${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String ) ENGINE = Memory"
|
||||
#${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String ) ENGINE = Memory"
|
||||
#${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types1 values ( -108, 108, -1016, 1116, -1032, -1064, 1164, -1.032, -1.064, 'string' )"
|
||||
#${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types1 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types2 FORMAT Parquet"
|
||||
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types1"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types3"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types4"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types1 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types2 (int8 Int8, uint8 UInt8, int16 Int16, uint16 UInt16, int32 Int32, uint32 UInt32, int64 Int64, uint64 UInt64, float32 Float32, float64 Float64, string String, fixedstring FixedString(15), date Date, datetime DateTime) ENGINE = Memory"
|
||||
# convert min type
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types3 (int8 Int8, uint8 Int8, int16 Int8, uint16 Int8, int32 Int8, uint32 Int8, int64 Int8, uint64 Int8, float32 Int8, float64 Int8, string FixedString(15), fixedstring FixedString(15), date Date, datetime Date) ENGINE = Memory"
|
||||
# convert max type
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types4 (int8 Int64, uint8 Int64, int16 Int64, uint16 Int64, int32 Int64, uint32 Int64, int64 Int64, uint64 Int64, float32 Int64, float64 Int64, string String, fixedstring String, date DateTime, datetime DateTime) ENGINE = Memory"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types1 values ( -108, 108, -1016, 1116, -1032, 1132, -1064, 1164, -1.032, -1.064, 'string-0', 'fixedstring', '2001-02-03', '2002-02-03 04:05:06')"
|
||||
|
||||
# min
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types1 values ( -128, 0, -32768, 0, -2147483648, 0, -9223372036854775808, 0, -1.032, -1.064, 'string-1', 'fixedstring-1', '2003-04-05', '2003-02-03 04:05:06')"
|
||||
|
||||
# max
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types1 values ( 127, 255, 32767, 65535, 2147483647, 4294967295, 9223372036854775807, 9223372036854775807, -1.032, -1.064, 'string-2', 'fixedstring-2', '2004-06-07', '2004-02-03 04:05:06')"
|
||||
|
||||
# 'SELECT -127,-128,-129,126,127,128,255,256,257,-32767,-32768,-32769,32766,32767,32768,65535,65536,65537, -2147483647,-2147483648,-2147483649,2147483646,2147483647,2147483648,4294967295,4294967296,4294967297, -9223372036854775807,-9223372036854775808,9223372036854775806,9223372036854775807,9223372036854775808,18446744073709551615';
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types1 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types2 FORMAT Parquet"
|
||||
|
||||
echo original:
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types1 ORDER BY int8" | tee ${CLICKHOUSE_TMP}/parquet_all_types_1.dump
|
||||
echo converted:
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types2 ORDER BY int8" | tee ${CLICKHOUSE_TMP}/parquet_all_types_2.dump
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types1 ORDER BY int8 FORMAT Parquet" > ${CLICKHOUSE_TMP}/parquet_all_types_1.parquet
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types2 ORDER BY int8 FORMAT Parquet" > ${CLICKHOUSE_TMP}/parquet_all_types_2.parquet
|
||||
echo diff:
|
||||
diff ${CLICKHOUSE_TMP}/parquet_all_types_1.dump ${CLICKHOUSE_TMP}/parquet_all_types_2.dump
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.parquet_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types3 values ( 79, 81, 82, 83, 84, 85, 86, 87, 88, 89, 'str01', 'fstr1', '2003-03-04', '2004-05-06')"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types3 ORDER BY int8 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types2 FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types1 ORDER BY int8 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types3 FORMAT Parquet"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types4 values ( 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 'str02', 'fstr2', '2005-03-04 05:06:07', '2006-08-09 10:11:12')"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types4 ORDER BY int8 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types2 FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types1 ORDER BY int8 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types4 FORMAT Parquet"
|
||||
|
||||
echo dest:
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types2 ORDER BY int8"
|
||||
echo min:
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types3 ORDER BY int8"
|
||||
echo max:
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types4 ORDER BY int8"
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types5"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_types6"
|
||||
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.parquet_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types5 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_types6 (int8 Nullable(Int8), uint8 Nullable(UInt8), int16 Nullable(Int16), uint16 Nullable(UInt16), int32 Nullable(Int32), uint32 Nullable(UInt32), int64 Nullable(Int64), uint64 Nullable(UInt64), float32 Nullable(Float32), float64 Nullable(Float64), string Nullable(String), fixedstring Nullable(FixedString(15)), date Nullable(Date), datetime Nullable(DateTime)) ENGINE = Memory"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types5 values ( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types5 ORDER BY int8 FORMAT Parquet" > ${CLICKHOUSE_TMP}/parquet_all_types_5.parquet
|
||||
#${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types5 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types6 FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types5 ORDER BY int8 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types6 FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types1 ORDER BY int8 FORMAT Parquet" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_types6 FORMAT Parquet"
|
||||
echo dest from null:
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_types6 ORDER BY int8"
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_types5"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_types6"
|
||||
|
||||
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_types1"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_types2"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_types3"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_types4"
|
54
dbms/tests/queries/0_stateless/00900_parquet_create_table_columns.pl
Executable file
54
dbms/tests/queries/0_stateless/00900_parquet_create_table_columns.pl
Executable file
@ -0,0 +1,54 @@
|
||||
#!/usr/bin/env perl
|
||||
package parquet_create_table_columns;
|
||||
use strict;
|
||||
no warnings 'experimental';
|
||||
use feature 'signatures';
|
||||
use JSON::XS;
|
||||
#use Data::Dumper;
|
||||
|
||||
# Slurp $file and hand its entire contents back as a single string.
# When the file cannot be opened nothing is returned (undef in scalar
# context, an empty list in list context); no error is raised here.
sub file_read($file) {
    open(my $handle, '<', $file) or return;
    # Undefine the input record separator for this one read only, so that
    # the readline below yields the whole file instead of a single line.
    my $content = do { local $/; <$handle> };
    close($handle);
    return $content;
}
|
||||
|
||||
# Parquet logical type name => ClickHouse column type.
# columns() looks a column up here first and only falls back to the
# physical-type table when this lookup yields nothing.
our $type_parquet_logical_to_clickhouse = {
    'DECIMAL'          => 'Decimal128(1)',
    'TIMESTAMP_MICROS' => 'DateTime',
    'TIMESTAMP_MILLIS' => 'DateTime',
};

# Parquet physical (storage) type name => ClickHouse column type.
our $type_parquet_physical_to_clickhouse = {
    'BOOLEAN'              => 'UInt8',
    'INT32'                => 'Int32',
    'INT64'                => 'Int64',
    'FLOAT'                => 'Float32',
    'DOUBLE'               => 'Float64',
    'BYTE_ARRAY'           => 'String',
    'FIXED_LEN_BYTE_ARRAY' => 'String',    # Maybe FixedString?
    'INT96'                => 'Int64',     # TODO!
};
|
||||
|
||||
# Print, on one line and without a trailing newline, the comma-separated
# "name Nullable(Type)" list for the columns described by $json.
#
# $json is a decoded structure whose {Columns} entry is an array of hashes;
# the Name, LogicalType, PhysicalType and Id keys of each hash are read.
# The ClickHouse type is looked up by LogicalType first and by PhysicalType
# when the logical lookup yields nothing.
#
# A column whose type cannot be resolved is reported with warn and left out
# of the list, so an empty "Nullable()" type is never printed.
#
# Returns whatever print returns.
sub columns ($json) {
    my @list;
    my %uniq;
    for my $column (@{$json->{Columns}}) {
        my $name = $column->{'Name'};
        my $type = $type_parquet_logical_to_clickhouse->{$column->{'LogicalType'}}
            || $type_parquet_physical_to_clickhouse->{$column->{'PhysicalType'}};
        unless ($type) {
            warn "Unknown type [$column->{'PhysicalType'}:$column->{'LogicalType'}] of column [$name]";
            # Without a type the column definition would be invalid; skip it.
            next;
        }
        $type = "Nullable($type)";
        # Names can be non-unique: the second and later occurrences of a name
        # get the column's Id appended.
        $name .= $column->{'Id'} if $uniq{$name}++;
        push @list, {name => $name, type => $type};
    }
    print join ', ', map {"$_->{name} $_->{type}"} @list;
}
|
||||
|
||||
# Read the JSON document stored in $file, decode it with JSON::XS and print
# its column list through columns(). Returns the value columns() returns.
#
# Dies with the file name and the OS error when file_read() yields nothing,
# instead of passing undef on to the JSON decoder. Still dies (from
# decode_json) when the content is not valid JSON.
sub columns_file ($file) {
    my $text = file_read($file);
    die "Can't read file [$file]: $!\n" unless defined $text;
    return columns(JSON::XS::decode_json($text));
}

# Script entry point: when this file is executed directly (there is no
# caller), process the file named by the first command-line argument.
columns_file(shift) unless caller;
|
@ -0,0 +1,17 @@
|
||||
diff0:
|
||||
diff1:
|
||||
diff2:
|
||||
nothing:
|
||||
nulls:
|
||||
\N \N \N \N
|
||||
full orig:
|
||||
1 \N \N \N
|
||||
\N 1 \N \N
|
||||
\N \N 1 \N
|
||||
\N \N \N \N
|
||||
full inserted:
|
||||
1 \N \N \N
|
||||
\N 1 \N \N
|
||||
\N \N 1 \N
|
||||
\N \N \N \N
|
||||
diff3:
|
111
dbms/tests/queries/0_stateless/00900_parquet_decimal.sh
Executable file
111
dbms/tests/queries/0_stateless/00900_parquet_decimal.sh
Executable file
@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env bash
# Parquet round-trip test for Decimal columns: rows are selected from
# test.decimal in Parquet format, piped into a second client that inserts
# them into test.decimal2, and text dumps of both tables are compared with diff.
|
||||
|
||||
# set -x
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
. $CUR_DIR/../shell_config.sh
|
||||
# Start from a clean state in case the tables already exist.
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal;"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal2;"
|
||||
|
||||
# Simple small values
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal (a DECIMAL(9,0), b DECIMAL(18,0), c DECIMAL(38,0), d DECIMAL(9, 9), e DECIMAL(18, 18), f DECIMAL(38, 38), g Decimal(9, 5), h decimal(18, 9), i deciMAL(38, 18), j DECIMAL(1,0)) ENGINE = Memory;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal2 AS test.decimal ENGINE = Memory;"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0);"
|
||||
# NOTE(review): the next three INSERTs are disabled; the reason is not recorded here.
#${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (1, 1, 1, 0.1, 0.1, 1, 1, 1, 1, 1);"
|
||||
#${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (10, 10, 10, 0.1, 0.1, 0.1, 10, 10, 10, 10);"
|
||||
#${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-100, -100, -100, -0.1, -0.1, -0.1, -100, -100, -100, -100);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c) VALUES (1, 1, 1);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c) VALUES (10, 10, 10);"
|
||||
# Dump the source table as text; this is the left-hand side of the diff below.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j;" > ${CLICKHOUSE_TMP}/parquet_decimal0_1.dump
|
||||
# The .parquet file is written but not read back anywhere in this script.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j FORMAT Parquet;" > ${CLICKHOUSE_TMP}/parquet_decimal0.parquet
|
||||
# Round trip: Parquet output of test.decimal becomes the input of test.decimal2.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j FORMAT Parquet;" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal2 FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal2 ORDER BY a, b, c, d, e, f, g, h, i, j;" > ${CLICKHOUSE_TMP}/parquet_decimal0_2.dump
|
||||
# diff prints nothing when both dumps are identical.
echo diff0:
|
||||
diff ${CLICKHOUSE_TMP}/parquet_decimal0_1.dump ${CLICKHOUSE_TMP}/parquet_decimal0_2.dump
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal;"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal2;"
|
||||
|
||||
|
||||
# Boundary values: largest and smallest magnitudes for each declared
# precision/scale, smallest non-zero fractions, zeros written in several
# forms, and values supplied as strings / in exponent notation.
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal ( a DECIMAL(9,0), b DECIMAL(18,0), c DECIMAL(38,0), d DECIMAL(9, 9), e DECIMAL(18, 18), f DECIMAL(38, 38), g Decimal(9, 5), h decimal(18, 9), i deciMAL(38, 18), j DECIMAL(1,0)) ENGINE = Memory;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal2 AS test.decimal ENGINE = Memory;"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, d, g) VALUES (999999999, 999999999999999999, 0.999999999, 9999.99999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, d, g) VALUES (-999999999, -999999999999999999, -0.999999999, -9999.99999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (c) VALUES (99999999999999999999999999999999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (c) VALUES (-99999999999999999999999999999999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (f) VALUES (0.99999999999999999999999999999999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (f) VALUES (-0.99999999999999999999999999999999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (e, h) VALUES (0.999999999999999999, 999999999.999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (e, h) VALUES (-0.999999999999999999, -999999999.999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (i) VALUES (99999999999999999999.999999999999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (i) VALUES (-99999999999999999999.999999999999999999);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, g, j, h) VALUES (1, 1, 1, 0.000000001, 0.00001, 1, 0.000000001);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, g, j, h) VALUES (-1, -1, -1, -0.000000001, -0.00001, -1, -0.000000001);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (e, f) VALUES (0.000000000000000001, 0.00000000000000000000000000000000000001);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (e, f) VALUES (-0.000000000000000001, -0.00000000000000000000000000000000000001);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (i) VALUES (0.000000000000000001);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (i) VALUES (-0.000000000000000001);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (0, 0, 0, 0, 0, 0, 0, 0, 0, 0);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-0, -0, -0, -0, -0, -0, -0, -0, -0, -0);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, g) VALUES ('42.00000', 42.0000000000000000000000000000000, '0.999990');"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('0.9e9', '0.9e18', '0.9e38', '9e-9', '9e-18', '9e-38');"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f) VALUES ('-0.9e9', '-0.9e18', '-0.9e38', '-9e-9', '-9e-18', '-9e-38');"
|
||||
# Text dump of the source table: left-hand side of the diff below.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j;" > ${CLICKHOUSE_TMP}/parquet_decimal1_1.dump
|
||||
# The .parquet file is written but not read back anywhere in this script.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j FORMAT Parquet;" > ${CLICKHOUSE_TMP}/parquet_decimal1.parquet
|
||||
# Round trip through Parquet into test.decimal2.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j FORMAT Parquet;" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal2 FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal2 ORDER BY a, b, c, d, e, f, g, h, i, j;" > ${CLICKHOUSE_TMP}/parquet_decimal1_2.dump
|
||||
# diff prints nothing when both dumps are identical.
echo diff1:
|
||||
diff ${CLICKHOUSE_TMP}/parquet_decimal1_1.dump ${CLICKHOUSE_TMP}/parquet_decimal1_2.dump
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal;"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal2;"
|
||||
|
||||
# Same round trip with several columns declared through the
# Decimal32/Decimal64/Decimal128 and dec(...) spellings, using one positive
# and one negative row of mid-range values.
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal (a DECIMAL(9,0), b DECIMAL(18,0), c DECIMAL(38,0), d DECIMAL(9, 9), e Decimal64(18), f Decimal128(38), g Decimal32(5), h Decimal64(9), i Decimal128(18), j dec(4,2)) ENGINE = Memory;"
|
||||
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal2 AS test.decimal ENGINE = Memory;"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (42, 42, 42, 0.42, 0.42, 0.42, 42.42, 42.42, 42.42, 42.42);"
|
||||
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal (a, b, c, d, e, f, g, h, i, j) VALUES (-42, -42, -42, -0.42, -0.42, -0.42, -42.42, -42.42, -42.42, -42.42);"
|
||||
# Text dump of the source table: left-hand side of the diff below.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j;" > ${CLICKHOUSE_TMP}/parquet_decimal2_1.dump
|
||||
# The .parquet file is written but not read back anywhere in this script.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j FORMAT Parquet;" > ${CLICKHOUSE_TMP}/parquet_decimal2.parquet
|
||||
# Round trip through Parquet into test.decimal2.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d, e, f, g, h, i, j FORMAT Parquet;" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal2 FORMAT Parquet"
|
||||
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal2 ORDER BY a, b, c, d, e, f, g, h, i, j;" > ${CLICKHOUSE_TMP}/parquet_decimal2_2.dump
|
||||
# diff prints nothing when both dumps are identical.
echo diff2:
|
||||
diff ${CLICKHOUSE_TMP}/parquet_decimal2_1.dump ${CLICKHOUSE_TMP}/parquet_decimal2_2.dump
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal;"
|
||||
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal2;"
|
||||
|
||||
|
||||
# Nullable decimals: round-trip an empty table, a single all-NULL row, and a
# mix of NULL and non-NULL values through the Parquet format.
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal (a Nullable(DECIMAL(9,0)), b Nullable(DECIMAL(18,0)), c Nullable(DECIMAL(38,0)), d Nullable(DECIMAL(9,0))) ENGINE = Memory;"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE IF NOT EXISTS test.decimal2 AS test.decimal ENGINE = Memory;"

# Empty table test
# throws No data to insert
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d FORMAT Parquet;" > ${CLICKHOUSE_TMP}/parquet_decimal3_1.parquet
# stderr of the inserting client is discarded so the error text stays out of the test output.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d FORMAT Parquet;" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal2 FORMAT Parquet" 2> /dev/null
echo nothing:
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal2 ORDER BY a, b, c, d;"
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.decimal2;"

# A single row consisting only of NULLs.
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal VALUES (Null, Null, Null, Null)"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d FORMAT Parquet;" > ${CLICKHOUSE_TMP}/parquet_decimal3_2.parquet
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d FORMAT Parquet;" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal2 FORMAT Parquet"
echo nulls:
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal2 ORDER BY a, b, c, d;"
${CLICKHOUSE_CLIENT} --query="TRUNCATE TABLE test.decimal2;"

# Rows mixing NULL and non-NULL values, on top of the all-NULL row above.
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal VALUES (1, Null, Null, Null)"
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal VALUES (Null, 1, Null, Null)"
${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal VALUES (Null, Null, 1, Null)"
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d FORMAT Parquet;" > ${CLICKHOUSE_TMP}/parquet_decimal3_3.parquet
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d FORMAT Parquet;" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.decimal2 FORMAT Parquet"

echo full orig:
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d;"
echo full inserted:
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal2 ORDER BY a, b, c, d;"

# Compare the source table with the round-tripped copy. The first dump must
# come from test.decimal: dumping test.decimal2 twice would make the diff
# below pass no matter what was inserted.
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal ORDER BY a, b, c, d;" > ${CLICKHOUSE_TMP}/parquet_decimal3_1.dump
${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.decimal2 ORDER BY a, b, c, d;" > ${CLICKHOUSE_TMP}/parquet_decimal3_2.dump

echo diff3:
diff ${CLICKHOUSE_TMP}/parquet_decimal3_1.dump ${CLICKHOUSE_TMP}/parquet_decimal3_2.dump

# Clean up like the earlier sections do, so no tables are left behind.
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal;"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.decimal2;"
|
729
dbms/tests/queries/0_stateless/00900_parquet_load.reference
Normal file
729
dbms/tests/queries/0_stateless/00900_parquet_load.reference
Normal file
@ -0,0 +1,729 @@
|
||||
=== Try load data from alltypes_dictionary.parquet
|
||||
0 1 0 0 0 0 0 0 01/01/09 0 1230768000
|
||||
1 0 1 1 1 10 1.1 10.1 01/01/09 1 1230768060
|
||||
=== Try load data from alltypes_plain.parquet
|
||||
4 1 0 0 0 0 0 0 03/01/09 0 1235865600
|
||||
5 0 1 1 1 10 1.1 10.1 03/01/09 1 1235865660
|
||||
6 1 0 0 0 0 0 0 04/01/09 0 1238544000
|
||||
7 0 1 1 1 10 1.1 10.1 04/01/09 1 1238544060
|
||||
2 1 0 0 0 0 0 0 02/01/09 0 1233446400
|
||||
3 0 1 1 1 10 1.1 10.1 02/01/09 1 1233446460
|
||||
0 1 0 0 0 0 0 0 01/01/09 0 1230768000
|
||||
1 0 1 1 1 10 1.1 10.1 01/01/09 1 1230768060
|
||||
=== Try load data from alltypes_plain.snappy.parquet
|
||||
6 1 0 0 0 0 0 0 04/01/09 0 1238544000
|
||||
7 0 1 1 1 10 1.1 10.1 04/01/09 1 1238544060
|
||||
=== Try load data from byte_array_decimal.parquet
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
6.0
|
||||
7.0
|
||||
8.0
|
||||
9.0
|
||||
10.0
|
||||
11.0
|
||||
12.0
|
||||
13.0
|
||||
14.0
|
||||
15.0
|
||||
16.0
|
||||
17.0
|
||||
18.0
|
||||
19.0
|
||||
20.0
|
||||
21.0
|
||||
22.0
|
||||
23.0
|
||||
24.0
|
||||
=== Try load data from datapage_v2.snappy.parquet
|
||||
Code: 33. DB::Ex---tion: Error while reading parquet data: IOError: Arrow error: IOError: Corrupt snappy compressed data.
|
||||
|
||||
=== Try load data from fixed_length_decimal_1.parquet
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
6.0
|
||||
7.0
|
||||
8.0
|
||||
9.0
|
||||
10.0
|
||||
11.0
|
||||
12.0
|
||||
13.0
|
||||
14.0
|
||||
15.0
|
||||
16.0
|
||||
17.0
|
||||
18.0
|
||||
19.0
|
||||
20.0
|
||||
21.0
|
||||
22.0
|
||||
23.0
|
||||
24.0
|
||||
=== Try load data from fixed_length_decimal_legacy.parquet
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
6.0
|
||||
7.0
|
||||
8.0
|
||||
9.0
|
||||
10.0
|
||||
11.0
|
||||
12.0
|
||||
13.0
|
||||
14.0
|
||||
15.0
|
||||
16.0
|
||||
17.0
|
||||
18.0
|
||||
19.0
|
||||
20.0
|
||||
21.0
|
||||
22.0
|
||||
23.0
|
||||
24.0
|
||||
=== Try load data from int32_decimal.parquet
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
6.0
|
||||
7.0
|
||||
8.0
|
||||
9.0
|
||||
10.0
|
||||
11.0
|
||||
12.0
|
||||
13.0
|
||||
14.0
|
||||
15.0
|
||||
16.0
|
||||
17.0
|
||||
18.0
|
||||
19.0
|
||||
20.0
|
||||
21.0
|
||||
22.0
|
||||
23.0
|
||||
24.0
|
||||
=== Try load data from int64_decimal.parquet
|
||||
1.0
|
||||
2.0
|
||||
3.0
|
||||
4.0
|
||||
5.0
|
||||
6.0
|
||||
7.0
|
||||
8.0
|
||||
9.0
|
||||
10.0
|
||||
11.0
|
||||
12.0
|
||||
13.0
|
||||
14.0
|
||||
15.0
|
||||
16.0
|
||||
17.0
|
||||
18.0
|
||||
19.0
|
||||
20.0
|
||||
21.0
|
||||
22.0
|
||||
23.0
|
||||
24.0
|
||||
=== Try load data from nation.dict-malformed.parquet
|
||||
0 ALGERIA 0 haggle. carefully final deposits detect slyly agai
|
||||
1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon
|
||||
2 BRAZIL 1 y alongside of the pending deposits. carefully special packages are about the ironic forges. slyly special
|
||||
3 CANADA 1 eas hang ironic, silent packages. slyly regular packages are furiously over the tithes. fluffily bold
|
||||
4 EGYPT 4 y above the carefully unusual theodolites. final dugouts are quickly across the furiously regular d
|
||||
5 ETHIOPIA 0 ven packages wake quickly. regu
|
||||
6 FRANCE 3 refully final requests. regular, ironi
|
||||
7 GERMANY 3 l platelets. regular accounts x-ray: unusual, regular acco
|
||||
8 INDIA 2 ss excuses cajole slyly across the packages. deposits print aroun
|
||||
9 INDONESIA 2 slyly express asymptotes. regular deposits haggle slyly. carefully ironic hockey players sleep blithely. carefull
|
||||
10 IRAN 4 efully alongside of the slyly final dependencies.
|
||||
11 IRAQ 4 nic deposits boost atop the quickly final requests? quickly regula
|
||||
12 JAPAN 2 ously. final, express gifts cajole a
|
||||
13 JORDAN 4 ic deposits are blithely about the carefully regular pa
|
||||
14 KENYA 0 pending excuses haggle furiously deposits. pending, express pinto beans wake fluffily past t
|
||||
15 MOROCCO 0 rns. blithely bold courts among the closely regular packages use furiously bold platelets?
|
||||
16 MOZAMBIQUE 0 s. ironic, unusual asymptotes wake blithely r
|
||||
17 PERU 1 platelets. blithely pending dependencies use fluffily across the even pinto beans. carefully silent accoun
|
||||
18 CHINA 2 c dependencies. furiously express notornis sleep slyly regular accounts. ideas sleep. depos
|
||||
19 ROMANIA 3 ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account
|
||||
20 SAUDI ARABIA 4 ts. silent requests haggle. closely express packages sleep across the blithely
|
||||
21 VIETNAM 2 hely enticingly express accounts. even, final
|
||||
22 RUSSIA 3 requests against the platelets use never according to the quickly regular pint
|
||||
23 UNITED KINGDOM 3 eans boost carefully special requests. accounts are. carefull
|
||||
24 UNITED STATES 1 y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be
|
||||
=== Try load data from nested_lists.snappy.parquet
|
||||
Code: 8. DB::Ex---tion: Column "element" is not presented in input data
|
||||
|
||||
=== Try load data from nested_maps.snappy.parquet
|
||||
Code: 33. DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported.
|
||||
|
||||
=== Try load data from nonnullable.impala.parquet
|
||||
Code: 33. DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported.
|
||||
|
||||
=== Try load data from nullable.impala.parquet
|
||||
Code: 33. DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported.
|
||||
|
||||
=== Try load data from nulls.snappy.parquet
|
||||
Code: 8. DB::Ex---tion: Column "b_c_int" is not presented in input data
|
||||
|
||||
=== Try load data from repeated_no_annotation.parquet
|
||||
Code: 33. DB::Ex---tion: Error while reading parquet data: NotImplemented: Currently only nesting with Lists is supported.
|
||||
|
||||
=== Try load data from userdata1.parquet
|
||||
1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02
|
||||
1454519043 2 Albert Freeman afreeman1@is.gd Male 218.111.175.34 Canada 1/16/1968 150280.17 Accountant IV
|
||||
1454461771 3 Evelyn Morgan emorgan2@altervista.org Female 7.161.136.94 6767119071901597 Russia 2/1/1960 144972.51 Structural Engineer
|
||||
1454459781 4 Denise Riley driley3@gmpg.org Female 140.35.109.83 3576031598965625 China 4/8/1997 90263.05 Senior Cost Accountant
|
||||
1454475931 5 Carlos Burns cburns4@miitbeian.gov.cn 169.113.235.40 5602256255204850 South Africa \N
|
||||
1454484154 6 Kathryn White kwhite5@google.com Female 195.131.81.179 3583136326049310 Indonesia 2/25/1983 69227.11 Account Executive
|
||||
1454488388 7 Samuel Holmes sholmes6@foxnews.com Male 232.234.81.197 3582641366974690 Portugal 12/18/1987 14247.62 Senior Financial Analyst
|
||||
1454482026 8 Harry Howell hhowell7@eepurl.com Male 91.235.51.73 Bosnia and Herzegovina 3/1/1962 186469.43 Web Developer IV
|
||||
1454471573 9 Jose Foster jfoster8@yelp.com Male 132.31.53.61 South Korea 3/27/1992 231067.84 Software Test Engineer I 1E+02
|
||||
1454524187 10 Emily Stewart estewart9@opensource.org Female 143.28.251.245 3574254110301671 Nigeria 1/28/1997 27234.28 Health Coach IV
|
||||
1454458242 11 Susan Perkins sperkinsa@patch.com Female 180.85.0.62 3573823609854134 Russia 210001.95
|
||||
1454522674 12 Alice Berry aberryb@wikipedia.org Female 246.225.12.189 4917830851454417 China 8/12/1968 22944.53 Quality Engineer
|
||||
1454525297 13 Justin Berry jberryc@usatoday.com Male 157.7.146.43 6331109912871813274 Zambia 8/15/1975 44165.46 Structural Analysis Engineer
|
||||
1454536012 14 Kathy Reynolds kreynoldsd@redcross.org Female 81.254.172.13 5537178462965976 Bosnia and Herzegovina 6/27/1970 286592.99 Librarian
|
||||
1454489603 15 Dorothy Hudson dhudsone@blogger.com Female 8.59.7.0 3542586858224170 Japan 12/20/1989 157099.71 Nurse Practicioner <script>alert(\'hi\')</script>
|
||||
1454460241 16 Bruce Willis bwillisf@bluehost.com Male 239.182.219.189 3573030625927601 Brazil 239100.65
|
||||
1454461065 17 Emily Andrews eandrewsg@cornell.edu Female 29.231.180.172 30271790537626 Russia 4/13/1990 116800.65 Food Chemist
|
||||
1454517864 18 Stephen Wallace swallaceh@netvibes.com Male 152.49.213.62 5433943468526428 Ukraine 1/15/1978 248877.99 Account Representative I
|
||||
1454499954 19 Clarence Lawson clawsoni@vkontakte.ru Male 107.175.15.152 3544052814080964 Russia 177122.99
|
||||
1454495436 20 Rebecca Bell rbellj@bandcamp.com Female 172.215.104.127 China 137251.19
|
||||
1454505444 21 Diane Stevens dstevensk@cnet.com Female 141.243.73.164 Russia 6/5/1985 87978.22 Food Chemist œ∑´®†¥¨ˆøπ“‘
|
||||
1454523505 22 Lawrence Ramos lramosl@sourceforge.net Male 46.72.4.6 3537473810855655 Tanzania 131283.64
|
||||
1454525455 23 Gregory Barnes gbarnesm@google.ru Male 220.22.114.145 3538432455620641 Tunisia 1/23/1971 182233.49 Senior Sales Associate 사회과학원 어학연구소
|
||||
1454472340 24 Michelle Ellis mellisn@timesonline.co.uk Female 239.81.215.135 3547383558025965 Tanzania 6/5/1964 278001.46 Tax Accountant
|
||||
1454518347 25 Rachel Perkins rperkinso@lulu.com Female 90.173.28.95 633313663891003209 Russia 176178.75
|
||||
1454486554 26 Anthony Lawrence alawrencep@miitbeian.gov.cn Male 121.211.242.99 564182969714151470 Japan 12/10/1979 170085.81 Electrical Engineer
|
||||
1454488886 27 Henry Henry hhenryq@godaddy.com Male 191.88.236.116 4905730021217853521 China 9/22/1995 284300.15 Nuclear Power Engineer
|
||||
1454519352 28 Samuel Hunter shunterr@instagram.com Male 72.190.230.173 5002353797389897 Brazil 9/21/1968 108950.24 Environmental Tech
|
||||
1454469374 29 Jacqueline Holmes jholmess@ustream.tv Female 47.141.224.95 3555934842115316 United States 247939.52 ̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟
|
||||
1454535469 30 Annie Torres atorrest@ning.com Female 202.94.67.27 3530389861801215 Nigeria 5/20/1958 118310.72 Electrical Engineer -1E+02
|
||||
1454526588 31 Antonio Berry aberryu@ow.ly Male 5.82.180.4 Thailand 135007.96
|
||||
1454533547 32 Nicole Martinez nmartinezv@oakley.com Female 46.32.149.87 United States 149720.75 Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮
|
||||
1454459459 33 Christina Mason cmasonw@nydailynews.com Female 74.214.22.120 Greece 7/21/1986 242593.85 Senior Sales Associate
|
||||
1454541103 34 Margaret Barnes mbarnesx@angelfire.com Female 133.178.126.244 3582552005871223 South Africa 11/13/1969 109644.23 Human Resources Assistant II
|
||||
1454487881 35 Melissa Kelly mkellyy@unblog.fr Female 179.132.207.169 6374648559206801 Indonesia 2/6/1968 45639.62 General Manager Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣
|
||||
1454484472 36 Betty Carr bcarrz@parallels.com Female 159.201.161.49 France 91370.3 -1E2
|
||||
1454532399 37 Dorothy Gomez dgomez10@jiathis.com Female 65.111.200.146 493684876859391834 China 57194.86
|
||||
1454538878 38 Kathryn Lane klane11@netlog.com Female 169.141.178.89 5308993357499254 Czech Republic 8/20/1964 67783.73 Paralegal
|
||||
1454511326 39 Jose Murphy jmurphy12@paypal.com Male 118.85.253.180 4994715164232848 Chile 8/8/1991 134708.82 Nuclear Power Engineer
|
||||
1454458506 40 Jack Flores jflores13@yolasite.com Male 162.215.65.11 3577342788590928 Argentina 1/28/1958 81685.1 Financial Advisor
|
||||
1454529124 41 Walter Martinez wmartinez14@spotify.com Male 165.150.92.96 Somalia 3/8/1972 212105.33 Health Coach I
|
||||
1454473984 42 Todd Alvarez talvarez15@csmonitor.com Male 59.123.34.76 3557102122317535 Japan 12/19/1999 284728.99 Marketing Assistant
|
||||
1454488466 43 Amanda Gray agray16@cdbaby.com Female 252.20.193.145 3561501596653859 China 8/28/1967 213410.26 Senior Quality Engineer
|
||||
1454494415 44 Sharon Simpson ssimpson17@weather.com Female 242.68.147.87 France 9/28/1963 133884.94 Analog Circuit Design manager
|
||||
1454526201 45 Bonnie Collins bcollins18@list-manage.com Female 132.217.56.27 3540813015762450 Germany 7/21/1986 67661.42 Business Systems Development Analyst
|
||||
1454474597 46 Deborah Armstrong darmstrong19@addthis.com Female 89.44.11.142 Canada 4/8/1969 111569.22 Quality Control Specialist test
|
||||
1454486980 47 Daniel Mccoy dmccoy1a@skype.com Male 115.85.247.190 3554507990607374 Central African Republic 66260.14 ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙
|
||||
1454505529 48 Jean Flores jflores1b@samsung.com Female 211.70.131.207 5392903051983005 Nepal 4/6/1990 199100.32 Financial Advisor
|
||||
1454521849 49 Lisa Snyder lsnyder1c@woothemes.com Female 145.202.177.215 30475362189761 Germany 12/12/1974 210631.91 Safety Technician II
|
||||
1454469295 50 Sean Alexander salexander1d@dagondesign.com Male 89.83.147.177 Bosnia and Herzegovina 5/29/1978 256068.38 Senior Financial Analyst
|
||||
1454481568 51 Ernest Carroll ecarroll1e@dailymail.co.uk Male 194.224.39.215 5100172156945078 Portugal 11/1/1992 100269.36 Dental Hygienist
|
||||
1454492589 52 Louise Dean ldean1f@tamu.edu Female 109.43.178.48 201996646854139 Ethiopia 173300.37
|
||||
1454457952 53 Ralph Price rprice1g@tmall.com Male 152.6.235.33 4844227560658222 China 8/26/1986 168208.4 Teacher
|
||||
1454467269 54 George Ferguson gferguson1h@51.la Male 129.108.219.50 3539784298399554 Macedonia 6/26/1971 153238.6 Computer Systems Analyst IV パーティーへ行かないか
|
||||
1454515393 55 Anna Montgomery amontgomery1i@google.cn Female 80.111.141.47 3586860392406446 China 9/6/1957 92837.5 Software Test Engineer IV 1E2
|
||||
1454514049 56 Cheryl Lawrence clawrence1j@ameblo.jp Female 171.155.78.116 Finland 5/7/1985 200827.88 Recruiting Manager
|
||||
1454459605 57 Willie Palmer wpalmer1k@t-online.de Male 164.107.46.161 4026614769857244 China 8/23/1986 184978.64 Environmental Specialist
|
||||
1454478957 58 Arthur Berry aberry1l@unc.edu Male 52.42.24.55 3542761473624274 China 144164.88
|
||||
1454519593 59 Patricia Marshall pmarshall1m@dell.com Female 47.108.196.175 China 7/21/1984 69236.54 Environmental Specialist
|
||||
1454466852 60 Cynthia Richards crichards1n@dailymail.co.uk Female 178.236.66.213 3557986543874466 Brazil 179378
|
||||
1454496286 61 David Sanders dsanders1o@fda.gov Male 94.143.190.8 3585745042921822 Mexico 2/15/1963 197445.45 Data Coordiator 0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟
|
||||
1454534081 62 Julia Sullivan jsullivan1p@wisc.edu Female 32.183.154.67 6767624411254094 Bolivia 11/28/1963 118311.39 Electrical Engineer
|
||||
1454530379 63 Kevin Butler kbutler1q@symantec.com Male 21.88.110.64 3551107057688681 Georgia 12/13/1962 129632.55 Database Administrator III
|
||||
1454475593 64 Dennis Ross dross1r@parallels.com Male 78.25.77.223 Portugal 5/27/1959 280933.71 Biostatistician II
|
||||
1454478626 65 Raymond Jacobs rjacobs1s@sohu.com Male 188.52.98.175 5048378563875353 Indonesia 13673.35
|
||||
1454532460 66 Steven Pierce spierce1t@usgs.gov Male 230.13.54.19 5100178880451481 Namibia 4/10/1965 152382.69 Analyst Programmer
|
||||
1454480831 67 Jonathan Ellis jellis1u@g.co Male 125.115.227.203 China 4/5/1991 268468.96 Staff Scientist
|
||||
1454460516 68 Rachel Price rprice1v@census.gov Female 89.52.192.105 Indonesia 5/6/1982 234502.16 Payment Adjustment Coordinator
|
||||
1454492257 69 Harold Olson holson1w@chronoengine.com Male 169.173.35.139 China 7/25/1994 146917.43 Occupational Therapist
|
||||
1454524497 70 Pamela Wagner pwagner1x@gravatar.com Female 184.97.191.144 5593584893781844 Italy 5/3/1964 253108.75 Automation Specialist I 1;DROP TABLE users
|
||||
1454537805 71 Stephanie Watkins swatkins1y@rakuten.co.jp 124.183.29.113 30552863095190 Burkina Faso 8/29/1971 \N Physical Therapy Assistant
|
||||
1454530454 72 John Ortiz jortiz1z@mozilla.org Male 4.70.220.127 5194470971764378 Sweden 2/13/1978 91566.02 Analyst Programmer
|
||||
1454523864 73 Kimberly Wheeler kwheeler20@imgur.com Female 26.46.50.55 China 11/6/1978 31026.94 Junior Executive
|
||||
1454470404 74 Kathryn Henderson khenderson21@ask.com Female 218.212.63.68 4936394111685353310 Ukraine 4/11/1985 59413.85 Pharmacist -$1.00
|
||||
1454527390 75 Catherine Gibson cgibson22@ebay.com Female 204.84.35.26 5402007176101895 Indonesia 12/20/1984 92315.94 Desktop Support Technician
|
||||
1454509078 76 Carolyn Nelson cnelson23@tiny.cc Female 64.13.61.211 4844223687165886 Estonia 3/9/1985 179193.6 Social Worker
|
||||
1454479055 77 Denise Nguyen dnguyen24@ovh.net Female 18.208.48.116 201900233821394 China 121013.48
|
||||
1454458493 78 Mildred Torres mtorres25@alibaba.com Female 38.102.60.15 6399156779396437 Russia 9/24/1960 166987.55 Paralegal
|
||||
1454507970 79 Linda Shaw lshaw26@psu.edu Female 188.221.197.229 3557917782902346 Russia 9/30/1987 67211.67 Structural Analysis Engineer
|
||||
1454540546 80 Anna Hudson ahudson27@gmpg.org Female 153.84.219.15 Indonesia 9/12/1997 110408.87 VP Marketing
|
||||
1454536800 81 Albert Pierce apierce28@phoca.cz Male 145.148.40.149 Palestinian Territory 11/4/1955 43019.01 Web Developer III 0/0
|
||||
1454542995 82 Carol Franklin cfranklin29@marketwatch.com Female 32.189.30.244 67097647572873744 China 6/5/1978 31572.53 Automation Specialist II
|
||||
1454506472 83 Carlos Washington cwashington2a@phpbb.com Male 90.239.40.124 67063904960748578 United States 11/4/1970 28853.61 Developer I ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙
|
||||
1454463081 84 Kathryn Austin kaustin2b@livejournal.com Female 152.193.181.90 Philippines 10/8/1990 131855.43 Nurse Practicioner
|
||||
1454494358 85 Lillian Gardner lgardner2c@hao123.com Female 189.104.46.70 Russia 10/28/1961 145282.64 Occupational Therapist
|
||||
1454530407 86 Peter Mendoza pmendoza2d@paypal.com Male 77.225.63.206 3562330687037049 Mexico 12/23/1988 40664.88 Staff Scientist
|
||||
1454466533 87 Dennis Torres dtorres2e@ask.com Male 199.131.129.105 50188330277167912 Croatia 5/25/1986 265985 Account Representative II 社會科學院語學研究所
|
||||
1454463286 88 Timothy Watkins twatkins2f@toplist.cz Male 120.52.182.111 Tunisia 6/24/2000 242129.05 Operator
|
||||
1454498394 89 Nicole Willis nwillis2g@cmu.edu Female 44.196.120.110 6394724888228638 Indonesia 2/1/1966 258772.36 Physical Therapy Assistant
|
||||
1454525151 90 Jacqueline Carr jcarr2h@freewebs.com Female 197.40.38.49 201939989746686 China 5/31/1961 100733.44 Civil Engineer (。◕ ∀ ◕。)
|
||||
1454510656 91 Theresa Gonzalez tgonzalez2i@nih.gov Female 237.106.229.219 Argentina 8/10/1970 47723.61 Product Engineer
|
||||
1454479785 92 Donald Bradley dbradley2j@latimes.com Male 244.82.249.86 3534114122488321 Indonesia 7/8/2000 105051.77 Tax Accountant
|
||||
1454512853 93 Katherine Little klittle2k@cyberchimps.com Female 61.43.154.182 30218284989094 Poland 1/20/1990 155597.16 Associate Professor
|
||||
1454516486 94 Ruth Cooper rcooper2l@apache.org Female 114.82.62.61 Indonesia 7/20/1993 181481.5 Civil Engineer
|
||||
1454498785 95 Stephen Gutierrez sgutierrez2m@walmart.com Male 134.231.189.30 3560204445825528 Guatemala 8/22/1995 83986.79 Structural Engineer
|
||||
1454473160 96 Kevin Scott kscott2n@histats.com Male 226.59.43.229 3558997916332270 United States 6/5/1966 130054.63 Graphic Designer ÅÍÎÏ˝ÓÔÒÚÆ☃
|
||||
1454540928 97 Steven Williamson swilliamson2o@devhub.com Male 122.216.99.88 France 238119.62
|
||||
1454473451 98 Shawn Adams sadams2p@imdb.com Male 148.92.123.202 5893564746795315893 Indonesia 11/10/1959 67749.83 Senior Developer test
|
||||
1454507278 99 Russell Fields rfields2q@google.ca Male 110.74.199.162 Tanzania 1/2/1994 13268.99 Mechanical Systems Engineer
|
||||
1454514595 100 Willie Weaver wweaver2r@google.de Male 13.54.121.138 3534023246040472 Mexico 8/21/1970 175694.61 Dental Hygienist ̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰
|
||||
=== Try load data from userdata2.parquet
|
||||
1454506599 1 Donald Lewis dlewis0@clickbank.net Male 102.22.124.20 Indonesia 7/9/1972 140249.37 Senior Financial Analyst
|
||||
1454458948 2 Walter Collins wcollins1@bloglovin.com Male 247.28.26.93 3587726269478025 China \N
|
||||
1454524144 3 Michelle Henderson mhenderson2@geocities.jp Female 193.68.146.150 France 1/15/1964 236219.26 Teacher
|
||||
1454506939 4 Lori Hudson lhudson3@dion.ne.jp 34.252.168.48 3568840151595649 Russia 4/22/1988 \N Nuclear Power Engineer
|
||||
1454458529 5 Howard Miller hmiller4@fema.gov Male 103.193.150.230 3583473261055014 France 11/26/1998 50210.02 Senior Editor
|
||||
1454496547 6 Frances Adams fadams5@123-reg.co.uk Female 106.196.106.93 Russia 3/27/1997 82175.77 Account Coordinator
|
||||
1454528652 \N Steven Hanson shanson6@cisco.com Male 234.130.172.185 3550842607768119 Indonesia 129582.61
|
||||
1454487094 8 Louis Simmons lsimmons7@icio.us Male 18.69.80.15 China 6/1/1992 90744.86 Product Engineer
|
||||
1454543811 9 Keith Parker kparker8@amazonaws.com Male 108.205.40.64 Guadeloupe 12/30/1992 60618.9 Developer II
|
||||
1454485649 10 Wanda Walker wwalker9@latimes.com Female 246.214.98.78 3539421569669478 Portugal 137664.53
|
||||
1454517563 11 Kathryn Weaver kweavera@bizjournals.com Female 157.237.161.75 201425019338900 Sweden 117572.65
|
||||
1454482256 12 Philip Ward pwardb@sakura.ne.jp Male 77.140.225.69 201508031789224 Greece 9/3/1984 238925.79 Human Resources Manager
|
||||
1454542618 13 Evelyn Harvey eharveyc@time.com 254.174.154.7 3539535868968594 China 5/15/1979 \N Software Engineer III
|
||||
1454484804 14 Andrea Lane alaned@gov.uk Female 192.253.116.192 5100174455306952 Indonesia 1/19/1989 166778.42 Operator
|
||||
1454507104 15 Bobby Vasquez bvasqueze@furl.net Male 126.60.18.195 3581051861650673 Philippines 1/25/1975 138184.83 Senior Editor
|
||||
1454536690 16 Kenneth Gibson kgibsonf@soundcloud.com Male 91.153.142.170 5389947292571488 Peru 11/3/1975 98614.53 Environmental Tech
|
||||
1454516554 17 Emily Hill ehillg@house.gov Female 109.107.174.205 Palestinian Territory 5/18/1956 218781.48 Executive Secretary
|
||||
1454541649 18 Kelly Fowler kfowlerh@dell.com Female 147.58.88.116 3551741291105936 Greece 6/11/1975 117249.56 Systems Administrator III
|
||||
1454524126 19 Diana Howell dhowelli@sphinn.com Female 21.240.75.42 4026635872860296 Iran 7/7/1993 174844.52 Teacher
|
||||
1454466206 20 Johnny Collins jcollinsj@google.ca Male 38.173.129.250 372301677387203 Afghanistan 7/28/1987 155908.69 Social Worker
|
||||
1454493912 21 Frank Bradley fbradleyk@shinystat.com Male 186.9.38.46 4913033819988246 Czech Republic 211051.83
|
||||
1454509391 22 Billy Thomas bthomasl@moonfruit.com Male 143.89.197.162 4026052248187794 Czech Republic 10/7/1991 282061.72 Professor 👾 🙇 💁 🙅 🙆 🙋 🙎 🙍
|
||||
1454523133 23 Philip Moreno pmorenom@rambler.ru Male 9.39.210.239 4041597502244971 United States 122560.95
|
||||
1454536839 24 Billy Ray brayn@meetup.com Male 230.255.220.160 201925598515489 Kazakhstan 2/9/1966 130424.35 VP Accounting 사회과학원 어학연구소
|
||||
1454509252 25 Ryan Wilson rwilsono@forbes.com Male 197.77.142.137 Poland 7/4/1961 280703.91 Software Test Engineer III
|
||||
1454458024 26 Sandra Coleman scolemanp@blogger.com Female 230.159.39.252 3555708337891155 China 8/7/1971 113688.11 VP Sales
|
||||
1454513250 27 Evelyn Moreno emorenoq@chronoengine.com Female 126.96.111.52 3557508895347766 United States 8/17/1990 167131.57 Recruiting Manager
|
||||
1454509036 28 Elizabeth Warren ewarrenr@flavors.me Female 213.8.204.211 67099385430526802 China 6/14/1996 119515.12 Media Manager II
|
||||
1454541241 29 Linda Hawkins lhawkinss@fotki.com Female 206.6.3.196 4913079795915711 Philippines 2/14/1961 107779.93 Technical Writer
|
||||
1454493935 30 Janice Day jdayt@devhub.com Female 243.24.120.209 Ukraine 6/9/1972 53906.4 Marketing Manager
|
||||
1454483872 31 Diane Perez dperezu@ihg.com Female 182.136.218.77 Belarus 2/9/1957 170326.91 Chief Design Engineer
|
||||
1454529216 32 Bruce Robinson brobinsonv@redcross.org Male 5.126.135.106 201769377515751 Philippines 169520.45
|
||||
1454470160 33 Daniel Lawrence dlawrencew@usgs.gov Male 200.168.191.214 4911581295367856744 United States 5/7/1967 199535.76 VP Sales
|
||||
1454474809 34 Theresa James tjamesx@quantcast.com Female 83.122.166.224 3545570545148759 Russia 104683.19
|
||||
1454536922 35 Scott Russell srusselly@printfriendly.com Male 92.233.3.208 Bolivia 205730.41
|
||||
1454514354 36 Ruby Vasquez rvasquezz@toplist.cz Female 8.148.83.49 France 11/5/1999 95407.16 Financial Advisor
|
||||
1454524074 37 Jeffrey Hall jhall10@pagesperso-orange.fr Male 91.103.226.35 3531476231658075 Indonesia 5/29/1987 247716.37 Business Systems Development Analyst
|
||||
1454477697 38 Debra Kennedy dkennedy11@state.tx.us Female 116.247.236.130 676732277565853203 Mexico 5/22/1955 272563.67 Desktop Support Technician
|
||||
1454464041 39 Cole Male 157.157.28.86 4911512925983388490 Panama 91174.63
|
||||
1454521471 40 Helen Sanchez hsanchez13@oakley.com Female 222.122.74.77 Venezuela 2/11/1969 189240.59 Food Chemist
|
||||
1454527305 41 Jennifer Russell jrussell14@cpanel.net Female 42.82.215.191 Morocco 80644.64 1E02
|
||||
1454479360 42 Fred Marshall fmarshall15@ifeng.com 160.92.143.233 6374102245574313 China 12/18/1984 \N Structural Engineer
|
||||
1454464402 43 Terry Ford tford16@shop-pro.jp Male 169.34.131.192 3588107849306045 Turkmenistan 286388.01
|
||||
1454468866 44 Maria Mason mmason17@miibeian.gov.cn Female 213.62.60.224 060438374765421941 Sweden 7/6/1973 34664.91 Social Worker
|
||||
1454486568 45 Sharon Schmidt sschmidt18@istockphoto.com Female 111.247.11.124 5100179876769597 Argentina 10/4/1982 150142.49 Mechanical Systems Engineer
|
||||
1454483332 46 Gregory Jones gjones19@jimdo.com Male 132.88.44.128 30372001476487 China 12/31/1972 240265.01 Design Engineer
|
||||
1454520829 47 Raymond Moore rmoore1a@arizona.edu 89.39.221.170 5602248693774107 Japan 4/24/1956 \N VP Sales
|
||||
1454531788 48 Tammy Scott tscott1b@mlb.com Female 236.12.148.59 3577211980737555 Peru 10/14/1959 132064.01 Software Consultant
|
||||
1454480004 49 Willie Alexander walexander1c@home.pl Male 2.199.150.177 Brazil 10/14/1958 26424.57 Executive Secretary `ィ(´∀`∩
|
||||
1454473891 50 William Garrett wgarrett1d@java.com Male 20.24.142.67 Croatia 10/9/1963 181424.2 Database Administrator III
|
||||
1454463118 51 Patricia Peterson ppeterson1e@cpanel.net Female 77.242.54.160 3585161324543005 Peru 3/5/1987 176561.19 Media Manager III
|
||||
1454488118 52 Andrew Cook acook1f@ftc.gov Male 220.139.174.228 6333320102003586 Bolivia 3/8/1969 185775.61 Computer Systems Analyst III
|
||||
1454536072 53 Carol Nichols cnichols1g@statcounter.com Female 233.176.31.182 3543580855019963 Nigeria 1/6/1960 105346.38 Compensation Analyst
|
||||
1454489053 54 Jimmy Morales jmorales1h@archive.org Male 199.160.215.73 3587538933267985 Kiribati 8/25/1961 146625.62 Assistant Media Planner
|
||||
1454538033 55 Nancy Montgomery nmontgomery1i@freewebs.com Female 11.235.20.56 3586137339728301 China 128631.29 $1.00
|
||||
1454461902 56 Thomas Freeman tfreeman1j@java.com Male 161.123.216.250 3536920916224146 Colombia 8/4/1973 239571.27 Senior Developer
|
||||
1454488504 57 Virginia Bell vbell1k@aboutads.info Female 79.142.13.145 3585595583423005 Malaysia 4/2/1998 252007.47 Actuary
|
||||
1454496671 58 Tammy Adams tadams1l@virginia.edu Female 106.207.61.165 3528072249217643 Canada 1/26/1973 98463.77 Business Systems Development Analyst
|
||||
1454516066 59 Cynthia Robertson crobertson1m@alibaba.com Female 106.110.239.97 Belarus 12/20/1962 90950.39 Help Desk Technician
|
||||
1454523801 60 Steven Romero sromero1n@usa.gov Male 65.249.97.254 5007669084530801 Argentina 9/27/1963 14358.32 Quality Control Specialist
|
||||
1454458452 61 Sean Greene sgreene1o@goo.gl Male 71.195.178.59 5602246313163081 China 2/20/1991 70656.63 Sales Representative
|
||||
1454537851 62 Jerry Turner jturner1p@scribd.com Male 69.148.19.138 3561778321182616 New Zealand 5/25/1991 89186 Information Systems Manager
|
||||
1454523562 63 Jennifer Mendoza jmendoza1q@shutterfly.com Female 54.114.8.9 3544098267391200 Russia 7/8/1973 263720.16 General Manager
|
||||
1454477002 64 Roy Hughes rhughes1r@stanford.edu Male 209.120.70.78 3552886646968253 Canada 10/30/1968 191750.33 Mechanical Systems Engineer
|
||||
1454477109 65 Susan Jenkins sjenkins1s@princeton.edu Female 247.155.65.12 Philippines 3/1/1967 86339.04 VP Sales
|
||||
1454527329 66 Norma Dunn ndunn1t@pen.io Female 250.241.78.109 China 7/20/1967 77739.6 Web Designer I
|
||||
1454461701 67 Tina Reid treid1u@163.com Female 116.38.145.226 Germany 4/25/1967 228301.51 Financial Analyst
|
||||
1454478121 68 Cynthia Daniels cdaniels1v@pinterest.com Female 17.140.57.238 3589952234971047 Burundi 1/9/1956 42221.96 Research Nurse
|
||||
1454462100 69 Wells Male 92.13.7.20 Philippines 7/4/1969 78486.77 Tax Accountant
|
||||
1454516337 70 Stephen Butler sbutler1x@moonfruit.com Male 230.147.124.190 Argentina 125060.01
|
||||
1454459366 71 Jacqueline Wallace jwallace1y@dagondesign.com Female 203.83.140.84 3578315582149538 Turkmenistan 4/15/1997 89436.49 Cost Accountant
|
||||
1454479818 72 Carol Dunn cdunn1z@ocn.ne.jp Female 241.2.84.72 5602252003430282308 Bulgaria 2/1/1981 203473.36 Geological Engineer
|
||||
1454505977 73 Russell Williams rwilliams20@imgur.com Male 21.217.68.126 3566925409646658 Slovenia 1/30/1977 252402.64 Librarian
|
||||
1454476392 74 Kathryn Torres ktorres21@rakuten.co.jp Female 4.124.222.88 4026779356659103 Portugal 7/31/1956 121285.58 Project Manager
|
||||
1454463675 75 Larry Mason lmason22@alibaba.com Male 172.104.78.232 3587717468815331 Sweden 4/20/1969 248583.77 Professor
|
||||
1454517479 76 Rachel Dunn rdunn23@hugedomains.com Female 101.213.94.161 6374938227969686 Peru 6/18/1999 79245.45 Chief Design Engineer
|
||||
1454457675 77 Doris Elliott delliott24@shinystat.com Female 36.27.140.126 Portugal 9/23/1987 98288.74 Design Engineer
|
||||
1454483215 78 William Mendoza wmendoza25@prlog.org Male 71.28.136.31 3580069171786970 China 3/20/1967 81965.94 Media Manager II "ثم نفس سقطت وبالتحديد،
|
||||
1454504790 79 Elizabeth Payne epayne26@about.me Female 40.237.87.45 337941052859146 Estonia 49661.99
|
||||
1454481311 80 Dennis Robertson drobertson27@w3.org Male 189.45.163.164 Italy 5/2/1972 19984.47 Web Developer III
|
||||
1454514914 81 Edward Little elittle28@mozilla.org Male 114.189.184.212 South Korea 11/19/1984 141645.22 Senior Sales Associate ../../../../../../../../../../../etc/passwd%00
|
||||
1454530264 82 Roy Tucker rtucker29@vistaprint.com Male 254.148.189.172 Portugal 285617.13
|
||||
1454510066 83 Matthew Gardner mgardner2a@wix.com Male 91.23.27.42 5602247355547230028 Brazil 1/18/1977 267617.18 Actuary
|
||||
1454535958 84 Anthony Palmer apalmer2b@uol.com.br 25.228.124.126 3561410660537354 China 7/4/1974 \N Human Resources Assistant III
|
||||
1454460668 85 John Hudson jhudson2c@rediff.com Male 75.191.191.171 3538638405093479 Georgia 6/22/1994 82621.71 Tax Accountant
|
||||
1454479399 86 Jonathan Mills jmills2d@mail.ru Male 224.145.163.163 36504499928546 Philippines 77260.7 00˙Ɩ$-
|
||||
1454491670 87 Christine Jackson cjackson2e@feedburner.com Female 8.207.125.219 Philippines 6/12/1964 32832.61 Occupational Therapist
|
||||
1454475253 88 Eric Fernandez efernandez2f@artisteer.com Male 246.217.21.160 France 124825.77
|
||||
1454483421 89 Heather Diaz hdiaz2g@tmall.com Female 220.248.165.145 502080553226612964 China 7/26/1966 280714.33 Food Chemist
|
||||
1454515874 90 Nicole Reid nreid2h@cisco.com Female 10.75.131.59 5610704755842409780 Philippines 12/15/1985 24922.19 Marketing Assistant
|
||||
1454542340 91 Donald Murphy dmurphy2i@fema.gov Male 127.141.234.199 China 4/10/1977 76449.81 Cost Accountant
|
||||
1454531823 92 Steven Wagner swagner2j@go.com Male 211.154.182.230 United Kingdom 249411.22
|
||||
1454539859 93 Ruth Alvarez ralvarez2k@sciencedaily.com 240.195.230.204 South Korea 7/11/1964 \N Senior Developer
|
||||
1454462055 94 Carl Oliver coliver2l@cafepress.com Male 199.184.71.24 China 6/26/1967 215279.38 Operator (╯°□°)╯︵ ┻━┻)
|
||||
1454457982 95 Teresa Ruiz truiz2m@diigo.com Female 22.118.240.24 337941028849437 Brazil 7/25/1994 243603.67 Cost Accountant
|
||||
1454465475 96 Kathryn Carter kcarter2n@fastcompany.com Female 203.255.226.40 Greece 1/23/1969 34951.57 Registered Nurse
|
||||
1454542755 97 Fred Perry fperry2o@imgur.com 46.52.134.142 3544236333368634 Indonesia 2/6/1966 \N Programmer Analyst III
|
||||
1454477885 98 Harry Perkins hperkins2p@domainmarket.com Male 235.202.132.85 374288817366643 Russia 1/9/1962 167340.53 Physical Therapy Assistant
|
||||
1454509699 99 Bobby Hicks bhicks2q@wix.com Male 253.252.57.121 3555445397654443 United States 8/10/1964 238304.33 Quality Control Specialist Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮
|
||||
1454515572 100 Tammy Dunn tdunn2r@list-manage.com Female 162.156.75.67 Brazil 4/24/1980 163106.38 Sales Representative
|
||||
=== Try load data from userdata3.parquet
|
||||
1454515666 1 Ernest Fuller efuller0@examiner.com Male 106.72.28.74 5610608195667267 Israel 140639.36
|
||||
1454536327 2 Anthony Foster afoster1@weibo.com Male 156.243.130.166 4508242795214771 Indonesia 1/16/1998 172843.61 Developer II 👾 🙇 💁 🙅 🙆 🙋 🙎 🙍
|
||||
1454466139 3 Ryan Montgomery rmontgomery2@mozilla.org Male 28.55.168.128 Colombia 11/21/1978 204620.66 Developer I ␢
|
||||
1454473204 4 Brenda Nelson bnelson3@photobucket.com Female 185.81.160.85 Guatemala 10/29/1998 260474.12 GIS Technical Architect
|
||||
1454458516 5 Jacqueline Ellis jellis4@amazon.com Female 158.137.238.6 Russia 7/12/1959 286038.78 Marketing Assistant
|
||||
1454528894 6 Paul Ferguson pferguson5@gmpg.org Male 141.122.136.144 30501574577558 Thailand 241518.24
|
||||
1454489945 7 Linda Hunt lhunt6@prlog.org Female 104.179.97.82 Russia 3/30/1988 192756.38 Professor
|
||||
1454486691 8 Frances Kim fkim7@blog.com Female 28.77.158.48 676306013856639159 Indonesia 188511.28 <svg><script>0<1>alert(\'XSS\')</script>
|
||||
1454487153 9 Jason Matthews jmatthews8@google.co.uk Male 72.129.239.24 3534550235909507 China 7/29/1982 238068.56 Web Designer III
|
||||
1454519282 10 Carolyn Elliott celliott9@cpanel.net Female 51.211.70.30 3563436733386899 Indonesia 4/28/1977 132718.26 Research Nurse
|
||||
1454473379 11 Thomas Mills tmillsa@psu.edu Male 104.114.227.199 5018278895598921190 Russia 236386.69
|
||||
1454534367 12 Russell Lee rleeb@howstuffworks.com Male 193.165.137.217 China 280252.36 🐵 🙈 🙉 🙊
|
||||
1454525264 13 Chris Bailey cbaileyc@redcross.org Male 246.109.118.154 30485245023962 Thailand 11/26/1970 200218.34 Research Assistant I
|
||||
1454457712 14 Eric Parker eparkerd@usa.gov Male 25.73.91.135 5602249431899032 Russia 8/12/1986 102832.54 Tax Accountant
|
||||
1454526788 15 Anne Robertson arobertsone@geocities.jp Female 209.77.27.30 Armenia 168201.04
|
||||
1454494278 16 Angela Gonzalez agonzalezf@state.gov Female 118.77.43.191 Sweden 7/1/1972 161220.37 Database Administrator I
|
||||
1454488522 17 Edward Moreno emorenog@hp.com Male 200.50.125.67 3559979696602303 France 8/17/1966 144551.41 Chief Design Engineer
|
||||
1454496145 18 Roy Murray rmurrayh@sphinn.com Male 91.52.226.221 3546330084792460 Portugal 285872.87 𠜎𠜱𠝹𠱓𠱸𠲖𠳏
|
||||
1454492939 19 Louis Willis lwillisi@hp.com 14.132.82.250 Philippines 8/1/1980 \N Director of Sales
|
||||
1454530172 20 Edward Perez eperezj@china.com.cn Male 24.152.201.59 3571014044514515 Indonesia 29515.23
|
||||
1454518522 21 Nicole Price npricek@cpanel.net Female 4.21.204.142 Peru 5/8/1978 154023.3 Office Assistant III
|
||||
1454496552 22 Virginia Nichols vnicholsl@ning.com Female 160.202.18.170 30166467912021 Greece 5/10/1966 145509.34 Programmer II
|
||||
1454474290 23 Katherine Roberts krobertsm@hostgator.com Female 247.21.118.188 Cuba 192723.43
|
||||
1454522256 24 Emily Sullivan esullivann@sakura.ne.jp Female 33.152.103.14 4074771539744796 Indonesia 6/28/1965 36127.55 VP Sales
|
||||
1454527958 25 Susan Turner sturnero@google.pl 150.94.47.96 374283138983226 United States \N
|
||||
1454540961 26 Fred Jenkins fjenkinsp@walmart.com Male 219.195.7.86 China 3/23/1965 69388.75 Human Resources Assistant I
|
||||
1454496916 27 Jane Torres jtorresq@photobucket.com Female 147.220.219.158 5002353015111222 Indonesia 9/29/1997 226788.25 Occupational Therapist
|
||||
1454508711 28 Louis Patterson lpattersonr@wp.com Male 158.176.255.43 5100145505218793 China 9/20/1993 30309.45 VP Quality Control
|
||||
1454538643 29 Brandon Wagner bwagners@slashdot.org Male 124.203.101.37 6771208405057819279 Iraq 10/3/1959 95522.88 Research Associate
|
||||
1454484725 30 Amy Jenkins ajenkinst@wikia.com Female 21.0.126.111 3542005201579396 Ethiopia 9/26/1984 167682.84 Tax Accountant """\'""\'""\'\'\'"""
|
||||
1454513613 31 Timothy Frazier tfrazieru@toplist.cz 100.218.94.178 China 5/17/1963 \N Director of Sales 0.00
|
||||
1454463548 32 Phillip Meyer pmeyerv@live.com Male 184.208.76.39 3541248561759148 France 11/3/1974 245572.41 Nurse
|
||||
1454528692 33 Joe Wallace jwallacew@mail.ru Male 167.122.66.246 5602246900361320 Russia 64311.11
|
||||
1454466352 34 Walter Rivera wriverax@de.vu Male 67.169.221.120 5366484318587717 Russia 1/28/1983 271690.8 Programmer Analyst I
|
||||
1454480715 35 Lois Mcdonald lmcdonaldy@paypal.com 44.140.199.251 Portugal \N
|
||||
1454499439 36 William Edwards wedwardsz@acquirethisname.com Male 69.187.29.7 3528411636358679 Egypt 2/23/1958 252476.42 Financial Analyst Œ„´‰ˇÁ¨ˆØ∏”’
|
||||
1454460587 37 Frank Stevens fstevens10@samsung.com Male 61.182.84.178 Philippines 3/19/1958 47326.14 VP Product Management
|
||||
1454536874 38 Albert Martinez amartinez11@godaddy.com Male 76.139.124.119 Ukraine 11/11/1994 57220.55 Software Engineer III
|
||||
1454504601 39 Stephanie Stewart sstewart12@elpais.com Female 104.98.138.203 4905603900430425379 Syria 2/11/1975 250118.59 Developer I
|
||||
1454521301 40 Annie Stevens astevens13@slate.com Female 214.146.163.79 3553338148582934 South Africa 11/8/1983 12963.52 Systems Administrator I -1E2
|
||||
1454460788 41 Joyce Butler jbutler14@csmonitor.com Female 88.243.175.236 Indonesia 135825.27
|
||||
1454460615 42 Carlos Armstrong carmstrong15@technorati.com Male 85.22.216.153 3532000356234436 Indonesia 23446.58
|
||||
1454537073 43 Frances Kelly fkelly16@springer.com Female 146.38.150.164 4026344347458956 China 242916.36
|
||||
1454507861 44 Amanda Pierce apierce17@phpbb.com Female 214.208.248.216 201678379872880 Faroe Islands 6/1/1990 38037.1 Software Test Engineer II
test
|
||||
1454464352 45 Alan Torres atorres18@histats.com Male 117.124.224.32 4844818559255911 Israel 114759.77
|
||||
1454528513 46 Nancy Brown nbrown19@lycos.com Female 98.103.84.222 4041378619584967 Portugal 9/16/1972 170596.79 GIS Technical Architect
|
||||
1454518979 47 Kenneth Larson klarson1a@cnet.com Male 71.35.49.21 Philippines 2/3/1990 178010.01 Staff Scientist
|
||||
1454536052 48 Thomas Lawson tlawson1b@canalblog.com Male 209.50.87.12 50201361710870252 Ukraine 10/5/1987 35118.14 Software Test Engineer II
|
||||
1454488725 49 Debra Gomez dgomez1c@lycos.com Female 26.107.134.220 30508009555281 China 9/10/1979 129186.15 Electrical Engineer
|
||||
1454489047 50 Deborah Price dprice1d@google.nl Female 207.145.225.232 4055636387933119 Russia 1/26/1983 165945.4 Dental Hygienist ␡
|
||||
1454478467 51 Diane Banks dbanks1e@wikispaces.com Female 22.253.228.131 China 39139.44
|
||||
1454468949 52 Marie Woods mwoods1f@bbc.co.uk 41.109.183.128 Russia 2/20/1989 \N Human Resources Manager
|
||||
1454489570 53 Randy Romero rromero1g@tamu.edu Male 134.90.91.230 Indonesia 11/30/1960 230039.26 Professor
|
||||
1454528266 54 Brandon Fox bfox1h@ocn.ne.jp Male 157.130.211.215 6391404048298002 China 2/1/1979 223567.43 Programmer III
|
||||
1454513948 55 Albert Smith asmith1i@jalbum.net Male 167.84.86.133 3530479136988416 Ukraine 263457.42
|
||||
1454467976 56 Jeremy Black jblack1j@sphinn.com Male 181.85.144.139 Poland 194896.66
|
||||
1454463146 57 Marilyn Shaw mshaw1k@bloomberg.com Female 141.42.43.91 30110642387063 China 178473.04
|
||||
1454540383 58 Stephanie Diaz sdiaz1l@who.int Female 127.174.128.199 3571927033182087 Indonesia 3/25/1974 135570.75 Paralegal
|
||||
1454492347 59 Christopher Reynolds creynolds1m@sun.com Male 81.89.26.14 China 5/29/1956 147519.69 Account Executive
|
||||
1454529565 60 Douglas Holmes dholmes1n@weather.com Male 99.22.29.208 Honduras 11/29/2000 45372.51 VP Accounting œ∑´®†¥¨ˆøπ“‘
|
||||
1454485707 61 Howard Rogers hrogers1o@sciencedirect.com Male 222.229.220.65 Ukraine 2/26/1995 143231.21 Account Executive
|
||||
1454489894 62 Melissa Washington mwashington1p@cmu.edu Female 32.151.71.144 374288910553246 Czech Republic 2/24/1966 266547.15 Human Resources Manager
|
||||
1454541195 63 Margaret Flores mflores1q@usnews.com Female 108.42.248.249 France 8/25/1999 110594.3 Data Coordiator
|
||||
1454458233 64 Rose Fernandez rfernandez1r@usgs.gov Female 199.141.221.229 3564435193511524 Brazil 5/5/1972 196329.18 Senior Cost Accountant
|
||||
1454472500 65 Julie Mendoza jmendoza1s@unesco.org Female 137.192.7.121 3586331607810566 Cuba 149157.14
|
||||
1454515883 66 Earl Sanders esanders1t@github.com Male 179.122.203.141 3561742181897127 Vietnam 215545.14 𠜎𠜱𠝹𠱓𠱸𠲖𠳏
|
||||
1454460569 67 Eric Armstrong earmstrong1u@arizona.edu Male 128.202.252.112 4041590574307 Indonesia 5/30/1973 75347.18 Web Designer II
|
||||
1454532395 68 Joyce Perez jperez1v@dmoz.org Female 145.86.183.96 Canada 3/29/1975 115579.36 Director of Sales
|
||||
1454524697 69 Sanchez Female 100.163.22.106 Russia 127045.66
|
||||
1454489862 70 Laura Romero lromero1x@godaddy.com Female 237.131.116.77 3539134691869631 Madagascar 12/20/1957 208213.96 Business Systems Development Analyst
|
||||
1454538359 71 Maria Thomas mthomas1y@lycos.com Female 12.113.23.220 5602229580950679 China 10/29/1990 88961.11 Nurse
|
||||
1454520121 72 Victor Romero vromero1z@reference.com Male 208.79.116.61 6767842086446946518 Brazil 209207.14
|
||||
1454510241 73 Betty Hayes bhayes20@goo.ne.jp Female 153.254.225.4 201881044698306 Jordan 3/9/1970 173372.32 VP Accounting
|
||||
1454465142 74 Roger Jacobs rjacobs21@rediff.com Male 51.122.147.153 36548589951538 Benin 7/18/1977 18545.32 Paralegal 1/2
|
||||
1454470850 75 Ruth Thompson rthompson22@reuters.com Female 220.41.116.217 67067442144878124 Croatia 6/30/1972 167279 Account Executive ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ
|
||||
1454515259 76 Theresa James tjames23@un.org Female 31.135.76.146 China 12/28/1974 188732.88 Financial Advisor
|
||||
1454517695 77 Pamela Collins pcollins24@nih.gov Female 21.45.74.249 490591529416018576 Moldova 7/28/1998 252394.72 Marketing Assistant 🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧
|
||||
1454523543 78 Adam Ward award25@telegraph.co.uk Male 242.85.131.30 201794641891036 Brazil 276446.24
|
||||
1454458334 79 Robin Price rprice26@jugem.jp Female 235.141.108.176 5610389618618837 Russia 1/7/1977 120293.75 Biostatistician IV
|
||||
1454529469 80 Barbara Ryan bryan27@usda.gov Female 58.0.103.48 30526192141883 Philippines 198959.68
|
||||
1454497076 81 Melissa Gibson mgibson28@census.gov Female 54.212.104.159 3529828486403520 Bhutan 7/29/1990 224163.74 Senior Developer
|
||||
1454467979 82 Carolyn Morris cmorris29@cbslocal.com Female 86.106.24.230 Portugal 2/12/1958 87727.95 Quality Engineer 0.00
|
||||
1454484623 83 Stephen Harris sharris2a@un.org Male 247.19.48.100 Russia 4/9/1983 284559.55 Product Engineer ١٢٣
|
||||
1454476730 84 Linda Campbell lcampbell2b@mapy.cz Female 28.62.77.24 6759510168753943 Peru 2/27/1982 16435.84 VP Quality Control ␡
|
||||
1454463822 85 Brian Daniels bdaniels2c@ovh.net Male 143.36.66.196 Ecuador 7/6/1966 148952.4 Information Systems Manager
|
||||
1454458337 86 West Female 247.72.186.254 3541609903446548 Indonesia 12/11/1984 132544.98 Physical Therapy Assistant
|
||||
1454518267 87 Timothy Moore tmoore2e@printfriendly.com Male 109.229.170.253 Samoa 42697.58
|
||||
1454523368 88 Eric Walker ewalker2f@mozilla.com Male 243.173.35.155 Thailand 5/29/1970 48715.81 Engineer IV
|
||||
1454486082 89 Maria Arnold marnold2g@google.com.br Female 58.58.77.228 3589928770150089 Uruguay 3/14/1956 64067 Geological Engineer
|
||||
1454541738 90 Edward Garza egarza2h@moonfruit.com Male 43.21.138.236 New Zealand 3/27/1965 139025.58 Structural Analysis Engineer
|
||||
1454490484 91 Alice Young ayoung2i@typepad.com Female 120.255.189.145 630468343049978318 Serbia 4/18/1981 17663.49 Automation Specialist I
|
||||
1454512586 92 Kenneth Powell kpowell2j@unicef.org Male 238.251.71.34 3586683330377036 Philippines 2/10/1955 68010.82 Social Worker
|
||||
1454472784 93 Kelly Bell kbell2k@hud.gov Female 176.210.241.20 Russia 11/17/1984 57640.41 Web Developer I
|
||||
1454490007 94 David Garcia dgarcia2l@tmall.com Male 100.18.61.166 Paraguay 201297.71
|
||||
1454504627 95 Maria Harvey mharvey2m@nydailynews.com Female 192.209.117.213 67593619471737741 Mongolia 283649.67
|
||||
1454505519 96 Chris Hall chall2n@imageshack.us Male 241.96.162.44 5594268668744901 Russia 1/3/1964 67656.08 Web Designer II
|
||||
1454481847 97 Roger Simpson rsimpson2o@nymag.com Male 80.110.89.28 493618903455317947 Indonesia 76354.79
|
||||
1454515032 98 Richard Nelson rnelson2p@simplemachines.org Male 43.54.4.82 Brazil 237205.58 NIL
|
||||
1454461907 99 Ruth Howell rhowell2q@cornell.edu Female 190.170.191.14 China 5/2/1969 286113.38 Senior Quality Engineer
|
||||
1454524115 100 Judith Garza jgarza2r@usnews.com Female 204.216.154.40 Ecuador 6/22/1962 256786.42 Teacher
|
||||
=== Try load data from userdata4.parquet
|
||||
1454599685 1 Howard Morgan hmorgan0@typepad.com 158.178.195.62 Colombia 12/2/1992 \N Data Coordiator
|
||||
1454581720 2 Jessica Schmidt jschmidt1@google.com Female 168.118.247.35 3565285464047941 Luxembourg 4/14/1995 222396.46 Research Nurse nil
|
||||
1454608896 3 Beverly Flores bflores2@wikipedia.org Female 51.97.88.173 Sweden 2/15/1965 141112.8 Actuary
|
||||
1454575874 4 Marilyn Sanchez msanchez3@intel.com Female 186.206.142.162 China 8/6/1969 87914.29 Structural Engineer
|
||||
1454567588 5 Janice Mitchell jmitchell4@sina.com.cn Female 205.187.116.241 5610719759939376962 Poland 7/4/1995 269297.4 Systems Administrator I
|
||||
1454545227 6 William Williamson wwilliamson5@trellian.com Male 44.86.73.201 201849487683564 Indonesia 12/6/1993 95352.25 Librarian 1E+02
|
||||
1454602212 7 Jack James jjames6@sogou.com Male 59.184.76.208 3552911855395632 Indonesia 11/25/1968 82549.73 Compensation Analyst test
|
||||
1454556325 8 Jesse Arnold jarnold7@soup.io Male 7.25.90.13 5100177285965756 Brazil 10/19/1987 257968.86 Executive Secretary
|
||||
1454622627 9 Lori Woods lwoods8@fastcompany.com Female 147.157.215.9 4844532485570190 Indonesia 12/26/1975 186145.91 Health Coach I
|
||||
1454601455 10 Juan Evans jevans9@zimbio.com Male 150.132.218.181 3578802610769023 Philippines 5/29/1988 129369.52 Social Worker
|
||||
1454579490 11 Roy Matthews rmatthewsa@ucsd.edu Male 203.239.85.224 5100135134598509 Russia 192057.84
|
||||
1454586145 12 Kenneth King kkingb@zimbio.com 9.103.96.206 675913564329481832 Greece \N
|
||||
1454568600 13 Raymond Green rgreenc@fc2.com Male 163.9.101.43 United States 1/28/1984 225094.01 Budget/Accounting Analyst III
|
||||
1454603300 14 Lillian Stephens lstephensd@psu.edu Female 31.50.183.23 630455284969060148 Finland 6/1/1973 19354.85 Information Systems Manager
|
||||
1454560697 15 Mary Gonzales mgonzalese@wired.com Female 91.42.17.109 3560985473023370 France 5/7/1966 23746.36 Compensation Analyst
|
||||
1454561895 16 Roger Mason rmasonf@newyorker.com Male 169.33.172.204 3545036194973129 Norway 165855.47
|
||||
1454604198 17 Diane Cole dcoleg@unesco.org Female 157.11.85.209 Philippines 6/9/1994 105028.67 Assistant Manager
|
||||
1454601270 18 Annie Hunt ahunth@ocn.ne.jp Female 169.47.232.187 5100177440436305 Poland 3/30/1992 266071.6 Legal Assistant
|
||||
1454600872 19 Jacqueline Bradley jbradleyi@epa.gov Female 83.241.214.77 5100131814165289 Indonesia 12/1/1971 55440.88 Dental Hygienist
|
||||
1454600248 20 Kathy Russell krussellj@joomla.org Female 158.32.89.44 3585627581021729 Indonesia 11/20/1999 29602.23 Sales Representative
|
||||
1454551378 21 Beverly Barnes bbarnesk@europa.eu Female 189.157.45.179 3548552521258155 Bulgaria 4/21/1956 37295.89 Human Resources Assistant II
|
||||
1454604764 22 Roy Morris rmorrisl@scribd.com 201.51.139.86 China \N
|
||||
1454569146 23 Alice Ramos aramosm@utexas.edu Female 185.168.142.9 374622349140748 Philippines 4/20/1966 138021.54 Paralegal
|
||||
1454597325 24 Todd Kelly tkellyn@fotki.com Male 46.19.203.86 4041599550654 Portugal 3/14/1998 84343.96 Executive Secretary () { _; } >_[$($())] { touch /tmp/blns.shellshock2.fail; }
|
||||
1454551797 25 Lawrence Ramos lramoso@imageshack.us Male 5.96.81.47 5010121401502407 Palestinian Territory 1/26/1994 265545.92 Operator
|
||||
1454605654 26 Jennifer Rogers jrogersp@so-net.ne.jp Female 31.48.54.193 5610097864736794573 Yemen 6/5/1992 138365.1 Computer Systems Analyst II
|
||||
1454603775 27 Kimberly Morgan kmorganq@seesaa.net Female 154.61.255.47 China 14486.75 0/0
|
||||
1454606635 28 Jessica Marshall jmarshallr@mtv.com Female 164.101.35.148 3531025977662047 Brazil 7/2/1987 216211.96 VP Accounting
|
||||
1454597817 29 Katherine Gordon kgordons@phoca.cz Female 248.30.182.15 5602230546469168 Italy 10/11/1956 48478.51 Librarian
|
||||
1454557995 30 Jennifer Phillips jphillipst@pcworld.com Female 61.30.215.16 5100179891124018 Sweden 9/3/1967 254808.27 Software Consultant
|
||||
1454613512 31 Gerald Nguyen gnguyenu@seesaa.net Male 9.13.167.17 67717376159922001 China 9/3/1972 285571.49 Tax Accountant
|
||||
1454625134 32 Rose Ellis rellisv@walmart.com Female 250.88.7.15 3580333318847248 China 4/23/1987 47695.25 Systems Administrator II 和製漢語
|
||||
1454622672 33 Margaret Grant mgrantw@bbb.org Female 227.165.116.192 3565645038486711 Slovenia 12/10/1992 106452.61 Account Coordinator
|
||||
1454568796 34 Jessica Wells jwellsx@blogtalkradio.com Female 185.189.187.186 Azerbaijan 9/13/1996 173164.24 Project Manager
|
||||
1454582324 35 Henry Jenkins hjenkinsy@mit.edu Male 10.83.90.235 5602221853972654 China 11/12/1975 25740.85 Recruiter 田中さんにあげて下さい
|
||||
1454545876 36 Earl Mccoy emccoyz@bigcartel.com Male 161.179.122.154 5038877150819047588 Japan 10/12/1976 114766.43 Software Test Engineer IV 0.00
|
||||
1454618571 37 Paul Knight pknight10@google.cn Male 182.38.37.173 5020715558032859593 Ukraine 10/25/1971 199366 Social Worker
|
||||
1454576590 38 Martha Clark mclark11@usda.gov 189.166.203.239 South Korea \N
|
||||
1454601033 39 Clarence Bryant cbryant12@bigcartel.com Male 120.218.175.241 Poland 9/1/1968 257075.65 Professor 田中さんにあげて下さい
|
||||
1454548319 40 Joan Price jprice13@mtv.com 233.4.158.135 3584182571037112 Portugal \N
|
||||
1454573152 41 Anthony Ford aford14@chicagotribune.com Male 100.240.61.163 Iran 6/26/1992 152800.71 Senior Cost Accountant
|
||||
1454595667 42 Roger Henderson rhenderson15@sitemeter.com Male 206.185.213.252 3560757094744860 Brazil 6/26/1970 40949.78 Nurse
|
||||
1454591751 43 Kenneth Butler kbutler16@youtu.be Male 2.12.57.207 3586795027670612 Thailand 3/26/1987 165121.43 Research Assistant IV
|
||||
1454566774 44 Kenneth Wright kwright17@google.de Male 241.213.136.95 5602246924892961 Belarus 10/15/1995 227583.86 Speech Pathologist
|
||||
1454617513 45 Aaron Smith asmith18@flickr.com Male 185.244.9.145 China 11/25/1972 286108.94 Paralegal
|
||||
1454574169 46 Amy Matthews amatthews19@t.co Female 206.172.83.152 5002357749310919 China 39365.73
|
||||
1454586102 47 Janet Cooper jcooper1a@dailymotion.com Female 9.148.129.197 Comoros 8/2/1968 168391.72 Senior Cost Accountant
|
||||
1454601994 48 Russell Stewart rstewart1b@edublogs.org Male 113.23.229.63 675993663890158630 Thailand 4/17/1963 57609.96 Senior Editor
|
||||
1454582839 49 Howard Elliott helliott1c@illinois.edu Male 225.208.151.89 3577055641640512 Mongolia 176999.03
|
||||
1454573932 50 Keith Lane klane1d@eventbrite.com Male 250.24.9.55 Russia 5/27/1983 80452.19 Budget/Accounting Analyst II
|
||||
1454583292 51 Jimmy Richardson jrichardson1e@vimeo.com Male 152.87.188.99 China 6/30/1960 194774.28 Assistant Manager ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙
|
||||
1454623280 52 Justin Bryant jbryant1f@github.com Male 245.48.63.169 3562259518717901 Guatemala 10/28/1960 144419.21 Database Administrator III
|
||||
1454582337 53 Ruby Allen rallen1g@cyberchimps.com Female 238.148.148.156 3541217939068433 Japan 248388.64
|
||||
1454578101 54 Ward Male 120.88.247.59 Russia 125075.78
|
||||
1454546163 55 Nancy Stephens nstephens1i@godaddy.com Female 211.0.225.116 Mongolia 20805.69
|
||||
1454580277 56 Dorothy Kennedy dkennedy1j@mlb.com Female 177.229.94.96 Indonesia 3/26/1984 118098.45 Legal Assistant
|
||||
1454597567 57 Katherine Ferguson kferguson1k@google.cn Female 185.67.150.20 5038883804496681778 Russia 1/28/1982 255040.89 Chemical Engineer
|
||||
1454609494 58 Norma Daniels ndaniels1l@adobe.com Female 72.161.56.76 5602256058813840 Lithuania 5/30/1986 228396.52 Junior Executive
|
||||
1454549169 59 John Rogers jrogers1m@miitbeian.gov.cn Male 91.131.170.178 3578552255653202 Croatia 9/25/1971 164207.53 Administrative Assistant III
|
||||
1454627177 60 Lisa Nguyen lnguyen1n@phpbb.com Female 99.51.36.31 3587343436670904 Ghana 6/10/1970 213963.71 Research Nurse
|
||||
1454564279 61 Roy Carter rcarter1o@cmu.edu Male 154.176.171.103 3581163353975466 Germany 7/21/1980 216294.79 Marketing Manager
|
||||
1454546835 62 Donna Gonzalez dgonzalez1p@instagram.com Female 81.57.136.186 China 3/3/1975 181562.45 Junior Executive
|
||||
1454610240 63 Medina Female 84.135.250.216 3579667388606106 Indonesia 7/18/1958 80267.81 Accounting Assistant III
|
||||
1454613635 64 Samuel Bishop sbishop1r@npr.org Male 87.38.89.122 3534693555244475 Indonesia 97009.57
|
||||
1454551032 65 Jerry Bradley jbradley1s@umn.edu Male 184.79.105.210 5602258009829107 China 3/13/1984 50863.85 Junior Executive
|
||||
1454555641 66 Ralph Castillo rcastillo1t@nba.com Male 96.246.167.130 6373313274491359 United States 5/14/1986 13099.91 Health Coach III
|
||||
1454615262 67 Margaret Vasquez mvasquez1u@tuttocitta.it Female 206.79.16.146 Poland 2/19/1973 281677.49 Quality Engineer
|
||||
1454564143 68 Shawn Payne spayne1v@privacy.gov.au Male 233.32.138.222 6380689013620353 China 5/29/1996 152175.99 Help Desk Operator
|
||||
1454560234 69 Bonnie Hart bhart1w@networkadvertising.org Female 92.158.145.51 5100141023990187 Philippines 8/10/1976 270525.27 Clinical Specialist
|
||||
1454557523 70 Ruby Phillips rphillips1x@google.com.hk Female 180.71.236.34 Russia 12/29/1980 175991.04 Analog Circuit Design manager
|
||||
1454615738 71 Michael Watkins mwatkins1y@infoseek.co.jp Male 20.48.165.57 6304600968704640 United States 277599.55
|
||||
1454549243 72 Walter Hill whill1z@fda.gov Male 169.189.26.193 Philippines 4/25/1989 170789.26 Executive Secretary
|
||||
1454590835 73 Deborah Garcia dgarcia20@ehow.com Female 176.149.163.227 3578754434491831 Brazil 213787.81 !@#$%^&*()
|
||||
1454592567 74 Sandra Lee slee21@hatena.ne.jp Female 196.212.29.124 China 12/25/1976 190399.56 Assistant Media Planner ../../../../../../../../../../../etc/passwd%00
|
||||
1454570808 75 Steve Shaw sshaw22@photobucket.com Male 56.32.41.109 3561652394394350 Macedonia 3/2/1961 180130.01 Recruiting Manager
|
||||
1454627208 76 Jerry Hansen jhansen23@newyorker.com Male 180.99.147.201 36652106508977 Ukraine 4/27/1992 201900.61 Chief Design Engineer
|
||||
1454595596 77 Joshua Harris jharris24@china.com.cn Male 93.173.2.87 3566428334927244 Greece 8/27/1987 189392.3 Account Representative III
|
||||
1454615457 78 Clarence Simmons csimmons25@dailymotion.com Male 30.117.30.162 3571762129017388 Philippines 180434.25
|
||||
1454604481 79 Denise Bishop dbishop26@wsj.com Female 251.230.214.155 3556286320706184 Philippines 10/18/1999 194426.62 Geologist II
|
||||
1454614660 80 Jason Warren jwarren27@shop-pro.jp Male 197.52.56.75 4913424719275497 China 8/26/1998 92571.41 Accounting Assistant II
|
||||
1454592347 81 Jesse Reynolds jreynolds28@amazon.com 46.11.66.226 Portugal 10/6/1977 \N Administrative Officer <img src=x onerror=alert(\'hi\') />
|
||||
1454579746 82 Ruby Lynch rlynch29@xing.com Female 50.190.120.2 340177638737200 Portugal 5/7/1981 159634.3 Sales Associate
|
||||
1454578991 83 Phillip Olson polson2a@marriott.com Male 38.205.137.200 4905640692662084 Indonesia 1/8/1987 161622.19 Assistant Media Planner
|
||||
1454574785 84 Sean Watkins swatkins2b@ft.com Male 22.52.43.242 6759770945991352 China 2/7/1964 103943.54 Senior Financial Analyst
|
||||
1454603364 85 Teresa Parker tparker2c@shinystat.com Female 36.134.254.22 4844522554899455 China 11/24/1987 137739.95 Chief Design Engineer
|
||||
1454629483 86 Anthony Harris aharris2d@uiuc.edu Male 142.3.139.220 China 2/26/1975 194926.38 Senior Quality Engineer
|
||||
1454617821 87 Donna Ray dray2e@wikimedia.org Female 122.113.90.100 3548062974262878 Peru 7/24/1964 121072.45 Clinical Specialist åß∂ƒ©˙∆˚¬…æ
|
||||
1454567199 88 Craig Lewis clewis2f@purevolume.com Male 106.156.113.218 3535698276698452 Slovenia 113013.98
|
||||
1454606687 89 Adam Turner aturner2g@delicious.com Male 94.92.15.85 3530109929436477 Sweden 3/18/1976 233715.21 Nurse Practicioner
|
||||
1454565501 90 Terry Parker tparker2h@hc360.com Male 189.36.77.133 China 4/2/1987 232623.76 GIS Technical Architect
|
||||
1454604198 91 Juan Shaw jshaw2i@ehow.com Male 222.127.83.190 493610712595084582 Democratic Republic of the Congo 220779.8
|
||||
1454592729 92 Nicole Russell nrussell2j@angelfire.com Female 247.123.224.36 4120730296866808 Germany 90748.17
|
||||
1454563310 93 Robin Ray rray2k@t.co Female 217.150.228.185 Sweden 9/28/1968 175995.93 Human Resources Assistant III """"
|
||||
1454546406 94 Debra Sims dsims2l@meetup.com Female 150.198.93.159 5602215295621929 Brazil 12/21/1984 276704.96 Office Assistant IV
|
||||
1454550946 95 Teresa Harrison tharrison2m@t.co Female 111.107.40.16 5007666196554596 Philippines 5/12/1959 129967.9 GIS Technical Architect
|
||||
1454603302 96 Tammy Ward tward2n@51.la Female 148.119.68.255 3568303818489466 France 8/20/1984 63550.31 General Manager
|
||||
1454605950 97 Louis Harrison lharrison2o@usgs.gov Male 134.95.151.68 5100179516595931 Ukraine 9/27/1986 169379.73 Payment Adjustment Coordinator
|
||||
1454579744 98 Charles Simpson csimpson2p@mashable.com Male 241.0.124.209 3562073915241617 Sweden 9/20/1956 116909.68 Biostatistician IV
|
||||
1454584629 99 Maria Richards mrichards2q@rediff.com Female 108.13.82.54 Azerbaijan 1/23/1978 34000.68 Clinical Specialist 社會科學院語學研究所
|
||||
1454622328 100 Diana Hall dhall2r@oaic.gov.au Female 6.215.107.104 3528227609255704 Russia 8/29/1996 221168.13 Assistant Professor
|
||||
=== Try load data from userdata5.parquet
|
||||
1454582047 1 Kelly Ortiz kortiz0@omniture.com Female 252.115.158.159 3537905681760845 Russia 4/23/1980 277302.99 Nurse
|
||||
1454626441 2 Sharon Carroll scarroll1@disqus.com Female 29.217.252.62 56022458507191696 Indonesia 8/28/1992 209258.05 Recruiter åß∂ƒ©˙∆˚¬…æ
|
||||
1454608790 3 Ruth Ross rross2@cbc.ca Female 220.224.80.32 3589642396435648 Benin 6/13/1994 18270.7 Design Engineer
|
||||
1454601797 4 Kelly Meyer kmeyer3@cornell.edu Female 255.65.123.124 Philippines 1/6/1967 17485.27 Cost Accountant
|
||||
1454584344 5 Irene Jordan ijordan4@pagesperso-orange.fr Female 162.57.23.136 3576848317807089 United States 1/4/1997 163979.38 Programmer Analyst III
|
||||
1454547199 6 Irene Wells iwells5@fema.gov Female 85.5.67.113 Iran 74337.42
|
||||
1454604109 7 Jessica Grant jgrant6@gov.uk Female 127.235.63.12 3536345996536989 Ecuador 1/27/1969 128665.86 Payment Adjustment Coordinator
|
||||
1454549472 8 Norma Wright nwright7@prweb.com Female 81.219.156.187 63047796765720509 Indonesia 6/27/1997 68907.46 Office Assistant III
|
||||
1454611735 9 Brandon Snyder bsnyder8@artisteer.com Male 102.118.191.191 490339322609872711 Malta 10/6/1981 71646.15 Physical Therapy Assistant
|
||||
1454610256 10 Stephanie Reed sreed9@who.int Female 175.52.228.75 502081312903167845 Afghanistan 8/27/1957 137924.13 Recruiter
test
|
||||
1454565105 11 Jane Armstrong jarmstronga@state.gov 202.44.98.126 374283443294665 China 10/30/1991 \N Associate Professor
|
||||
1454607247 12 Donna Coleman dcolemanb@upenn.edu Female 178.9.167.99 Vietnam 11/21/1957 93283.06 Librarian
|
||||
1454567839 13 Samuel Butler sbutlerc@hp.com Male 129.114.220.80 3587725229492688 Colombia 9/12/1984 208303.6 Compensation Analyst
|
||||
1454567413 14 Jerry Medina jmedinad@youtu.be Male 87.0.152.222 3579766249568578 Japan 8/30/1988 53502.26 Registered Nurse
|
||||
1454603317 15 Samuel Lane slanee@i2i.jp Male 225.20.25.160 Canada 9/6/1983 142643.38 GIS Technical Architect ❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙
|
||||
1454630090 16 Kathy Rice kricef@independent.co.uk Female 4.200.99.226 6709951086431189768 Philippines 52614.1
|
||||
1454575979 17 Adam Woods awoodsg@mapy.cz Male 229.247.245.218 3580417672766100 Indonesia 12/8/1987 284906.49 Payment Adjustment Coordinator
|
||||
1454555573 18 Theresa Ellis tellish@nydailynews.com Female 39.249.101.160 Belarus 6/18/1966 35216.95 Sales Representative
|
||||
1454555343 19 Christopher Brooks cbrooksi@intel.com Male 252.52.58.13 China 119492.57
|
||||
1454544139 20 Debra White dwhitej@umn.edu Female 142.140.184.111 Indonesia 47859.54
|
||||
1454559526 21 Alice Ward awardk@cafepress.com Female 14.157.183.41 3554057857533990 Vietnam 5/7/1977 117790.3 Technical Writer
|
||||
1454597106 22 Tina Wood twoodl@businesswire.com Female 201.242.103.145 3568980472135848 Sweden 3/28/1969 47283.17 Staff Scientist
|
||||
1454591306 23 Carolyn Mendoza cmendozam@army.mil 214.205.231.22 Greece \N ␡
|
||||
1454611603 24 Craig Ford cfordn@vistaprint.com Male 236.178.217.229 633110713949459104 Indonesia 12/22/1996 274187.59 Dental Hygienist
|
||||
1454618551 25 Christine Morrison cmorrisono@ask.com Female 219.71.212.187 3538407669945679 Tanzania 3/12/1991 84756.66 Executive Secretary 社會科學院語學研究所
|
||||
1454580024 26 Janice Dean jdeanp@statcounter.com Female 49.234.145.208 3537160378882698 Ukraine 8/21/1991 217443.08 Administrative Assistant III
|
||||
1454558127 27 Joan Burton jburtonq@oaic.gov.au Female 221.227.41.244 201770241278691 China 4/6/1993 256763.22 Staff Accountant I \N
|
||||
1454619460 28 Brandon Stone bstoner@discovery.com Male 1.106.6.30 30535344906416 Indonesia 7/13/1964 166396.41 Health Coach II
|
||||
1454571966 29 Sarah Hall shalls@loc.gov Female 235.168.89.65 3528746985103311 Czech Republic 11/13/1959 123411.44 Assistant Manager
|
||||
1454569447 30 Kelly Crawford kcrawfordt@typepad.com Female 152.220.24.54 3578225435679583 Poland 10/21/1970 115305.8 Chief Design Engineer
|
||||
1454609438 31 Maria Banks mbanksu@google.co.uk Female 107.120.193.133 5602224764294077 Italy 10/29/1981 213273.21 Financial Analyst
|
||||
1454546937 32 Roy Simmons rsimmonsv@telegraph.co.uk Male 21.20.158.183 5602244835346375 Mongolia 6/27/1994 13987.6 Senior Editor "<>?:""{}|_+"
|
||||
1454611880 33 Judith Williamson jwilliamsonw@hubpages.com Female 128.75.193.80 3540423032294659 Indonesia 10/19/1975 35326.68 Senior Sales Associate
|
||||
1454567714 34 Joe Arnold jarnoldx@soundcloud.com Male 170.118.207.254 4017955870878 Morocco 1/11/1991 261893.92 Mechanical Systems Engineer
|
||||
1454605829 35 Richard Griffin rgriffiny@barnesandnoble.com Male 180.74.211.58 3539729371124817 Philippines 8/23/1964 43742.89 Nurse
|
||||
1454607440 36 Billy Freeman bfreemanz@fda.gov Male 223.238.104.92 Sweden 5/19/1961 185185.85 Office Assistant I
|
||||
1454601803 37 Shawn Welch swelch10@oaic.gov.au Male 239.144.169.67 Brazil 45785.65 test
|
||||
1454626608 38 Kenneth Price kprice11@tamu.edu Male 121.107.99.253 372301962802254 China 3/1/1958 110448 Senior Sales Associate
|
||||
1454612578 39 Patricia Lawson plawson12@dailymotion.com Female 181.201.209.42 6761282787969476 Czech Republic 4/6/1956 126454.68 Staff Accountant I
|
||||
1454544201 40 Christine Alexander calexander13@aboutads.info Female 163.32.3.92 50183677518131890 China 1/14/1981 213713.99 Sales Associate
|
||||
1454599667 41 Mark Wagner mwagner14@imageshack.us Male 78.141.201.64 5007660710388524 China 3/10/1987 207149.01 Staff Scientist
|
||||
1454624139 42 Richard Armstrong rarmstrong15@baidu.com Male 229.173.184.111 3546008978147005 Indonesia 9/6/1961 52279.16 Software Engineer II
|
||||
1454618327 43 Phillip Ellis pellis16@berkeley.edu Male 183.182.90.8 3561054399919267 Brazil 1/31/1994 59681.04 Analog Circuit Design manager \N
|
||||
1454614376 44 Beverly Perry bperry17@nasa.gov Female 47.117.191.34 Vietnam 9/15/1983 41351.4 Database Administrator IV 1E+02
|
||||
1454559810 45 Carolyn Parker cparker18@soup.io Female 124.227.162.209 3555739550936724 Belarus 1/29/1988 162142.52 Chemical Engineer
|
||||
1454605899 46 Martin Knight mknight19@umn.edu Male 173.169.240.26 5387225346178705 China 9/4/1994 200217.98 Assistant Professor
|
||||
1454580952 47 Michael Stephens mstephens1a@altervista.org Male 181.48.175.67 Honduras 9/10/1958 248987 Environmental Specialist
|
||||
1454545483 48 Frances Willis fwillis1b@linkedin.com 102.186.57.75 4175001067968122 Philippines 8/3/1998 \N VP Marketing
|
||||
1454618611 49 Gary Fox gfox1c@paginegialle.it Male 80.221.129.42 Belgium 261175.89
|
||||
1454605416 50 Cynthia Bailey cbailey1d@microsoft.com Female 210.74.99.47 Indonesia 4/23/1989 38171.71 Sales Associate
|
||||
1454547938 51 Terry Mitchell tmitchell1e@soundcloud.com Male 64.34.240.165 Peru 101626.65
|
||||
1454607980 52 Edward Webb ewebb1f@123-reg.co.uk Male 208.114.99.74 6386981481832436 Jordan 235457.76
|
||||
1454544152 53 Ralph Simmons rsimmons1g@google.cn Male 180.159.250.232 3554040768947822 Pakistan 111413.03
|
||||
1454606074 54 Sara Kelly skelly1h@wix.com Female 97.243.219.196 3560161969850482 Portugal 12/11/1963 185788.86 Chief Design Engineer
|
||||
1454577433 55 Donna Dean ddean1i@ftc.gov Female 91.232.196.181 Indonesia 285481.87
|
||||
1454545198 56 Jane Murray jmurray1j@apache.org Female 174.82.82.71 5100149053428994 China 7/15/1973 57832.83 Software Consultant
|
||||
1454582927 57 Walter Cook wcook1k@webnode.com Male 4.223.17.187 5048374925679138 China 7/19/1979 164010.7 Accounting Assistant IV
|
||||
1454553504 58 Bonnie Hanson bhanson1l@squidoo.com Female 209.131.133.80 3546400025538536 China 8/6/1989 207065.08 Recruiter
|
||||
1454583403 59 Patrick Kelly pkelly1m@usgs.gov Male 92.132.67.51 30129138653846 Poland 10/22/1984 281404.55 Librarian
|
||||
1454551706 60 George Ross gross1n@sciencedaily.com Male 77.33.183.49 201938854334636 Portugal 2/17/1986 96243.17 Teacher
|
||||
1454572199 61 Joan Harvey jharvey1o@biglobe.ne.jp Female 244.175.30.138 5479197462183554 Indonesia 12/30/1974 269498 Nurse Practicioner åß∂ƒ©˙∆˚¬…æ
|
||||
1454555502 62 Louise Stone lstone1p@1und1.de Female 230.79.20.66 Indonesia 1/14/1980 44528.64 Senior Editor
|
||||
1454597662 63 Lawrence Pierce lpierce1q@ihg.com Male 35.230.80.125 6763027632739915 Indonesia 7/22/1982 269467.08 Human Resources Assistant IV
|
||||
1454577961 64 Dorothy Gray dgray1r@vimeo.com Female 206.99.76.117 3582462082297450 China 10/8/1975 58802.03 Staff Scientist -1.00
|
||||
1454578138 65 Shawn Larson slarson1s@sohu.com Male 233.109.124.208 3557232712378033 Pakistan 6/11/1987 24566.92 Programmer I
|
||||
1454620878 66 Ashley Carter acarter1t@weather.com Female 120.243.16.33 5641823823569006485 Philippines 2/4/1999 181594.54 Technical Writer
|
||||
1454608592 67 Bruce Gonzalez bgonzalez1u@behance.net Male 213.165.12.93 5602219496203313 Sweden 6/27/1975 152915.03 Social Worker
|
||||
1454570547 68 Gary Porter gporter1v@nhs.uk Male 113.26.17.148 3551504699131924 China 10/15/1988 239398.41 VP Sales åß∂ƒ©˙∆˚¬…æ
|
||||
1454623375 69 Kimberly Bell kbell1w@techcrunch.com Female 232.188.203.114 06048433236353334 Tanzania 239482.42 "
|
||||
1454580645 70 James Torres jtorres1x@rakuten.co.jp Male 42.70.136.181 Brazil 3/19/1968 66432.01 Information Systems Manager
|
||||
1454565683 71 Cheryl Williams cwilliams1y@clickbank.net 24.11.168.130 Latvia 9/28/1958 \N Quality Control Specialist
|
||||
1454572298 72 Diane Hicks dhicks1z@noaa.gov Female 220.185.241.90 36196827669213 Honduras 11/20/1977 104365.11 Systems Administrator I
|
||||
1454630150 73 Judith Brown jbrown20@acquirethisname.com Female 173.62.110.176 Czech Republic 12/26/1994 218616.17 Safety Technician IV
|
||||
1454550898 74 Jesse Dixon jdixon21@bloglines.com Male 156.125.120.208 Syria 277530.58 (╯°□°)╯︵ ┻━┻)
|
||||
1454560223 75 Timothy Garza tgarza22@tmall.com Male 56.172.71.231 Poland 4/1/1978 21103.66 Desktop Support Technician ␡
|
||||
1454549446 76 Gloria Washington gwashington23@hud.gov Female 249.63.88.116 3528613230855766 Portugal 10/17/1960 175586.21 Information Systems Manager
|
||||
1454555260 77 Patricia Bell pbell24@youtu.be Female 20.46.164.228 3528267541114924 Honduras 1/31/1999 47750.6 Payment Adjustment Coordinator
|
||||
1454579807 78 Theresa Clark tclark25@wp.com Female 178.250.150.112 6396247540156151 Indonesia 10/10/1989 78319.93 Executive Secretary
|
||||
1454629649 79 Matthew Matthews mmatthews26@typepad.com Male 33.186.230.54 5213341713953768 Azerbaijan 10/4/1990 12883.34 Help Desk Technician
|
||||
1454568333 80 Betty White bwhite27@github.com Female 128.110.102.181 3572999005932624 Morocco 12/6/1980 30998.69 Operator
|
||||
1454559489 81 Christina Nguyen cnguyen28@washingtonpost.com Female 63.57.110.32 36954036240279 Philippines 7/23/1984 259707.25 Project Manager
|
||||
1454575575 82 Norma Stevens nstevens29@newyorker.com Female 148.35.34.31 Brazil 7/24/1984 233848.07 Professor
|
||||
1454547659 83 Tammy Walker twalker2a@craigslist.org Female 115.94.89.2 4508955158259501 China 1/1/1972 241046.96 Community Outreach Specialist
|
||||
1454559813 84 Mark Jackson mjackson2b@utexas.edu Male 136.242.153.66 36666130651082 Philippines 12/9/1957 245352.11 Account Executive 部落格
|
||||
1454547442 85 Scott Washington swashington2c@bloomberg.com Male 79.185.72.100 6395647151650882 Brazil 2/17/1957 240505.52 Professor
|
||||
1454577775 86 Margaret Franklin mfranklin2d@mapy.cz Female 139.209.240.12 501835281527257384 Brazil 72758.49
|
||||
1454582451 87 Carolyn Wilson cwilson2e@hp.com Female 5.172.62.195 3581164938009805 France 1/19/1997 162909.64 Librarian
|
||||
1454608782 88 Emily Cole ecole2f@epa.gov 97.83.153.33 Burkina Faso 5/3/1996 \N Accounting Assistant IV 1.00
|
||||
1454544809 89 Carolyn Gutierrez cgutierrez2g@smh.com.au Female 109.77.234.103 Madagascar 2/13/1999 139612.73 Nurse
|
||||
1454591667 90 Jose Wallace jwallace2h@about.com Male 250.231.81.57 Philippines 12/17/1983 213500.16 Design Engineer
|
||||
1454561119 91 Charles Reed creed2i@independent.co.uk Male 28.212.235.149 4017954848825528 China 88039.86
|
||||
1454615732 92 Brian Parker bparker2j@hugedomains.com Male 143.67.111.179 Portugal 1/18/1996 202446.54 Executive Secretary
|
||||
1454613613 93 Donald Fox dfox2k@webs.com Male 251.61.52.170 3553498748210516 Indonesia 12/19/1975 134745.75 Human Resources Manager
|
||||
1454603200 94 Jack West jwest2l@biblegateway.com Male 115.144.142.60 Poland 10/30/1956 245162.49 Office Assistant I 1.00
|
||||
1454574412 95 Doris Gomez dgomez2m@tinypic.com Female 156.173.76.213 4041593860679 Colombia 8/28/1977 164689.56 Speech Pathologist
|
||||
1454544624 96 Brandon Owens bowens2n@si.edu Male 5.39.151.46 4591258400528650 France 3/13/1998 74028.68 Software Engineer III
|
||||
1454596449 97 Evelyn Wagner ewagner2o@sbwire.com Female 84.231.120.250 3571837377153521 China 1/5/1965 78692.34 Operator
|
||||
1454545547 98 Timothy Boyd tboyd2p@imdb.com Male 211.20.45.168 5602253132446507 Peru 7/8/1976 127883.56 Data Coordiator
|
||||
1454549050 99 Edward Gilbert egilbert2q@ocn.ne.jp Male 237.183.200.242 3586807595028188 Bangladesh 8/30/1956 214872.75 Senior Financial Analyst
|
||||
1454583513 100 Howard Patterson hpatterson2r@toplist.cz Male 200.77.150.4 3558592437934298 China 7/9/1991 23607 Administrative Assistant IV
|
||||
=== Try load data from v0.7.1.all-named-index.parquet
|
||||
0.22 65.1 61 337 3.87 3.78 2.49 Fair E VS2
|
||||
0.23 56.9 65 327 4.05 4.07 2.31 Good E VS1
|
||||
0.31 63.3 58 335 4.34 4.35 2.75 Good J SI2
|
||||
0.23 61.5 55 326 3.95 3.98 2.43 Ideal E SI2
|
||||
0.21 59.8 61 326 3.89 3.84 2.31 Premium E SI1
|
||||
0.29 62.4 58 334 4.2 4.23 2.63 Premium I VS2
|
||||
0.26 61.9 55 337 4.07 4.11 2.53 Very Good H SI1
|
||||
0.23 59.4 61 338 4 4.05 2.39 Very Good H VS1
|
||||
0.24 62.3 57 336 3.95 3.98 2.47 Very Good I VVS1
|
||||
0.24 62.8 57 336 3.94 3.96 2.48 Very Good J VVS2
|
||||
=== Try load data from v0.7.1.column-metadata-handling.parquet
|
||||
1 0.1 2017-01-01 02:00:00 a 2017-01-01 02:00:00
|
||||
2 0.2 2017-01-02 02:00:00 b 2017-01-02 02:00:00
|
||||
3 0.3 2017-01-03 02:00:00 c 2017-01-03 02:00:00
|
||||
=== Try load data from v0.7.1.parquet
|
||||
0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43 0
|
||||
0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31 1
|
||||
0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31 2
|
||||
0.29 Premium I VS2 62.4 58 334 4.2 4.23 2.63 3
|
||||
0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75 4
|
||||
0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48 5
|
||||
0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47 6
|
||||
0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53 7
|
||||
0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49 8
|
||||
0.23 Very Good H VS1 59.4 61 338 4 4.05 2.39 9
|
||||
=== Try load data from v0.7.1.some-named-index.parquet
|
||||
0.22 65.1 61 337 3.87 3.78 2.49 Fair E VS2
|
||||
0.23 56.9 65 327 4.05 4.07 2.31 Good E VS1
|
||||
0.31 63.3 58 335 4.34 4.35 2.75 Good J SI2
|
||||
0.23 61.5 55 326 3.95 3.98 2.43 Ideal E SI2
|
||||
0.21 59.8 61 326 3.89 3.84 2.31 Premium E SI1
|
||||
0.29 62.4 58 334 4.2 4.23 2.63 Premium I VS2
|
||||
0.26 61.9 55 337 4.07 4.11 2.53 Very Good H SI1
|
||||
0.23 59.4 61 338 4 4.05 2.39 Very Good H VS1
|
||||
0.24 62.3 57 336 3.95 3.98 2.47 Very Good I VVS1
|
||||
0.24 62.8 57 336 3.94 3.96 2.48 Very Good J VVS2
|
58
dbms/tests/queries/0_stateless/00900_parquet_load.sh
Executable file
58
dbms/tests/queries/0_stateless/00900_parquet_load.sh
Executable file
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
#
|
||||
# Load all possible .parquet files found in submodules.
|
||||
# TODO: Add more files.
|
||||
#
|
||||
|
||||
# To regenerate the data, install the Perl JSON::XS module: sudo apt install libjson-xs-perl
|
||||
|
||||
# Also 5 sample files from
|
||||
# wget https://github.com/Teradata/kylo/raw/master/samples/sample-data/parquet/userdata1.parquet
|
||||
# ...
|
||||
# wget https://github.com/Teradata/kylo/raw/master/samples/sample-data/parquet/userdata5.parquet
|
||||
|
||||
|
||||
# set -x
|
||||
|
||||
# Absolute path of the directory that contains this script.
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# The path is quoted so that a checkout located under a directory whose name
# contains whitespace can still source the shared test configuration.
. "$CUR_DIR/../shell_config.sh"

# Directory component of the client binary path. `dirname` yields "." when the
# value has no directory component (or is empty).
# NOTE(review): CLICKHOUSE_CLIENT_BINARY is presumably provided by
# shell_config.sh or the test runner environment - confirm.
CB_DIR=$(dirname "$CLICKHOUSE_CLIENT_BINARY")
if [ "$CB_DIR" == "." ]; then
    # No directory given for the client binary: derive the root from this
    # script's location. BUILD_DIR is deliberately left as is (normally unset).
    ROOT_DIR=$CUR_DIR/../../../..
else
    # The client binary was given with a directory: derive the build directory
    # from it. The loop below only regenerates the .json/.columns helper files
    # when BUILD_DIR is non-empty.
    BUILD_DIR=$CB_DIR/../..
fi
# Fall back to a root relative to the client binary when ROOT_DIR is still empty.
[ -z "$ROOT_DIR" ] && ROOT_DIR=$CB_DIR/../../..

# Directory holding the sample .parquet files and their generated .json/.columns files.
DATA_DIR=$CUR_DIR/data_parquet
|
||||
|
||||
# To update:
|
||||
# cp $ROOT_DIR/contrib/arrow/cpp/submodules/parquet-testing/data/*.parquet $ROOT_DIR/contrib/arrow/python/pyarrow/tests/data/parquet/*.parquet $CUR_DIR/data_parquet/
|
||||
|
||||
# BUG! nulls.snappy.parquet - parquet-reader shows wrong structure. Actual structure is {"type":"struct","fields":[{"name":"b_struct","type":{"type":"struct","fields":[{"name":"b_c_int","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]}
|
||||
# why? repeated_no_annotation.parquet
|
||||
|
||||
# For every sample file in $DATA_DIR: create a Memory table with the columns
# listed in <file>.columns, insert the file in Parquet format, print up to 100
# rows and drop the table again.
#
# The files are enumerated with a glob instead of parsing `ls` output, so names
# or directories containing whitespace are handled; bash returns glob matches
# in sorted order, which keeps the output order stable for the reference file.
for FILE in "$DATA_DIR"/*.parquet; do
    # Without any match the pattern is left unexpanded: skip that literal string.
    [ -e "$FILE" ] || continue
    NAME=$(basename "$FILE")

    echo "=== Try load data from $NAME"

    JSON="$DATA_DIR/$NAME.json"
    COLUMNS_FILE="$DATA_DIR/$NAME.columns"

    # If you want to change or add a .parquet file, remove the generated files first:
    #   rm data_parquet/*.json data_parquet/*.columns
    # They are regenerated only when a build directory is known and the
    # .columns file is missing or empty.
    if [ -n "$BUILD_DIR" ] && [ ! -s "$COLUMNS_FILE" ]; then
        # Dump the parquet schema as JSON unless a non-empty dump already exists.
        [ ! -s "$JSON" ] && "$BUILD_DIR/contrib/arrow-cmake/parquet-reader" --json "$FILE" > "$JSON"
        # Convert the JSON schema into a column list for CREATE TABLE.
        "$CUR_DIR/00900_parquet_create_table_columns.pl" "$JSON" > "$COLUMNS_FILE"
    fi

    # Debug only:
    # [ -n "$BUILD_DIR" ] && "$BUILD_DIR/contrib/arrow-cmake/parquet-reader" "$FILE" > "$DATA_DIR/$NAME.dump"

    # The column list is kept in TABLE_COLUMNS rather than COLUMNS, because bash
    # itself may reassign COLUMNS (terminal width, see `shopt checkwinsize`).
    # Skip the file when its column list cannot be read.
    TABLE_COLUMNS=$(cat "$COLUMNS_FILE") || continue

    # ${CLICKHOUSE_CLIENT} is expanded unquoted on purpose.
    # NOTE(review): it presumably holds the client command together with its
    # arguments, as set up by shell_config.sh - confirm.
    ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS test.parquet_load"
    ${CLICKHOUSE_CLIENT} --query="CREATE TABLE test.parquet_load ($TABLE_COLUMNS) ENGINE = Memory"

    # Some files are broken, so an exception here is OK. The word "Exception"
    # is rewritten in the captured client output (stdout and stderr).
    ${CLICKHOUSE_CLIENT} --query="INSERT INTO test.parquet_load FORMAT Parquet" < "$FILE" 2>&1 | sed 's/Exception/Ex---tion/'

    ${CLICKHOUSE_CLIENT} --query="SELECT * FROM test.parquet_load LIMIT 100"
    ${CLICKHOUSE_CLIENT} --query="DROP TABLE test.parquet_load"
done
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
id Nullable(Int32), bool_col Nullable(UInt8), tinyint_col Nullable(Int32), smallint_col Nullable(Int32), int_col Nullable(Int32), bigint_col Nullable(Int64), float_col Nullable(Float32), double_col Nullable(Float64), date_string_col Nullable(String), string_col Nullable(String), timestamp_col Nullable(Int64)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
id Nullable(Int32), bool_col Nullable(UInt8), tinyint_col Nullable(Int32), smallint_col Nullable(Int32), int_col Nullable(Int32), bigint_col Nullable(Int64), float_col Nullable(Float32), double_col Nullable(Float64), date_string_col Nullable(String), string_col Nullable(String), timestamp_col Nullable(Int64)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
id Nullable(Int32), bool_col Nullable(UInt8), tinyint_col Nullable(Int32), smallint_col Nullable(Int32), int_col Nullable(Int32), bigint_col Nullable(Int64), float_col Nullable(Float32), double_col Nullable(Float64), date_string_col Nullable(String), string_col Nullable(String), timestamp_col Nullable(Int64)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
value Nullable(Decimal128(1))
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
a Nullable(String), b Nullable(Int32), c Nullable(Float64), d Nullable(UInt8), element Nullable(Int32)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
value Nullable(Decimal128(1))
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
value Nullable(Decimal128(1))
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
value Nullable(Decimal128(1))
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
value Nullable(Decimal128(1))
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
nation_key Nullable(Int32), name Nullable(String), region_key Nullable(Int32), comment_col Nullable(String)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
element Nullable(String), b Nullable(Int32)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
key Nullable(String), key1 Nullable(Int32), value Nullable(UInt8), b Nullable(Int32), c Nullable(Float64)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
ID Nullable(Int64), element Nullable(Int32), element2 Nullable(Int32), key Nullable(String), value Nullable(Int32), key5 Nullable(String), value6 Nullable(Int32), a Nullable(Int32), element8 Nullable(Int32), e Nullable(Int32), f Nullable(String), key11 Nullable(String), element12 Nullable(Float64)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
id Nullable(Int64), element Nullable(Int32), element2 Nullable(Int32), key Nullable(String), value Nullable(Int32), key5 Nullable(String), value6 Nullable(Int32), A Nullable(Int32), element8 Nullable(Int32), E Nullable(Int32), F Nullable(String), key11 Nullable(String), element12 Nullable(Float64)
|
BIN
dbms/tests/queries/0_stateless/data_parquet/nulls.snappy.parquet
Normal file
BIN
dbms/tests/queries/0_stateless/data_parquet/nulls.snappy.parquet
Normal file
Binary file not shown.
@ -0,0 +1 @@
|
||||
b_c_int Nullable(Int32)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
id Nullable(Int32), number Nullable(Int64), kind Nullable(String)
|
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata1.parquet
Normal file
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata1.parquet
Normal file
Binary file not shown.
@ -0,0 +1 @@
|
||||
registration_dttm Nullable(Int64), id Nullable(Int32), first_name Nullable(String), last_name Nullable(String), email Nullable(String), gender Nullable(String), ip_address Nullable(String), cc Nullable(String), country Nullable(String), birthdate Nullable(String), salary Nullable(Float64), title Nullable(String), comments Nullable(String)
|
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata2.parquet
Normal file
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata2.parquet
Normal file
Binary file not shown.
@ -0,0 +1 @@
|
||||
registration_dttm Nullable(Int64), id Nullable(Int32), first_name Nullable(String), last_name Nullable(String), email Nullable(String), gender Nullable(String), ip_address Nullable(String), cc Nullable(String), country Nullable(String), birthdate Nullable(String), salary Nullable(Float64), title Nullable(String), comments Nullable(String)
|
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata3.parquet
Normal file
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata3.parquet
Normal file
Binary file not shown.
@ -0,0 +1 @@
|
||||
registration_dttm Nullable(Int64), id Nullable(Int32), first_name Nullable(String), last_name Nullable(String), email Nullable(String), gender Nullable(String), ip_address Nullable(String), cc Nullable(String), country Nullable(String), birthdate Nullable(String), salary Nullable(Float64), title Nullable(String), comments Nullable(String)
|
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata4.parquet
Normal file
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata4.parquet
Normal file
Binary file not shown.
@ -0,0 +1 @@
|
||||
registration_dttm Nullable(Int64), id Nullable(Int32), first_name Nullable(String), last_name Nullable(String), email Nullable(String), gender Nullable(String), ip_address Nullable(String), cc Nullable(String), country Nullable(String), birthdate Nullable(String), salary Nullable(Float64), title Nullable(String), comments Nullable(String)
|
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata5.parquet
Normal file
BIN
dbms/tests/queries/0_stateless/data_parquet/userdata5.parquet
Normal file
Binary file not shown.
@ -0,0 +1 @@
|
||||
registration_dttm Nullable(Int64), id Nullable(Int32), first_name Nullable(String), last_name Nullable(String), email Nullable(String), gender Nullable(String), ip_address Nullable(String), cc Nullable(String), country Nullable(String), birthdate Nullable(String), salary Nullable(Float64), title Nullable(String), comments Nullable(String)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
carat Nullable(Float64), depth Nullable(Float64), table Nullable(Float64), price Nullable(Int64), x Nullable(Float64), y Nullable(Float64), z Nullable(Float64), cut Nullable(String), color Nullable(String), clarity Nullable(String)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
a Nullable(Int64), b Nullable(Float64), c Nullable(DateTime), index Nullable(String), __index_level_1__ Nullable(DateTime)
|
BIN
dbms/tests/queries/0_stateless/data_parquet/v0.7.1.parquet
Normal file
BIN
dbms/tests/queries/0_stateless/data_parquet/v0.7.1.parquet
Normal file
Binary file not shown.
@ -0,0 +1 @@
|
||||
carat Nullable(Float64), cut Nullable(String), color Nullable(String), clarity Nullable(String), depth Nullable(Float64), table Nullable(Float64), price Nullable(Int64), x Nullable(Float64), y Nullable(Float64), z Nullable(Float64), __index_level_0__ Nullable(Int64)
|
Binary file not shown.
@ -0,0 +1 @@
|
||||
carat Nullable(Float64), depth Nullable(Float64), table Nullable(Float64), price Nullable(Int64), x Nullable(Float64), y Nullable(Float64), z Nullable(Float64), cut Nullable(String), __index_level_1__ Nullable(String), clarity Nullable(String)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user