Merge pull request #12181 from azat/bump-arrow-to-0.17

Bump arrow to 0.17 (and flatbuffers to v1.12, required by arrow)
This commit is contained in:
alexey-milovidov 2020-07-09 04:24:47 +03:00 committed by GitHub
commit 36205e3ddf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 55 additions and 9406 deletions

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit b789226ccb2124285792107c758bb3b40b3d082a
Subproject commit 3cbcb7b62c2f2d02851bff837758637eb592a64b

View File

@ -1,5 +1,3 @@
include(ExternalProject)
set (CMAKE_CXX_STANDARD 17)
# === thrift
@ -77,14 +75,9 @@ add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
# === flatbuffers
##############################################################
# fbs - Step 1: build flatbuffers lib and flatc compiler
##############################################################
set(FLATBUFFERS_SRC_DIR ${ClickHouse_SOURCE_DIR}/contrib/flatbuffers)
set(FLATBUFFERS_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/flatbuffers)
set(FLATBUFFERS_INCLUDE_DIR ${FLATBUFFERS_SRC_DIR}/include)
set(FLATBUFFERS_COMPILER "$<TARGET_FILE:flatc>")
# set flatbuffers CMake options
if (${USE_STATIC_LIBRARIES})
@ -94,57 +87,11 @@ else ()
set(FLATBUFFERS_BUILD_SHAREDLIB ON CACHE BOOL "Enable the build of the flatbuffers shared library")
set(FLATBUFFERS_BUILD_FLATLIB OFF CACHE BOOL "Disable the build of the flatbuffers library")
endif ()
set(FLATBUFFERS_BUILD_FLATC ON CACHE BOOL "Build flatbuffers compiler")
set(FLATBUFFERS_BUILD_TESTS OFF CACHE BOOL "Skip flatbuffers tests")
add_subdirectory(${FLATBUFFERS_SRC_DIR} "${FLATBUFFERS_BINARY_DIR}")
###################################
# fbs - Step 2: compile *.fbs files
###################################
set(ARROW_IPC_SRC_DIR ${ARROW_SRC_DIR}/arrow/ipc)
set(ARROW_FORMAT_SRC_DIR ${ARROW_SRC_DIR}/../../format)
set(ARROW_GENERATED_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/arrow_gen_headers)
set(FLATBUFFERS_COMPILED_OUT_DIR ${ARROW_GENERATED_INCLUDE_DIR}/arrow/ipc)
set(FBS_OUTPUT_FILES
"${FLATBUFFERS_COMPILED_OUT_DIR}/File_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/Message_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/feather_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/Schema_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/SparseTensor_generated.h"
"${FLATBUFFERS_COMPILED_OUT_DIR}/Tensor_generated.h")
set(FBS_SRC
${ARROW_FORMAT_SRC_DIR}/Message.fbs
${ARROW_FORMAT_SRC_DIR}/File.fbs
${ARROW_FORMAT_SRC_DIR}/Schema.fbs
${ARROW_FORMAT_SRC_DIR}/Tensor.fbs
${ARROW_FORMAT_SRC_DIR}/SparseTensor.fbs
${ARROW_IPC_SRC_DIR}/feather.fbs)
foreach (FIL ${FBS_SRC})
get_filename_component(ABS_FIL ${FIL} ABSOLUTE)
list(APPEND ABS_FBS_SRC ${ABS_FIL})
endforeach ()
message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}, FLATBUFFERS_COMPILER: ${FLATBUFFERS_COMPILER}")
message(STATUS "FLATBUFFERS_COMPILED_OUT_DIR: ${FLATBUFFERS_COMPILED_OUT_DIR}")
message(STATUS "flatc: ${FLATBUFFERS_COMPILER} -c -o ${FLATBUFFERS_COMPILED_OUT_DIR}/ ${ABS_FBS_SRC}")
add_custom_command(OUTPUT ${FBS_OUTPUT_FILES}
COMMAND ${FLATBUFFERS_COMPILER}
-c
-o
${FLATBUFFERS_COMPILED_OUT_DIR}/
${ABS_FBS_SRC}
DEPENDS flatc ${ABS_FBS_SRC}
COMMENT "Running flatc compiler on ${ABS_FBS_SRC}"
VERBATIM)
add_custom_target(metadata_fbs DEPENDS ${FBS_OUTPUT_FILES})
add_dependencies(metadata_fbs flatc)
message(STATUS "FLATBUFFERS_LIBRARY: ${FLATBUFFERS_LIBRARY}")
# The arrow-cmake CMake file calls an ORC CMake subroutine that detects certain compiler features.
# The Apple Clang compiler fails to compile that detection code unless the C++11 standard is specified.
@ -203,6 +150,7 @@ configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_SOURCE_DIR}
set(ARROW_SRCS
${LIBRARY_DIR}/array.cc
${LIBRARY_DIR}/buffer.cc
${LIBRARY_DIR}/device.cc
${LIBRARY_DIR}/builder.cc
${LIBRARY_DIR}/compare.cc
${LIBRARY_DIR}/extension_type.cc
@ -219,6 +167,11 @@ set(ARROW_SRCS
${LIBRARY_DIR}/type.cc
${LIBRARY_DIR}/visitor.cc
${LIBRARY_DIR}/tensor/coo_converter.cc
${LIBRARY_DIR}/tensor/csc_converter.cc
${LIBRARY_DIR}/tensor/csf_converter.cc
${LIBRARY_DIR}/tensor/csr_converter.cc
${LIBRARY_DIR}/array/builder_adaptive.cc
${LIBRARY_DIR}/array/builder_base.cc
${LIBRARY_DIR}/array/builder_binary.cc
@ -230,6 +183,7 @@ set(ARROW_SRCS
${LIBRARY_DIR}/array/concatenate.cc
${LIBRARY_DIR}/array/dict_internal.cc
${LIBRARY_DIR}/array/diff.cc
${LIBRARY_DIR}/array/validate.cc
${LIBRARY_DIR}/csv/converter.cc
${LIBRARY_DIR}/csv/chunker.cc
@ -237,6 +191,7 @@ set(ARROW_SRCS
${LIBRARY_DIR}/csv/options.cc
${LIBRARY_DIR}/csv/parser.cc
${LIBRARY_DIR}/csv/reader.cc
${LIBRARY_DIR}/csv/column_decoder.cc
${LIBRARY_DIR}/ipc/dictionary.cc
${LIBRARY_DIR}/ipc/feather.cc
@ -251,7 +206,6 @@ set(ARROW_SRCS
${LIBRARY_DIR}/io/file.cc
${LIBRARY_DIR}/io/interfaces.cc
${LIBRARY_DIR}/io/memory.cc
${LIBRARY_DIR}/io/readahead.cc
${LIBRARY_DIR}/io/slow.cc
${LIBRARY_DIR}/util/basic_decimal.cc
@ -274,6 +228,12 @@ set(ARROW_SRCS
${LIBRARY_DIR}/util/thread_pool.cc
${LIBRARY_DIR}/util/trie.cc
${LIBRARY_DIR}/util/utf8.cc
${LIBRARY_DIR}/util/future.cc
${LIBRARY_DIR}/util/formatting.cc
${LIBRARY_DIR}/util/parsing.cc
${LIBRARY_DIR}/util/time.cc
${LIBRARY_DIR}/util/delimiting.cc
${LIBRARY_DIR}/util/iterator.cc
${LIBRARY_DIR}/vendored/base64.cpp
${ORC_SRCS}
@ -321,7 +281,7 @@ endif ()
add_library(${ARROW_LIBRARY} ${ARROW_SRCS})
# Arrow dependencies
add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY} metadata_fbs)
add_dependencies(${ARROW_LIBRARY} ${FLATBUFFERS_LIBRARY})
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${FLATBUFFERS_LIBRARY} boost::filesystem)
@ -352,17 +312,18 @@ target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_BUILD_INCLUDE_D
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ORC_ADDITION_SOURCE_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_SRC_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${FLATBUFFERS_INCLUDE_DIR})
target_include_directories(${ARROW_LIBRARY} PRIVATE SYSTEM ${ARROW_GENERATED_INCLUDE_DIR})
# === parquet
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet)
set(GEN_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/generated)
# arrow/cpp/src/parquet/CMakeLists.txt
set(PARQUET_SRCS
${LIBRARY_DIR}/arrow/reader.cc
${LIBRARY_DIR}/arrow/reader_internal.cc
${LIBRARY_DIR}/arrow/schema.cc
${LIBRARY_DIR}/arrow/writer.cc
${LIBRARY_DIR}/arrow/path_internal.cc
${LIBRARY_DIR}/bloom_filter.cc
${LIBRARY_DIR}/column_reader.cc
${LIBRARY_DIR}/column_scanner.cc
@ -379,16 +340,19 @@ set(PARQUET_SRCS
${LIBRARY_DIR}/schema.cc
${LIBRARY_DIR}/statistics.cc
${LIBRARY_DIR}/types.cc
${LIBRARY_DIR}/encryption.cc
${LIBRARY_DIR}/encryption_internal.cc
${LIBRARY_DIR}/internal_file_decryptor.cc
${LIBRARY_DIR}/internal_file_encryptor.cc
${GEN_LIBRARY_DIR}/parquet_constants.cpp
${GEN_LIBRARY_DIR}/parquet_types.cpp
)
#list(TRANSFORM PARQUET_SRCS PREPEND ${LIBRARY_DIR}/) # cmake 3.12
list(APPEND PARQUET_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_constants.cpp
${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/parquet/parquet_types.cpp
)
add_library(${PARQUET_LIBRARY} ${PARQUET_SRCS})
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
target_include_directories(${PARQUET_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src PRIVATE ${OPENSSL_INCLUDE_DIR})
include(${ClickHouse_SOURCE_DIR}/contrib/thrift/build/cmake/ConfigureChecks.cmake) # makes config.h
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex)
target_link_libraries(${PARQUET_LIBRARY} PUBLIC ${ARROW_LIBRARY} PRIVATE ${THRIFT_LIBRARY} boost::headers_only boost::regex ${OPENSSL_LIBRARIES})
if (SANITIZE STREQUAL "undefined")
target_compile_options(${PARQUET_LIBRARY} PRIVATE -fno-sanitize=undefined)

View File

@ -1,17 +0,0 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#include "parquet_constants.h"
namespace parquet { namespace format {
const parquetConstants g_parquet_constants;
parquetConstants::parquetConstants() {
}
}} // namespace

View File

@ -1,24 +0,0 @@
/**
* Autogenerated by Thrift Compiler (0.12.0)
*
* DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
* @generated
*/
#ifndef parquet_CONSTANTS_H
#define parquet_CONSTANTS_H
#include "parquet_types.h"
namespace parquet { namespace format {
class parquetConstants {
public:
parquetConstants();
};
extern const parquetConstants g_parquet_constants;
}} // namespace
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -18,6 +18,13 @@
#ifndef PARQUET_VERSION_H
#define PARQUET_VERSION_H
#define PARQUET_VERSION_MAJOR 1
#define PARQUET_VERSION_MINOR 5
#define PARQUET_VERSION_PATCH 1
#define PARQUET_SO_VERSION 0
#define PARQUET_FULL_SO_VERSION 0.17
// Defines the "created by" version string for parquet-cpp.
#define CREATED_BY_VERSION "parquet-cpp version 1.5.1-SNAPSHOT"

2
contrib/flatbuffers vendored

@ -1 +1 @@
Subproject commit bf9eb67ab9371755c6bcece13cadc7693bcbf264
Subproject commit 6df40a2471737b27271bdd9b900ab5f3aec746c7

View File

@ -24,10 +24,9 @@ arrow::Status ArrowBufferedOutputStream::Close()
return arrow::Status::OK();
}
arrow::Status ArrowBufferedOutputStream::Tell(int64_t * position) const
arrow::Result<int64_t> ArrowBufferedOutputStream::Tell() const
{
*position = total_length;
return arrow::Status::OK();
return arrow::Result<int64_t>(total_length);
}
arrow::Status ArrowBufferedOutputStream::Write(const void * data, int64_t length)
@ -42,10 +41,9 @@ RandomAccessFileFromSeekableReadBuffer::RandomAccessFileFromSeekableReadBuffer(S
{
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::GetSize(int64_t * size)
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::GetSize()
{
*size = file_size;
return arrow::Status::OK();
return arrow::Result<int64_t>(file_size);
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
@ -54,25 +52,25 @@ arrow::Status RandomAccessFileFromSeekableReadBuffer::Close()
return arrow::Status::OK();
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Tell(int64_t * position) const
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Tell() const
{
*position = in.getPosition();
return arrow::Status::OK();
return arrow::Result<int64_t>(in.getPosition());
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, int64_t * bytes_read, void * out)
arrow::Result<int64_t> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, void * out)
{
*bytes_read = in.readBig(reinterpret_cast<char *>(out), nbytes);
return arrow::Status::OK();
int64_t bytes_read = in.readBig(reinterpret_cast<char *>(out), nbytes);
return arrow::Result<int64_t>(bytes_read);
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, std::shared_ptr<arrow::Buffer> * out)
arrow::Result<std::shared_ptr<arrow::Buffer>> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes)
{
std::shared_ptr<arrow::Buffer> buf;
ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(nbytes, &buf));
size_t n = in.readBig(reinterpret_cast<char *>(buf->mutable_data()), nbytes);
*out = arrow::SliceBuffer(buf, 0, n);
return arrow::Status::OK();
auto read_buffer = arrow::SliceBuffer(buf, 0, n);
return arrow::Result<std::shared_ptr<arrow::Buffer>>(read_buffer);
}
arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position)

View File

@ -19,7 +19,7 @@ public:
// FileInterface
arrow::Status Close() override;
arrow::Status Tell(int64_t * position) const override;
arrow::Result<int64_t> Tell() const override;
bool closed() const override { return !is_open; }
@ -39,17 +39,17 @@ class RandomAccessFileFromSeekableReadBuffer : public arrow::io::RandomAccessFil
public:
RandomAccessFileFromSeekableReadBuffer(SeekableReadBuffer & in_, off_t file_size_);
arrow::Status GetSize(int64_t * size) override;
arrow::Result<int64_t> GetSize() override;
arrow::Status Close() override;
arrow::Status Tell(int64_t * position) const override;
arrow::Result<int64_t> Tell() const override;
bool closed() const override { return !is_open; }
arrow::Status Read(int64_t nbytes, int64_t * bytes_read, void * out) override;
arrow::Result<int64_t> Read(int64_t nbytes, void * out) override;
arrow::Status Read(int64_t nbytes, std::shared_ptr<arrow::Buffer> * out) override;
arrow::Result<std::shared_ptr<arrow::Buffer>> Read(int64_t nbytes) override;
arrow::Status Seek(int64_t position) override;

View File

@ -39,7 +39,7 @@
23.0
24.0
=== Try load data from datapage_v2.snappy.parquet
Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Arrow error: IOError: Corrupt snappy compressed data.
Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.
=== Try load data from fixed_length_decimal_1.parquet
1.0