From 4e269ce2e431222a3bf322c9b63c2213b9897327 Mon Sep 17 00:00:00 2001 From: Vitaly Stoyan Date: Wed, 28 Jul 2021 14:09:17 +0300 Subject: [PATCH 1/2] initial commit --- src/Formats/registerFormats.cpp | 5 +++-- .../Formats/Impl/ArrowBlockInputFormat.h | 5 ++++- .../Formats/Impl/ArrowBlockOutputFormat.h | 5 ++++- .../Formats/Impl/ArrowBufferedStreams.cpp | 2 +- src/Processors/Formats/Impl/ArrowBufferedStreams.h | 5 ++++- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 14 ++++++++++++++ .../Formats/Impl/ArrowColumnToCHColumn.h | 4 +++- .../Formats/Impl/CHColumnToArrowColumn.cpp | 2 ++ .../Formats/Impl/CHColumnToArrowColumn.h | 5 ++++- src/Processors/ya.make | 11 +++++++++++ 10 files changed, 50 insertions(+), 8 deletions(-) diff --git a/src/Formats/registerFormats.cpp b/src/Formats/registerFormats.cpp index 89fb7c6cc02..c035ec0a1d1 100644 --- a/src/Formats/registerFormats.cpp +++ b/src/Formats/registerFormats.cpp @@ -123,12 +123,13 @@ void registerFormats() registerOutputFormatProcessorORC(factory); registerInputFormatProcessorParquet(factory); registerOutputFormatProcessorParquet(factory); - registerInputFormatProcessorArrow(factory); - registerOutputFormatProcessorArrow(factory); registerInputFormatProcessorAvro(factory); registerOutputFormatProcessorAvro(factory); #endif + registerInputFormatProcessorArrow(factory); + registerOutputFormatProcessorArrow(factory); + registerOutputFormatNull(factory); registerOutputFormatProcessorPretty(factory); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index 3bfead93bf1..9f458dece7f 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -1,5 +1,8 @@ #pragma once -#include "config_formats.h" +#if !defined(ARCADIA_BUILD) +# include "config_formats.h" +#endif + #if USE_ARROW #include diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h index 40d81f8b919..44d46e97d2a 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h @@ -1,5 +1,8 @@ #pragma once -#include "config_formats.h" +#if !defined(ARCADIA_BUILD) +# include "config_formats.h" +#endif + #if USE_ARROW #include diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 9582e0c3312..243f3da5903 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index a10a5bcabdb..a49936f326c 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -1,5 +1,8 @@ #pragma once -#include "config_formats.h" +#if !defined(ARCADIA_BUILD) +# include "config_formats.h" +#endif + #if USE_ARROW || USE_ORC || USE_PARQUET #include diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index a921039c824..2ae9f5a2aab 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -389,12 +389,18 @@ static void readColumnFromArrowColumn( case arrow::Type::TIMESTAMP: fillColumnWithTimestampData(arrow_column, internal_column); break; +#if ARCADIA_BUILD + case arrow::Type::DECIMAL: + fillColumnWithDecimalData(arrow_column, internal_column /*, internal_nested_type*/); + break; +#else case arrow::Type::DECIMAL128: fillColumnWithDecimalData(arrow_column, internal_column /*, internal_nested_type*/); break; case arrow::Type::DECIMAL256: fillColumnWithDecimalData(arrow_column, internal_column /*, internal_nested_type*/); break; +#endif case arrow::Type::MAP: [[fallthrough]]; case arrow::Type::LIST: { @@ -505,6 +511,13 @@ static DataTypePtr getInternalType( return makeNullable(getInternalType(arrow_type, nested_type, column_name, format_name)); } +#if ARCADIA_BUILD + if (arrow_type->id() == arrow::Type::DECIMAL) + { + const auto & decimal_type = dynamic_cast(*arrow_type); + return std::make_shared>(decimal_type.precision(), decimal_type.scale()); + } +#else if (arrow_type->id() == arrow::Type::DECIMAL128) { const auto & decimal_type = dynamic_cast(*arrow_type); @@ -516,6 +529,7 @@ static DataTypePtr getInternalType( const auto & decimal_type = dynamic_cast(*arrow_type); return std::make_shared>(decimal_type.precision(), decimal_type.scale()); } +#endif if (arrow_type->id() == arrow::Type::LIST) { diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 3ce4e42a9bc..7f38dc7a31c 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -1,6 +1,8 @@ #pragma once -#include "config_formats.h" +#if !defined(ARCADIA_BUILD) +# include "config_formats.h" +#endif #if USE_ARROW || USE_ORC || USE_PARQUET diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 0f502b36162..1082d95546e 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -442,11 +442,13 @@ namespace DB fillArrowArrayWithDecimalColumnData(column, null_bytemap, array_builder, format_name, start, end); return true; } +#if !ARCADIA_BUILD if constexpr (std::is_same_v>) { fillArrowArrayWithDecimalColumnData(column, null_bytemap, array_builder, format_name, start, end); return true; } +#endif return false; }; diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h index 1fb2a8af65e..c0885d3778c 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.h +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.h @@ -1,5 +1,8 @@ #pragma once -#include "config_formats.h" +#if !defined(ARCADIA_BUILD) +# include "config_formats.h" +#endif + #if USE_ARROW || USE_PARQUET diff --git a/src/Processors/ya.make b/src/Processors/ya.make index 543a08caca5..4b95484a828 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -7,8 +7,14 @@ PEERDIR( clickhouse/src/Common contrib/libs/msgpack contrib/libs/protobuf + contrib/libs/arrow ) +ADDINCL( + contrib/libs/arrow/src +) + +CFLAGS(-DUSE_ARROW=1) SRCS( Chunk.cpp @@ -25,6 +31,11 @@ SRCS( Formats/IOutputFormat.cpp Formats/IRowInputFormat.cpp Formats/IRowOutputFormat.cpp + Formats/Impl/ArrowBlockInputFormat.cpp + Formats/Impl/ArrowBlockOutputFormat.cpp + Formats/Impl/ArrowBufferedStreams.cpp + Formats/Impl/ArrowColumnToCHColumn.cpp + Formats/Impl/CHColumnToArrowColumn.cpp Formats/Impl/BinaryRowInputFormat.cpp Formats/Impl/BinaryRowOutputFormat.cpp Formats/Impl/CSVRowInputFormat.cpp From b2200913dfc37fb0f4ba9a619d1bcb042642596a Mon Sep 17 00:00:00 2001 From: Vitaly Stoyan Date: Wed, 28 Jul 2021 19:56:01 +0300 Subject: [PATCH 2/2] fix build --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 4 ++-- src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 2ae9f5a2aab..84c56f0f2b7 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -389,7 +389,7 @@ static void readColumnFromArrowColumn( case arrow::Type::TIMESTAMP: fillColumnWithTimestampData(arrow_column, internal_column); break; -#if ARCADIA_BUILD +#if defined(ARCADIA_BUILD) case arrow::Type::DECIMAL: fillColumnWithDecimalData(arrow_column, internal_column /*, internal_nested_type*/); break; @@ -511,7 +511,7 @@ static DataTypePtr getInternalType( return makeNullable(getInternalType(arrow_type, nested_type, column_name, format_name)); } -#if ARCADIA_BUILD +#if defined(ARCADIA_BUILD) if (arrow_type->id() == arrow::Type::DECIMAL) { const auto & decimal_type = dynamic_cast(*arrow_type); diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 1082d95546e..42aa9e6ddc7 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -442,7 +442,7 @@ namespace DB fillArrowArrayWithDecimalColumnData(column, null_bytemap, array_builder, format_name, start, end); return true; } -#if !ARCADIA_BUILD +#if !defined(ARCADIA_BUILD) if constexpr (std::is_same_v>) { fillArrowArrayWithDecimalColumnData(column, null_bytemap, array_builder, format_name, start, end);