Merge pull request #26898 from vitstn/arcadia_arrow2

Enable Arrow format in Arcadia
This commit is contained in:
alexey-milovidov 2021-07-29 01:17:57 +03:00 committed by GitHub
commit f62c11dbf3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 50 additions and 8 deletions

View File

@@ -123,12 +123,13 @@ void registerFormats()
registerOutputFormatProcessorORC(factory);
registerInputFormatProcessorParquet(factory);
registerOutputFormatProcessorParquet(factory);
registerInputFormatProcessorArrow(factory);
registerOutputFormatProcessorArrow(factory);
registerInputFormatProcessorAvro(factory);
registerOutputFormatProcessorAvro(factory);
#endif
registerInputFormatProcessorArrow(factory);
registerOutputFormatProcessorArrow(factory);
registerOutputFormatNull(factory);
registerOutputFormatProcessorPretty(factory);

View File

@@ -1,5 +1,8 @@
#pragma once
#include "config_formats.h"
#if !defined(ARCADIA_BUILD)
# include "config_formats.h"
#endif
#if USE_ARROW
#include <Processors/Formats/IInputFormat.h>

View File

@@ -1,5 +1,8 @@
#pragma once
#include "config_formats.h"
#if !defined(ARCADIA_BUILD)
# include "config_formats.h"
#endif
#if USE_ARROW
#include <Formats/FormatSettings.h>

View File

@@ -6,7 +6,7 @@
#include <IO/WriteBufferFromString.h>
#include <IO/copyData.h>
#include <arrow/buffer.h>
#include <arrow/io/api.h>
#include <arrow/io/memory.h>
#include <arrow/result.h>
#include <sys/stat.h>

View File

@@ -1,5 +1,8 @@
#pragma once
#include "config_formats.h"
#if !defined(ARCADIA_BUILD)
# include "config_formats.h"
#endif
#if USE_ARROW || USE_ORC || USE_PARQUET
#include <arrow/io/interfaces.h>

View File

@@ -389,12 +389,18 @@ static void readColumnFromArrowColumn(
case arrow::Type::TIMESTAMP:
fillColumnWithTimestampData(arrow_column, internal_column);
break;
#if defined(ARCADIA_BUILD)
case arrow::Type::DECIMAL:
fillColumnWithDecimalData<Decimal128, arrow::Decimal128Array>(arrow_column, internal_column /*, internal_nested_type*/);
break;
#else
case arrow::Type::DECIMAL128:
fillColumnWithDecimalData<Decimal128, arrow::Decimal128Array>(arrow_column, internal_column /*, internal_nested_type*/);
break;
case arrow::Type::DECIMAL256:
fillColumnWithDecimalData<Decimal256, arrow::Decimal256Array>(arrow_column, internal_column /*, internal_nested_type*/);
break;
#endif
case arrow::Type::MAP: [[fallthrough]];
case arrow::Type::LIST:
{
@@ -505,6 +511,13 @@ static DataTypePtr getInternalType(
return makeNullable(getInternalType(arrow_type, nested_type, column_name, format_name));
}
#if defined(ARCADIA_BUILD)
if (arrow_type->id() == arrow::Type::DECIMAL)
{
const auto & decimal_type = dynamic_cast<const arrow::DecimalType &>(*arrow_type);
return std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type.precision(), decimal_type.scale());
}
#else
if (arrow_type->id() == arrow::Type::DECIMAL128)
{
const auto & decimal_type = dynamic_cast<const arrow::DecimalType &>(*arrow_type);
@@ -516,6 +529,7 @@ static DataTypePtr getInternalType(
const auto & decimal_type = dynamic_cast<const arrow::DecimalType &>(*arrow_type);
return std::make_shared<DataTypeDecimal<Decimal256>>(decimal_type.precision(), decimal_type.scale());
}
#endif
if (arrow_type->id() == arrow::Type::LIST)
{

View File

@@ -1,6 +1,8 @@
#pragma once
#include "config_formats.h"
#if !defined(ARCADIA_BUILD)
# include "config_formats.h"
#endif
#if USE_ARROW || USE_ORC || USE_PARQUET

View File

@@ -442,11 +442,13 @@ namespace DB
fillArrowArrayWithDecimalColumnData<ToDataType, Int128, arrow::Decimal128, arrow::Decimal128Builder>(column, null_bytemap, array_builder, format_name, start, end);
return true;
}
#if !defined(ARCADIA_BUILD)
if constexpr (std::is_same_v<ToDataType,DataTypeDecimal<Decimal256>>)
{
fillArrowArrayWithDecimalColumnData<ToDataType, Int256, arrow::Decimal256, arrow::Decimal256Builder>(column, null_bytemap, array_builder, format_name, start, end);
return true;
}
#endif
return false;
};

View File

@@ -1,5 +1,8 @@
#pragma once
#include "config_formats.h"
#if !defined(ARCADIA_BUILD)
# include "config_formats.h"
#endif
#if USE_ARROW || USE_PARQUET

View File

@@ -7,8 +7,14 @@ PEERDIR(
clickhouse/src/Common
contrib/libs/msgpack
contrib/libs/protobuf
contrib/libs/arrow
)
ADDINCL(
contrib/libs/arrow/src
)
CFLAGS(-DUSE_ARROW=1)
SRCS(
Chunk.cpp
@@ -25,6 +31,11 @@ SRCS(
Formats/IOutputFormat.cpp
Formats/IRowInputFormat.cpp
Formats/IRowOutputFormat.cpp
Formats/Impl/ArrowBlockInputFormat.cpp
Formats/Impl/ArrowBlockOutputFormat.cpp
Formats/Impl/ArrowBufferedStreams.cpp
Formats/Impl/ArrowColumnToCHColumn.cpp
Formats/Impl/CHColumnToArrowColumn.cpp
Formats/Impl/BinaryRowInputFormat.cpp
Formats/Impl/BinaryRowOutputFormat.cpp
Formats/Impl/CSVRowInputFormat.cpp