diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 0ac8251b8bb..ce34fdfdc58 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -1,4 +1,5 @@ #include "ArrowBlockInputFormat.h" + #if USE_ARROW #include @@ -22,7 +23,7 @@ namespace ErrorCodes } ArrowBlockInputFormat::ArrowBlockInputFormat(ReadBuffer & in_, const Block & header_, bool stream_) - : IInputFormat(header_, in_), stream{stream_} + : IInputFormat(header_, in_), stream{stream_}, arrow_column_to_ch_column(std::make_unique()) { } @@ -63,7 +64,7 @@ Chunk ArrowBlockInputFormat::generate() ++record_batch_current; - arrow_column_to_ch_column.arrowTableToCHChunk(res, *table_result, header, "Arrow"); + arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, header, "Arrow"); return res; } diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h index 6a8acc4a118..3bfead93bf1 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.h @@ -3,7 +3,6 @@ #if USE_ARROW #include -#include namespace arrow { class RecordBatchReader; } namespace arrow::ipc { class RecordBatchFileReader; } @@ -12,6 +11,7 @@ namespace DB { class ReadBuffer; +class ArrowColumnToCHColumn; class ArrowBlockInputFormat : public IInputFormat { @@ -33,7 +33,7 @@ private: // The following fields are used only for Arrow format std::shared_ptr file_reader; - ArrowColumnToCHColumn arrow_column_to_ch_column; + std::unique_ptr arrow_column_to_ch_column; int record_batch_total = 0; int record_batch_current = 0; diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index d1fdffb700c..9f619320b73 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -18,7 +18,11 @@ namespace ErrorCodes } ArrowBlockOutputFormat::ArrowBlockOutputFormat(WriteBuffer & out_, const Block & header_, bool stream_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), stream{stream_}, format_settings{format_settings_}, arrow_ostream{std::make_shared(out_)} + : IOutputFormat(header_, out_) + , stream{stream_} + , format_settings{format_settings_} + , arrow_ostream{std::make_shared(out_)} + , ch_column_to_arrow_column(std::make_unique()) { } @@ -28,7 +32,7 @@ void ArrowBlockOutputFormat::consume(Chunk chunk) const size_t columns_num = chunk.getNumColumns(); std::shared_ptr arrow_table; - ch_column_to_arrow_column.chChunkToArrowTable(arrow_table, header, chunk, columns_num, "Arrow", format_settings.arrow.low_cardinality_as_dictionary); + ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, header, chunk, columns_num, "Arrow", format_settings.arrow.low_cardinality_as_dictionary); if (!writer) prepareWriter(arrow_table->schema()); diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h index fc8efe62435..40d81f8b919 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.h @@ -4,7 +4,6 @@ #include #include -#include #include "ArrowBufferedStreams.h" namespace arrow { class Schema; } @@ -13,6 +12,8 @@ namespace arrow::ipc { class RecordBatchWriter; } namespace DB { +class CHColumnToArrowColumn; + class ArrowBlockOutputFormat : public IOutputFormat { public: @@ -29,7 +30,7 @@ private: const FormatSettings format_settings; std::shared_ptr arrow_ostream; std::shared_ptr writer; - CHColumnToArrowColumn ch_column_to_arrow_column; + std::unique_ptr ch_column_to_arrow_column; void prepareWriter(const std::shared_ptr & schema); }; diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 0f8ca728c46..bd427bd62e1 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -26,7 +26,7 @@ namespace ErrorCodes throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ } while (false) -ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_) : IInputFormat(std::move(header_), in_) +ORCBlockInputFormat::ORCBlockInputFormat(ReadBuffer & in_, Block header_) : IInputFormat(std::move(header_), in_), arrow_column_to_ch_column(std::make_unique()) { } @@ -54,7 +54,7 @@ Chunk ORCBlockInputFormat::generate() ++stripe_current; - arrow_column_to_ch_column.arrowTableToCHChunk(res, *table_result, header, "ORC"); + arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, header, "ORC"); return res; } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.h b/src/Processors/Formats/Impl/ORCBlockInputFormat.h index 5a6cfd1364a..f27685a9884 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.h @@ -3,12 +3,14 @@ #if USE_ORC #include -#include namespace arrow::adapters::orc { class ORCFileReader; } namespace DB { + +class ArrowColumnToCHColumn; + class ORCBlockInputFormat : public IInputFormat { public: @@ -27,7 +29,7 @@ private: std::unique_ptr file_reader; - ArrowColumnToCHColumn arrow_column_to_ch_column; + std::unique_ptr arrow_column_to_ch_column; int stripe_total = 0; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index ea7d35f5bbe..c0d9e330df2 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -31,7 +31,7 @@ namespace ErrorCodes } while (false) ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_) - : IInputFormat(std::move(header_), in_) + : IInputFormat(std::move(header_), in_), arrow_column_to_ch_column(std::make_unique()) { } @@ -54,7 +54,7 @@ Chunk ParquetBlockInputFormat::generate() ++row_group_current; - arrow_column_to_ch_column.arrowTableToCHChunk(res, table, header, "Parquet"); + arrow_column_to_ch_column->arrowTableToCHChunk(res, table, header, "Parquet"); return res; } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h index b27bafe04bf..b68f97c005a 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.h @@ -4,7 +4,6 @@ #if USE_PARQUET #include -#include namespace parquet::arrow { class FileReader; } @@ -13,6 +12,8 @@ namespace arrow { class Buffer; } namespace DB { +class ArrowColumnToCHColumn; + class ParquetBlockInputFormat : public IInputFormat { public: @@ -33,7 +34,7 @@ private: int row_group_total = 0; // indices of columns to read from Parquet file std::vector column_indices; - ArrowColumnToCHColumn arrow_column_to_ch_column; + std::unique_ptr arrow_column_to_ch_column; int row_group_current = 0; }; diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 1eab69239ca..96ef6702cc4 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes } ParquetBlockOutputFormat::ParquetBlockOutputFormat(WriteBuffer & out_, const Block & header_, const FormatSettings & format_settings_) - : IOutputFormat(header_, out_), format_settings{format_settings_} + : IOutputFormat(header_, out_), format_settings{format_settings_}, ch_column_to_arrow_column(std::make_unique()) { } @@ -35,7 +35,7 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) const size_t columns_num = chunk.getNumColumns(); std::shared_ptr arrow_table; - ch_column_to_arrow_column.chChunkToArrowTable(arrow_table, header, chunk, columns_num, "Parquet"); + ch_column_to_arrow_column->chChunkToArrowTable(arrow_table, header, chunk, columns_num, "Parquet"); if (!file_writer) { diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h index deb011e0274..8114d1ab494 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.h @@ -4,7 +4,6 @@ #if USE_PARQUET # include # include -# include namespace arrow { @@ -22,6 +21,9 @@ namespace arrow namespace DB { + +class CHColumnToArrowColumn; + class ParquetBlockOutputFormat : public IOutputFormat { public: @@ -37,7 +39,7 @@ private: const FormatSettings format_settings; std::unique_ptr file_writer; - CHColumnToArrowColumn ch_column_to_arrow_column; + std::unique_ptr ch_column_to_arrow_column; }; }