mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
add parquet max_block_size setting
This commit is contained in:
parent
cda9b0beea
commit
17fc22a21e
@ -774,6 +774,7 @@ class IColumn;
|
||||
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
|
||||
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
|
||||
M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
|
||||
M(UInt64, input_format_parquet_max_block_size, 8192, "Max block size for parquet reader.", 0) \
|
||||
M(Bool, input_format_protobuf_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format Protobuf", 0) \
|
||||
M(Bool, input_format_capn_proto_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format CapnProto", 0) \
|
||||
M(Bool, input_format_orc_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format ORC", 0) \
|
||||
|
@ -116,6 +116,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
|
||||
format_settings.parquet.skip_columns_with_unsupported_types_in_schema_inference = settings.input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference;
|
||||
format_settings.parquet.output_string_as_string = settings.output_format_parquet_string_as_string;
|
||||
format_settings.parquet.max_block_size = settings.input_format_parquet_max_block_size;
|
||||
format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
|
||||
format_settings.pretty.color = settings.output_format_pretty_color;
|
||||
format_settings.pretty.max_column_pad_width = settings.output_format_pretty_max_column_pad_width;
|
||||
|
@ -183,6 +183,7 @@ struct FormatSettings
|
||||
bool case_insensitive_column_matching = false;
|
||||
std::unordered_set<int> skip_row_groups = {};
|
||||
bool output_string_as_string = false;
|
||||
UInt64 max_block_size = 8192;
|
||||
} parquet;
|
||||
|
||||
struct Pretty
|
||||
|
@ -47,23 +47,27 @@ Chunk ParquetBlockInputFormat::generate()
|
||||
if (!file_reader)
|
||||
{
|
||||
prepareReader();
|
||||
/// It may be necessary to add a parameter
|
||||
file_reader->set_batch_size(8192);
|
||||
file_reader->set_batch_size(format_settings.parquet.max_block_size);
|
||||
std::vector<int> row_group_indices;
|
||||
for (int i = 0; i < file_reader->num_row_groups(); ++i)
|
||||
for (int i = 0; i < row_group_total; ++i)
|
||||
{
|
||||
if (!skip_row_groups.contains(i))
|
||||
row_group_indices.emplace_back(i);
|
||||
}
|
||||
auto read_status = file_reader->GetRecordBatchReader(row_group_indices, column_indices, &current_record_batch_reader);
|
||||
if (!read_status.ok())
|
||||
throw DB::Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString());
|
||||
throw DB::ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString());
|
||||
}
|
||||
|
||||
if (is_stopped)
|
||||
return {};
|
||||
|
||||
auto batch = current_record_batch_reader->Next();
|
||||
if (!batch.ok())
|
||||
{
|
||||
throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}",
|
||||
batch.status().ToString());
|
||||
}
|
||||
if (*batch)
|
||||
{
|
||||
auto tmp_table = arrow::Table::FromRecordBatches({*batch});
|
||||
|
Loading…
Reference in New Issue
Block a user