diff --git a/src/Formats/NativeReader.cpp b/src/Formats/NativeReader.cpp index ed3aca43d52..3ad0ce5cfc4 100644 --- a/src/Formats/NativeReader.cpp +++ b/src/Formats/NativeReader.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes extern const int INCORRECT_INDEX; extern const int LOGICAL_ERROR; extern const int CANNOT_READ_ALL_DATA; + extern const int INCORRECT_DATA; } @@ -31,8 +32,8 @@ NativeReader::NativeReader(ReadBuffer & istr_, UInt64 server_revision_) { } -NativeReader::NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_) - : istr(istr_), header(header_), server_revision(server_revision_) +NativeReader::NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool skip_unknown_columns_) + : istr(istr_), header(header_), server_revision(server_revision_), skip_unknown_columns(skip_unknown_columns_) { } @@ -186,18 +187,29 @@ Block NativeReader::read() column.column = std::move(read_column); + bool use_in_result = true; if (header) { - /// Support insert from old clients without low cardinality type. - auto & header_column = header.getByName(column.name); - if (!header_column.type->equals(*column.type)) + if (header.has(column.name)) { - column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type); - column.type = header.safeGetByPosition(i).type; + /// Support insert from old clients without low cardinality type. + auto & header_column = header.getByName(column.name); + if (!header_column.type->equals(*column.type)) + { + column.column = recursiveTypeConversion(column.column, column.type, header.safeGetByPosition(i).type); + column.type = header.safeGetByPosition(i).type; + } + } + else + { + if (!skip_unknown_columns) + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown column with name {} found while reading data in Native format", column.name); + use_in_result = false; } } - res.insert(std::move(column)); + if (use_in_result) + res.insert(std::move(column)); if (use_index) ++index_column_it; diff --git a/src/Formats/NativeReader.h b/src/Formats/NativeReader.h index 1f9eb8b9764..3ae53d45faf 100644 --- a/src/Formats/NativeReader.h +++ b/src/Formats/NativeReader.h @@ -24,7 +24,7 @@ public: /// For cases when data structure (header) is known in advance. /// NOTE We may use header for data validation and/or type conversions. It is not implemented. - NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_); + NativeReader(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool skip_unknown_columns_ = false); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. NativeReader(ReadBuffer & istr_, UInt64 server_revision_, @@ -43,6 +43,7 @@ private: ReadBuffer & istr; Block header; UInt64 server_revision; + bool skip_unknown_columns; bool use_index = false; IndexForNativeFormat::Blocks::const_iterator index_block_it; diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 71ed197da3e..f8226b137fb 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -703,9 +703,9 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & if constexpr (!std::is_same_v) { /** CSV format can contain insignificant spaces and tabs. - * Usually the task of skipping them is for the calling code. - * But in this case, it will be difficult to do this, so remove the trailing whitespace by ourself. - */ + * Usually the task of skipping them is for the calling code. + * But in this case, it will be difficult to do this, so remove the trailing whitespace by ourself. + */ size_t size = s.size(); while (size > 0 && (s[size - 1] == ' ' || s[size - 1] == '\t')) --size; diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index c1dc60022f5..18c3dfa3d4b 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -15,9 +15,9 @@ namespace DB class NativeInputFormat final : public IInputFormat { public: - NativeInputFormat(ReadBuffer & buf, const Block & header_) + NativeInputFormat(ReadBuffer & buf, const Block & header_, const FormatSettings & settings) : IInputFormat(header_, buf) - , reader(std::make_unique(buf, header_, 0)) + , reader(std::make_unique(buf, header_, 0, settings.skip_unknown_fields)) , header(header_) {} String getName() const override { return "Native"; } @@ -112,10 +112,11 @@ void registerInputFormatNative(FormatFactory & factory) ReadBuffer & buf, const Block & sample, const RowInputFormatParams &, - const FormatSettings &) + const FormatSettings & settings) { - return std::make_shared(buf, sample); + return std::make_shared(buf, sample, settings); }); + factory.markFormatSupportsSamplingColumns("Native"); } void registerOutputFormatNative(FormatFactory & factory) diff --git a/tests/performance/formats_columns_sampling.xml b/tests/performance/formats_columns_sampling.xml index 7812823a175..f5dd4395de1 100644 --- a/tests/performance/formats_columns_sampling.xml +++ b/tests/performance/formats_columns_sampling.xml @@ -19,6 +19,7 @@ ORC Parquet Arrow + Native