From bca580653dc06493a1cea06753a5078983803082 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 20 Aug 2019 02:09:02 +0300
Subject: [PATCH] fix csv input

---
Reviewer notes (text in this section is ignored by `git am`):
  * All four call sites now test eof() before dereferencing position(),
    relying on short-circuit evaluation so the dereference is skipped at
    end of input.
  * at_delimiter must combine the two checks with `&&`. With `||` the flag
    is true for every field while the stream is not at EOF, and at EOF the
    position is still dereferenced.
  * Leading whitespace of the hunks was reconstructed; verify it against
    the target tree before applying.

 dbms/src/Formats/CSVRowInputStream.cpp                 | 10 ++++------
 dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp | 10 ++++------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/dbms/src/Formats/CSVRowInputStream.cpp b/dbms/src/Formats/CSVRowInputStream.cpp
index 662e6306e25..4b1b6198486 100644
--- a/dbms/src/Formats/CSVRowInputStream.cpp
+++ b/dbms/src/Formats/CSVRowInputStream.cpp
@@ -348,10 +348,9 @@ bool OPTIMIZE(1) CSVRowInputStream::parseRowAndPrintDiagnosticInfo(MutableColumn
             const auto & current_column_type = data_types[table_column];
             const bool is_last_file_column =
                     file_column + 1 == column_indexes_for_input_fields.size();
-            const bool at_delimiter = *istr.position() == delimiter;
+            const bool at_delimiter = !istr.eof() && *istr.position() == delimiter;
             const bool at_last_column_line_end = is_last_file_column
-                                                 && (*istr.position() == '\n' || *istr.position() == '\r'
-                                                     || istr.eof());
+                                                 && (istr.eof() || *istr.position() == '\n' || *istr.position() == '\r');
 
             out << "Column " << file_column << ", " << std::string((file_column < 10 ? 2 : file_column < 100 ? 1 : 0), ' ')
                 << "name: " << header.safeGetByPosition(table_column).name << ", " << std::string(max_length_of_column_name - header.safeGetByPosition(table_column).name.size(), ' ')
@@ -514,10 +513,9 @@ void CSVRowInputStream::updateDiagnosticInfo()
 
 bool CSVRowInputStream::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column, size_t column_idx)
 {
-    const bool at_delimiter = *istr.position() == format_settings.csv.delimiter;
+    const bool at_delimiter = !istr.eof() && *istr.position() == format_settings.csv.delimiter;
     const bool at_last_column_line_end = is_last_file_column
-                                         && (*istr.position() == '\n' || *istr.position() == '\r'
-                                             || istr.eof());
+                                         && (istr.eof() || *istr.position() == '\n' || *istr.position() == '\r');
 
     if (format_settings.csv.empty_as_default
         && (at_delimiter || at_last_column_line_end))
diff --git a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
index b13436a6600..2b8c1be6016 100644
--- a/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/dbms/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@@ -349,10 +349,9 @@ bool OPTIMIZE(1) CSVRowInputFormat::parseRowAndPrintDiagnosticInfo(MutableColumn
             const auto & current_column_type = data_types[table_column];
             const bool is_last_file_column =
                     file_column + 1 == column_indexes_for_input_fields.size();
-            const bool at_delimiter = *in.position() == delimiter;
+            const bool at_delimiter = !in.eof() && *in.position() == delimiter;
             const bool at_last_column_line_end = is_last_file_column
-                                                 && (*in.position() == '\n' || *in.position() == '\r'
-                                                     || in.eof());
+                                                 && (in.eof() || *in.position() == '\n' || *in.position() == '\r');
 
             auto & header = getPort().getHeader();
             out << "Column " << file_column << ", " << std::string((file_column < 10 ? 2 : file_column < 100 ? 1 : 0), ' ')
@@ -516,10 +515,9 @@ void CSVRowInputFormat::updateDiagnosticInfo()
 
 bool CSVRowInputFormat::readField(IColumn & column, const DataTypePtr & type, bool is_last_file_column, size_t column_idx)
 {
-    const bool at_delimiter = *in.position() == format_settings.csv.delimiter;
+    const bool at_delimiter = !in.eof() && *in.position() == format_settings.csv.delimiter;
     const bool at_last_column_line_end = is_last_file_column
-                                         && (*in.position() == '\n' || *in.position() == '\r'
-                                             || in.eof());
+                                         && (in.eof() || *in.position() == '\n' || *in.position() == '\r');
 
     if (format_settings.csv.empty_as_default
         && (at_delimiter || at_last_column_line_end))