From e284bf83d07ed5063d1c330cf09ed93514b933d9 Mon Sep 17 00:00:00 2001 From: hcz Date: Wed, 1 Jul 2020 11:21:53 +0800 Subject: [PATCH] Optimize and fix --- .../Impl/TabSeparatedRawRowInputFormat.h | 21 +++++-------------- .../01324_insert_tsv_raw.reference | 3 ++- .../0_stateless/01324_insert_tsv_raw.sql | 7 ++++--- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h index c5982714e34..fe113c60a0d 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRawRowInputFormat.h @@ -10,7 +10,7 @@ namespace DB { /** A stream to input data in tsv format, but without escaping individual values. - * It only supports one string column + * It only supports columns without '\n' or '\t' */ class TabSeparatedRawRowInputFormat : public TabSeparatedRowInputFormat { @@ -33,23 +33,12 @@ public: bool readField(IColumn & column, const DataTypePtr & type, bool) override { - // TODO: possible to optimize - std::string buf; + char * pos = find_first_symbols<'\n', '\t'>(in.position(), in.buffer().end()); + ReadBufferFromMemory cell(in.position(), pos - in.position()); - while (!in.eof()) - { - char c = *in.position(); + type->deserializeAsWholeText(column, cell, format_settings); - if (c == '\n' || c == '\t') - break; - - in.ignore(); - buf.push_back(c); - } - - ReadBufferFromString line_in(buf); - - type->deserializeAsWholeText(column, line_in, format_settings); + in.position() = pos; return true; } diff --git a/tests/queries/0_stateless/01324_insert_tsv_raw.reference b/tests/queries/0_stateless/01324_insert_tsv_raw.reference index f0f74866cd2..ce809415693 100644 --- a/tests/queries/0_stateless/01324_insert_tsv_raw.reference +++ b/tests/queries/0_stateless/01324_insert_tsv_raw.reference @@ -1 +1,2 @@ -"a 1 +"a 1 \ \\ "\"" "\\"" +["\"a ", "1", "\\", "\\\\", "\"\\\"\"", "\"\\\\\"\""] diff --git a/tests/queries/0_stateless/01324_insert_tsv_raw.sql b/tests/queries/0_stateless/01324_insert_tsv_raw.sql index 4bcc3a2268b..c3812730e5b 100644 --- a/tests/queries/0_stateless/01324_insert_tsv_raw.sql +++ b/tests/queries/0_stateless/01324_insert_tsv_raw.sql @@ -1,7 +1,8 @@ drop table if exists tsv_raw; -create table tsv_raw (a String, b Int64) engine = Memory; -insert into tsv_raw format TSVRaw "a 1 +create table tsv_raw (strval String, intval Int64, b1 String, b2 String, b3 String, b4 String) engine = Memory; +insert into tsv_raw format TSVRaw "a 1 \ \\ "\"" "\\"" ; -select * from tsv_raw; +select * from tsv_raw format TSVRaw; +select * from tsv_raw format JSONCompactEachRow; drop table tsv_raw;