From 30394113a15018a9f0c6b46f7d90c26ac948efa0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Sep 2019 17:16:31 +0300 Subject: [PATCH 1/8] Add LowCardinality conversion to Native format if types of columns are not equals. --- dbms/src/DataStreams/NativeBlockInputStream.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 2dbedd01b38..ce72325f351 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -155,10 +155,13 @@ Block NativeBlockInputStream::readImpl() /// Support insert from old clients without low cardinality type. bool revision_without_low_cardinality = server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE; - if (header && (convert_types_to_low_cardinality || revision_without_low_cardinality)) + if (header && (convert_types_to_low_cardinality + || revision_without_low_cardinality + || header.getByPosition(i).type->equals(*column.type))) { column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); column.type = header.getByPosition(i).type; + column.name = header.getByPosition(i).name; } res.insert(std::move(column)); From 071b5dd514f7a5ba02d5341c5b55fbbb49ffb8f9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Sep 2019 17:30:13 +0300 Subject: [PATCH 2/8] Add LowCardinality conversion to Native format if types of columns are not equals. --- dbms/src/DataStreams/NativeBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index ce72325f351..e77540cfb11 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -157,7 +157,7 @@ Block NativeBlockInputStream::readImpl() bool revision_without_low_cardinality = server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE; if (header && (convert_types_to_low_cardinality || revision_without_low_cardinality - || header.getByPosition(i).type->equals(*column.type))) + || !header.getByPosition(i).type->equals(*column.type))) { column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); column.type = header.getByPosition(i).type; From 8dd8ab7d1fb6cdcbcdf5087065dada70e104d9d2 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Sep 2019 19:31:41 +0300 Subject: [PATCH 3/8] Use header in native format created in InterpreterSelectQuery. Fill missed columns in Native format. --- dbms/src/DataStreams/NativeBlockInputStream.cpp | 8 ++++++++ dbms/src/Interpreters/InterpreterInsertQuery.cpp | 5 +---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index e77540cfb11..94b3a0d5917 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -180,6 +180,14 @@ Block NativeBlockInputStream::readImpl() index_column_it = index_block_it->columns.begin(); } + if (rows && header) + { + /// Allow to skip columns. Fill them with default values. + for (auto & col : header) + if (!res.has(col.name)) + res.insert({col.type->createColumnConstWithDefaultValue(rows), col.type, col.name}); + } + return res; } diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp index 974b55f446b..18652083f06 100644 --- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp +++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp @@ -65,10 +65,7 @@ Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const /// If the query does not include information about columns if (!query.columns) { - /// Format Native ignores header and write blocks as is. - if (query.format == "Native") - return {}; - else if (query.no_destination) + if (query.no_destination) return table->getSampleBlockWithVirtuals(); else return table_sample_non_materialized; From 7ddc8a6dded3ccb8f9007c543ee5780585767269 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Sep 2019 19:41:05 +0300 Subject: [PATCH 4/8] Use header in native format created in InterpreterSelectQuery. Fill missed columns in Native format. --- dbms/src/DataStreams/NativeBlockInputStream.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 94b3a0d5917..9e6408decab 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -183,9 +183,17 @@ Block NativeBlockInputStream::readImpl() if (rows && header) { /// Allow to skip columns. Fill them with default values. + Block tmp_res; + for (auto & col : header) - if (!res.has(col.name)) - res.insert({col.type->createColumnConstWithDefaultValue(rows), col.type, col.name}); + { + if (res.has(col.name)) + tmp_res.insert(std::move(res.getByName(col.name))); + else + tmp_res.insert({col.type->createColumnConstWithDefaultValue(rows), col.type, col.name}); + } + + res.swap(tmp_res); } return res; From bcc764e2cdfa7ae5c65dfc84d893d375769a756c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Sep 2019 19:45:53 +0300 Subject: [PATCH 5/8] Use header in native format created in InterpreterSelectQuery. Fill missed columns in Native format. --- dbms/src/DataStreams/NativeBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 9e6408decab..2ffd92a377a 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -190,7 +190,7 @@ Block NativeBlockInputStream::readImpl() if (res.has(col.name)) tmp_res.insert(std::move(res.getByName(col.name))); else - tmp_res.insert({col.type->createColumnConstWithDefaultValue(rows), col.type, col.name}); + tmp_res.insert({col.type->createColumn()->cloneResized(rows), col.type, col.name}); } res.swap(tmp_res); From 959744fede85dd0a5b10b41d2addb2ad70a3a6c6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 12 Sep 2019 12:34:47 +0300 Subject: [PATCH 6/8] Address header by name in NativeBlockInputStream. --- dbms/src/DataStreams/NativeBlockInputStream.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 2ffd92a377a..9be39f2fb68 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -153,15 +153,15 @@ Block NativeBlockInputStream::readImpl() column.column = std::move(read_column); - /// Support insert from old clients without low cardinality type. - bool revision_without_low_cardinality = server_revision && server_revision < DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE; - if (header && (convert_types_to_low_cardinality - || revision_without_low_cardinality - || !header.getByPosition(i).type->equals(*column.type))) + if (header) { - column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); - column.type = header.getByPosition(i).type; - column.name = header.getByPosition(i).name; + /// Support insert from old clients without low cardinality type. + auto & header_column = header.getByName(column.name); + if (!header_column.type->equals(*column.type)) + { + column.column = recursiveLowCardinalityConversion(column.column, column.type, header.getByPosition(i).type); + column.type = header.getByPosition(i).type; + } } res.insert(std::move(column)); From 4115d79a3020fba79e2eb44f056e4f457a72374c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 12 Sep 2019 12:51:29 +0300 Subject: [PATCH 7/8] Added test for LowCardinality conversions in Native over http. --- ..._low_cardinality_and_native_http.reference | 4 ++++ .../01010_low_cardinality_and_native_http.sh | 21 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.reference create mode 100755 dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh diff --git a/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.reference b/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.reference new file mode 100644 index 00000000000..0589451d40c --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.reference @@ -0,0 +1,4 @@ +abc +---- +abc +abc diff --git a/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh b/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh new file mode 100755 index 00000000000..a3149294c40 --- /dev/null +++ b/dbms/tests/queries/0_stateless/01010_low_cardinality_and_native_http.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. $CURDIR/../shell_config.sh + + +$CLICKHOUSE_CLIENT --query="drop table if exists tab_str"; +$CLICKHOUSE_CLIENT --query="drop table if exists tab_str_lc"; + +$CLICKHOUSE_CLIENT --query="create table tab_str (x String) engine = MergeTree order by tuple()"; +$CLICKHOUSE_CLIENT --query="create table tab_str_lc (x LowCardinality(String)) engine = MergeTree order by tuple()"; +$CLICKHOUSE_CLIENT --query="insert into tab_str values ('abc')"; + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=select+x+from+tab_str+format+Native" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT+INTO+tab_str_lc+FORMAT+Native" --data-binary @- + +$CLICKHOUSE_CLIENT --query="select x from tab_str_lc"; + +${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=select+x+from+tab_str_lc+format+Native" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL_PARAMS}&query=INSERT+INTO+tab_str+FORMAT+Native" --data-binary @- + +$CLICKHOUSE_CLIENT --query="select '----'"; +$CLICKHOUSE_CLIENT --query="select x from tab_str"; From d7596e51b714d4af654ebe9977f6f55d10ab34d7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Thu, 12 Sep 2019 14:33:46 +0300 Subject: [PATCH 8/8] Removed convert_types_to_low_cardinality from NativeBlockInputStream. --- dbms/programs/server/TCPHandler.cpp | 6 ++---- dbms/src/DataStreams/NativeBlockInputStream.cpp | 4 ++-- dbms/src/DataStreams/NativeBlockInputStream.h | 4 +--- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/dbms/programs/server/TCPHandler.cpp b/dbms/programs/server/TCPHandler.cpp index 7edb119f827..38eec85fbb4 100644 --- a/dbms/programs/server/TCPHandler.cpp +++ b/dbms/programs/server/TCPHandler.cpp @@ -1000,8 +1000,7 @@ void TCPHandler::receiveUnexpectedData() auto skip_block_in = std::make_shared( *maybe_compressed_in, last_block_in.header, - client_revision, - !connection_context.getSettingsRef().low_cardinality_allow_in_native_format); + client_revision); Block skip_block = skip_block_in->read(); throw NetException("Unexpected packet Data received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); @@ -1028,8 +1027,7 @@ void TCPHandler::initBlockInput() state.block_in = std::make_shared( *state.maybe_compressed_in, header, - client_revision, - !connection_context.getSettingsRef().low_cardinality_allow_in_native_format); + client_revision); } } diff --git a/dbms/src/DataStreams/NativeBlockInputStream.cpp b/dbms/src/DataStreams/NativeBlockInputStream.cpp index 9be39f2fb68..659cfcbdfca 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.cpp +++ b/dbms/src/DataStreams/NativeBlockInputStream.cpp @@ -29,8 +29,8 @@ NativeBlockInputStream::NativeBlockInputStream(ReadBuffer & istr_, UInt64 server { } -NativeBlockInputStream::NativeBlockInputStream(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool convert_types_to_low_cardinality_) - : istr(istr_), header(header_), server_revision(server_revision_), convert_types_to_low_cardinality(convert_types_to_low_cardinality_) +NativeBlockInputStream::NativeBlockInputStream(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_) + : istr(istr_), header(header_), server_revision(server_revision_) { } diff --git a/dbms/src/DataStreams/NativeBlockInputStream.h b/dbms/src/DataStreams/NativeBlockInputStream.h index 72f6e0d211b..0502d077e3a 100644 --- a/dbms/src/DataStreams/NativeBlockInputStream.h +++ b/dbms/src/DataStreams/NativeBlockInputStream.h @@ -65,7 +65,7 @@ public: /// For cases when data structure (header) is known in advance. /// NOTE We may use header for data validation and/or type conversions. It is not implemented. - NativeBlockInputStream(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_, bool convert_types_to_low_cardinality_ = false); + NativeBlockInputStream(ReadBuffer & istr_, const Block & header_, UInt64 server_revision_); /// For cases when we have an index. It allows to skip columns. Only columns specified in the index will be read. NativeBlockInputStream(ReadBuffer & istr_, UInt64 server_revision_, @@ -91,8 +91,6 @@ private: IndexForNativeFormat::Blocks::const_iterator index_block_end; IndexOfBlockForNativeFormat::Columns::const_iterator index_column_it; - bool convert_types_to_low_cardinality = false; - /// If an index is specified, then `istr` must be CompressedReadBufferFromFile. Unused otherwise. CompressedReadBufferFromFile * istr_concrete = nullptr;