From f379d9cac5cb63370ecf364ed92d9045207bfc93 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Wed, 11 Oct 2023 20:38:40 +0200 Subject: [PATCH 1/4] Fix checking of non handled data for Values format PeekableReadBuffer::hasUnreadData() does not checks the underlying buffer, and so it simply ignore some issues, like: INSERT INTO test_01179_str values ('foo'); ('bar') Signed-off-by: Azat Khuzhin --- .../Formats/Impl/ValuesBlockInputFormat.cpp | 4 ++-- .../01179_insert_values_semicolon.expect | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 8126c472f70..4a209fba3c5 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -615,12 +615,12 @@ void ValuesBlockInputFormat::readSuffix() { ++buf->position(); skipWhitespaceIfAny(*buf); - if (buf->hasUnreadData()) + if (buf->hasUnreadData() || !buf->eof()) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon"); return; } - if (buf->hasUnreadData()) + if (buf->hasUnreadData() || !buf->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unread data in PeekableReadBuffer will be lost. Most likely it's a bug."); } diff --git a/tests/queries/0_stateless/01179_insert_values_semicolon.expect b/tests/queries/0_stateless/01179_insert_values_semicolon.expect index cb22fb8265c..072be483e4f 100755 --- a/tests/queries/0_stateless/01179_insert_values_semicolon.expect +++ b/tests/queries/0_stateless/01179_insert_values_semicolon.expect @@ -21,20 +21,21 @@ expect ":) " send -- "DROP TABLE IF EXISTS test_01179\r" expect "Ok." -send -- "CREATE TABLE test_01179 (date DateTime64(3)) ENGINE=Memory()\r" +send -- "CREATE TABLE test_01179 (val String) ENGINE=Memory()\r" expect "Ok." -send -- "INSERT INTO test_01179 values ('2020-01-01')\r" +send -- "INSERT INTO test_01179 values ('foo')\r" + expect "Ok." -send -- "INSERT INTO test_01179 values ('2020-01-01'); \r" +send -- "INSERT INTO test_01179 values ('foo'); \r" expect "Ok." -send -- "INSERT INTO test_01179 values ('2020-01-01 0'); (1) \r" +send -- "INSERT INTO test_01179 values ('foo'); ('bar') \r" expect "Cannot read data after semicolon" -send -- "SELECT date, count() FROM test_01179 GROUP BY date FORMAT TSV\r" -expect "2020-01-01 00:00:00.000\t2" +send -- "SELECT val, count() FROM test_01179 GROUP BY val FORMAT TSV\r" +expect "foo\t2" send -- "DROP TABLE test_01179\r" expect "Ok." From a5cced2b9e031d47d0f3a59f165c46590eaeed99 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 12 Oct 2023 12:24:21 +0200 Subject: [PATCH 2/4] Move comment into correct place Signed-off-by: Azat Khuzhin --- src/Client/ClientBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 9ca104ff942..691f488172e 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2020,9 +2020,6 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) { bool echo_query = echo_queries; - /// Test tags are started with "--" so they are interpreted as comments anyway. - /// But if the echo is enabled we have to remove the test tags from `all_queries_text` - /// because we don't want test tags to be echoed. { /// disable logs if expects errors TestHint test_hint(all_queries_text); @@ -2030,6 +2027,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text) processTextAsSingleQuery("SET send_logs_level = 'fatal'"); } + /// Test tags are started with "--" so they are interpreted as comments anyway. + /// But if the echo is enabled we have to remove the test tags from `all_queries_text` + /// because we don't want test tags to be echoed. size_t test_tags_length = getTestTagsLength(all_queries_text); /// Several queries separated by ';'. From 2cbb069b68f480e1ccfea70dd3120d6dabaf1149 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 12 Oct 2023 12:39:08 +0200 Subject: [PATCH 3/4] Add ability to ignore data after semicolon in Values format This is required for client, to handle comments in multiquery mode. v0: separate context for input format v2: cannot use separate context since params and stuff are changed in global context v3: do not sent this setting to the server (breaks queries for readonly profiles) Signed-off-by: Azat Khuzhin --- programs/client/Client.cpp | 9 +++++++++ programs/local/LocalServer.cpp | 9 +++++++++ src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + .../Formats/Impl/ValuesBlockInputFormat.cpp | 4 +++- ...at_values_allow_data_after_semicolon.reference | 6 ++++++ ...ut_format_values_allow_data_after_semicolon.sh | 15 +++++++++++++++ 8 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference create mode 100755 tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 7d70b81a178..9f316e54f85 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -1264,6 +1264,15 @@ void Client::processConfig() global_context->setQueryKindInitial(); global_context->setQuotaClientKey(config().getString("quota_key", "")); global_context->setQueryKind(query_kind); + + if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed) + { + Settings settings = global_context->getSettings(); + settings.input_format_values_allow_data_after_semicolon = true; + /// Do not send it to the server + settings.input_format_values_allow_data_after_semicolon.changed = false; + global_context->setSettings(settings); + } } diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 9fb629a0871..e074cb638e8 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -783,6 +783,15 @@ void LocalServer::processConfig() global_context->setQueryKindInitial(); global_context->setQueryKind(query_kind); + + if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed) + { + Settings settings = global_context->getSettings(); + settings.input_format_values_allow_data_after_semicolon = true; + /// Do not send it to the server + settings.input_format_values_allow_data_after_semicolon.changed = false; + global_context->setSettings(settings); + } } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c08425c03fd..c60d6511abc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -963,6 +963,7 @@ class IColumn; M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \ M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ + M(Bool, input_format_values_allow_data_after_semicolon, false, "For Values format: allow extra data after semicolon (used by client to interpret comments).", 0) \ M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \ /** This setting is obsolete and do nothing, left for compatibility reasons. */ \ M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 862e51aa088..b355d785715 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -170,6 +170,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings) format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines; format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns; format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals; + format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon; format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions; format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions; format_settings.with_names_use_header = settings.input_format_with_names_use_header; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index ef8df02b208..655aaa81d35 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -341,6 +341,7 @@ struct FormatSettings bool interpret_expressions = true; bool deduce_templates_of_expressions = true; bool accurate_types_of_literals = true; + bool allow_data_after_semicolon = false; } values; enum class ORCCompression diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 4a209fba3c5..b0ee2f7797a 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -615,8 +615,10 @@ void ValuesBlockInputFormat::readSuffix() { ++buf->position(); skipWhitespaceIfAny(*buf); - if (buf->hasUnreadData() || !buf->eof()) + if (buf->hasUnreadData()) throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon"); + if (!format_settings.values.allow_data_after_semicolon && !buf->eof()) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)"); return; } diff --git a/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference new file mode 100644 index 00000000000..250a673a26b --- /dev/null +++ b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.reference @@ -0,0 +1,6 @@ +client no multiquery +Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0) +client multiquery +local no multiquery +Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0) +local multiquery diff --git a/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh new file mode 100755 index 00000000000..8164c91b2ae --- /dev/null +++ b/tests/queries/0_stateless/02898_input_format_values_allow_data_after_semicolon.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "client no multiquery" +$CLICKHOUSE_CLIENT -q "insert into function null() values (1); -- { foo }" |& grep -F -o "Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)" +echo "client multiquery" +$CLICKHOUSE_CLIENT -n -q "insert into function null() values (1); -- { foo }" + +echo "local no multiquery" +$CLICKHOUSE_LOCAL -q "insert into function null() values (1); -- { foo }" |& grep -F -o "Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)" +echo "local multiquery" +$CLICKHOUSE_LOCAL -n -q "insert into function null() values (1); -- { foo }" From fd16e52164a6a749ce70805cd3bd404b0de93686 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 12 Oct 2023 14:57:03 +0200 Subject: [PATCH 4/4] Update 00626_replace_partition_from_table_zookeeper to avoid using comments after semicolon It does not have multiquery, so better to use shell comments Signed-off-by: Azat Khuzhin --- .../00626_replace_partition_from_table_zookeeper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index 334025cba28..ffbf4df4ba7 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -96,7 +96,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE src;" $CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2);" # trash part to be deleted # Stop replication at the second replica and remove source table to use fetch instead of copying $CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;" @@ -116,7 +116,7 @@ query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;" $CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" -$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted" +$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2);" # trash part to be deleted $CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES dst_r2;" $CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;"