Merge pull request #55527 from azat/values-eof-check-fix

Fix checking of non handled data for Values format
This commit is contained in:
Alexander Tokmakov 2023-10-13 18:07:02 +02:00 committed by GitHub
commit e3e105d154
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 57 additions and 12 deletions

View File

@ -1264,6 +1264,15 @@ void Client::processConfig()
global_context->setQueryKindInitial();
global_context->setQuotaClientKey(config().getString("quota_key", ""));
global_context->setQueryKind(query_kind);
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
Settings settings = global_context->getSettings();
settings.input_format_values_allow_data_after_semicolon = true;
/// Do not send it to the server
settings.input_format_values_allow_data_after_semicolon.changed = false;
global_context->setSettings(settings);
}
}

View File

@ -783,6 +783,15 @@ void LocalServer::processConfig()
global_context->setQueryKindInitial();
global_context->setQueryKind(query_kind);
if (is_multiquery && !global_context->getSettingsRef().input_format_values_allow_data_after_semicolon.changed)
{
Settings settings = global_context->getSettings();
settings.input_format_values_allow_data_after_semicolon = true;
/// Do not send it to the server
settings.input_format_values_allow_data_after_semicolon.changed = false;
global_context->setSettings(settings);
}
}

View File

@ -2020,9 +2020,6 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
{
bool echo_query = echo_queries;
/// Test tags are started with "--" so they are interpreted as comments anyway.
/// But if the echo is enabled we have to remove the test tags from `all_queries_text`
/// because we don't want test tags to be echoed.
{
/// disable logs if expects errors
TestHint test_hint(all_queries_text);
@ -2030,6 +2027,9 @@ bool ClientBase::executeMultiQuery(const String & all_queries_text)
processTextAsSingleQuery("SET send_logs_level = 'fatal'");
}
/// Test tags are started with "--" so they are interpreted as comments anyway.
/// But if the echo is enabled we have to remove the test tags from `all_queries_text`
/// because we don't want test tags to be echoed.
size_t test_tags_length = getTestTagsLength(all_queries_text);
/// Several queries separated by ';'.

View File

@ -963,6 +963,7 @@ class IColumn;
M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \
M(Bool, input_format_values_allow_data_after_semicolon, false, "For Values format: allow extra data after semicolon (used by client to interpret comments).", 0) \
M(Bool, input_format_avro_allow_missing_fields, false, "For Avro/AvroConfluent format: when field is not found in schema use default value instead of error", 0) \
/** This setting is obsolete and does nothing, left for compatibility reasons. */ \
M(Bool, input_format_avro_null_as_default, false, "For Avro/AvroConfluent format: insert default in case of null and non Nullable column", 0) \

View File

@ -170,6 +170,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines;
format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
format_settings.values.allow_data_after_semicolon = settings.input_format_values_allow_data_after_semicolon;
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
format_settings.with_names_use_header = settings.input_format_with_names_use_header;

View File

@ -341,6 +341,7 @@ struct FormatSettings
bool interpret_expressions = true;
bool deduce_templates_of_expressions = true;
bool accurate_types_of_literals = true;
bool allow_data_after_semicolon = false;
} values;
enum class ORCCompression

View File

@ -617,10 +617,12 @@ void ValuesBlockInputFormat::readSuffix()
skipWhitespaceIfAny(*buf);
if (buf->hasUnreadData())
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon");
if (!format_settings.values.allow_data_after_semicolon && !buf->eof())
throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)");
return;
}
if (buf->hasUnreadData())
if (buf->hasUnreadData() || !buf->eof())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.");
}

View File

@ -96,7 +96,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE src;"
$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;"
$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);"
$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);"
$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted"
$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2);" # trash part to be deleted
# Stop replication at the second replica and remove source table to use fetch instead of copying
$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;"
@ -116,7 +116,7 @@ query_with_retry "ALTER TABLE dst_r1 DROP PARTITION 1;"
$CLICKHOUSE_CLIENT --query="CREATE TABLE src (p UInt64, k String, d UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY k;"
$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);"
$CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);"
$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2); -- trash part to be deleted"
$CLICKHOUSE_CLIENT --query="INSERT INTO dst_r1 VALUES (1, '1', 2);" # trash part to be deleted
$CLICKHOUSE_CLIENT --query="SYSTEM STOP MERGES dst_r2;"
$CLICKHOUSE_CLIENT --query="SYSTEM STOP REPLICATION QUEUES dst_r2;"

View File

@ -21,20 +21,21 @@ expect ":) "
send -- "DROP TABLE IF EXISTS test_01179\r"
expect "Ok."
send -- "CREATE TABLE test_01179 (date DateTime64(3)) ENGINE=Memory()\r"
send -- "CREATE TABLE test_01179 (val String) ENGINE=Memory()\r"
expect "Ok."
send -- "INSERT INTO test_01179 values ('2020-01-01')\r"
send -- "INSERT INTO test_01179 values ('foo')\r"
expect "Ok."
send -- "INSERT INTO test_01179 values ('2020-01-01'); \r"
send -- "INSERT INTO test_01179 values ('foo'); \r"
expect "Ok."
send -- "INSERT INTO test_01179 values ('2020-01-01 0'); (1) \r"
send -- "INSERT INTO test_01179 values ('foo'); ('bar') \r"
expect "Cannot read data after semicolon"
send -- "SELECT date, count() FROM test_01179 GROUP BY date FORMAT TSV\r"
expect "2020-01-01 00:00:00.000\t2"
send -- "SELECT val, count() FROM test_01179 GROUP BY val FORMAT TSV\r"
expect "foo\t2"
send -- "DROP TABLE test_01179\r"
expect "Ok."

View File

@ -0,0 +1,6 @@
client no multiquery
Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)
client multiquery
local no multiquery
Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)
local multiquery

View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash

# Checks how clickhouse-client and clickhouse-local treat text that follows the
# semicolon of an INSERT ... VALUES query, both with and without the -n
# (multiquery) flag. Each case prints a label first so the output can be
# compared line by line against the reference file.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
source "$CUR_DIR"/../shell_config.sh

# An INSERT whose VALUES data is followed by a comment after the semicolon.
insert_with_trailing_comment="insert into function null() values (1); -- { foo }"
# Message extracted (grep -o) from the combined stdout/stderr of the runs without -n.
rejection_message="Cannot read data after semicolon (and input_format_values_allow_data_after_semicolon=0)"

echo "client no multiquery"
$CLICKHOUSE_CLIENT -q "$insert_with_trailing_comment" |& grep -F -o "$rejection_message"

echo "client multiquery"
$CLICKHOUSE_CLIENT -n -q "$insert_with_trailing_comment"

echo "local no multiquery"
$CLICKHOUSE_LOCAL -q "$insert_with_trailing_comment" |& grep -F -o "$rejection_message"

echo "local multiquery"
$CLICKHOUSE_LOCAL -n -q "$insert_with_trailing_comment"