From d1c45662e8560177eb1e90d5ad159b58ff396f44 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Fri, 26 Jul 2024 20:24:41 +0000 Subject: [PATCH 01/43] Fix: missing conversion in IN operator with parallel replicas --- src/Analyzer/FunctionNode.cpp | 6 ------ ...arallel_replicas_lost_decimal_conversion.reference | 0 ...3209_parallel_replicas_lost_decimal_conversion.sql | 11 +++++++++++ 3 files changed, 11 insertions(+), 6 deletions(-) create mode 100644 tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.reference create mode 100644 tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.sql diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index e98b04fe9a9..debed0983fd 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -239,12 +239,6 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const if (function_name == "_CAST" && !argument_nodes.empty() && argument_nodes[0]->getNodeType() == QueryTreeNodeType::CONSTANT) new_options.add_cast_for_constants = false; - /// Avoid cast for `IN tuple(...)` expression. - /// Tuples could be quite big, and adding a type may significantly increase query size. - /// It should be safe because set type for `column IN tuple` is deduced from `column` type. - if (isNameOfInFunction(function_name) && argument_nodes.size() > 1 && argument_nodes[1]->getNodeType() == QueryTreeNodeType::CONSTANT) - new_options.add_cast_for_constants = false; - const auto & parameters = getParameters(); if (!parameters.getNodes().empty()) { diff --git a/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.reference b/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.sql b/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.sql new file mode 100644 index 00000000000..bcc9dec306b --- /dev/null +++ b/tests/queries/0_stateless/03209_parallel_replicas_lost_decimal_conversion.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS t_03209 SYNC; + +CREATE TABLE t_03209 ( `a` Decimal(18, 0), `b` Decimal(18, 1), `c` Decimal(36, 0) ) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_03209', 'r1') ORDER BY tuple(); +INSERT INTO t_03209 VALUES ('33', '44.4', '35'); + +SET max_parallel_replicas = 2, cluster_for_parallel_replicas='parallel_replicas'; + +SELECT * FROM t_03209 WHERE a IN toDecimal32('33.3000', 4) SETTINGS allow_experimental_parallel_reading_from_replicas=0; +SELECT * FROM t_03209 WHERE a IN toDecimal32('33.3000', 4) SETTINGS allow_experimental_parallel_reading_from_replicas=1; + +DROP TABLE t_03209 SYNC; From 4c7d93dcc87ac4056c73356c155e441e1dffba2a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Sat, 27 Jul 2024 17:03:19 +0000 Subject: [PATCH 02/43] Update test output --- ...imize_skip_unused_shards_rewrite_in.reference | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 28dbb9215a8..0a7a9d64208 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -24,8 +24,8 @@ select splitByString('IN', query)[-1] from system.query_log where query 
like concat('%', currentDatabase(), '%AS%id_no%') and type = 'QueryFinish' order by query; - (0, 2) - (0, 2) + _CAST((0, 2), \'Tuple(UInt8, UInt8)\') + _CAST((0, 2), \'Tuple(UInt8, UInt8)\') -- -- w/ optimize_skip_unused_shards_rewrite_in=1 -- @@ -45,8 +45,8 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_02%') and type = 'QueryFinish' order by query; - tuple(0) - tuple(2) + _CAST(tuple(0), \'Tuple(UInt8)\') + _CAST(tuple(2), \'Tuple(UInt8)\') select 'optimize_skip_unused_shards_rewrite_in(2,)'; optimize_skip_unused_shards_rewrite_in(2,) with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); @@ -59,7 +59,7 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_2%') and type = 'QueryFinish' order by query; - tuple(2) + _CAST(tuple(2), \'Tuple(UInt8)\') select 'optimize_skip_unused_shards_rewrite_in(0,)'; optimize_skip_unused_shards_rewrite_in(0,) with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0,); @@ -73,7 +73,7 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_00%') and type = 'QueryFinish' order by query; - tuple(0) + _CAST(tuple(0), \'Tuple(UInt8)\') -- signed column select 'signed column'; signed column @@ -88,8 +88,8 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%key_signed%') and type = 'QueryFinish' order by query; - tuple(-1) - tuple(-2) + _CAST(tuple(-1), \'Tuple(Int8)\') + _CAST(tuple(-2), \'Tuple(Int8)\') -- not tuple select * from dist_01756 where dummy in (0); 0 From ea06447ca3b5330e09c0426574583b11c2ecaaa0 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 30 Jul 2024 22:03:29 +0000 Subject: [PATCH 03/43] Fix: skip cast only if constant doesn't have source expression --- src/Analyzer/FunctionNode.cpp | 7 +++++++ ...ize_skip_unused_shards_rewrite_in.reference | 18 +++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index debed0983fd..8e4e0725a2d 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -239,6 +239,13 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const if (function_name == "_CAST" && !argument_nodes.empty() && argument_nodes[0]->getNodeType() == QueryTreeNodeType::CONSTANT) new_options.add_cast_for_constants = false; + /// Avoid cast for `IN tuple(...)` expression. + /// Tuples could be quite big, and adding a type may significantly increase query size. + /// It should be safe because set type for `column IN tuple` is deduced from `column` type. 
+ if (isNameOfInFunction(function_name) && argument_nodes.size() > 1 && argument_nodes[1]->getNodeType() == QueryTreeNodeType::CONSTANT + && !static_cast(argument_nodes[1].get())->hasSourceExpression()) + new_options.add_cast_for_constants = false; + const auto & parameters = getParameters(); if (!parameters.getNodes().empty()) { diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 0a7a9d64208..8d064020c1f 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -24,10 +24,10 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_no%') and type = 'QueryFinish' order by query; - _CAST((0, 2), \'Tuple(UInt8, UInt8)\') - _CAST((0, 2), \'Tuple(UInt8, UInt8)\') + (0, 2) + (0, 2) -- --- w/ optimize_skip_unused_shards_rewrite_in=1 +-- w/ otimize_skip_unused_shards_rewrite_in=1 -- set optimize_skip_unused_shards_rewrite_in=1; @@ -45,8 +45,8 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_02%') and type = 'QueryFinish' order by query; - _CAST(tuple(0), \'Tuple(UInt8)\') - _CAST(tuple(2), \'Tuple(UInt8)\') + tuple(0) + tuple(2) select 'optimize_skip_unused_shards_rewrite_in(2,)'; optimize_skip_unused_shards_rewrite_in(2,) with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); @@ -59,7 +59,7 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_2%') and type = 'QueryFinish' order by query; - _CAST(tuple(2), \'Tuple(UInt8)\') + tuple(2) select 'optimize_skip_unused_shards_rewrite_in(0,)'; optimize_skip_unused_shards_rewrite_in(0,) with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0,); @@ -73,7 +73,7 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_00%') and type = 'QueryFinish' order by query; - _CAST(tuple(0), \'Tuple(UInt8)\') + tuple(0) -- signed column select 'signed column'; signed column @@ -88,8 +88,8 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%key_signed%') and type = 'QueryFinish' order by query; - _CAST(tuple(-1), \'Tuple(Int8)\') - _CAST(tuple(-2), \'Tuple(Int8)\') + tuple(-1) + tuple(-2) -- not tuple select * from dist_01756 where dummy in (0); 0 From d23e9fc9eea9f21e5168e618f63add35704dda4b Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 1 Aug 2024 13:10:13 +0000 Subject: [PATCH 04/43] Fix text --- ...56_optimize_skip_unused_shards_rewrite_in.reference | 10 +++++----- .../01756_optimize_skip_unused_shards_rewrite_in.sql | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference index 8d064020c1f..237bca6305a 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.reference @@ -27,7 +27,7 @@ order by query; (0, 2) (0, 2) -- --- w/ otimize_skip_unused_shards_rewrite_in=1 +-- w/ optimize_skip_unused_shards_rewrite_in=1 -- 
set optimize_skip_unused_shards_rewrite_in=1; @@ -49,7 +49,7 @@ order by query; tuple(2) select 'optimize_skip_unused_shards_rewrite_in(2,)'; optimize_skip_unused_shards_rewrite_in(2,) -with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); +with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2); system flush logs; select splitByString('IN', query)[-1] from system.query_log where event_date >= yesterday() and @@ -59,10 +59,10 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_2%') and type = 'QueryFinish' order by query; - tuple(2) + (2) select 'optimize_skip_unused_shards_rewrite_in(0,)'; optimize_skip_unused_shards_rewrite_in(0,) -with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0,); +with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0); 0 0 system flush logs; select splitByString('IN', query)[-1] from system.query_log where @@ -73,7 +73,7 @@ select splitByString('IN', query)[-1] from system.query_log where query like concat('%', currentDatabase(), '%AS%id_00%') and type = 'QueryFinish' order by query; - tuple(0) + (0) -- signed column select 'signed column'; signed column diff --git a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql index 9a1a00cc0a1..0b7a44665dc 100644 --- a/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql +++ b/tests/queries/0_stateless/01756_optimize_skip_unused_shards_rewrite_in.sql @@ -63,7 +63,7 @@ select splitByString('IN', query)[-1] from system.query_log where order by query; select 'optimize_skip_unused_shards_rewrite_in(2,)'; -with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2,); +with (select currentDatabase()) as id_2 select *, ignore(id_2) from dist_01756 where dummy in (2); system flush logs; select splitByString('IN', query)[-1] from system.query_log where event_date >= yesterday() and @@ -75,7 +75,7 @@ select splitByString('IN', query)[-1] from system.query_log where order by query; select 'optimize_skip_unused_shards_rewrite_in(0,)'; -with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0,); +with (select currentDatabase()) as id_00 select *, ignore(id_00) from dist_01756 where dummy in (0); system flush logs; select splitByString('IN', query)[-1] from system.query_log where event_date >= yesterday() and From 94611dbddc08565c0ac154ad62dab1a7e8c6d8ed Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 5 Aug 2024 11:48:46 +0800 Subject: [PATCH 05/43] keep ColumnLowCardinality::getName() the same style with other columns --- src/Columns/ColumnLowCardinality.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 3766b247d60..5a23853e961 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -46,8 +46,8 @@ public: return Base::create(std::move(column_unique), std::move(indexes), is_shared); } - std::string getName() const override { return "ColumnLowCardinality"; } - const char * getFamilyName() const override { return "ColumnLowCardinality"; } + std::string getName() const override { return "LowCardinality(" + getDictionaryPtr()->getName() + ")"; 
} + const char * getFamilyName() const override { return "LowCardinality"; } TypeIndex getDataType() const override { return TypeIndex::LowCardinality; } ColumnPtr convertToFullColumn() const { return getDictionary().getNestedColumn()->index(getIndexes(), 0); } From 6f0f27838745d427966a3c949316a541d9fda7ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 03:45:07 +0200 Subject: [PATCH 06/43] Fix trash (low-quality code) in AWS S3 --- src/IO/S3/URI.cpp | 29 ++++++++++++----------------- src/IO/S3/URI.h | 8 ++++---- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index fead18315d8..446c2aa355b 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -1,8 +1,8 @@ #include -#include -#include -#include "Common/Macros.h" + #if USE_AWS_S3 +#include +#include #include #include #include @@ -10,6 +10,7 @@ #include + namespace DB { @@ -47,14 +48,6 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access static const RE2 path_style_pattern("^/([^/]*)/(.*)"); - static constexpr auto S3 = "S3"; - static constexpr auto S3EXPRESS = "S3EXPRESS"; - static constexpr auto COSN = "COSN"; - static constexpr auto COS = "COS"; - static constexpr auto OBS = "OBS"; - static constexpr auto OSS = "OSS"; - static constexpr auto EOS = "EOS"; - if (allow_archive_path_syntax) std::tie(uri_str, archive_pattern) = getURIAndArchivePattern(uri_); else @@ -85,7 +78,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) URIConverter::modifyURI(uri, mapper); } - storage_name = S3; + storage_name = "S3"; if (uri.getHost().empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host is empty in S3 URI."); @@ -93,11 +86,13 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Extract object version ID from query string. bool has_version_id = false; for (const auto & [query_key, query_value] : uri.getQueryParameters()) + { if (query_key == "versionId") { version_id = query_value; has_version_id = true; } + } /// Poco::URI will ignore '?' when parsing the path, but if there is a versionId in the http parameter, /// '?' can not be used as a wildcard, otherwise it will be ambiguous. @@ -130,14 +125,14 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) boost::to_upper(name); /// For S3Express it will look like s3express-eun1-az1, i.e. contain region and AZ info - if (name != S3 && !name.starts_with(S3EXPRESS) && name != COS && name != OBS && name != OSS && name != EOS) + if (name != "S3" && !name.starts_with("S3EXPRESS") && name != "COS" && name != "OBS" && name != "OSS" && name != "EOS") throw Exception( ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name)); - if (name == COS) - storage_name = COSN; + if (name == "COS") + storage_name = "COSN"; else storage_name = name; } @@ -153,8 +148,8 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) void URI::addRegionToURI(const std::string ®ion) { - if (auto pos = endpoint.find("amazonaws.com"); pos != std::string::npos) - endpoint = endpoint.substr(0, pos) + region + "." + endpoint.substr(pos); + if (auto pos = endpoint.find(".amazonaws.com"); pos != std::string::npos) + endpoint = endpoint.substr(0, pos) + "." 
+ region + endpoint.substr(pos); } void URI::validateBucket(const String & bucket, const Poco::URI & uri) diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 80e2da96cd4..c8d0b28cd15 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -1,14 +1,14 @@ #pragma once -#include -#include - #include "config.h" #if USE_AWS_S3 +#include +#include #include + namespace DB::S3 { @@ -23,7 +23,7 @@ namespace DB::S3 struct URI { Poco::URI uri; - // Custom endpoint if URI scheme is not S3. + // Custom endpoint if URI scheme, if not S3. std::string endpoint; std::string bucket; std::string key; From d0a1ee821b609856c2692abc01d132f8d7a8b88f Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 03:51:12 +0200 Subject: [PATCH 07/43] You don't know regular expressions --- src/IO/S3/URI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 446c2aa355b..eea73474c44 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -41,7 +41,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html - static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com(:\d{1,5})?)"); + static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce\.amazonaws\.com(:\d{1,5})?)"); /// Case when bucket name and key represented in path of S3 URL. /// E.g. (https://s3.region.amazonaws.com/bucket-name/key) From 978d36f9fe0e8cc34b6529276b7435b56a16bbb7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 03:57:47 +0200 Subject: [PATCH 08/43] It's strange that we cared about this --- src/IO/S3/URI.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index eea73474c44..64962f63edb 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -43,7 +43,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce\.amazonaws\.com(:\d{1,5})?)"); - /// Case when bucket name and key represented in path of S3 URL. + /// Case when bucket name and key represented in the path of S3 URL. /// E.g. (https://s3.region.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access static const RE2 path_style_pattern("^/([^/]*)/(.*)"); @@ -124,13 +124,6 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) } boost::to_upper(name); - /// For S3Express it will look like s3express-eun1-az1, i.e. 
contain region and AZ info - if (name != "S3" && !name.starts_with("S3EXPRESS") && name != "COS" && name != "OBS" && name != "OSS" && name != "EOS") - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", - quoteString(name)); - if (name == "COS") storage_name = "COSN"; else From dbd4a6d551a9c242c0ce8025ebcdb1304f0d448c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 04:41:06 +0200 Subject: [PATCH 09/43] Speed up from 4 sec to 2 sec #52771 --- src/IO/S3/Credentials.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index dfb7727fca4..11779c4dbd5 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -145,12 +145,16 @@ Aws::String AWSEC2MetadataClient::getDefaultCredentialsSecurely() const { String user_agent_string = awsComputeUserAgentString(); auto [new_token, response_code] = getEC2MetadataToken(user_agent_string); - if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST) + if (response_code == Aws::Http::HttpResponseCode::BAD_REQUEST + || response_code == Aws::Http::HttpResponseCode::REQUEST_NOT_MADE) + { + /// At least the host should be available and reply, otherwise neither IMDSv2 nor IMDSv1 are usable. return {}; + } else if (response_code != Aws::Http::HttpResponseCode::OK || new_token.empty()) { LOG_TRACE(logger, "Calling EC2MetadataService to get token failed, " - "falling back to less secure way. HTTP response code: {}", response_code); + "falling back to a less secure way. HTTP response code: {}", response_code); return getDefaultCredentials(); } From 8bfe4ee23f8b37ff8780e44ede9dc785ac563f75 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 04:57:18 +0200 Subject: [PATCH 10/43] Speed up requests when IMDS is not available --- src/IO/S3/Credentials.cpp | 6 +++--- src/IO/S3/Credentials.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index 11779c4dbd5..9c5f6547933 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -251,7 +251,7 @@ static Aws::String getAWSMetadataEndpoint() return ec2_metadata_service_endpoint; } -std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) +std::shared_ptr createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration) { auto endpoint = getAWSMetadataEndpoint(); return std::make_shared(client_configuration, endpoint.c_str()); @@ -785,11 +785,11 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( /// EC2MetadataService throttles by delaying the response so the service client should set a large read timeout. /// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds. 
- aws_client_configuration.connectTimeoutMs = 1000; + aws_client_configuration.connectTimeoutMs = 10; aws_client_configuration.requestTimeoutMs = 1000; aws_client_configuration.retryStrategy = std::make_shared(1, 1000); - auto ec2_metadata_client = InitEC2MetadataClient(aws_client_configuration); + auto ec2_metadata_client = createEC2MetadataClient(aws_client_configuration); auto config_loader = std::make_shared(ec2_metadata_client, !credentials_configuration.use_insecure_imds_request); AddProvider(std::make_shared(config_loader)); diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 95297ab0538..042c48ec15a 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -70,7 +70,7 @@ private: LoggerPtr logger; }; -std::shared_ptr InitEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration); +std::shared_ptr createEC2MetadataClient(const Aws::Client::ClientConfiguration & client_configuration); class AWSEC2InstanceProfileConfigLoader : public Aws::Config::AWSProfileConfigLoader { From 9470ceb34d6519c820f7c7ddccbe27b48a046f2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 05:41:10 +0200 Subject: [PATCH 11/43] Minor changes --- src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 2 +- src/IO/S3/PocoHTTPClient.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp index 433a0e96d2e..9854bada9ec 100644 --- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp +++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp @@ -63,7 +63,7 @@ void throwIfError(const Aws::Utils::Outcome & response) { const auto & err = response.GetError(); throw S3Exception( - fmt::format("{} (Code: {}, s3 exception: {})", + fmt::format("{} (Code: {}, S3 exception: '{}')", err.GetMessage(), static_cast(err.GetErrorType()), err.GetExceptionName()), err.GetErrorType()); } diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index aab7a39534d..de43f34d838 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -128,7 +128,7 @@ void PocoHTTPClientConfiguration::updateSchemeAndRegion() } else { - /// In global mode AWS C++ SDK send `us-east-1` but accept switching to another one if being suggested. + /// In global mode AWS C++ SDK sends `us-east-1` but accepts switching to another one if being suggested. 
region = Aws::Region::AWS_GLOBAL; } } From cfc10961ed237eac079db6f113330db5391adc1f Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Fri, 9 Aug 2024 15:56:36 +0800 Subject: [PATCH 12/43] fix getName() style for columnlowcardinality and columnunique --- src/Columns/ColumnLowCardinality.h | 2 +- src/Columns/ColumnUnique.h | 2 ++ src/Columns/IColumnUnique.h | 2 +- src/Columns/tests/gtest_column_dump_structure.cpp | 2 +- .../02313_dump_column_structure_low_cardinality.reference | 2 +- tests/queries/0_stateless/02355_column_type_name_lc.reference | 2 +- 6 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index 5a23853e961..d9c90e7dc21 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -46,7 +46,7 @@ public: return Base::create(std::move(column_unique), std::move(indexes), is_shared); } - std::string getName() const override { return "LowCardinality(" + getDictionaryPtr()->getName() + ")"; } + std::string getName() const override { return "LowCardinality(" + getDictionary().getNestedColumn()->getName() + ")"; } const char * getFamilyName() const override { return "LowCardinality"; } TypeIndex getDataType() const override { return TypeIndex::LowCardinality; } diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index d6cb75679be..8a66f4e02ed 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -48,6 +48,8 @@ private: ColumnUnique(const ColumnUnique & other); public: + std::string getName() const override { return "Unique(" + getNestedColumn()->getName() + ")"; } + MutableColumnPtr cloneEmpty() const override; const ColumnPtr & getNestedColumn() const override; diff --git a/src/Columns/IColumnUnique.h b/src/Columns/IColumnUnique.h index a8e10e5e2b2..52b1bef3009 100644 --- a/src/Columns/IColumnUnique.h +++ b/src/Columns/IColumnUnique.h @@ -73,7 +73,7 @@ public: /// Returns dictionary hash which is SipHash is applied to each row of nested column. 
virtual UInt128 getHash() const = 0; - const char * getFamilyName() const override { return "ColumnUnique"; } + const char * getFamilyName() const override { return "Unique"; } TypeIndex getDataType() const override { return getNestedColumn()->getDataType(); } void insert(const Field &) override diff --git a/src/Columns/tests/gtest_column_dump_structure.cpp b/src/Columns/tests/gtest_column_dump_structure.cpp index e00c77798c8..d9647147157 100644 --- a/src/Columns/tests/gtest_column_dump_structure.cpp +++ b/src/Columns/tests/gtest_column_dump_structure.cpp @@ -10,7 +10,7 @@ TEST(IColumn, dumpStructure) { auto type_lc = std::make_shared(std::make_shared()); ColumnPtr column_lc = type_lc->createColumn(); - String expected_structure = "ColumnLowCardinality(size = 0, UInt8(size = 0), ColumnUnique(size = 1, String(size = 1)))"; + String expected_structure = "LowCardinality(size = 0, UInt8(size = 0), Unique(size = 1, String(size = 1)))"; std::vector threads; for (size_t i = 0; i < 6; ++i) diff --git a/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference b/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference index fa7f1799c31..6b1e4743867 100644 --- a/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference +++ b/tests/queries/0_stateless/02313_dump_column_structure_low_cardinality.reference @@ -1 +1 @@ -Array(LowCardinality(String)), Const(size = 1, Array(size = 1, UInt64(size = 1), ColumnLowCardinality(size = 2, UInt8(size = 2), ColumnUnique(size = 3, String(size = 3))))) +Array(LowCardinality(String)), Const(size = 1, Array(size = 1, UInt64(size = 1), LowCardinality(size = 2, UInt8(size = 2), Unique(size = 3, String(size = 3))))) diff --git a/tests/queries/0_stateless/02355_column_type_name_lc.reference b/tests/queries/0_stateless/02355_column_type_name_lc.reference index 234a072299f..50c25a86b2f 100644 --- a/tests/queries/0_stateless/02355_column_type_name_lc.reference +++ b/tests/queries/0_stateless/02355_column_type_name_lc.reference @@ -1 +1 @@ -ColumnLowCardinality +LowCardinality(String) From a74cc601cd11ab32df61c46f9950c051fd8bb4da Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 16:04:27 +0200 Subject: [PATCH 13/43] A catch-all URL style for S3 --- src/IO/S3/URI.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 64962f63edb..9c80b377661 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -136,7 +136,16 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) validateBucket(bucket, uri); } else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket or key name are invalid in S3 URI."); + { + /// Custom endpoint, e.g. a public domain of Cloudflare R2, + /// which could be served by a custom server-side code. 
+ storage_name = "S3"; + bucket = "default"; + is_virtual_hosted_style = false; + endpoint = uri.getScheme() + "://" + uri.getAuthority(); + if (!uri.getPath().empty()) + key = uri.getPath().substr(1); + } } void URI::addRegionToURI(const std::string ®ion) From 9a6d98cd203ee9a6cdde4450aaf72d109a9719c7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 16:15:28 +0200 Subject: [PATCH 14/43] Update test --- .../integration/test_odbc_interaction/test.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index 0d0d7a0afb1..9d4ca5ad49f 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -51,9 +51,9 @@ create_table_sql_nullable_template = """ """ -def skip_test_msan(instance): - if instance.is_built_with_memory_sanitizer(): - pytest.skip("Memory Sanitizer cannot work with third-party shared libraries") +def skip_test_sanitizers(instance): + if instance.is_built_with_sanitizer(): + pytest.skip("Sanitizers cannot work with third-party shared libraries") def get_mysql_conn(): @@ -208,7 +208,7 @@ def started_cluster(): def test_mysql_odbc_select_nullable(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] table_name = "test_insert_nullable_select" @@ -248,7 +248,7 @@ def test_mysql_odbc_select_nullable(started_cluster): def test_mysql_simple_select_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] @@ -331,7 +331,7 @@ CREATE TABLE {}(id UInt32, name String, age UInt32, money UInt32, column_x Nulla def test_mysql_insert(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) mysql_setup = node1.odbc_drivers["MySQL"] table_name = "test_insert" @@ -374,7 +374,7 @@ def test_mysql_insert(started_cluster): def test_sqlite_simple_select_function_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -438,7 +438,7 @@ def test_sqlite_simple_select_function_works(started_cluster): def test_sqlite_table_function(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -470,7 +470,7 @@ def test_sqlite_table_function(started_cluster): def test_sqlite_simple_select_storage_works(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_setup = node1.odbc_drivers["SQLite3"] sqlite_db = sqlite_setup["Database"] @@ -503,7 +503,7 @@ def test_sqlite_simple_select_storage_works(started_cluster): def test_sqlite_odbc_hashed_dictionary(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] node1.exec_in_container( @@ -586,7 +586,7 @@ def test_sqlite_odbc_hashed_dictionary(started_cluster): def test_sqlite_odbc_cached_dictionary(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) sqlite_db = node1.odbc_drivers["SQLite3"]["Database"] node1.exec_in_container( @@ -635,7 +635,7 @@ def test_sqlite_odbc_cached_dictionary(started_cluster): def test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -663,7 +663,7 @@ def 
test_postgres_odbc_hashed_dictionary_with_schema(started_cluster): def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -685,7 +685,7 @@ def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): def test_no_connection_pooling(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -717,7 +717,7 @@ def test_no_connection_pooling(started_cluster): def test_postgres_insert(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) @@ -754,7 +754,7 @@ def test_postgres_insert(started_cluster): def test_odbc_postgres_date_data_type(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -783,7 +783,7 @@ def test_odbc_postgres_date_data_type(started_cluster): def test_odbc_postgres_conversions(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) try: conn = get_postgres_conn(started_cluster) @@ -841,7 +841,7 @@ def test_odbc_postgres_conversions(started_cluster): def test_odbc_cyrillic_with_varchar(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -868,7 +868,7 @@ def test_odbc_cyrillic_with_varchar(started_cluster): def test_many_connections(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -894,7 +894,7 @@ def test_many_connections(started_cluster): def test_concurrent_queries(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -948,7 +948,7 @@ def test_concurrent_queries(started_cluster): def test_odbc_long_column_names(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() @@ -986,7 +986,7 @@ def test_odbc_long_column_names(started_cluster): def test_odbc_long_text(started_cluster): - skip_test_msan(node1) + skip_test_sanitizers(node1) conn = get_postgres_conn(started_cluster) cursor = conn.cursor() From 9d91b600caa0d5cddcf4d13a8c80ac85c9077fca Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 16:27:35 +0200 Subject: [PATCH 15/43] Add a test --- tests/integration/test_s3_imds/test_simple.py | 2 +- .../03221_s3_imds_decent_timeout.reference | 1 + .../0_stateless/03221_s3_imds_decent_timeout.sh | 16 ++++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference create mode 100755 tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh diff --git a/tests/integration/test_s3_imds/test_simple.py b/tests/integration/test_s3_imds/test_simple.py index 0dacac2b0b9..4884c824f99 100644 --- a/tests/integration/test_s3_imds/test_simple.py +++ b/tests/integration/test_s3_imds/test_simple.py @@ -56,7 +56,7 @@ def test_credentials_from_metadata(): ) expected_logs = [ - "Calling EC2MetadataService to get token failed, falling back to less secure way", + "Calling EC2MetadataService to get token failed, falling back to a less secure way", "Getting default credentials for ec2 instance from resolver:8080", "Calling EC2MetadataService resource, /latest/meta-data/iam/security-credentials returned credential 
string myrole", "Calling EC2MetadataService resource /latest/meta-data/iam/security-credentials/myrole", diff --git a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh new file mode 100755 index 00000000000..fb55539d04a --- /dev/null +++ b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# ^ requires S3 + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Inaccessible IMDS should not introduce large delays, so this query should reply quickly at least sometimes: +while true +do + # This host (likely) drops packets sent to it (does not reply), so it is good for testing timeouts. + # At the same time, we expect that google.com does not drop packets and quickly replies with 404, which is a non-retriable error for S3. + AWS_EC2_METADATA_SERVICE_ENDPOINT='https://10.255.255.255/' ${CLICKHOUSE_LOCAL} --time --query "SELECT * FROM s3('https://google.com/test')" |& grep -v -F 404 | + ${CLICKHOUSE_LOCAL} --input-format TSV "SELECT c1::Float64 < 1 FROM table" | grep 1 && break +done From f4173546a93ab72986d1239a812ac0bc7fda11cd Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 9 Aug 2024 21:47:01 +0200 Subject: [PATCH 16/43] Remove obsolete test --- src/IO/tests/gtest_s3_uri.cpp | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 0ec28f80072..7216c8077e3 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -206,11 +206,6 @@ TEST(S3UriTest, validPatterns) } } -TEST_P(S3UriTest, invalidPatterns) -{ - ASSERT_ANY_THROW(S3::URI new_uri(GetParam())); -} - TEST(S3UriTest, versionIdChecks) { for (const auto& test_case : TestCases) @@ -223,19 +218,4 @@ TEST(S3UriTest, versionIdChecks) } } -INSTANTIATE_TEST_SUITE_P( - S3, - S3UriTest, - testing::Values( - "https:///", - "https://.s3.amazonaws.com/key", - "https://s3.amazonaws.com/key", - "https://jokserfn.s3amazonaws.com/key", - "https://s3.amazonaws.com//", - "https://amazonaws.com/", - "https://amazonaws.com//", - "https://amazonaws.com//key")); - -} - #endif From cebcc88b312b0702de9866c229e67097c738c4d1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Aug 2024 00:18:54 +0200 Subject: [PATCH 17/43] Fix build --- src/IO/tests/gtest_s3_uri.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 7216c8077e3..c0bf7fcb28a 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -218,4 +218,5 @@ TEST(S3UriTest, versionIdChecks) } } +} #endif From 3f718626da61f0502115586425d728492d3a3ae3 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Aug 2024 19:55:11 +0200 Subject: [PATCH 18/43] Better test --- tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh index fb55539d04a..021278955cd 100755 --- 
a/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh +++ b/tests/queries/0_stateless/03221_s3_imds_decent_timeout.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest +# Tags: no-fasttest, no-asan, no-msan, no-tsan # ^ requires S3 CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) @@ -10,7 +10,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) while true do # This host (likely) drops packets sent to it (does not reply), so it is good for testing timeouts. - # At the same time, we expect that google.com does not drop packets and quickly replies with 404, which is a non-retriable error for S3. - AWS_EC2_METADATA_SERVICE_ENDPOINT='https://10.255.255.255/' ${CLICKHOUSE_LOCAL} --time --query "SELECT * FROM s3('https://google.com/test')" |& grep -v -F 404 | + # At the same time, we expect that the clickhouse host does not drop packets and quickly replies with 4xx, which is a non-retriable error for S3. + AWS_EC2_METADATA_SERVICE_ENDPOINT='https://10.255.255.255/' ${CLICKHOUSE_LOCAL} --time --query "SELECT * FROM s3('${CLICKHOUSE_PORT_HTTP_PROTO}://${CLICKHOUSE_HOST}:${CLICKHOUSE_PORT_HTTP}/nonexistent')" |& grep -v -F 404 | ${CLICKHOUSE_LOCAL} --input-format TSV "SELECT c1::Float64 < 1 FROM table" | grep 1 && break done From 7524b8f76712ce421fdebd1fe86c79128fea3ceb Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 10 Aug 2024 19:55:22 +0200 Subject: [PATCH 19/43] A slight improvement --- src/Storages/StorageMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 78dbb72c199..f7701a2aab8 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -895,7 +895,7 @@ void StorageMergeTree::loadDeduplicationLog() std::string path = fs::path(relative_data_path) / "deduplication_logs"; /// If either there is already a deduplication log, or we will be able to use it. - if (disk->exists(path) || !disk->isReadOnly()) + if (!disk->isReadOnly() || disk->exists(path)) { deduplication_log = std::make_unique(path, settings->non_replicated_deduplication_window, format_version, disk); deduplication_log->load(); From ee3eec0a2a15592b2020d34b74d9f595e707c092 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 11 Aug 2024 04:47:26 +0200 Subject: [PATCH 20/43] Update Credentials.cpp --- src/IO/S3/Credentials.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/IO/S3/Credentials.cpp b/src/IO/S3/Credentials.cpp index 9c5f6547933..d6f7542da6b 100644 --- a/src/IO/S3/Credentials.cpp +++ b/src/IO/S3/Credentials.cpp @@ -785,6 +785,8 @@ S3CredentialsProviderChain::S3CredentialsProviderChain( /// EC2MetadataService throttles by delaying the response so the service client should set a large read timeout. /// EC2MetadataService delay is in order of seconds so it only make sense to retry after a couple of seconds. + /// But the connection timeout should be small because there is the case when there is no IMDS at all, + /// like outside of the cloud, on your own machines. 
aws_client_configuration.connectTimeoutMs = 10; aws_client_configuration.requestTimeoutMs = 1000; From 967bd0566336f8c239f0045f703fc8fe428cb28f Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 11 Aug 2024 12:24:13 -0600 Subject: [PATCH 21/43] Add create_if_not_exists setting to Settings.h --- src/Core/Settings.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 6f24db57026..1b52df76c45 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -896,6 +896,7 @@ class IColumn; M(UInt64, extract_key_value_pairs_max_pairs_per_row, 1000, "Max number of pairs that can be produced by the `extractKeyValuePairs` function. Used as a safeguard against consuming too much memory.", 0) ALIAS(extract_kvp_max_pairs_per_row) \ M(Bool, restore_replace_external_engines_to_null, false, "Replace all the external table engines to Null on restore. Useful for testing purposes", 0) \ M(Bool, restore_replace_external_table_functions_to_null, false, "Replace all table functions to Null on restore. Useful for testing purposes", 0) \ + M(Bool, create_if_not_exists, false, "Enable IF NOT EXISTS for CREATE statements by default", 0) \ \ \ /* ###################################### */ \ From f90b8327bea16ee81c12f0210d2602889a5944bc Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 11 Aug 2024 11:43:57 -0600 Subject: [PATCH 22/43] Update SettingsChangesHistory.cpp with new create_if_not_exists settings --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 511723f1873..202b21a92f0 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -75,6 +75,7 @@ static std::initializer_list Date: Sun, 11 Aug 2024 11:47:30 -0600 Subject: [PATCH 23/43] Add support for new create_if_not_exists setting in InterpreterCreateQuery.cpp --- src/Interpreters/InterpreterCreateQuery.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index a101e5e8f09..d899b8e111e 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1946,6 +1946,8 @@ BlockIO InterpreterCreateQuery::execute() FunctionNameNormalizer::visit(query_ptr.get()); auto & create = query_ptr->as(); + create.if_not_exists |= getContext()->getSettingsRef().create_if_not_exists; + bool is_create_database = create.database && !create.table; if (!create.cluster.empty() && !maybeRemoveOnCluster(query_ptr, getContext())) { From 2af1134c08ab164b2d77af854166fa30d96fddd9 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 11 Aug 2024 12:17:24 -0600 Subject: [PATCH 24/43] Update settings.md docs with new create_if_not_exists settings --- docs/en/operations/settings/settings.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e432f4e038f..22f73a03729 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5637,3 +5637,9 @@ Possible values: - 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled. Default value: `0`. + +## create_if_not_exists + +Enable IF NOT EXISTS for CREATE statements by default. If either this setting or IF NOT EXISTS is specified, then no Exception will be thrown when trying to create a new table. + +Default value: `false`. 
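The four patches above wire the new `create_if_not_exists` setting through Settings.h, the settings-changes history, InterpreterCreateQuery, and the documentation. A minimal SQL sketch of the resulting behavior, with illustrative object names (the stateless test added in the next patch exercises this same flow):

SET create_if_not_exists = 1;

CREATE TABLE sketch_table (id UInt32) ENGINE = MergeTree() ORDER BY id;
-- With the setting enabled, a repeated CREATE is a silent no-op, exactly as if
-- IF NOT EXISTS had been written; without it, this throws TABLE_ALREADY_EXISTS.
CREATE TABLE sketch_table (id UInt32) ENGINE = MergeTree() ORDER BY id;

CREATE DATABASE sketch_database;
-- Likewise, no DATABASE_ALREADY_EXISTS is thrown here.
CREATE DATABASE sketch_database;

DROP TABLE sketch_table;
DROP DATABASE sketch_database;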
From cc0412c55372108116b6b05fb4fc66ebd4eccae2 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 11 Aug 2024 16:01:48 -0600 Subject: [PATCH 25/43] Add test 03221_create_if_not_exists_setting --- ...221_create_if_not_exists_setting.reference | 4 ++ .../03221_create_if_not_exists_setting.sh | 43 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 tests/queries/0_stateless/03221_create_if_not_exists_setting.reference create mode 100755 tests/queries/0_stateless/03221_create_if_not_exists_setting.sh diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference b/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference new file mode 100644 index 00000000000..8740b05c9ca --- /dev/null +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference @@ -0,0 +1,4 @@ +57 +82 +0 +0 diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh new file mode 100755 index 00000000000..cfbe2eb8fd9 --- /dev/null +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh @@ -0,0 +1,43 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +# $CLICKHOUSE_CLIENT -mn -q "SET create_if_not_exists=0;" # Default +$CLICKHOUSE_CLIENT -mn -q " +DROP TABLE IF EXISTS example_table; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +" 2> /dev/null +# ensure failed error code +echo $? +$CLICKHOUSE_CLIENT -mn -q " +DROP DATABASE IF EXISTS example_database; +CREATE DATABASE example_database; +CREATE DATABASE example_database; +" 2> /dev/null +echo $? + +$CLICKHOUSE_CLIENT -mn -q " +SET create_if_not_exists=1; +DROP TABLE IF EXISTS example_table; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; +" +# ensure successful error code +echo $? + + +$CLICKHOUSE_CLIENT -mn -q " +SET create_if_not_exists=1; +DROP DATABASE IF EXISTS example_database; +CREATE DATABASE example_database; +CREATE DATABASE example_database; +" +echo $? + +$CLICKHOUSE_CLIENT -mn -q " +DROP DATABASE example_database; +DROP TABLE example_table; +" \ No newline at end of file From d31c56e0d9756642d90885016369e9ca7994e3f0 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Sun, 11 Aug 2024 23:17:24 -0600 Subject: [PATCH 26/43] Update 03221_create_if_not_exists_setting.sh --- .../queries/0_stateless/03221_create_if_not_exists_setting.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh index cfbe2eb8fd9..8dcde8977bc 100755 --- a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh @@ -38,6 +38,6 @@ CREATE DATABASE example_database; echo $? 
$CLICKHOUSE_CLIENT -mn -q " -DROP DATABASE example_database; -DROP TABLE example_table; +DROP DATABASE IF EXISTS example_database; +DROP TABLE IF EXISTS example_table; " \ No newline at end of file From 83d20bee00a4973cbffc3bcd9ba4073c79efb073 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Mon, 12 Aug 2024 07:42:55 -0600 Subject: [PATCH 27/43] Update 03221_create_if_not_exists_setting test to a .sql test --- ...221_create_if_not_exists_setting.reference | 4 --- ...=> 03221_create_if_not_exists_setting.sql} | 32 ++++--------------- 2 files changed, 6 insertions(+), 30 deletions(-) rename tests/queries/0_stateless/{03221_create_if_not_exists_setting.sh => 03221_create_if_not_exists_setting.sql} (51%) mode change 100755 => 100644 diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference b/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference index 8740b05c9ca..e69de29bb2d 100644 --- a/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.reference @@ -1,4 +0,0 @@ -57 -82 -0 -0 diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql old mode 100755 new mode 100644 similarity index 51% rename from tests/queries/0_stateless/03221_create_if_not_exists_setting.sh rename to tests/queries/0_stateless/03221_create_if_not_exists_setting.sql index 8dcde8977bc..59535981e7a --- a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sh +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql @@ -1,43 +1,23 @@ -#!/usr/bin/env bash -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh +SET create_if_not_exists=0; -- Default -# $CLICKHOUSE_CLIENT -mn -q "SET create_if_not_exists=0;" # Default -$CLICKHOUSE_CLIENT -mn -q " DROP TABLE IF EXISTS example_table; CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; -CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; -" 2> /dev/null -# ensure failed error code -echo $? -$CLICKHOUSE_CLIENT -mn -q " +CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; -- { serverError TABLE_ALREADY_EXISTS } + DROP DATABASE IF EXISTS example_database; CREATE DATABASE example_database; -CREATE DATABASE example_database; -" 2> /dev/null -echo $? +CREATE DATABASE example_database; -- { serverError DATABASE_ALREADY_EXISTS } -$CLICKHOUSE_CLIENT -mn -q " SET create_if_not_exists=1; + DROP TABLE IF EXISTS example_table; CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; CREATE TABLE example_table (id UInt32) ENGINE=MergeTree() ORDER BY id; -" -# ensure successful error code -echo $? - -$CLICKHOUSE_CLIENT -mn -q " -SET create_if_not_exists=1; DROP DATABASE IF EXISTS example_database; CREATE DATABASE example_database; CREATE DATABASE example_database; -" -echo $? 
-$CLICKHOUSE_CLIENT -mn -q " DROP DATABASE IF EXISTS example_database; -DROP TABLE IF EXISTS example_table; -" \ No newline at end of file +DROP TABLE IF EXISTS example_table; \ No newline at end of file From c817a4e8adfba37b23156ed75e1a501068e10cc1 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Mon, 12 Aug 2024 07:45:51 -0600 Subject: [PATCH 28/43] Update settings.md to clarify create_if_not_exists behavior Co-authored-by: Nikita Taranov --- docs/en/operations/settings/settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 22f73a03729..b9d5dde8522 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5640,6 +5640,6 @@ Default value: `0`. ## create_if_not_exists -Enable IF NOT EXISTS for CREATE statements by default. If either this setting or IF NOT EXISTS is specified, then no Exception will be thrown when trying to create a new table. +Enable `IF NOT EXISTS` for `CREATE` statement by default. If either this setting or `IF NOT EXISTS` is specified and a table with the provided name already exists, no exception will be thrown. Default value: `false`. From 3b1d6f30bec5a8a568ab477e639d97c9c95a3f2c Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 12 Aug 2024 23:55:01 +0200 Subject: [PATCH 29/43] Debug test --- .../0_stateless/02490_benchmark_max_consecutive_errors.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh index f747b3156a5..df7e9386662 100755 --- a/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh +++ b/tests/queries/0_stateless/02490_benchmark_max_consecutive_errors.sh @@ -11,5 +11,6 @@ if [ "$RES" -eq 10 ] then echo "$RES" else + echo "$RES" cat "${CLICKHOUSE_TMP}/${CLICKHOUSE_DATABASE}.log" fi From cb7a67c6dd9df40d728108c37bfd423435b45141 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 13 Aug 2024 11:39:22 +0800 Subject: [PATCH 30/43] fix failed uts --- .../02210_toColumnTypeName_toLowCardinality_const.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference index 1e3d3a50562..e3978020431 100644 --- a/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference +++ b/tests/queries/0_stateless/02210_toColumnTypeName_toLowCardinality_const.reference @@ -1 +1 @@ -Const(ColumnLowCardinality) +Const(LowCardinality(UInt8)) From fbe08cc24ca3f1b4472eba0960b14227917c0329 Mon Sep 17 00:00:00 2001 From: Peter Nguyen Date: Tue, 13 Aug 2024 07:24:41 -0600 Subject: [PATCH 31/43] Add no-parallel flag to 03221_create_if_not_exists_setting.sql --- tests/queries/0_stateless/03221_create_if_not_exists_setting.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql index 59535981e7a..18b3ed7bcec 100644 --- a/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql +++ b/tests/queries/0_stateless/03221_create_if_not_exists_setting.sql @@ -1,3 +1,4 @@ +-- Tags: no-parallel SET create_if_not_exists=0; -- Default From b9ffa929ba418d54e5e140470f21d4347ac0eab9 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 13 Aug 
2024 21:08:53 +0000 Subject: [PATCH 32/43] Fix: min marks to read overflow with parallel replicas --- .../MergeTree/MergeTreeIndexGranularity.cpp | 12 ++++++++++-- ..._replicas_min_marks_to_read_overflow.reference | 10 ++++++++++ ...rallel_replicas_min_marks_to_read_overflow.sql | 15 +++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference create mode 100644 tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index 2a45ab1d927..2f9a4a47b11 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -103,8 +103,16 @@ size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t num /// This is a heuristic to respect min_marks_to_read which is ignored by MergeTreeReadPool in case of remote disk. /// See comment in IMergeTreeSelectAlgorithm. - if (min_marks_to_read && from_mark + 2 * min_marks_to_read <= to_mark) - to_mark = from_mark + min_marks_to_read; + if (min_marks_to_read) + { + // check that ... + bool overflow = ((1ULL << 63) & min_marks_to_read); // further multiplication by 2 will not overflow + if (!overflow) + overflow = (std::numeric_limits::max() - from_mark) < 2 * min_marks_to_read; // further addition will not overflow + + if (!overflow && from_mark + 2 * min_marks_to_read <= to_mark) + to_mark = from_mark + min_marks_to_read; + } return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows; } diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference new file mode 100644 index 00000000000..7fafd4d13ea --- /dev/null +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference @@ -0,0 +1,10 @@ +100 100 +101 101 +102 102 +103 103 +104 104 +105 105 +106 106 +107 107 +108 108 +109 109 diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql new file mode 100644 index 00000000000..112373e5db2 --- /dev/null +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS test__fuzz_22 SYNC; + +CREATE TABLE test__fuzz_22 (k Float32, v String) ENGINE = ReplicatedMergeTree('/clickhouse/03222/{database}/test__fuzz_22', 'r1') ORDER BY k SETTINGS index_granularity = 1; + +INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(10_000); + +SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; + +SELECT k, v +FROM test__fuzz_22 +ORDER BY k +LIMIT 100, 10 +SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806; + +DROP TABLE test__fuzz_22 SYNC; From 0e0272b2ffdf4286c5ef9766c90b88e096469e92 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 14 Aug 2024 10:21:23 +0000 Subject: [PATCH 33/43] Better check for overflow + limit min_marks_for_concurrent_read --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 18 ++++++++++++++++-- .../MergeTree/MergeTreeIndexGranularity.cpp | 15 +++++++++------ ...plicas_min_marks_to_read_overflow.reference | 11 
+++++++++++ ...lel_replicas_min_marks_to_read_overflow.sql | 10 +++++++++- 4 files changed, 45 insertions(+), 9 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 901d7c61167..b5b46ef9f41 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -350,7 +350,14 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( /// We have a special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. - const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + { + /// limit min marks to read in case it's too big; this happened in tests due to settings randomization + pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); + multiplier = 1.0f; + } + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); else @@ -519,7 +526,14 @@ Pipe ReadFromMergeTree::readInOrder( .number_of_current_replica = client_info.number_of_current_replica, }; - const auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; + if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + { + /// limit min marks to read in case it's too big; this happened in tests due to settings randomization + pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); + multiplier = 1.0f; + } + if (auto result = pool_settings.min_marks_for_concurrent_read * multiplier; canConvertTo(result)) pool_settings.min_marks_for_concurrent_read = static_cast(result); else diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp index 2f9a4a47b11..2b924284857 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularity.cpp @@ -105,13 +105,16 @@ size_t MergeTreeIndexGranularity::countMarksForRows(size_t from_mark, size_t num /// See comment in IMergeTreeSelectAlgorithm. if (min_marks_to_read) { -        // check that ...
- bool overflow = ((1ULL << 63) & min_marks_to_read); // further multiplication by 2 will not overflow - if (!overflow) - overflow = (std::numeric_limits::max() - from_mark) < 2 * min_marks_to_read; // further addition will not overflow + // check overflow + size_t min_marks_to_read_2 = 0; + bool overflow = common::mulOverflow(min_marks_to_read, 2, min_marks_to_read_2); - if (!overflow && from_mark + 2 * min_marks_to_read <= to_mark) - to_mark = from_mark + min_marks_to_read; + size_t to_mark_overwrite = 0; + if (!overflow) + overflow = common::addOverflow(from_mark, min_marks_to_read_2, to_mark_overwrite); + + if (!overflow && to_mark_overwrite < to_mark) + to_mark = to_mark_overwrite; } return getRowsCountInRange(from_mark, std::max(1UL, to_mark)) - offset_in_rows; diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference index 7fafd4d13ea..b6c452ba328 100644 --- a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.reference @@ -1,3 +1,14 @@ +1006 +1007 +1008 +1009 +101 +1010 +1011 +1012 +1013 +1014 +--- 100 100 101 101 102 102 diff --git a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql index 112373e5db2..6f486f8f0fe 100644 --- a/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql +++ b/tests/queries/0_stateless/03222_parallel_replicas_min_marks_to_read_overflow.sql @@ -6,10 +6,18 @@ INSERT INTO test__fuzz_22 SELECT number, toString(number) FROM numbers(10_000); SET allow_experimental_parallel_reading_from_replicas = 2, max_parallel_replicas = 3, cluster_for_parallel_replicas='test_cluster_one_shard_three_replicas_localhost'; +SELECT v +FROM test__fuzz_22 +ORDER BY v +LIMIT 10, 10 +SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806; + +SELECT '---'; + SELECT k, v FROM test__fuzz_22 ORDER BY k LIMIT 100, 10 -SETTINGS merge_tree_min_rows_for_concurrent_read = 9223372036854775806; +SETTINGS optimize_read_in_order=1, merge_tree_min_rows_for_concurrent_read = 9223372036854775806; DROP TABLE test__fuzz_22 SYNC; From aa38024b0e0cdc4a839446df2e7de974efc6b7e7 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 14 Aug 2024 20:59:08 +0000 Subject: [PATCH 34/43] Fix UBSan: lower upper bound for min_marks_for_concurrent_read --- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 3ece7b1c5c8..734e67bda24 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -353,10 +353,11 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas( /// We have a special logic for local replica. It has to read less data, because in some cases it should /// merge states of aggregate functions or do some other important stuff other than reading from Disk. 
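/// (Halving the limit below leaves headroom for the float multiplication that follows, so its
/// result can still be range-checked and cast back to an integer; this appears to be the
/// overflow UBSan flagged in this commit.)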
auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; - if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + const auto min_marks_for_concurrent_read_limit = std::numeric_limits::max() >> 1; + if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit) { /// limit min marks to read in case it's too big; this happened in tests due to settings randomization - pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); + pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit; multiplier = 1.0f; } @@ -529,10 +530,11 @@ Pipe ReadFromMergeTree::readInOrder( }; auto multiplier = context->getSettingsRef().parallel_replicas_single_task_marks_count_multiplier; - if (pool_settings.min_marks_for_concurrent_read > std::numeric_limits::max()) + const auto min_marks_for_concurrent_read_limit = std::numeric_limits::max() >> 1; + if (pool_settings.min_marks_for_concurrent_read > min_marks_for_concurrent_read_limit) { /// limit min marks to read in case it's too big; this happened in tests due to settings randomization - pool_settings.min_marks_for_concurrent_read = std::numeric_limits::max(); + pool_settings.min_marks_for_concurrent_read = min_marks_for_concurrent_read_limit; multiplier = 1.0f; } From b077f2cc9c11b01c443eb1ff976457965f7297ee Mon Sep 17 00:00:00 2001 From: maxvostrikov Date: Thu, 15 Aug 2024 02:31:10 +0200 Subject: [PATCH 35/43] performance comparison test for output_format_parquet_write_page_index setting Added a new performance comparison test for the output_format_parquet_write_page_index setting. --- tests/performance/parquet_read_with_index.xml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/performance/parquet_read_with_index.xml diff --git a/tests/performance/parquet_read_with_index.xml b/tests/performance/parquet_read_with_index.xml new file mode 100644 index 00000000000..1bb2d8eb4a2 --- /dev/null +++ b/tests/performance/parquet_read_with_index.xml @@ -0,0 +1,30 @@ + + + INSERT INTO FUNCTION file('test_pq_index', Parquet) SELECT * FROM generateRandom('int64_column Nullable(Int64), tuple_column Tuple(a Nullable(String), b Nullable(Float64), c Tuple(i UInt32, j UInt32)),array_tuple_column Array(Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64))), map_tuple_column Map(String, Tuple(a Nullable(String), b Nullable(Float64), c Nullable(Int64)))') limit 1000000 SETTINGS output_format_parquet_use_custom_encoder=false, output_format_parquet_write_page_index=true + + + + SELECT * FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null + + + + SELECT tuple_column.a FROM file('test_pq_index', Parquet) Format Null + + + + SELECT tuple_column.a FROM file('test_pq_index', Parquet, 'tuple_column Tuple(a Nullable(String))') Format Null + + + + SELECT tuple_column.c.i FROM file('test_pq_index', Parquet) Format Null + + + + SELECT * FROM file('test_pq_index', Parquet, 'array_tuple_column Array (Tuple(a Nullable(String)))') Format Null + + + + SELECT * FROM file('test_pq_index', Parquet, 'map_tuple_column Map(String, Tuple(a Nullable(String)))') Format Null + + + From 0bb076a4d381fcc4e9827bebedbbe46ded9b9278 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 15 Aug 2024 08:08:00 +0000 Subject: [PATCH 36/43] Improve schema inference of date times --- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.cpp | 1 + src/Formats/EscapingRuleUtils.cpp | 5 +- src/Formats/FormatFactory.cpp | 1 +
src/Formats/FormatSettings.h | 1 + src/Formats/SchemaInferenceUtils.cpp | 119 +++++--- src/IO/ReadHelpers.cpp | 29 +- src/IO/ReadHelpers.h | 77 +++-- src/IO/parseDateTimeBestEffort.cpp | 77 ++++- src/IO/parseDateTimeBestEffort.h | 8 + .../03222_date_time_inference.reference | 253 +++++++++++++++++ .../0_stateless/03222_date_time_inference.sql | 268 ++++++++++++++++++ 12 files changed, 761 insertions(+), 79 deletions(-) create mode 100644 tests/queries/0_stateless/03222_date_time_inference.reference create mode 100644 tests/queries/0_stateless/03222_date_time_inference.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0808e8eb49f..ad6cc89c5cd 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1136,6 +1136,7 @@ class IColumn; M(Bool, input_format_try_infer_integers, true, "Try to infer integers instead of floats while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_dates, true, "Try to infer dates from string fields while schema inference in text formats", 0) \ M(Bool, input_format_try_infer_datetimes, true, "Try to infer datetimes from string fields while schema inference in text formats", 0) \ + M(Bool, input_format_try_infer_datetimes_only_datetime64, false, "When input_format_try_infer_datetimes is enabled, infer only DateTime64 but not DateTime types", 0) \ M(Bool, input_format_try_infer_exponent_floats, false, "Try to infer floats in exponential notation while schema inference in text formats (except JSON, where exponent numbers are always inferred)", 0) \ M(Bool, output_format_markdown_escape_special_characters, false, "Escape special characters in Markdown", 0) \ M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. 
For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \ diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index 20a8721c10e..b344a141a46 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -88,6 +88,7 @@ static std::initializer_list(have_dates) + static_cast(have_datetimes))); + bool have_datetimes = type_indexes.contains(TypeIndex::DateTime); + bool have_datetimes64 = type_indexes.contains(TypeIndex::DateTime64); + bool all_dates_or_datetimes = (type_indexes.size() == (static_cast(have_dates) + static_cast(have_datetimes) + static_cast(have_datetimes64))); - if (!all_dates_or_datetimes && (have_dates || have_datetimes)) + if (!all_dates_or_datetimes && (have_dates || have_datetimes || have_datetimes64)) { for (auto & type : data_types) { - if (isDate(type) || isDateTime64(type)) + if (isDate(type) || isDateTime(type) || isDateTime64(type)) type = std::make_shared(); } type_indexes.erase(TypeIndex::Date); type_indexes.erase(TypeIndex::DateTime); + type_indexes.erase(TypeIndex::DateTime64); type_indexes.insert(TypeIndex::String); return; } - if (have_dates && have_datetimes) + for (auto & type : data_types) { - for (auto & type : data_types) + if (isDate(type) && (have_datetimes || have_datetimes64)) { - if (isDate(type)) + if (have_datetimes64) type = std::make_shared(9); + else + type = std::make_shared(); + type_indexes.erase(TypeIndex::Date); + } + else if (isDateTime(type) && have_datetimes64) + { + type = std::make_shared(9); + type_indexes.erase(TypeIndex::DateTime); } - - type_indexes.erase(TypeIndex::Date); } } @@ -697,55 +705,87 @@ namespace bool tryInferDate(std::string_view field) { - if (field.empty()) + /// Minimum length of Date text representation is 8 (YYYY-M-D) and maximum is 10 (YYYY-MM-DD) + if (field.size() < 8 || field.size() > 10) return false; - ReadBufferFromString buf(field); - Float64 tmp_float; /// Check if it's just a number, and if so, don't try to infer Date from it, /// because we can interpret this number as a Date (for example 20000101 will be 2000-01-01) /// and it will lead to inferring Date instead of simple Int64/UInt64 in some cases. - if (tryReadFloatText(tmp_float, buf) && buf.eof()) - return false; - - buf.seek(0, SEEK_SET); /// Return position to the beginning - - DayNum tmp; - return tryReadDateText(tmp, buf) && buf.eof(); - } - - bool tryInferDateTime(std::string_view field, const FormatSettings & settings) - { - if (field.empty()) + if (std::all_of(field.begin(), field.end(), isNumericASCII)) return false; ReadBufferFromString buf(field); - Float64 tmp_float; + DayNum tmp; + return tryReadDateText(tmp, buf, DateLUT::instance(), /*allowed_delimiters=*/"-/:") && buf.eof(); + } + + DataTypePtr tryInferDateTimeOrDateTime64(std::string_view field, const FormatSettings & settings) + { + /// Don't try to infer DateTime if string is too long. + /// It's difficult to say what is the real maximum length of + /// DateTime we can parse using BestEffort approach. + /// 50 symbols is a more or less reasonable limit for date times that make sense. + if (field.empty() || field.size() > 50) + return nullptr; + + /// Check that we have at least one digit, don't infer datetime from strings like "Apr"/"May"/etc.
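+    /// (For example, every character of "Apr" fails isNumericASCII, so the std::any_of check below
+    /// returns false and we bail out without attempting a best-effort parse.)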
+ if (!std::any_of(field.begin(), field.end(), isNumericASCII)) + return nullptr; + /// Check if it's just a number, and if so, don't try to infer DateTime from it, /// because we can interpret this number as a timestamp and it will lead to - /// inferring DateTime instead of simple Int64/Float64 in some cases. + /// inferring DateTime instead of simple Int64 in some cases. + if (std::all_of(field.begin(), field.end(), isNumericASCII)) + return nullptr; + + ReadBufferFromString buf(field); + Float64 tmp_float; + /// Check if it's a float value, and if so, don't try to infer DateTime from it, + /// because it will lead to inferring DateTime instead of simple Float64 in some cases. if (tryReadFloatText(tmp_float, buf) && buf.eof()) - return false; + return nullptr; + + buf.seek(0, SEEK_SET); /// Return position to the beginning + if (!settings.try_infer_datetimes_only_datetime64) + { + time_t tmp; + switch (settings.date_time_input_format) + { + case FormatSettings::DateTimeInputFormat::Basic: + if (tryReadDateTimeText(tmp, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof()) + return std::make_shared(); + break; + case FormatSettings::DateTimeInputFormat::BestEffort: + if (tryParseDateTimeBestEffortStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(); + break; + case FormatSettings::DateTimeInputFormat::BestEffortUS: + if (tryParseDateTimeBestEffortUSStrict(tmp, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(); + break; + } + } buf.seek(0, SEEK_SET); /// Return position to the beginning DateTime64 tmp; switch (settings.date_time_input_format) { case FormatSettings::DateTimeInputFormat::Basic: - if (tryReadDateTime64Text(tmp, 9, buf) && buf.eof()) - return true; + if (tryReadDateTime64Text(tmp, 9, buf, DateLUT::instance(), /*allowed_date_delimiters=*/"-/:", /*allowed_time_delimiters=*/":") && buf.eof()) + return std::make_shared(9); break; case FormatSettings::DateTimeInputFormat::BestEffort: - if (tryParseDateTime64BestEffort(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; + if (tryParseDateTime64BestEffortStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(9); break; case FormatSettings::DateTimeInputFormat::BestEffortUS: - if (tryParseDateTime64BestEffortUS(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC")) && buf.eof()) - return true; + if (tryParseDateTime64BestEffortUSStrict(tmp, 9, buf, DateLUT::instance(), DateLUT::instance("UTC"), /*allowed_date_delimiters=*/"-/:") && buf.eof()) + return std::make_shared(9); break; } - return false; + return nullptr; } template @@ -1439,8 +1479,11 @@ DataTypePtr tryInferDateOrDateTimeFromString(std::string_view field, const Forma if (settings.try_infer_dates && tryInferDate(field)) return std::make_shared(); - if (settings.try_infer_datetimes && tryInferDateTime(field, settings)) - return std::make_shared(9); + if (settings.try_infer_datetimes) + { + if (auto type = tryInferDateTimeOrDateTime64(field, settings)) + return type; + } return nullptr; } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 9559462e62b..48d788512e4 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -1271,7 +1271,7 @@ template void readJSONArrayInto, void>(PaddedPODArray, 
bool>(PaddedPODArray & s, ReadBuffer & buf); template -ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) +ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters) { static constexpr bool throw_exception = std::is_same_v; @@ -1318,6 +1318,9 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) } else { + if (!isSymbolIn(*buf.position(), allowed_delimiters)) + return error(); + ++buf.position(); if (!append_digit(month)) @@ -1325,7 +1328,11 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) append_digit(month); if (!buf.eof() && !isNumericASCII(*buf.position())) + { + if (!isSymbolIn(*buf.position(), allowed_delimiters)) + return error(); ++buf.position(); + } else return error(); @@ -1338,12 +1345,12 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) return ReturnType(true); } -template void readDateTextFallback(LocalDate &, ReadBuffer &); -template bool readDateTextFallback(LocalDate &, ReadBuffer &); +template void readDateTextFallback(LocalDate &, ReadBuffer &, const char * allowed_delimiters); +template bool readDateTextFallback(LocalDate &, ReadBuffer &, const char * allowed_delimiters); template -ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) +ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters, const char * allowed_time_delimiters) { static constexpr bool throw_exception = std::is_same_v; @@ -1413,6 +1420,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) return false; + + if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters)) + return false; } UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -1443,6 +1453,9 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[3]) || !isNumericASCII(s[4]) || !isNumericASCII(s[6]) || !isNumericASCII(s[7])) return false; + + if (!isSymbolIn(s[2], allowed_time_delimiters) || !isSymbolIn(s[5], allowed_time_delimiters)) + return false; } hour = (s[0] - '0') * 10 + (s[1] - '0'); @@ -1488,10 +1501,10 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D return ReturnType(true); } -template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); -template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template void readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); +template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateLUTImpl &, const char *, const char *); template diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index 
ffba4fafb5c..39e1cb12b5c 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -703,13 +703,28 @@ struct NullOutput }; template -ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf); +ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters); + +inline bool isSymbolIn(char symbol, const char * symbols) +{ + if (symbols == nullptr) + return true; + + const char * pos = symbols; + while (*pos) + { + if (*pos == symbol) + return true; + ++pos; + } + return false; +} /// In YYYY-MM-DD format. /// For convenience, Month and Day parts can have single digit instead of two digits. /// Any separators other than '-' are supported. template -inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) +inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -753,6 +768,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) } else { + if (!isSymbolIn(pos[-1], allowed_delimiters)) + return error(); + if (!isNumericASCII(pos[0])) return error(); @@ -768,6 +786,9 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) if (isNumericASCII(pos[-1]) || !isNumericASCII(pos[0])) return error(); + if (!isSymbolIn(pos[-1], allowed_delimiters)) + return error(); + day = pos[0] - '0'; if (isNumericASCII(pos[1])) { @@ -783,7 +804,7 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) return ReturnType(true); } else - return readDateTextFallback(date, buf); + return readDateTextFallback(date, buf, allowed_delimiters); } inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) @@ -797,15 +818,15 @@ inline void convertToDayNum(DayNum & date, ExtendedDayNum & from) } template -inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; LocalDate local_date; if constexpr (throw_exception) - readDateTextImpl(local_date, buf); - else if (!readDateTextImpl(local_date, buf)) + readDateTextImpl(local_date, buf, allowed_delimiters); + else if (!readDateTextImpl(local_date, buf, allowed_delimiters)) return false; ExtendedDayNum ret = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day()); @@ -814,15 +835,15 @@ inline ReturnType readDateTextImpl(DayNum & date, ReadBuffer & buf, const DateLU } template -inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; LocalDate local_date; if constexpr (throw_exception) - readDateTextImpl(local_date, buf); - else if (!readDateTextImpl(local_date, buf)) + readDateTextImpl(local_date, buf, allowed_delimiters); + else if (!readDateTextImpl(local_date, buf, allowed_delimiters)) return false; /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. 
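/// A minimal, self-contained sketch of the delimiter whitelist introduced above; only
/// isSymbolIn mirrors this patch, the main() driver is illustrative and not part of the change:

#include <cassert>

inline bool isSymbolIn(char symbol, const char * symbols)
{
    if (symbols == nullptr)
        return true;                      /// no whitelist: any delimiter is accepted
    for (const char * pos = symbols; *pos; ++pos)
        if (*pos == symbol)
            return true;
    return false;
}

int main()
{
    /// In "YYYY-MM-DD" the delimiters sit at positions 4 and 7; schema inference
    /// passes the whitelist "-/:" for both of them.
    assert(isSymbolIn("2020-01-01"[4], "-/:"));   /// '-' is allowed -> Date can be inferred
    assert(!isSymbolIn("2020_01_01"[4], "-/:"));  /// '_' is rejected -> the field stays String
}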
@@ -846,19 +867,19 @@ inline void readDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTI readDateTextImpl(date, buf, date_lut); } -inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf) +inline bool tryReadDateText(LocalDate & date, ReadBuffer & buf, const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf); + return readDateTextImpl(date, buf, allowed_delimiters); } -inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateText(DayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf, time_zone); + return readDateTextImpl(date, buf, time_zone, allowed_delimiters); } -inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateText(ExtendedDayNum & date, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_delimiters = nullptr) { - return readDateTextImpl(date, buf, time_zone); + return readDateTextImpl(date, buf, time_zone, allowed_delimiters); } UUID parseUUID(std::span src); @@ -975,13 +996,13 @@ inline T parseFromString(std::string_view str) template -ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut); +ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr); /** In YYYY-MM-DD hh:mm:ss or YYYY-MM-DD format, according to specified time zone. * As an exception, also supported parsing of unix timestamp in form of decimal number. 
*/ template -inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -1014,6 +1035,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons if (!isNumericASCII(s[0]) || !isNumericASCII(s[1]) || !isNumericASCII(s[2]) || !isNumericASCII(s[3]) || !isNumericASCII(s[5]) || !isNumericASCII(s[6]) || !isNumericASCII(s[8]) || !isNumericASCII(s[9])) return ReturnType(false); + + if (!isSymbolIn(s[4], allowed_date_delimiters) || !isSymbolIn(s[7], allowed_date_delimiters)) + return ReturnType(false); } UInt16 year = (s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0'); @@ -1033,6 +1057,9 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons if (!isNumericASCII(s[11]) || !isNumericASCII(s[12]) || !isNumericASCII(s[14]) || !isNumericASCII(s[15]) || !isNumericASCII(s[17]) || !isNumericASCII(s[18])) return ReturnType(false); + + if (!isSymbolIn(s[13], allowed_time_delimiters) || !isSymbolIn(s[16], allowed_time_delimiters)) + return ReturnType(false); } hour = (s[11] - '0') * 10 + (s[12] - '0'); @@ -1057,11 +1084,11 @@ inline ReturnType readDateTimeTextImpl(time_t & datetime, ReadBuffer & buf, cons return readIntTextImpl(datetime, buf); } else - return readDateTimeTextFallback(datetime, buf, date_lut); + return readDateTimeTextFallback(datetime, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } template -inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut) +inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut, const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { static constexpr bool throw_exception = std::is_same_v; @@ -1075,7 +1102,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re { try { - readDateTimeTextImpl(whole, buf, date_lut); + readDateTimeTextImpl(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } catch (const DB::Exception &) { @@ -1085,7 +1112,7 @@ inline ReturnType readDateTimeTextImpl(DateTime64 & datetime64, UInt32 scale, Re } else { - auto ok = readDateTimeTextImpl(whole, buf, date_lut); + auto ok = readDateTimeTextImpl(whole, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); if (!ok && (buf.eof() || *buf.position() != '.')) return ReturnType(false); } @@ -1168,14 +1195,14 @@ inline void readDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer readDateTimeTextImpl(datetime64, scale, buf, date_lut); } -inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance()) +inline bool tryReadDateTimeText(time_t & datetime, ReadBuffer & buf, const DateLUTImpl & time_zone = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { - return readDateTimeTextImpl(datetime, buf, time_zone); + return readDateTimeTextImpl(datetime, buf, time_zone, allowed_date_delimiters, allowed_time_delimiters); } -inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & 
date_lut = DateLUT::instance()) +inline bool tryReadDateTime64Text(DateTime64 & datetime64, UInt32 scale, ReadBuffer & buf, const DateLUTImpl & date_lut = DateLUT::instance(), const char * allowed_date_delimiters = nullptr, const char * allowed_time_delimiters = nullptr) { - return readDateTimeTextImpl(datetime64, scale, buf, date_lut); + return readDateTimeTextImpl(datetime64, scale, buf, date_lut, allowed_date_delimiters, allowed_time_delimiters); } inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index e046e837689..68122a37df6 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -82,13 +82,14 @@ struct DateTimeSubsecondPart UInt8 digits; }; -template +template ReturnType parseDateTimeBestEffortImpl( time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, - DateTimeSubsecondPart * fractional) + DateTimeSubsecondPart * fractional, + const char * allowed_date_delimiters = nullptr) { auto on_error = [&](int error_code [[maybe_unused]], FormatStringHelper fmt_string [[maybe_unused]], @@ -170,22 +171,36 @@ ReturnType parseDateTimeBestEffortImpl( fractional->digits = 3; readDecimalNumber<3>(fractional->value, digits + 10); } + else if constexpr (strict) + { + /// Fractional part is not allowed. + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part"); + } return ReturnType(true); } else if (num_digits == 10 && !year && !has_time) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is unix timestamp. readDecimalNumber<10>(res, digits); return ReturnType(true); } else if (num_digits == 9 && !year && !has_time) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is unix timestamp. 
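/// (e.g. the nine-digit value 999999999 reads as 2001-09-09 01:46:39 UTC when taken as seconds since the Unix epoch)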
readDecimalNumber<9>(res, digits); return ReturnType(true); } else if (num_digits == 14 && !year && !has_time) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is YYYYMMDDhhmmss readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); @@ -197,6 +212,9 @@ ReturnType parseDateTimeBestEffortImpl( } else if (num_digits == 8 && !year) { + if (strict && month) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month component is duplicated"); + /// This is YYYYMMDD readDecimalNumber<4>(year, digits); readDecimalNumber<2>(month, digits + 4); @@ -272,6 +290,9 @@ ReturnType parseDateTimeBestEffortImpl( else return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after year and month: {}", num_digits); } + + if (!isSymbolIn(delimiter_after_year, allowed_date_delimiters)) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: '{}' delimiter between date parts is not allowed", delimiter_after_year); } } else if (num_digits == 2 || num_digits == 1) @@ -403,9 +424,16 @@ ReturnType parseDateTimeBestEffortImpl( else { if (day_of_month) + { + if (strict && hour) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: hour component is duplicated"); + hour = hour_or_day_of_month_or_month; + } else + { day_of_month = hour_or_day_of_month_or_month; + } } } else if (num_digits != 0) @@ -446,6 +474,11 @@ ReturnType parseDateTimeBestEffortImpl( fractional->digits = num_digits; readDecimalNumber(fractional->value, num_digits, digits); } + else if (strict) + { + /// Fractional part is not allowed. + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected fractional part"); + } } else if (c == '+' || c == '-') { @@ -582,12 +615,24 @@ ReturnType parseDateTimeBestEffortImpl( return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: neither Date nor Time was parsed successfully"); if (!day_of_month) + { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: day of month is required"); day_of_month = 1; + } + if (!month) + { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month is required"); month = 1; + } if (!year) { + if constexpr (strict) + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year is required"); + /// If year is not specified, it will be the current year if the date is unknown or not greater than today, /// otherwise it will be the previous year. /// This convoluted logic is needed to parse the syslog format, which looks as follows: "Mar 3 01:33:48". 
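/// A self-contained sketch of the year-guessing rule described above (not from the patch);
/// the "today" of 2018-06-15 is hypothetical:

#include <cassert>

/// Use the current year unless the month/day would land in the future; then use the previous one.
int guessYear(int today_year, int today_month, int today_day, int month, int day)
{
    bool in_future = month > today_month || (month == today_month && day > today_day);
    return in_future ? today_year - 1 : today_year;
}

int main()
{
    assert(guessYear(2018, 6, 15, 3, 3) == 2018);  /// "Mar  3 01:33:48" -> 2018-03-03 01:33:48
    assert(guessYear(2018, 6, 15, 9, 3) == 2017);  /// "Sep  3 01:33:48" -> 2017-09-03 01:33:48
}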
@@ -654,20 +699,20 @@ ReturnType parseDateTimeBestEffortImpl( return ReturnType(true); } -template -ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone) +template +ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters = nullptr) { time_t whole; DateTimeSubsecondPart subsecond = {0, 0}; // needs to be explicitly initialized since it could be missing from input string if constexpr (std::is_same_v) { - if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond)) + if (!parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters)) return false; } else { - parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond); + parseDateTimeBestEffortImpl(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters); } @@ -730,4 +775,24 @@ bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone); } +bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters); +} + +bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTimeBestEffortImpl(res, in, local_time_zone, utc_time_zone, nullptr, allowed_date_delimiters); +} + +bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters); +} + +bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters) +{ + return parseDateTime64BestEffortImpl(res, scale, in, local_time_zone, utc_time_zone, allowed_date_delimiters); +} + } diff --git a/src/IO/parseDateTimeBestEffort.h b/src/IO/parseDateTimeBestEffort.h index 22af44f9e76..6dd052b67a3 100644 --- a/src/IO/parseDateTimeBestEffort.h +++ b/src/IO/parseDateTimeBestEffort.h @@ -63,4 +63,12 @@ void parseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTime64BestEffort(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); void parseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); bool tryParseDateTime64BestEffortUS(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone); + +/// More strict version of best-effort parsing. Requires day, month and year to be present, checks for allowed +/// delimiters between date components, and makes additional correctness checks. Used in schema inference of date times.
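+/// For example, with allowed_date_delimiters = "-/:" these strict variants accept
+/// "2020-01-01 01:02:03" but reject "2020_01_01 01:02:03", so schema inference keeps the latter as String.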
+bool tryParseDateTimeBestEffortStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTimeBestEffortUSStrict(time_t & res, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTime64BestEffortStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); +bool tryParseDateTime64BestEffortUSStrict(DateTime64 & res, UInt32 scale, ReadBuffer & in, const DateLUTImpl & local_time_zone, const DateLUTImpl & utc_time_zone, const char * allowed_date_delimiters); + } diff --git a/tests/queries/0_stateless/03222_date_time_inference.reference b/tests/queries/0_stateless/03222_date_time_inference.reference new file mode 100644 index 00000000000..3288308a1d0 --- /dev/null +++ b/tests/queries/0_stateless/03222_date_time_inference.reference @@ -0,0 +1,253 @@ +Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +2020-01-01 Date +String +2020_01_01 String +2020_1_01 String +2020_01_1 String +2020_1_1 String +2020a01a01 String +2020a1a01 String +2020a01a1 String +2020a1a1 String +20200101 String +DateTime +2020-01-02 18:42:42 DateTime +2020-01-02 18:42:42 DateTime +2020-01-02 18:42:42 DateTime +String +2020_01_01 42:42:42 String +2020a01a01 42:42:42 String +2020-01-01 42.42.42 String +2020-01-01 42 42 42 String +2020-01-01 42a42a42 String +DateTime64 +2020-01-02 18:42:42.424200000 DateTime64(9) +2020-01-02 18:42:42.424200000 DateTime64(9) +2020-01-02 18:42:42.424200000 DateTime64(9) +String +2020_01_01 42:42:42.4242 String +2020a01a01 42:42:42.4242 String +2020-01-01 42.42.42.4242 String +2020-01-01 42 42 42.4242 String +2020-01-01 42a42a42.4242 String +DateTime/DateTime64 best effort +2000-01-01 00:00:00 DateTime +2000-01-01 01:00:00 DateTime +2000-01-01 01:00:00.000000000 DateTime64(9) +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-01-01 21:02:03 DateTime +2017-01-01 21:02:03.000000000 DateTime64(9) +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-01-02 01:02:03 DateTime +2017-01-02 01:02:03.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +2000-04-17 01:02:03 DateTime +2000-04-17 01:02:03.000000000 DateTime64(9) +1970-01-02 01:00:00 DateTime +1970-01-02 01:00:00.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +1970-01-02 01:02:03 DateTime +1970-01-02 01:02:03.000000000 DateTime64(9) +2015-12-31 20:00:00 DateTime +2015-12-31 20:00:00 DateTime +2016-01-01 00:00:00 DateTime +2016-01-01 00:00:00 DateTime +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 
02:34:05 DateTime +2017-01-02 02:34:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-02 02:04:05 DateTime +2017-01-02 02:04:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-01 18:04:05 DateTime +2017-01-01 18:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-01 23:04:05 DateTime +2017-01-01 23:04:05.000000000 DateTime64(9) +2017-02-01 23:04:05 DateTime +2017-02-01 23:04:05.000000000 DateTime64(9) +2017-06-01 23:04:05 DateTime +2017-06-01 23:04:05.000000000 DateTime64(9) +2017-01-02 00:04:05 DateTime +2017-01-02 00:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 04:04:05 DateTime +2017-01-02 04:04:05.000000000 DateTime64(9) +2017-01-02 02:04:05 DateTime +2017-01-02 02:04:05.000000000 DateTime64(9) +2017-01-02 03:04:05 DateTime +2017-01-02 03:04:05.000000000 DateTime64(9) +2017-04-01 11:22:33 DateTime +2017-04-01 11:22:33.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 11:22:33 DateTime +2017-04-02 11:22:33.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 01:22:33 DateTime +2017-04-02 01:22:33.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-02 01:02:33 DateTime +2017-04-02 01:02:33.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-04-01 22:02:03 DateTime +2017-04-01 22:02:03.000000000 DateTime64(9) +2017-04-01 21:02:03 DateTime +2017-04-01 21:02:03.000000000 DateTime64(9) +2017-04-02 01:02:03 DateTime +2017-04-02 01:02:03.000000000 DateTime64(9) +2017-01-01 22:02:03 DateTime +2017-01-01 22:02:03.000000000 DateTime64(9) +2017-04-25 01:02:03 DateTime +2017-04-25 01:02:03.000000000 DateTime64(9) +2017-04-25 01:02:03 DateTime +2017-04-25 01:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-24 22:02:03 DateTime +2017-01-24 22:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 10:02:03.000000000 DateTime64(9) +2017-01-25 09:32:03 DateTime +2017-01-25 09:32:03.000000000 DateTime64(9) +2017-01-25 01:02:03 DateTime +2017-01-25 01:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 13:02:03 DateTime +2017-01-25 13:02:03.000000000 DateTime64(9) +2017-01-25 10:02:03 DateTime +2017-01-25 
10:02:03.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +2018-02-11 03:40:50 DateTime +2018-02-11 03:40:50.000000000 DateTime64(9) +String +2 String +20 String +200 String +2000 String +20000 String +200001 String +2000010 String +20000101 String +200001010 String +2000010101 String +20000101010 String +200001010101 String +2000010101010 String +20000101010101 String +2.1 String +20.1 String +200.1 String +2000.1 String +20000.1 String +200001.1 String +2000010.1 String +20000101.1 String +200001010.1 String +2000010101.1 String +20000101010.1 String +200001010101.1 String +2000010101010.1 String +20000101010101.1 String +Mar String +Mar1 String +Mar 1 String +Mar01 String +Mar 01 String +Mar2020 String +Mar 2020 String +Mar012020 String +Mar 012020 String +Mar01012020 String +Mar 01012020 String +Mar0101202001 String +Mar 0101202001 String +Mar010120200101 String +Mar 010120200101 String +Mar01012020010101 String +Mar 01012020010101 String +Mar01012020010101.000 String +Mar 0101202001010101.000 String +2000 01 01 01:00:00 String +2000 01 01 01:00:00.000 String +2000a01a01 01:00:00 String +2000a01a01 01:00:00.000 String +2000-01-01 01 00 00 String +2000-01-01 01 00 00.000 String +2000-01-01 01-00-00 String +2000-01-01 01-00-00.000 String +2000-01-01 01a00a00 String +2000-01-01 01a00a00.000 String +2000-01 01:00:00 String +2000-01 01:00:00.000 String +2000 01 String +2000-01 String +Mar 2000 00:00:00 String +Mar 2000 00:00:00.000 String +2000 00:00:00 String +2000 00:00:00.000 String +Mar 2000-01-01 00:00:00 String +Mar 2000-01-01 00:00:00.000 String diff --git a/tests/queries/0_stateless/03222_date_time_inference.sql b/tests/queries/0_stateless/03222_date_time_inference.sql new file mode 100644 index 00000000000..01266a88d55 --- /dev/null +++ b/tests/queries/0_stateless/03222_date_time_inference.sql @@ -0,0 +1,268 @@ +set input_format_try_infer_datetimes = 1; +set input_format_try_infer_dates = 1; +set schema_inference_make_columns_nullable = 0; +set input_format_json_try_infer_numbers_from_strings = 0; + +select 'Date'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:1:1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-1-1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/1/1"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_1_1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a1"}'); +select x, 
toTypeName(x) from format(JSONEachRow, '{"x" : "2020a1a1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20200101"}'); + +select 'DateTime'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42"}'); + +select 'DateTime64'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020/01/01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42:42:42.4242"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020_01_01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020a01a01 42:42:42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42.42.42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42 42 42.4242"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020-01-01 42a42a42.4242"}'); + +set date_time_input_format='best_effort'; +select 'DateTime/DateTime64 best effort'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 00:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01:00:00.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 MSK+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 MSK+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/17 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/1970 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "02/01/70 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "11 Feb 2018 06:40:50.000 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "17 Apr 2000 2 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102 01:00:00.000"}'); +select x, toTypeName(x) from 
format(JSONEachRow, '{"x" : "19700102010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "19700102010203Z.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1970/01/02 010203Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "1970/01/02 010203.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01UTC"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2016-01-01Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "201701 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+030"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+030"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05+900"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000+900"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05GMT"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000GMT"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Feb"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Feb"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSD Jun"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSD Jun"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02 03:04:05.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, 
'{"x" : "2017-01-02T03:04:05"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000 -0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05-0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000-0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000+0100"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017-01-02T03:04:05.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 01 11:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 01:2:3.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:02:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 11:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:03.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:22:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:33.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3 UTC+0400"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 02 1:2:3.000 UTC+0400"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Apr 2 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : 
"2017 Jan 02 010203 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2017 Jan 02 010203.000 UTC+0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 01:02:03.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Apr 2017 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 MSK"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +0300 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +0300 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z+03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z+03:00 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z +03:30 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z +03:30 PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z Mon"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z Mon"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000Z PM"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3 Z PM +03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "25 Jan 2017 1:2:3.000 Z PM +03:00"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun 11 Feb 2018 06:40:50.000 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 06:40:50 +0300"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Sun, 11 Feb 2018 
06:40:50.000 +0300"}'); + +select 'String'; +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "200001010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000010101010.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "20000101010101.1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 1"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 010120200101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 01012020010101"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar01012020010101.000"}'); +select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 0101202001010101.000"}'); 
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01 01 01:00:00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000a01a01 01:00:00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000a01a01 01:00:00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01 00 00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01-00-00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01-01 01a00a00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01 01:00:00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 01"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000-01"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000 00:00:00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2000 00:00:00.000"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00"}');
+select x, toTypeName(x) from format(JSONEachRow, '{"x" : "Mar 2000-01-01 00:00:00.000"}');
+
+

From 9f6e472b0cf03f8018b149ad5f7541b4ddec5735 Mon Sep 17 00:00:00 2001
From: Han Fei
Date: Thu, 15 Aug 2024 11:47:41 +0200
Subject: [PATCH 37/43] process regexp flags correctly

---
 src/Common/OptimizedRegularExpression.cpp | 40 ++++++++++++++---------
 src/Common/tests/gtest_optimize_re.cpp    |  2 ++
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp
index 712cab80aff..04e5f846adf 100644
--- a/src/Common/OptimizedRegularExpression.cpp
+++ b/src/Common/OptimizedRegularExpression.cpp
@@ -244,33 +244,41 @@ const char * analyzeImpl(
         is_trivial = false;
         if (!in_square_braces)
         {
-            /// Check for case-insensitive flag.
-            if (pos + 1 < end && pos[1] == '?')
+            /// A '-' negates the flags that follow it.
+            /// There are various possible flags,
+            /// but re2 actually supports only imsU.
+            auto is_flag_char = [](char x)
             {
-                for (size_t offset = 2; pos + offset < end; ++offset)
+                return x == '-' || x == 'i' || x == 'm' || x == 's' || x == 'U' || x == 'u';
+            };
+            /// Check for case-insensitive flag.
+            if (pos + 2 < end && pos[1] == '?' && is_flag_char(pos[2]))
+            {
+                size_t offset = 2;
+                for (; pos + offset < end; ++offset)
                 {
-                    if (pos[offset] == '-' /// it means flag negation
-                        /// various possible flags, actually only imsU are supported by re2
-                        || (pos[offset] >= 'a' && pos[offset] <= 'z')
-                        || (pos[offset] >= 'A' && pos[offset] <= 'Z'))
+                    if (pos[offset] == 'i')
                     {
-                        if (pos[offset] == 'i')
-                        {
-                            /// Actually it can be negated case-insensitive flag. But we don't care.
-                            has_case_insensitive_flag = true;
-                            break;
-                        }
+                        /// Actually it can be a negated case-insensitive flag. But we don't care.
+                        has_case_insensitive_flag = true;
                     }
-                    else
+                    else if (!is_flag_char(pos[offset]))
                         break;
                 }
+                pos += offset;
+                /// If this group only contains flags, we have nothing to do.
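+                /// After the scan, `pos` points at the first non-flag character:
+                /// ')' for a pure flag group such as "(?i)", or ':' for "(?i:...)".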
+                if (*pos == ')')
+                {
+                    ++pos;
+                    break;
+                }
             }
             /// (?:regex) means non-capturing parentheses group
-            if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
+            else if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
             {
                 pos += 2;
             }
-            if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
+            else if (pos + 3 < end && pos[1] == '?' && (pos[2] == '<' || pos[2] == '\'' || (pos[2] == 'P' && pos[3] == '<')))
             {
                 pos = skipNameCapturingGroup(pos, pos[2] == 'P' ? 3: 2, end);
             }
diff --git a/src/Common/tests/gtest_optimize_re.cpp b/src/Common/tests/gtest_optimize_re.cpp
index a9fcb918b24..0730a13f160 100644
--- a/src/Common/tests/gtest_optimize_re.cpp
+++ b/src/Common/tests/gtest_optimize_re.cpp
@@ -19,6 +19,8 @@ TEST(OptimizeRE, analyze)
     };
     test_f("abc", "abc", {}, true, true);
     test_f("c([^k]*)de", "");
+    test_f("(?-s)bob", "bob", {}, false, true);
+    test_f("(?s)bob", "bob", {}, false, true);
     test_f("abc(de)fg", "abcdefg", {}, false, true);
     test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true);
     test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true);

From bea8e65f4fcfa99cdae21ff4776d509ba4fcd0d7 Mon Sep 17 00:00:00 2001
From: avogar
Date: Thu, 15 Aug 2024 09:48:28 +0000
Subject: [PATCH 38/43] Fix tests

---
 src/IO/parseDateTimeBestEffort.cpp            | 20 +++++++++++++++---
 ...ed_dates_in_csv_schema_inference.reference |  2 +-
 ...03033_dynamic_text_serialization.reference | 10 +++++-----
 .../03199_json_extract_dynamic.reference      |  2 +-
 ...ad_for_schema_inference_in_cache.reference |  2 +-
 .../0_stateless/03222_date_time_inference.sql |  1 +
 6 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp
index 68122a37df6..f220577f2cb 100644
--- a/src/IO/parseDateTimeBestEffort.cpp
+++ b/src/IO/parseDateTimeBestEffort.cpp
@@ -82,7 +82,7 @@ struct DateTimeSubsecondPart
     UInt8 digits;
 };

-template <typename ReturnType, bool is_us_style, bool strict = false>
+template <typename ReturnType, bool is_us_style, bool strict = false, bool is_64 = false>
 ReturnType parseDateTimeBestEffortImpl(
     time_t & res,
     ReadBuffer & in,
@@ -686,6 +686,20 @@ ReturnType parseDateTimeBestEffortImpl(
         }
     };

+    if constexpr (strict)
+    {
+        if constexpr (is_64)
+        {
+            if (year < 1900)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime64: year {} is less than minimum supported year 1900", year);
+        }
+        else
+        {
+            if (year < 1970)
+                return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year {} is less than minimum supported year 1970", year);
+        }
+    }
+
     if (has_time_zone_offset)
     {
         res = utc_time_zone.makeDateTime(year, month, day_of_month, hour, minute, second);
@@ -707,12 +721,12 @@ ReturnType parseDateTime64BestEffortImpl(DateTime64 & res, UInt32 scale, ReadBuf

     if constexpr (std::is_same_v<ReturnType, bool>)
     {
-        if (!parseDateTimeBestEffortImpl<bool, is_us_style, strict>(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters))
+        if (!parseDateTimeBestEffortImpl<bool, is_us_style, strict, /*is_64=*/true>(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters))
             return false;
     }
     else
     {
-        parseDateTimeBestEffortImpl<ReturnType, is_us_style, strict>(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters);
+        parseDateTimeBestEffortImpl<ReturnType, is_us_style, strict, /*is_64=*/true>(whole, in, local_time_zone, utc_time_zone, &subsecond, allowed_date_delimiters);
     }

diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference
index be82d744a3b..56293ca0e5d 100644
--- a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference
+++ 
b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference @@ -1 +1 @@ -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) diff --git a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference index d965245266c..aa7b3fc83a2 100644 --- a/tests/queries/0_stateless/03033_dynamic_text_serialization.reference +++ b/tests/queries/0_stateless/03033_dynamic_text_serialization.reference @@ -4,7 +4,7 @@ JSON {"d":"str","dynamicType(d)":"String"} {"d":["1","2","3"],"dynamicType(d)":"Array(Int64)"} {"d":"2020-01-01","dynamicType(d)":"Date"} -{"d":"2020-01-01 10:00:00.000000000","dynamicType(d)":"DateTime64(9)"} +{"d":"2020-01-01 10:00:00","dynamicType(d)":"DateTime"} {"d":{"a":"42","b":"str"},"dynamicType(d)":"Tuple(a Int64, b String)"} {"d":{"a":"43"},"dynamicType(d)":"Tuple(a Int64)"} {"d":{"a":"44","c":["1","2","3"]},"dynamicType(d)":"Tuple(a Int64, c Array(Int64))"} @@ -22,7 +22,7 @@ CSV "str","String" "[1,2,3]","Array(Int64)" "2020-01-01","Date" -"2020-01-01 10:00:00.000000000","DateTime64(9)" +"2020-01-01 10:00:00","DateTime" "[1, 'str', [1, 2, 3]]","String" \N,"None" true,"Bool" @@ -32,18 +32,18 @@ TSV str String [1,2,3] Array(Int64) 2020-01-01 Date -2020-01-01 10:00:00.000000000 DateTime64(9) +2020-01-01 10:00:00 DateTime [1, \'str\', [1, 2, 3]] String \N None true Bool Values -(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00.000000000','DateTime64(9)'),(NULL,'None'),(true,'Bool') +(42,'Int64'),(42.42,'Float64'),('str','String'),([1,2,3],'Array(Int64)'),('2020-01-01','Date'),('2020-01-01 10:00:00','DateTime'),(NULL,'None'),(true,'Bool') Cast using parsing 42 Int64 42.42 Float64 [1,2,3] Array(Int64) 2020-01-01 Date -2020-01-01 10:00:00.000000000 DateTime64(9) +2020-01-01 10:00:00 DateTime \N None true Bool 42 Int64 diff --git a/tests/queries/0_stateless/03199_json_extract_dynamic.reference b/tests/queries/0_stateless/03199_json_extract_dynamic.reference index 759b7763cd1..955106946ea 100644 --- a/tests/queries/0_stateless/03199_json_extract_dynamic.reference +++ b/tests/queries/0_stateless/03199_json_extract_dynamic.reference @@ -12,7 +12,7 @@ Hello String [1,2,3] Array(Nullable(Int64)) ['str1','str2','str3'] Array(Nullable(String)) [[[1],[2,3,4]],[[5,6],[7]]] Array(Array(Array(Nullable(Int64)))) -['2020-01-01 00:00:00.000000000','2020-01-01 00:00:00.000000000'] Array(Nullable(DateTime64(9))) +['2020-01-01 00:00:00','2020-01-01 00:00:00'] Array(Nullable(DateTime)) ['2020-01-01','2020-01-01 date'] Array(Nullable(String)) ['2020-01-01','2020-01-01 00:00:00','str'] Array(Nullable(String)) ['2020-01-01','2020-01-01 00:00:00','42'] Array(Nullable(String)) diff --git a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference index cd109daac52..13b1138d1c4 100644 --- a/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference +++ b/tests/queries/0_stateless/03212_max_bytes_to_read_for_schema_inference_in_cache.reference @@ -1,2 +1,2 @@ x Nullable(Int64) -schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, 
read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false +schema_inference_hints=, max_rows_to_read_for_schema_inference=25000, max_bytes_to_read_for_schema_inference=1000, schema_inference_make_columns_nullable=true, try_infer_integers=true, try_infer_dates=true, try_infer_datetimes=true, try_infer_datetimes_only_datetime64=false, try_infer_numbers_from_strings=false, read_bools_as_numbers=true, read_bools_as_strings=true, read_objects_as_strings=true, read_numbers_as_strings=true, read_arrays_as_strings=true, try_infer_objects_as_tuples=true, infer_incomplete_types_as_strings=true, try_infer_objects=false, use_string_type_for_ambiguous_paths_in_named_tuples_inference_from_objects=false diff --git a/tests/queries/0_stateless/03222_date_time_inference.sql b/tests/queries/0_stateless/03222_date_time_inference.sql index 01266a88d55..ebd472294be 100644 --- a/tests/queries/0_stateless/03222_date_time_inference.sql +++ b/tests/queries/0_stateless/03222_date_time_inference.sql @@ -2,6 +2,7 @@ set input_format_try_infer_datetimes = 1; set input_format_try_infer_dates = 1; set schema_inference_make_columns_nullable = 0; set input_format_json_try_infer_numbers_from_strings = 0; +set session_timezone = 'UTC'; select 'Date'; select x, toTypeName(x) from format(JSONEachRow, '{"x" : "2020:01:01"}'); From 0a10f0ceb3fab80e5dcfab5ebbebbdbfcdaff6c1 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 15 Aug 2024 11:27:12 +0000 Subject: [PATCH 39/43] Update tests --- .../02325_dates_schema_inference.reference | 58 +++++++++---------- tests/queries/0_stateless/02404_data.CSV | 10 ++++ .../0_stateless/02404_data.CSVWithNames | 11 ++++ .../0_stateless/02404_data.CustomSeparated | 10 ++++ .../0_stateless/02404_data.JSONCompactEachRow | 10 ++++ .../0_stateless/02404_data.JSONEachRow | 10 ++++ tests/queries/0_stateless/02404_data.TSKV | 10 ++++ tests/queries/0_stateless/02404_data.TSV | 10 ++++ .../0_stateless/02404_data.TSVWithNames | 11 ++++ tests/queries/0_stateless/02404_data.Values | 1 + ...ce_cache_respect_format_settings.reference | 18 +++--- 11 files changed, 121 insertions(+), 38 deletions(-) create mode 100644 tests/queries/0_stateless/02404_data.CSV create mode 100644 tests/queries/0_stateless/02404_data.CSVWithNames create mode 100644 tests/queries/0_stateless/02404_data.CustomSeparated create mode 100644 tests/queries/0_stateless/02404_data.JSONCompactEachRow create mode 100644 tests/queries/0_stateless/02404_data.JSONEachRow create mode 100644 tests/queries/0_stateless/02404_data.TSKV create mode 100644 tests/queries/0_stateless/02404_data.TSV create mode 100644 tests/queries/0_stateless/02404_data.TSVWithNames create mode 100644 tests/queries/0_stateless/02404_data.Values diff --git a/tests/queries/0_stateless/02325_dates_schema_inference.reference b/tests/queries/0_stateless/02325_dates_schema_inference.reference index c8eebd3262e..124f105220d 100644 --- a/tests/queries/0_stateless/02325_dates_schema_inference.reference +++ b/tests/queries/0_stateless/02325_dates_schema_inference.reference @@ -1,29 +1,29 @@ JSONEachRow x Nullable(Date) x Nullable(DateTime64(9)) -x Nullable(DateTime64(9)) +x Nullable(DateTime) x Array(Nullable(Date)) -x Array(Nullable(DateTime64(9))) -x Array(Nullable(DateTime64(9))) -x Tuple(\n date1 Nullable(DateTime64(9)),\n date2 Nullable(Date)) -x 
Array(Nullable(DateTime64(9))) -x Array(Nullable(DateTime64(9))) -x Nullable(DateTime64(9)) +x Array(Nullable(DateTime)) +x Array(Nullable(DateTime)) +x Tuple(\n date1 Nullable(DateTime),\n date2 Nullable(Date)) +x Array(Nullable(DateTime)) +x Array(Nullable(DateTime)) +x Nullable(DateTime) x Array(Nullable(String)) x Nullable(String) x Array(Nullable(String)) -x Tuple(\n key1 Array(Array(Nullable(DateTime64(9)))),\n key2 Array(Array(Nullable(String)))) +x Tuple(\n key1 Array(Array(Nullable(DateTime))),\n key2 Array(Array(Nullable(String)))) CSV c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) @@ -31,14 +31,14 @@ c1 Map(String, Array(Array(Nullable(String)))) TSV c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) @@ -46,14 +46,14 @@ c1 Map(String, Array(Array(Nullable(String)))) Values c1 Nullable(Date) c1 Nullable(DateTime64(9)) -c1 Nullable(DateTime64(9)) +c1 Nullable(DateTime) c1 Array(Nullable(Date)) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Map(String, Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Array(Nullable(DateTime64(9))) -c1 Nullable(DateTime64(9)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Map(String, Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Array(Nullable(DateTime)) +c1 Nullable(DateTime) c1 Array(Nullable(String)) c1 Nullable(String) c1 Array(Nullable(String)) diff --git a/tests/queries/0_stateless/02404_data.CSV b/tests/queries/0_stateless/02404_data.CSV new file mode 100644 index 00000000000..2d8b5c8daa8 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CSV @@ -0,0 +1,10 @@ +0,"1970-01-01" +1,"1970-01-02" +2,"1970-01-03" +3,"1970-01-04" +4,"1970-01-05" +5,"1970-01-06" +6,"1970-01-07" +7,"1970-01-08" +8,"1970-01-09" +9,"1970-01-10" diff --git a/tests/queries/0_stateless/02404_data.CSVWithNames b/tests/queries/0_stateless/02404_data.CSVWithNames new file mode 100644 index 00000000000..34647008916 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CSVWithNames @@ -0,0 +1,11 @@ +"number","toDate(number)" +0,"1970-01-01" +1,"1970-01-02" +2,"1970-01-03" +3,"1970-01-04" +4,"1970-01-05" +5,"1970-01-06" +6,"1970-01-07" +7,"1970-01-08" +8,"1970-01-09" +9,"1970-01-10" diff --git a/tests/queries/0_stateless/02404_data.CustomSeparated b/tests/queries/0_stateless/02404_data.CustomSeparated new file mode 100644 index 00000000000..f3ae1663536 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.CustomSeparated @@ -0,0 +1,10 @@ +0 1970-01-01 +1 
1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.JSONCompactEachRow b/tests/queries/0_stateless/02404_data.JSONCompactEachRow new file mode 100644 index 00000000000..de2e0986aab --- /dev/null +++ b/tests/queries/0_stateless/02404_data.JSONCompactEachRow @@ -0,0 +1,10 @@ +["0", "1970-01-01"] +["1", "1970-01-02"] +["2", "1970-01-03"] +["3", "1970-01-04"] +["4", "1970-01-05"] +["5", "1970-01-06"] +["6", "1970-01-07"] +["7", "1970-01-08"] +["8", "1970-01-09"] +["9", "1970-01-10"] diff --git a/tests/queries/0_stateless/02404_data.JSONEachRow b/tests/queries/0_stateless/02404_data.JSONEachRow new file mode 100644 index 00000000000..e77256ac7fc --- /dev/null +++ b/tests/queries/0_stateless/02404_data.JSONEachRow @@ -0,0 +1,10 @@ +{"number":"0","toDate(number)":"1970-01-01"} +{"number":"1","toDate(number)":"1970-01-02"} +{"number":"2","toDate(number)":"1970-01-03"} +{"number":"3","toDate(number)":"1970-01-04"} +{"number":"4","toDate(number)":"1970-01-05"} +{"number":"5","toDate(number)":"1970-01-06"} +{"number":"6","toDate(number)":"1970-01-07"} +{"number":"7","toDate(number)":"1970-01-08"} +{"number":"8","toDate(number)":"1970-01-09"} +{"number":"9","toDate(number)":"1970-01-10"} diff --git a/tests/queries/0_stateless/02404_data.TSKV b/tests/queries/0_stateless/02404_data.TSKV new file mode 100644 index 00000000000..70f7ad33c8b --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSKV @@ -0,0 +1,10 @@ +number=0 toDate(number)=1970-01-01 +number=1 toDate(number)=1970-01-02 +number=2 toDate(number)=1970-01-03 +number=3 toDate(number)=1970-01-04 +number=4 toDate(number)=1970-01-05 +number=5 toDate(number)=1970-01-06 +number=6 toDate(number)=1970-01-07 +number=7 toDate(number)=1970-01-08 +number=8 toDate(number)=1970-01-09 +number=9 toDate(number)=1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.TSV b/tests/queries/0_stateless/02404_data.TSV new file mode 100644 index 00000000000..f3ae1663536 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSV @@ -0,0 +1,10 @@ +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.TSVWithNames b/tests/queries/0_stateless/02404_data.TSVWithNames new file mode 100644 index 00000000000..23310234a8c --- /dev/null +++ b/tests/queries/0_stateless/02404_data.TSVWithNames @@ -0,0 +1,11 @@ +number toDate(number) +0 1970-01-01 +1 1970-01-02 +2 1970-01-03 +3 1970-01-04 +4 1970-01-05 +5 1970-01-06 +6 1970-01-07 +7 1970-01-08 +8 1970-01-09 +9 1970-01-10 diff --git a/tests/queries/0_stateless/02404_data.Values b/tests/queries/0_stateless/02404_data.Values new file mode 100644 index 00000000000..d9a621d7ec9 --- /dev/null +++ b/tests/queries/0_stateless/02404_data.Values @@ -0,0 +1 @@ +(0,'1970-01-01'),(1,'1970-01-02'),(2,'1970-01-03'),(3,'1970-01-04'),(4,'1970-01-05'),(5,'1970-01-06'),(6,'1970-01-07'),(7,'1970-01-08'),(8,'1970-01-09'),(9,'1970-01-10') \ No newline at end of file diff --git a/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference b/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference index 049603328d9..3d6b1021916 100644 --- a/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference +++ b/tests/queries/0_stateless/02404_schema_inference_cache_respect_format_settings.reference @@ -4,7 
+4,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -14,7 +14,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -24,7 +24,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -34,7 +34,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -44,7 +44,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -54,7 +54,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -64,7 +64,7 @@ toDate(number) Nullable(Date) number Nullable(Float64) toDate(number) Nullable(Date) number Nullable(Int64) -toDate(number) Nullable(DateTime64(9)) +toDate(number) Nullable(DateTime) number Nullable(Int64) toDate(number) Nullable(Date) 4 @@ -74,7 +74,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 @@ -84,7 +84,7 @@ c2 Nullable(Date) c1 Nullable(Float64) c2 Nullable(Date) c1 Nullable(Int64) -c2 Nullable(DateTime64(9)) +c2 Nullable(DateTime) c1 UInt8 c2 Nullable(Date) 4 From 50a8cee0c5a4cd067cee2dc5584401b15283b3cd Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 15 Aug 2024 11:39:04 +0000 Subject: [PATCH 40/43] Update docs --- docs/en/interfaces/schema-inference.md | 93 +++++++++++++++++--------- 1 file changed, 62 insertions(+), 31 deletions(-) diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 05fae994cbe..4afba20d76c 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -359,13 +359,14 @@ DESC format(JSONEachRow, '{"int" : 42, "float" : 42.42, "string" : "Hello, World Dates, DateTimes: ```sql -DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00"}') +DESC format(JSONEachRow, '{"date" : "2022-01-01", "datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}') ``` ```response -┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ -│ date │ Nullable(Date) │ │ │ │ │ │ -│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ -└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ date │ Nullable(Date) │ │ │ │ │ │ +│ datetime │ Nullable(DateTime) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` Arrays: @@ -759,12 +760,13 @@ DESC format(CSV, 'Hello world!,World hello!') Dates, 
DateTimes:

 ```sql
-DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00"')
+DESC format(CSV, '"2020-01-01","2020-01-01 00:00:00","2022-01-01 00:00:00.000"')
 ```
 ```response
 ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
 │ c1   │ Nullable(Date)          │              │                    │         │                  │                │
-│ c2   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
 └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```
@@ -956,12 +958,13 @@ DESC format(TSKV, 'int=42 float=42.42 bool=true string=Hello,World!\n')
 Dates, DateTimes:

 ```sql
-DESC format(TSV, '2020-01-01 2020-01-01 00:00:00')
+DESC format(TSV, '2020-01-01 2020-01-01 00:00:00 2022-01-01 00:00:00.000')
 ```
 ```response
 ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
 │ c1   │ Nullable(Date)          │              │                    │         │                  │                │
-│ c2   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
 └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```
@@ -1126,12 +1129,13 @@ DESC format(Values, $$(42, 42.42, true, 'Hello,World!')$$)
 Dates, DateTimes:

 ```sql
-DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00')$$)
+DESC format(Values, $$('2020-01-01', '2020-01-01 00:00:00', '2022-01-01 00:00:00.000')$$)
 ```
 ```response
 ┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
 │ c1   │ Nullable(Date)          │              │                    │         │                  │                │
-│ c2   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+│ c2   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ c3   │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
 └──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```
@@ -1504,8 +1508,8 @@ DESC format(JSONEachRow, $$

 #### input_format_try_infer_datetimes

-If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats.
-If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime64(9)`,
+If enabled, ClickHouse will try to infer type `DateTime` or `DateTime64` from string fields in schema inference for text formats.
+If all fields from a column in sample data were successfully parsed as datetimes, the result type will be `DateTime` or `DateTime64(9)` (if any datetime had a fractional part),
 if at least one field was not parsed as datetime, the result type will be `String`.

 Enabled by default.
@@ -1513,39 +1517,66 @@
 **Examples**

 ```sql
-SET input_format_try_infer_datetimes = 0
+SET input_format_try_infer_datetimes = 0;
 DESC format(JSONEachRow, $$
-    {"datetime" : "2021-01-01 00:00:00.000"}
-    {"datetime" : "2022-01-01 00:00:00.000"}
+    {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
+    {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
 $$)
 ```
 ```response
-┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ datetime │ Nullable(String) │              │                    │         │                  │                │
-└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ datetime   │ Nullable(String) │              │                    │         │                  │                │
+│ datetime64 │ Nullable(String) │              │                    │         │                  │                │
+└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```

 ```sql
-SET input_format_try_infer_datetimes = 1
+SET input_format_try_infer_datetimes = 1;
 DESC format(JSONEachRow, $$
-    {"datetime" : "2021-01-01 00:00:00.000"}
-    {"datetime" : "2022-01-01 00:00:00.000"}
+    {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
+    {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"}
 $$)
 ```
 ```response
-┌─name─────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ datetime │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
-└──────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ datetime   │ Nullable(DateTime)      │              │                    │         │                  │                │
+│ datetime64 │ Nullable(DateTime64(9)) │              │                    │         │                  │                │
+└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```

 ```sql
 DESC format(JSONEachRow, $$
-    {"datetime" : "2021-01-01 00:00:00.000"}
-    {"datetime" : "unknown"}
+    {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"}
+    {"datetime" : "unknown", "datetime64" : "unknown"}
 $$)
 ```
 ```response
-┌─name─────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ datetime │ Nullable(String) │              │                    │         │                  │                │
-└──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name───────┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ datetime   │ Nullable(String) │              │                    │         │                  │                │
+│ datetime64 │ Nullable(String) │              │                    │         │                  │                │
+└────────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
 ```

+#### input_format_try_infer_datetimes_only_datetime64
+
+If enabled, ClickHouse will always infer `DateTime64(9)` when `input_format_try_infer_datetimes` is enabled, even if the datetime values don't contain a fractional part.
+
+Disabled by default.
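+
+Note that the usual fallback still applies: if at least one field cannot be parsed as a datetime, the resulting type will be `String`.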
+ +**Examples** + +```sql +SET input_format_try_infer_datetimes = 1; +SET input_format_try_infer_datetimes_only_datetime64 = 1; +DESC format(JSONEachRow, $$ + {"datetime" : "2021-01-01 00:00:00", "datetime64" : "2021-01-01 00:00:00.000"} + {"datetime" : "2022-01-01 00:00:00", "datetime64" : "2022-01-01 00:00:00.000"} + $$) +``` + +```text +┌─name───────┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ datetime │ Nullable(DateTime64(9)) │ │ │ │ │ │ +│ datetime64 │ Nullable(DateTime64(9)) │ │ │ │ │ │ +└────────────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format) From 03bfb1562b56d96963e75bbd14b6759ae103e52a Mon Sep 17 00:00:00 2001 From: Han Fei Date: Thu, 15 Aug 2024 14:26:01 +0200 Subject: [PATCH 41/43] fix overflow --- src/Common/OptimizedRegularExpression.cpp | 2 ++ src/Common/tests/gtest_optimize_re.cpp | 1 + 2 files changed, 3 insertions(+) diff --git a/src/Common/OptimizedRegularExpression.cpp b/src/Common/OptimizedRegularExpression.cpp index 04e5f846adf..2cdb3409487 100644 --- a/src/Common/OptimizedRegularExpression.cpp +++ b/src/Common/OptimizedRegularExpression.cpp @@ -266,6 +266,8 @@ const char * analyzeImpl( break; } pos += offset; + if (pos == end) + return pos; /// if this group only contains flags, we have nothing to do. if (*pos == ')') { diff --git a/src/Common/tests/gtest_optimize_re.cpp b/src/Common/tests/gtest_optimize_re.cpp index 0730a13f160..d6735c3ccfe 100644 --- a/src/Common/tests/gtest_optimize_re.cpp +++ b/src/Common/tests/gtest_optimize_re.cpp @@ -21,6 +21,7 @@ TEST(OptimizeRE, analyze) test_f("c([^k]*)de", ""); test_f("(?-s)bob", "bob", {}, false, true); test_f("(?s)bob", "bob", {}, false, true); + test_f("(?ssss", ""); test_f("abc(de)fg", "abcdefg", {}, false, true); test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"}, false, true); test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"}, false, true); From 47ed2204bd1e46ca6ef0556180372cc38fde5153 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 15 Aug 2024 12:38:24 +0000 Subject: [PATCH 42/43] Follow-up to ClickHouse#63898 --- .../0_stateless/00366_multi_statements.sh | 20 +++--- .../00443_preferred_block_size_bytes.sh | 4 +- ...ess_to_temporary_table_in_readonly_mode.sh | 20 +++--- ...4_performance_introspection_and_logging.sh | 4 +- ..._fetch_merged_or_mutated_part_zookeeper.sh | 6 +- ...d_optimize_skip_select_on_unused_shards.sh | 30 ++++----- ...p_select_on_unused_shards_with_prewhere.sh | 26 ++++---- ...ated_minimalistic_part_header_zookeeper.sh | 4 +- .../queries/0_stateless/00837_minmax_index.sh | 2 +- .../queries/0_stateless/00838_unique_index.sh | 2 +- .../0_stateless/00907_set_index_max_rows.sh | 2 +- .../0_stateless/00908_bloom_filter_index.sh | 10 +-- .../queries/0_stateless/00942_mutate_index.sh | 2 +- .../0_stateless/00943_materialize_index.sh | 4 +- .../00944_clear_index_in_partition.sh | 2 +- .../00964_bloom_index_string_functions.sh | 2 +- .../00965_set_index_string_functions.sh | 2 +- .../00974_primary_key_for_lowCardinality.sh | 4 +- tests/queries/0_stateless/00990_hasToken.sh | 2 +- .../02003_memory_limit_in_client.sh | 32 ++++----- .../02021_create_database_with_comment.sh | 2 +- .../02050_client_profile_events.sh | 8 +-- .../02221_parallel_replicas_bug.sh | 2 +- 
.../02221_system_zookeeper_unrestricted.sh | 4 +- ...2221_system_zookeeper_unrestricted_like.sh | 4 +- ...parallel_distributed_insert_select_view.sh | 6 +- ...arallel_reading_from_replicas_benchmark.sh | 4 +- .../02232_allow_only_replicated_engine.sh | 6 +- .../0_stateless/02250_ON_CLUSTER_grant.sh | 4 +- tests/queries/0_stateless/02262_column_ttl.sh | 4 +- .../0_stateless/02286_parallel_final.sh | 4 +- ..._distinct_in_order_optimization_explain.sh | 66 +++++++++---------- ..._column_ttl_expired_column_optimization.sh | 2 +- .../0_stateless/02361_fsync_profile_events.sh | 4 +- ...7_extend_protocol_with_query_parameters.sh | 10 +-- ...ting_by_input_stream_properties_explain.sh | 2 +- .../0_stateless/02417_load_marks_async.sh | 2 +- 37 files changed, 157 insertions(+), 157 deletions(-) diff --git a/tests/queries/0_stateless/00366_multi_statements.sh b/tests/queries/0_stateless/00366_multi_statements.sh index 0b2e80fe457..8546e547581 100755 --- a/tests/queries/0_stateless/00366_multi_statements.sh +++ b/tests/queries/0_stateless/00366_multi_statements.sh @@ -14,22 +14,22 @@ $CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2" $CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2;" $CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT -n --query="SELECT 1; S" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT -n --query="SELECT 1; SELECT 2" -$CLICKHOUSE_CLIENT -n --query="SELECT 1; SELECT 2;" -$CLICKHOUSE_CLIENT -n --query="SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT 1; S" 2>&1 | grep -o 'Syntax error' +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2" +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2;" +$CLICKHOUSE_CLIENT --query="SELECT 1; SELECT 2; SELECT" 2>&1 | grep -o 'Syntax error' -$CLICKHOUSE_CLIENT -n --query="DROP TABLE IF EXISTS t_00366; CREATE TABLE t_00366 (x UInt64) ENGINE = TinyLog;" +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS t_00366; CREATE TABLE t_00366 (x UInt64) ENGINE = TinyLog;" $CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES (1),(2),(3);" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" $CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES" <<< "(4),(5),(6)" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" -$CLICKHOUSE_CLIENT -n --query="INSERT INTO t_00366 VALUES (1),(2),(3);" -$CLICKHOUSE_CLIENT -n --query="SELECT * FROM t_00366" -$CLICKHOUSE_CLIENT -n --query="INSERT INTO t_00366 VALUES" <<< "(4),(5),(6)" -$CLICKHOUSE_CLIENT -n --query="SELECT * FROM t_00366" +$CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES (1),(2),(3);" +$CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" +$CLICKHOUSE_CLIENT --query="INSERT INTO t_00366 VALUES" <<< "(4),(5),(6)" +$CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" -d "SELECT 1;" @@ -48,4 +48,4 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&query=INSERT+INTO+t_00366+VALUES" -d "(7),(8),(9)" $CLICKHOUSE_CLIENT --query="SELECT * FROM t_00366" -$CLICKHOUSE_CLIENT -n --query="DROP TABLE t_00366;" +$CLICKHOUSE_CLIENT --query="DROP TABLE t_00366;" diff --git a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh index 27b9f5c00c7..0635fbc2a57 100755 --- a/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh +++ b/tests/queries/0_stateless/00443_preferred_block_size_bytes.sh @@ -43,10 +43,10 @@ popd > 
/dev/null #SCRIPTDIR=`dirname "$SCRIPTPATH"` SCRIPTDIR=$SCRIPTPATH -cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=10 -n > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout +cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=10 > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout cmp "$SCRIPTDIR"/00282_merging.reference "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout && echo PASSED || echo FAILED -cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=20 -n > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout +cat "$SCRIPTDIR"/00282_merging.sql | $CLICKHOUSE_CLIENT --preferred_block_size_bytes=20 > "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout cmp "$SCRIPTDIR"/00282_merging.reference "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout && echo PASSED || echo FAILED rm "${CLICKHOUSE_TMP}"/preferred_block_size_bytes.stdout diff --git a/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh b/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh index 560b97a1d1b..5550fa69d3d 100755 --- a/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh +++ b/tests/queries/0_stateless/00543_access_to_temporary_table_in_readonly_mode.sh @@ -4,7 +4,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" DROP TABLE IF EXISTS test_readonly; CREATE TABLE test_readonly ( ID Int @@ -16,7 +16,7 @@ $CLICKHOUSE_CLIENT -n --query=" ################ # Try to create temporary table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 1; CREATE TEMPORARY TABLE readonly ( ID Int @@ -26,7 +26,7 @@ CODE=$?; [ "$CODE" -ne "164" ] && [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to insert into exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 1; INSERT INTO test_readonly (ID) VALUES (1); " 2> /dev/null; @@ -34,7 +34,7 @@ CODE=$?; [ "$CODE" -ne "164" ] && [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to drop exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 1; DROP TABLE test_readonly; " 2> /dev/null; @@ -46,7 +46,7 @@ CODE=$?; ################ # Try to create temporary table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 2; CREATE TEMPORARY TABLE readonly ( ID Int @@ -58,7 +58,7 @@ CODE=$?; [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to insert into exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 2; INSERT INTO test_readonly (ID) VALUES (1); " 2> /dev/null; @@ -66,7 +66,7 @@ CODE=$?; [ "$CODE" -ne "164" ] && [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to drop exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 2; DROP TABLE test_readonly; " 2> /dev/null; @@ -78,7 +78,7 @@ CODE=$?; ################ # Try to create temporary table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 0; CREATE TEMPORARY TABLE readonly ( ID Int @@ -90,7 +90,7 @@ CODE=$?; [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to insert into exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 0; INSERT INTO 
test_readonly (ID) VALUES (1); " 2> /dev/null; @@ -98,7 +98,7 @@ CODE=$?; [ "$CODE" -ne "0" ] && echo "Fail" && exit $CODE; # Try to drop exists (non temporary) table -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" SET readonly = 0; DROP TABLE test_readonly; " 2> /dev/null; diff --git a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh index 93fd0c4a977..e9a4369a5bf 100755 --- a/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh +++ b/tests/queries/0_stateless/00634_performance_introspection_and_logging.sh @@ -19,13 +19,13 @@ settings="$server_logs --log_queries=1 --log_query_threads=1 --log_profile_event # Test insert logging on each block and checkPacket() method -$CLICKHOUSE_CLIENT $settings -n -q " +$CLICKHOUSE_CLIENT $settings -q " DROP TABLE IF EXISTS null_00634; CREATE TABLE null_00634 (i UInt8) ENGINE = MergeTree PARTITION BY tuple() ORDER BY tuple();" head -c 1000 /dev/zero | $CLICKHOUSE_CLIENT $settings --max_insert_block_size=10 --min_insert_block_size_rows=1 --min_insert_block_size_bytes=1 -q "INSERT INTO null_00634 FORMAT RowBinary" -$CLICKHOUSE_CLIENT $settings -n -q " +$CLICKHOUSE_CLIENT $settings -q " SELECT count() FROM null_00634; DROP TABLE null_00634;" diff --git a/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh b/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh index 96d5764780f..d69e14bdbb9 100755 --- a/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh +++ b/tests/queries/0_stateless/00715_fetch_merged_or_mutated_part_zookeeper.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/mergetree_mutations.lib -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE IF EXISTS fetches_r1 SYNC; DROP TABLE IF EXISTS fetches_r2 SYNC" @@ -17,7 +17,7 @@ ${CLICKHOUSE_CLIENT} --query="CREATE TABLE fetches_r2(x UInt32) ENGINE Replicate SETTINGS prefer_fetch_merged_part_time_threshold=0, \ prefer_fetch_merged_part_size_threshold=0" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET insert_keeper_fault_injection_probability=0; INSERT INTO fetches_r1 VALUES (1); INSERT INTO fetches_r1 VALUES (2); @@ -51,6 +51,6 @@ ${CLICKHOUSE_CLIENT} --query="SYSTEM SYNC REPLICA fetches_r2" ${CLICKHOUSE_CLIENT} --query="SELECT '*** Check data after fetch/clone of mutated part ***'" ${CLICKHOUSE_CLIENT} --query="SELECT _part, * FROM fetches_r2 ORDER BY x" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" DROP TABLE fetches_r1 SYNC; DROP TABLE fetches_r2 SYNC" diff --git a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh index 09f20284402..989096a26d6 100755 --- a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh +++ b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards.sh @@ -25,83 +25,83 @@ ${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM distributed WHERE a = 0 AND b | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Should pass now -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0; " # Should still fail because of matching unavailable shard -${CLICKHOUSE_CLIENT} -n --query=" 
+${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Try more complext expressions for constant folding - all should pass. -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 1 AND a = 0 AND b = 0; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a IN (0, 1) AND b IN (0, 1); " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 OR a = 1 AND b = 1; " # TODO: should pass one day. -#${CLICKHOUSE_CLIENT} -n --query=" +#${CLICKHOUSE_CLIENT} --query=" # SET optimize_skip_unused_shards = 1; # SELECT count(*) FROM distributed WHERE a = 0 AND b >= 0 AND b <= 1; #" -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 AND c = 0; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 AND c != 10; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 AND (a+b)*b != 12; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE (a = 0 OR a = 1) AND (b = 0 OR b = 1); " # These ones should fail. -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b <= 1; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND c = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 OR a = 1 AND b = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 OR a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed WHERE a = 0 AND b = 0 OR c = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh index 035907bddd7..b3dff2ea69a 100755 --- a/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh +++ b/tests/queries/0_stateless/00754_distributed_optimize_skip_select_on_unused_shards_with_prewhere.sh @@ -30,73 +30,73 @@ ${CLICKHOUSE_CLIENT} --query "SELECT count(*) FROM distributed_00754 PREWHERE a | 
grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Should pass now -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0; " # Should still fail because of matching unavailable shard -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' # Try more complex expressions for constant folding - all should pass. -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 1 AND a = 0 WHERE b = 0; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 1 WHERE b = 1 AND length(c) = 5; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a IN (0, 1) AND b IN (0, 1) WHERE c LIKE '%l%'; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a IN (0, 1) WHERE b IN (0, 1) AND c LIKE '%l%'; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0 OR a = 1 AND b = 1 WHERE c LIKE '%l%'; " -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE (a = 0 OR a = 1) WHERE (b = 0 OR b = 1); " # These should fail. 
-${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b <= 1; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 WHERE c LIKE '%l%'; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 OR a = 1 AND b = 0; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0 OR a = 2 AND b = 2; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' -${CLICKHOUSE_CLIENT} -n --query=" +${CLICKHOUSE_CLIENT} --query=" SET optimize_skip_unused_shards = 1; SELECT count(*) FROM distributed_00754 PREWHERE a = 0 AND b = 0 OR c LIKE '%l%'; " 2>&1 \ | grep -F -q "All connection tries failed" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh b/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh index 12d889a7137..8f7a1a9ae98 100755 --- a/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh +++ b/tests/queries/0_stateless/00814_replicated_minimalistic_part_header_zookeeper.sh @@ -10,7 +10,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) SHARD=$($CLICKHOUSE_CLIENT --query "Select getMacro('shard')") REPLICA=$($CLICKHOUSE_CLIENT --query "Select getMacro('replica')") -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " DROP TABLE IF EXISTS part_header_r1; DROP TABLE IF EXISTS part_header_r2; @@ -62,7 +62,7 @@ do [[ $count1 == 1 && $count2 == 1 ]] && break done -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " SELECT '*** Test part removal ***'; SELECT '*** replica 1 ***'; diff --git a/tests/queries/0_stateless/00837_minmax_index.sh b/tests/queries/0_stateless/00837_minmax_index.sh index e4de0b9ebfc..ff487f50ee0 100755 --- a/tests/queries/0_stateless/00837_minmax_index.sh +++ b/tests/queries/0_stateless/00837_minmax_index.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00838_unique_index.sh b/tests/queries/0_stateless/00838_unique_index.sh index b267b6a8eb3..a3aba4f26b6 100755 --- a/tests/queries/0_stateless/00838_unique_index.sh +++ b/tests/queries/0_stateless/00838_unique_index.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE set_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00907_set_index_max_rows.sh b/tests/queries/0_stateless/00907_set_index_max_rows.sh index 3707aaf2ca6..bdd0f36346f 100755 --- a/tests/queries/0_stateless/00907_set_index_max_rows.sh +++ b/tests/queries/0_stateless/00907_set_index_max_rows.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT 
--query="DROP TABLE IF EXISTS set_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE set_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00908_bloom_filter_index.sh b/tests/queries/0_stateless/00908_bloom_filter_index.sh index 25a6567b894..3bd169dd6df 100755 --- a/tests/queries/0_stateless/00908_bloom_filter_index.sh +++ b/tests/queries/0_stateless/00908_bloom_filter_index.sh @@ -12,7 +12,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx3;" # NGRAM BF -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx ( k UInt64, @@ -22,7 +22,7 @@ CREATE TABLE bloom_filter_idx ORDER BY k SETTINGS index_granularity = 2, index_granularity_bytes = '10Mi';" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx2 ( k UInt64, @@ -109,7 +109,7 @@ $CLICKHOUSE_CLIENT --optimize_or_like_chain 0 --query="SELECT count() FROM bloom # TOKEN BF -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx3 ( k UInt64, @@ -147,7 +147,7 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx2" $CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx3" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx_na;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx_na ( na Array(Array(String)), @@ -156,7 +156,7 @@ CREATE TABLE bloom_filter_idx_na ORDER BY na" 2>&1 | grep -c 'DB::Exception: Unexpected type Array(Array(String)) of bloom filter index' # NGRAM BF with IPv6 -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_ipv6_idx ( foo IPv6, diff --git a/tests/queries/0_stateless/00942_mutate_index.sh b/tests/queries/0_stateless/00942_mutate_index.sh index 6ebb30c25b9..e1e23639e85 100755 --- a/tests/queries/0_stateless/00942_mutate_index.sh +++ b/tests/queries/0_stateless/00942_mutate_index.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00943_materialize_index.sh b/tests/queries/0_stateless/00943_materialize_index.sh index 6ff7d34a9d7..e4a585fce97 100755 --- a/tests/queries/0_stateless/00943_materialize_index.sh +++ b/tests/queries/0_stateless/00943_materialize_index.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, @@ -34,7 +34,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO minmax_idx VALUES $CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0;" $CLICKHOUSE_CLIENT --query="SELECT count() FROM minmax_idx WHERE i64 = 2 SETTINGS merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability = 0.0 FORMAT JSON" | grep "rows_read" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" ALTER TABLE minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1 SETTINGS mutations_sync = 2;" $CLICKHOUSE_CLIENT --query="ALTER TABLE minmax_idx MATERIALIZE INDEX idx IN PARTITION 1 SETTINGS mutations_sync = 2;" diff --git a/tests/queries/0_stateless/00944_clear_index_in_partition.sh 
b/tests/queries/0_stateless/00944_clear_index_in_partition.sh index 4655077960f..a12536da239 100755 --- a/tests/queries/0_stateless/00944_clear_index_in_partition.sh +++ b/tests/queries/0_stateless/00944_clear_index_in_partition.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE minmax_idx ( u64 UInt64, diff --git a/tests/queries/0_stateless/00964_bloom_index_string_functions.sh b/tests/queries/0_stateless/00964_bloom_index_string_functions.sh index e2ec7fd42e4..9e410f09b13 100755 --- a/tests/queries/0_stateless/00964_bloom_index_string_functions.sh +++ b/tests/queries/0_stateless/00964_bloom_index_string_functions.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx;" # NGRAM BF -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE bloom_filter_idx ( k UInt64, diff --git a/tests/queries/0_stateless/00965_set_index_string_functions.sh b/tests/queries/0_stateless/00965_set_index_string_functions.sh index 8892fb11752..0f29c3dd2f2 100755 --- a/tests/queries/0_stateless/00965_set_index_string_functions.sh +++ b/tests/queries/0_stateless/00965_set_index_string_functions.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" CREATE TABLE set_idx ( k UInt64, diff --git a/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh b/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh index 389d433c7e2..ba260042f47 100755 --- a/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh +++ b/tests/queries/0_stateless/00974_primary_key_for_lowCardinality.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS lowString;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS string;" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" create table lowString ( a LowCardinality(String), @@ -18,7 +18,7 @@ ENGINE = MergeTree() PARTITION BY toYYYYMM(b) ORDER BY (a)" -$CLICKHOUSE_CLIENT -n --query=" +$CLICKHOUSE_CLIENT --query=" create table string ( a String, diff --git a/tests/queries/0_stateless/00990_hasToken.sh b/tests/queries/0_stateless/00990_hasToken.sh index 6a1d4ff5ccf..d79472aa5a5 100755 --- a/tests/queries/0_stateless/00990_hasToken.sh +++ b/tests/queries/0_stateless/00990_hasToken.sh @@ -6,4 +6,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # We should have correct env vars from shell_config.sh to run this test -python3 "$CURDIR"/00990_hasToken.python | ${CLICKHOUSE_CLIENT} --max_query_size 1048576 -nm +python3 "$CURDIR"/00990_hasToken.python | ${CLICKHOUSE_CLIENT} --max_query_size 1048576 -m diff --git a/tests/queries/0_stateless/02003_memory_limit_in_client.sh b/tests/queries/0_stateless/02003_memory_limit_in_client.sh index 96028f4847a..15cacbff8c5 100755 --- a/tests/queries/0_stateless/02003_memory_limit_in_client.sh +++ b/tests/queries/0_stateless/02003_memory_limit_in_client.sh @@ -4,21 +4,21 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT --max_memory_usage_in_client=1 -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client=1 -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client=0 -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5K' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5k' -n -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1M' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='23G' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11T' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5K' -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='5k' -q "SELECT arrayMap(x -> range(x), range(number)) FROM numbers(1000) -- { clientError MEMORY_LIMIT_EXCEEDED }" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1M' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='23G' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11T' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2P' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2.1p' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10E' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10.2e' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1.1T' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1m' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE 
num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='14g' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" -$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11t' -n -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2P' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='2.1p' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10E' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='10.2e' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1.1T' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='-1' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_NUMBER" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='1m' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='14g' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" +$CLICKHOUSE_CLIENT --max_memory_usage_in_client='11t' -q "SELECT * FROM (SELECT * FROM system.numbers LIMIT 600000) as num WHERE num.number=60000" 2>&1 | grep -c -F "CANNOT_PARSE_INPUT_ASSERTION_FAILED" diff --git a/tests/queries/0_stateless/02021_create_database_with_comment.sh b/tests/queries/0_stateless/02021_create_database_with_comment.sh index f77397dc482..d87b0794c91 100755 --- a/tests/queries/0_stateless/02021_create_database_with_comment.sh +++ b/tests/queries/0_stateless/02021_create_database_with_comment.sh @@ -20,7 +20,7 @@ function test_db_comments() local ENGINE_NAME="$1" echo "engine : ${ENGINE_NAME}" - $CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -nm <& /dev/null +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select 1; select 1' >& /dev/null echo $? 
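# (With --profile-events-delay-ms=-1 the client appears to print accumulated
#  ProfileEvents once at query end rather than periodically; the checks below
#  rely on that. A standalone way to eyeball one counter, assuming counter
#  names and output format match this version:
#    $CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 \
#        -q 'select 1' |& grep -F 'SelectedRows')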
echo 'regression test for overlap profile events snapshots between queries' -$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' +$CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' echo 'regression test for overlap profile events snapshots between queries (clickhouse-local)' -$CLICKHOUSE_LOCAL --print-profile-events --profile-events-delay-ms=-1 -n -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' +$CLICKHOUSE_LOCAL --print-profile-events --profile-events-delay-ms=-1 -q 'select 1; select 1' |& grep -F -o '[ 0 ] SelectedRows: 1 (increment)' echo 'print everything' profile_events="$( @@ -35,5 +35,5 @@ profile_events="$( test "$profile_events" -gt 1 && echo OK || echo "FAIL ($profile_events)" echo 'check that ProfileEvents is new for each query' -sleep_function_calls=$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -n -q 'select sleep(1); select 1' |& grep -c 'SleepFunctionCalls') +sleep_function_calls=$($CLICKHOUSE_CLIENT --print-profile-events --profile-events-delay-ms=-1 -q 'select sleep(1); select 1' |& grep -c 'SleepFunctionCalls') test "$sleep_function_calls" -eq 1 && echo OK || echo "FAIL ($sleep_function_calls)" diff --git a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh index 3c44a2a7ba7..a382b3859f3 100755 --- a/tests/queries/0_stateless/02221_parallel_replicas_bug.sh +++ b/tests/queries/0_stateless/02221_parallel_replicas_bug.sh @@ -4,4 +4,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh -${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 -nm < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null +${CLICKHOUSE_CLIENT} --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 -m < "$CURDIR"/01099_parallel_distributed_insert_select.sql > /dev/null diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh index db94c59d2de..e23a272a4e8 100755 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2" -${CLICKHOUSE_CLIENT} -n -q" +${CLICKHOUSE_CLIENT} -q" CREATE TABLE sample_table ( key UInt64 ) @@ -16,7 +16,7 @@ ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_ ORDER BY tuple(); " -${CLICKHOUSE_CLIENT} -n -q" +${CLICKHOUSE_CLIENT} -q" CREATE TABLE sample_table_2 ( key UInt64 ) diff --git a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh index c62ec14b340..6381d811d5d 100755 --- a/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh +++ b/tests/queries/0_stateless/02221_system_zookeeper_unrestricted_like.sh @@ -8,7 +8,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table;" ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS sample_table_2;" 
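# (The -n/--multiquery flag is dropped throughout this patch, presumably
#  because the client now treats multi-statement -q input as multiquery by
#  default, making the flag redundant. A minimal sketch of the assumed
#  equivalence:
#    $CLICKHOUSE_CLIENT -q 'select 1; select 2'      # new form
#    $CLICKHOUSE_CLIENT -n -q 'select 1; select 2'   # old spelling, same result
#  The CREATE statements below are rewritten accordingly.)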
-${CLICKHOUSE_CLIENT} -n --query="CREATE TABLE sample_table ( +${CLICKHOUSE_CLIENT} --query="CREATE TABLE sample_table ( key UInt64 ) ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like', '1') @@ -16,7 +16,7 @@ ORDER BY tuple(); DROP TABLE IF EXISTS sample_table SYNC;" -${CLICKHOUSE_CLIENT} -n --query "CREATE TABLE sample_table_2 ( +${CLICKHOUSE_CLIENT} --query "CREATE TABLE sample_table_2 ( key UInt64 ) ENGINE ReplicatedMergeTree('/clickhouse/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/02221_system_zookeeper_unrestricted_like_2', '1') diff --git a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh index 376a49fd820..63111cc32e4 100755 --- a/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh +++ b/tests/queries/0_stateless/02225_parallel_distributed_insert_select_view.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists dst_02225; drop table if exists src_02225; create table dst_02225 (key Int) engine=Memory(); @@ -14,7 +14,7 @@ create table src_02225 (key Int) engine=Memory(); insert into src_02225 values (1); " -$CLICKHOUSE_CLIENT --param_database=$CLICKHOUSE_DATABASE -nm -q " +$CLICKHOUSE_CLIENT --param_database=$CLICKHOUSE_DATABASE -m -q " truncate table dst_02225; insert into function remote('127.{1,2}', currentDatabase(), dst_02225, key) select * from remote('127.{1,2}', view(select * from {database:Identifier}.src_02225), key) @@ -29,7 +29,7 @@ settings parallel_distributed_insert_select=2, max_distributed_depth=1; select * from dst_02225; " -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table src_02225; drop table dst_02225; " diff --git a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh index bc90f4b2c11..177b373641f 100755 --- a/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh +++ b/tests/queries/0_stateless/02226_parallel_reading_from_replicas_benchmark.sh @@ -4,7 +4,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data_02226; create table data_02226 (key Int) engine=MergeTree() order by key as select * from numbers(1); @@ -24,7 +24,7 @@ opts=( $CLICKHOUSE_BENCHMARK --query "select * from remote('127.1', $CLICKHOUSE_DATABASE, data_02226)" "${opts[@]}" >& /dev/null ret=$? 
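# (ret captures the clickhouse-benchmark exit status before the table is
#  dropped below; a hypothetical follow-up assertion, not in the original
#  script, could mirror the OK/FAIL convention used elsewhere in this patch:
#    test "$ret" -eq 0 && echo OK || echo "FAIL ($ret)")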
-$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table data_02226; " diff --git a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh index d1a3825d286..e47a3033681 100755 --- a/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh +++ b/tests/queries/0_stateless/02232_allow_only_replicated_engine.sh @@ -12,9 +12,9 @@ ${CLICKHOUSE_CLIENT} -q "GRANT CREATE TABLE ON ${CLICKHOUSE_DATABASE}_db.* TO us ${CLICKHOUSE_CLIENT} -q "GRANT TABLE ENGINE ON Memory, TABLE ENGINE ON MergeTree, TABLE ENGINE ON ReplicatedMergeTree TO user_${CLICKHOUSE_DATABASE}" ${CLICKHOUSE_CLIENT} -q "CREATE DATABASE ${CLICKHOUSE_DATABASE}_db engine = Replicated('/clickhouse/databases/${CLICKHOUSE_TEST_ZOOKEEPER_PREFIX}/${CLICKHOUSE_DATABASE}_db', '{shard}', '{replica}')" ${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_memory (x UInt32) engine = Memory;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" 2>&1 | grep -o "Only tables with a Replicated engine" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" -${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" -n --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt (x UInt32) engine = ReplicatedMergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" 2>&1 | grep -o "Only tables with a Replicated engine" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_mt (x UInt32) engine = MergeTree order by x;" +${CLICKHOUSE_CLIENT} --distributed_ddl_output_mode=none --user "user_${CLICKHOUSE_DATABASE}" --query "CREATE TABLE ${CLICKHOUSE_DATABASE}_db.tab_rmt (x UInt32) engine = ReplicatedMergeTree order by x;" ${CLICKHOUSE_CLIENT} --query "DROP DATABASE ${CLICKHOUSE_DATABASE}_db" ${CLICKHOUSE_CLIENT} -q "DROP USER user_${CLICKHOUSE_DATABASE}" diff --git a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh index 66417e9694a..09f9c0c8a98 100755 --- a/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh +++ b/tests/queries/0_stateless/02250_ON_CLUSTER_grant.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function cleanup() { - $CLICKHOUSE_CLIENT -nmq " + $CLICKHOUSE_CLIENT -mq " DROP USER IF EXISTS with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; DROP USER IF EXISTS without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; DROP DATABASE IF EXISTS db_with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; @@ -15,7 +15,7 @@ function cleanup() cleanup trap cleanup EXIT -$CLICKHOUSE_CLIENT -nmq " +$CLICKHOUSE_CLIENT -mq " CREATE USER with_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; CREATE USER without_on_cluster_$CLICKHOUSE_TEST_UNIQUE_NAME; diff --git a/tests/queries/0_stateless/02262_column_ttl.sh b/tests/queries/0_stateless/02262_column_ttl.sh index b5e29c9b2a1..c620d3b6d9c 100755 --- a/tests/queries/0_stateless/02262_column_ttl.sh +++ b/tests/queries/0_stateless/02262_column_ttl.sh @@ -14,7 +14,7 @@ CUR_DIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) # note, that this should be written in .sh since we need $CLICKHOUSE_DATABASE # not 'default' to catch text_log -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists ttl_02262; drop table if exists this_text_log; @@ -31,7 +31,7 @@ $CLICKHOUSE_CLIENT -nm -q " ttl_02262_uuid=$($CLICKHOUSE_CLIENT -q "select uuid from system.tables where database = '$CLICKHOUSE_DATABASE' and name = 'ttl_02262'") -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " -- OPTIMIZE TABLE x FINAL will be done in background -- attach to it's log, via table UUID in query_id (see merger/mutator code). create materialized view this_text_log engine=Memory() as diff --git a/tests/queries/0_stateless/02286_parallel_final.sh b/tests/queries/0_stateless/02286_parallel_final.sh index 0ac510208f3..47dfad42e11 100755 --- a/tests/queries/0_stateless/02286_parallel_final.sh +++ b/tests/queries/0_stateless/02286_parallel_final.sh @@ -9,7 +9,7 @@ echo "Test intersecting ranges" test_random_values() { layers=$1 - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " drop table if exists tbl_8parts_${layers}granules_rnd; create table tbl_8parts_${layers}granules_rnd (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 % 8); insert into tbl_8parts_${layers}granules_rnd select number, 1 from numbers_mt($((layers * 8 * 8192))); @@ -29,7 +29,7 @@ echo "Test non intersecting ranges" test_sequential_values() { layers=$1 - $CLICKHOUSE_CLIENT -n -q " + $CLICKHOUSE_CLIENT -q " drop table if exists tbl_8parts_${layers}granules_seq; create table tbl_8parts_${layers}granules_seq (key1 UInt32, sign Int8) engine = CollapsingMergeTree(sign) order by (key1) partition by (key1 / $((layers * 8192)))::UInt64; insert into tbl_8parts_${layers}granules_seq select number, 1 from numbers_mt($((layers * 8 * 8192))); diff --git a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh index bd7e6be3987..953485c3a1f 100755 --- a/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh +++ b/tests/queries/0_stateless/02317_distinct_in_order_optimization_explain.sh @@ -23,99 +23,99 @@ $CLICKHOUSE_CLIENT -q "insert into distinct_in_order_explain select number % num $CLICKHOUSE_CLIENT -q "select '-- disable optimize_distinct_in_order'" $CLICKHOUSE_CLIENT -q "select '-- distinct all primary key columns -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$DISABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- enable optimize_distinct_in_order'" $CLICKHOUSE_CLIENT -q "select '-- distinct with all primary key columns -> pre-distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct * from distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix -> pre-distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from 
distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column in distinct -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by the same columns -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a, b" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a, b" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by columns are prefix of distinct columns -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column in distinct but non-primary key prefix -> pre-distinct and final distinct optimization'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column _not_ in distinct -> pre-distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by column in distinct -> final distinct optimization only'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by b" | eval $FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by column _not_ in distinct -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by a" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, c from distinct_in_order_explain order by a" | eval 
$FIND_DISTINCT $CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by _const_ column in distinct -> ordinary distinct'" -$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, 1 as x from distinct_in_order_explain order by x" | eval $FIND_DISTINCT +$CLICKHOUSE_CLIENT -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, 1 as x from distinct_in_order_explain order by x" | eval $FIND_DISTINCT echo "-- Check reading in order for distinct" echo "-- disabled, distinct columns match sorting key" -$CLICKHOUSE_CLIENT --max_threads=0 -nq "$DISABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT +$CLICKHOUSE_CLIENT --max_threads=0 -q "$DISABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT echo "-- enabled, distinct columns match sorting key" # read_in_order_two_level_merge_threshold is set here to avoid repeating MergeTreeInOrder in output -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, distinct columns form prefix of sorting key" -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, distinct columns DON't form prefix of sorting key" -$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT +$CLICKHOUSE_CLIENT --max_threads=0 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT echo "-- enabled, distinct columns contains constant columns, non-const columns form prefix of sorting key" -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, distinct columns contains constant columns, non-const columns match prefix of sorting key" -$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, b, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER +$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, b, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER echo "-- enabled, only part of distinct columns form prefix of sorting key" -$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_READING_DEFAULT +$CLICKHOUSE_CLIENT --max_threads=0 -q "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, 
c from distinct_in_order_explain" | eval $FIND_READING_DEFAULT echo "=== disable new analyzer ===" DISABLE_ANALYZER="set enable_analyzer=0" echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES echo "-- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query" ENABLE_READ_IN_ORDER="set optimize_read_in_order=1" echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | 
eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization" -$CLICKHOUSE_CLIENT -nq "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$DISABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES echo "=== enable new analyzer ===" ENABLE_ANALYZER="set enable_analyzer=1" echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0 settings optimize_move_to_prewhere=1" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0 settings optimize_move_to_prewhere=1" | eval $FIND_SORTING_PROPERTIES echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. 
it contains columns from DISTINCT clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization" -$CLICKHOUSE_CLIENT -nq "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES +$CLICKHOUSE_CLIENT -q "$ENABLE_ANALYZER;$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES $CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync" diff --git a/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh b/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh index 96f80d65878..490f8361682 100755 --- a/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh +++ b/tests/queries/0_stateless/02335_column_ttl_expired_column_optimization.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) data_path="$CLICKHOUSE_TMP/local" -$CLICKHOUSE_LOCAL --path "$data_path" -nm -q " +$CLICKHOUSE_LOCAL --path "$data_path" -m -q " create table ttl_02335 ( date Date, key Int, diff --git 
a/tests/queries/0_stateless/02361_fsync_profile_events.sh b/tests/queries/0_stateless/02361_fsync_profile_events.sh index 98c9cf9b7b4..73bf3fa120a 100755 --- a/tests/queries/0_stateless/02361_fsync_profile_events.sh +++ b/tests/queries/0_stateless/02361_fsync_profile_events.sh @@ -6,7 +6,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -$CLICKHOUSE_CLIENT -nm -q " +$CLICKHOUSE_CLIENT -m -q " drop table if exists data_fsync_pe; create table data_fsync_pe (key Int) engine=MergeTree() @@ -27,7 +27,7 @@ for i in {1..100}; do $CLICKHOUSE_CLIENT --query_id "$query_id" -q "insert into data_fsync_pe values (1)" read -r FileSync FileOpen DirectorySync FileSyncElapsedMicroseconds DirectorySyncElapsedMicroseconds <<<"$( - $CLICKHOUSE_CLIENT -nm --param_query_id "$query_id" -q " + $CLICKHOUSE_CLIENT -m --param_query_id "$query_id" -q " system flush logs; select diff --git a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh index 71e3b6961f8..46396d38747 100755 --- a/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh +++ b/tests/queries/0_stateless/02377_extend_protocol_with_query_parameters.sh @@ -24,7 +24,7 @@ $CLICKHOUSE_CLIENT \ table_name="t_02377_extend_protocol_with_query_parameters_$RANDOM$RANDOM" -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " create table $table_name( id Int64, arr Array(UInt8), @@ -57,17 +57,17 @@ $CLICKHOUSE_CLIENT \ # it is possible to set parameter for the current session -$CLICKHOUSE_CLIENT -n -q "set param_n = 42; select {n: UInt8}" +$CLICKHOUSE_CLIENT -q "set param_n = 42; select {n: UInt8}" # and it will not be visible to other sessions -$CLICKHOUSE_CLIENT -n -q "select {n: UInt8} -- { serverError 456 }" +$CLICKHOUSE_CLIENT -q "select {n: UInt8} -- { serverError 456 }" # the same parameter could be set multiple times within one session (new value overrides the previous one) -$CLICKHOUSE_CLIENT -n -q "set param_n = 12; set param_n = 13; select {n: UInt8}" +$CLICKHOUSE_CLIENT -q "set param_n = 12; set param_n = 13; select {n: UInt8}" # multiple different parameters could be defined within each session -$CLICKHOUSE_CLIENT -n -q " +$CLICKHOUSE_CLIENT -q " set param_a = 13, param_b = 'str'; set param_c = '2022-08-04 18:30:53'; set param_d = '{\'10\': [11, 12], \'13\': [14, 15]}'; diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh index 4b9793da5bb..974f10e2f24 100755 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh @@ -15,7 +15,7 @@ FIND_SORTMODE="$GREP_SORTMODE | $TRIM_LEADING_SPACES" function explain_sorting { echo "-- QUERY: "$1 - $CLICKHOUSE_CLIENT --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -nq "$1" | eval $FIND_SORTING + $CLICKHOUSE_CLIENT --merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=0.0 -q "$1" | eval $FIND_SORTING } function explain_sortmode { diff --git a/tests/queries/0_stateless/02417_load_marks_async.sh b/tests/queries/0_stateless/02417_load_marks_async.sh index 950656e7ab6..bcede9e4f5e 100755 --- a/tests/queries/0_stateless/02417_load_marks_async.sh +++ 
b/tests/queries/0_stateless/02417_load_marks_async.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test;" -${CLICKHOUSE_CLIENT} -n -q " +${CLICKHOUSE_CLIENT} -q " CREATE TABLE test ( n0 UInt64, From a5cc3a3a9cb7d40cc28a00016abbc08ba433e16a Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Thu, 15 Aug 2024 20:03:22 +0200 Subject: [PATCH 43/43] Update log message --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 286d06bc424..56bfa019819 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -1031,7 +1031,7 @@ bool CachedOnDiskReadBufferFromFile::nextImplStep() } else LOG_TRACE(log, "No space left in cache to reserve {} bytes, reason: {}, " - "will continue without cache download", failure_reason, size); + "will continue without cache download", size, failure_reason); if (!success) {
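// The hunk above is the entire fix: the format string interpolates "{} bytes"
// first and "reason: {}" second, but the arguments were passed as
// (failure_reason, size), so the message printed the reason where the byte
// count belonged and vice versa. A minimal sketch of the pitfall with
// fmt-style positional placeholders, using hypothetical values:
//
//     size_t size = 42;
//     std::string failure_reason = "cache is full";
//     // wrong: logs "reserve cache is full bytes, reason: 42"
//     LOG_TRACE(log, "reserve {} bytes, reason: {}", failure_reason, size);
//     // fixed: arguments bind to placeholders left to right
//     LOG_TRACE(log, "reserve {} bytes, reason: {}", size, failure_reason);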