From 43d057ed0f60a637133f5ebd19071d7a859ff892 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 9 Sep 2024 13:04:45 +0100 Subject: [PATCH 01/12] impl --- src/Interpreters/ProcessList.cpp | 2 +- src/Interpreters/QueryPriorities.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 6cb50b310ad..f8a808f6c68 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -258,7 +258,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q query_context, query_, client_info, - priorities.insert(static_cast(settings.priority)), + priorities.insert(settings.priority), std::move(thread_group), query_kind, settings, diff --git a/src/Interpreters/QueryPriorities.h b/src/Interpreters/QueryPriorities.h index 9e18e7bcff3..7601c7ba6eb 100644 --- a/src/Interpreters/QueryPriorities.h +++ b/src/Interpreters/QueryPriorities.h @@ -31,7 +31,7 @@ namespace DB class QueryPriorities { public: - using Priority = int; + using Priority = size_t; private: friend struct Handle; From 47589d48980a790a76e2163e88200213c6de2fee Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 9 Sep 2024 19:47:10 +0100 Subject: [PATCH 02/12] impl --- src/IO/S3/URI.cpp | 2 +- src/IO/tests/gtest_s3_uri.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 9c80b377661..73bbba055d0 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -36,7 +36,7 @@ URI::URI(const std::string & uri_, bool allow_archive_path_syntax) /// Case when bucket name represented in domain name of S3 URL. /// E.g. (https://bucket-name.s3.region.amazonaws.com/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access - static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))"); + static const RE2 virtual_hosted_style_pattern(R"(([^.]+)\.(s3express[\-a-z0-9]+|s3|cos|obs|.*oss[^\/]*|eos)([.\-][a-z0-9\-.:]+))"); /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index c0bf7fcb28a..abe80db7ba5 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -204,6 +204,14 @@ TEST(S3UriTest, validPatterns) ASSERT_EQ("", uri.version_id); ASSERT_EQ(true, uri.is_virtual_hosted_style); } + { + S3::URI uri("https://bucket-test.cn-beijing-internal.oss-data-acc.aliyuncs.com/cc-2zeh496zqm0g6e09g"); + ASSERT_EQ("https://cn-beijing-internal.oss-data-acc.aliyuncs.com", uri.endpoint); + ASSERT_EQ("bucket-test", uri.bucket); + ASSERT_EQ("cc-2zeh496zqm0g6e09g", uri.key); + ASSERT_EQ("", uri.version_id); + ASSERT_EQ(true, uri.is_virtual_hosted_style); + } } TEST(S3UriTest, versionIdChecks) From 89dd3188bb937db9dfc8b755e8a0a96f3c51adb0 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 10 Sep 2024 13:35:18 +0200 Subject: [PATCH 03/12] add a setting to disallow DETACH PERMANENTLY in Replicated --- src/Core/ServerSettings.h | 1 + src/Databases/DatabaseReplicated.cpp | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 79173503f28..5b2aaf5407a 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -148,6 +148,7 @@ namespace DB M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \ M(UInt64, max_materialized_views_count_for_table, 0, "A limit on the number of materialized views attached to a table.", 0) \ M(UInt32, max_database_replicated_create_table_thread_pool_size, 1, "The number of threads to create tables during replica recovery in DatabaseReplicated. Zero means number of threads equal number of cores.", 0) \ + M(Bool, database_replicated_allow_detach_permanently, true, "Allow detaching tables permanently in Replicated databases", 0) \ M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \ M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \ M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \ diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 8e3378bcc12..a9009e57dde 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -63,6 +63,7 @@ namespace ErrorCodes extern const int NO_ACTIVE_REPLICAS; extern const int CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT; extern const int CANNOT_RESTORE_TABLE; + extern const int SUPPORT_IS_DISABLED; } static constexpr const char * REPLICATED_DATABASE_MARK = "DatabaseReplicated"; @@ -1693,6 +1694,9 @@ void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const { waitDatabaseStarted(); + if (!local_context->getServerSettings().database_replicated_allow_detach_permanently) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Support for DETACH TABLE PERMANENTLY is disabled"); + auto txn = local_context->getZooKeeperMetadataTransaction(); assert(!ddl_worker->isCurrentlyActive() || txn); if (txn && txn->isInitialQuery()) From acbeaa6d3339b7206c6f9e822445952fe664a807 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 10 Sep 2024 16:14:53 +0100 Subject: [PATCH 04/12] fix --- tests/integration/test_disks_app_func/test.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_disks_app_func/test.py b/tests/integration/test_disks_app_func/test.py index 56ea5c8846a..a4b2399e117 100644 --- a/tests/integration/test_disks_app_func/test.py +++ b/tests/integration/test_disks_app_func/test.py @@ -13,8 +13,20 @@ def started_cluster(): main_configs=["config.xml"], with_minio=True, ) - cluster.start() + + # local disk requires its `path` directory to exist. + # the two paths below belong to `test1` and `test2` disks + node = cluster.instances["disks_app_test"] + for path in ["path1", "path2"]: + node.exec_in_container( + [ + "bash", + "-c", + f"mkdir -p /var/lib/clickhouse/{path}", + ] + ) + yield cluster finally: From 04dcf73e8fb98fbcdee2ead46e7177c961e1b5f6 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 10 Sep 2024 15:35:59 +0000 Subject: [PATCH 05/12] Fix crash in sqidDecode --- src/Functions/sqid.cpp | 2 +- tests/queries/0_stateless/02933_sqid.reference | 1 + tests/queries/0_stateless/02933_sqid.sql | 7 +++++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 0e133590b84..32434eb5e73 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -124,7 +124,7 @@ public: std::string_view sqid = col_non_const->getDataAt(i).toView(); std::vector integers = sqids.decode(String(sqid)); res_nested_data.insert(integers.begin(), integers.end()); - res_offsets_data.push_back(integers.size()); + res_offsets_data.push_back(i == 0 ? integers.size() : res_offsets_data.back() + integers.size()); } } else diff --git a/tests/queries/0_stateless/02933_sqid.reference b/tests/queries/0_stateless/02933_sqid.reference index a559bacb0ac..4597e2347e3 100644 --- a/tests/queries/0_stateless/02933_sqid.reference +++ b/tests/queries/0_stateless/02933_sqid.reference @@ -13,5 +13,6 @@ Td1EnWQo [1,2,3,4] XMbT -- invalid sqid [] +-- bug 69450 -- alias XMbT diff --git a/tests/queries/0_stateless/02933_sqid.sql b/tests/queries/0_stateless/02933_sqid.sql index 81d4b2bc35c..822fe33df51 100644 --- a/tests/queries/0_stateless/02933_sqid.sql +++ b/tests/queries/0_stateless/02933_sqid.sql @@ -25,5 +25,12 @@ SELECT sqidEncode(toNullable(materialize(1)), toLowCardinality(materialize(2))); SELECT '-- invalid sqid'; SELECT sqidDecode('invalid sqid'); +SELECT '-- bug 69450'; +DROP TABLE IF EXISTS tab; +CREATE TABLE tab (id String) ENGINE = MergeTree ORDER BY id; +INSERT INTO tab SELECT * FROM generateRandom() LIMIT 1000000; +SELECT sqidDecode(id) FROM tab FORMAT Null; +DROP TABLE tab; + SELECT '-- alias'; SELECT sqid(1, 2); From d43264c44e80f1f51604d266e5b1085462552aff Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 10 Sep 2024 17:41:52 +0200 Subject: [PATCH 06/12] Quick fix for s3queue problem --- .../StorageObjectStorageQueue.cpp | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 9452ce81e9e..55a1d43b26b 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -64,9 +64,7 @@ namespace void checkAndAdjustSettings( ObjectStorageQueueSettings & queue_settings, - ASTStorage * engine_args, - bool is_attach, - const LoggerPtr & log) + bool is_attach) { if (!is_attach && !queue_settings.mode.changed) { @@ -85,16 +83,6 @@ namespace "Setting `cleanup_interval_min_ms` ({}) must be less or equal to `cleanup_interval_max_ms` ({})", queue_settings.cleanup_interval_min_ms, queue_settings.cleanup_interval_max_ms); } - - if (!is_attach && !queue_settings.processing_threads_num.changed) - { - queue_settings.processing_threads_num = std::max(getNumberOfPhysicalCPUCores(), 16); - engine_args->settings->as()->changes.insertSetting( - "processing_threads_num", - queue_settings.processing_threads_num.value); - - LOG_TRACE(log, "Set `processing_threads_num` to {}", queue_settings.processing_threads_num); - } } std::shared_ptr getQueueLog(const ObjectStoragePtr & storage, const ContextPtr & context, const ObjectStorageQueueSettings & table_settings) @@ -154,7 +142,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( throw Exception(ErrorCodes::BAD_QUERY_PARAMETER, "ObjectStorageQueue url must either end with '/' or contain globs"); } - checkAndAdjustSettings(*queue_settings, engine_args, mode > LoadingStrictnessLevel::CREATE, log); + checkAndAdjustSettings(*queue_settings, mode > LoadingStrictnessLevel::CREATE); object_storage = configuration->createObjectStorage(context_, /* is_readonly */true); FormatFactory::instance().checkFormatName(configuration->format); From d8e670297bcdc1850260a1d60d4ee39b2a895a4d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 10 Sep 2024 17:42:27 +0200 Subject: [PATCH 07/12] groupConcat consistency --- .../AggregateFunctionGroupConcat.cpp | 13 +++++++++++-- .../03235_groupArray_returns_string.reference | 1 + .../0_stateless/03235_groupArray_returns_string.sql | 10 ++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03235_groupArray_returns_string.reference create mode 100644 tests/queries/0_stateless/03235_groupArray_returns_string.sql diff --git a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp index 636ac80e350..8fb0b645096 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupConcat.cpp @@ -116,15 +116,17 @@ class GroupConcatImpl final SerializationPtr serialization; UInt64 limit; const String delimiter; + const DataTypePtr type; public: GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_) : IAggregateFunctionDataHelper, GroupConcatImpl>( {data_type_}, parameters_, std::make_shared()) - , serialization(this->argument_types[0]->getDefaultSerialization()) , limit(limit_) , delimiter(delimiter_) + , type(data_type_) { + serialization = isFixedString(type) ? std::make_shared()->getDefaultSerialization() : this->argument_types[0]->getDefaultSerialization(); } String getName() const override { return name; } @@ -140,7 +142,14 @@ public: if (cur_data.data_size != 0) cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena); - cur_data.insert(columns[0], serialization, row_num, arena); + if (isFixedString(type)) + { + ColumnWithTypeAndName col = {columns[0]->getPtr(), type, "column"}; + const auto & col_str = castColumn(col, std::make_shared()); + cur_data.insert(col_str.get(), serialization, row_num, arena); + } + else + cur_data.insert(columns[0], serialization, row_num, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override diff --git a/tests/queries/0_stateless/03235_groupArray_returns_string.reference b/tests/queries/0_stateless/03235_groupArray_returns_string.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/03235_groupArray_returns_string.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03235_groupArray_returns_string.sql b/tests/queries/0_stateless/03235_groupArray_returns_string.sql new file mode 100644 index 00000000000..618ec6f839b --- /dev/null +++ b/tests/queries/0_stateless/03235_groupArray_returns_string.sql @@ -0,0 +1,10 @@ +CREATE TABLE t (st FixedString(54)) ENGINE=MergeTree ORDER BY (); + +INSERT INTO t VALUES +('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRTUVWXYZ'), +('\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'), +('IIIIIIIIII\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0'); + +WITH (SELECT groupConcat(',')(st) FROM t) AS a, + (SELECT groupConcat(',')(st :: String) FROM t) AS b +SELECT equals(a, b); From f588e3c31bc2f097f168646f73ed078259265659 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 10 Sep 2024 17:43:50 +0200 Subject: [PATCH 08/12] rename tests --- ...ng.reference => 03235_groupArray_string_consistency.reference} | 0 ...returns_string.sql => 03235_groupArray_string_consistency.sql} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03235_groupArray_returns_string.reference => 03235_groupArray_string_consistency.reference} (100%) rename tests/queries/0_stateless/{03235_groupArray_returns_string.sql => 03235_groupArray_string_consistency.sql} (100%) diff --git a/tests/queries/0_stateless/03235_groupArray_returns_string.reference b/tests/queries/0_stateless/03235_groupArray_string_consistency.reference similarity index 100% rename from tests/queries/0_stateless/03235_groupArray_returns_string.reference rename to tests/queries/0_stateless/03235_groupArray_string_consistency.reference diff --git a/tests/queries/0_stateless/03235_groupArray_returns_string.sql b/tests/queries/0_stateless/03235_groupArray_string_consistency.sql similarity index 100% rename from tests/queries/0_stateless/03235_groupArray_returns_string.sql rename to tests/queries/0_stateless/03235_groupArray_string_consistency.sql From 080193cfc36909ab9a1016b70edc007c8dd3540f Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 10 Sep 2024 15:46:21 +0000 Subject: [PATCH 09/12] 14% more aesthetic code --- src/Functions/sqid.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/sqid.cpp b/src/Functions/sqid.cpp index 32434eb5e73..074a34bd083 100644 --- a/src/Functions/sqid.cpp +++ b/src/Functions/sqid.cpp @@ -124,7 +124,7 @@ public: std::string_view sqid = col_non_const->getDataAt(i).toView(); std::vector integers = sqids.decode(String(sqid)); res_nested_data.insert(integers.begin(), integers.end()); - res_offsets_data.push_back(i == 0 ? integers.size() : res_offsets_data.back() + integers.size()); + res_offsets_data.push_back(res_offsets_data.back() + integers.size()); } } else From 6a6d26aeeda7c6d480dbaf73658567ed494ca67b Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 10 Sep 2024 18:16:27 +0200 Subject: [PATCH 10/12] Update StorageObjectStorageQueue.cpp --- src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp index 55a1d43b26b..c1ef37e1a48 100644 --- a/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp +++ b/src/Storages/ObjectStorageQueue/StorageObjectStorageQueue.cpp @@ -118,7 +118,7 @@ StorageObjectStorageQueue::StorageObjectStorageQueue( const String & comment, ContextPtr context_, std::optional format_settings_, - ASTStorage * engine_args, + ASTStorage * /* engine_args */, LoadingStrictnessLevel mode) : IStorage(table_id_) , WithContext(context_) From 3921f910f5d0d7e69581a2cc7033213809af48aa Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Sep 2024 10:33:23 +0200 Subject: [PATCH 11/12] Another attempt to address EAGAIN "Resource unavailable" --- docker/test/base/setup_export_logs.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 3df9655701c..7ec1c31f04a 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -187,10 +187,15 @@ function setup_logs_replication ') echo -e "Creating remote destination table ${table}_${hash} with statement:" >&2 + echo "::group::${table}" # there's the only way big "$statement" can be printed without causing EAGAIN error # cat: write error: Resource temporarily unavailable - echo "$statement" | cat + statement_print="${statement}" + if [ "${#statement_print}" -gt 4000 ]; then + statement_print="${statement::1999}\n…\n${statement:${#statement}-1999}" + fi + echo "$statement_print" echo "::endgroup::" echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \ From c1830bc041a067d1fae1e8971091c453614eaca7 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Sep 2024 13:08:58 +0200 Subject: [PATCH 12/12] Escape the `\n` in statements --- docker/test/base/setup_export_logs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh index 7ec1c31f04a..a39f96867be 100755 --- a/docker/test/base/setup_export_logs.sh +++ b/docker/test/base/setup_export_logs.sh @@ -195,7 +195,7 @@ function setup_logs_replication if [ "${#statement_print}" -gt 4000 ]; then statement_print="${statement::1999}\n…\n${statement:${#statement}-1999}" fi - echo "$statement_print" + echo -e "$statement_print" echo "::endgroup::" echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \