From ff01ca80b4ae644adc396c0b94d3b54ac2b8c1e6 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 15 May 2024 16:37:42 +0200 Subject: [PATCH 01/34] reduce retry time for queries, increase retry count for backups --- src/Backups/BackupIO_S3.cpp | 6 +++--- src/Core/Settings.h | 1 + src/IO/S3/Client.h | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 15860363615..baa16a269a9 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -54,9 +54,9 @@ namespace S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( settings.auth_settings.region, context->getRemoteHostFilter(), - static_cast(global_settings.s3_max_redirects), - static_cast(global_settings.s3_retry_attempts), - global_settings.enable_s3_requests_logging, + static_cast(local_settings.s3_max_redirects), + static_cast(local_settings.backup_restore_s3_retry_attempts), + local_settings.enable_s3_requests_logging, /* for_disk_s3 = */ false, request_settings.get_request_throttler, request_settings.put_request_throttler, diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a0de354a03..292d2aa72d5 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -506,6 +506,7 @@ class IColumn; M(UInt64, backup_restore_keeper_value_max_size, 1048576, "Maximum size of data of a [Zoo]Keeper's node during backup", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multiread, 10000, "Maximum size of batch for multiread request to [Zoo]Keeper during backup or restore", 0) \ M(UInt64, backup_restore_batch_size_for_keeper_multi, 1000, "Maximum size of batch for multi request to [Zoo]Keeper during backup or restore", 0) \ + M(UInt64, backup_restore_s3_retry_attempts, 1000, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries. It takes place only for backup/restore.", 0) \ M(UInt64, max_backup_bandwidth, 0, "The maximum read speed in bytes per second for particular backup on server. 
Zero means unlimited.", 0) \ \ M(Bool, log_profile_events, true, "Log query performance statistics into the query_log, query_thread_log and query_views_log.", 0) \ diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index bd281846343..1e90acb7f7b 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -162,7 +162,7 @@ public: class RetryStrategy : public Aws::Client::RetryStrategy { public: - explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 90000); + explicit RetryStrategy(uint32_t maxRetries_ = 10, uint32_t scaleFactor_ = 25, uint32_t maxDelayMs_ = 5000); /// NOLINTNEXTLINE(google-runtime-int) bool ShouldRetry(const Aws::Client::AWSError& error, long attemptedRetries) const override; From 5bee42e1f74e38e764c74bd26680a6489b8d699a Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 16 May 2024 13:18:02 +0200 Subject: [PATCH 02/34] add new setting to settings changes history --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 5f3e9ffb611..775c740443c 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -93,6 +93,7 @@ static std::map sett {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, {"output_format_pretty_preserve_border_for_multiline_string", 0, 1, "Applies better rendering for multiline strings."}, + {"backup_restore_s3_retry_attempts", 0, 1000, "A new setting."} }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"max_parsing_threads", 0, 0, "Add a separate setting to control number of threads in parallel parsing from files"}, From 38ec80ce92a4cbb18b3959a1a63ee9ee2faeea7c Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 17 May 2024 14:35:58 +0200 Subject: [PATCH 03/34] Update src/Core/SettingsChangesHistory.h --- src/Core/SettingsChangesHistory.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 1ef475ab9db..119e359b29b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -92,7 +92,6 @@ static std::map sett {"cross_join_min_bytes_to_compress", 0, 1_GiB, "A new setting."}, {"prefer_external_sort_block_bytes", 0, DEFAULT_BLOCK_SIZE * 256, "Prefer maximum block bytes for external sort, reduce the memory usage during merging."}, {"input_format_force_null_for_omitted_fields", false, false, "Disable type-defaults for omitted fields when needed"}, - {"output_format_pretty_preserve_border_for_multiline_string", 0, 1, "Applies better rendering for multiline strings."}, {"backup_restore_s3_retry_attempts", 0, 1000, "A new setting."}, }}, {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, From 73f42b0858204f1682269453b0565380afc7a9f4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 23 May 2024 20:59:10 +0200 Subject: [PATCH 04/34] add clusters with replicas from all replica groups --- src/Databases/DatabaseReplicated.cpp | 67 +++++++++++++++---- src/Databases/DatabaseReplicated.h | 6 +- src/Databases/DatabaseReplicatedWorker.cpp | 2 
+ src/Interpreters/DDLTask.cpp | 17 ++++- src/Storages/System/StorageSystemClusters.cpp | 4 ++ 5 files changed, 78 insertions(+), 18 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index cc946fc22c4..c9e14790175 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -173,13 +173,40 @@ ClusterPtr DatabaseReplicated::tryGetCluster() const return cluster; } -void DatabaseReplicated::setCluster(ClusterPtr && new_cluster) +ClusterPtr DatabaseReplicated::tryGetAllGroupsCluster() const { std::lock_guard lock{mutex}; - cluster = std::move(new_cluster); + if (replica_group_name.empty()) + return nullptr; + + if (cluster_all_groups) + return cluster_all_groups; + + /// Database is probably not created or not initialized yet, it's ok to return nullptr + if (is_readonly) + return cluster_all_groups; + + try + { + cluster_all_groups = getClusterImpl(/*all_groups*/ true); + } + catch (...) + { + tryLogCurrentException(log); + } + return cluster_all_groups; } -ClusterPtr DatabaseReplicated::getClusterImpl() const +void DatabaseReplicated::setCluster(ClusterPtr && new_cluster, bool all_groups) +{ + std::lock_guard lock{mutex}; + if (all_groups) + cluster_all_groups = std::move(new_cluster); + else + cluster = std::move(new_cluster); +} + +ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const { Strings unfiltered_hosts; Strings hosts; @@ -199,17 +226,24 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const "It's possible if the first replica is not fully created yet " "or if the last replica was just dropped or due to logical error", zookeeper_path); - hosts.clear(); - std::vector paths; - for (const auto & host : unfiltered_hosts) - paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group"); - - auto replica_groups = zookeeper->tryGet(paths); - - for (size_t i = 0; i < paths.size(); ++i) + if (all_groups) { - if (replica_groups[i].data == replica_group_name) - hosts.push_back(unfiltered_hosts[i]); + hosts = unfiltered_hosts; + } + else + { + hosts.clear(); + std::vector paths; + for (const auto & host : unfiltered_hosts) + paths.push_back(zookeeper_path + "/replicas/" + host + "/replica_group"); + + auto replica_groups = zookeeper->tryGet(paths); + + for (size_t i = 0; i < paths.size(); ++i) + { + if (replica_groups[i].data == replica_group_name) + hosts.push_back(unfiltered_hosts[i]); + } } Int32 cversion = stat.cversion; @@ -274,6 +308,11 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const bool treat_local_as_remote = false; bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL; + + String cluster_name = TSA_SUPPRESS_WARNING_FOR_READ(database_name); /// FIXME + if (all_groups) + cluster_name = "all_groups." 
+ cluster_name; + ClusterConnectionParameters params{ cluster_auth_info.cluster_username, cluster_auth_info.cluster_password, @@ -282,7 +321,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const treat_local_port_as_remote, cluster_auth_info.cluster_secure_connection, Priority{1}, - TSA_SUPPRESS_WARNING_FOR_READ(database_name), /// FIXME + cluster_name, cluster_auth_info.cluster_secret}; return std::make_shared(getContext()->getSettingsRef(), shards, params); diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 55bcf963d37..f902b45ca86 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -65,6 +65,7 @@ public: /// Returns cluster consisting of database replicas ClusterPtr tryGetCluster() const; + ClusterPtr tryGetAllGroupsCluster() const; void drop(ContextPtr /*context*/) override; @@ -113,8 +114,8 @@ private: ASTPtr parseQueryFromMetadataInZooKeeper(const String & node_name, const String & query); String readMetadataFile(const String & table_name) const; - ClusterPtr getClusterImpl() const; - void setCluster(ClusterPtr && new_cluster); + ClusterPtr getClusterImpl(bool all_groups = false) const; + void setCluster(ClusterPtr && new_cluster, bool all_groups = false); void createEmptyLogEntry(const ZooKeeperPtr & current_zookeeper); @@ -155,6 +156,7 @@ private: UInt64 tables_metadata_digest TSA_GUARDED_BY(metadata_mutex); mutable ClusterPtr cluster; + mutable ClusterPtr cluster_all_groups; LoadTaskPtr startup_replicated_database_task TSA_GUARDED_BY(mutex); }; diff --git a/src/Databases/DatabaseReplicatedWorker.cpp b/src/Databases/DatabaseReplicatedWorker.cpp index 6e19a77c501..31d6f7876a8 100644 --- a/src/Databases/DatabaseReplicatedWorker.cpp +++ b/src/Databases/DatabaseReplicatedWorker.cpp @@ -421,6 +421,8 @@ DDLTaskPtr DatabaseReplicatedDDLWorker::initAndCheckTask(const String & entry_na { /// Some replica is added or removed, let's update cached cluster database->setCluster(database->getClusterImpl()); + if (!database->replica_group_name.empty()) + database->setCluster(database->getClusterImpl(/*all_groups*/ true), /*all_groups*/ true); out_reason = fmt::format("Entry {} is a dummy task", entry_name); return {}; } diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index a37b4db029a..06ec9489fc1 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -568,8 +568,21 @@ void ZooKeeperMetadataTransaction::commit() ClusterPtr tryGetReplicatedDatabaseCluster(const String & cluster_name) { - if (const auto * replicated_db = dynamic_cast(DatabaseCatalog::instance().tryGetDatabase(cluster_name).get())) - return replicated_db->tryGetCluster(); + String name = cluster_name; + bool all_groups = false; + if (name.starts_with("all_groups.")) + { + name = name.substr(strlen("all_groups.")); + all_groups = true; + } + + if (const auto * replicated_db = dynamic_cast(DatabaseCatalog::instance().tryGetDatabase(name).get())) + { + if (all_groups) + return replicated_db->tryGetAllGroupsCluster(); + else + return replicated_db->tryGetCluster(); + } return {}; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index cb8d5caa50c..520bd7e7f92 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -54,6 +54,10 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co if (auto database_cluster = replicated->tryGetCluster()) writeCluster(res_columns, 
{name_and_database.first, database_cluster}, replicated->tryGetAreReplicasActive(database_cluster)); + + if (auto database_cluster = replicated->tryGetAllGroupsCluster()) + writeCluster(res_columns, {"all_groups." + name_and_database.first, database_cluster}, + replicated->tryGetAreReplicasActive(database_cluster)); } } } From 6725168b983d197d4e00234785e02373327fd30c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 24 May 2024 21:34:33 +0200 Subject: [PATCH 05/34] better code, add warning --- src/Databases/DatabaseReplicated.cpp | 9 ++++++++- src/Databases/DatabaseReplicated.h | 2 ++ src/Interpreters/DDLTask.cpp | 4 ++-- src/Storages/System/StorageSystemClusters.cpp | 2 +- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index c9e14790175..d2a3a5d421c 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -122,6 +122,13 @@ DatabaseReplicated::DatabaseReplicated( fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef()); replica_group_name = context_->getConfigRef().getString("replica_group_name", ""); + + if (!replica_group_name.empty() && database_name.starts_with(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX)) + { + context_->addWarningMessage(fmt::format("There's a Replicated database with a name starting from '{}', " + "and replica_group_name is configured. It may cause collisions in cluster names.", + ALL_GROUPS_CLUSTER_PREFIX)); + } } String DatabaseReplicated::getFullReplicaName(const String & shard, const String & replica) @@ -311,7 +318,7 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const String cluster_name = TSA_SUPPRESS_WARNING_FOR_READ(database_name); /// FIXME if (all_groups) - cluster_name = "all_groups." 
+ cluster_name; + cluster_name = ALL_GROUPS_CLUSTER_PREFIX + cluster_name; ClusterConnectionParameters params{ cluster_auth_info.cluster_username, diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index f902b45ca86..761d6b4b503 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -20,6 +20,8 @@ using ClusterPtr = std::shared_ptr; class DatabaseReplicated : public DatabaseAtomic { public: + static constexpr auto ALL_GROUPS_CLUSTER_PREFIX = "all_groups."; + DatabaseReplicated(const String & name_, const String & metadata_path_, UUID uuid, const String & zookeeper_path_, const String & shard_name_, const String & replica_name_, DatabaseReplicatedSettings db_settings_, diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 06ec9489fc1..6c346836ed8 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -570,9 +570,9 @@ ClusterPtr tryGetReplicatedDatabaseCluster(const String & cluster_name) { String name = cluster_name; bool all_groups = false; - if (name.starts_with("all_groups.")) + if (name.starts_with(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX)) { - name = name.substr(strlen("all_groups.")); + name = name.substr(strlen(DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX)); all_groups = true; } diff --git a/src/Storages/System/StorageSystemClusters.cpp b/src/Storages/System/StorageSystemClusters.cpp index 520bd7e7f92..160c8d6270e 100644 --- a/src/Storages/System/StorageSystemClusters.cpp +++ b/src/Storages/System/StorageSystemClusters.cpp @@ -56,7 +56,7 @@ void StorageSystemClusters::fillData(MutableColumns & res_columns, ContextPtr co replicated->tryGetAreReplicasActive(database_cluster)); if (auto database_cluster = replicated->tryGetAllGroupsCluster()) - writeCluster(res_columns, {"all_groups." 
+ name_and_database.first, database_cluster}, + writeCluster(res_columns, {DatabaseReplicated::ALL_GROUPS_CLUSTER_PREFIX + name_and_database.first, database_cluster}, replicated->tryGetAreReplicasActive(database_cluster)); } } From 8eb79c7cb3bbfe5d74e030507e4b9a2e695b1971 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 27 May 2024 16:47:50 +0200 Subject: [PATCH 06/34] adjust test test_mask_sensitive_info/test.py::test_backup_to_s3 --- tests/integration/test_mask_sensitive_info/configs/users.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_mask_sensitive_info/configs/users.xml b/tests/integration/test_mask_sensitive_info/configs/users.xml index f129a5bb3e3..f767216e907 100644 --- a/tests/integration/test_mask_sensitive_info/configs/users.xml +++ b/tests/integration/test_mask_sensitive_info/configs/users.xml @@ -2,6 +2,7 @@ 5 + 5 From ef3b802b4e543cb879fe7c45eb1f57423c3e471e Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 28 May 2024 18:42:10 +0200 Subject: [PATCH 07/34] add test --- .../configs/config2.xml | 10 ++++++++ .../test_replicated_database/test.py | 23 ++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_replicated_database/configs/config2.xml diff --git a/tests/integration/test_replicated_database/configs/config2.xml b/tests/integration/test_replicated_database/configs/config2.xml new file mode 100644 index 00000000000..727461697ca --- /dev/null +++ b/tests/integration/test_replicated_database/configs/config2.xml @@ -0,0 +1,10 @@ + + 10 + 1 + + 10 + + 50 + 42 + group + diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index fd1bfc75227..ef86da5af30 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -46,7 +46,7 @@ snapshotting_node = cluster.add_instance( ) snapshot_recovering_node = cluster.add_instance( "snapshot_recovering_node", - main_configs=["configs/config.xml"], + main_configs=["configs/config2.xml"], user_configs=["configs/settings.xml"], with_zookeeper=True, ) @@ -1522,3 +1522,24 @@ def test_auto_recovery(started_cluster): assert "42\n" == bad_settings_node.query("SELECT * FROM auto_recovery.t2") assert "137\n" == bad_settings_node.query("SELECT * FROM auto_recovery.t1") + + +def test_all_groups_cluster(started_cluster): + dummy_node.query("DROP DATABASE IF EXISTS db_cluster") + bad_settings_node.query("DROP DATABASE IF EXISTS db_cluster") + dummy_node.query( + "CREATE DATABASE db_cluster ENGINE = Replicated('/clickhouse/databases/all_groups_cluster', 'shard1', 'replica1');" + ) + bad_settings_node.query( + "CREATE DATABASE db_cluster ENGINE = Replicated('/clickhouse/databases/all_groups_cluster', 'shard1', 'replica2');" + ) + + assert "bad_settings_node\ndummy_node\n" == dummy_node.query( + "select host_name from system.clusters where name='db_cluster' order by host_name" + ) + assert "bad_settings_node\n" == bad_settings_node.query( + "select host_name from system.clusters where name='db_cluster' order by host_name" + ) + assert "bad_settings_node\ndummy_node\n" == bad_settings_node.query( + "select host_name from system.clusters where name='all_groups.db_cluster' order by host_name" + ) From d2184fd2a2d76de2de2252b9152c08f7ae269574 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 3 Jun 2024 20:51:09 +0200 Subject: [PATCH 08/34] Update test.py --- tests/integration/test_replicated_database/test.py | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index ef86da5af30..a5859960cd9 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -46,7 +46,7 @@ snapshotting_node = cluster.add_instance( ) snapshot_recovering_node = cluster.add_instance( "snapshot_recovering_node", - main_configs=["configs/config2.xml"], + main_configs=["configs/config.xml"], user_configs=["configs/settings.xml"], with_zookeeper=True, ) @@ -61,7 +61,7 @@ all_nodes = [ bad_settings_node = cluster.add_instance( "bad_settings_node", - main_configs=["configs/config.xml"], + main_configs=["configs/config2.xml"], user_configs=["configs/inconsistent_settings.xml"], with_zookeeper=True, macros={"shard": 1, "replica": 4}, From 3d7beae8fd4140f90917a592f3aae1bfecb90c5a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Tue, 4 Jun 2024 19:29:26 +0200 Subject: [PATCH 09/34] Simplify 03023_zeros_generate_random_with_limit_progress_bar --- ...rate_random_with_limit_progress_bar.expect | 49 ------------------- ...e_random_with_limit_progress_bar.reference | 3 ++ ...generate_random_with_limit_progress_bar.sh | 16 ++++++ 3 files changed, 19 insertions(+), 49 deletions(-) delete mode 100755 tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect create mode 100755 tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect deleted file mode 100755 index de15a199132..00000000000 --- a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.expect +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/expect -f - -set basedir [file dirname $argv0] -set basename [file tail $argv0] -if {[info exists env(CLICKHOUSE_TMP)]} { - set CLICKHOUSE_TMP $env(CLICKHOUSE_TMP) -} else { - set CLICKHOUSE_TMP "." -} -exp_internal -f $CLICKHOUSE_TMP/$basename.debuglog 0 - -log_user 0 -set timeout 60 -match_max 100000 -set stty_init "rows 25 cols 120" - -expect_after { - -i $any_spawn_id eof { exp_continue } - -i $any_spawn_id timeout { exit 1 } -} - -spawn clickhouse-local -expect ":) " - -# Trivial SELECT with LIMIT from system.zeros shows progress bar. -send "SELECT * FROM system.zeros LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" -expect "Progress: " -expect "█" -send "\3" -expect "Query was cancelled." -expect ":) " - -send "SELECT * FROM system.zeros_mt LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" -expect "Progress: " -expect "█" -send "\3" -expect "Query was cancelled." -expect ":) " - -# As well as from generateRandom -send "SELECT * FROM generateRandom() LIMIT 10000000 FORMAT Null SETTINGS max_execution_speed = 1000000, timeout_before_checking_execution_speed = 0, max_block_size = 128\r" -expect "Progress: " -expect "█" -send "\3" -expect "Query was cancelled." 
-expect ":) " - -send "exit\r" -expect eof diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference index e69de29bb2d..6ca5ae94f9a 100644 --- a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference +++ b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.reference @@ -0,0 +1,3 @@ +Matched +Matched +Matched diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh new file mode 100755 index 00000000000..4bb8fc8880d --- /dev/null +++ b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +function run_with_progress_and_match_total_rows() +{ + echo "$1" | \ + ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" --data-binary @- 2>&1 | \ + grep -q '"total_rows_to_read":"100"' && echo "Matched" || echo "Expected total_rows_to_read not found" +} + +run_with_progress_and_match_total_rows 'SELECT * FROM system.zeros LIMIT 100' +run_with_progress_and_match_total_rows 'SELECT * FROM system.zeros_mt LIMIT 100' +run_with_progress_and_match_total_rows 'SELECT * FROM generateRandom() LIMIT 100' From 553fcb5e0618858cfa7659522208be49320b0e48 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 4 Jun 2024 20:05:58 +0200 Subject: [PATCH 10/34] Update test.py --- tests/integration/test_replicated_database/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index a5859960cd9..ea569939c1c 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -1534,7 +1534,7 @@ def test_all_groups_cluster(started_cluster): "CREATE DATABASE db_cluster ENGINE = Replicated('/clickhouse/databases/all_groups_cluster', 'shard1', 'replica2');" ) - assert "bad_settings_node\ndummy_node\n" == dummy_node.query( + assert "dummy_node\n" == dummy_node.query( "select host_name from system.clusters where name='db_cluster' order by host_name" ) assert "bad_settings_node\n" == bad_settings_node.query( From d0a2abe17be7381c261a6d631037bd3a57d4fdb1 Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Tue, 4 Jun 2024 20:07:47 +0000 Subject: [PATCH 11/34] time_virtual_col: initial --- .../ObjectStorage/StorageObjectStorageSource.cpp | 2 +- src/Storages/VirtualColumnUtils.cpp | 13 +++++++++++-- src/Storages/VirtualColumnUtils.h | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index b31d0f8a92e..8d5c03ae11e 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -199,7 +199,7 @@ Chunk StorageObjectStorageSource::generate() chunk, read_from_format_info.requested_virtual_columns, getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), - object_info.metadata->size_bytes, &filename); + 
object_info.metadata->size_bytes, &filename, object_info.metadata->last_modified); return chunk; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index cec55cefda2..5362cffece5 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -111,7 +112,7 @@ void filterBlockWithDAG(ActionsDAGPtr dag, Block & block, ContextPtr context) NameSet getVirtualNamesForFileLikeStorage() { - return {"_path", "_file", "_size"}; + return {"_path", "_file", "_size", "_time"}; } VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription & storage_columns) @@ -129,6 +130,7 @@ VirtualColumnsDescription getVirtualsForFileLikeStorage(const ColumnsDescription add_virtual("_path", std::make_shared(std::make_shared())); add_virtual("_file", std::make_shared(std::make_shared())); add_virtual("_size", makeNullable(std::make_shared())); + add_virtual("_time", makeNullable(std::make_shared())); return desc; } @@ -188,7 +190,7 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const } void addRequestedPathFileAndSizeVirtualsToChunk( - Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional size, const String * filename) + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional size, const String * filename, std::optional last_modified) { for (const auto & virtual_column : requested_virtual_columns) { @@ -216,6 +218,13 @@ void addRequestedPathFileAndSizeVirtualsToChunk( else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } + else if (virtual_column.name == "_time") + { + if (last_modified) + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), last_modified->utcTime())->convertToFullColumnIfConst()); + else + chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); + } } } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 62f2e4855b5..65826e1f1e7 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -69,7 +69,7 @@ void filterByPathOrFile(std::vector & sources, const std::vector & pa } void addRequestedPathFileAndSizeVirtualsToChunk( - Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional size, const String * filename = nullptr); + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional size, const String * filename = nullptr, std::optional last_modified = std::nullopt); } } From aefe7ffc31e4b2bb000d4df121472e85b93baa1c Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Wed, 5 Jun 2024 21:34:29 +0000 Subject: [PATCH 12/34] time_virtual_col: slightly works --- src/IO/Archives/IArchiveReader.h | 2 ++ src/IO/Archives/LibArchiveReader.cpp | 1 + src/Storages/StorageFile.cpp | 4 +++- src/Storages/StorageFile.h | 1 + src/Storages/VirtualColumnUtils.cpp | 2 +- tests/integration/test_storage_hdfs/test.py | 2 +- tests/integration/test_storage_s3/test.py | 3 ++- 7 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/IO/Archives/IArchiveReader.h b/src/IO/Archives/IArchiveReader.h index ee516d2655b..d7758b9e401 100644 --- a/src/IO/Archives/IArchiveReader.h +++ b/src/IO/Archives/IArchiveReader.h @@ -5,6 +5,7 @@ 
#include #include +#include namespace DB { @@ -25,6 +26,7 @@ public: { UInt64 uncompressed_size; UInt64 compressed_size; + Poco::Timestamp last_modified; bool is_encrypted; }; diff --git a/src/IO/Archives/LibArchiveReader.cpp b/src/IO/Archives/LibArchiveReader.cpp index bec7f587180..e3fe63fa40d 100644 --- a/src/IO/Archives/LibArchiveReader.cpp +++ b/src/IO/Archives/LibArchiveReader.cpp @@ -157,6 +157,7 @@ public: file_info.emplace(); file_info->uncompressed_size = archive_entry_size(current_entry); file_info->compressed_size = archive_entry_size(current_entry); + file_info->last_modified = archive_entry_mtime(current_entry); file_info->is_encrypted = false; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6744159d5dc..f84d3380c3f 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1341,6 +1341,7 @@ Chunk StorageFileSource::generate() chassert(file_enumerator); current_path = fmt::format("{}::{}", archive_reader->getPath(), *filename_override); current_file_size = file_enumerator->getFileInfo().uncompressed_size; + current_file_last_modified = file_enumerator->getFileInfo().last_modified; if (need_only_count && tryGetCountFromCache(current_archive_stat)) continue; @@ -1370,6 +1371,7 @@ Chunk StorageFileSource::generate() struct stat file_stat; file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); current_file_size = file_stat.st_size; + current_file_last_modified = Poco::Timestamp::fromEpochTime(file_stat.st_mtim.tv_sec); if (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; @@ -1437,7 +1439,7 @@ Chunk StorageFileSource::generate() /// Enrich with virtual columns. VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, current_path, current_file_size, filename_override.has_value() ? &filename_override.value() : nullptr); + chunk, requested_virtual_columns, current_path, current_file_size, filename_override.has_value() ? 
&filename_override.value() : nullptr, current_file_last_modified); return chunk; } diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 37da59c3664..ac094aeb489 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -279,6 +279,7 @@ private: FilesIteratorPtr files_iterator; String current_path; std::optional current_file_size; + std::optional current_file_last_modified; struct stat current_archive_stat; std::optional filename_override; Block sample_block; diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 5362cffece5..1e39d64fb18 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -221,7 +221,7 @@ void addRequestedPathFileAndSizeVirtualsToChunk( else if (virtual_column.name == "_time") { if (last_modified) - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), last_modified->utcTime())->convertToFullColumnIfConst()); + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), last_modified->epochTime())->convertToFullColumnIfConst()); else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 44c0223e677..98ea79d6ee4 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -987,7 +987,7 @@ def test_read_subcolumns(started_cluster): assert res == "2\ttest_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n" res = node.query( - f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" + f"select x.b.d, _path, x.b, _file, _time, now(), x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" ) assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 09b27fff1e8..5becdf30476 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -2117,7 +2117,7 @@ def test_read_subcolumns(started_cluster): assert res == "0\troot/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n" res = instance.query( - f"select x.b.d, _path, x.b, _file, x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" + f"select x.b.d, _path, x.b, _file, _time, now(), x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" @@ -2148,6 +2148,7 @@ def test_read_subcolumns(started_cluster): res == "42\t/root/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" ) + logging.info("Some custom logging") def test_filtering_by_file_or_path(started_cluster): bucket = started_cluster.minio_bucket From 54ed3354f6be7aab304f55e253d8175a03d9a2b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 6 Jun 2024 12:56:43 +0200 Subject: [PATCH 13/34] Better debugging --- ...23_zeros_generate_random_with_limit_progress_bar.sh | 10 ++++++---- 1 file changed, 6 
insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh index 4bb8fc8880d..8aedf0bc0ff 100755 --- a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh +++ b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-random-settings CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -6,11 +7,12 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function run_with_progress_and_match_total_rows() { - echo "$1" | \ - ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" --data-binary @- 2>&1 | \ - grep -q '"total_rows_to_read":"100"' && echo "Matched" || echo "Expected total_rows_to_read not found" + CURL_RESPONSE=$(echo "$1" | \ + ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" --data-binary @- 2>&1) + + echo "$CURL_RESPONSE" | grep -q '"total_rows_to_read":"100"' && echo "Matched" || echo "Expected total_rows_to_read not found: ${CURL_RESPONSE}" } run_with_progress_and_match_total_rows 'SELECT * FROM system.zeros LIMIT 100' run_with_progress_and_match_total_rows 'SELECT * FROM system.zeros_mt LIMIT 100' -run_with_progress_and_match_total_rows 'SELECT * FROM generateRandom() LIMIT 100' +run_with_progress_and_match_total_rows "SELECT * FROM generateRandom('number UInt64') LIMIT 100" From 8ebcd8860821537cec4e9e6a76653df3d8fb89ae Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 6 Jun 2024 13:08:40 +0200 Subject: [PATCH 14/34] In imported files should be only logging, no prints --- tests/ci/pr_info.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ccf5dc23121..cd8d32d4e3c 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -59,7 +59,7 @@ def get_pr_for_commit(sha, ref): data = response.json() our_prs = [] # type: List[Dict] if len(data) > 1: - print("Got more than one pr for commit", sha) + logging.warning("Got more than one pr for commit %s", sha) for pr in data: # We need to check if the PR is created in our repo, because # https://github.com/kaynewu/ClickHouse/pull/2 @@ -71,13 +71,20 @@ def get_pr_for_commit(sha, ref): if pr["head"]["ref"] in ref: return pr our_prs.append(pr) - print( - f"Cannot find PR with required ref {ref}, sha {sha} - returning first one" + logging.warning( + "Cannot find PR with required ref %s, sha %s - returning first one", + ref, + sha, ) first_pr = our_prs[0] return first_pr except Exception as ex: - print(f"Cannot fetch PR info from commit {ref}, {sha}", ex) + logging.error( + "Cannot fetch PR info from commit ref %s, sha %s, exception: %s", + ref, + sha, + ex, + ) return None @@ -289,8 +296,10 @@ class PRInfo: else: # assume this is a dispatch self.event_type = EventType.DISPATCH - print("event.json does not match pull_request or push:") - print(json.dumps(github_event, sort_keys=True, indent=4)) + logging.warning( + "event.json does not match pull_request or push:\n%s", + json.dumps(github_event, sort_keys=True, indent=4), + ) self.sha = os.getenv( "GITHUB_SHA", "0000000000000000000000000000000000000000" ) @@ -357,7 +366,7 @@ class PRInfo: diff_object = PatchSet(response.text) self.changed_files.update({f.path for f in diff_object}) self.changed_files_requested = True - print(f"Fetched info about {len(self.changed_files)} changed files") + logging.info("Fetched info about %s changed files", len(self.changed_files)) def get_dict(self): return { From 3b593a7c5e69779a1324f2f4461b564e37a1fc1f Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 6 Jun 2024 13:09:54 +0200 Subject: [PATCH 15/34] Make PRInfo.compare_pr_url persistent for the Github.PullReaquest objects --- tests/ci/pr_info.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index cd8d32d4e3c..dda5b30f1e3 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -266,12 +266,12 @@ class PRInfo: self.diff_urls.append( self.compare_url( pull_request["base"]["repo"]["default_branch"], - pull_request["head"]["label"], + pull_request["head"]["sha"], ) ) self.diff_urls.append( self.compare_url( - pull_request["head"]["label"], + pull_request["head"]["sha"], pull_request["base"]["repo"]["default_branch"], ) ) @@ -286,7 +286,7 @@ class PRInfo: # itself, but as well files changed since we branched out self.diff_urls.append( self.compare_url( - pull_request["head"]["label"], + pull_request["head"]["sha"], pull_request["base"]["repo"]["default_branch"], ) ) @@ -339,7 +339,7 @@ class PRInfo: return self.event_type == EventType.DISPATCH def compare_pr_url(self, pr_object: dict) -> str: - return self.compare_url(pr_object["base"]["label"], pr_object["head"]["label"]) + return self.compare_url(pr_object["base"]["sha"], pr_object["head"]["sha"]) @staticmethod def compare_url(first: str, second: str) -> str: From 0dff60821fd950d3e47f24e307ea8b3f5249f831 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 6 Jun 2024 13:10:51 +0200 Subject: [PATCH 16/34] Add context to the get_gh_api APIException --- tests/ci/build_download_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 0f6c8e5aa8a..036d3548eb9 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -110,7 +110,7 @@ def get_gh_api( logging.info("Exception '%s' while getting, retry %i", exc, try_cnt) time.sleep(sleep) - raise APIException("Unable to request data from GH API") from exc + raise APIException(f"Unable to request data from GH API: {url}") from exc def get_build_name_for_check(check_name: str) -> str: From 387869488d80db6c645346c61b2470d37a026fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 6 Jun 2024 15:54:02 +0200 Subject: [PATCH 17/34] Stabilize and adjust number of streams --- src/Storages/StorageGenerateRandom.cpp | 58 +++++++++++++++---- src/Storages/System/StorageSystemZeros.cpp | 33 ++++++----- ...generate_random_with_limit_progress_bar.sh | 2 +- 3 files changed, 65 insertions(+), 28 deletions(-) diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index cdbade51695..5aceef78238 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -50,6 +50,12 @@ namespace ErrorCodes namespace { +struct GenerateRandomState +{ + std::atomic add_total_rows = 0; +}; +using GenerateRandomStatePtr = std::shared_ptr; + void fillBufferWithRandomData(char * __restrict data, size_t limit, size_t size_of_type, pcg64 & rng, [[maybe_unused]] bool flip_bytes = false) { size_t size = limit * size_of_type; @@ -529,10 +535,24 @@ ColumnPtr fillColumnWithRandomData( class GenerateSource : public ISource { public: - GenerateSource(UInt64 block_size_, UInt64 max_array_length_, UInt64 max_string_length_, UInt64 random_seed_, Block block_header_, ContextPtr context_) + GenerateSource( + UInt64 block_size_, + UInt64 max_array_length_, + UInt64 max_string_length_, + UInt64 random_seed_, + Block 
block_header_, + ContextPtr context_, + GenerateRandomStatePtr state_) : ISource(Nested::flattenNested(prepareBlockToFill(block_header_))) - , block_size(block_size_), max_array_length(max_array_length_), max_string_length(max_string_length_) - , block_to_fill(std::move(block_header_)), rng(random_seed_), context(context_) {} + , block_size(block_size_) + , max_array_length(max_array_length_) + , max_string_length(max_string_length_) + , block_to_fill(std::move(block_header_)) + , rng(random_seed_) + , context(context_) + , shared_state(state_) + { + } String getName() const override { return "GenerateRandom"; } @@ -546,7 +566,15 @@ protected: columns.emplace_back(fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context)); columns = Nested::flattenNested(block_to_fill.cloneWithColumns(columns)).getColumns(); - return {std::move(columns), block_size}; + + UInt64 total_rows = shared_state->add_total_rows.fetch_and(0); + if (total_rows) + addTotalRowsApprox(total_rows); + + auto chunk = Chunk{std::move(columns), block_size}; + progress(chunk.getNumRows(), chunk.bytes()); + + return chunk; } private: @@ -558,6 +586,7 @@ private: pcg64 rng; ContextPtr context; + GenerateRandomStatePtr shared_state; static Block & prepareBlockToFill(Block & block) { @@ -645,9 +674,6 @@ Pipe StorageGenerateRandom::read( { storage_snapshot->check(column_names); - Pipes pipes; - pipes.reserve(num_streams); - const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns(); Block block_header; for (const auto & name : column_names) @@ -676,16 +702,24 @@ Pipe StorageGenerateRandom::read( } } + UInt64 query_limit = query_info.limit; + if (query_limit && num_streams * max_block_size < query_limit) + { + /// We want to avoid spawning more streams than necessary + num_streams = std::min(num_streams, ((query_limit + max_block_size - 1) / max_block_size)); + } + Pipes pipes; + pipes.reserve(num_streams); + /// Will create more seed values for each source from initial seed. pcg64 generate(random_seed); + auto shared_state = std::make_shared(query_info.limit); + for (UInt64 i = 0; i < num_streams; ++i) { - auto source = std::make_shared(max_block_size, max_array_length, max_string_length, generate(), block_header, context); - - if (i == 0 && query_info.limit) - source->addTotalRowsApprox(query_info.limit); - + auto source = std::make_shared( + max_block_size, max_array_length, max_string_length, generate(), block_header, context, shared_state); pipes.emplace_back(std::move(source)); } diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index a48b109fbbe..40faf2e265f 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -16,7 +16,9 @@ namespace struct ZerosState { + ZerosState(UInt64 limit) : add_total_rows(limit) { } std::atomic num_generated_rows = 0; + std::atomic add_total_rows = 0; }; using ZerosStatePtr = std::shared_ptr; @@ -49,6 +51,10 @@ protected: if (generated_rows >= limit) return {}; + UInt64 total_rows = state->add_total_rows.fetch_and(0); + if (total_rows) + addTotalRowsApprox(total_rows); + if (generated_rows + column_size > limit) { column_size = limit - generated_rows; @@ -105,10 +111,16 @@ Pipe StorageSystemZeros::read( bool use_multiple_streams = multithreaded; - if (limit && *limit < max_block_size) + UInt64 query_limit = limit ? 
*limit : query_info.limit; + if (query_limit && query_limit > max_block_size) + max_block_size = query_limit; + + if (use_multiple_streams && query_limit && num_streams * max_block_size < query_limit) { - max_block_size = static_cast(*limit); - use_multiple_streams = false; + /// We want to avoid spawning more streams than necessary + num_streams = std::min(num_streams, ((query_limit + max_block_size - 1) / max_block_size)); + if (num_streams <= 1) + use_multiple_streams = false; } if (!use_multiple_streams) @@ -118,21 +130,12 @@ Pipe StorageSystemZeros::read( ZerosStatePtr state; - if (limit) - state = std::make_shared(); + if (query_limit) + state = std::make_shared(query_limit); for (size_t i = 0; i < num_streams; ++i) { - auto source = std::make_shared(max_block_size, limit ? *limit : 0, state); - - if (i == 0) - { - if (limit) - source->addTotalRowsApprox(*limit); - else if (query_info.limit) - source->addTotalRowsApprox(query_info.limit); - } - + auto source = std::make_shared(max_block_size, query_limit, state); res.addSource(std::move(source)); } diff --git a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh index 8aedf0bc0ff..500a12587a2 100755 --- a/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh +++ b/tests/queries/0_stateless/03023_zeros_generate_random_with_limit_progress_bar.sh @@ -8,7 +8,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) function run_with_progress_and_match_total_rows() { CURL_RESPONSE=$(echo "$1" | \ - ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" --data-binary @- 2>&1) + ${CLICKHOUSE_CURL} -vsS "${CLICKHOUSE_URL}&wait_end_of_query=1&max_block_size=1&send_progress_in_http_headers=1&http_headers_progress_interval_ms=0&output_format_parallel_formatting=0" --data-binary @- 2>&1) echo "$CURL_RESPONSE" | grep -q '"total_rows_to_read":"100"' && echo "Matched" || echo "Expected total_rows_to_read not found: ${CURL_RESPONSE}" } From 81fe4c48944eda05c8aad628e0de65b47aeddead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 6 Jun 2024 17:59:16 +0200 Subject: [PATCH 18/34] Fix number of stream decision --- src/Storages/StorageGenerateRandom.cpp | 2 +- src/Storages/System/StorageSystemZeros.cpp | 35 ++++++++-------------- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 5aceef78238..ca9c6fb3226 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -703,7 +703,7 @@ Pipe StorageGenerateRandom::read( } UInt64 query_limit = query_info.limit; - if (query_limit && num_streams * max_block_size < query_limit) + if (query_limit && num_streams * max_block_size > query_limit) { /// We want to avoid spawning more streams than necessary num_streams = std::min(num_streams, ((query_limit + max_block_size - 1) / max_block_size)); diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 40faf2e265f..7e1d7a14d0e 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -44,17 +44,16 @@ protected: auto column_ptr = column; size_t column_size = column_ptr->size(); - if (state) + UInt64 total_rows = state->add_total_rows.fetch_and(0); + if (total_rows) 
+ addTotalRowsApprox(total_rows); + + if (limit) { auto generated_rows = state->num_generated_rows.fetch_add(column_size, std::memory_order_acquire); - if (generated_rows >= limit) return {}; - UInt64 total_rows = state->add_total_rows.fetch_and(0); - if (total_rows) - addTotalRowsApprox(total_rows); - if (generated_rows + column_size > limit) { column_size = limit - generated_rows; @@ -109,30 +108,22 @@ Pipe StorageSystemZeros::read( { storage_snapshot->check(column_names); - bool use_multiple_streams = multithreaded; + UInt64 query_limit = limit ? *limit : 0; + if (query_info.limit) + query_limit = query_limit ? std::min(query_limit, query_info.limit) : query_info.limit; - UInt64 query_limit = limit ? *limit : query_info.limit; - if (query_limit && query_limit > max_block_size) + if (query_limit && query_limit < max_block_size) max_block_size = query_limit; - if (use_multiple_streams && query_limit && num_streams * max_block_size < query_limit) - { + if (!multithreaded) + num_streams = 1; + else if (query_limit && num_streams * max_block_size > query_limit) /// We want to avoid spawning more streams than necessary num_streams = std::min(num_streams, ((query_limit + max_block_size - 1) / max_block_size)); - if (num_streams <= 1) - use_multiple_streams = false; - } - if (!use_multiple_streams) - num_streams = 1; + ZerosStatePtr state = std::make_shared(query_limit); Pipe res; - - ZerosStatePtr state; - - if (query_limit) - state = std::make_shared(query_limit); - for (size_t i = 0; i < num_streams; ++i) { auto source = std::make_shared(max_block_size, query_limit, state); From d36bf5bac86fb9bfde4fe761a1ab2ab7079f4f1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 6 Jun 2024 21:40:24 +0200 Subject: [PATCH 19/34] Compiler complains --- src/Storages/System/StorageSystemZeros.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 7e1d7a14d0e..09a2bb5d963 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -16,7 +16,7 @@ namespace struct ZerosState { - ZerosState(UInt64 limit) : add_total_rows(limit) { } + explicit ZerosState(UInt64 limit) : add_total_rows(limit) { } std::atomic num_generated_rows = 0; std::atomic add_total_rows = 0; }; @@ -119,7 +119,7 @@ Pipe StorageSystemZeros::read( num_streams = 1; else if (query_limit && num_streams * max_block_size > query_limit) /// We want to avoid spawning more streams than necessary - num_streams = std::min(num_streams, ((query_limit + max_block_size - 1) / max_block_size)); + num_streams = std::min(num_streams, static_cast(((query_limit + max_block_size - 1) / max_block_size))); ZerosStatePtr state = std::make_shared(query_limit); From 258b1f9559673b3ebe6da0eae728aa783955045e Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Thu, 6 Jun 2024 21:00:47 +0000 Subject: [PATCH 20/34] time_virtual_col: tests, doc, small refactoring --- .../integrations/azureBlobStorage.md | 1 + .../table-engines/integrations/hdfs.md | 1 + .../engines/table-engines/integrations/s3.md | 11 +++++---- docs/en/engines/table-engines/special/file.md | 1 + docs/en/engines/table-engines/special/url.md | 1 + .../table-functions/azureBlobStorage.md | 1 + docs/en/sql-reference/table-functions/file.md | 1 + docs/en/sql-reference/table-functions/hdfs.md | 1 + docs/en/sql-reference/table-functions/s3.md | 1 + docs/en/sql-reference/table-functions/url.md | 1 + 
.../StorageObjectStorageSource.cpp | 14 ++++++----- src/Storages/S3Queue/S3QueueSource.cpp | 10 ++++++-- src/Storages/StorageFile.cpp | 11 +++++++-- src/Storages/StorageURL.cpp | 7 +++++- src/Storages/VirtualColumnUtils.cpp | 23 ++++++++++--------- src/Storages/VirtualColumnUtils.h | 14 +++++++++-- .../test_storage_azure_blob_storage/test.py | 4 ++-- tests/integration/test_storage_hdfs/test.py | 4 ++-- tests/integration/test_storage_s3/test.py | 4 ++-- ...e_structure_from_insertion_table.reference | 1 - ...lumn_use_structure_from_insertion_table.sh | 13 ----------- ...e_structure_from_insertion_table.reference | 1 + ...lumn_use_structure_from_insertion_table.sh | 14 +++++++++++ 23 files changed, 91 insertions(+), 49 deletions(-) delete mode 100644 tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.reference delete mode 100755 tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh create mode 100644 tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.reference create mode 100755 tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.sh diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md index 0843ff1ac47..dfc27d6b8cf 100644 --- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md +++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md @@ -54,6 +54,7 @@ SELECT * FROM test_table; - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## See also diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index 2749fa7e479..c9df713231a 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -235,6 +235,7 @@ libhdfs3 support HDFS namenode HA. - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Storage Settings {#storage-settings} diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index cb1da1c8e68..93f4a187656 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -53,14 +53,14 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da This example uses the [docker compose recipe](https://github.com/ClickHouse/examples/tree/5fdc6ff72f4e5137e23ea075c88d3f44b0202490/docker-compose-recipes/recipes/ch-and-minio-S3), which integrates ClickHouse and MinIO. You should be able to reproduce the same queries using S3 by replacing the endpoint and authentication values. -Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter token `{_partition_id}` as part of the S3 object (filename), and that the SELECT queries select against those resulting object names (e.g., `test_3.csv`). 
+Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter token `{_partition_id}` as part of the S3 object (filename), and that the SELECT queries select against those resulting object names (e.g., `test_3.csv`). :::note As shown in the example, querying from S3 tables that are partitioned is not directly supported at this time, but can be accomplished by querying the individual partitions using the S3 table function. -The primary use-case for writing +The primary use-case for writing partitioned data in S3 is to enable transferring that data into another ClickHouse system (for example, moving from on-prem systems to ClickHouse Cloud). Because ClickHouse datasets are often very large, and network @@ -78,9 +78,9 @@ CREATE TABLE p ) ENGINE = S3( # highlight-next-line - 'http://minio:10000/clickhouse//test_{_partition_id}.csv', - 'minioadmin', - 'minioadminpassword', + 'http://minio:10000/clickhouse//test_{_partition_id}.csv', + 'minioadmin', + 'minioadminpassword', 'CSV') PARTITION BY column3 ``` @@ -145,6 +145,7 @@ Code: 48. DB::Exception: Received from localhost:9000. DB::Exception: Reading fr - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns). diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 0d422f64762..957b18b5305 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -102,6 +102,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Settings {#settings} diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index f6183a779ae..c906830d0e9 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -108,6 +108,7 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da - `_path` — Path to the `URL`. Type: `LowCardinalty(String)`. - `_file` — Resource name of the `URL`. Type: `LowCardinalty(String)`. - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index 1510489ce83..f59fedeb3a2 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -72,6 +72,7 @@ SELECT count(*) FROM azureBlobStorage('DefaultEndpointsProtocol=https;AccountNam - `_path` — Path to the file. Type: `LowCardinalty(String)`. 
- `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. **See Also** diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index f66178afbb2..4fec772c373 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -196,6 +196,7 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Settings {#settings} diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index d65615e7588..28cba5ccc6a 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -97,6 +97,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index cbef80371a3..1a7e2b8d66a 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -272,6 +272,7 @@ FROM s3( - `_path` — Path to the file. Type: `LowCardinalty(String)`. - `_file` — Name of the file. Type: `LowCardinalty(String)`. - `_size` — Size of the file in bytes. Type: `Nullable(UInt64)`. If the file size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. ## Storage Settings {#storage-settings} diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 4dc6e435b50..3bb7aff53a7 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -53,6 +53,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it - `_path` — Path to the `URL`. Type: `LowCardinalty(String)`. - `_file` — Resource name of the `URL`. Type: `LowCardinalty(String)`. - `_size` — Size of the resource in bytes. Type: `Nullable(UInt64)`. If the size is unknown, the value is `NULL`. +- `_time` — Last modified time of the file. Type: `Nullable(DateTime)`. If the time is unknown, the value is `NULL`. 
## Storage Settings {#storage-settings} diff --git a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp index 8d5c03ae11e..2fc6993369d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageSource.cpp @@ -195,12 +195,14 @@ Chunk StorageObjectStorageSource::generate() const auto & object_info = reader.getObjectInfo(); const auto & filename = object_info.getFileName(); chassert(object_info.metadata); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, - read_from_format_info.requested_virtual_columns, - getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), - object_info.metadata->size_bytes, &filename, object_info.metadata->last_modified); - + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( + chunk, read_from_format_info.requested_virtual_columns, + { + .path = getUniqueStoragePathIdentifier(*configuration, reader.getObjectInfo(), false), + .size = object_info.metadata->size_bytes, + .filename = &filename, + .last_modified = object_info.metadata->last_modified + }); return chunk; } diff --git a/src/Storages/S3Queue/S3QueueSource.cpp b/src/Storages/S3Queue/S3QueueSource.cpp index d8633037ed9..b5b1a8dd992 100644 --- a/src/Storages/S3Queue/S3QueueSource.cpp +++ b/src/Storages/S3Queue/S3QueueSource.cpp @@ -421,8 +421,14 @@ Chunk StorageS3QueueSource::generate() file_status->processed_rows += chunk.getNumRows(); processed_rows_from_file += chunk.getNumRows(); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, path, reader.getObjectInfo().metadata->size_bytes); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( + chunk, requested_virtual_columns, + { + .path = path, + .size = reader.getObjectInfo().metadata->size_bytes + }); + + return chunk; } } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index f84d3380c3f..09f0bd60859 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1438,8 +1438,15 @@ Chunk StorageFileSource::generate() progress(num_rows, chunk_size ? chunk_size : chunk.bytes()); /// Enrich with virtual columns. - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk( - chunk, requested_virtual_columns, current_path, current_file_size, filename_override.has_value() ? &filename_override.value() : nullptr, current_file_last_modified); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( + chunk, requested_virtual_columns, + { + .path = current_path, + .size = current_file_size, + .filename = (filename_override.has_value() ? &filename_override.value() : nullptr), + .last_modified = current_file_last_modified + }); + return chunk; } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 8d1c6933503..9302e7ef3e5 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -411,7 +411,12 @@ Chunk StorageURLSource::generate() if (input_format) chunk_size = input_format->getApproxBytesReadForChunk(); progress(num_rows, chunk_size ? 
chunk_size : chunk.bytes()); - VirtualColumnUtils::addRequestedPathFileAndSizeVirtualsToChunk(chunk, requested_virtual_columns, curr_uri.getPath(), current_file_size); + VirtualColumnUtils::addRequestedFileLikeStorageVirtualsToChunk( + chunk, requested_virtual_columns, + { + .path = curr_uri.getPath(), + .size = current_file_size + }); return chunk; } diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 1e39d64fb18..778c9e13adb 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -189,39 +189,40 @@ ColumnPtr getFilterByPathAndFileIndexes(const std::vector & paths, const return block.getByName("_idx").column; } -void addRequestedPathFileAndSizeVirtualsToChunk( - Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, const String & path, std::optional size, const String * filename, std::optional last_modified) +void addRequestedFileLikeStorageVirtualsToChunk( + Chunk & chunk, const NamesAndTypesList & requested_virtual_columns, + VirtualsForFileLikeStorage virtual_values) { for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") { - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), path)->convertToFullColumnIfConst()); + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), virtual_values.path)->convertToFullColumnIfConst()); } else if (virtual_column.name == "_file") { - if (filename) + if (virtual_values.filename) { - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *filename)->convertToFullColumnIfConst()); + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), (*virtual_values.filename))->convertToFullColumnIfConst()); } else { - size_t last_slash_pos = path.find_last_of('/'); - auto filename_from_path = path.substr(last_slash_pos + 1); + size_t last_slash_pos = virtual_values.path.find_last_of('/'); + auto filename_from_path = virtual_values.path.substr(last_slash_pos + 1); chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), filename_from_path)->convertToFullColumnIfConst()); } } else if (virtual_column.name == "_size") { - if (size) - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *size)->convertToFullColumnIfConst()); + if (virtual_values.size) + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), *virtual_values.size)->convertToFullColumnIfConst()); else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } else if (virtual_column.name == "_time") { - if (last_modified) - chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), last_modified->epochTime())->convertToFullColumnIfConst()); + if (virtual_values.last_modified) + chunk.addColumn(virtual_column.type->createColumnConst(chunk.getNumRows(), virtual_values.last_modified->epochTime())->convertToFullColumnIfConst()); else chunk.addColumn(virtual_column.type->createColumnConstWithDefaultValue(chunk.getNumRows())->convertToFullColumnIfConst()); } diff --git a/src/Storages/VirtualColumnUtils.h b/src/Storages/VirtualColumnUtils.h index 65826e1f1e7..fbfbdd6c6cc 100644 --- a/src/Storages/VirtualColumnUtils.h +++ b/src/Storages/VirtualColumnUtils.h @@ -68,8 +68,18 @@ void filterByPathOrFile(std::vector & sources, const std::vector & pa sources = std::move(filtered_sources); } -void addRequestedPathFileAndSizeVirtualsToChunk( - Chunk & chunk, const 
NamesAndTypesList & requested_virtual_columns, const String & path, std::optional<size_t> size, const String * filename = nullptr, std::optional<Poco::Timestamp> last_modified = std::nullopt);
+struct VirtualsForFileLikeStorage
+{
+    const String & path;
+    std::optional<size_t> size { std::nullopt };
+    const String * filename { nullptr };
+    std::optional<Poco::Timestamp> last_modified { std::nullopt };
+
+};
+
+void addRequestedFileLikeStorageVirtualsToChunk(
+    Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
+    VirtualsForFileLikeStorage virtual_values);
 }
 
 }
diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py
index f836c58ce30..9f5aef1489c 100644
--- a/tests/integration/test_storage_azure_blob_storage/test.py
+++ b/tests/integration/test_storage_azure_blob_storage/test.py
@@ -758,12 +758,12 @@ def test_read_subcolumns(cluster):
     )
 
     res = node.query(
-        f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv',"
+        f"select a.b.d, _path, a.b, _file, dateDiff('minute', _time, now()), a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.tsv',"
         f" 'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'auto', 'auto',"
         f" 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
     )
 
-    assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t3\n"
+    assert res == "2\tcont/test_subcolumns.tsv\t(1,2)\ttest_subcolumns.tsv\t0\t3\n"
 
     res = node.query(
         f"select a.b.d, _path, a.b, _file, a.e from azureBlobStorage('{storage_account_url}', 'cont', 'test_subcolumns.jsonl',"
diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py
index 98ea79d6ee4..cda2b8694c6 100644
--- a/tests/integration/test_storage_hdfs/test.py
+++ b/tests/integration/test_storage_hdfs/test.py
@@ -987,10 +987,10 @@ def test_read_subcolumns(started_cluster):
     assert res == "2\ttest_subcolumns.jsonl\t(1,2)\ttest_subcolumns.jsonl\t3\n"
 
     res = node.query(
-        f"select x.b.d, _path, x.b, _file, _time, now(), x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
+        f"select x.b.d, _path, x.b, _file, dateDiff('minute', _time, now()), x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')"
    )
 
-    assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n"
+    assert res == "0\ttest_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\t0\n"
 
     res = node.query(
         f"select x.b.d, _path, x.b, _file, x.e from hdfs('hdfs://hdfs1:9000/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py
index 5becdf30476..f7f7bbe335c 100644
--- a/tests/integration/test_storage_s3/test.py
+++ b/tests/integration/test_storage_s3/test.py
@@ -2117,10 +2117,10 @@ def test_read_subcolumns(started_cluster):
     assert res == "0\troot/test_subcolumns.jsonl\t(0,0)\ttest_subcolumns.jsonl\t0\n"
 
     res = instance.query(
-        f"select x.b.d, _path, x.b, _file, _time, now(), x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')"
+        f"select x.b.d, _path, x.b, _file, dateDiff('minute', _time, now()), x.e from 
s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) - assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t42\n" + assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t0\t42\n" res = instance.query( f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" diff --git a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.reference b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.reference deleted file mode 100644 index 35ef86f5339..00000000000 --- a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.reference +++ /dev/null @@ -1 +0,0 @@ -1 2 4 diff --git a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh b/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh deleted file mode 100755 index d9e4a2c8f8b..00000000000 --- a/tests/queries/0_stateless/02931_size_virtual_column_use_structure_from_insertion_table.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env bash - -CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -# shellcheck source=../shell_config.sh -. "$CURDIR"/../shell_config.sh - -echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv -$CLICKHOUSE_LOCAL -nm -q " -create table test (x UInt64, y UInt32, size UInt64) engine=Memory; -insert into test select c1, c2, _size from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') settings use_structure_from_insertion_table_in_table_functions=1; -select * from test; -" -rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv diff --git a/tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.reference b/tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.reference new file mode 100644 index 00000000000..93acdc34842 --- /dev/null +++ b/tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.reference @@ -0,0 +1 @@ +1 2 4 1 1 diff --git a/tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.sh b/tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.sh new file mode 100755 index 00000000000..ebdda0cc1d3 --- /dev/null +++ b/tests/queries/0_stateless/02931_virtual_column_use_structure_from_insertion_table.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "1,2" > $CLICKHOUSE_TEST_UNIQUE_NAME.csv +sleep 1 +$CLICKHOUSE_LOCAL -nm -q " +create table test (x UInt64, y UInt32, size UInt64, d32 DateTime32, d64 DateTime64) engine=Memory; +insert into test select c1, c2, _size, _time, _time from file('$CLICKHOUSE_TEST_UNIQUE_NAME.csv') settings use_structure_from_insertion_table_in_table_functions=1; +select x, y, size, (dateDiff('millisecond', d32, now()) < 4000 AND dateDiff('millisecond', d32, now()) > 0), (dateDiff('second', d64, now()) < 4 AND dateDiff('second', d64, now()) > 0) from test; +" +rm $CLICKHOUSE_TEST_UNIQUE_NAME.csv From b82ab5a4028f620d4abe920745945983ae9f264c Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Thu, 6 Jun 2024 21:21:12 +0000 Subject: [PATCH 21/34] time_virtual_col: style check --- tests/integration/test_storage_s3/test.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index f7f7bbe335c..61c6d95f123 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -2120,7 +2120,9 @@ def test_read_subcolumns(started_cluster): f"select x.b.d, _path, x.b, _file, dateDiff('minute', _time, now()), x.e from s3('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.jsonl', auto, 'x Tuple(b Tuple(c UInt32, d UInt32), e UInt32) default ((42, 42), 42)')" ) - assert res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t0\t42\n" + assert ( + res == "42\troot/test_subcolumns.jsonl\t(42,42)\ttest_subcolumns.jsonl\t0\t42\n" + ) res = instance.query( f"select a.b.d, _path, a.b, _file, a.e from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test_subcolumns.tsv', auto, 'a Tuple(b Tuple(c UInt32, d UInt32), e UInt32)')" @@ -2150,6 +2152,7 @@ def test_read_subcolumns(started_cluster): logging.info("Some custom logging") + def test_filtering_by_file_or_path(started_cluster): bucket = started_cluster.minio_bucket instance = started_cluster.instances["dummy"] From 078f5f4ee0fff1d55a5924df8340a905ef32f0f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 7 Jun 2024 13:10:12 +0200 Subject: [PATCH 22/34] Fix bug in short circuit evaluation --- src/Columns/MaskOperations.cpp | 6 +++++- .../0_stateless/03168_fuzz_multiIf_short_circuit.reference | 0 .../0_stateless/03168_fuzz_multiIf_short_circuit.sql | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.reference create mode 100644 tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.sql diff --git a/src/Columns/MaskOperations.cpp b/src/Columns/MaskOperations.cpp index 1f5f94beee9..873a4060872 100644 --- a/src/Columns/MaskOperations.cpp +++ b/src/Columns/MaskOperations.cpp @@ -289,10 +289,14 @@ void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty) if (!column_function) return; + size_t original_size = column.column->size(); + if (!empty) column = column_function->reduce(); else - column.column = column_function->getResultType()->createColumn(); + column.column = column_function->getResultType()->createColumnConstWithDefaultValue(original_size)->convertToFullColumnIfConst(); + + chassert(column.column->size() == original_size); } int checkShortCircuitArguments(const ColumnsWithTypeAndName & arguments) diff --git a/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.reference 
b/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.sql b/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.sql new file mode 100644 index 00000000000..4e4cc291e9b --- /dev/null +++ b/tests/queries/0_stateless/03168_fuzz_multiIf_short_circuit.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/64946 +SELECT + multiIf((number % toLowCardinality(toNullable(toUInt128(2)))) = (number % toNullable(2)), toInt8(1), (number % materialize(toLowCardinality(3))) = toUInt128(toNullable(0)), toInt8(materialize(materialize(2))), toInt64(toUInt128(3))) +FROM system.numbers +LIMIT 44857 +FORMAT Null; From 4b010dc478310b65d26cbe114e15f3cb73af4bb4 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 7 Jun 2024 13:11:52 +0200 Subject: [PATCH 23/34] Disable test with ASAN --- .../0_stateless/02908_many_requests_to_system_replicas.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh index 144831a2cdc..a247c99a818 100755 --- a/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh +++ b/tests/queries/0_stateless/02908_many_requests_to_system_replicas.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, zookeeper, no-parallel, no-fasttest +# Tags: long, zookeeper, no-parallel, no-fasttest, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 4d88f103469e8176229f3a258847eb9cca374309 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 7 Jun 2024 11:28:36 +0000 Subject: [PATCH 24/34] Update version_date.tsv and changelogs after v24.4.2.141-stable --- docs/changelogs/v24.4.2.141-stable.md | 101 ++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 2 files changed, 102 insertions(+) create mode 100644 docs/changelogs/v24.4.2.141-stable.md diff --git a/docs/changelogs/v24.4.2.141-stable.md b/docs/changelogs/v24.4.2.141-stable.md new file mode 100644 index 00000000000..656d0854392 --- /dev/null +++ b/docs/changelogs/v24.4.2.141-stable.md @@ -0,0 +1,101 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.4.2.141-stable (9e23d27bd11) FIXME as compared to v24.4.1.2088-stable (6d4b31322d1) + +#### Improvement +* Backported in [#63467](https://github.com/ClickHouse/ClickHouse/issues/63467): Make rabbitmq nack broken messages. Closes [#45350](https://github.com/ClickHouse/ClickHouse/issues/45350). [#60312](https://github.com/ClickHouse/ClickHouse/pull/60312) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#63612](https://github.com/ClickHouse/ClickHouse/issues/63612): The Dockerfile is reviewed by the docker official library in https://github.com/docker-library/official-images/pull/15846. [#63400](https://github.com/ClickHouse/ClickHouse/pull/63400) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backported in [#64279](https://github.com/ClickHouse/ClickHouse/issues/64279): Fix queries with FINAL give wrong result when table does not use adaptive granularity. [#62432](https://github.com/ClickHouse/ClickHouse/pull/62432) ([Duc Canh Le](https://github.com/canhld94)). 
+* Backported in [#63295](https://github.com/ClickHouse/ClickHouse/issues/63295): Fix crash with untuple and unresolved lambda. [#63131](https://github.com/ClickHouse/ClickHouse/pull/63131) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63978](https://github.com/ClickHouse/ClickHouse/issues/63978): Fix intersect parts when restart after drop range. [#63202](https://github.com/ClickHouse/ClickHouse/pull/63202) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#63413](https://github.com/ClickHouse/ClickHouse/issues/63413): Fix a misbehavior when SQL security defaults don't load for old tables during server startup. [#63209](https://github.com/ClickHouse/ClickHouse/pull/63209) ([pufit](https://github.com/pufit)). +* Backported in [#63388](https://github.com/ClickHouse/ClickHouse/issues/63388): JOIN filter push down filled join fix. Closes [#63228](https://github.com/ClickHouse/ClickHouse/issues/63228). [#63234](https://github.com/ClickHouse/ClickHouse/pull/63234) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#63618](https://github.com/ClickHouse/ClickHouse/issues/63618): Fix bug which could potentially lead to rare LOGICAL_ERROR during SELECT query with message: `Unexpected return type from materialize. Expected type_XXX. Got type_YYY.` Introduced in [#59379](https://github.com/ClickHouse/ClickHouse/issues/59379). [#63353](https://github.com/ClickHouse/ClickHouse/pull/63353) ([alesapin](https://github.com/alesapin)). +* Backported in [#63451](https://github.com/ClickHouse/ClickHouse/issues/63451): Fix `X-ClickHouse-Timezone` header returning wrong timezone when using `session_timezone` as query level setting. [#63377](https://github.com/ClickHouse/ClickHouse/pull/63377) ([Andrey Zvonov](https://github.com/zvonand)). +* Backported in [#63605](https://github.com/ClickHouse/ClickHouse/issues/63605): Fix backup of projection part in case projection was removed from table metadata, but part still has projection. [#63426](https://github.com/ClickHouse/ClickHouse/pull/63426) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#63510](https://github.com/ClickHouse/ClickHouse/issues/63510): Fix 'Every derived table must have its own alias' error for MYSQL dictionary source, close [#63341](https://github.com/ClickHouse/ClickHouse/issues/63341). [#63481](https://github.com/ClickHouse/ClickHouse/pull/63481) ([vdimir](https://github.com/vdimir)). +* Backported in [#63592](https://github.com/ClickHouse/ClickHouse/issues/63592): Avoid segafult in `MergeTreePrefetchedReadPool` while fetching projection parts. [#63513](https://github.com/ClickHouse/ClickHouse/pull/63513) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#63750](https://github.com/ClickHouse/ClickHouse/issues/63750): Read only the necessary columns from VIEW (new analyzer). Closes [#62594](https://github.com/ClickHouse/ClickHouse/issues/62594). [#63688](https://github.com/ClickHouse/ClickHouse/pull/63688) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#63772](https://github.com/ClickHouse/ClickHouse/issues/63772): Fix [#63539](https://github.com/ClickHouse/ClickHouse/issues/63539). Forbid WINDOW redefinition in new analyzer. [#63694](https://github.com/ClickHouse/ClickHouse/pull/63694) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#63872](https://github.com/ClickHouse/ClickHouse/issues/63872): Flatten_nested is broken with replicated database. 
[#63695](https://github.com/ClickHouse/ClickHouse/pull/63695) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63854](https://github.com/ClickHouse/ClickHouse/issues/63854): Fix `Not found column` and `CAST AS Map from array requires nested tuple of 2 elements` exceptions for distributed queries which use `Map(Nothing, Nothing)` type. Fixes [#63637](https://github.com/ClickHouse/ClickHouse/issues/63637). [#63753](https://github.com/ClickHouse/ClickHouse/pull/63753) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#63847](https://github.com/ClickHouse/ClickHouse/issues/63847): Fix possible `ILLEGAL_COLUMN` error in `partial_merge` join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#63755](https://github.com/ClickHouse/ClickHouse/pull/63755) ([vdimir](https://github.com/vdimir)). +* Backported in [#63908](https://github.com/ClickHouse/ClickHouse/issues/63908): `query_plan_remove_redundant_distinct` can break queries with WINDOW FUNCTIONS (with `allow_experimental_analyzer` is on). Fixes [#62820](https://github.com/ClickHouse/ClickHouse/issues/62820). [#63776](https://github.com/ClickHouse/ClickHouse/pull/63776) ([Igor Nikonov](https://github.com/devcrafter)). +* Backported in [#63955](https://github.com/ClickHouse/ClickHouse/issues/63955): Fix possible crash with SYSTEM UNLOAD PRIMARY KEY. [#63778](https://github.com/ClickHouse/ClickHouse/pull/63778) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#63938](https://github.com/ClickHouse/ClickHouse/issues/63938): Allow JOIN filter push down to both streams if only single equivalent column is used in query. Closes [#63799](https://github.com/ClickHouse/ClickHouse/issues/63799). [#63819](https://github.com/ClickHouse/ClickHouse/pull/63819) ([Maksim Kita](https://github.com/kitaisreal)). +* Backported in [#63991](https://github.com/ClickHouse/ClickHouse/issues/63991): Fix incorrect select query result when parallel replicas were used to read from a Materialized View. [#63861](https://github.com/ClickHouse/ClickHouse/pull/63861) ([Nikita Taranov](https://github.com/nickitat)). +* Backported in [#64033](https://github.com/ClickHouse/ClickHouse/issues/64033): Fix a error `Database name is empty` for remote queries with lambdas over the cluster with modified default database. Fixes [#63471](https://github.com/ClickHouse/ClickHouse/issues/63471). [#63864](https://github.com/ClickHouse/ClickHouse/pull/63864) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64561](https://github.com/ClickHouse/ClickHouse/issues/64561): Fix SIGSEGV due to CPU/Real (`query_profiler_real_time_period_ns`/`query_profiler_cpu_time_period_ns`) profiler (has been an issue since 2022, that leads to periodic server crashes, especially if you were using distributed engine). [#63865](https://github.com/ClickHouse/ClickHouse/pull/63865) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#64011](https://github.com/ClickHouse/ClickHouse/issues/64011): Fix analyzer - IN function with arbitrary deep sub-selects in materialized view to use insertion block. [#63930](https://github.com/ClickHouse/ClickHouse/pull/63930) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#64238](https://github.com/ClickHouse/ClickHouse/issues/64238): Fix resolve of unqualified COLUMNS matcher. Preserve the input columns order and forbid usage of unknown identifiers. 
[#63962](https://github.com/ClickHouse/ClickHouse/pull/63962) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64103](https://github.com/ClickHouse/ClickHouse/issues/64103): Deserialize untrusted binary inputs in a safer way. [#64024](https://github.com/ClickHouse/ClickHouse/pull/64024) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#64170](https://github.com/ClickHouse/ClickHouse/issues/64170): Add missing settings to recoverLostReplica. [#64040](https://github.com/ClickHouse/ClickHouse/pull/64040) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64322](https://github.com/ClickHouse/ClickHouse/issues/64322): This fix will use a proper redefined context with the correct definer for each individual view in the query pipeline Closes [#63777](https://github.com/ClickHouse/ClickHouse/issues/63777). [#64079](https://github.com/ClickHouse/ClickHouse/pull/64079) ([pufit](https://github.com/pufit)). +* Backported in [#64382](https://github.com/ClickHouse/ClickHouse/issues/64382): Fix analyzer: "Not found column" error is fixed when using INTERPOLATE. [#64096](https://github.com/ClickHouse/ClickHouse/pull/64096) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Backported in [#64568](https://github.com/ClickHouse/ClickHouse/issues/64568): Fix creating backups to S3 buckets with different credentials from the disk containing the file. [#64153](https://github.com/ClickHouse/ClickHouse/pull/64153) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#64272](https://github.com/ClickHouse/ClickHouse/issues/64272): Prevent LOGICAL_ERROR on CREATE TABLE as MaterializedView. [#64174](https://github.com/ClickHouse/ClickHouse/pull/64174) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64330](https://github.com/ClickHouse/ClickHouse/issues/64330): The query cache now considers two identical queries against different databases as different. The previous behavior could be used to bypass missing privileges to read from a table. [#64199](https://github.com/ClickHouse/ClickHouse/pull/64199) ([Robert Schulze](https://github.com/rschu1ze)). +* Backported in [#64254](https://github.com/ClickHouse/ClickHouse/issues/64254): Ignore `text_log` config when using Keeper. [#64218](https://github.com/ClickHouse/ClickHouse/pull/64218) ([Antonio Andelic](https://github.com/antonio2368)). +* Backported in [#64690](https://github.com/ClickHouse/ClickHouse/issues/64690): Fix Query Tree size validation. Closes [#63701](https://github.com/ClickHouse/ClickHouse/issues/63701). [#64377](https://github.com/ClickHouse/ClickHouse/pull/64377) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64409](https://github.com/ClickHouse/ClickHouse/issues/64409): Fix `Logical error: Bad cast` for `Buffer` table with `PREWHERE`. Fixes [#64172](https://github.com/ClickHouse/ClickHouse/issues/64172). [#64388](https://github.com/ClickHouse/ClickHouse/pull/64388) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64727](https://github.com/ClickHouse/ClickHouse/issues/64727): Fixed `CREATE TABLE AS` queries for tables with default expressions. [#64455](https://github.com/ClickHouse/ClickHouse/pull/64455) ([Anton Popov](https://github.com/CurtizJ)). +* Backported in [#64623](https://github.com/ClickHouse/ClickHouse/issues/64623): Fix an error `Cannot find column` in distributed queries with constant CTE in the `GROUP BY` key. 
[#64519](https://github.com/ClickHouse/ClickHouse/pull/64519) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#64680](https://github.com/ClickHouse/ClickHouse/issues/64680): Fix [#64612](https://github.com/ClickHouse/ClickHouse/issues/64612). Do not rewrite aggregation if `-If` combinator is already used. [#64638](https://github.com/ClickHouse/ClickHouse/pull/64638) ([Dmitry Novik](https://github.com/novikd)). +* Backported in [#64942](https://github.com/ClickHouse/ClickHouse/issues/64942): Fix OrderByLimitByDuplicateEliminationVisitor across subqueries. [#64766](https://github.com/ClickHouse/ClickHouse/pull/64766) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#64871](https://github.com/ClickHouse/ClickHouse/issues/64871): Fixed memory possible incorrect memory tracking in several kinds of queries: queries that read any data from S3, queries via http protocol, asynchronous inserts. [#64844](https://github.com/ClickHouse/ClickHouse/pull/64844) ([Anton Popov](https://github.com/CurtizJ)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#63364](https://github.com/ClickHouse/ClickHouse/issues/63364): Implement cumulative A Sync status. [#61464](https://github.com/ClickHouse/ClickHouse/pull/61464) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#63338](https://github.com/ClickHouse/ClickHouse/issues/63338): Use `/commit/` to have the URLs in [reports](https://play.clickhouse.com/play?user=play#c2VsZWN0IGRpc3RpbmN0IGNvbW1pdF91cmwgZnJvbSBjaGVja3Mgd2hlcmUgY2hlY2tfc3RhcnRfdGltZSA+PSBub3coKSAtIGludGVydmFsIDEgbW9udGggYW5kIHB1bGxfcmVxdWVzdF9udW1iZXI9NjA1MzI=) like https://github.com/ClickHouse/ClickHouse/commit/44f8bc5308b53797bec8cccc3bd29fab8a00235d and not like https://github.com/ClickHouse/ClickHouse/commits/44f8bc5308b53797bec8cccc3bd29fab8a00235d. [#63331](https://github.com/ClickHouse/ClickHouse/pull/63331) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#63376](https://github.com/ClickHouse/ClickHouse/issues/63376):. [#63366](https://github.com/ClickHouse/ClickHouse/pull/63366) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Backported in [#63571](https://github.com/ClickHouse/ClickHouse/issues/63571):. [#63551](https://github.com/ClickHouse/ClickHouse/pull/63551) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Backported in [#63651](https://github.com/ClickHouse/ClickHouse/issues/63651): Fix 02362_part_log_merge_algorithm flaky test. [#63635](https://github.com/ClickHouse/ClickHouse/pull/63635) ([Miсhael Stetsyuk](https://github.com/mstetsyuk)). +* Backported in [#63828](https://github.com/ClickHouse/ClickHouse/issues/63828): Fix test_odbc_interaction from aarch64 [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63787](https://github.com/ClickHouse/ClickHouse/pull/63787) ([alesapin](https://github.com/alesapin)). +* Backported in [#63897](https://github.com/ClickHouse/ClickHouse/issues/63897): Fix test `test_catboost_evaluate` for aarch64. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63789](https://github.com/ClickHouse/ClickHouse/pull/63789) ([alesapin](https://github.com/alesapin)). +* Backported in [#63889](https://github.com/ClickHouse/ClickHouse/issues/63889): Remove HDFS from disks config for one integration test for arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63832](https://github.com/ClickHouse/ClickHouse/pull/63832) ([alesapin](https://github.com/alesapin)). 
+* Backported in [#63881](https://github.com/ClickHouse/ClickHouse/issues/63881): Bump version for old image in test_short_strings_aggregation to make it work on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63836](https://github.com/ClickHouse/ClickHouse/pull/63836) ([alesapin](https://github.com/alesapin)). +* Backported in [#63919](https://github.com/ClickHouse/ClickHouse/issues/63919): Disable test `test_non_default_compression/test.py::test_preconfigured_deflateqpl_codec` on arm. [#61457](https://github.com/ClickHouse/ClickHouse/issues/61457). [#63839](https://github.com/ClickHouse/ClickHouse/pull/63839) ([alesapin](https://github.com/alesapin)). +* Backported in [#63971](https://github.com/ClickHouse/ClickHouse/issues/63971): Fix 02124_insert_deduplication_token_multiple_blocks. [#63950](https://github.com/ClickHouse/ClickHouse/pull/63950) ([Han Fei](https://github.com/hanfei1991)). +* Backported in [#64049](https://github.com/ClickHouse/ClickHouse/issues/64049): Add `ClickHouseVersion.copy` method. Create a branch release in advance without spinning out the release to increase the stability. [#64039](https://github.com/ClickHouse/ClickHouse/pull/64039) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#64078](https://github.com/ClickHouse/ClickHouse/issues/64078): The mime type is not 100% reliable for Python and shell scripts without shebangs; add a check for file extension. [#64062](https://github.com/ClickHouse/ClickHouse/pull/64062) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#64161](https://github.com/ClickHouse/ClickHouse/issues/64161): Add retries in git submodule update. [#64125](https://github.com/ClickHouse/ClickHouse/pull/64125) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Critical Bug Fix (crash, LOGICAL_ERROR, data loss, RBAC) + +* Backported in [#64589](https://github.com/ClickHouse/ClickHouse/issues/64589): Disabled `enable_vertical_final` setting by default. This feature should not be used because it has a bug: [#64543](https://github.com/ClickHouse/ClickHouse/issues/64543). [#64544](https://github.com/ClickHouse/ClickHouse/pull/64544) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Backported in [#64880](https://github.com/ClickHouse/ClickHouse/issues/64880): This PR fixes an error when a user in a specific situation can escalate their privileges on the default database without necessary grants. [#64769](https://github.com/ClickHouse/ClickHouse/pull/64769) ([pufit](https://github.com/pufit)). + +#### NO CL CATEGORY + +* Backported in [#63306](https://github.com/ClickHouse/ClickHouse/issues/63306):. [#63297](https://github.com/ClickHouse/ClickHouse/pull/63297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#63710](https://github.com/ClickHouse/ClickHouse/issues/63710):. [#63415](https://github.com/ClickHouse/ClickHouse/pull/63415) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Backport [#64363](https://github.com/ClickHouse/ClickHouse/issues/64363) to 24.4: Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts"'. [#64905](https://github.com/ClickHouse/ClickHouse/pull/64905) ([Raúl Marín](https://github.com/Algunenano)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* group_by_use_nulls strikes back [#62922](https://github.com/ClickHouse/ClickHouse/pull/62922) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Add `FROM` keyword to `TRUNCATE ALL TABLES` [#63241](https://github.com/ClickHouse/ClickHouse/pull/63241) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* More checks for concurrently deleted files and dirs in system.remote_data_paths [#63274](https://github.com/ClickHouse/ClickHouse/pull/63274) ([Alexander Gololobov](https://github.com/davenger)). +* Try fix segfault in `MergeTreeReadPoolBase::createTask` [#63323](https://github.com/ClickHouse/ClickHouse/pull/63323) ([Antonio Andelic](https://github.com/antonio2368)). +* Skip unaccessible table dirs in system.remote_data_paths [#63330](https://github.com/ClickHouse/ClickHouse/pull/63330) ([Alexander Gololobov](https://github.com/davenger)). +* Workaround for `oklch()` inside canvas bug for firefox [#63404](https://github.com/ClickHouse/ClickHouse/pull/63404) ([Sergei Trifonov](https://github.com/serxa)). +* Cancel S3 reads properly when parallel reads are used [#63687](https://github.com/ClickHouse/ClickHouse/pull/63687) ([Antonio Andelic](https://github.com/antonio2368)). +* Userspace page cache: don't collect stats if cache is unused [#63730](https://github.com/ClickHouse/ClickHouse/pull/63730) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix sanitizers [#64090](https://github.com/ClickHouse/ClickHouse/pull/64090) ([Azat Khuzhin](https://github.com/azat)). +* Split tests 03039_dynamic_all_merge_algorithms to avoid timeouts [#64363](https://github.com/ClickHouse/ClickHouse/pull/64363) ([Kruglov Pavel](https://github.com/Avogar)). +* CI: Critical bugfix category in PR template [#64480](https://github.com/ClickHouse/ClickHouse/pull/64480) ([Max K.](https://github.com/maxknv)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f7d84cce4b1..2f96daf4887 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,4 +1,5 @@ v24.5.1.1763-stable 2024-06-01 +v24.4.2.141-stable 2024-06-07 v24.4.1.2088-stable 2024-05-01 v24.3.3.102-lts 2024-05-01 v24.3.2.23-lts 2024-04-03 From f77b6096c405aaf8862582bf87a897e925de7a6a Mon Sep 17 00:00:00 2001 From: Ilya Golshtein Date: Fri, 7 Jun 2024 12:14:21 +0000 Subject: [PATCH 25/34] time_virtual_col: st_mtime, not st_mtim, because of darwin --- src/Storages/StorageFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 09f0bd60859..aaf84f6f82c 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1371,7 +1371,7 @@ Chunk StorageFileSource::generate() struct stat file_stat; file_stat = getFileStat(current_path, storage->use_table_fd, storage->table_fd, storage->getName()); current_file_size = file_stat.st_size; - current_file_last_modified = Poco::Timestamp::fromEpochTime(file_stat.st_mtim.tv_sec); + current_file_last_modified = Poco::Timestamp::fromEpochTime(file_stat.st_mtime); if (getContext()->getSettingsRef().engine_file_skip_empty_files && file_stat.st_size == 0) continue; From b04fb116a5cd8c6064804a1949b97d31ccc95a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 7 Jun 2024 11:26:12 +0200 Subject: [PATCH 26/34] Compiler happy. 
Developer unhappy

---
 src/Storages/StorageGenerateRandom.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp
index ca9c6fb3226..8852e468c5e 100644
--- a/src/Storages/StorageGenerateRandom.cpp
+++ b/src/Storages/StorageGenerateRandom.cpp
@@ -706,7 +706,7 @@ Pipe StorageGenerateRandom::read(
     if (query_limit && num_streams * max_block_size > query_limit)
     {
         /// We want to avoid spawning more streams than necessary
-        num_streams = std::min(num_streams, ((query_limit + max_block_size - 1) / max_block_size));
+        num_streams = std::min(num_streams, static_cast<size_t>(((query_limit + max_block_size - 1) / max_block_size)));
     }
     Pipes pipes;
     pipes.reserve(num_streams);

From f50a951e8e3cf80652ea525b4232833c846507f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?=
Date: Fri, 7 Jun 2024 16:49:07 +0200
Subject: [PATCH 27/34] Fix innocuous data race in detectLanguage

---
 contrib/cld2 | 2 +-
 tests/queries/0_stateless/03168_cld2_tsan.reference | 2 ++
 tests/queries/0_stateless/03168_cld2_tsan.sql | 10 ++++++++++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03168_cld2_tsan.reference
 create mode 100644 tests/queries/0_stateless/03168_cld2_tsan.sql

diff --git a/contrib/cld2 b/contrib/cld2
index bc6d493a2f6..217ba8b8805 160000
--- a/contrib/cld2
+++ b/contrib/cld2
@@ -1 +1 @@
-Subproject commit bc6d493a2f64ed1fc1c4c4b4294a542a04e04217
+Subproject commit 217ba8b8805b41557faadaa47bb6e99f2242eea3
diff --git a/tests/queries/0_stateless/03168_cld2_tsan.reference b/tests/queries/0_stateless/03168_cld2_tsan.reference
new file mode 100644
index 00000000000..6c3cafd4a6d
--- /dev/null
+++ b/tests/queries/0_stateless/03168_cld2_tsan.reference
@@ -0,0 +1,2 @@
+{'ja':0.62,'fr':0.36}
+{'ja':0.62,'fr':0.36}
diff --git a/tests/queries/0_stateless/03168_cld2_tsan.sql b/tests/queries/0_stateless/03168_cld2_tsan.sql
new file mode 100644
index 00000000000..701a781c472
--- /dev/null
+++ b/tests/queries/0_stateless/03168_cld2_tsan.sql
@@ -0,0 +1,10 @@
+-- Tags: no-fasttest
+-- Tag no-fasttest: depends on cld2
+
+-- https://github.com/ClickHouse/ClickHouse/issues/64931
+SELECT detectLanguageMixed(materialize('二兎を追う者は一兎をも得ず二兎を追う者は一兎をも得ず A vaincre sans peril, on triomphe sans gloire.'))
+GROUP BY
+    GROUPING SETS (
+        ('a', toUInt256(1)),
+        (stringToH3(toFixedString(toFixedString('85283473ffffff', 14), 14))))
+SETTINGS allow_experimental_nlp_functions = 1;

From 8c4f5c65aa21569dbaea1d409d273a842a736437 Mon Sep 17 00:00:00 2001
From: "Mikhail f. 
Shiryaev" Date: Fri, 7 Jun 2024 17:07:06 +0200 Subject: [PATCH 28/34] Use a named logger in build_download_helper --- tests/ci/build_download_helper.py | 34 ++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/tests/ci/build_download_helper.py b/tests/ci/build_download_helper.py index 036d3548eb9..0532d618802 100644 --- a/tests/ci/build_download_helper.py +++ b/tests/ci/build_download_helper.py @@ -15,6 +15,8 @@ from ci_config import CI_CONFIG DOWNLOAD_RETRIES_COUNT = 5 +logger = logging.getLogger(__name__) + class DownloadException(Exception): pass @@ -30,7 +32,7 @@ def get_with_retries( sleep: int = 3, **kwargs: Any, ) -> requests.Response: - logging.info( + logger.info( "Getting URL with %i tries and sleep %i in between: %s", retries, sleep, url ) exc = Exception("A placeholder to satisfy typing and avoid nesting") @@ -42,7 +44,7 @@ def get_with_retries( return response except Exception as e: if i + 1 < retries: - logging.info("Exception '%s' while getting, retry %i", e, i + 1) + logger.info("Exception '%s' while getting, retry %i", e, i + 1) time.sleep(sleep) exc = e @@ -96,7 +98,7 @@ def get_gh_api( ) try_auth = e.response.status_code == 404 if (ratelimit_exceeded or try_auth) and not token_is_set: - logging.warning( + logger.warning( "Received rate limit exception, setting the auth header and retry" ) set_auth_header() @@ -107,7 +109,7 @@ def get_gh_api( exc = e if try_cnt < retries: - logging.info("Exception '%s' while getting, retry %i", exc, try_cnt) + logger.info("Exception '%s' while getting, retry %i", exc, try_cnt) time.sleep(sleep) raise APIException(f"Unable to request data from GH API: {url}") from exc @@ -121,25 +123,25 @@ def read_build_urls(build_name: str, reports_path: Union[Path, str]) -> List[str for root, _, files in os.walk(reports_path): for file in files: if file.endswith(f"_{build_name}.json"): - logging.info("Found build report json %s for %s", file, build_name) + logger.info("Found build report json %s for %s", file, build_name) with open( os.path.join(root, file), "r", encoding="utf-8" ) as file_handler: build_report = json.load(file_handler) return build_report["build_urls"] # type: ignore - logging.info("A build report is not found for %s", build_name) + logger.info("A build report is not found for %s", build_name) return [] def download_build_with_progress(url: str, path: Path) -> None: - logging.info("Downloading from %s to temp path %s", url, path) + logger.info("Downloading from %s to temp path %s", url, path) for i in range(DOWNLOAD_RETRIES_COUNT): try: response = get_with_retries(url, retries=1, stream=True) total_length = int(response.headers.get("content-length", 0)) if path.is_file() and total_length and path.stat().st_size == total_length: - logging.info( + logger.info( "The file %s already exists and have a proper size %s", path, total_length, @@ -148,14 +150,14 @@ def download_build_with_progress(url: str, path: Path) -> None: with open(path, "wb") as f: if total_length == 0: - logging.info( + logger.info( "No content-length, will download file without progress" ) f.write(response.content) else: dl = 0 - logging.info("Content length is %ld bytes", total_length) + logger.info("Content length is %ld bytes", total_length) for data in response.iter_content(chunk_size=4096): dl += len(data) f.write(data) @@ -170,8 +172,8 @@ def download_build_with_progress(url: str, path: Path) -> None: except Exception as e: if sys.stdout.isatty(): sys.stdout.write("\n") - if os.path.exists(path): - os.remove(path) + if 
path.exists():
+            path.unlink()
 
         if i + 1 < DOWNLOAD_RETRIES_COUNT:
             time.sleep(3)
@@ -182,7 +184,7 @@ def download_build_with_progress(url: str, path: Path) -> None:
     if sys.stdout.isatty():
         sys.stdout.write("\n")
-    logging.info("Downloading finished")
+    logger.info("Downloading finished")
 
 
 def download_builds(
@@ -191,7 +193,7 @@ def download_builds(
     for url in build_urls:
         if filter_fn(url):
             fname = os.path.basename(url.replace("%2B", "+").replace("%20", " "))
-            logging.info("Will download %s to %s", fname, result_path)
+            logger.info("Will download %s to %s", fname, result_path)
             download_build_with_progress(url, result_path / fname)
 
 
@@ -203,7 +205,7 @@ def download_builds_filter(
 ) -> None:
     build_name = get_build_name_for_check(check_name)
     urls = read_build_urls(build_name, reports_path)
-    logging.info("The build report for %s contains the next URLs: %s", build_name, urls)
+    logger.info("The build report for %s contains the next URLs: %s", build_name, urls)
 
     if not urls:
         raise DownloadException("No build URLs found")
@@ -240,7 +242,7 @@ def get_clickhouse_binary_url(
 ) -> Optional[str]:
     build_name = get_build_name_for_check(check_name)
     urls = read_build_urls(build_name, reports_path)
-    logging.info("The build report for %s contains the next URLs: %s", build_name, urls)
+    logger.info("The build report for %s contains the next URLs: %s", build_name, urls)
     for url in urls:
         check_url = url
         if "?" in check_url:

From dfc4184d7ff76dace72caeed4c5effbbdc3aa906 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky
Date: Sun, 9 Jun 2024 19:43:23 +0000
Subject: [PATCH 29/34] Fix ALTER MODIFY COMMENT in parameterized VIEWs

---
 src/Storages/AlterCommands.cpp | 5 ++++-
 .../03142_alter_comment_parameterized_view.reference | 0
 .../0_stateless/03142_alter_comment_parameterized_view.sql | 4 ++++
 3 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference
 create mode 100644 tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index 281fc72dfc4..f6d1bda422a 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -1583,7 +1583,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
         }
     }
 
-    if (all_columns.empty())
+    /// Parameterized views do not have 'columns' in their metadata
+    bool is_parameterized_view = table->as<StorageView>() && table->as<StorageView>()->isParameterizedView();
+
+    if (!is_parameterized_view && all_columns.empty())
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot DROP or CLEAR all columns");
 
     validateColumnsDefaultsAndGetSampleBlock(default_expr_list, all_columns.getAll(), context);
diff --git a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
new file mode 100644
index 00000000000..665bbbbc963
--- /dev/null
+++ b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
@@ -0,0 +1,4 @@
+DROP TABLE IF EXISTS test_table_comment;
+CREATE VIEW test_table_comment AS SELECT toString({date_from:String});
+ALTER TABLE test_table_comment MODIFY COMMENT 'test comment';
+DROP TABLE test_table_comment;

From 7997ce850c376d063303edf9ef5071f1ad47147f Mon Sep 17 00:00:00 2001
From: Yarik 
Briukhovetskyi <114298166+yariks5s@users.noreply.github.com>
Date: Mon, 10 Jun 2024 01:12:44 +0200
Subject: [PATCH 30/34] include storageview

---
 src/Storages/AlterCommands.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp
index f6d1bda422a..c29deda6fc5 100644
--- a/src/Storages/AlterCommands.cpp
+++ b/src/Storages/AlterCommands.cpp
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <Storages/StorageView.h>
 #include
 #include
 #include

From 5e679a33e18e3782feabe3f848610c1baa5b2654 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky
Date: Mon, 10 Jun 2024 01:43:27 +0000
Subject: [PATCH 31/34] Fix

---
 src/Databases/DatabasesCommon.cpp | 5 +++--
 .../03142_alter_comment_parameterized_view.reference | 1 +
 .../0_stateless/03142_alter_comment_parameterized_view.sql | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index fc75f8e44b9..b9d182c9c9f 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include "Common/logger_useful.h"
 #include
 #include
 #include
@@ -44,11 +45,11 @@ void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemo
         throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot alter table {} because it was created AS table function"
                         " and doesn't have structure in metadata", backQuote(ast_create_query.getTable()));
 
-    if (!has_structure && !ast_create_query.is_dictionary)
+    if (!has_structure && !ast_create_query.is_dictionary && !ast_create_query.isParameterizedView())
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot alter table {} metadata doesn't have structure", backQuote(ast_create_query.getTable()));
 
-    if (!ast_create_query.is_dictionary)
+    if (!ast_create_query.is_dictionary && !ast_create_query.isParameterizedView())
     {
         ASTPtr new_columns = InterpreterCreateQuery::formatColumns(metadata.columns);
         ASTPtr new_indices = InterpreterCreateQuery::formatIndices(metadata.secondary_indices);
diff --git a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference
index e69de29bb2d..9b93c75ea56 100644
--- a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference
+++ b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.reference
@@ -0,0 +1 @@
+CREATE VIEW default.test_table_comment AS (SELECT toString({date_from:String})) COMMENT \'test comment\'
diff --git a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
index 665bbbbc963..14af304f98c 100644
--- a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
+++ b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
@@ -1,4 +1,5 @@
 DROP TABLE IF EXISTS test_table_comment;
 CREATE VIEW test_table_comment AS SELECT toString({date_from:String});
 ALTER TABLE test_table_comment MODIFY COMMENT 'test comment';
+SELECT create_table_query FROM system.tables WHERE name = 'test_table_comment';
 DROP TABLE test_table_comment;

From 132aa996a3f25fa98f3edc1ad92bbc22725d4c8d Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 10 Jun 2024 13:49:39 +0200
Subject: [PATCH 32/34] Revert "Fix duplicating Delete events in blob_storage_log"

---
 src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp | 5 ++---
 src/Interpreters/SystemLog.cpp | 7 +------
 2 files changed, 3 insertions(+), 9 deletions(-)
diff --git a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
index afc13251f5b..ae719f5cde4 100644
--- a/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
@@ -382,7 +382,6 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
         {
             std::vector current_chunk;
             String keys;
-            size_t first_position = current_position;
             for (; current_position < objects.size() && current_chunk.size() < chunk_size_limit; ++current_position)
             {
                 Aws::S3::Model::ObjectIdentifier obj;
@@ -408,9 +407,9 @@ void S3ObjectStorage::removeObjectsImpl(const StoredObjects & objects, bool if_e
             {
                 const auto * outcome_error = outcome.IsSuccess() ? nullptr : &outcome.GetError();
                 auto time_now = std::chrono::system_clock::now();
-                for (size_t i = first_position; i < current_position; ++i)
+                for (const auto & object : objects)
                     blob_storage_log->addEvent(BlobStorageLogElement::EventType::Delete,
-                        uri.bucket, objects[i].remote_path, objects[i].local_path, objects[i].bytes_size,
+                        uri.bucket, object.remote_path, object.local_path, object.bytes_size,
                         outcome_error, time_now);
             }
 
diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp
index 3b25deeb59d..5e0ce2cb0de 100644
--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@@ -504,10 +504,6 @@ void SystemLog::flushImpl(const std::vector & to_flush,
 
     Block block(std::move(log_element_columns));
     MutableColumns columns = block.mutateColumns();
-
-    for (auto & column : columns)
-        column->reserve(to_flush.size());
-
     for (const auto & elem : to_flush)
         elem.appendToBlock(columns);
 
@@ -536,8 +532,7 @@ void SystemLog::flushImpl(const std::vector & to_flush,
     }
     catch (...)
     {
-        tryLogCurrentException(__PRETTY_FUNCTION__, fmt::format("Failed to flush system log {} with {} entries up to offset {}",
-            table_id.getNameForLogs(), to_flush.size(), to_flush_end));
+        tryLogCurrentException(__PRETTY_FUNCTION__);
     }
 
     queue->confirm(to_flush_end);

From 4ed0eaa3b064fef20a5c6bcfa55167b0033b58dc Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Mon, 10 Jun 2024 16:42:09 +0200
Subject: [PATCH 33/34] Update 03142_alter_comment_parameterized_view.sql

---
 .../0_stateless/03142_alter_comment_parameterized_view.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
index 14af304f98c..98318e99e4a 100644
--- a/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
+++ b/tests/queries/0_stateless/03142_alter_comment_parameterized_view.sql
@@ -1,5 +1,5 @@
 DROP TABLE IF EXISTS test_table_comment;
 CREATE VIEW test_table_comment AS SELECT toString({date_from:String});
 ALTER TABLE test_table_comment MODIFY COMMENT 'test comment';
-SELECT create_table_query FROM system.tables WHERE name = 'test_table_comment';
+SELECT create_table_query FROM system.tables WHERE name = 'test_table_comment' AND database = currentDatabase();
 DROP TABLE test_table_comment;

From 28a467a3beafb1eefa3a878c8f1df37783b2aaa8 Mon Sep 17 00:00:00 2001
From: Julia Kartseva
Date: Sat, 8 Jun 2024 07:44:34 +0000
Subject: [PATCH 34/34] fix 03165_string_functions_with_token_text_indexes

---
 .../03165_string_functions_with_token_text_indexes.sql | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql
index fee30af0245..bae98bd1eb6 100644
--- a/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql
+++ b/tests/queries/0_stateless/03165_string_functions_with_token_text_indexes.sql
@@ -127,7 +127,9 @@ CREATE TABLE 03165_token_ft
     INDEX idx_message message TYPE full_text() GRANULARITY 1
 )
 ENGINE = MergeTree
-ORDER BY id;
+ORDER BY id
+-- Full text index works only with full parts.
+SETTINGS min_bytes_for_full_part_storage=0;
 
 INSERT INTO 03165_token_ft VALUES(1, 'Service is not ready');