From e2e32fa34c92ff5009a64bd6ff79fdf0168d9366 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Apr 2023 10:25:37 +0000 Subject: [PATCH 01/67] Add config --- utils/keeper-bench/CMakeLists.txt | 2 +- utils/keeper-bench/Generator.cpp | 8 ++++++++ utils/keeper-bench/Generator.h | 2 ++ utils/keeper-bench/Runner.h | 17 ++++++++++++++++- utils/keeper-bench/main.cpp | 16 +++++++++------- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 2596be4addd..97d30117d69 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE clickhouse_common_zookeeper_no_log) +target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index b6d8223862c..3b2378c4396 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -1,4 +1,5 @@ #include "Generator.h" +#include #include #include @@ -338,3 +339,10 @@ std::unique_ptr getGenerator(const std::string & name) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", name); } + +std::unique_ptr constructGeneratorFromConfig(const std::string & config_path) +{ + ConfigProcessor config_processor(config_path, true, false); + auto loaded_config = config_processor.loadConfig(); + return nullptr; +} diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index e2c546e4bce..25e4d96caef 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -137,4 +137,6 @@ private: }; +std::unique_ptr constructGeneratorFromConfig(const std::string & config_path); + std::unique_ptr getGenerator(const std::string & name); diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index a00b7b43eff..d3f2d1800d9 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -22,12 +22,18 @@ namespace CurrentMetrics extern const Metric LocalThreadActive; } +namespace DB::ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + class Runner { public: Runner( size_t concurrency_, const std::string & generator_name, + const std::string & config_path, const Strings & hosts_strings_, double max_time_, double delay_, @@ -36,7 +42,6 @@ public: : concurrency(concurrency_) , pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency) , hosts_strings(hosts_strings_) - , generator(getGenerator(generator_name)) , max_time(max_time_) , delay(delay_) , continue_on_error(continue_on_error_) @@ -44,6 +49,16 @@ public: , info(std::make_shared()) , queue(concurrency) { + if (!generator_name.empty() && !config_path.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are defined. Please define only one of them"); + + if (generator_name.empty() && config_path.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are empty. 
Please define one of them"); + + if (!generator_name.empty()) + generator = getGenerator(generator_name); + else + generator = constructGeneratorFromConfig(config_path); } void thread(std::vector> zookeepers); diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 39af28e7580..83303fb4029 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -19,13 +19,14 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() - ("help", "produce help message") - ("generator", value()->default_value("set_small_data"), "query to execute") - ("concurrency,c", value()->default_value(1), "number of parallel queries") - ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)") - ("iterations,i", value()->default_value(0), "amount of queries to be executed") - ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken(), "") + ("help", "produce help message") + ("generator", value()->default_value(""), "query to execute") + ("config", value()->default_value(""), "xml file containing generator configuration") + ("concurrency,c", value()->default_value(1), "number of parallel queries") + ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value()->default_value(0), "amount of queries to be executed") + ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken(), "") ("continue_on_errors", "continue testing even if a query fails") ("reconnect", "establish new connection for every query") ; @@ -43,6 +44,7 @@ int main(int argc, char *argv[]) Runner runner(options["concurrency"].as(), options["generator"].as(), + options["config"].as(), options["hosts"].as(), options["timelimit"].as(), options["delay"].as(), From d74ff75d837d05df76f38ec732dd722b2cb53f98 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Apr 2023 15:10:58 +0000 Subject: [PATCH 02/67] Add support for create request test --- utils/keeper-bench/Generator.cpp | 766 +++++++++++++++++++++---------- utils/keeper-bench/Generator.h | 284 +++++++----- utils/keeper-bench/Runner.cpp | 43 +- utils/keeper-bench/Runner.h | 26 +- 4 files changed, 732 insertions(+), 387 deletions(-) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 3b2378c4396..e2c276a274d 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -1,4 +1,5 @@ #include "Generator.h" +#include "Common/Exception.h" #include #include #include @@ -6,13 +7,10 @@ using namespace Coordination; using namespace zkutil; -namespace DB -{ -namespace ErrorCodes +namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; } -} namespace { @@ -38,16 +36,16 @@ std::string generateRandomString(size_t length) return s; } } - -std::string generateRandomPath(const std::string & prefix, size_t length) -{ - return std::filesystem::path(prefix) / generateRandomString(length); -} - -std::string generateRandomData(size_t size) -{ - return generateRandomString(size); -} +// +//std::string generateRandomPath(const std::string & prefix, size_t length) +//{ +// return std::filesystem::path(prefix) / generateRandomString(length); +//} +// +//std::string generateRandomData(size_t size) +//{ +// return 
generateRandomString(size); +//} void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) { @@ -98,251 +96,509 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa } -void CreateRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +//void SetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +//{ +// removeRecursive(zookeeper, path_prefix); +// +// auto promise = std::make_shared>(); +// auto future = promise->get_future(); +// auto create_callback = [promise] (const CreateResponse & response) +// { +// if (response.error != Coordination::Error::ZOK) +// promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); +// else +// promise->set_value(); +// }; +// zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); +// future.get(); +//} +// +//ZooKeeperRequestPtr SetRequestGenerator::generate() +//{ +// auto request = std::make_shared(); +// request->path = path_prefix; +// request->data = generateRandomData(data_size); +// +// return request; +//} +// +//void MixedRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +//{ +// for (auto & generator : generators) +// generator->startup(zookeeper); +//} +// +//ZooKeeperRequestPtr MixedRequestGenerator::generate() +//{ +// pcg64 rng(randomSeed()); +// std::uniform_int_distribution distribution(0, generators.size() - 1); +// +// return generators[distribution(rng)]->generate(); +//} +// +//void GetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +//{ +// auto promise = std::make_shared>(); +// auto future = promise->get_future(); +// auto create_callback = [promise] (const CreateResponse & response) +// { +// if (response.error != Coordination::Error::ZOK) +// promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); +// else +// promise->set_value(); +// }; +// zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); +// future.get(); +// size_t total_nodes = 1; +// if (num_nodes) +// total_nodes = *num_nodes; +// +// for (size_t i = 0; i < total_nodes; ++i) +// { +// auto path = generateRandomPath(path_prefix, 5); +// while (std::find(paths_to_get.begin(), paths_to_get.end(), path) != paths_to_get.end()) +// path = generateRandomPath(path_prefix, 5); +// +// auto create_promise = std::make_shared>(); +// auto create_future = create_promise->get_future(); +// auto callback = [create_promise] (const CreateResponse & response) +// { +// if (response.error != Coordination::Error::ZOK) +// create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); +// else +// create_promise->set_value(); +// }; +// std::string data; +// if (nodes_data_size) +// data = generateRandomString(*nodes_data_size); +// +// zookeeper.create(path, data, false, false, default_acls, callback); +// create_future.get(); +// paths_to_get.push_back(path); +// } +//} +// +//Coordination::ZooKeeperRequestPtr GetRequestGenerator::generate() +//{ +// auto request = std::make_shared(); +// +// size_t path_index = distribution(rng); +// request->path = paths_to_get[path_index]; +// return request; +//} +// +//void ListRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +//{ +// auto promise = std::make_shared>(); +// auto future = promise->get_future(); +// auto create_callback = [promise] (const CreateResponse & response) +// { +// if (response.error != Coordination::Error::ZOK) +// 
promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); +// else +// promise->set_value(); +// }; +// zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); +// future.get(); +// +// size_t total_nodes = 1; +// if (num_nodes) +// total_nodes = *num_nodes; +// +// size_t path_length = 5; +// if (paths_length) +// path_length = *paths_length; +// +// for (size_t i = 0; i < total_nodes; ++i) +// { +// auto path = generateRandomPath(path_prefix, path_length); +// +// auto create_promise = std::make_shared>(); +// auto create_future = create_promise->get_future(); +// auto callback = [create_promise] (const CreateResponse & response) +// { +// if (response.error != Coordination::Error::ZOK) +// create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); +// else +// create_promise->set_value(); +// }; +// zookeeper.create(path, "", false, false, default_acls, callback); +// create_future.get(); +// } +//} +// +//Coordination::ZooKeeperRequestPtr ListRequestGenerator::generate() +//{ +// auto request = std::make_shared(); +// request->path = path_prefix; +// return request; +//} + +std::unique_ptr getGenerator(const std::string & name) { - removeRecursive(zookeeper, path_prefix); - - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); -} - -ZooKeeperRequestPtr CreateRequestGenerator::generate() -{ - auto request = std::make_shared(); - request->acls = default_acls; - size_t plength = 5; - if (path_length) - plength = *path_length; - auto path_candidate = generateRandomPath(path_prefix, plength); - - while (paths_created.contains(path_candidate)) - path_candidate = generateRandomPath(path_prefix, plength); - - paths_created.insert(path_candidate); - - request->path = path_candidate; - if (data_size) - request->data = generateRandomData(*data_size); - - return request; -} - - -void SetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -{ - removeRecursive(zookeeper, path_prefix); - - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); -} - -ZooKeeperRequestPtr SetRequestGenerator::generate() -{ - auto request = std::make_shared(); - request->path = path_prefix; - request->data = generateRandomData(data_size); - - return request; -} - -void MixedRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -{ - for (auto & generator : generators) - generator->startup(zookeeper); -} - -ZooKeeperRequestPtr MixedRequestGenerator::generate() -{ - pcg64 rng(randomSeed()); - std::uniform_int_distribution distribution(0, generators.size() - 1); - - return generators[distribution(rng)]->generate(); -} - -void GetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -{ - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto 
create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); - size_t total_nodes = 1; - if (num_nodes) - total_nodes = *num_nodes; - - for (size_t i = 0; i < total_nodes; ++i) - { - auto path = generateRandomPath(path_prefix, 5); - while (std::find(paths_to_get.begin(), paths_to_get.end(), path) != paths_to_get.end()) - path = generateRandomPath(path_prefix, 5); - - auto create_promise = std::make_shared>(); - auto create_future = create_promise->get_future(); - auto callback = [create_promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - create_promise->set_value(); - }; - std::string data; - if (nodes_data_size) - data = generateRandomString(*nodes_data_size); - - zookeeper.create(path, data, false, false, default_acls, callback); - create_future.get(); - paths_to_get.push_back(path); - } -} - -Coordination::ZooKeeperRequestPtr GetRequestGenerator::generate() -{ - auto request = std::make_shared(); - - size_t path_index = distribution(rng); - request->path = paths_to_get[path_index]; - return request; -} - -void ListRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -{ - auto promise = std::make_shared>(); - auto future = promise->get_future(); - auto create_callback = [promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - promise->set_value(); - }; - zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); - future.get(); - - size_t total_nodes = 1; - if (num_nodes) - total_nodes = *num_nodes; - - size_t path_length = 5; - if (paths_length) - path_length = *paths_length; - - for (size_t i = 0; i < total_nodes; ++i) - { - auto path = generateRandomPath(path_prefix, path_length); - - auto create_promise = std::make_shared>(); - auto create_future = create_promise->get_future(); - auto callback = [create_promise] (const CreateResponse & response) - { - if (response.error != Coordination::Error::ZOK) - create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); - else - create_promise->set_value(); - }; - zookeeper.create(path, "", false, false, default_acls, callback); - create_future.get(); - } -} - -Coordination::ZooKeeperRequestPtr ListRequestGenerator::generate() -{ - auto request = std::make_shared(); - request->path = path_prefix; - return request; -} - -std::unique_ptr getGenerator(const std::string & name) -{ - if (name == "create_no_data") - { - return std::make_unique(); - } - else if (name == "create_small_data") - { - return std::make_unique("/create_generator", 5, 32); - } - else if (name == "create_medium_data") - { - return std::make_unique("/create_generator", 5, 1024); - } - else if (name == "create_big_data") - { - return std::make_unique("/create_generator", 5, 512 * 1024); - } - else if (name == "get_no_data") - { - return std::make_unique("/get_generator", 10, 0); - } - else if (name == "get_small_data") - { - return std::make_unique("/get_generator", 10, 32); - } - else if (name == "get_medium_data") - { - return 
std::make_unique("/get_generator", 10, 1024); - } - else if (name == "get_big_data") - { - return std::make_unique("/get_generator", 10, 512 * 1024); - } - else if (name == "list_no_nodes") - { - return std::make_unique("/list_generator", 0, 1); - } - else if (name == "list_few_nodes") - { - return std::make_unique("/list_generator", 10, 5); - } - else if (name == "list_medium_nodes") - { - return std::make_unique("/list_generator", 1000, 5); - } - else if (name == "list_a_lot_nodes") - { - return std::make_unique("/list_generator", 100000, 5); - } - else if (name == "set_small_data") - { - return std::make_unique("/set_generator", 5); - } - else if (name == "mixed_small_data") - { - std::vector> generators; - generators.push_back(std::make_unique("/set_generator", 5)); - generators.push_back(std::make_unique("/get_generator", 10, 32)); - return std::make_unique(std::move(generators)); - } + //if (name == "create_no_data") + //{ + // return std::make_unique(); + //} + //else if (name == "create_small_data") + //{ + // return std::make_unique("/create_generator", 5, 32); + //} + //else if (name == "create_medium_data") + //{ + // return std::make_unique("/create_generator", 5, 1024); + //} + //else if (name == "create_big_data") + //{ + // return std::make_unique("/create_generator", 5, 512 * 1024); + //} + //else if (name == "get_no_data") + //{ + // return std::make_unique("/get_generator", 10, 0); + //} + //else if (name == "get_small_data") + //{ + // return std::make_unique("/get_generator", 10, 32); + //} + //else if (name == "get_medium_data") + //{ + // return std::make_unique("/get_generator", 10, 1024); + //} + //else if (name == "get_big_data") + //{ + // return std::make_unique("/get_generator", 10, 512 * 1024); + //} + //else if (name == "list_no_nodes") + //{ + // return std::make_unique("/list_generator", 0, 1); + //} + //else if (name == "list_few_nodes") + //{ + // return std::make_unique("/list_generator", 10, 5); + //} + //else if (name == "list_medium_nodes") + //{ + // return std::make_unique("/list_generator", 1000, 5); + //} + //else if (name == "list_a_lot_nodes") + //{ + // return std::make_unique("/list_generator", 100000, 5); + //} + //else if (name == "set_small_data") + //{ + // return std::make_unique("/set_generator", 5); + //} + //else if (name == "mixed_small_data") + //{ + // std::vector> generators; + // generators.push_back(std::make_unique("/set_generator", 5)); + // generators.push_back(std::make_unique("/get_generator", 10, 32)); + // return std::make_unique(std::move(generators)); + //} throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", name); } -std::unique_ptr constructGeneratorFromConfig(const std::string & config_path) +NumberGetter +NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { - ConfigProcessor config_processor(config_path, true, false); - auto loaded_config = config_processor.loadConfig(); - return nullptr; + NumberGetter number_getter; + + if (!config.has(key) && default_value.has_value()) + { + number_getter.value = *default_value; + } + else if (config.has(key + ".min_value") && config.has(key + ".max_value")) + { + NumberRange range{.min_value = config.getUInt64(key + ".min_value"), .max_value = config.getUInt64(key + ".max_value")}; + if (range.max_value <= range.min_value) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Range is invalid for key {}: [{}, {}]", key, range.min_value, range.max_value); + number_getter.value = 
range;
    }
    else
    {
        number_getter.value = config.getUInt64(key);
    }

    return number_getter;
}

std::string NumberGetter::description() const
{
    if (const auto * number = std::get_if<uint64_t>(&value))
        return std::to_string(*number);

    const auto & range = std::get<NumberRange>(value);
    return fmt::format("random value from range [{}, {}]", range.min_value, range.max_value);
}

uint64_t NumberGetter::getNumber() const
{
    if (const auto * number = std::get_if<uint64_t>(&value))
        return *number;

    const auto & range = std::get<NumberRange>(value);
    static pcg64 rng(randomSeed());
    return std::uniform_int_distribution<uint64_t>(range.min_value, range.max_value)(rng);
}

StringGetter StringGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config)
{
    StringGetter string_getter;
    if (config.has(key + ".random_string"))
        string_getter.value
            = NumberGetter::fromConfig(key + ".random_string.size", config);
    else
        string_getter.value = config.getString(key);

    return string_getter;
}

void StringGetter::setString(std::string name)
{
    value = std::move(name);
}

std::string StringGetter::getString() const
{
    if (const auto * string = std::get_if<std::string>(&value))
        return *string;

    const auto number_getter = std::get<NumberGetter>(value);
    return generateRandomString(number_getter.getNumber());
}

std::string StringGetter::description() const
{
    if (const auto * string = std::get_if<std::string>(&value))
        return *string;

    const auto number_getter = std::get<NumberGetter>(value);
    return fmt::format("random string with size of {}", number_getter.description());
}

bool StringGetter::isRandom() const
{
    return std::holds_alternative<NumberGetter>(value);
}

void RequestGenerator::getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config)
{
    getFromConfigImpl(key, config);
}

std::string RequestGenerator::description()
{
    return descriptionImpl();
}

Coordination::ZooKeeperRequestPtr RequestGenerator::generate(const Coordination::ACLs & acls)
{
    return generateImpl(acls);
}

CreateRequestGenerator::CreateRequestGenerator()
    : rng(randomSeed())
    , remove_picker(0, 1.0)
{}

void CreateRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config)
{
    path_prefix = config.getString(key + ".path_prefix");

    if (path_prefix.empty() || path_prefix[0] != '/')
        throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path_prefix for Create request generator: '{}'", path_prefix);

    name = StringGetter(NumberGetter::fromConfig(key + ".path_length", config, 5));

    if (config.has(key + ".data"))
        data = StringGetter::fromConfig(key + ".data", config);

    remove_factor = config.getDouble(key + ".remove_factor", 0.0);
}

std::string CreateRequestGenerator::descriptionImpl()
{
    std::string data_string
        = data.has_value() ?
fmt::format("data for created nodes: {}", data->description()) : "no data for created nodes"; + return fmt::format( + "Create Request Generator\n" + "- path prefix for created nodes: {}\n" + "- name for created nodes: {}\n" + "- {}\n" + "- remove factor: {}", + path_prefix, + name.description(), + data_string, + remove_factor); +} + +Coordination::ZooKeeperRequestPtr CreateRequestGenerator::generateImpl(const Coordination::ACLs & acls) +{ + if (!paths_created.empty() && remove_picker(rng) < remove_factor) + { + auto request = std::make_shared(); + auto it = paths_created.begin(); + request->path = *it; + paths_created.erase(it); + return request; + } + + auto request = std::make_shared(); + request->acls = acls; + + std::string path_candidate = std::filesystem::path(path_prefix) / name.getString(); + + while (paths_created.contains(path_candidate)) + path_candidate = std::filesystem::path(path_prefix) / name.getString(); + + paths_created.insert(path_candidate); + + request->path = std::move(path_candidate); + + if (data) + request->data = data->getString(); + + return request; +} + +Generator::Generator(const Poco::Util::AbstractConfiguration & config) +{ + Coordination::ACL acl; + acl.permissions = Coordination::ACL::All; + acl.scheme = "world"; + acl.id = "anyone"; + default_acls.emplace_back(std::move(acl)); + + static const std::string generator_key = "generator"; + + { + static const std::string setup_key = generator_key + ".setup"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(setup_key, keys); + for (const auto & key : keys) + { + if (key.starts_with("node")) + { + const auto & node = root_nodes.emplace_back(parseNode(setup_key + "." + key, config)); + + std::cout << "---- Will create tree ----" << std::endl; + node->dumpTree(); + } + } + } + { + static const std::string requests_key = generator_key + ".requests"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(requests_key, keys); + + std::cout << "\n---- Collecting request generators ----" << std::endl; + for (const auto & key : keys) + { + RequestGeneratorPtr request_generator; + + if (key.starts_with("create")) + request_generator = std::make_unique(); + + if (!request_generator) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", key); + + request_generator->getFromConfig(requests_key + "." + key, config); + + std::cout << fmt::format("\n{}\n", request_generator->description()) << std::endl; + request_generators.push_back(std::move(request_generator)); + } + + if (request_generators.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No request generators found in config"); + std::cout << "---- Done collecting request generators ----" << std::endl; + } + + request_picker = std::uniform_int_distribution(0, request_generators.size() - 1); +} + +std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + auto node = std::make_shared(); + node->name = StringGetter::fromConfig(key + ".name", config); + + if (config.has(key + ".data")) + node->data = StringGetter::fromConfig(key + ".data", config); + + Poco::Util::AbstractConfiguration::Keys node_keys; + config.keys(key, node_keys); + + for (const auto & node_key : node_keys) + { + if (!node_key.starts_with("node")) + continue; + + const auto node_key_string = key + "." 
+ node_key; + auto child_node = parseNode(node_key_string, config); + node->children.push_back(child_node); + + if (config.has(node_key_string + ".repeat")) + { + if (!child_node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); + + auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + for (size_t i = 1; i < repeat_count; ++i) + node->children.push_back(child_node); + } + } + + return node; +} + +void Generator::Node::dumpTree(int level) const +{ + std::string data_string + = data.has_value() ? fmt::format("{}", data->description()) : "no data"; + std::cout << fmt::format("{}name: {}, data: {}", std::string(level, '\t'), name.description(), data_string) << std::endl; + + for (const auto & child : children) + child->dumpTree(level + 1); +} + +void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const +{ + auto path = std::filesystem::path(parent_path) / name.getString(); + auto promise = std::make_shared>(); + auto future = promise->get_future(); + auto create_callback = [promise] (const CreateResponse & response) + { + if (response.error != Coordination::Error::ZOK) + promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + promise->set_value(); + }; + zookeeper.create(path, data ? data->getString() : "", false, false, acls, create_callback); + future.get(); + + for (const auto & child : children) + child->createNode(zookeeper, path, acls); +} + +void Generator::startup(Coordination::ZooKeeper & zookeeper) +{ + std::cout << "\n---- Creating test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + node->name.setString(node_name); + + std::string root_path = std::filesystem::path("/") / node_name; + std::cout << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + + node->createNode(zookeeper, "/", default_acls); + } + std::cout << "---- Created test data ----" << std::endl; +} + +Coordination::ZooKeeperRequestPtr Generator::generate() +{ + static pcg64 rng(randomSeed()); + return request_generators[request_picker(rng)]->generate(default_acls); } diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 25e4d96caef..123e4547120 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -13,130 +14,203 @@ std::string generateRandomPath(const std::string & prefix, size_t length = 5); std::string generateRandomData(size_t size); -class IGenerator +// +//class CreateRequestGenerator final : public IGenerator +//{ +//public: +// explicit CreateRequestGenerator( +// std::string path_prefix_ = "/create_generator", +// std::optional path_length_ = std::nullopt, +// std::optional data_size_ = std::nullopt) +// : path_prefix(path_prefix_) +// , path_length(path_length_) +// , data_size(data_size_) +// {} +// +// void startup(Coordination::ZooKeeper & zookeeper) override; +// Coordination::ZooKeeperRequestPtr generate() override; +// +//private: +// std::string path_prefix; +// std::optional path_length; +// std::optional data_size; +// std::unordered_set paths_created; +//}; +// +// +//class GetRequestGenerator final : public IGenerator +//{ +//public: +// explicit GetRequestGenerator( +// std::string path_prefix_ = "/get_generator", +// std::optional 
num_nodes_ = std::nullopt, +// std::optional nodes_data_size_ = std::nullopt) +// : path_prefix(path_prefix_) +// , num_nodes(num_nodes_) +// , nodes_data_size(nodes_data_size_) +// , rng(randomSeed()) +// , distribution(0, num_nodes ? *num_nodes - 1 : 0) +// {} +// +// void startup(Coordination::ZooKeeper & zookeeper) override; +// Coordination::ZooKeeperRequestPtr generate() override; +// +//private: +// std::string path_prefix; +// std::optional num_nodes; +// std::optional nodes_data_size; +// std::vector paths_to_get; +// +// pcg64 rng; +// std::uniform_int_distribution distribution; +//}; +// +//class ListRequestGenerator final : public IGenerator +//{ +//public: +// explicit ListRequestGenerator( +// std::string path_prefix_ = "/list_generator", +// std::optional num_nodes_ = std::nullopt, +// std::optional paths_length_ = std::nullopt) +// : path_prefix(path_prefix_) +// , num_nodes(num_nodes_) +// , paths_length(paths_length_) +// {} +// +// void startup(Coordination::ZooKeeper & zookeeper) override; +// Coordination::ZooKeeperRequestPtr generate() override; +// +//private: +// std::string path_prefix; +// std::optional num_nodes; +// std::optional paths_length; +//}; +// +//class SetRequestGenerator final : public IGenerator +//{ +//public: +// explicit SetRequestGenerator( +// std::string path_prefix_ = "/set_generator", +// uint64_t data_size_ = 5) +// : path_prefix(path_prefix_) +// , data_size(data_size_) +// {} +// +// void startup(Coordination::ZooKeeper & zookeeper) override; +// Coordination::ZooKeeperRequestPtr generate() override; +// +//private: +// std::string path_prefix; +// uint64_t data_size; +//}; +// +//class MixedRequestGenerator final : public IGenerator +//{ +//public: +// explicit MixedRequestGenerator(std::vector> generators_) +// : generators(std::move(generators_)) +// {} +// +// void startup(Coordination::ZooKeeper & zookeeper) override; +// Coordination::ZooKeeperRequestPtr generate() override; +// +//private: +// std::vector> generators; +//}; + +struct NumberGetter { -public: - IGenerator() + static NumberGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value = std::nullopt); + uint64_t getNumber() const; + std::string description() const; +private: + struct NumberRange { - Coordination::ACL acl; - acl.permissions = Coordination::ACL::All; - acl.scheme = "world"; - acl.id = "anyone"; - default_acls.emplace_back(std::move(acl)); - } - virtual void startup(Coordination::ZooKeeper & /*zookeeper*/) {} - virtual Coordination::ZooKeeperRequestPtr generate() = 0; - - virtual ~IGenerator() = default; - - Coordination::ACLs default_acls; + uint64_t min_value; + uint64_t max_value; + }; + std::variant value; }; -class CreateRequestGenerator final : public IGenerator +struct StringGetter { -public: - explicit CreateRequestGenerator( - std::string path_prefix_ = "/create_generator", - std::optional path_length_ = std::nullopt, - std::optional data_size_ = std::nullopt) - : path_prefix(path_prefix_) - , path_length(path_length_) - , data_size(data_size_) + explicit StringGetter(NumberGetter number_getter) + : value(std::move(number_getter)) {} - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; + StringGetter() = default; + static StringGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + void setString(std::string name); + std::string getString() const; + std::string description() const; + 
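    // Illustrative config shapes fromConfig() accepts; the XML element names
    // are assumed from the keys parsed in Generator.cpp:
    //
    //   <data>fixed_payload</data>          -> getString() always returns it
    //
    //   <data>
    //       <random_string>
    //           <size>
    //               <min_value>16</min_value>
    //               <max_value>64</max_value>
    //           </size>
    //       </random_string>
    //   </data>                             -> a fresh random string of 16..64 chars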
bool isRandom() const; private: + std::variant value; +}; + +struct RequestGenerator +{ + virtual ~RequestGenerator() = default; + + void getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + Coordination::ZooKeeperRequestPtr generate(const Coordination::ACLs & acls); + + std::string description(); +private: + virtual void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) = 0; + virtual std::string descriptionImpl() = 0; + virtual Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) = 0; +}; + +using RequestGeneratorPtr = std::unique_ptr; + +struct CreateRequestGenerator final : public RequestGenerator +{ + CreateRequestGenerator(); +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + std::string path_prefix; - std::optional path_length; - std::optional data_size; + StringGetter name; + std::optional data; + + double remove_factor; + pcg64 rng; + std::uniform_real_distribution remove_picker; + std::unordered_set paths_created; }; - -class GetRequestGenerator final : public IGenerator +class Generator { public: - explicit GetRequestGenerator( - std::string path_prefix_ = "/get_generator", - std::optional num_nodes_ = std::nullopt, - std::optional nodes_data_size_ = std::nullopt) - : path_prefix(path_prefix_) - , num_nodes(num_nodes_) - , nodes_data_size(nodes_data_size_) - , rng(randomSeed()) - , distribution(0, num_nodes ? *num_nodes - 1 : 0) - {} + explicit Generator(const Poco::Util::AbstractConfiguration & config); - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; + void startup(Coordination::ZooKeeper & zookeeper); + Coordination::ZooKeeperRequestPtr generate(); private: - std::string path_prefix; - std::optional num_nodes; - std::optional nodes_data_size; - std::vector paths_to_get; + struct Node + { + StringGetter name; + std::optional data; + std::vector> children; - pcg64 rng; - std::uniform_int_distribution distribution; + void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; + void dumpTree(int level = 0) const; + }; + + static std::shared_ptr parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::uniform_int_distribution request_picker; + std::vector> root_nodes; + std::vector request_generators; + Coordination::ACLs default_acls; }; -class ListRequestGenerator final : public IGenerator -{ -public: - explicit ListRequestGenerator( - std::string path_prefix_ = "/list_generator", - std::optional num_nodes_ = std::nullopt, - std::optional paths_length_ = std::nullopt) - : path_prefix(path_prefix_) - , num_nodes(num_nodes_) - , paths_length(paths_length_) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - -private: - std::string path_prefix; - std::optional num_nodes; - std::optional paths_length; -}; - -class SetRequestGenerator final : public IGenerator -{ -public: - explicit SetRequestGenerator( - std::string path_prefix_ = "/set_generator", - uint64_t data_size_ = 5) - : path_prefix(path_prefix_) - , data_size(data_size_) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - 
Coordination::ZooKeeperRequestPtr generate() override; - -private: - std::string path_prefix; - uint64_t data_size; -}; - -class MixedRequestGenerator final : public IGenerator -{ -public: - explicit MixedRequestGenerator(std::vector> generators_) - : generators(std::move(generators_)) - {} - - void startup(Coordination::ZooKeeper & zookeeper) override; - Coordination::ZooKeeperRequestPtr generate() override; - -private: - std::vector> generators; -}; - - -std::unique_ptr constructGeneratorFromConfig(const std::string & config_path); - -std::unique_ptr getGenerator(const std::string & name); +std::unique_ptr getGenerator(const std::string & name); diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index c858b476483..387b7baebdd 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,15 +1,50 @@ #include "Runner.h" -namespace DB -{ +#include -namespace ErrorCodes +namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; } -} +Runner::Runner( + size_t concurrency_, + const std::string & generator_name, + const std::string & config_path, + const Strings & hosts_strings_, + double max_time_, + double delay_, + bool continue_on_error_, + size_t max_iterations_) + : concurrency(concurrency_) + , pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency) + , hosts_strings(hosts_strings_) + , max_time(max_time_) + , delay(delay_) + , continue_on_error(continue_on_error_) + , max_iterations(max_iterations_) + , info(std::make_shared()) + , queue(concurrency) + { + if (!generator_name.empty() && !config_path.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are defined. Please define only one of them"); + if (generator_name.empty() && config_path.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are empty. Please define one of them"); + + if (!generator_name.empty()) + generator = getGenerator(generator_name); + else + { + DB::ConfigProcessor config_processor(config_path, true, false); + auto loaded_config = config_processor.loadConfig(); + + generator = std::make_unique(*loaded_config.configuration); + } + + if (!generator) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to create generator"); + } void Runner::thread(std::vector> zookeepers) { diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index d3f2d1800d9..f8280ac5f37 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -2,6 +2,7 @@ #include #include "Generator.h" #include +#include #include #include #include @@ -38,28 +39,7 @@ public: double max_time_, double delay_, bool continue_on_error_, - size_t max_iterations_) - : concurrency(concurrency_) - , pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency) - , hosts_strings(hosts_strings_) - , max_time(max_time_) - , delay(delay_) - , continue_on_error(continue_on_error_) - , max_iterations(max_iterations_) - , info(std::make_shared()) - , queue(concurrency) - { - if (!generator_name.empty() && !config_path.empty()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are defined. Please define only one of them"); - - if (generator_name.empty() && config_path.empty()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are empty. 
Please define one of them"); - - if (!generator_name.empty()) - generator = getGenerator(generator_name); - else - generator = constructGeneratorFromConfig(config_path); - } + size_t max_iterations_); void thread(std::vector> zookeepers); @@ -79,7 +59,7 @@ private: ThreadPool pool; Strings hosts_strings; - std::unique_ptr generator; + std::unique_ptr generator; double max_time = 0; double delay = 1; bool continue_on_error = false; From 7b33744618994151348c37cc8bacde9cf92c7bb5 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 6 Apr 2023 20:11:40 +0200 Subject: [PATCH 03/67] Added a check to clear and not load marks asynchronously from outdated parts --- src/Storages/MergeTree/IDataPartStorage.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 3 +++ src/Storages/MergeTree/MergeTreeMarksLoader.cpp | 15 +++++++++++++++ 3 files changed, 20 insertions(+) diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index f92784cb0da..98c14bd377c 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -284,6 +284,8 @@ public: /// It may be flush of buffered data or similar. virtual void precommitTransaction() = 0; virtual bool hasActiveTransaction() const = 0; + + mutable std::atomic is_part_outdated = false; }; using DataPartStoragePtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 66c52e6e24c..057dba29ea8 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3670,6 +3670,8 @@ void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const if (isInMemoryPart(part) && getSettings()->in_memory_parts_enable_wal) getWriteAheadLog()->dropPart(part->name); + + part->getDataPartStorage().is_part_outdated = true; } if (removed_active_part) @@ -3834,6 +3836,7 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo addPartContributionToColumnAndSecondaryIndexSizes(part); addPartContributionToDataVolume(part); modifyPartState(part, DataPartState::Active); + part->getDataPartStorage().is_part_outdated = false; } diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index ed8866b0044..b870421993f 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -68,6 +68,11 @@ MarkInCompressedFile MergeTreeMarksLoader::getMark(size_t row_index, size_t colu { if (!marks) { + if (this->data_part_storage->is_part_outdated) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempting to read from outdated part. 
path : {}", data_part_storage->getFullPath()); + } + Stopwatch watch(CLOCK_MONOTONIC); if (future.valid()) @@ -196,6 +201,16 @@ std::future MergeTreeMarksLoader::loadMarksAsync() [this]() -> MarkCache::MappedPtr { ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); + if (this->data_part_storage->is_part_outdated) + { + if (mark_cache) + { + auto key = mark_cache->hash(fs::path(data_part_storage->getFullPath()) / mrk_path); + marks.reset(); + mark_cache->remove(key); + } + return nullptr; + } return loadMarks(); }, *load_marks_threadpool, From 994b6dd71ce1396b9fd684c06ba0cf1f78c2be9e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 6 Apr 2023 19:17:35 +0000 Subject: [PATCH 04/67] Add other requests --- utils/keeper-bench/Generator.cpp | 367 +++++++++++++++++-------------- utils/keeper-bench/Generator.h | 58 ++++- 2 files changed, 258 insertions(+), 167 deletions(-) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index e2c276a274d..eba6b7d9747 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -3,6 +3,7 @@ #include #include #include +#include using namespace Coordination; using namespace zkutil; @@ -36,16 +37,6 @@ std::string generateRandomString(size_t length) return s; } } -// -//std::string generateRandomPath(const std::string & prefix, size_t length) -//{ -// return std::filesystem::path(prefix) / generateRandomString(length); -//} -// -//std::string generateRandomData(size_t size) -//{ -// return generateRandomString(size); -//} void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & path) { @@ -95,145 +86,6 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa remove_future.get(); } - -//void SetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -//{ -// removeRecursive(zookeeper, path_prefix); -// -// auto promise = std::make_shared>(); -// auto future = promise->get_future(); -// auto create_callback = [promise] (const CreateResponse & response) -// { -// if (response.error != Coordination::Error::ZOK) -// promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); -// else -// promise->set_value(); -// }; -// zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); -// future.get(); -//} -// -//ZooKeeperRequestPtr SetRequestGenerator::generate() -//{ -// auto request = std::make_shared(); -// request->path = path_prefix; -// request->data = generateRandomData(data_size); -// -// return request; -//} -// -//void MixedRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -//{ -// for (auto & generator : generators) -// generator->startup(zookeeper); -//} -// -//ZooKeeperRequestPtr MixedRequestGenerator::generate() -//{ -// pcg64 rng(randomSeed()); -// std::uniform_int_distribution distribution(0, generators.size() - 1); -// -// return generators[distribution(rng)]->generate(); -//} -// -//void GetRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -//{ -// auto promise = std::make_shared>(); -// auto future = promise->get_future(); -// auto create_callback = [promise] (const CreateResponse & response) -// { -// if (response.error != Coordination::Error::ZOK) -// promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); -// else -// promise->set_value(); -// }; -// zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); -// future.get(); -// size_t total_nodes = 1; -// if (num_nodes) -// total_nodes = 
*num_nodes; -// -// for (size_t i = 0; i < total_nodes; ++i) -// { -// auto path = generateRandomPath(path_prefix, 5); -// while (std::find(paths_to_get.begin(), paths_to_get.end(), path) != paths_to_get.end()) -// path = generateRandomPath(path_prefix, 5); -// -// auto create_promise = std::make_shared>(); -// auto create_future = create_promise->get_future(); -// auto callback = [create_promise] (const CreateResponse & response) -// { -// if (response.error != Coordination::Error::ZOK) -// create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); -// else -// create_promise->set_value(); -// }; -// std::string data; -// if (nodes_data_size) -// data = generateRandomString(*nodes_data_size); -// -// zookeeper.create(path, data, false, false, default_acls, callback); -// create_future.get(); -// paths_to_get.push_back(path); -// } -//} -// -//Coordination::ZooKeeperRequestPtr GetRequestGenerator::generate() -//{ -// auto request = std::make_shared(); -// -// size_t path_index = distribution(rng); -// request->path = paths_to_get[path_index]; -// return request; -//} -// -//void ListRequestGenerator::startup(Coordination::ZooKeeper & zookeeper) -//{ -// auto promise = std::make_shared>(); -// auto future = promise->get_future(); -// auto create_callback = [promise] (const CreateResponse & response) -// { -// if (response.error != Coordination::Error::ZOK) -// promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); -// else -// promise->set_value(); -// }; -// zookeeper.create(path_prefix, "", false, false, default_acls, create_callback); -// future.get(); -// -// size_t total_nodes = 1; -// if (num_nodes) -// total_nodes = *num_nodes; -// -// size_t path_length = 5; -// if (paths_length) -// path_length = *paths_length; -// -// for (size_t i = 0; i < total_nodes; ++i) -// { -// auto path = generateRandomPath(path_prefix, path_length); -// -// auto create_promise = std::make_shared>(); -// auto create_future = create_promise->get_future(); -// auto callback = [create_promise] (const CreateResponse & response) -// { -// if (response.error != Coordination::Error::ZOK) -// create_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); -// else -// create_promise->set_value(); -// }; -// zookeeper.create(path, "", false, false, default_acls, callback); -// create_future.get(); -// } -//} -// -//Coordination::ZooKeeperRequestPtr ListRequestGenerator::generate() -//{ -// auto request = std::make_shared(); -// request->path = path_prefix; -// return request; -//} - std::unique_ptr getGenerator(const std::string & name) { //if (name == "create_no_data") @@ -288,13 +140,6 @@ std::unique_ptr getGenerator(const std::string & name) //{ // return std::make_unique("/set_generator", 5); //} - //else if (name == "mixed_small_data") - //{ - // std::vector> generators; - // generators.push_back(std::make_unique("/set_generator", 5)); - // generators.push_back(std::make_unique("/get_generator", 10, 32)); - // return std::make_unique(std::move(generators)); - //} throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", name); } @@ -382,6 +227,99 @@ bool StringGetter::isRandom() const return std::holds_alternative(value); } +PathGetter PathGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + static constexpr std::string_view path_key_string = "path"; + + PathGetter path_getter; + Poco::Util::AbstractConfiguration::Keys path_keys; + 
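    // The loop below accepts two shapes of <path> entry under the request key.
    // A sketch (the key names "path" and "children_of" come from the checks
    // below; the XML nesting is an assumption):
    //   <path>/some_node</path>                        -- explicit path, used as-is
    //   <path><children_of>/test_root</children_of></path>
    //                                                  -- expanded in initialize()
    //                                                     to every child of /test_root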
config.keys(key, path_keys); + + for (const auto & path_key : path_keys) + { + if (!path_key.starts_with(path_key_string)) + continue; + + const auto current_path_key_string = key + "." + path_key; + const auto children_of_key = current_path_key_string + ".children_of"; + if (config.has(children_of_key)) + { + auto parent_node = config.getString(children_of_key); + if (parent_node.empty() || parent_node[0] != '/') + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path for request generator: '{}'", parent_node); + path_getter.parent_paths.push_back(std::move(parent_node)); + } + else + { + auto path = config.getString(key + "." + path_key); + + if (path.empty() || path[0] != '/') + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path for request generator: '{}'", path); + + path_getter.paths.push_back(std::move(path)); + } + } + + path_getter.path_picker = std::uniform_int_distribution(0, path_getter.paths.size() - 1); + return path_getter; +} + +void PathGetter::initialize(Coordination::ZooKeeper & zookeeper) +{ + for (const auto & parent_path : parent_paths) + { + auto list_promise = std::make_shared>(); + auto list_future = list_promise->get_future(); + auto callback = [list_promise] (const ListResponse & response) + { + if (response.error != Coordination::Error::ZOK) + list_promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); + else + list_promise->set_value(response); + }; + zookeeper.list(parent_path, ListRequestType::ALL, std::move(callback), {}); + auto list_response = list_future.get(); + + for (const auto & child : list_response.names) + paths.push_back(std::filesystem::path(parent_path) / child); + } + + path_picker = std::uniform_int_distribution(0, paths.size() - 1); + initialized = true; +} + +std::string PathGetter::getPath() const +{ + if (!initialized) + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "PathGetter is not initialized"); + + if (paths.size() == 1) + return paths[0]; + + static pcg64 rng(randomSeed()); + return paths[path_picker(rng)]; +} + +std::string PathGetter::description() const +{ + std::string description; + for (const auto & path : parent_paths) + { + if (!description.empty()) + description += ", "; + description += fmt::format("children of {}", path); + } + + for (const auto & path : paths) + { + if (!description.empty()) + description += ", "; + description += path; + } + + return description; +} + void RequestGenerator::getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) { getFromConfigImpl(key, config); @@ -397,6 +335,11 @@ Coordination::ZooKeeperRequestPtr RequestGenerator::generate(const Coordination: return generateImpl(acls); } +void RequestGenerator::startup(Coordination::ZooKeeper & zookeeper) +{ + startupImpl(zookeeper); +} + CreateRequestGenerator::CreateRequestGenerator() : rng(randomSeed()) , remove_picker(0, 1.0) @@ -404,12 +347,9 @@ CreateRequestGenerator::CreateRequestGenerator() void CreateRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) { - path_prefix = config.getString(key + ".path_prefix"); + parent_path = PathGetter::fromConfig(key, config); - if (path_prefix.empty() || path_prefix[0] != '/') - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Invalid path_prefix for Create request generator: '{}'", path_prefix); - - name = StringGetter(NumberGetter::fromConfig(key + ".path_length", config, 5)); + name = StringGetter(NumberGetter::fromConfig(key + ".name_length", 
config, 5)); if (config.has(key + ".data")) data = StringGetter::fromConfig(key + ".data", config); @@ -423,16 +363,21 @@ std::string CreateRequestGenerator::descriptionImpl() = data.has_value() ? fmt::format("data for created nodes: {}", data->description()) : "no data for created nodes"; return fmt::format( "Create Request Generator\n" - "- path prefix for created nodes: {}\n" + "- parent path(s) for created nodes: {}\n" "- name for created nodes: {}\n" "- {}\n" "- remove factor: {}", - path_prefix, + parent_path.description(), name.description(), data_string, remove_factor); } +void CreateRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + parent_path.initialize(zookeeper); +} + Coordination::ZooKeeperRequestPtr CreateRequestGenerator::generateImpl(const Coordination::ACLs & acls) { if (!paths_created.empty() && remove_picker(rng) < remove_factor) @@ -447,10 +392,10 @@ Coordination::ZooKeeperRequestPtr CreateRequestGenerator::generateImpl(const Coo auto request = std::make_shared(); request->acls = acls; - std::string path_candidate = std::filesystem::path(path_prefix) / name.getString(); + std::string path_candidate = std::filesystem::path(parent_path.getPath()) / name.getString(); while (paths_created.contains(path_candidate)) - path_candidate = std::filesystem::path(path_prefix) / name.getString(); + path_candidate = std::filesystem::path(parent_path.getPath()) / name.getString(); paths_created.insert(path_candidate); @@ -462,6 +407,86 @@ Coordination::ZooKeeperRequestPtr CreateRequestGenerator::generateImpl(const Coo return request; } +void SetRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + path = PathGetter::fromConfig(key, config); + + data = StringGetter::fromConfig(key + ".data", config); +} + +std::string SetRequestGenerator::descriptionImpl() +{ + return fmt::format( + "Set Request Generator\n" + "- path(s) to set: {}\n" + "- data to set: {}", + path.description(), + data.description()); +} + +Coordination::ZooKeeperRequestPtr SetRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) +{ + auto request = std::make_shared(); + request->path = path.getPath(); + request->data = data.getString(); + return request; +} + +void SetRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + path.initialize(zookeeper); +} + +void GetRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + path = PathGetter::fromConfig(key, config); +} + +std::string GetRequestGenerator::descriptionImpl() +{ + return fmt::format( + "Get Request Generator\n" + "- path(s) to get: {}", + path.description()); +} + +Coordination::ZooKeeperRequestPtr GetRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) +{ + auto request = std::make_shared(); + request->path = path.getPath(); + return request; +} + +void GetRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + path.initialize(zookeeper); +} + +void ListRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + path = PathGetter::fromConfig(key, config); +} + +std::string ListRequestGenerator::descriptionImpl() +{ + return fmt::format( + "List Request Generator\n" + "- path(s) to get: {}", + path.description()); +} + +Coordination::ZooKeeperRequestPtr ListRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) +{ + auto request = std::make_shared(); + request->path = path.getPath(); + 
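    // getPath() returns the single configured path directly; when several
    // candidates exist (e.g. after a children_of expansion at startup) it
    // picks one uniformly at random for each generated request.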
return request; +} + +void ListRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + path.initialize(zookeeper); +} + Generator::Generator(const Poco::Util::AbstractConfiguration & config) { Coordination::ACL acl; @@ -499,6 +524,12 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) if (key.starts_with("create")) request_generator = std::make_unique(); + else if (key.starts_with("set")) + request_generator = std::make_unique(); + else if (key.starts_with("get")) + request_generator = std::make_unique(); + else if (key.starts_with("list")) + request_generator = std::make_unique(); if (!request_generator) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", key); @@ -595,6 +626,12 @@ void Generator::startup(Coordination::ZooKeeper & zookeeper) node->createNode(zookeeper, "/", default_acls); } std::cout << "---- Created test data ----" << std::endl; + + + std::cout << "---- Initializing generators ----" << std::endl; + + for (const auto & generator : request_generators) + generator->startup(zookeeper); } Coordination::ZooKeeperRequestPtr Generator::generate() diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 123e4547120..af186ea6624 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -150,6 +150,23 @@ private: std::variant value; }; +struct PathGetter +{ + static PathGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config); + + std::string getPath() const; + std::string description() const; + + void initialize(Coordination::ZooKeeper & zookeeper); +private: + std::vector parent_paths; + + bool initialized = false; + + std::vector paths; + mutable std::uniform_int_distribution path_picker; +}; + struct RequestGenerator { virtual ~RequestGenerator() = default; @@ -159,10 +176,13 @@ struct RequestGenerator Coordination::ZooKeeperRequestPtr generate(const Coordination::ACLs & acls); std::string description(); + + void startup(Coordination::ZooKeeper & zookeeper); private: virtual void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) = 0; virtual std::string descriptionImpl() = 0; virtual Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) = 0; + virtual void startupImpl(Coordination::ZooKeeper &) {} }; using RequestGeneratorPtr = std::unique_ptr; @@ -174,8 +194,9 @@ private: void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; std::string descriptionImpl() override; Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; - std::string path_prefix; + PathGetter parent_path; StringGetter name; std::optional data; @@ -186,6 +207,40 @@ private: std::unordered_set paths_created; }; +struct SetRequestGenerator final : public RequestGenerator +{ +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter path; + StringGetter data; +}; + +struct GetRequestGenerator final : public RequestGenerator +{ +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() 
override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter path; +}; + +struct ListRequestGenerator final : public RequestGenerator +{ +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + PathGetter path; +}; + class Generator { public: @@ -193,7 +248,6 @@ public: void startup(Coordination::ZooKeeper & zookeeper); Coordination::ZooKeeperRequestPtr generate(); - private: struct Node { From 22a4d7d1e1c5d69542aae79fbd6b63f6ae1b2759 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Apr 2023 13:02:42 +0000 Subject: [PATCH 05/67] Support connection definition --- utils/keeper-bench/Generator.cpp | 223 ++++++++++++++++++++++++------- utils/keeper-bench/Generator.h | 34 ++++- utils/keeper-bench/Runner.cpp | 186 +++++++++++++++++++++----- utils/keeper-bench/Runner.h | 26 +++- utils/keeper-bench/main.cpp | 16 +-- 5 files changed, 390 insertions(+), 95 deletions(-) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index eba6b7d9747..b050fe6c639 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -1,5 +1,6 @@ #include "Generator.h" #include "Common/Exception.h" +#include "Common/ZooKeeper/ZooKeeperCommon.h" #include #include #include @@ -320,14 +321,122 @@ std::string PathGetter::description() const return description; } +RequestGetter RequestGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi) +{ + RequestGetter request_getter; + + Poco::Util::AbstractConfiguration::Keys generator_keys; + config.keys(key, generator_keys); + + bool use_weights = false; + size_t weight_sum = 0; + auto & generators = request_getter.request_generators; + for (const auto & generator_key : generator_keys) + { + RequestGeneratorPtr request_generator; + + if (generator_key.starts_with("create")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("set")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("get")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("list")) + request_generator = std::make_unique(); + else if (generator_key.starts_with("multi")) + { + if (for_multi) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Nested multi requests are not allowed"); + request_generator = std::make_unique(); + } + else + { + if (for_multi) + continue; + + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown generator {}", key + "." + generator_key); + } + + request_generator->getFromConfig(key + "." + generator_key, config); + + auto weight = request_generator->getWeight(); + use_weights |= weight != 1; + weight_sum += weight; + + generators.push_back(std::move(request_generator)); + } + + if (generators.empty()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No request generators found in config for key '{}'", key); + + + size_t max_value = use_weights ? 
weight_sum - 1 : generators.size() - 1; + request_getter.request_generator_picker = std::uniform_int_distribution(0, max_value); + + /// construct weight vector + if (use_weights) + { + auto & weights = request_getter.weights; + weights.reserve(generators.size()); + weights.push_back(generators[0]->getWeight() - 1); + + for (size_t i = 1; i < generators.size(); ++i) + weights.push_back(weights.back() + generators[i]->getWeight()); + } + + return request_getter; +} + +RequestGeneratorPtr RequestGetter::getRequestGenerator() const +{ + static pcg64 rng(randomSeed()); + + auto random_number = request_generator_picker(rng); + + if (weights.empty()) + return request_generators[random_number]; + + for (size_t i = 0; i < request_generators.size(); ++i) + { + if (random_number <= weights[i]) + return request_generators[i]; + } + + throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Invalid number generated: {}", random_number); +} + +std::string RequestGetter::description() const +{ + std::string guard(30, '-'); + std::string description = guard; + + for (const auto & request_generator : request_generators) + description += fmt::format("\n{}\n", request_generator->description()); + return description + guard; +} + +void RequestGetter::startup(Coordination::ZooKeeper & zookeeper) +{ + for (const auto & request_generator : request_generators) + request_generator->startup(zookeeper); +} + +const std::vector & RequestGetter::requestGenerators() const +{ + return request_generators; +} + void RequestGenerator::getFromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config) { + if (config.has(key + ".weight")) + weight = config.getUInt64(key + ".weight"); getFromConfigImpl(key, config); } std::string RequestGenerator::description() { - return descriptionImpl(); + std::string weight_string = weight == 1 ? "" : fmt::format("\n- weight: {}", weight); + return fmt::format("{}{}", descriptionImpl(), weight_string); } Coordination::ZooKeeperRequestPtr RequestGenerator::generate(const Coordination::ACLs & acls) @@ -340,6 +449,11 @@ void RequestGenerator::startup(Coordination::ZooKeeper & zookeeper) startupImpl(zookeeper); } +size_t RequestGenerator::getWeight() const +{ + return weight; +} + CreateRequestGenerator::CreateRequestGenerator() : rng(randomSeed()) , remove_picker(0, 1.0) @@ -487,6 +601,50 @@ void ListRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) path.initialize(zookeeper); } +void MultiRequestGenerator::getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) +{ + if (config.has(key + ".size")) + size = NumberGetter::fromConfig(key + ".size", config); + + request_getter = RequestGetter::fromConfig(key, config, /*for_multi*/ true); +}; + +std::string MultiRequestGenerator::descriptionImpl() +{ + std::string size_string = size.has_value() ? 
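
For reference, the weighted selection above means a generator's optional weight key shifts how often it is drawn: the picker draws uniformly from [0, weight_sum) and walks the cumulative weights vector. With the illustrative config below, set is picked twice as often as get:

    requests:
      set:
        weight: 2
        path: "/test3"
        data:
          random_string:
            size: 10
      get:
        path: "/test3"
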
fmt::format("- number of requests: {}\n", size->description()) : ""; + return fmt::format( + "Multi Request Generator\n" + "{}" + "- requests:\n{}", + size_string, + request_getter.description()); +} + +Coordination::ZooKeeperRequestPtr MultiRequestGenerator::generateImpl(const Coordination::ACLs & acls) +{ + Coordination::Requests ops; + + if (size) + { + auto request_count = size->getNumber(); + + for (size_t i = 0; i < request_count; ++i) + ops.push_back(request_getter.getRequestGenerator()->generate(acls)); + } + else + { + for (const auto & request_generator : request_getter.requestGenerators()) + ops.push_back(request_generator->generate(acls)); + } + + return std::make_shared(ops, acls); +} + +void MultiRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) +{ + request_getter.startup(zookeeper); +} + Generator::Generator(const Poco::Util::AbstractConfiguration & config) { Coordination::ACL acl; @@ -497,55 +655,25 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) static const std::string generator_key = "generator"; + static const std::string setup_key = generator_key + ".setup"; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(setup_key, keys); + for (const auto & key : keys) { - static const std::string setup_key = generator_key + ".setup"; - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(setup_key, keys); - for (const auto & key : keys) + if (key.starts_with("node")) { - if (key.starts_with("node")) - { - const auto & node = root_nodes.emplace_back(parseNode(setup_key + "." + key, config)); + const auto & node = root_nodes.emplace_back(parseNode(setup_key + "." + key, config)); - std::cout << "---- Will create tree ----" << std::endl; - node->dumpTree(); - } + std::cout << "---- Will create tree ----" << std::endl; + node->dumpTree(); } } - { - static const std::string requests_key = generator_key + ".requests"; - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(requests_key, keys); - std::cout << "\n---- Collecting request generators ----" << std::endl; - for (const auto & key : keys) - { - RequestGeneratorPtr request_generator; - - if (key.starts_with("create")) - request_generator = std::make_unique(); - else if (key.starts_with("set")) - request_generator = std::make_unique(); - else if (key.starts_with("get")) - request_generator = std::make_unique(); - else if (key.starts_with("list")) - request_generator = std::make_unique(); - - if (!request_generator) - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", key); - - request_generator->getFromConfig(requests_key + "." 
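
A multi generator, as parsed above, either draws size requests at random from its nested generators or, when size is omitted, emits exactly one request per nested generator. A minimal sketch with illustrative values:

    multi:
      size: 20
      create:
        path: "/test"
        name_length: 10
      get:
        path:
          children_of: "/test"
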
+ key, config); - - std::cout << fmt::format("\n{}\n", request_generator->description()) << std::endl; - request_generators.push_back(std::move(request_generator)); - } - - if (request_generators.empty()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No request generators found in config"); - std::cout << "---- Done collecting request generators ----" << std::endl; - } - - request_picker = std::uniform_int_distribution(0, request_generators.size() - 1); + std::cout << "\n---- Collecting request generators ----" << std::endl; + static const std::string requests_key = generator_key + ".requests"; + request_getter = RequestGetter::fromConfig(requests_key, config); + std::cout << request_getter.description() << std::endl; + std::cout << "---- Done collecting request generators ----" << std::endl; } std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) @@ -627,15 +755,12 @@ void Generator::startup(Coordination::ZooKeeper & zookeeper) } std::cout << "---- Created test data ----" << std::endl; - std::cout << "---- Initializing generators ----" << std::endl; - for (const auto & generator : request_generators) - generator->startup(zookeeper); + request_getter.startup(zookeeper); } Coordination::ZooKeeperRequestPtr Generator::generate() { - static pcg64 rng(randomSeed()); - return request_generators[request_picker(rng)]->generate(default_acls); + return request_getter.getRequestGenerator()->generate(default_acls); } diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index af186ea6624..d2925e8650a 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -178,14 +178,18 @@ struct RequestGenerator std::string description(); void startup(Coordination::ZooKeeper & zookeeper); + + size_t getWeight() const; private: virtual void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) = 0; virtual std::string descriptionImpl() = 0; virtual Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) = 0; virtual void startupImpl(Coordination::ZooKeeper &) {} + + size_t weight = 1; }; -using RequestGeneratorPtr = std::unique_ptr; +using RequestGeneratorPtr = std::shared_ptr; struct CreateRequestGenerator final : public RequestGenerator { @@ -241,6 +245,32 @@ private: PathGetter path; }; +struct RequestGetter +{ + static RequestGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi = false); + + RequestGeneratorPtr getRequestGenerator() const; + std::string description() const; + void startup(Coordination::ZooKeeper & zookeeper); + const std::vector & requestGenerators() const; +private: + std::vector request_generators; + std::vector weights; + mutable std::uniform_int_distribution request_generator_picker; +}; + +struct MultiRequestGenerator final : public RequestGenerator +{ +private: + void getFromConfigImpl(const std::string & key, const Poco::Util::AbstractConfiguration & config) override; + std::string descriptionImpl() override; + Coordination::ZooKeeperRequestPtr generateImpl(const Coordination::ACLs & acls) override; + void startupImpl(Coordination::ZooKeeper & zookeeper) override; + + std::optional size; + RequestGetter request_getter; +}; + class Generator { public: @@ -263,7 +293,7 @@ private: std::uniform_int_distribution request_picker; std::vector> root_nodes; - std::vector request_generators; + RequestGetter request_getter; Coordination::ACLs default_acls; }; diff 
--git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 387b7baebdd..3076bf42558 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -1,5 +1,8 @@ #include "Runner.h" +#include +#include "Common/ZooKeeper/ZooKeeperCommon.h" +#include "Common/ZooKeeper/ZooKeeperConstants.h" #include namespace DB::ErrorCodes @@ -18,33 +21,96 @@ Runner::Runner( size_t max_iterations_) : concurrency(concurrency_) , pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency) - , hosts_strings(hosts_strings_) , max_time(max_time_) , delay(delay_) , continue_on_error(continue_on_error_) , max_iterations(max_iterations_) , info(std::make_shared()) , queue(concurrency) +{ + + DB::ConfigurationPtr config = nullptr; + + if (!config_path.empty()) { - if (!generator_name.empty() && !config_path.empty()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are defined. Please define only one of them"); + DB::ConfigProcessor config_processor(config_path, true, false); + config = config_processor.loadConfig().configuration; + } - if (generator_name.empty() && config_path.empty()) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Both generator name and generator config path are empty. Please define one of them"); - - if (!generator_name.empty()) - generator = getGenerator(generator_name); - else - { - DB::ConfigProcessor config_processor(config_path, true, false); - auto loaded_config = config_processor.loadConfig(); - - generator = std::make_unique(*loaded_config.configuration); - } + if (!generator_name.empty()) + { + generator = getGenerator(generator_name); if (!generator) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to create generator"); } + else + { + if (!config) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No config file or generator name defined"); + + generator = std::make_unique(*config); + } + + if (!hosts_strings_.empty()) + { + for (const auto & host : hosts_strings_) + connection_infos.push_back({.host = host}); + } + else + { + if (!config) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No config file or hosts defined"); + + parseHostsFromConfig(*config); + } +} + +void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + ConnectionInfo default_connection_info; + + const auto fill_connection_details = [&](const std::string & key, auto & connection_info) + { + if (config.has(key + ".secure")) + connection_info.secure = config.getBool(key + ".secure"); + + if (config.has(key + ".session_timeout_ms")) + connection_info.session_timeout_ms = config.getInt(key + ".session_timeout_ms"); + + if (config.has(key + ".operation_timeout_ms")) + connection_info.operation_timeout_ms = config.getInt(key + ".operation_timeout_ms"); + + if (config.has(key + ".connection_timeout_ms")) + connection_info.connection_timeout_ms = config.getInt(key + ".connection_timeout_ms"); + }; + + fill_connection_details("connections", default_connection_info); + + Poco::Util::AbstractConfiguration::Keys connections_keys; + config.keys("connections", connections_keys); + + for (const auto & key : connections_keys) + { + std::string connection_key = "connections." 
+ key; + auto connection_info = default_connection_info; + if (key.starts_with("host")) + { + connection_info.host = config.getString(connection_key); + connection_infos.push_back(std::move(connection_info)); + } + else if (key.starts_with("connection") && key != "connection_timeout_ms") + { + connection_info.host = config.getString(connection_key + ".host"); + if (config.has(connection_key + ".sessions")) + connection_info.sessions = config.getUInt64(connection_key + ".sessions"); + + fill_connection_details(connection_key, connection_info); + + connection_infos.push_back(std::move(connection_info)); + } + } +} void Runner::thread(std::vector> zookeepers) { @@ -130,7 +196,7 @@ void Runner::thread(std::vector> zookee { try { - zookeepers = getConnections(); + zookeepers = refreshConnections(); break; } catch (...) @@ -147,6 +213,24 @@ void Runner::thread(std::vector> zookee bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr & request, DB::InterruptListener & interrupt_listener) { + static std::unordered_map counts; + static size_t i = 0; + + counts[request->getOpNum()]++; + + //if (request->getOpNum() == Coordination::OpNum::Multi) + //{ + // for (const auto & multi_request : dynamic_cast(*request).requests) + // counts[dynamic_cast(*multi_request).getOpNum()]++; + //} + + ++i; + if (i % 10000 == 0) + { + for (const auto & [op_num, count] : counts) + std::cout << fmt::format("{}: {}", op_num, count) << std::endl; + } + bool inserted = false; while (!inserted) @@ -187,17 +271,17 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr void Runner::runBenchmark() { - auto aux_connections = getConnections(); + createConnections(); std::cerr << "Preparing to run\n"; - generator->startup(*aux_connections[0]); + generator->startup(*connections[0]); std::cerr << "Prepared\n"; try { - auto connections = getConnections(); for (size_t i = 0; i < concurrency; ++i) { - pool.scheduleOrThrowOnError([this, connections]() mutable { thread(connections); }); + auto thread_connections = connections; + pool.scheduleOrThrowOnError([this, connections = std::move(thread_connections)]() mutable { thread(connections); }); } } catch (...) 
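
parseHostsFromConfig above accepts both bare host entries and nested connection blocks under connections, with the top-level timeout keys acting as defaults that individual connections can override. A sketch mirroring the example.yaml added later in this series (endpoints are placeholders):

    connections:
      operation_timeout_ms: 3000
      connection_timeout_ms: 40000
      host: "localhost:9181"
      connection:
        host: "localhost:9181"
        secure: false
        session_timeout_ms: 2000
        sessions: 2
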
@@ -230,21 +314,55 @@ void Runner::runBenchmark()
 }
 
-std::vector> Runner::getConnections()
+void Runner::createConnections()
 {
-    std::vector> zookeepers;
-    for (const auto & host_string : hosts_strings)
+    for (size_t connection_info_idx = 0; connection_info_idx < connection_infos.size(); ++connection_info_idx)
     {
-        Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{host_string}, false};
-        std::vector nodes;
-        nodes.push_back(node);
-        zkutil::ZooKeeperArgs args;
-        args.session_timeout_ms = 30000;
-        args.connection_timeout_ms = 1000;
-        args.operation_timeout_ms = 10000;
-        zookeepers.emplace_back(std::make_shared(nodes, args, nullptr));
+        const auto & connection_info = connection_infos[connection_info_idx];
+        std::cout << fmt::format("Creating {} session(s) for:\n"
+                                 "- host: {}\n"
+                                 "- secure: {}\n"
+                                 "- session timeout: {}ms\n"
+                                 "- operation timeout: {}ms\n"
+                                 "- connection timeout: {}ms",
+                                 connection_info.sessions,
+                                 connection_info.host,
+                                 connection_info.secure,
+                                 connection_info.session_timeout_ms,
+                                 connection_info.operation_timeout_ms,
+                                 connection_info.connection_timeout_ms) << std::endl;
+
+        for (size_t session = 0; session < connection_info.sessions; ++session)
+        {
+            connections.emplace_back(getConnection(connection_info));
+            connections_to_info_map[connections.size() - 1] = connection_info_idx;
+        }
     }
-
-
-    return zookeepers;
+}
+
+std::shared_ptr Runner::getConnection(const ConnectionInfo & connection_info)
+{
+    Coordination::ZooKeeper::Node node{Poco::Net::SocketAddress{connection_info.host}, connection_info.secure};
+    std::vector nodes;
+    nodes.push_back(node);
+    zkutil::ZooKeeperArgs args;
+    args.session_timeout_ms = connection_info.session_timeout_ms;
+    args.connection_timeout_ms = connection_info.connection_timeout_ms;
+    args.operation_timeout_ms = connection_info.operation_timeout_ms;
+    return std::make_shared(nodes, args, nullptr);
+}
+
+std::vector> Runner::refreshConnections()
+{
+    std::lock_guard lock(connection_mutex);
+    for (size_t connection_idx = 0; connection_idx < connections.size(); ++connection_idx)
+    {
+        auto & connection = connections[connection_idx];
+        if (connection->isExpired())
+        {
+            const auto & connection_info = connection_infos[connections_to_info_map[connection_idx]];
+            connection = getConnection(connection_info);
+        }
+    }
+    return connections;
 }
diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h
index f8280ac5f37..e36089d5519 100644
--- a/utils/keeper-bench/Runner.h
+++ b/utils/keeper-bench/Runner.h
@@ -1,4 +1,5 @@
 #pragma once
+#include "Common/ZooKeeper/ZooKeeperConstants.h"
 #include 
 #include "Generator.h"
 #include 
@@ -12,6 +13,7 @@
 #include 
 #include 
+#include 
 #include "Stats.h"
 
 using Ports = std::vector;
@@ -54,11 +56,12 @@ public:
 
 private:
+    void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config);
 
     size_t concurrency = 1;
 
     ThreadPool pool;
-    Strings hosts_strings;
+
     std::unique_ptr generator;
     double max_time = 0;
     double delay = 1;
@@ -77,5 +80,24 @@ private:
 
     using Queue = ConcurrentBoundedQueue;
     Queue queue;
 
-    std::vector> getConnections();
+    struct ConnectionInfo
+    {
+        std::string host;
+
+        bool secure = false;
+        int32_t session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS;
+        int32_t connection_timeout_ms = Coordination::DEFAULT_CONNECTION_TIMEOUT_MS;
+        int32_t operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS;
+
+        size_t sessions = 1;
+    };
+
+    std::mutex connection_mutex;
+    std::vector connection_infos;
+    std::vector> connections;
+    std::unordered_map connections_to_info_map;
+ + void createConnections(); + std::shared_ptr getConnection(const ConnectionInfo & connection_info); + std::vector> refreshConnections(); }; diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index 83303fb4029..bec91dc6ad1 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -19,14 +19,14 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() - ("help", "produce help message") - ("generator", value()->default_value(""), "query to execute") - ("config", value()->default_value(""), "xml file containing generator configuration") - ("concurrency,c", value()->default_value(1), "number of parallel queries") - ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)") - ("iterations,i", value()->default_value(0), "amount of queries to be executed") - ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken(), "") + ("help", "produce help message") + ("generator", value()->default_value(""), "query to execute") + ("config", value()->default_value(""), "xml file containing generator configuration") + ("concurrency,c", value()->default_value(1), "number of parallel queries") + ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value()->default_value(0), "amount of queries to be executed") + ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") ("continue_on_errors", "continue testing even if a query fails") ("reconnect", "establish new connection for every query") ; From 9df7a673062d63053903a5a3d3268a8756813248 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Apr 2023 14:14:39 +0000 Subject: [PATCH 06/67] Add more configurations --- utils/keeper-bench/Generator.cpp | 15 +++-- utils/keeper-bench/Runner.cpp | 102 +++++++++++++++++++++---------- utils/keeper-bench/Runner.h | 20 +++--- utils/keeper-bench/main.cpp | 58 ++++++++++++------ 4 files changed, 129 insertions(+), 66 deletions(-) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index b050fe6c639..f9f684e49ef 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -591,7 +591,7 @@ std::string ListRequestGenerator::descriptionImpl() Coordination::ZooKeeperRequestPtr ListRequestGenerator::generateImpl(const Coordination::ACLs & /*acls*/) { - auto request = std::make_shared(); + auto request = std::make_shared(); request->path = path.getPath(); return request; } @@ -655,6 +655,7 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) static const std::string generator_key = "generator"; + std::cout << "---- Parsing setup ---- " << std::endl; static const std::string setup_key = generator_key + ".setup"; Poco::Util::AbstractConfiguration::Keys keys; config.keys(setup_key, keys); @@ -664,16 +665,18 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) { const auto & node = root_nodes.emplace_back(parseNode(setup_key + "." 
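
The setup section parsed here describes a tree of nodes to pre-create before the benchmark; a node entry may carry data, nest child node entries, and combine repeat with a randomly generated name (the parser rejects repeat on a fixed name). Abridged from the example.yaml added later in this series:

    generator:
      setup:
        node:
          name: "test"
          data: "somedata"
          node:
            repeat: 4
            name:
              random_string:
                size: 15
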
+ key, config)); - std::cout << "---- Will create tree ----" << std::endl; + std::cout << "Tree to create:" << std::endl; node->dumpTree(); + std::cout << std::endl; } } + std::cout << "---- Done parsing data setup ----\n" << std::endl; - std::cout << "\n---- Collecting request generators ----" << std::endl; + std::cout << "---- Collecting request generators ----" << std::endl; static const std::string requests_key = generator_key + ".requests"; request_getter = RequestGetter::fromConfig(requests_key, config); std::cout << request_getter.description() << std::endl; - std::cout << "---- Done collecting request generators ----" << std::endl; + std::cout << "---- Done collecting request generators ----\n" << std::endl; } std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) @@ -741,7 +744,7 @@ void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std: void Generator::startup(Coordination::ZooKeeper & zookeeper) { - std::cout << "\n---- Creating test data ----" << std::endl; + std::cout << "---- Creating test data ----" << std::endl; for (const auto & node : root_nodes) { auto node_name = node->name.getString(); @@ -753,7 +756,7 @@ void Generator::startup(Coordination::ZooKeeper & zookeeper) node->createNode(zookeeper, "/", default_acls); } - std::cout << "---- Created test data ----" << std::endl; + std::cout << "---- Created test data ----\n" << std::endl; std::cout << "---- Initializing generators ----" << std::endl; diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 3076bf42558..72d80e478db 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -3,6 +3,7 @@ #include "Common/ZooKeeper/ZooKeeperCommon.h" #include "Common/ZooKeeper/ZooKeeperConstants.h" +#include #include namespace DB::ErrorCodes @@ -11,22 +12,15 @@ namespace DB::ErrorCodes } Runner::Runner( - size_t concurrency_, + std::optional concurrency_, const std::string & generator_name, const std::string & config_path, const Strings & hosts_strings_, - double max_time_, - double delay_, - bool continue_on_error_, - size_t max_iterations_) - : concurrency(concurrency_) - , pool(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency) - , max_time(max_time_) - , delay(delay_) - , continue_on_error(continue_on_error_) - , max_iterations(max_iterations_) - , info(std::make_shared()) - , queue(concurrency) + std::optional max_time_, + std::optional delay_, + std::optional continue_on_error_, + std::optional max_iterations_) + : info(std::make_shared()) { DB::ConfigurationPtr config = nullptr; @@ -39,7 +33,7 @@ Runner::Runner( if (!generator_name.empty()) { - generator = getGenerator(generator_name); + //generator = getGenerator(generator_name); if (!generator) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to create generator"); @@ -49,7 +43,7 @@ Runner::Runner( if (!config) throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No config file or generator name defined"); - generator = std::make_unique(*config); + generator.emplace(*config); } if (!hosts_strings_.empty()) @@ -64,6 +58,41 @@ Runner::Runner( parseHostsFromConfig(*config); } + + std::cout << "---- Run options ---- " << std::endl; + if (concurrency_) + concurrency = *concurrency_; + else + concurrency = config->getUInt64("concurrency", 1); + std::cout << "Concurrency: " << concurrency << std::endl; + + if (max_iterations_) + max_iterations = *max_iterations_; + else + max_iterations = 
config->getUInt64("iterations", 0); + std::cout << "Iterations: " << max_iterations << std::endl; + + if (delay_) + delay = *delay_; + else + delay = config->getDouble("report_delay", 1); + std::cout << "Report delay: " << delay << std::endl; + + if (max_time_) + max_time = *max_time_; + else + max_time = config->getDouble("timelimit", 1.0); + std::cout << "Time limit: " << max_time << std::endl; + + if (continue_on_error_) + continue_on_error = *continue_on_error_; + else + continue_on_error = config->getBool("continue_on_error", 1.0); + std::cout << "Continue on error: " << continue_on_error << std::endl; + std::cout << "---- Run options ----\n" << std::endl; + + pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency); + queue.emplace(concurrency); } void Runner::parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config) @@ -134,7 +163,7 @@ void Runner::thread(std::vector> zookee while (!extracted) { - extracted = queue.tryPop(request, 100); + extracted = queue->tryPop(request, 100); if (shutdown || (max_iterations && requests_executed >= max_iterations)) @@ -211,12 +240,12 @@ void Runner::thread(std::vector> zookee } } -bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr & request, DB::InterruptListener & interrupt_listener) +bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && request, DB::InterruptListener & interrupt_listener) { - static std::unordered_map counts; - static size_t i = 0; - - counts[request->getOpNum()]++; + //static std::unordered_map counts; + //static size_t i = 0; + // + //counts[request->getOpNum()]++; //if (request->getOpNum() == Coordination::OpNum::Multi) //{ @@ -224,18 +253,18 @@ bool Runner::tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr // counts[dynamic_cast(*multi_request).getOpNum()]++; //} - ++i; - if (i % 10000 == 0) - { - for (const auto & [op_num, count] : counts) - std::cout << fmt::format("{}: {}", op_num, count) << std::endl; - } + //++i; + //if (i % 10000 == 0) + //{ + // for (const auto & [op_num, count] : counts) + // std::cout << fmt::format("{}: {}", op_num, count) << std::endl; + //} bool inserted = false; while (!inserted) { - inserted = queue.tryPush(request, 100); + inserted = queue->tryPush(std::move(request), 100); if (shutdown) { @@ -281,13 +310,13 @@ void Runner::runBenchmark() for (size_t i = 0; i < concurrency; ++i) { auto thread_connections = connections; - pool.scheduleOrThrowOnError([this, connections = std::move(thread_connections)]() mutable { thread(connections); }); + pool->scheduleOrThrowOnError([this, connections = std::move(thread_connections)]() mutable { thread(connections); }); } } catch (...) 
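
Each run option resolved above comes from the command line when given, otherwise from the top level of the configuration file. A sketch using the exact keys this code reads (values illustrative):

    concurrency: 20
    iterations: 10000
    report_delay: 4
    timelimit: 300
    continue_on_error: true
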
{ shutdown = true; - pool.wait(); + pool->wait(); throw; } @@ -304,7 +333,7 @@ void Runner::runBenchmark() } } - pool.wait(); + pool->wait(); total_watch.stop(); printNumberOfRequestsExecuted(requests_executed); @@ -316,6 +345,8 @@ void Runner::runBenchmark() void Runner::createConnections() { + DB::EventNotifier::init(); + std::cout << "---- Creating connections ---- " << std::endl; for (size_t connection_info_idx = 0; connection_info_idx < connection_infos.size(); ++connection_info_idx) { const auto & connection_info = connection_infos[connection_info_idx]; @@ -338,6 +369,7 @@ void Runner::createConnections() connections_to_info_map[connections.size() - 1] = connection_info_idx; } } + std::cout << "---- Done creating connections ----\n" << std::endl; } std::shared_ptr Runner::getConnection(const ConnectionInfo & connection_info) @@ -366,3 +398,11 @@ std::vector> Runner::refreshConnections } return connections; } + +Runner::~Runner() +{ + queue->clearAndFinish(); + shutdown = true; + pool->wait(); +} + diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index e36089d5519..ebd2d702019 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -34,14 +34,14 @@ class Runner { public: Runner( - size_t concurrency_, + std::optional concurrency_, const std::string & generator_name, const std::string & config_path, const Strings & hosts_strings_, - double max_time_, - double delay_, - bool continue_on_error_, - size_t max_iterations_); + std::optional max_time_, + std::optional delay_, + std::optional continue_on_error_, + std::optional max_iterations_); void thread(std::vector> zookeepers); @@ -50,19 +50,19 @@ public: std::cerr << "Requests executed: " << num << ".\n"; } - bool tryPushRequestInteractively(const Coordination::ZooKeeperRequestPtr & request, DB::InterruptListener & interrupt_listener); + bool tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && request, DB::InterruptListener & interrupt_listener); void runBenchmark(); - + ~Runner(); private: void parseHostsFromConfig(const Poco::Util::AbstractConfiguration & config); size_t concurrency = 1; - ThreadPool pool; + std::optional pool; - std::unique_ptr generator; + std::optional generator; double max_time = 0; double delay = 1; bool continue_on_error = false; @@ -78,7 +78,7 @@ private: std::mutex mutex; using Queue = ConcurrentBoundedQueue; - Queue queue; + std::optional queue; struct ConnectionInfo { diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index bec91dc6ad1..cb25aa7d0a5 100644 --- a/utils/keeper-bench/main.cpp +++ b/utils/keeper-bench/main.cpp @@ -3,10 +3,24 @@ #include "Runner.h" #include "Stats.h" #include "Generator.h" +#include "Common/Exception.h" #include #include +#include -using namespace std; +namespace +{ + +template +std::optional valueToOptional(const boost::program_options::variable_value & value) +{ + if (value.empty()) + return std::nullopt; + + return value.as(); +} + +} int main(int argc, char *argv[]) { @@ -19,16 +33,15 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() - ("help", "produce help message") - ("generator", value()->default_value(""), "query to execute") - ("config", value()->default_value(""), "xml file containing generator configuration") - ("concurrency,c", value()->default_value(1), "number of parallel queries") - ("delay,d", value()->default_value(1), "delay between intermediate reports in seconds 
(set 0 to disable reports)") - ("iterations,i", value()->default_value(0), "amount of queries to be executed") - ("timelimit,t", value()->default_value(0.), "stop launch of queries after specified time limit") - ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") + ("help", "produce help message") + ("generator", value()->default_value(""), "query to execute") + ("config", value()->default_value(""), "yaml/xml file containing configuration") + ("concurrency,c", value(), "number of parallel queries") + ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") + ("iterations,i", value(), "amount of queries to be executed") + ("time-limit,t", value(), "stop launch of queries after specified time limit") + ("hosts,h", value()->multitoken()->default_value(Strings{}, ""), "") ("continue_on_errors", "continue testing even if a query fails") - ("reconnect", "establish new connection for every query") ; boost::program_options::variables_map options; @@ -42,16 +55,23 @@ int main(int argc, char *argv[]) return 1; } - Runner runner(options["concurrency"].as(), - options["generator"].as(), - options["config"].as(), - options["hosts"].as(), - options["timelimit"].as(), - options["delay"].as(), - options.count("continue_on_errors"), - options["iterations"].as()); + Runner runner(valueToOptional(options["concurrency"]), + options["generator"].as(), + options["config"].as(), + options["hosts"].as(), + valueToOptional(options["time-limit"]), + valueToOptional(options["report-delay"]), + options.count("continue_on_errors") ? std::optional(true) : std::nullopt, + valueToOptional(options["iterations"])); - runner.runBenchmark(); + try + { + runner.runBenchmark(); + } + catch (const DB::Exception & e) + { + std::cout << "Got exception while trying to run benchmark: " << e.message() << std::endl; + } return 0; } From 46533c1ea7928a62dfa7455c194b4ce7d794e21e Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Apr 2023 14:51:25 +0000 Subject: [PATCH 07/67] CreateRequest more stable --- utils/keeper-bench/Generator.cpp | 23 +++++++++++++++++++---- utils/keeper-bench/Generator.h | 4 +++- utils/keeper-bench/Runner.cpp | 31 +++++++++++++++++++++++++++++-- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index f9f684e49ef..280593ed511 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -468,23 +468,26 @@ void CreateRequestGenerator::getFromConfigImpl(const std::string & key, const Po if (config.has(key + ".data")) data = StringGetter::fromConfig(key + ".data", config); - remove_factor = config.getDouble(key + ".remove_factor", 0.0); + if (config.has(key + ".remove_factor")) + remove_factor = config.getDouble(key + ".remove_factor"); } std::string CreateRequestGenerator::descriptionImpl() { std::string data_string = data.has_value() ? fmt::format("data for created nodes: {}", data->description()) : "no data for created nodes"; + std::string remove_factor_string + = remove_factor.has_value() ? 
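
With remove_factor now optional, a create generator issues removes only when the factor is configured; once at least one node has been created, each generated request becomes a remove of a previously created node with that probability. For example (value illustrative):

    create:
      path: "/test_create"
      name_length: 10
      remove_factor: 0.5
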
fmt::format("- remove factor: {}", *remove_factor) : "- without removes"; return fmt::format( "Create Request Generator\n" "- parent path(s) for created nodes: {}\n" "- name for created nodes: {}\n" "- {}\n" - "- remove factor: {}", + "{}", parent_path.description(), name.description(), data_string, - remove_factor); + remove_factor_string); } void CreateRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) @@ -494,7 +497,7 @@ void CreateRequestGenerator::startupImpl(Coordination::ZooKeeper & zookeeper) Coordination::ZooKeeperRequestPtr CreateRequestGenerator::generateImpl(const Coordination::ACLs & acls) { - if (!paths_created.empty() && remove_picker(rng) < remove_factor) + if (remove_factor.has_value() && !paths_created.empty() && remove_picker(rng) < *remove_factor) { auto request = std::make_shared(); auto it = paths_created.begin(); @@ -767,3 +770,15 @@ Coordination::ZooKeeperRequestPtr Generator::generate() { return request_getter.getRequestGenerator()->generate(default_acls); } + +void Generator::cleanup(Coordination::ZooKeeper & zookeeper) +{ + std::cout << "---- Cleaning up test data ----" << std::endl; + for (const auto & node : root_nodes) + { + auto node_name = node->name.getString(); + std::string root_path = std::filesystem::path("/") / node_name; + std::cout << "Cleaning up " << root_path << std::endl; + removeRecursive(zookeeper, root_path); + } +} diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index d2925e8650a..5c15d2bce9a 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -204,7 +204,7 @@ private: StringGetter name; std::optional data; - double remove_factor; + std::optional remove_factor; pcg64 rng; std::uniform_real_distribution remove_picker; @@ -278,6 +278,8 @@ public: void startup(Coordination::ZooKeeper & zookeeper); Coordination::ZooKeeperRequestPtr generate(); + void cleanup(Coordination::ZooKeeper & zookeeper); + private: struct Node { diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 72d80e478db..2e3cd911698 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -177,9 +177,35 @@ void Runner::thread(std::vector> zookee auto promise = std::make_shared>(); auto future = promise->get_future(); - Coordination::ResponseCallback callback = [promise](const Coordination::Response & response) + Coordination::ResponseCallback callback = [&request, promise](const Coordination::Response & response) { - if (response.error != Coordination::Error::ZOK) + bool set_exception = true; + + if (response.error == Coordination::Error::ZOK) + { + set_exception = false; + } + else if (response.error == Coordination::Error::ZNONODE) + { + /// remove can fail with ZNONODE because of different order of execution + /// of generated create and remove requests + /// this is okay for concurrent runs + if (dynamic_cast(&response)) + set_exception = false; + else if (const auto * multi_response = dynamic_cast(&response)) + { + const auto & responses = multi_response->responses; + size_t i = 0; + while (responses[i]->error != Coordination::Error::ZNONODE) + ++i; + + const auto & multi_request = dynamic_cast(*request); + if (dynamic_cast(&*multi_request.requests[i])) + set_exception = false; + } + } + + if (set_exception) promise->set_exception(std::make_exception_ptr(zkutil::KeeperException(response.error))); else promise->set_value(response.bytesSize()); @@ -404,5 +430,6 @@ Runner::~Runner() queue->clearAndFinish(); shutdown = true; pool->wait(); + 
generator->cleanup(*connections[0]); } From e9d43a8f6a2746ef101287fc92a6c13dcf1b3b65 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Apr 2023 15:20:36 +0000 Subject: [PATCH 08/67] Remove generator name config --- utils/keeper-bench/Generator.cpp | 64 ++--------------- utils/keeper-bench/Generator.h | 116 ++----------------------------- utils/keeper-bench/Runner.cpp | 39 ++++------- utils/keeper-bench/Runner.h | 1 - utils/keeper-bench/main.cpp | 2 - 5 files changed, 22 insertions(+), 200 deletions(-) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 280593ed511..12e628ed1e5 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -87,64 +87,6 @@ void removeRecursive(Coordination::ZooKeeper & zookeeper, const std::string & pa remove_future.get(); } -std::unique_ptr getGenerator(const std::string & name) -{ - //if (name == "create_no_data") - //{ - // return std::make_unique(); - //} - //else if (name == "create_small_data") - //{ - // return std::make_unique("/create_generator", 5, 32); - //} - //else if (name == "create_medium_data") - //{ - // return std::make_unique("/create_generator", 5, 1024); - //} - //else if (name == "create_big_data") - //{ - // return std::make_unique("/create_generator", 5, 512 * 1024); - //} - //else if (name == "get_no_data") - //{ - // return std::make_unique("/get_generator", 10, 0); - //} - //else if (name == "get_small_data") - //{ - // return std::make_unique("/get_generator", 10, 32); - //} - //else if (name == "get_medium_data") - //{ - // return std::make_unique("/get_generator", 10, 1024); - //} - //else if (name == "get_big_data") - //{ - // return std::make_unique("/get_generator", 10, 512 * 1024); - //} - //else if (name == "list_no_nodes") - //{ - // return std::make_unique("/list_generator", 0, 1); - //} - //else if (name == "list_few_nodes") - //{ - // return std::make_unique("/list_generator", 10, 5); - //} - //else if (name == "list_medium_nodes") - //{ - // return std::make_unique("/list_generator", 1000, 5); - //} - //else if (name == "list_a_lot_nodes") - //{ - // return std::make_unique("/list_generator", 100000, 5); - //} - //else if (name == "set_small_data") - //{ - // return std::make_unique("/set_generator", 5); - //} - - throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown generator {}", name); -} - NumberGetter NumberGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value) { @@ -321,6 +263,10 @@ std::string PathGetter::description() const return description; } +RequestGetter::RequestGetter(std::vector request_generators_) + : request_generators(std::move(request_generators_)) +{} + RequestGetter RequestGetter::fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi) { RequestGetter request_getter; @@ -658,7 +604,7 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) static const std::string generator_key = "generator"; - std::cout << "---- Parsing setup ---- " << std::endl; + std::cout << "---- Parsing setup ---- " << std::endl; static const std::string setup_key = generator_key + ".setup"; Poco::Util::AbstractConfiguration::Keys keys; config.keys(setup_key, keys); diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 5c15d2bce9a..60c4fcb3cc4 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -9,115 +9,6 @@ #include #include - -std::string 
generateRandomPath(const std::string & prefix, size_t length = 5); - -std::string generateRandomData(size_t size); - -// -//class CreateRequestGenerator final : public IGenerator -//{ -//public: -// explicit CreateRequestGenerator( -// std::string path_prefix_ = "/create_generator", -// std::optional path_length_ = std::nullopt, -// std::optional data_size_ = std::nullopt) -// : path_prefix(path_prefix_) -// , path_length(path_length_) -// , data_size(data_size_) -// {} -// -// void startup(Coordination::ZooKeeper & zookeeper) override; -// Coordination::ZooKeeperRequestPtr generate() override; -// -//private: -// std::string path_prefix; -// std::optional path_length; -// std::optional data_size; -// std::unordered_set paths_created; -//}; -// -// -//class GetRequestGenerator final : public IGenerator -//{ -//public: -// explicit GetRequestGenerator( -// std::string path_prefix_ = "/get_generator", -// std::optional num_nodes_ = std::nullopt, -// std::optional nodes_data_size_ = std::nullopt) -// : path_prefix(path_prefix_) -// , num_nodes(num_nodes_) -// , nodes_data_size(nodes_data_size_) -// , rng(randomSeed()) -// , distribution(0, num_nodes ? *num_nodes - 1 : 0) -// {} -// -// void startup(Coordination::ZooKeeper & zookeeper) override; -// Coordination::ZooKeeperRequestPtr generate() override; -// -//private: -// std::string path_prefix; -// std::optional num_nodes; -// std::optional nodes_data_size; -// std::vector paths_to_get; -// -// pcg64 rng; -// std::uniform_int_distribution distribution; -//}; -// -//class ListRequestGenerator final : public IGenerator -//{ -//public: -// explicit ListRequestGenerator( -// std::string path_prefix_ = "/list_generator", -// std::optional num_nodes_ = std::nullopt, -// std::optional paths_length_ = std::nullopt) -// : path_prefix(path_prefix_) -// , num_nodes(num_nodes_) -// , paths_length(paths_length_) -// {} -// -// void startup(Coordination::ZooKeeper & zookeeper) override; -// Coordination::ZooKeeperRequestPtr generate() override; -// -//private: -// std::string path_prefix; -// std::optional num_nodes; -// std::optional paths_length; -//}; -// -//class SetRequestGenerator final : public IGenerator -//{ -//public: -// explicit SetRequestGenerator( -// std::string path_prefix_ = "/set_generator", -// uint64_t data_size_ = 5) -// : path_prefix(path_prefix_) -// , data_size(data_size_) -// {} -// -// void startup(Coordination::ZooKeeper & zookeeper) override; -// Coordination::ZooKeeperRequestPtr generate() override; -// -//private: -// std::string path_prefix; -// uint64_t data_size; -//}; -// -//class MixedRequestGenerator final : public IGenerator -//{ -//public: -// explicit MixedRequestGenerator(std::vector> generators_) -// : generators(std::move(generators_)) -// {} -// -// void startup(Coordination::ZooKeeper & zookeeper) override; -// Coordination::ZooKeeperRequestPtr generate() override; -// -//private: -// std::vector> generators; -//}; - struct NumberGetter { static NumberGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, std::optional default_value = std::nullopt); @@ -247,6 +138,10 @@ private: struct RequestGetter { + explicit RequestGetter(std::vector request_generators_); + + RequestGetter() = default; + static RequestGetter fromConfig(const std::string & key, const Poco::Util::AbstractConfiguration & config, bool for_multi = false); RequestGeneratorPtr getRequestGenerator() const; @@ -279,7 +174,6 @@ public: void startup(Coordination::ZooKeeper & zookeeper); 
Coordination::ZooKeeperRequestPtr generate(); void cleanup(Coordination::ZooKeeper & zookeeper); - private: struct Node { @@ -299,4 +193,4 @@ private: Coordination::ACLs default_acls; }; -std::unique_ptr getGenerator(const std::string & name); +std::optional getGenerator(const std::string & name); diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 2e3cd911698..2b645ddc6c3 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -13,7 +13,6 @@ namespace DB::ErrorCodes Runner::Runner( std::optional concurrency_, - const std::string & generator_name, const std::string & config_path, const Strings & hosts_strings_, std::optional max_time_, @@ -23,28 +22,10 @@ Runner::Runner( : info(std::make_shared()) { - DB::ConfigurationPtr config = nullptr; + DB::ConfigProcessor config_processor(config_path, true, false); + auto config = config_processor.loadConfig().configuration; - if (!config_path.empty()) - { - DB::ConfigProcessor config_processor(config_path, true, false); - config = config_processor.loadConfig().configuration; - } - - if (!generator_name.empty()) - { - //generator = getGenerator(generator_name); - - if (!generator) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Failed to create generator"); - } - else - { - if (!config) - throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "No config file or generator name defined"); - - generator.emplace(*config); - } + generator.emplace(*config); if (!hosts_strings_.empty()) { @@ -60,34 +41,38 @@ Runner::Runner( } std::cout << "---- Run options ---- " << std::endl; + static constexpr uint64_t DEFAULT_CONCURRENCY = 1; if (concurrency_) concurrency = *concurrency_; else - concurrency = config->getUInt64("concurrency", 1); + concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); std::cout << "Concurrency: " << concurrency << std::endl; + static constexpr uint64_t DEFAULT_ITERATIONS = 0; if (max_iterations_) max_iterations = *max_iterations_; else - max_iterations = config->getUInt64("iterations", 0); + max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); std::cout << "Iterations: " << max_iterations << std::endl; + static constexpr double DEFAULT_DELAY = 1.0; if (delay_) delay = *delay_; else - delay = config->getDouble("report_delay", 1); + delay = config->getDouble("report_delay", DEFAULT_DELAY); std::cout << "Report delay: " << delay << std::endl; + static constexpr double DEFAULT_TIME_LIMIT = 1.0; if (max_time_) max_time = *max_time_; else - max_time = config->getDouble("timelimit", 1.0); + max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); std::cout << "Time limit: " << max_time << std::endl; if (continue_on_error_) continue_on_error = *continue_on_error_; else - continue_on_error = config->getBool("continue_on_error", 1.0); + continue_on_error = config->getBool("continue_on_error", false); std::cout << "Continue on error: " << continue_on_error << std::endl; std::cout << "---- Run options ----\n" << std::endl; diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index ebd2d702019..d85dc9e8658 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -35,7 +35,6 @@ class Runner public: Runner( std::optional concurrency_, - const std::string & generator_name, const std::string & config_path, const Strings & hosts_strings_, std::optional max_time_, diff --git a/utils/keeper-bench/main.cpp b/utils/keeper-bench/main.cpp index cb25aa7d0a5..0753d66850f 100644 --- a/utils/keeper-bench/main.cpp +++ 
b/utils/keeper-bench/main.cpp @@ -34,7 +34,6 @@ int main(int argc, char *argv[]) boost::program_options::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth()); desc.add_options() ("help", "produce help message") - ("generator", value()->default_value(""), "query to execute") ("config", value()->default_value(""), "yaml/xml file containing configuration") ("concurrency,c", value(), "number of parallel queries") ("report-delay,d", value(), "delay between intermediate reports in seconds (set 0 to disable reports)") @@ -56,7 +55,6 @@ int main(int argc, char *argv[]) } Runner runner(valueToOptional(options["concurrency"]), - options["generator"].as(), options["config"].as(), options["hosts"].as(), valueToOptional(options["time-limit"]), From ca1e6ac5cac47a15ca7e0b5c38f543cd358d44d5 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 7 Apr 2023 15:22:03 +0000 Subject: [PATCH 09/67] Add example yaml --- utils/keeper-bench/example.yaml | 111 ++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 utils/keeper-bench/example.yaml diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml new file mode 100644 index 00000000000..2febb881634 --- /dev/null +++ b/utils/keeper-bench/example.yaml @@ -0,0 +1,111 @@ +concurrency: 20 +iterations: 10000 +delay: 4 +timelimit: 300 +continue_on_errors: true + +connections: + operation_timeout_ms: 3000 + connection_timeout_ms: 40000 + + connection: + secure: false + operation_timeout_ms: 2000 + session_timeout_ms: 2000 + connection_timeout_ms: 50000 + host: "localhost:9181" + sessions: 1 + + host: "localhost:9181" + +generator: + setup: + node: + name: "test3" + node: + name: "test_create" + node: + name: "test4" + node: + name: "test" + data: "somedata" + node: + repeat: 4 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + repeat: 2 + node: + repeat: 2 + name: + random_string: + size: 12 + name: + random_string: + size: 15 + data: + random_string: + size: + min_value: 10 + max_value: 20 + node: + name: "test2" + data: "somedata" + requests: + create: + path: "/test_create" + name_length: 10 + remove_factor: 0.5 + multi: + size: 20 + create: + path: "/test" + data: + random_string: + size: + min_value: 10 + max_value: 20 + remove_factor: 0.8 + set: + weight: 2 + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + data: + random_string: + size: 10 + get: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + + multi: + weight: 10 + get: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" + list: + path: + - "/test3" + path: + children_of: "/test" + + list: + path: + - "/test3" + - "/test4" + path: + children_of: "/test" From 6bc1ab7ab186a1ffdc35786b14f298c2aa97cb05 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 14 Apr 2023 13:32:08 +0000 Subject: [PATCH 10/67] Add JSON output --- utils/check-style/check-style | 2 +- utils/keeper-bench/CMakeLists.txt | 2 +- utils/keeper-bench/Generator.cpp | 72 +++++++++--- utils/keeper-bench/Generator.h | 3 + utils/keeper-bench/Runner.cpp | 51 +++++---- utils/keeper-bench/Runner.h | 11 -- utils/keeper-bench/Stats.cpp | 175 ++++++++++++++++++++++++------ utils/keeper-bench/Stats.h | 58 ++++------ 8 files changed, 255 insertions(+), 119 deletions(-) diff --git a/utils/check-style/check-style b/utils/check-style/check-style index a6cc20bb7c8..988c6acd8a7 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ 
-13,7 +13,7 @@ # and then to run formatter only for the specified files. ROOT_PATH=$(git rev-parse --show-toplevel) -EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/' +EXCLUDE_DIRS='build/|integration/|widechar_width/|glibc-compatibility/|poco/|memcpy/|consistent-hashing|benchmark|tests/|utils/keeper-bench/example.yaml' # From [1]: # But since array_to_string_internal() in array.c still loops over array diff --git a/utils/keeper-bench/CMakeLists.txt b/utils/keeper-bench/CMakeLists.txt index 97d30117d69..5883b03bbe9 100644 --- a/utils/keeper-bench/CMakeLists.txt +++ b/utils/keeper-bench/CMakeLists.txt @@ -1,2 +1,2 @@ clickhouse_add_executable(keeper-bench Generator.cpp Runner.cpp Stats.cpp main.cpp) -target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log) +target_link_libraries(keeper-bench PRIVATE clickhouse_common_config_no_zookeeper_log ch_contrib::rapidjson) diff --git a/utils/keeper-bench/Generator.cpp b/utils/keeper-bench/Generator.cpp index 12e628ed1e5..2212f7158ae 100644 --- a/utils/keeper-bench/Generator.cpp +++ b/utils/keeper-bench/Generator.cpp @@ -12,6 +12,7 @@ using namespace zkutil; namespace DB::ErrorCodes { extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } namespace @@ -308,7 +309,7 @@ RequestGetter RequestGetter::fromConfig(const std::string & key, const Poco::Uti auto weight = request_generator->getWeight(); use_weights |= weight != 1; weight_sum += weight; - + generators.push_back(std::move(request_generator)); } @@ -575,7 +576,7 @@ Coordination::ZooKeeperRequestPtr MultiRequestGenerator::generateImpl(const Coor if (size) { - auto request_count = size->getNumber(); + auto request_count = size->getNumber(); for (size_t i = 0; i < request_count; ++i) ops.push_back(request_getter.getRequestGenerator()->generate(acls)); @@ -604,7 +605,7 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) static const std::string generator_key = "generator"; - std::cout << "---- Parsing setup ---- " << std::endl; + std::cerr << "---- Parsing setup ---- " << std::endl; static const std::string setup_key = generator_key + ".setup"; Poco::Util::AbstractConfiguration::Keys keys; config.keys(setup_key, keys); @@ -612,20 +613,34 @@ Generator::Generator(const Poco::Util::AbstractConfiguration & config) { if (key.starts_with("node")) { - const auto & node = root_nodes.emplace_back(parseNode(setup_key + "." + key, config)); + auto node_key = setup_key + "." 
+ key; + auto parsed_root_node = parseNode(node_key, config); + const auto node = root_nodes.emplace_back(parsed_root_node); + + if (config.has(node_key + ".repeat")) + { + if (!node->name.isRandom()) + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key); + + auto repeat_count = config.getUInt64(node_key + ".repeat"); + node->repeat_count = repeat_count; + for (size_t i = 1; i < repeat_count; ++i) + root_nodes.emplace_back(node->clone()); + } + + std::cerr << "Tree to create:" << std::endl; - std::cout << "Tree to create:" << std::endl; node->dumpTree(); - std::cout << std::endl; + std::cerr << std::endl; } } - std::cout << "---- Done parsing data setup ----\n" << std::endl; + std::cerr << "---- Done parsing data setup ----\n" << std::endl; - std::cout << "---- Collecting request generators ----" << std::endl; + std::cerr << "---- Collecting request generators ----" << std::endl; static const std::string requests_key = generator_key + ".requests"; request_getter = RequestGetter::fromConfig(requests_key, config); - std::cout << request_getter.description() << std::endl; - std::cout << "---- Done collecting request generators ----\n" << std::endl; + std::cerr << request_getter.description() << std::endl; + std::cerr << "---- Done collecting request generators ----\n" << std::endl; } std::shared_ptr Generator::parseNode(const std::string & key, const Poco::Util::AbstractConfiguration & config) @@ -654,6 +669,7 @@ std::shared_ptr Generator::parseNode(const std::string & key, c throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Repeating node creation for key {}, but name is not randomly generated", node_key_string); auto repeat_count = config.getUInt64(node_key_string + ".repeat"); + child_node->repeat_count = repeat_count; for (size_t i = 1; i < repeat_count; ++i) node->children.push_back(child_node); } @@ -666,10 +682,30 @@ void Generator::Node::dumpTree(int level) const { std::string data_string = data.has_value() ? fmt::format("{}", data->description()) : "no data"; - std::cout << fmt::format("{}name: {}, data: {}", std::string(level, '\t'), name.description(), data_string) << std::endl; - for (const auto & child : children) + std::string repeat_count_string = repeat_count != 0 ? fmt::format(", repeated {} times", repeat_count) : ""; + + std::cerr << fmt::format("{}name: {}, data: {}{}", std::string(level, '\t'), name.description(), data_string, repeat_count_string) << std::endl; + + for (auto it = children.begin(); it != children.end();) + { + const auto & child = *it; child->dumpTree(level + 1); + std::advance(it, child->repeat_count != 0 ? 
child->repeat_count : 1); + } +} + +std::shared_ptr Generator::Node::clone() const +{ + auto new_node = std::make_shared(); + new_node->name = name; + new_node->data = data; + new_node->repeat_count = repeat_count; + + // don't do deep copy of children because we will do clone only for root nodes + new_node->children = children; + + return new_node; } void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const @@ -693,21 +729,21 @@ void Generator::Node::createNode(Coordination::ZooKeeper & zookeeper, const std: void Generator::startup(Coordination::ZooKeeper & zookeeper) { - std::cout << "---- Creating test data ----" << std::endl; + std::cerr << "---- Creating test data ----" << std::endl; for (const auto & node : root_nodes) { auto node_name = node->name.getString(); node->name.setString(node_name); std::string root_path = std::filesystem::path("/") / node_name; - std::cout << "Cleaning up " << root_path << std::endl; + std::cerr << "Cleaning up " << root_path << std::endl; removeRecursive(zookeeper, root_path); node->createNode(zookeeper, "/", default_acls); } - std::cout << "---- Created test data ----\n" << std::endl; + std::cerr << "---- Created test data ----\n" << std::endl; - std::cout << "---- Initializing generators ----" << std::endl; + std::cerr << "---- Initializing generators ----" << std::endl; request_getter.startup(zookeeper); } @@ -719,12 +755,12 @@ Coordination::ZooKeeperRequestPtr Generator::generate() void Generator::cleanup(Coordination::ZooKeeper & zookeeper) { - std::cout << "---- Cleaning up test data ----" << std::endl; + std::cerr << "---- Cleaning up test data ----" << std::endl; for (const auto & node : root_nodes) { auto node_name = node->name.getString(); std::string root_path = std::filesystem::path("/") / node_name; - std::cout << "Cleaning up " << root_path << std::endl; + std::cerr << "Cleaning up " << root_path << std::endl; removeRecursive(zookeeper, root_path); } } diff --git a/utils/keeper-bench/Generator.h b/utils/keeper-bench/Generator.h index 60c4fcb3cc4..5b4c05b2d8b 100644 --- a/utils/keeper-bench/Generator.h +++ b/utils/keeper-bench/Generator.h @@ -180,6 +180,9 @@ private: StringGetter name; std::optional data; std::vector> children; + size_t repeat_count = 0; + + std::shared_ptr clone() const; void createNode(Coordination::ZooKeeper & zookeeper, const std::string & parent_path, const Coordination::ACLs & acls) const; void dumpTree(int level = 0) const; diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 2b645ddc6c3..4250263f043 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -5,10 +5,18 @@ #include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include +#include "IO/WriteBufferFromFile.h" + +namespace CurrentMetrics +{ + extern const Metric LocalThread; + extern const Metric LocalThreadActive; +} namespace DB::ErrorCodes { extern const int CANNOT_BLOCK_SIGNAL; + extern const int BAD_ARGUMENTS; } Runner::Runner( @@ -40,41 +48,41 @@ Runner::Runner( parseHostsFromConfig(*config); } - std::cout << "---- Run options ---- " << std::endl; + std::cerr << "---- Run options ---- " << std::endl; static constexpr uint64_t DEFAULT_CONCURRENCY = 1; if (concurrency_) concurrency = *concurrency_; else concurrency = config->getUInt64("concurrency", DEFAULT_CONCURRENCY); - std::cout << "Concurrency: " << concurrency << std::endl; + std::cerr << "Concurrency: " << concurrency << std::endl; static constexpr uint64_t 
DEFAULT_ITERATIONS = 0; if (max_iterations_) max_iterations = *max_iterations_; else max_iterations = config->getUInt64("iterations", DEFAULT_ITERATIONS); - std::cout << "Iterations: " << max_iterations << std::endl; + std::cerr << "Iterations: " << max_iterations << std::endl; static constexpr double DEFAULT_DELAY = 1.0; if (delay_) delay = *delay_; else delay = config->getDouble("report_delay", DEFAULT_DELAY); - std::cout << "Report delay: " << delay << std::endl; + std::cerr << "Report delay: " << delay << std::endl; - static constexpr double DEFAULT_TIME_LIMIT = 1.0; + static constexpr double DEFAULT_TIME_LIMIT = 0.0; if (max_time_) max_time = *max_time_; else max_time = config->getDouble("timelimit", DEFAULT_TIME_LIMIT); - std::cout << "Time limit: " << max_time << std::endl; + std::cerr << "Time limit: " << max_time << std::endl; if (continue_on_error_) continue_on_error = *continue_on_error_; else continue_on_error = config->getBool("continue_on_error", false); - std::cout << "Continue on error: " << continue_on_error << std::endl; - std::cout << "---- Run options ----\n" << std::endl; + std::cerr << "Continue on error: " << continue_on_error << std::endl; + std::cerr << "---- Run options ----\n" << std::endl; pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency); queue.emplace(concurrency); @@ -173,7 +181,7 @@ void Runner::thread(std::vector> zookee else if (response.error == Coordination::Error::ZNONODE) { /// remove can fail with ZNONODE because of different order of execution - /// of generated create and remove requests + /// of generated create and remove requests /// this is okay for concurrent runs if (dynamic_cast(&response)) set_exception = false; @@ -203,14 +211,14 @@ void Runner::thread(std::vector> zookee try { auto response_size = future.get(); - double seconds = watch.elapsedSeconds(); + auto microseconds = watch.elapsedMicroseconds(); std::lock_guard lock(mutex); if (request->isReadRequest()) - info->addRead(seconds, 1, request->bytesSize() + response_size); + info->addRead(microseconds, 1, request->bytesSize() + response_size); else - info->addWrite(seconds, 1, request->bytesSize() + response_size); + info->addWrite(microseconds, 1, request->bytesSize() + response_size); } catch (...) { @@ -268,7 +276,7 @@ bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && re //if (i % 10000 == 0) //{ // for (const auto & [op_num, count] : counts) - // std::cout << fmt::format("{}: {}", op_num, count) << std::endl; + // std::cerr << fmt::format("{}: {}", op_num, count) << std::endl; //} bool inserted = false; @@ -285,13 +293,13 @@ bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && re if (max_time > 0 && total_watch.elapsedSeconds() >= max_time) { - std::cout << "Stopping launch of queries. Requested time limit is exhausted.\n"; + std::cerr << "Stopping launch of queries. Requested time limit is exhausted.\n"; return false; } if (interrupt_listener.check()) { - std::cout << "Stopping launch of queries. SIGINT received." << std::endl; + std::cerr << "Stopping launch of queries. SIGINT received." 
<< std::endl; return false; } @@ -300,7 +308,7 @@ bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && re printNumberOfRequestsExecuted(requests_executed); std::lock_guard lock(mutex); - report(info, concurrency); + info->report(concurrency); delay_watch.restart(); } } @@ -350,18 +358,21 @@ void Runner::runBenchmark() printNumberOfRequestsExecuted(requests_executed); std::lock_guard lock(mutex); - report(info, concurrency); + info->report(concurrency); + + DB::WriteBufferFromFile out("result.json"); + info->writeJSON(out, concurrency); } void Runner::createConnections() { DB::EventNotifier::init(); - std::cout << "---- Creating connections ---- " << std::endl; + std::cerr << "---- Creating connections ---- " << std::endl; for (size_t connection_info_idx = 0; connection_info_idx < connection_infos.size(); ++connection_info_idx) { const auto & connection_info = connection_infos[connection_info_idx]; - std::cout << fmt::format("Creating {} session(s) for:\n" + std::cerr << fmt::format("Creating {} session(s) for:\n" "- host: {}\n" "- secure: {}\n" "- session timeout: {}ms\n" @@ -380,7 +391,7 @@ void Runner::createConnections() connections_to_info_map[connections.size() - 1] = connection_info_idx; } } - std::cout << "---- Done creating connections ----\n" << std::endl; + std::cerr << "---- Done creating connections ----\n" << std::endl; } std::shared_ptr Runner::getConnection(const ConnectionInfo & connection_info) diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index d85dc9e8658..7f41958a45a 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -19,17 +19,6 @@ using Ports = std::vector; using Strings = std::vector; -namespace CurrentMetrics -{ - extern const Metric LocalThread; - extern const Metric LocalThreadActive; -} - -namespace DB::ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - class Runner { public: diff --git a/utils/keeper-bench/Stats.cpp b/utils/keeper-bench/Stats.cpp index 1f8b02ed09d..21de72f6047 100644 --- a/utils/keeper-bench/Stats.cpp +++ b/utils/keeper-bench/Stats.cpp @@ -1,67 +1,174 @@ #include "Stats.h" #include -void report(std::shared_ptr & info, size_t concurrency) +#include +#include +#include +#include + +void Stats::StatsCollector::add(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + work_time += microseconds; + requests += requests_inc; + requests_bytes += bytes_inc; + sampler.insert(microseconds); +} + +void Stats::addRead(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + read_collector.add(microseconds, requests_inc, bytes_inc); +} + +void Stats::addWrite(uint64_t microseconds, size_t requests_inc, size_t bytes_inc) +{ + write_collector.add(microseconds, requests_inc, bytes_inc); +} + +void Stats::StatsCollector::clear() +{ + requests = 0; + work_time = 0; + requests_bytes = 0; + sampler.clear(); +} + +void Stats::clear() +{ + read_collector.clear(); + write_collector.clear(); +} + +std::pair Stats::StatsCollector::getThroughput(size_t concurrency) +{ + assert(requests != 0); + double seconds = work_time / 1'000'000.0 / concurrency; + + return {requests / seconds, requests_bytes / seconds}; +} + +double Stats::StatsCollector::getPercentile(double percent) +{ + return sampler.quantileNearest(percent / 100.0) / 1000.0; +} + +void Stats::report(size_t concurrency) { std::cerr << "\n"; + const auto & read_requests = read_collector.requests; + const auto & write_requests = write_collector.requests; + /// Avoid zeros, nans or exceptions - if (0 == 
info->read_requests && 0 == info->write_requests) + if (0 == read_requests && 0 == write_requests) return; - double read_seconds = info->read_work_time / concurrency; - double write_seconds = info->write_work_time / concurrency; + auto [read_rps, read_bps] = read_collector.getThroughput(concurrency); + auto [write_rps, write_bps] = write_collector.getThroughput(concurrency); - std::cerr << "read requests " << info->read_requests << ", write requests " << info->write_requests << ", "; - if (info->errors) - { - std::cerr << "errors " << info->errors << ", "; - } - if (0 != info->read_requests) + std::cerr << "read requests " << read_requests << ", write requests " << write_requests << ", "; + if (errors) + std::cerr << "errors " << errors << ", "; + + if (0 != read_requests) { std::cerr - << "Read RPS: " << (info->read_requests / read_seconds) << ", " - << "Read MiB/s: " << (info->requests_read_bytes / read_seconds / 1048576); - if (0 != info->write_requests) + << "Read RPS: " << read_rps << ", " + << "Read MiB/s: " << read_bps / 1048576; + + if (0 != write_requests) std::cerr << ", "; } - if (0 != info->write_requests) + + if (0 != write_requests) { std::cerr - << "Write RPS: " << (info->write_requests / write_seconds) << ", " - << "Write MiB/s: " << (info->requests_write_bytes / write_seconds / 1048576) << ". " + << "Write RPS: " << write_rps << ", " + << "Write MiB/s: " << write_bps / 1048576 << ". " << "\n"; } std::cerr << "\n"; - auto print_percentile = [&](double percent, Stats::Sampler & sampler) + auto print_percentile = [&](double percent, Stats::StatsCollector & collector) { std::cerr << percent << "%\t\t"; - std::cerr << sampler.quantileNearest(percent / 100.0) << " sec.\t"; + std::cerr << collector.getPercentile(percent) << " msec.\t"; std::cerr << "\n"; }; - if (0 != info->read_requests) + const auto print_all_percentiles = [&](auto & collector) + { + for (int percent = 0; percent <= 90; percent += 10) + print_percentile(percent, collector); + + print_percentile(95, collector); + print_percentile(99, collector); + print_percentile(99.9, collector); + print_percentile(99.99, collector); + }; + + if (0 != read_requests) { std::cerr << "Read sampler:\n"; - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(percent, info->read_sampler); - - print_percentile(95, info->read_sampler); - print_percentile(99, info->read_sampler); - print_percentile(99.9, info->read_sampler); - print_percentile(99.99, info->read_sampler); + print_all_percentiles(read_collector); } - if (0 != info->write_requests) + if (0 != write_requests) { std::cerr << "Write sampler:\n"; - for (int percent = 0; percent <= 90; percent += 10) - print_percentile(percent, info->write_sampler); - - print_percentile(95, info->write_sampler); - print_percentile(99, info->write_sampler); - print_percentile(99.9, info->write_sampler); - print_percentile(99.99, info->write_sampler); + print_all_percentiles(write_collector); } } + +void Stats::writeJSON(DB::WriteBuffer & out, size_t concurrency) +{ + using namespace rapidjson; + Document results; + auto & allocator = results.GetAllocator(); + results.SetObject(); + + const auto get_results = [&](auto & collector) + { + Value specific_results(kObjectType); + + auto [rps, bps] = collector.getThroughput(concurrency); + specific_results.AddMember("requests_per_second", Value(rps), allocator); + specific_results.AddMember("bytes_per_second", Value(bps), allocator); + + Value percentiles(kArrayType); + + const auto add_percentile = [&](double percent) + { + Value 
percentile(kObjectType); + percentile.AddMember("percentile", Value(percent), allocator); + percentile.AddMember("value", Value(collector.getPercentile(percent)), allocator); + + percentiles.PushBack(percentile, allocator); + }; + + for (int percent = 0; percent <= 90; percent += 10) + add_percentile(percent); + + add_percentile(95); + add_percentile(99); + add_percentile(99.9); + add_percentile(99.99); + + specific_results.AddMember("percentiles", percentiles, allocator); + + return specific_results; + }; + + if (read_collector.requests != 0) + results.AddMember("read_results", get_results(read_collector), results.GetAllocator()); + + if (write_collector.requests != 0) + results.AddMember("write_results", get_results(write_collector), results.GetAllocator()); + + StringBuffer strbuf; + strbuf.Clear(); + Writer writer(strbuf); + results.Accept(writer); + + const char * output_string = strbuf.GetString(); + out.write(output_string, strlen(output_string)); +} diff --git a/utils/keeper-bench/Stats.h b/utils/keeper-bench/Stats.h index 1b9a31bb734..7afd8ce4f1a 100644 --- a/utils/keeper-bench/Stats.h +++ b/utils/keeper-bench/Stats.h @@ -5,48 +5,38 @@ #include +#include + struct Stats { - std::atomic read_requests{0}; - std::atomic write_requests{0}; size_t errors = 0; - size_t requests_write_bytes = 0; - size_t requests_read_bytes = 0; - double read_work_time = 0; - double write_work_time = 0; using Sampler = ReservoirSampler; - Sampler read_sampler {1 << 16}; - Sampler write_sampler {1 << 16}; - - void addRead(double seconds, size_t requests_inc, size_t bytes_inc) + struct StatsCollector { - read_work_time += seconds; - read_requests += requests_inc; - requests_read_bytes += bytes_inc; - read_sampler.insert(seconds); - } + std::atomic requests{0}; + uint64_t requests_bytes = 0; + uint64_t work_time = 0; + Sampler sampler; - void addWrite(double seconds, size_t requests_inc, size_t bytes_inc) - { - write_work_time += seconds; - write_requests += requests_inc; - requests_write_bytes += bytes_inc; - write_sampler.insert(seconds); - } + /// requests/second, bytes/second + std::pair getThroughput(size_t concurrency); + double getPercentile(double percent); - void clear() - { - read_requests = 0; - write_requests = 0; - read_work_time = 0; - write_work_time = 0; - requests_read_bytes = 0; - requests_write_bytes = 0; - read_sampler.clear(); - write_sampler.clear(); - } + void add(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + void clear(); + }; + + StatsCollector read_collector; + StatsCollector write_collector; + + void addRead(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + void addWrite(uint64_t microseconds, size_t requests_inc, size_t bytes_inc); + + void clear(); + + void report(size_t concurrency); + void writeJSON(DB::WriteBuffer & out, size_t concurrency); }; -void report(std::shared_ptr & info, size_t concurrency); From 3dcc7e2f9eb02c57b1b2d728672fe0b39d740fc6 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 17 Apr 2023 11:25:46 +0000 Subject: [PATCH 11/67] Improve outputs --- utils/keeper-bench/Runner.cpp | 71 +++++++++++++++++++++++---------- utils/keeper-bench/Runner.h | 7 +++- utils/keeper-bench/Stats.cpp | 11 +++-- utils/keeper-bench/Stats.h | 2 +- utils/keeper-bench/example.yaml | 6 +++ 5 files changed, 69 insertions(+), 28 deletions(-) diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp index 4250263f043..bb8ff46a20c 100644 --- a/utils/keeper-bench/Runner.cpp +++ b/utils/keeper-bench/Runner.cpp @@ -5,7 +5,10 @@ 
#include "Common/ZooKeeper/ZooKeeperConstants.h" #include #include -#include "IO/WriteBufferFromFile.h" +#include "IO/ReadBufferFromString.h" +#include +#include +#include namespace CurrentMetrics { @@ -82,6 +85,25 @@ Runner::Runner( else continue_on_error = config->getBool("continue_on_error", false); std::cerr << "Continue on error: " << continue_on_error << std::endl; + + static const std::string output_key = "output"; + print_to_stdout = config->getBool(output_key + ".stdout", false); + std::cerr << "Printing output to stdout: " << print_to_stdout << std::endl; + + static const std::string output_file_key = output_key + ".file"; + if (config->has(output_file_key)) + { + if (config->has(output_file_key + ".path")) + { + file_output = config->getString(output_file_key + ".path"); + output_file_with_timestamp = config->getBool(output_file_key + ".with_timestamp"); + } + else + file_output = config->getString(output_file_key); + + std::cerr << "Result file path: " << file_output->string() << std::endl; + } + std::cerr << "---- Run options ----\n" << std::endl; pool.emplace(CurrentMetrics::LocalThread, CurrentMetrics::LocalThreadActive, concurrency); @@ -261,24 +283,6 @@ void Runner::thread(std::vector> zookee bool Runner::tryPushRequestInteractively(Coordination::ZooKeeperRequestPtr && request, DB::InterruptListener & interrupt_listener) { - //static std::unordered_map counts; - //static size_t i = 0; - // - //counts[request->getOpNum()]++; - - //if (request->getOpNum() == Coordination::OpNum::Multi) - //{ - // for (const auto & multi_request : dynamic_cast(*request).requests) - // counts[dynamic_cast(*multi_request).getOpNum()]++; - //} - - //++i; - //if (i % 10000 == 0) - //{ - // for (const auto & [op_num, count] : counts) - // std::cerr << fmt::format("{}: {}", op_num, count) << std::endl; - //} - bool inserted = false; while (!inserted) @@ -324,6 +328,9 @@ void Runner::runBenchmark() std::cerr << "Preparing to run\n"; generator->startup(*connections[0]); std::cerr << "Prepared\n"; + + auto start_timestamp_ms = Poco::Timestamp{}.epochMicroseconds() / 1000; + try { for (size_t i = 0; i < concurrency; ++i) @@ -360,8 +367,30 @@ void Runner::runBenchmark() std::lock_guard lock(mutex); info->report(concurrency); - DB::WriteBufferFromFile out("result.json"); - info->writeJSON(out, concurrency); + DB::WriteBufferFromOwnString out; + info->writeJSON(out, concurrency, start_timestamp_ms); + auto output_string = std::move(out.str()); + + if (print_to_stdout) + std::cout << output_string << std::endl; + + if (file_output) + { + auto path = *file_output; + + if (output_file_with_timestamp) + { + auto filename = file_output->filename(); + filename = fmt::format("{}_{}{}", filename.stem().generic_string(), start_timestamp_ms, filename.extension().generic_string()); + path = file_output->parent_path() / filename; + } + + std::cerr << "Storing output to " << path << std::endl; + + DB::WriteBufferFromFile file_output_buffer(path); + DB::ReadBufferFromString read_buffer(output_string); + DB::copyData(read_buffer, file_output_buffer); + } } diff --git a/utils/keeper-bench/Runner.h b/utils/keeper-bench/Runner.h index 7f41958a45a..f899f1d538d 100644 --- a/utils/keeper-bench/Runner.h +++ b/utils/keeper-bench/Runner.h @@ -7,8 +7,6 @@ #include #include #include -#include -#include #include #include @@ -16,6 +14,8 @@ #include #include "Stats.h" +#include + using Ports = std::vector; using Strings = std::vector; @@ -59,6 +59,9 @@ private: std::atomic shutdown = false; std::shared_ptr info; + bool 
print_to_stdout; + std::optional file_output; + bool output_file_with_timestamp; Stopwatch total_watch; Stopwatch delay_watch; diff --git a/utils/keeper-bench/Stats.cpp b/utils/keeper-bench/Stats.cpp index 21de72f6047..f5e5f84ba14 100644 --- a/utils/keeper-bench/Stats.cpp +++ b/utils/keeper-bench/Stats.cpp @@ -119,17 +119,21 @@ void Stats::report(size_t concurrency) } } -void Stats::writeJSON(DB::WriteBuffer & out, size_t concurrency) +void Stats::writeJSON(DB::WriteBuffer & out, size_t concurrency, int64_t start_timestamp) { using namespace rapidjson; Document results; auto & allocator = results.GetAllocator(); results.SetObject(); + results.AddMember("timestamp", Value(start_timestamp), allocator); + const auto get_results = [&](auto & collector) { Value specific_results(kObjectType); + specific_results.AddMember("total_requests", Value(collector.requests), allocator); + auto [rps, bps] = collector.getThroughput(concurrency); specific_results.AddMember("requests_per_second", Value(rps), allocator); specific_results.AddMember("bytes_per_second", Value(bps), allocator); @@ -139,9 +143,8 @@ void Stats::writeJSON(DB::WriteBuffer & out, size_t concurrency) const auto add_percentile = [&](double percent) { Value percentile(kObjectType); - percentile.AddMember("percentile", Value(percent), allocator); - percentile.AddMember("value", Value(collector.getPercentile(percent)), allocator); - + Value percent_key(fmt::format("{:.2f}", percent).c_str(), allocator); + percentile.AddMember(percent_key, Value(collector.getPercentile(percent)), allocator); percentiles.PushBack(percentile, allocator); }; diff --git a/utils/keeper-bench/Stats.h b/utils/keeper-bench/Stats.h index 7afd8ce4f1a..bc50588e837 100644 --- a/utils/keeper-bench/Stats.h +++ b/utils/keeper-bench/Stats.h @@ -36,7 +36,7 @@ struct Stats void clear(); void report(size_t concurrency); - void writeJSON(DB::WriteBuffer & out, size_t concurrency); + void writeJSON(DB::WriteBuffer & out, size_t concurrency, int64_t start_timestamp); }; diff --git a/utils/keeper-bench/example.yaml b/utils/keeper-bench/example.yaml index 2febb881634..e800e923482 100644 --- a/utils/keeper-bench/example.yaml +++ b/utils/keeper-bench/example.yaml @@ -109,3 +109,9 @@ generator: - "/test4" path: children_of: "/test" + +output: + file: + path: "output.json" + with_timestamp: true + stdout: true From ef5d8b4f37f1e26154aaefe5a2fe63a60586b230 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 17 Apr 2023 11:30:59 +0000 Subject: [PATCH 12/67] Add readme --- utils/keeper-bench/README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 utils/keeper-bench/README.md diff --git a/utils/keeper-bench/README.md b/utils/keeper-bench/README.md new file mode 100644 index 00000000000..0c6d01a0418 --- /dev/null +++ b/utils/keeper-bench/README.md @@ -0,0 +1 @@ +Keeper Bench From 06087b3bf0a0d6c3b3c5e4e8363ca72746be0198 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Mon, 17 Apr 2023 14:09:54 +0000 Subject: [PATCH 13/67] update README --- utils/keeper-bench/README.md | 318 +++++++++++++++++++++++++++++++++- utils/keeper-bench/Runner.cpp | 2 +- 2 files changed, 318 insertions(+), 2 deletions(-) diff --git a/utils/keeper-bench/README.md b/utils/keeper-bench/README.md index 0c6d01a0418..8b498228799 100644 --- a/utils/keeper-bench/README.md +++ b/utils/keeper-bench/README.md @@ -1 +1,317 @@ -Keeper Bench +# Keeper Bench + +Keeper Bench is a tool for benchmarking Keeper or any ZooKeeper compatible systems. 
+
+To run it, call the following command from the build folder:
+
+```
+./utils/keeper-bench --config benchmark_config_file.yaml
+```
+
+## Configuration file
+
+Keeper Bench runs are configured with a YAML or XML file.
+An example of a configuration file can be found in `./utils/keeper-bench/example.yaml`.
+
+### Table of contents
+- [Special Types](#special-types)
+- [General settings](#general-settings)
+- [Connections](#connections)
+- [Generator](#generator)
+- [Output](#output)
+
+
+## Special types
+
+### IntegerGetter
+
+Can be defined as a constant integer or as a random value from a range.
+
+```yaml
+key: integer
+key:
+  min_value: integer
+  max_value: integer
+```
+
+Example for a constant value:
+
+```yaml
+some_key: 2
+```
+
+Example for a random value from [10, 20]:
+
+```yaml
+some_key:
+  min_value: 10
+  max_value: 20
+```
+
+### StringGetter
+
+Can be defined as a constant string or as a random string of some size.
+
+```yaml
+key: string
+key:
+  random_string:
+    size: IntegerGetter
+```
+
+Example for a constant value:
+```yaml
+some_key: "string"
+```
+
+Example for a random string with a random size from [10, 20]:
+```yaml
+some_key:
+  random_string:
+    size:
+      min_value: 10
+      max_value: 20
+```
+
+
+### PathGetter
+
+If a section contains one or more `path` keys, all `path` keys are collected into a list. \
+Additionally, a path can be defined with the key `children_of`, which adds all children of the given path to the list.
+
+```yaml
+path: string
+path:
+  children_of: string
+```
+
+Example defining a list of paths (`/path1`, `/path2` and the children of `/path3`):
+
+```yaml
+main:
+  path:
+    - "/path1"
+    - "/path2"
+  path:
+    children_of: "/path3"
+```
+
+
+## General settings
+
+```yaml
+# number of parallel queries (default: 1)
+concurrency: integer
+
+# number of queries to be executed; set 0 to disable the limit (default: 0)
+iterations: integer
+
+# delay between intermediate reports in seconds; set 0 to disable reports (default: 1.0)
+report_delay: double
+
+# stop launching queries after the specified time limit; set 0 to disable the limit (default: 0)
+timelimit: double
+
+# continue testing even if a query fails (default: false)
+continue_on_errors: boolean
+```
+
+
+## Connections
+
+Connection definitions that will be used throughout the tests are defined under the `connections` key.
+
+The following settings can be defined under the `connections` key or for each specific connection. \
+A setting defined under the `connections` key is used by default unless a specific connection overrides it.
+
+```yaml
+secure: boolean
+operation_timeout_ms: integer
+session_timeout_ms: integer
+connection_timeout_ms: integer
+```
+
+A specific connection can be defined with a plain string or with a detailed description.
+
+```yaml
+host: string
+connection:
+  host: string
+
+  # number of sessions to create for the host
+  sessions: integer
+  # any connection setting defined above
+```
+
+Example defining 3 connections in total, 1 to `localhost:9181` and 2 to `localhost:9182`, all of them using secure connections:
+
+```yaml
+connections:
+  secure: true
+
+  host: "localhost:9181"
+  connection:
+    host: "localhost:9182"
+    sessions: 2
+```
+
+
+## Generator
+
+The main part of the benchmark is the generator itself, which creates the necessary nodes and defines how requests are generated. \
+It is defined under the `generator` key.
+
+### Setup
+
+Setup defines the nodes that are needed for the test; it is defined under the `setup` key.
+
+Each node is defined with a `node` key in the following format:
+
+```yaml
+node: StringGetter
+
+node:
+  name: StringGetter
+  data: StringGetter
+  repeat: integer
+  node: Node
+```
+
+If only a string is defined, a node with that name will be created. \
+Otherwise, a more detailed definition can be included to set the data or the children of the node. \
+If the `repeat` key is set, the node definition is used multiple times. For the `repeat` key to be valid, the name of the node needs to be a random string.
+
+Example for a setup:
+
+```yaml
+generator:
+  setup:
+    node: "node1"
+    node:
+      name:
+        random_string:
+          size: 20
+      data: "somedata"
+      repeat: 4
+    node:
+      name:
+        random_string:
+          size: 10
+      repeat: 2
+```
+
+This creates the node `/node1` with no data, and 4 nodes with random names of length 20 and data set to `somedata`. \
+It also creates 2 nodes with no data and random names of length 10 under the `/` node.
+
+### Requests
+
+While the benchmark is running, requests are continuously generated.
+
+The request generator is defined under the `requests` key. \
+For each request a `weight` (default: 1) can be defined, which sets the preference for that request.
+
+#### `create`
+
+```yaml
+create:
+  # parent path for created nodes
+  path: string
+
+  # length of the name for the created node (default: 5)
+  name_length: IntegerGetter
+
+  # data for created nodes (default: "")
+  data: StringGetter
+
+  # value in range [0.0, 1.0) denoting how often a remove request should be generated compared to create requests (default: 0)
+  remove_factor: double
+```
+
+#### `set`
+
+```yaml
+set:
+  # paths on which we randomly set data
+  path: PathGetter
+
+  # data to set
+  data: StringGetter
+```
+
+#### `get`
+
+```yaml
+get:
+  # paths for which we randomly get data
+  path: PathGetter
+```
+
+#### `list`
+
+```yaml
+list:
+  # paths for which we randomly do a list request
+  path: PathGetter
+```
+
+#### `multi`
+
+```yaml
+multi:
+  # any request definition defined above can be added
+
+  # optional size for the multi request
+  size: IntegerGetter
+```
+
+A multi request definition can contain any of the other request generator definitions described above. \
+If the `size` key is defined, `size` requests are randomly picked from the defined request generators. \
+Any request generator can be given a higher pick probability with the `weight` key. \
+If `size` is not defined, a multi request containing exactly the defined request generators is always generated. \
+Both write and read multi requests are supported.
+
+#### Example
+
+```yaml
+generator:
+  requests:
+    create:
+      path: "/test_create"
+      name_length:
+        min_value: 10
+        max_value: 20
+    multi:
+      weight: 20
+      size: 10
+      get:
+        path:
+          children_of: "/test_get1"
+      get:
+        weight: 2
+        path:
+          children_of: "/test_get2"
+```
+
+This defines a request generator that generates either a `create` or a `multi` request. \
+Each `create` request creates a node under `/test_create` with a randomly generated name of length in the range `[10, 20]`. \
+The `multi` request is generated 20 times more often than the `create` request (with these weights it is picked with probability 20/21, i.e. about 95% of the time). \
+Each `multi` request contains 10 requests, with approximately twice as many `get` requests for the children of `/test_get2` as for `/test_get1`.
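For reference before the output options below: the results JSON that `Stats::writeJSON` produces (patches 10 and 11 above) has roughly the following shape. This is a sketch reconstructed from the `writeJSON` code; the numbers are illustrative, the percentile list is abbreviated, and latency values are in milliseconds:

```json
{
  "timestamp": 1681740000000,
  "read_results": {
    "total_requests": 100000,
    "requests_per_second": 25000.0,
    "bytes_per_second": 1250000.0,
    "percentiles": [
      { "0.00": 0.1 },
      { "50.00": 0.8 },
      { "99.99": 15.2 }
    ]
  },
  "write_results": {
    "total_requests": 50000,
    "requests_per_second": 9000.0,
    "bytes_per_second": 700000.0,
    "percentiles": [
      { "0.00": 0.2 },
      { "50.00": 1.4 },
      { "99.99": 30.5 }
    ]
  }
}
```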
+
+
+## Output
+
+```yaml
+output:
+  # if defined, JSON output of results will be stored at the defined path
+  file: string
+  # or
+  file:
+    # if defined, JSON output of results will be stored at the defined path
+    path: string
+
+    # if set to true, a timestamp will be appended to the output file name (default: false)
+    with_timestamp: boolean
+
+  # if set to true, output will also be printed to stdout (default: false)
+  stdout: boolean
+```

diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp
index bb8ff46a20c..36ffae40ce4 100644
--- a/utils/keeper-bench/Runner.cpp
+++ b/utils/keeper-bench/Runner.cpp
@@ -329,7 +329,7 @@ void Runner::runBenchmark()
     generator->startup(*connections[0]);
     std::cerr << "Prepared\n";

-    auto start_timestamp_ms = Poco::Timestamp{}.epochMicroseconds() / 1000;
+    auto start_timestamp_ms = Poco::Timestamp().epochMicroseconds() / 1000;

From 10cfc146cb2c5a9ea623bc62022ca2b4476fbe0f Mon Sep 17 00:00:00 2001
From: Antonio Andelic
Date: Mon, 17 Apr 2023 15:09:35 +0000
Subject: [PATCH 14/67] Fix spaces

---
 utils/keeper-bench/Runner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/keeper-bench/Runner.cpp b/utils/keeper-bench/Runner.cpp
index 36ffae40ce4..f86d2b44dd7 100644
--- a/utils/keeper-bench/Runner.cpp
+++ b/utils/keeper-bench/Runner.cpp
@@ -329,7 +329,7 @@ void Runner::runBenchmark()
     generator->startup(*connections[0]);
     std::cerr << "Prepared\n";

-    auto start_timestamp_ms = Poco::Timestamp().epochMicroseconds() / 1000;
+    auto start_timestamp_ms = Poco::Timestamp().epochMicroseconds() / 1000;

From d14cc1691cb0e9efc573856e31423a704b42f6a8 Mon Sep 17 00:00:00 2001
From: Smita Kulkarni
Date: Mon, 17 Apr 2023 18:53:26 +0200
Subject: Added an option 'force' to clearOldTemporaryDirectories, which is
 currently used by dropAllData to remove blobs when zero copy replication is
 enabled.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/Storages/MergeTree/MergeTreeData.cpp                    | 6 +++---
 src/Storages/MergeTree/MergeTreeData.h                      | 3 ++-
 .../queries/0_stateless/02432_s3_parallel_parts_cleanup.sql | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 45759c449f6..5c189887e23 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -1960,7 +1960,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa
 }

-size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes)
+size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes, const bool & force)
 {
     /// If the method is already called from another thread, then we don't need to do anything.
     std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock);
@@ -2018,7 +2018,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif
                 /// We don't control the amount of refs for temporary parts so we cannot decide can we remove blobs
                 /// or not. So we are not doing it
                bool keep_shared = false;
-                if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication)
+                if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication && !force)
                 {
                     LOG_WARNING(log, "Since zero-copy replication is enabled we are not going to remove blobs from shared storage for {}", full_path);
                     keep_shared = true;
@@ -2724,7 +2724,7 @@ void MergeTreeData::dropAllData()
     }

     LOG_INFO(log, "dropAllData: clearing temporary directories");
-    clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"});
+    clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}, /* force */ true);

     column_sizes.clear();

diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h
index b03b7d4a71e..3053657e37b 100644
--- a/src/Storages/MergeTree/MergeTreeData.h
+++ b/src/Storages/MergeTree/MergeTreeData.h
@@ -667,7 +667,8 @@ public:
     /// Delete all directories which names begin with "tmp"
     /// Must be called with locked lockForShare() because it's using relative_data_path.
-    size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"});
+    /// 'force' is used by dropAllData(); it removes blobs even if zero-copy replication is enabled
+    size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"}, const bool & force = false);

     size_t clearEmptyParts();

diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql
index 3688a649d5e..0230f30bf05 100644
--- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql
+++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql
@@ -55,7 +55,7 @@ select sleep(3);
 select count(), sum(n), sum(m) from rmt;
 select count(), sum(n), sum(m) from rmt2;

--- So there will be at least 2 parts (just in case no parts are removed until drop)
+-- So there will be at least 2 parts (just in case no parts are removed until drop).
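-- Note: the `drop table` below exercises dropAllData(), which after this patch
-- clears temporary directories with force=true, removing blobs even when
-- zero-copy replication is enabled.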
insert into rmt(n) values (10); drop table rmt; From fb16623d48da107504a6d0c5d6b52fc525a34424 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 18 Apr 2023 13:15:23 +0000 Subject: [PATCH 16/67] Add CheckNotExists request to Keeper --- src/Common/ZooKeeper/IKeeper.h | 5 +- src/Common/ZooKeeper/TestKeeper.h | 2 +- src/Common/ZooKeeper/ZooKeeper.cpp | 3 +- src/Common/ZooKeeper/ZooKeeper.h | 18 ++++- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 3 + src/Common/ZooKeeper/ZooKeeperCommon.h | 4 +- src/Common/ZooKeeper/ZooKeeperConstants.cpp | 3 + src/Common/ZooKeeper/ZooKeeperConstants.h | 1 + src/Common/ZooKeeper/ZooKeeperImpl.cpp | 2 +- src/Common/ZooKeeper/ZooKeeperImpl.h | 2 +- .../ZooKeeper/ZooKeeperWithFaultInjection.h | 6 ++ src/Coordination/KeeperConstants.h | 5 +- src/Coordination/KeeperStorage.cpp | 59 +++++++++++---- src/Coordination/tests/gtest_coordination.cpp | 72 +++++++++++++++++++ .../MergeTree/EphemeralLockInZooKeeper.cpp | 18 ++--- src/Storages/StorageReplicatedMergeTree.cpp | 6 +- 16 files changed, 170 insertions(+), 39 deletions(-) diff --git a/src/Common/ZooKeeper/IKeeper.h b/src/Common/ZooKeeper/IKeeper.h index 172714fe04f..b09f096d761 100644 --- a/src/Common/ZooKeeper/IKeeper.h +++ b/src/Common/ZooKeeper/IKeeper.h @@ -319,6 +319,9 @@ struct CheckRequest : virtual Request String path; int32_t version = -1; + /// should it check if a node DOES NOT exist + bool not_exists = false; + void addRootPath(const String & root_path) override; String getPath() const override { return path; } @@ -524,7 +527,7 @@ public: const Requests & requests, MultiCallback callback) = 0; - virtual DB::KeeperApiVersion getApiVersion() = 0; + virtual DB::KeeperApiVersion getApiVersion() const = 0; /// Expire session and finish all pending requests virtual void finalize(const String & reason) = 0; diff --git a/src/Common/ZooKeeper/TestKeeper.h b/src/Common/ZooKeeper/TestKeeper.h index fb4e527e50e..27405d8d571 100644 --- a/src/Common/ZooKeeper/TestKeeper.h +++ b/src/Common/ZooKeeper/TestKeeper.h @@ -91,7 +91,7 @@ public: void finalize(const String & reason) override; - DB::KeeperApiVersion getApiVersion() override + DB::KeeperApiVersion getApiVersion() const override { return KeeperApiVersion::ZOOKEEPER_COMPATIBLE; } diff --git a/src/Common/ZooKeeper/ZooKeeper.cpp b/src/Common/ZooKeeper/ZooKeeper.cpp index a8da0dff0cc..54a2e2dc519 100644 --- a/src/Common/ZooKeeper/ZooKeeper.cpp +++ b/src/Common/ZooKeeper/ZooKeeper.cpp @@ -821,7 +821,7 @@ bool ZooKeeper::expired() return impl->isExpired(); } -DB::KeeperApiVersion ZooKeeper::getApiVersion() +DB::KeeperApiVersion ZooKeeper::getApiVersion() const { return impl->getApiVersion(); } @@ -1282,7 +1282,6 @@ Coordination::RequestPtr makeExistsRequest(const std::string & path) return request; } - std::string normalizeZooKeeperPath(std::string zookeeper_path, bool check_starts_with_slash, Poco::Logger * log) { if (!zookeeper_path.empty() && zookeeper_path.back() == '/') diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index 8e7639b8cc1..b31dbc8da49 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -215,7 +215,7 @@ public: /// Returns true, if the session has expired. bool expired(); - DB::KeeperApiVersion getApiVersion(); + DB::KeeperApiVersion getApiVersion() const; /// Create a znode. /// Throw an exception if something went wrong. 
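The next hunk adds a small compatibility helper, `addCheckNotExistsRequest`: on servers that report `KeeperApiVersion::WITH_CHECK_NOT_EXISTS` it appends a single check request with `not_exists = true`, and on older servers it falls back to the create-plus-remove pair used before this patch. A minimal usage sketch follows, assuming the by-reference signature from the later fix in this series; the paths, the surrounding operations, and the `zookeeper` client variable are hypothetical, not taken from this patch:

```cpp
// Sketch: make a whole multi-op transaction fail if a dedup node already exists.
// Assumes `zookeeper` is a zkutil::ZooKeeperPtr connected to the cluster.
Coordination::Requests ops;

// On new servers this appends one CheckNotExists op for the path;
// on old servers it appends a create+remove pair with the same effect.
zkutil::addCheckNotExistsRequest(ops, *zookeeper, "/table/dedup/block-42");

ops.emplace_back(zkutil::makeCreateRequest(
    "/table/locks/lock-", "", zkutil::CreateMode::EphemeralSequential));

Coordination::Responses responses;
// Returns ZNODEEXISTS instead of committing when "/table/dedup/block-42" exists.
Coordination::Error rc = zookeeper->tryMulti(ops, responses);
```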
@@ -674,4 +674,20 @@ bool hasZooKeeperConfig(const Poco::Util::AbstractConfiguration & config); String getZooKeeperConfigName(const Poco::Util::AbstractConfiguration & config); +template +void addCheckNotExistsRequest(Coordination::Requests requests, const Client & client, const std::string & path) +{ + if (client.getApiVersion() >= DB::KeeperApiVersion::WITH_CHECK_NOT_EXISTS) + { + auto request = std::make_shared(); + request->path = path; + request->not_exists = true; + requests.push_back(std::move(request)); + return; + } + + requests.push_back(makeCreateRequest(path, "", zkutil::CreateMode::Persistent)); + requests.push_back(makeRemoveRequest(path, -1)); +} + } diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 1ee56936889..03bfafac0c2 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -931,6 +931,8 @@ void registerZooKeeperRequest(ZooKeeperRequestFactory & factory) res->operation_type = ZooKeeperMultiRequest::OperationType::Read; else if constexpr (num == OpNum::Multi) res->operation_type = ZooKeeperMultiRequest::OperationType::Write; + else if constexpr (num == OpNum::CheckNotExists) + res->not_exists = true; return res; }); @@ -956,6 +958,7 @@ ZooKeeperRequestFactory::ZooKeeperRequestFactory() registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); registerZooKeeperRequest(*this); + registerZooKeeperRequest(*this); } PathMatchResult matchPath(std::string_view path, std::string_view match_to) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index 1755ebd8ccc..fccccfd2058 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -390,12 +390,12 @@ struct ZooKeeperSimpleListResponse final : ZooKeeperListResponse size_t bytesSize() const override { return ZooKeeperListResponse::bytesSize() - sizeof(stat); } }; -struct ZooKeeperCheckRequest final : CheckRequest, ZooKeeperRequest +struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest { ZooKeeperCheckRequest() = default; explicit ZooKeeperCheckRequest(const CheckRequest & base) : CheckRequest(base) {} - OpNum getOpNum() const override { return OpNum::Check; } + OpNum getOpNum() const override { return not_exists ? 
OpNum::CheckNotExists : OpNum::Check; } void writeImpl(WriteBuffer & out) const override; void readImpl(ReadBuffer & in) override; std::string toStringImpl() const override; diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.cpp b/src/Common/ZooKeeper/ZooKeeperConstants.cpp index c2e4c0f5cbd..86f70ea547a 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.cpp +++ b/src/Common/ZooKeeper/ZooKeeperConstants.cpp @@ -26,6 +26,7 @@ static const std::unordered_set VALID_OPERATIONS = static_cast(OpNum::SetACL), static_cast(OpNum::GetACL), static_cast(OpNum::FilteredList), + static_cast(OpNum::CheckNotExists), }; std::string toString(OpNum op_num) @@ -70,6 +71,8 @@ std::string toString(OpNum op_num) return "GetACL"; case OpNum::FilteredList: return "FilteredList"; + case OpNum::CheckNotExists: + return "CheckNotExists"; } int32_t raw_op = static_cast(op_num); throw Exception("Operation " + std::to_string(raw_op) + " is unknown", Error::ZUNIMPLEMENTED); diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index 912e253718b..6b50c5c5d09 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -36,6 +36,7 @@ enum class OpNum : int32_t // CH Keeper specific operations FilteredList = 500, + CheckNotExists = 501, SessionID = 997, /// Special internal request }; diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.cpp b/src/Common/ZooKeeper/ZooKeeperImpl.cpp index f97bf292198..6c79fc4f178 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp +++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp @@ -1085,7 +1085,7 @@ void ZooKeeper::pushRequest(RequestInfo && info) ProfileEvents::increment(ProfileEvents::ZooKeeperTransactions); } -KeeperApiVersion ZooKeeper::getApiVersion() +KeeperApiVersion ZooKeeper::getApiVersion() const { return keeper_api_version; } diff --git a/src/Common/ZooKeeper/ZooKeeperImpl.h b/src/Common/ZooKeeper/ZooKeeperImpl.h index 9fff12309bd..c0c57d3f719 100644 --- a/src/Common/ZooKeeper/ZooKeeperImpl.h +++ b/src/Common/ZooKeeper/ZooKeeperImpl.h @@ -179,7 +179,7 @@ public: const Requests & requests, MultiCallback callback) override; - DB::KeeperApiVersion getApiVersion() override; + DB::KeeperApiVersion getApiVersion() const override; /// Without forcefully invalidating (finalizing) ZooKeeper session before /// establishing a new one, there was a possibility that server is using diff --git a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h index 130590ceb40..214a2eb944a 100644 --- a/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h +++ b/src/Common/ZooKeeper/ZooKeeperWithFaultInjection.h @@ -6,6 +6,7 @@ #include #include #include +#include "Coordination/KeeperConstants.h" namespace DB { @@ -381,6 +382,11 @@ public: ephemeral_nodes.clear(); } + KeeperApiVersion getApiVersion() const + { + return keeper->getApiVersion(); + } + private: void faultInjectionBefore(std::function fault_cleanup) { diff --git a/src/Coordination/KeeperConstants.h b/src/Coordination/KeeperConstants.h index 952689af01f..4b5a5b54be0 100644 --- a/src/Coordination/KeeperConstants.h +++ b/src/Coordination/KeeperConstants.h @@ -9,10 +9,11 @@ enum class KeeperApiVersion : uint8_t { ZOOKEEPER_COMPATIBLE = 0, WITH_FILTERED_LIST, - WITH_MULTI_READ + WITH_MULTI_READ, + WITH_CHECK_NOT_EXISTS, }; -inline constexpr auto current_keeper_api_version = KeeperApiVersion::WITH_MULTI_READ; +inline constexpr auto current_keeper_api_version = KeeperApiVersion::WITH_CHECK_NOT_EXISTS; const std::string 
keeper_system_path = "/keeper"; const std::string keeper_api_version_path = keeper_system_path + "/api_version"; diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index cfc1c2bd12b..28cb4fba9c9 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1449,24 +1449,44 @@ struct KeeperStorageListRequestProcessor final : public KeeperStorageRequestProc struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestProcessor { - bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + explicit KeeperStorageCheckRequestProcessor(const Coordination::ZooKeeperRequestPtr & zk_request_) + : KeeperStorageRequestProcessor(zk_request_) { - return storage.checkACL(zk_request->getPath(), Coordination::ACL::Read, session_id, is_local); + check_not_exists = zk_request->getOpNum() == Coordination::OpNum::CheckNotExists; + } + + bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override + { + StringRef path; + if (check_not_exists) + path = parentPath(zk_request->getPath()); + else + path = zk_request->getPath(); + + return storage.checkACL(path, Coordination::ACL::Read, session_id, is_local); } - using KeeperStorageRequestProcessor::KeeperStorageRequestProcessor; std::vector preprocess(KeeperStorage & storage, int64_t zxid, int64_t /*session_id*/, int64_t /*time*/, uint64_t & /*digest*/, const KeeperContext & /*keeper_context*/) const override { ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); + Coordination::ZooKeeperCheckRequest & request = dynamic_cast(*zk_request); - if (!storage.uncommitted_state.getNode(request.path)) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; - auto node = storage.uncommitted_state.getNode(request.path); - if (request.version != -1 && request.version != node->stat.version) - return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + if (check_not_exists) + { + if (node && (request.version == -1 || request.version == node->stat.version)) + return {KeeperStorage::Delta{zxid, Coordination::Error::ZNODEEXISTS}}; + } + else + { + if (!node) + return {KeeperStorage::Delta{zxid, Coordination::Error::ZNONODE}}; + + if (request.version != -1 && request.version != node->stat.version) + return {KeeperStorage::Delta{zxid, Coordination::Error::ZBADVERSION}}; + } return {}; } @@ -1497,17 +1517,22 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro auto & container = storage.container; auto node_it = container.find(request.path); - if (node_it == container.end()) + + if (check_not_exists) { - on_error(Coordination::Error::ZNONODE); - } - else if (request.version != -1 && request.version != node_it->value.stat.version) - { - on_error(Coordination::Error::ZBADVERSION); + if (node_it != container.end() && (request.version == -1 || request.version == node_it->value.stat.version)) + on_error(Coordination::Error::ZNODEEXISTS); + else + response.error = Coordination::Error::ZOK; } else { - response.error = Coordination::Error::ZOK; + if (node_it == container.end()) + on_error(Coordination::Error::ZNONODE); + else if (request.version != -1 && request.version != node_it->value.stat.version) + on_error(Coordination::Error::ZBADVERSION); + else + response.error = Coordination::Error::ZOK; } return response_ptr; @@ -1523,6 +1548,9 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro ProfileEvents::increment(ProfileEvents::KeeperCheckRequest); 
return processImpl(storage, zxid); } + +private: + bool check_not_exists; }; @@ -1971,6 +1999,7 @@ KeeperStorageRequestProcessorsFactory::KeeperStorageRequestProcessorsFactory() registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); registerKeeperRequestProcessor(*this); + registerKeeperRequestProcessor(*this); } diff --git a/src/Coordination/tests/gtest_coordination.cpp b/src/Coordination/tests/gtest_coordination.cpp index b1bea8ddf24..62217fb2dd3 100644 --- a/src/Coordination/tests/gtest_coordination.cpp +++ b/src/Coordination/tests/gtest_coordination.cpp @@ -2451,6 +2451,78 @@ TEST_P(CoordinationTest, ChangelogTestMaxLogSize) } +TEST_P(CoordinationTest, TestCheckNotExistsRequest) +{ + using namespace DB; + using namespace Coordination; + + KeeperStorage storage{500, "", keeper_context}; + + int32_t zxid = 0; + + const auto create_path = [&](const auto & path) + { + const auto create_request = std::make_shared(); + int new_zxid = ++zxid; + create_request->path = path; + storage.preprocessRequest(create_request, 1, 0, new_zxid); + auto responses = storage.processRequest(create_request, 1, new_zxid); + + EXPECT_GE(responses.size(), 1); + EXPECT_EQ(responses[0].response->error, Coordination::Error::ZOK) << "Failed to create " << path; + }; + + const auto check_request = std::make_shared(); + check_request->path = "/test_node"; + check_request->not_exists = true; + + { + SCOPED_TRACE("CheckNotExists returns ZOK"); + int new_zxid = ++zxid; + storage.preprocessRequest(check_request, 1, 0, new_zxid); + auto responses = storage.processRequest(check_request, 1, new_zxid); + EXPECT_GE(responses.size(), 1); + auto error = responses[0].response->error; + EXPECT_EQ(error, Coordination::Error::ZOK) << "CheckNotExists returned invalid result: " << errorMessage(error); + } + + create_path("/test_node"); + auto node_it = storage.container.find("/test_node"); + ASSERT_NE(node_it, storage.container.end()); + auto node_version = node_it->value.stat.version; + + { + SCOPED_TRACE("CheckNotExists returns ZNODEEXISTS"); + int new_zxid = ++zxid; + storage.preprocessRequest(check_request, 1, 0, new_zxid); + auto responses = storage.processRequest(check_request, 1, new_zxid); + EXPECT_GE(responses.size(), 1); + auto error = responses[0].response->error; + EXPECT_EQ(error, Coordination::Error::ZNODEEXISTS) << "CheckNotExists returned invalid result: " << errorMessage(error); + } + + { + SCOPED_TRACE("CheckNotExists returns ZNODEEXISTS for same version"); + int new_zxid = ++zxid; + check_request->version = node_version; + storage.preprocessRequest(check_request, 1, 0, new_zxid); + auto responses = storage.processRequest(check_request, 1, new_zxid); + EXPECT_GE(responses.size(), 1); + auto error = responses[0].response->error; + EXPECT_EQ(error, Coordination::Error::ZNODEEXISTS) << "CheckNotExists returned invalid result: " << errorMessage(error); + } + + { + SCOPED_TRACE("CheckNotExists returns ZOK for different version"); + int new_zxid = ++zxid; + check_request->version = node_version + 1; + storage.preprocessRequest(check_request, 1, 0, new_zxid); + auto responses = storage.processRequest(check_request, 1, new_zxid); + EXPECT_GE(responses.size(), 1); + auto error = responses[0].response->error; + EXPECT_EQ(error, Coordination::Error::ZOK) << "CheckNotExists returned invalid result: " << errorMessage(error); + } +} INSTANTIATE_TEST_SUITE_P(CoordinationTestSuite, CoordinationTest, diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp 
b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index 996d2bc46a5..5741e11aa22 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -24,7 +24,7 @@ template std::optional createEphemeralLockInZooKeeper( const String & path_prefix_, const String & temp_path, const ZooKeeperWithFaultInjectionPtr & zookeeper_, const T & deduplication_path) { - constexpr bool async_insert = std::is_same_v>; + static constexpr bool async_insert = std::is_same_v>; String path; @@ -42,16 +42,15 @@ std::optional createEphemeralLockInZooKeeper( if constexpr (async_insert) { for (const auto & single_dedup_path : deduplication_path) - { - ops.emplace_back(zkutil::makeCreateRequest(single_dedup_path, "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeRemoveRequest(single_dedup_path, -1)); - } + zkutil::addCheckNotExistsRequest(ops, *zookeeper_, single_dedup_path); } else { - ops.emplace_back(zkutil::makeCreateRequest(deduplication_path, "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeRemoveRequest(deduplication_path, -1)); + zkutil::addCheckNotExistsRequest(ops, *zookeeper_, deduplication_path); } + + auto deduplication_path_ops_size = ops.size(); + ops.emplace_back(zkutil::makeCreateRequest(path_prefix_, holder_path, zkutil::CreateMode::EphemeralSequential)); Coordination::Responses responses; Coordination::Error e = zookeeper_->tryMulti(ops, responses); @@ -60,9 +59,10 @@ std::optional createEphemeralLockInZooKeeper( if constexpr (async_insert) { auto failed_idx = zkutil::getFailedOpIndex(Coordination::Error::ZNODEEXISTS, responses); - if (failed_idx < deduplication_path.size() * 2) + + if (failed_idx < deduplication_path_ops_size) { - const String & failed_op_path = deduplication_path[failed_idx / 2]; + const String & failed_op_path = ops[failed_idx]->getPath(); LOG_DEBUG( &Poco::Logger::get("createEphemeralLockInZooKeeper"), "Deduplication path already exists: deduplication_path={}", diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 5cd02c33d55..9aa36f18775 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2467,8 +2467,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo { /// We check that it was not suddenly upgraded to new version. /// Otherwise it can be upgraded and instantly become lost, but we cannot notice that. - ops.push_back(zkutil::makeCreateRequest(fs::path(source_path) / "is_lost", "0", zkutil::CreateMode::Persistent)); - ops.push_back(zkutil::makeRemoveRequest(fs::path(source_path) / "is_lost", -1)); + zkutil::addCheckNotExistsRequest(ops, *zookeeper, fs::path(source_path) / "is_lost"); } else /// The replica we clone should not suddenly become lost. 
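/// Both branches guard the same invariant inside one multi-op: if the source
/// replica predates "is_lost", CheckNotExists asserts the node is still absent
/// at commit time; if the node already exists, the check below pins the exact
/// version observed when cloning started.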
ops.push_back(zkutil::makeCheckRequest(fs::path(source_path) / "is_lost", source_is_lost_stat.version)); @@ -8869,8 +8868,7 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP /// We must be sure that this part doesn't exist on other replicas if (!zookeeper->exists(current_part_path)) { - ops.emplace_back(zkutil::makeCreateRequest(current_part_path, "", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeRemoveRequest(current_part_path, -1)); + zkutil::addCheckNotExistsRequest(ops, *zookeeper, current_part_path); } else { From 58e9b56fcbb54e3ddfecfa64e9ea015ce25f8107 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 19 Apr 2023 09:06:20 +0000 Subject: [PATCH 17/67] Fix CheckNotExists --- src/Common/ZooKeeper/ZooKeeper.h | 2 +- src/Coordination/KeeperStorage.cpp | 10 +++------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeper.h b/src/Common/ZooKeeper/ZooKeeper.h index b31dbc8da49..636c9049af0 100644 --- a/src/Common/ZooKeeper/ZooKeeper.h +++ b/src/Common/ZooKeeper/ZooKeeper.h @@ -675,7 +675,7 @@ bool hasZooKeeperConfig(const Poco::Util::AbstractConfiguration & config); String getZooKeeperConfigName(const Poco::Util::AbstractConfiguration & config); template -void addCheckNotExistsRequest(Coordination::Requests requests, const Client & client, const std::string & path) +void addCheckNotExistsRequest(Coordination::Requests & requests, const Client & client, const std::string & path) { if (client.getApiVersion() >= DB::KeeperApiVersion::WITH_CHECK_NOT_EXISTS) { diff --git a/src/Coordination/KeeperStorage.cpp b/src/Coordination/KeeperStorage.cpp index 28cb4fba9c9..a838de07ecb 100644 --- a/src/Coordination/KeeperStorage.cpp +++ b/src/Coordination/KeeperStorage.cpp @@ -1457,13 +1457,8 @@ struct KeeperStorageCheckRequestProcessor final : public KeeperStorageRequestPro bool checkAuth(KeeperStorage & storage, int64_t session_id, bool is_local) const override { - StringRef path; - if (check_not_exists) - path = parentPath(zk_request->getPath()); - else - path = zk_request->getPath(); - - return storage.checkACL(path, Coordination::ACL::Read, session_id, is_local); + auto path = zk_request->getPath(); + return storage.checkACL(check_not_exists ? 
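/// For CheckNotExists the target node may legitimately be absent, so the Read
/// ACL is checked on the parent path rather than on the node itself.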
parentPath(path) : path, Coordination::ACL::Read, session_id, is_local); } std::vector @@ -1744,6 +1739,7 @@ struct KeeperStorageMultiRequestProcessor final : public KeeperStorageRequestPro concrete_requests.push_back(std::make_shared(sub_zk_request)); break; case Coordination::OpNum::Check: + case Coordination::OpNum::CheckNotExists: check_operation_type(OperationType::Write); concrete_requests.push_back(std::make_shared(sub_zk_request)); break; From 3f00d467851db0d41e142e4b4ade31aa2c27d8f2 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Wed, 19 Apr 2023 14:07:38 +0200 Subject: [PATCH 18/67] Update enum for ZooKeeperLog --- src/Interpreters/ZooKeeperLog.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/ZooKeeperLog.cpp b/src/Interpreters/ZooKeeperLog.cpp index faa6d1f9f02..48f4d510af7 100644 --- a/src/Interpreters/ZooKeeperLog.cpp +++ b/src/Interpreters/ZooKeeperLog.cpp @@ -87,6 +87,7 @@ NamesAndTypesList ZooKeeperLogElement::getNamesAndTypes() {"Auth", static_cast(Coordination::OpNum::Auth)}, {"SessionID", static_cast(Coordination::OpNum::SessionID)}, {"FilteredList", static_cast(Coordination::OpNum::FilteredList)}, + {"CheckNotExists", static_cast(Coordination::OpNum::CheckNotExists)}, }); auto error_enum = getCoordinationErrorCodesEnumType(); From 31548ab17cb439307e64f67601278e7d9da19a76 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 20 Apr 2023 12:30:24 +0000 Subject: [PATCH 19/67] Fix result --- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 10 ++++++- src/Common/ZooKeeper/ZooKeeperCommon.h | 8 +++++- .../01158_zookeeper_log_long.reference | 28 ++++++++----------- 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 03bfafac0c2..5031af38812 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -666,7 +666,15 @@ ZooKeeperResponsePtr ZooKeeperGetRequest::makeResponse() const { return setTime( ZooKeeperResponsePtr ZooKeeperSetRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperListRequest::makeResponse() const { return setTime(std::make_shared()); } ZooKeeperResponsePtr ZooKeeperSimpleListRequest::makeResponse() const { return setTime(std::make_shared()); } -ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const { return setTime(std::make_shared()); } + +ZooKeeperResponsePtr ZooKeeperCheckRequest::makeResponse() const +{ + if (not_exists) + return setTime(std::make_shared()); + + return setTime(std::make_shared()); +} + ZooKeeperResponsePtr ZooKeeperMultiRequest::makeResponse() const { std::shared_ptr response; diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.h b/src/Common/ZooKeeper/ZooKeeperCommon.h index fccccfd2058..5f00698423e 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.h +++ b/src/Common/ZooKeeper/ZooKeeperCommon.h @@ -408,7 +408,7 @@ struct ZooKeeperCheckRequest : CheckRequest, ZooKeeperRequest void createLogElements(LogElements & elems) const override; }; -struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse +struct ZooKeeperCheckResponse : CheckResponse, ZooKeeperResponse { void readImpl(ReadBuffer &) override {} void writeImpl(WriteBuffer &) const override {} @@ -417,6 +417,12 @@ struct ZooKeeperCheckResponse final : CheckResponse, ZooKeeperResponse size_t bytesSize() const override { return CheckResponse::bytesSize() + sizeof(xid) + sizeof(zxid); } }; +struct ZooKeeperCheckNotExistsResponse : public 
ZooKeeperCheckResponse +{ + OpNum getOpNum() const override { return OpNum::CheckNotExists; } + using ZooKeeperCheckResponse::ZooKeeperCheckResponse; +}; + /// This response may be received only as an element of responses in MultiResponse. struct ZooKeeperErrorResponse final : ErrorResponse, ZooKeeperResponse { diff --git a/tests/queries/0_stateless/01158_zookeeper_log_long.reference b/tests/queries/0_stateless/01158_zookeeper_log_long.reference index a0088610c9d..7ec52cb3366 100644 --- a/tests/queries/0_stateless/01158_zookeeper_log_long.reference +++ b/tests/queries/0_stateless/01158_zookeeper_log_long.reference @@ -18,22 +18,18 @@ Response 0 Create /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 4 Request 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 \N \N \N 0 0 0 0 Response 0 Exists /test/01158/default/rmt/replicas/1/parts/all_0_0_0 0 0 \N 0 0 ZOK \N \N 0 0 96 0 blocks -Request 0 Multi 0 0 \N 3 0 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 \N \N \N 0 0 0 0 -Request 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 \N \N \N 0 0 0 0 -Response 0 Multi 0 0 \N 3 0 ZOK \N \N 0 0 0 0 -Response 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 ZOK \N \N /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 0 0 -Response 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 ZOK \N \N 0 0 0 0 -Response 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 ZOK \N \N /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 0 0 -Request 0 Multi 0 0 \N 3 0 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 \N \N \N 0 0 0 0 -Request 0 Remove /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 \N \N \N 0 0 0 0 -Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 \N \N \N 0 0 0 0 -Response 0 Multi 0 0 \N 3 0 ZNODEEXISTS \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 1 ZNODEEXISTS \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 2 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 -Response 0 Error /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 3 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 +Request 0 Multi 0 0 \N 2 0 \N \N \N 0 0 0 0 +Request 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 \N \N \N 0 0 0 0 +Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 \N \N \N 0 0 0 0 +Response 0 Multi 0 0 \N 2 0 ZOK \N \N 0 0 0 0 +Response 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 ZOK \N \N 0 0 0 0 +Response 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 ZOK \N \N /test/01158/default/rmt/block_numbers/all/block-0000000000 0 0 0 0 +Request 0 Multi 0 0 \N 2 0 \N \N \N 0 0 0 0 +Request 0 CheckNotExists /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 \N \N \N 0 0 0 0 +Request 0 Create /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 \N \N \N 0 0 0 0 +Response 0 Multi 0 0 \N 2 0 
ZNODEEXISTS \N \N 0 0 0 0 +Response 0 Error /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 -1 0 1 ZNODEEXISTS \N \N 0 0 0 0 +Response 0 Error /test/01158/default/rmt/block_numbers/all/block- 1 1 \N 0 2 ZRUNTIMEINCONSISTENCY \N \N 0 0 0 0 Request 0 Get /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 0 \N \N \N 0 0 0 0 Response 0 Get /test/01158/default/rmt/blocks/all_6308706741995381342_2495791770474910886 0 0 \N 0 0 ZOK \N \N 0 0 9 0 duration_ms From a3c7afc03e4ead57ac2debd2e430ce23ae6217cf Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 20 Apr 2023 16:05:16 +0200 Subject: [PATCH 20/67] Reverted changes to drop functionality and updated test to sync rmt2 before drop --- src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../queries/0_stateless/02432_s3_parallel_parts_cleanup.sql | 4 +++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 5c189887e23..45759c449f6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1960,7 +1960,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa } -size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes, const bool & force) +size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes) { /// If the method is already called from another thread, then we don't need to do anything. std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock); @@ -2018,7 +2018,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif /// We don't control the amount of refs for temporary parts so we cannot decide can we remove blobs /// or not. So we are not doing it bool keep_shared = false; - if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication && !force) + if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) { LOG_WARNING(log, "Since zero-copy replication is enabled we are not going to remove blobs from shared storage for {}", full_path); keep_shared = true; @@ -2724,7 +2724,7 @@ void MergeTreeData::dropAllData() } LOG_INFO(log, "dropAllData: clearing temporary directories"); - clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}, /* force */ true); + clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); column_sizes.clear(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 3053657e37b..119ab2ee1d4 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -668,7 +668,7 @@ public: /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. 
/// 'force' is used by dropAllData(), this will remove blobs even if zero copy replication is enabled - size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"}, const bool & force = false); + size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"}); size_t clearEmptyParts(); diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 0230f30bf05..522b2481ec7 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -55,10 +55,12 @@ select sleep(3); select count(), sum(n), sum(m) from rmt; select count(), sum(n), sum(m) from rmt2; --- So there will be at least 2 parts (just in case no parts are removed until drop). +-- So there will be at least 2 parts (just in case no parts are removed until drop) insert into rmt(n) values (10); drop table rmt; + +system sync replica rmt2; drop table rmt2; system flush logs; From c3fe2b9287c782144479c5fd17e3530e2756cfb0 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 20 Apr 2023 16:09:21 +0200 Subject: [PATCH 21/67] Removed extra comment --- src/Storages/MergeTree/MergeTreeData.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 119ab2ee1d4..b03b7d4a71e 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -667,7 +667,6 @@ public: /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. 
- /// 'force' is used by dropAllData(), this will remove blobs even if zero copy replication is enabled size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"}); size_t clearEmptyParts(); From 30375d13d2ee821a7bbb5f79e21dc6cdcbc1a2bd Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 21 Apr 2023 09:35:40 +0200 Subject: [PATCH 22/67] Removed changes from test and updated log level --- tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 522b2481ec7..88fb2cdf9b1 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -1,5 +1,7 @@ -- Tags: no-fasttest +SET send_logs_level = 'fatal'; + drop table if exists rmt; drop table if exists rmt2; @@ -59,8 +61,6 @@ select count(), sum(n), sum(m) from rmt2; insert into rmt(n) values (10); drop table rmt; - -system sync replica rmt2; drop table rmt2; system flush logs; From aa9635e35b8651f62b0a5204ab7dec2a55798913 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 21 Apr 2023 11:24:02 +0200 Subject: [PATCH 23/67] Removed is_part_outdated flag & usage, updated MergeTreeMarksLoader to hold data_part instead of data_part_storage --- src/Storages/MergeTree/IDataPartStorage.h | 2 -- .../IMergeTreeDataPartInfoForReader.h | 2 ++ .../LoadedMergeTreeDataPartInfoForReader.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 3 --- .../MergeTree/MergeTreeIndexReader.cpp | 2 +- .../MergeTree/MergeTreeMarksLoader.cpp | 23 +++++-------------- src/Storages/MergeTree/MergeTreeMarksLoader.h | 4 ++-- .../MergeTree/MergeTreeReaderCompact.cpp | 2 +- .../MergeTree/MergeTreeReaderStream.cpp | 6 ++--- .../MergeTree/MergeTreeReaderStream.h | 2 +- .../MergeTree/MergeTreeReaderWide.cpp | 2 +- ...tem_parts_race_condition_drop_zookeeper.sh | 1 - 12 files changed, 19 insertions(+), 32 deletions(-) diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index aa473a2ab41..b0b42b331cd 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -285,8 +285,6 @@ public: /// It may be flush of buffered data or similar. 
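/// Context for the removal below: with the marks loader now holding the data
/// part itself (see the MergeTreeMarksLoader changes further down), an outdated
/// part stays alive for as long as a reader needs it, so this flag has no
/// remaining users.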
virtual void precommitTransaction() = 0; virtual bool hasActiveTransaction() const = 0; - - mutable std::atomic is_part_outdated = false; }; using DataPartStoragePtr = std::shared_ptr; diff --git a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h index 648c3cfbb6b..af3fd7cbef3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h @@ -40,6 +40,8 @@ public: virtual DataPartStoragePtr getDataPartStorage() const = 0; + virtual DataPartPtr getDataPart() const = 0; + virtual const NamesAndTypesList & getColumns() const = 0; virtual const ColumnsDescription & getColumnsDescription() const = 0; diff --git a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h index 3363c75dd6f..a72285d8e3c 100644 --- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h @@ -25,6 +25,8 @@ public: DataPartStoragePtr getDataPartStorage() const override { return data_part->getDataPartStoragePtr(); } + DataPartPtr getDataPart() const override { return data_part; } + const NamesAndTypesList & getColumns() const override { return data_part->getColumns(); } const ColumnsDescription & getColumnsDescription() const override { return data_part->getColumnsDescription(); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 322558021b7..f5f12660223 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3721,8 +3721,6 @@ void MergeTreeData::removePartsFromWorkingSet(MergeTreeTransaction * txn, const if (isInMemoryPart(part) && getSettings()->in_memory_parts_enable_wal) getWriteAheadLog()->dropPart(part->name); - - part->getDataPartStorage().is_part_outdated = true; } if (removed_active_part) @@ -3887,7 +3885,6 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo addPartContributionToColumnAndSecondaryIndexSizes(part); addPartContributionToDataVolume(part); modifyPartState(part, DataPartState::Active); - part->getDataPartStorage().is_part_outdated = false; } diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 7d7024a8ac2..1ce6d777644 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -20,7 +20,7 @@ std::unique_ptr makeIndexReader( auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
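/// Passing the whole part instead of just its storage means the shared_ptr held
/// by the marks loader keeps the part pinned while asynchronous mark loading is
/// still in flight, which is what made the is_part_outdated checks unnecessary.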
&context->getLoadMarksThreadpool() : nullptr; return std::make_unique( - part->getDataPartStoragePtr(), + part, index->getFileName(), extension, marks_count, all_mark_ranges, std::move(settings), mark_cache, uncompressed_cache, diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 18934fc19b1..300e99c850f 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -30,7 +30,7 @@ namespace ErrorCodes } MergeTreeMarksLoader::MergeTreeMarksLoader( - DataPartStoragePtr data_part_storage_, + DataPartPtr data_part_, MarkCache * mark_cache_, const String & mrk_path_, size_t marks_count_, @@ -39,7 +39,7 @@ MergeTreeMarksLoader::MergeTreeMarksLoader( const ReadSettings & read_settings_, ThreadPool * load_marks_threadpool_, size_t columns_in_mark_) - : data_part_storage(std::move(data_part_storage_)) + : data_part(data_part_) , mark_cache(mark_cache_) , mrk_path(mrk_path_) , marks_count(marks_count_) @@ -68,11 +68,6 @@ MarkInCompressedFile MergeTreeMarksLoader::getMark(size_t row_index, size_t colu { if (!marks) { - if (this->data_part_storage->is_part_outdated) - { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempting to read from outdated part. path : {}", data_part_storage->getFullPath()); - } - Stopwatch watch(CLOCK_MONOTONIC); if (future.valid()) @@ -103,6 +98,8 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + auto data_part_storage = data_part->getDataPartStoragePtr(); + size_t file_size = data_part_storage->getFileSize(mrk_path); size_t mark_size = index_granularity_info.getMarkSizeInBytes(columns_in_mark); size_t expected_uncompressed_size = mark_size * marks_count; @@ -182,6 +179,8 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarks() { MarkCache::MappedPtr loaded_marks; + auto data_part_storage = data_part->getDataPartStoragePtr(); + if (mark_cache) { auto key = mark_cache->hash(fs::path(data_part_storage->getFullPath()) / mrk_path); @@ -215,16 +214,6 @@ std::future MergeTreeMarksLoader::loadMarksAsync() [this]() -> MarkCache::MappedPtr { ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); - if (this->data_part_storage->is_part_outdated) - { - if (mark_cache) - { - auto key = mark_cache->hash(fs::path(data_part_storage->getFullPath()) / mrk_path); - marks.reset(); - mark_cache->remove(key); - } - return nullptr; - } return loadMarks(); }, *load_marks_threadpool, diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 17e52939d3f..816b512d1a7 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -18,7 +18,7 @@ public: using MarksPtr = MarkCache::MappedPtr; MergeTreeMarksLoader( - DataPartStoragePtr data_part_storage_, + DataPartPtr data_part_, MarkCache * mark_cache_, const String & mrk_path, size_t marks_count_, @@ -33,7 +33,7 @@ public: MarkInCompressedFile getMark(size_t row_index, size_t column_index = 0); private: - DataPartStoragePtr data_part_storage; + DataPartPtr data_part; MarkCache * mark_cache = nullptr; String mrk_path; size_t marks_count; diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index d1796dac6cc..13f8e485208 100644 --- 
a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -36,7 +36,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( settings_, avg_value_size_hints_) , marks_loader( - data_part_info_for_read_->getDataPartStorage(), + data_part_info_for_read_->getDataPart(), mark_cache, data_part_info_for_read_->getIndexGranularityInfo().getMarksFilePath(MergeTreeDataPartCompact::DATA_FILE_NAME), data_part_info_for_read_->getMarksCount(), diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index cdca5aa1247..44cf8f45015 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -15,7 +15,7 @@ namespace ErrorCodes } MergeTreeReaderStream::MergeTreeReaderStream( - DataPartStoragePtr data_part_storage_, + DataPartPtr data_part_, const String & path_prefix_, const String & data_file_extension_, size_t marks_count_, @@ -35,7 +35,7 @@ MergeTreeReaderStream::MergeTreeReaderStream( , all_mark_ranges(all_mark_ranges_) , file_size(file_size_) , uncompressed_cache(uncompressed_cache_) - , data_part_storage(std::move(data_part_storage_)) + , data_part_storage(data_part_->getDataPartStoragePtr()) , path_prefix(path_prefix_) , data_file_extension(data_file_extension_) , is_low_cardinality_dictionary(is_low_cardinality_dictionary_) @@ -44,7 +44,7 @@ MergeTreeReaderStream::MergeTreeReaderStream( , save_marks_in_cache(settings.save_marks_in_cache) , index_granularity_info(index_granularity_info_) , marks_loader( - data_part_storage, + data_part_, mark_cache, index_granularity_info->getMarksFilePath(path_prefix), marks_count, diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index f3785e175df..2265de94d07 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -19,7 +19,7 @@ class MergeTreeReaderStream { public: MergeTreeReaderStream( - DataPartStoragePtr data_part_storage_, + DataPartPtr data_part_, const String & path_prefix_, const String & data_file_extension_, size_t marks_count_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 05af33da20a..5b90118d9d5 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -242,7 +242,7 @@ void MergeTreeReaderWide::addStreams( auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; streams.emplace(stream_name, std::make_unique( - data_part_info_for_read->getDataPartStorage(), stream_name, DATA_FILE_EXTENSION, + data_part_info_for_read->getDataPart(), stream_name, DATA_FILE_EXTENSION, data_part_info_for_read->getMarksCount(), all_mark_ranges, settings, mark_cache, uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), &data_part_info_for_read->getIndexGranularityInfo(), diff --git a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh index bceda77c7f8..f4f38ad9c83 100755 --- a/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh +++ b/tests/queries/0_stateless/00993_system_parts_race_condition_drop_zookeeper.sh @@ -63,7 +63,6 @@ function thread6() done } - # https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout export -f thread1; export -f thread2; From 9b4bc115ae4c97cb44ef560db8fe01521a555475 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 24 Apr 2023 16:23:54 +0200 Subject: [PATCH 24/67] Fixed test as refcount is increased --- .../0_stateless/02340_parts_refcnt_mergetree.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference index e225ce389cb..ae4fafae829 100644 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference @@ -1,2 +1,2 @@ -data_02340 1_2_2_0 5 -data_02340_rep 1_0_0_0 5 +data_02340 1_2_2_0 6 +data_02340_rep 1_0_0_0 6 From 7731ea7905be089c943a82af6ff7cb16a5966bc6 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 25 Apr 2023 11:28:54 +0000 Subject: [PATCH 25/67] Fix key not found error for queries with multiple StorageJoin --- src/Interpreters/TableJoin.cpp | 9 ++++++++ src/Interpreters/TableJoin.h | 8 +++++++ src/Storages/StorageJoin.cpp | 5 +++-- .../02724_mutliple_storage_join.reference | 6 ++++++ .../02724_mutliple_storage_join.sql | 21 +++++++++++++++++++ 5 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02724_mutliple_storage_join.reference create mode 100644 tests/queries/0_stateless/02724_mutliple_storage_join.sql diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 7ea7a265263..2d882083f3d 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -147,6 +147,7 @@ void TableJoin::addDisjunct() void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) { addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast); + right_key_aliases[right_table_ast->getColumnName()] = right_table_ast->getAliasOrColumnName(); } /// @return how many times right key appears in ON section. 
@@ -662,6 +663,14 @@ String TableJoin::renamedRightColumnName(const String & name) const return name; } +String TableJoin::renamedRightColumnNameWithAlias(const String & name) const +{ + auto renamed = renamedRightColumnName(name); + if (const auto it = right_key_aliases.find(renamed); it != right_key_aliases.end()) + return it->second; + return renamed; +} + void TableJoin::setRename(const String & from, const String & to) { renames[from] = to; diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 99b683b7713..0e0c905e30c 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -156,6 +156,13 @@ private: /// Original name -> name. Only renamed columns. std::unordered_map renames; + /// Map column name to actual key name that can be an alias. + /// Example: SELECT r.id as rid from t JOIN r ON t.id = rid + /// Map: r.id -> rid + /// Required only for StorageJoin to map join keys back to original column names. + /// (workaround for ExpressionAnalyzer) + std::unordered_map right_key_aliases; + VolumePtr tmp_volume; std::shared_ptr right_storage_join; @@ -333,6 +340,7 @@ public: Block getRequiredRightKeys(const Block & right_table_keys, std::vector & keys_sources) const; String renamedRightColumnName(const String & name) const; + String renamedRightColumnNameWithAlias(const String & name) const; void setRename(const String & from, const String & to); void resetKeys(); diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index dec741beb45..23bcdd23484 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -220,12 +220,13 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, Names left_key_names_resorted; for (const auto & key_name : key_names) { - const auto & renamed_key = analyzed_join->renamedRightColumnName(key_name); + const auto & renamed_key = analyzed_join->renamedRightColumnNameWithAlias(key_name); /// find position of renamed_key in key_names_right auto it = std::find(key_names_right.begin(), key_names_right.end(), renamed_key); if (it == key_names_right.end()) throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, - "Key '{}' not found in JOIN ON section. All Join engine keys '{}' have to be used", key_name, fmt::join(key_names, ", ")); + "Key '{}' not found in JOIN ON section. Join engine key{} '{}' have to be used", + key_name, key_names.size() > 1 ? 
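/// The lookup above now goes through renamedRightColumnNameWithAlias(), so a
/// right key referenced via an alias (rid for r.id) resolves to its entry in
/// key_names_right instead of landing in this "key not found" branch, which is
/// exactly the failure 02724_mutliple_storage_join reproduces.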
"s" : "", fmt::join(key_names, ", ")); const size_t key_position = std::distance(key_names_right.begin(), it); left_key_names_resorted.push_back(key_names_left[key_position]); } diff --git a/tests/queries/0_stateless/02724_mutliple_storage_join.reference b/tests/queries/0_stateless/02724_mutliple_storage_join.reference new file mode 100644 index 00000000000..f7eb44d66e0 --- /dev/null +++ b/tests/queries/0_stateless/02724_mutliple_storage_join.reference @@ -0,0 +1,6 @@ +0 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02724_mutliple_storage_join.sql b/tests/queries/0_stateless/02724_mutliple_storage_join.sql new file mode 100644 index 00000000000..286e867704d --- /dev/null +++ b/tests/queries/0_stateless/02724_mutliple_storage_join.sql @@ -0,0 +1,21 @@ +CREATE TABLE user(id UInt32, name String) ENGINE = Join(ANY, LEFT, id); +INSERT INTO user VALUES (1,'U1')(2,'U2')(3,'U3'); + +CREATE TABLE product(id UInt32, name String, cate String) ENGINE = Join(ANY, LEFT, id); +INSERT INTO product VALUES (1,'P1','C1')(2,'P2','C1')(3,'P3','C2'); + +CREATE TABLE order(id UInt32, pId UInt32, uId UInt32) ENGINE = TinyLog; +INSERT INTO order VALUES (1,1,1)(2,1,2)(3,2,3); + +SELECT ignore(*) FROM ( + SELECT + uId, + user.id as `uuu` + FROM order + LEFT ANY JOIN user + ON uId = `uuu` +); + +SELECT ignore(*) FROM order +LEFT ANY JOIN user ON uId = user.id +LEFT ANY JOIN product ON pId = product.id; From 111fb4b8a9cf92d35010d8f2f14eb8a2d8977a8f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Thu, 27 Apr 2023 02:36:50 +0000 Subject: [PATCH 26/67] Add file name to exception raised during decompression --- src/IO/BrotliReadBuffer.cpp | 16 +++++++++++++--- src/IO/Bzip2ReadBuffer.cpp | 11 ++++++++--- src/IO/HadoopSnappyReadBuffer.cpp | 14 ++++++++++++-- src/IO/LZMAInflatingReadBuffer.cpp | 11 +++++++---- src/IO/Lz4InflatingReadBuffer.cpp | 6 ++++-- src/IO/WithFileName.cpp | 10 ++++++++++ src/IO/WithFileName.h | 1 + src/IO/ZlibInflatingReadBuffer.cpp | 14 +++++++++++--- src/IO/ZstdInflatingReadBuffer.cpp | 6 ++++-- 9 files changed, 70 insertions(+), 19 deletions(-) diff --git a/src/IO/BrotliReadBuffer.cpp b/src/IO/BrotliReadBuffer.cpp index 56ef2b5446b..1863cef8a39 100644 --- a/src/IO/BrotliReadBuffer.cpp +++ b/src/IO/BrotliReadBuffer.cpp @@ -3,6 +3,7 @@ #if USE_BROTLI # include # include "BrotliReadBuffer.h" +# include namespace DB { @@ -60,7 +61,10 @@ bool BrotliReadBuffer::nextImpl() if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in->eof())) { - throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error"); + throw Exception( + ErrorCodes::BROTLI_READ_FAILED, + "brotli decode error{}", + getExceptionEntryWithFileName(*in)); } out_capacity = internal_buffer.size(); @@ -83,13 +87,19 @@ bool BrotliReadBuffer::nextImpl() } else { - throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error"); + throw Exception( + ErrorCodes::BROTLI_READ_FAILED, + "brotli decode error{}", + getExceptionEntryWithFileName(*in)); } } if (brotli->result == BROTLI_DECODER_RESULT_ERROR) { - throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error"); + throw Exception( + ErrorCodes::BROTLI_READ_FAILED, + "brotli decode error{}", + getExceptionEntryWithFileName(*in)); } return true; diff --git a/src/IO/Bzip2ReadBuffer.cpp b/src/IO/Bzip2ReadBuffer.cpp index 9970edcbcf3..45ce8f45232 100644 --- a/src/IO/Bzip2ReadBuffer.cpp +++ b/src/IO/Bzip2ReadBuffer.cpp @@ -3,6 +3,7 @@ #if USE_BZIP2 # include # include +# include namespace DB { @@ -118,13 +119,17 @@ bool 
Bzip2ReadBuffer::nextImpl() if (ret != BZ_OK) throw Exception( ErrorCodes::BZIP2_STREAM_DECODER_FAILED, - "bzip2 stream decoder failed: error code: {}", - ret); + "bzip2 stream decoder failed: error code: {}{}", + ret, + getExceptionEntryWithFileName(*in)); if (in->eof()) { eof_flag = true; - throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of bzip2 archive"); + throw Exception( + ErrorCodes::UNEXPECTED_END_OF_FILE, + "Unexpected end of bzip2 archive{}", + getExceptionEntryWithFileName(*in)); } return true; diff --git a/src/IO/HadoopSnappyReadBuffer.cpp b/src/IO/HadoopSnappyReadBuffer.cpp index 6ba31997b37..577367e5607 100644 --- a/src/IO/HadoopSnappyReadBuffer.cpp +++ b/src/IO/HadoopSnappyReadBuffer.cpp @@ -11,6 +11,8 @@ #include "HadoopSnappyReadBuffer.h" +#include + namespace DB { namespace ErrorCodes @@ -196,7 +198,11 @@ bool HadoopSnappyReadBuffer::nextImpl() if (decoder->result == Status::NEEDS_MORE_INPUT && (!in_available || in->eof())) { - throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "hadoop snappy decode error: {}", statusToString(decoder->result)); + throw Exception( + ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, + "hadoop snappy decode error: {}{}", + statusToString(decoder->result), + getExceptionEntryWithFileName(*in)); } out_capacity = internal_buffer.size(); @@ -221,7 +227,11 @@ bool HadoopSnappyReadBuffer::nextImpl() } else if (decoder->result == Status::INVALID_INPUT || decoder->result == Status::BUFFER_TOO_SMALL) { - throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "hadoop snappy decode error: {}", statusToString(decoder->result)); + throw Exception( + ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, + "hadoop snappy decode error: {}{}", + statusToString(decoder->result), + getExceptionEntryWithFileName(*in)); } return true; } diff --git a/src/IO/LZMAInflatingReadBuffer.cpp b/src/IO/LZMAInflatingReadBuffer.cpp index 6d40dafd517..a6f3c74ae73 100644 --- a/src/IO/LZMAInflatingReadBuffer.cpp +++ b/src/IO/LZMAInflatingReadBuffer.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -78,18 +79,20 @@ bool LZMAInflatingReadBuffer::nextImpl() { throw Exception( ErrorCodes::LZMA_STREAM_DECODER_FAILED, - "lzma decoder finished, but input stream has not exceeded: error code: {}; lzma version: {}", + "lzma decoder finished, but input stream has not exceeded: error code: {}; lzma version: {}{}", ret, - LZMA_VERSION_STRING); + LZMA_VERSION_STRING, + getExceptionEntryWithFileName(*in)); } } if (ret != LZMA_OK) throw Exception( ErrorCodes::LZMA_STREAM_DECODER_FAILED, - "lzma_stream_decoder failed: error code: error codeL {}; lzma version: {}", + "lzma_stream_decoder failed: error code: error code {}; lzma version: {}{}", ret, - LZMA_VERSION_STRING); + LZMA_VERSION_STRING, + getExceptionEntryWithFileName(*in)); return true; } diff --git a/src/IO/Lz4InflatingReadBuffer.cpp b/src/IO/Lz4InflatingReadBuffer.cpp index 049f3a4d15a..eaa71048e70 100644 --- a/src/IO/Lz4InflatingReadBuffer.cpp +++ b/src/IO/Lz4InflatingReadBuffer.cpp @@ -1,4 +1,5 @@ #include +#include namespace DB { @@ -72,9 +73,10 @@ bool Lz4InflatingReadBuffer::nextImpl() if (LZ4F_isError(ret)) throw Exception( ErrorCodes::LZ4_DECODER_FAILED, - "LZ4 decompression failed. LZ4F version: {}. Error: {}", + "LZ4 decompression failed. LZ4F version: {}. 
Error: {}{}", LZ4F_VERSION, - LZ4F_getErrorName(ret)); + LZ4F_getErrorName(ret), + getExceptionEntryWithFileName(*in)); if (in->eof()) { diff --git a/src/IO/WithFileName.cpp b/src/IO/WithFileName.cpp index 6ecb3671ca0..0ec9ed5dd53 100644 --- a/src/IO/WithFileName.cpp +++ b/src/IO/WithFileName.cpp @@ -26,4 +26,14 @@ String getFileNameFromReadBuffer(const ReadBuffer & in) return getFileName(in); } +String getExceptionEntryWithFileName(const ReadBuffer & in) +{ + auto filename = getFileNameFromReadBuffer(in); + + if (filename.empty()) + return ""; + + return "; While reading from: " + filename; +} + } diff --git a/src/IO/WithFileName.h b/src/IO/WithFileName.h index d770634e738..595f1a768c5 100644 --- a/src/IO/WithFileName.h +++ b/src/IO/WithFileName.h @@ -14,5 +14,6 @@ public: }; String getFileNameFromReadBuffer(const ReadBuffer & in); +String getExceptionEntryWithFileName(const ReadBuffer & in); } diff --git a/src/IO/ZlibInflatingReadBuffer.cpp b/src/IO/ZlibInflatingReadBuffer.cpp index 09e4fce7c4c..b43dda1bfcc 100644 --- a/src/IO/ZlibInflatingReadBuffer.cpp +++ b/src/IO/ZlibInflatingReadBuffer.cpp @@ -1,5 +1,5 @@ #include - +#include namespace DB { @@ -99,14 +99,22 @@ bool ZlibInflatingReadBuffer::nextImpl() { rc = inflateReset(&zstr); if (rc != Z_OK) - throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflateReset failed: {}", zError(rc)); + throw Exception( + ErrorCodes::ZLIB_INFLATE_FAILED, + "inflateReset failed: {}{}", + zError(rc), + getExceptionEntryWithFileName(*in)); return true; } } /// If it is not end and not OK, something went wrong, throw exception if (rc != Z_OK) - throw Exception(ErrorCodes::ZLIB_INFLATE_FAILED, "inflate failed: {}", zError(rc)); + throw Exception( + ErrorCodes::ZLIB_INFLATE_FAILED, + "inflate failed: {}{}", + zError(rc), + getExceptionEntryWithFileName(*in)); } while (working_buffer.empty()); diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp index 6f5c8b4dc71..2b663ec7145 100644 --- a/src/IO/ZstdInflatingReadBuffer.cpp +++ b/src/IO/ZstdInflatingReadBuffer.cpp @@ -1,4 +1,5 @@ #include +#include #include @@ -61,12 +62,13 @@ bool ZstdInflatingReadBuffer::nextImpl() { throw Exception( ErrorCodes::ZSTD_DECODER_FAILED, - "ZSTD stream decoding failed: error '{}'{}; ZSTD version: {}", + "ZSTD stream decoding failed: error '{}'{}; ZSTD version: {}{}", ZSTD_getErrorName(ret), ZSTD_error_frameParameter_windowTooLarge == ret ? ". You can increase the maximum window size with the 'zstd_window_log_max' setting in ClickHouse. Example: 'SET zstd_window_log_max = 31'" : "", - ZSTD_VERSION_STRING); + ZSTD_VERSION_STRING, + getExceptionEntryWithFileName(*in)); } /// Check that something has changed after decompress (input or output position) From 3b536165cb51a792c19e81e5a6afe6f98d12e4a5 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 28 Apr 2023 19:44:53 +0000 Subject: [PATCH 27/67] Update tests. 
--- src/Core/Settings.h | 2 +- .../0_stateless/01600_parts_states_metrics_long.sh | 12 +++++++++++- .../0_stateless/01600_parts_types_metrics_long.sh | 12 +++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 26409e98763..21c3c1c4dbf 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -316,7 +316,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, false, "Allow experimental analyzer", 0) \ + M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ \ \ diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index f47d0863e69..50abd6ade90 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -15,10 +15,20 @@ verify_sql="SELECT # In case of test failure, this code will do infinite loop and timeout. verify() { - while true + for i in $(seq 1 3001) do result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) [ "$result" = "1" ] && break + + if [ "$i" = "3000" ]; then + echo "=======" + $CLICKHOUSE_CLIENT --query="SELECT * FROM system.parts format TSVWithNames" + echo "=======" + $CLICKHOUSE_CLIENT --query="SELECT * FROM system.metrics format TSVWithNames" + echo "=======" + return + fi + sleep 0.1 done echo 1 diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh index 05edf02f7ed..dcac6dcab39 100755 --- a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh @@ -20,12 +20,22 @@ verify_sql="SELECT # In case of test failure, this code will do infinite loop and timeout. verify() { - while true; do + for i in $(seq 1 3001); do result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" ) if [ "$result" = "1" ]; then echo 1 return fi + + if [ "$i" = "3000" ]; then + echo "=======" + $CLICKHOUSE_CLIENT --query="SELECT * FROM system.parts format TSVWithNames" + echo "=======" + $CLICKHOUSE_CLIENT --query="SELECT * FROM system.metrics format TSVWithNames" + echo "=======" + return + fi + sleep 0.1 done } From af558e282c59cff653031ac0c4a9d424c1cee34c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 28 Apr 2023 20:48:37 +0000 Subject: [PATCH 28/67] impl --- src/IO/ReadSettings.h | 2 +- src/Interpreters/Context.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index fc229ada59c..50ef5ec2988 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -68,7 +68,7 @@ struct ReadSettings /// Method to use reading from remote filesystem. 
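/// Experimental change below: the local filesystem read buffer shrinks from
/// DBMS_DEFAULT_BUFFER_SIZE (1 MiB) to a fixed 128 KiB, and the matching
/// assignment in Context::getReadSettings() is commented out so the
/// max_read_buffer_size setting no longer overrides it.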
RemoteFSReadMethod remote_fs_method = RemoteFSReadMethod::threadpool; - size_t local_fs_buffer_size = DBMS_DEFAULT_BUFFER_SIZE; + size_t local_fs_buffer_size = 128 * 1024; size_t remote_fs_buffer_size = DBMS_DEFAULT_BUFFER_SIZE; size_t prefetch_buffer_size = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index afeddbbe170..c78d2e8eb7b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4288,7 +4288,7 @@ ReadSettings Context::getReadSettings() const "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size); } - res.local_fs_buffer_size = settings.max_read_buffer_size; + /* res.local_fs_buffer_size = settings.max_read_buffer_size; */ res.remote_fs_buffer_size = settings.max_read_buffer_size; res.prefetch_buffer_size = settings.prefetch_buffer_size; res.direct_io_threshold = settings.min_bytes_to_use_direct_io; From 408db4a25b21b2fd9f201f095233496b14b8cbd7 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 30 Apr 2023 10:56:43 +0200 Subject: [PATCH 29/67] Updated to store IMergeTreeDataPartInfoForReader instead of DataPart in MergeTreeMarksLoader --- src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h | 2 -- .../MergeTree/LoadedMergeTreeDataPartInfoForReader.h | 2 -- src/Storages/MergeTree/MergeTreeIndexReader.cpp | 3 ++- src/Storages/MergeTree/MergeTreeMarksLoader.cpp | 8 ++++---- src/Storages/MergeTree/MergeTreeMarksLoader.h | 6 +++--- src/Storages/MergeTree/MergeTreeReaderCompact.cpp | 2 +- src/Storages/MergeTree/MergeTreeReaderStream.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeReaderStream.h | 3 ++- src/Storages/MergeTree/MergeTreeReaderWide.cpp | 2 +- 9 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h index af3fd7cbef3..648c3cfbb6b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h @@ -40,8 +40,6 @@ public: virtual DataPartStoragePtr getDataPartStorage() const = 0; - virtual DataPartPtr getDataPart() const = 0; - virtual const NamesAndTypesList & getColumns() const = 0; virtual const ColumnsDescription & getColumnsDescription() const = 0; diff --git a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h index a72285d8e3c..3363c75dd6f 100644 --- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h @@ -25,8 +25,6 @@ public: DataPartStoragePtr getDataPartStorage() const override { return data_part->getDataPartStoragePtr(); } - DataPartPtr getDataPart() const override { return data_part; } - const NamesAndTypesList & getColumns() const override { return data_part->getColumns(); } const ColumnsDescription & getColumnsDescription() const override { return data_part->getColumnsDescription(); } diff --git a/src/Storages/MergeTree/MergeTreeIndexReader.cpp b/src/Storages/MergeTree/MergeTreeIndexReader.cpp index 1ce6d777644..88fbc8c2488 100644 --- a/src/Storages/MergeTree/MergeTreeIndexReader.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexReader.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace { @@ -20,7 +21,7 @@ std::unique_ptr makeIndexReader( auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
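/// The reader-info wrapper introduced below narrows the marks loader's
/// dependency from a concrete DataPartPtr to the IMergeTreeDataPartInfoForReader
/// interface, while LoadedMergeTreeDataPartInfoForReader still owns the part and
/// keeps it alive for the duration of loading.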
&context->getLoadMarksThreadpool() : nullptr; return std::make_unique( - part, + std::make_shared(part), index->getFileName(), extension, marks_count, all_mark_ranges, std::move(settings), mark_cache, uncompressed_cache, diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 300e99c850f..9a5576f0ad2 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -30,7 +30,7 @@ namespace ErrorCodes } MergeTreeMarksLoader::MergeTreeMarksLoader( - DataPartPtr data_part_, + MergeTreeDataPartInfoForReaderPtr data_part_reader_, MarkCache * mark_cache_, const String & mrk_path_, size_t marks_count_, @@ -39,7 +39,7 @@ MergeTreeMarksLoader::MergeTreeMarksLoader( const ReadSettings & read_settings_, ThreadPool * load_marks_threadpool_, size_t columns_in_mark_) - : data_part(data_part_) + : data_part_reader(data_part_reader_) , mark_cache(mark_cache_) , mrk_path(mrk_path_) , marks_count(marks_count_) @@ -98,7 +98,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; - auto data_part_storage = data_part->getDataPartStoragePtr(); + auto data_part_storage = data_part_reader->getDataPartStorage(); size_t file_size = data_part_storage->getFileSize(mrk_path); size_t mark_size = index_granularity_info.getMarkSizeInBytes(columns_in_mark); @@ -179,7 +179,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarks() { MarkCache::MappedPtr loaded_marks; - auto data_part_storage = data_part->getDataPartStoragePtr(); + auto data_part_storage = data_part_reader->getDataPartStorage(); if (mark_cache) { diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.h b/src/Storages/MergeTree/MergeTreeMarksLoader.h index 816b512d1a7..0889da0cb85 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.h +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.h @@ -1,9 +1,9 @@ #pragma once -#include #include #include #include +#include namespace DB @@ -18,7 +18,7 @@ public: using MarksPtr = MarkCache::MappedPtr; MergeTreeMarksLoader( - DataPartPtr data_part_, + MergeTreeDataPartInfoForReaderPtr data_part_reader_, MarkCache * mark_cache_, const String & mrk_path, size_t marks_count_, @@ -33,7 +33,7 @@ public: MarkInCompressedFile getMark(size_t row_index, size_t column_index = 0); private: - DataPartPtr data_part; + MergeTreeDataPartInfoForReaderPtr data_part_reader; MarkCache * mark_cache = nullptr; String mrk_path; size_t marks_count; diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index 13f8e485208..26a7cb2b50b 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -36,7 +36,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( settings_, avg_value_size_hints_) , marks_loader( - data_part_info_for_read_->getDataPart(), + data_part_info_for_read_, mark_cache, data_part_info_for_read_->getIndexGranularityInfo().getMarksFilePath(MergeTreeDataPartCompact::DATA_FILE_NAME), data_part_info_for_read_->getMarksCount(), diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.cpp b/src/Storages/MergeTree/MergeTreeReaderStream.cpp index 44cf8f45015..6d80dc5522c 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderStream.cpp @@ -15,7 +15,7 @@ namespace 
ErrorCodes } MergeTreeReaderStream::MergeTreeReaderStream( - DataPartPtr data_part_, + MergeTreeDataPartInfoForReaderPtr data_part_reader_, const String & path_prefix_, const String & data_file_extension_, size_t marks_count_, @@ -35,7 +35,7 @@ MergeTreeReaderStream::MergeTreeReaderStream( , all_mark_ranges(all_mark_ranges_) , file_size(file_size_) , uncompressed_cache(uncompressed_cache_) - , data_part_storage(data_part_->getDataPartStoragePtr()) + , data_part_storage(data_part_reader_->getDataPartStorage()) , path_prefix(path_prefix_) , data_file_extension(data_file_extension_) , is_low_cardinality_dictionary(is_low_cardinality_dictionary_) @@ -44,7 +44,7 @@ MergeTreeReaderStream::MergeTreeReaderStream( , save_marks_in_cache(settings.save_marks_in_cache) , index_granularity_info(index_granularity_info_) , marks_loader( - data_part_, + data_part_reader_, mark_cache, index_granularity_info->getMarksFilePath(path_prefix), marks_count, diff --git a/src/Storages/MergeTree/MergeTreeReaderStream.h b/src/Storages/MergeTree/MergeTreeReaderStream.h index 2265de94d07..baf8ec713f9 100644 --- a/src/Storages/MergeTree/MergeTreeReaderStream.h +++ b/src/Storages/MergeTree/MergeTreeReaderStream.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace DB @@ -19,7 +20,7 @@ class MergeTreeReaderStream { public: MergeTreeReaderStream( - DataPartPtr data_part_, + MergeTreeDataPartInfoForReaderPtr data_part_reader_, const String & path_prefix_, const String & data_file_extension_, size_t marks_count_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 5b90118d9d5..69617fdf9e3 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -242,7 +242,7 @@ void MergeTreeReaderWide::addStreams( auto * load_marks_threadpool = settings.read_settings.load_marks_asynchronously ? 
&context->getLoadMarksThreadpool() : nullptr; streams.emplace(stream_name, std::make_unique( - data_part_info_for_read->getDataPart(), stream_name, DATA_FILE_EXTENSION, + data_part_info_for_read, stream_name, DATA_FILE_EXTENSION, data_part_info_for_read->getMarksCount(), all_mark_ranges, settings, mark_cache, uncompressed_cache, data_part_info_for_read->getFileSizeOrZero(stream_name + DATA_FILE_EXTENSION), &data_part_info_for_read->getIndexGranularityInfo(), From 941b408574be4c405e00d4a53d1b0fc4e26eccda Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sun, 30 Apr 2023 18:54:10 +0200 Subject: [PATCH 30/67] Reverted changes to test as reference of data part will be same --- .../0_stateless/02340_parts_refcnt_mergetree.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference index ae4fafae829..e225ce389cb 100644 --- a/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference +++ b/tests/queries/0_stateless/02340_parts_refcnt_mergetree.reference @@ -1,2 +1,2 @@ -data_02340 1_2_2_0 6 -data_02340_rep 1_0_0_0 6 +data_02340 1_2_2_0 5 +data_02340_rep 1_0_0_0 5 From a098298c0ff9b494f480c59baac22dec9d7fb574 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Apr 2023 16:15:13 +0200 Subject: [PATCH 31/67] Remove file for empty parts --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 148cbf93948..d3bc3cc70d9 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1357,14 +1357,6 @@ void IMergeTreeDataPart::loadColumns(bool require) else { loaded_metadata_version = metadata_snapshot->getMetadataVersion(); - - if (!is_readonly_storage) - { - writeMetadata(METADATA_VERSION_FILE_NAME, {}, [loaded_metadata_version](auto & buffer) - { - writeIntText(loaded_metadata_version, buffer); - }); - } } setColumns(loaded_columns, infos, loaded_metadata_version); From f03e75971ad47ef23fa492844eb0480fee649b53 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 25 Apr 2023 16:21:31 +0200 Subject: [PATCH 32/67] Remove on detach --- src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 09456088d74..04449fa91d4 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB { @@ -415,6 +416,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( disk->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); disk->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); + disk->removeFileIfExists(fs::path(to) / dir_path / IMergeTreeDataPart::METADATA_VERSION_FILE_NAME); auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); From a1e9236b5301a3d87d6aad7d0db05dea7262cf32 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 26 Apr 2023 16:47:25 +0200 Subject: [PATCH 33/67] Fix tests which depend on detached files count --- .../integration/test_merge_tree_hdfs/test.py | 12 +-- tests/integration/test_merge_tree_s3/test.py | 22 ++--- tests/integration/test_partition/test.py | 97 ++++++++++++------- 3 files changed, 81 
insertions(+), 50 deletions(-) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 782237539fa..26788d832b4 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -224,14 +224,14 @@ def test_attach_detach_partition(cluster): wait_for_delete_empty_parts(node, "hdfs_test") wait_for_delete_inactive_parts(node, "hdfs_test") wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE hdfs_test ATTACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(8192)" hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" @@ -355,7 +355,7 @@ def test_move_replace_partition_to_another_table(cluster): # Number of objects in HDFS should be unchanged. hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 # Add new partitions to source table, but with different values and replace them from copied table. node.query( @@ -370,7 +370,7 @@ def test_move_replace_partition_to_another_table(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)" hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 + assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-03' FROM hdfs_clone") node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-05' FROM hdfs_clone") @@ -381,7 +381,7 @@ def test_move_replace_partition_to_another_table(cluster): # Wait for outdated partitions deletion. 
wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("DROP TABLE hdfs_clone NO DELAY") @@ -390,4 +390,4 @@ def test_move_replace_partition_to_another_table(cluster): # Data should remain in hdfs hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 + assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index c2e00dc0cb8..d9d00800688 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -312,14 +312,14 @@ def test_attach_detach_partition(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE s3_test ATTACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") @@ -337,7 +337,7 @@ def test_attach_detach_partition(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1 + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1 - FILES_OVERHEAD_METADATA_VERSION ) node.query( "ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'", @@ -460,7 +460,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): # Number of objects in S3 should be unchanged. assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) # Add new partitions to source table, but with different values and replace them from copied table. @@ -474,7 +474,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 + == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-03' FROM s3_clone") @@ -486,7 +486,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): # Wait for outdated partitions deletion. 
wait_for_delete_s3_objects( - cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 + cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("DROP TABLE s3_clone NO DELAY") @@ -495,20 +495,20 @@ def test_move_replace_partition_to_another_table(cluster, node_name): # Data should remain in S3 assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("ALTER TABLE s3_test FREEZE") # Number S3 objects should be unchanged. assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("DROP TABLE s3_test NO DELAY") # Backup data should remain in S3. - wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE * 4) + wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 4) for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): minio.remove_object(cluster.minio_bucket, obj.object_name) @@ -534,7 +534,7 @@ def test_freeze_unfreeze(cluster, node_name): wait_for_delete_inactive_parts(node, "s3_test") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + == FILES_OVERHEAD + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) # Unfreeze single partition from backup1. @@ -575,7 +575,7 @@ def test_freeze_system_unfreeze(cluster, node_name): node.query("DROP TABLE s3_test_removed NO DELAY") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 + == FILES_OVERHEAD + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) # Unfreeze all data from backup3. 
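A minimal sketch (an assumed helper, not part of any patch here) of the accounting rule the hunks above keep restating inline: objects on remote storage are per-table overhead plus per-part overhead, minus one metadata_version.txt for each part that no longer carries that file after the "Remove file for empty parts" / "Remove on detach" patches. The FILES_OVERHEAD* constants are defined at the top of these test modules; their numeric values are not shown in the patches.

def expected_object_count(
    files_overhead: int,             # per-table overhead (FILES_OVERHEAD)
    per_part_wide: int,              # FILES_OVERHEAD_PER_PART_WIDE
    metadata_version_overhead: int,  # FILES_OVERHEAD_METADATA_VERSION
    num_parts: int,
    parts_without_metadata_version: int,
    num_tables: int = 1,
) -> int:
    return (
        files_overhead * num_tables
        + per_part_wide * num_parts
        - metadata_version_overhead * parts_without_metadata_version
    )

# e.g. the "two tables, four wide parts, two replaced partitions" case above:
# expected_object_count(FILES_OVERHEAD, FILES_OVERHEAD_PER_PART_WIDE,
#                       FILES_OVERHEAD_METADATA_VERSION, 4, 2, num_tables=2)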
diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index 5a972b58f99..93f03f4420e 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -70,7 +70,7 @@ def partition_complex_assert_columns_txt(): ) -def partition_complex_assert_checksums(): +def partition_complex_assert_checksums(after_detach=False): # Do not check increment.txt - it can be changed by other tests with FREEZE cmd = [ "bash", @@ -80,36 +80,67 @@ def partition_complex_assert_checksums(): " | sed 's shadow/[0-9]*/data/[a-z0-9_-]*/ shadow/1/data/test/ g' | sort | uniq", ] - checksums = ( - "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.bin\n" - "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.bin\n" - "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition_complex/19700102_2_2_0/minmax_p.idx\n" - "25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/partition.dat\n" - "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition_complex/19700201_1_1_0/partition.dat\n" - "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition_complex/19700102_2_2_0/checksums.txt\n" - "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.bin\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.mrk\n" - "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.mrk\n" - "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition_complex/19700201_1_1_0/primary.idx\n" - "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition_complex/19700201_1_1_0/checksums.txt\n" - "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700102_2_2_0/columns.txt\n" - "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700201_1_1_0/columns.txt\n" - "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.bin\n" - "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition_complex/19700102_2_2_0/primary.idx\n" - "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700102_2_2_0/default_compression_codec.txt\n" - "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700201_1_1_0/default_compression_codec.txt\n" - "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700102_2_2_0/count.txt\n" - "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700201_1_1_0/count.txt\n" - "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.bin\n" - "cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700102_2_2_0/metadata_version.txt\n" - "cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700201_1_1_0/metadata_version.txt\n" - "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.bin\n" - "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition_complex/19700201_1_1_0/minmax_p.idx\n" 
- ) + # no metadata version + if after_detach: + checksums = ( + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.bin\n" + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.bin\n" + "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition_complex/19700102_2_2_0/minmax_p.idx\n" + "25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/partition.dat\n" + "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition_complex/19700201_1_1_0/partition.dat\n" + "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition_complex/19700102_2_2_0/checksums.txt\n" + "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.bin\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.mrk\n" + "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition_complex/19700201_1_1_0/primary.idx\n" + "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition_complex/19700201_1_1_0/checksums.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700102_2_2_0/columns.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700201_1_1_0/columns.txt\n" + "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.bin\n" + "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition_complex/19700102_2_2_0/primary.idx\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700102_2_2_0/default_compression_codec.txt\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700201_1_1_0/default_compression_codec.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700102_2_2_0/count.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700201_1_1_0/count.txt\n" + "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.bin\n" + "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.bin\n" + "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition_complex/19700201_1_1_0/minmax_p.idx\n" + ) + else: + checksums = ( + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.bin\n" + "082814b5aa5109160d5c0c5aff10d4df\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.bin\n" + "13cae8e658e0ca4f75c56b1fc424e150\tshadow/1/data/test/partition_complex/19700102_2_2_0/minmax_p.idx\n" + "25daad3d9e60b45043a70c4ab7d3b1c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/partition.dat\n" + "3726312af62aec86b64a7708d5751787\tshadow/1/data/test/partition_complex/19700201_1_1_0/partition.dat\n" + "37855b06a39b79a67ea4e86e4a3299aa\tshadow/1/data/test/partition_complex/19700102_2_2_0/checksums.txt\n" + "38e62ff37e1e5064e9a3f605dfe09d13\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.bin\n" + 
"4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700102_2_2_0/v1.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.mrk\n" + "4ae71336e44bf9bf79d2752e234818a5\tshadow/1/data/test/partition_complex/19700201_1_1_0/v1.mrk\n" + "55a54008ad1ba589aa210d2629c1df41\tshadow/1/data/test/partition_complex/19700201_1_1_0/primary.idx\n" + "5f087cb3e7071bf9407e095821e2af8f\tshadow/1/data/test/partition_complex/19700201_1_1_0/checksums.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700102_2_2_0/columns.txt\n" + "77d5af402ada101574f4da114f242e02\tshadow/1/data/test/partition_complex/19700201_1_1_0/columns.txt\n" + "88cdc31ded355e7572d68d8cde525d3a\tshadow/1/data/test/partition_complex/19700201_1_1_0/p.bin\n" + "9e688c58a5487b8eaf69c9e1005ad0bf\tshadow/1/data/test/partition_complex/19700102_2_2_0/primary.idx\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700102_2_2_0/default_compression_codec.txt\n" + "c0904274faa8f3f06f35666cc9c5bd2f\tshadow/1/data/test/partition_complex/19700201_1_1_0/default_compression_codec.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700102_2_2_0/count.txt\n" + "c4ca4238a0b923820dcc509a6f75849b\tshadow/1/data/test/partition_complex/19700201_1_1_0/count.txt\n" + "cfcb770c3ecd0990dcceb1bde129e6c6\tshadow/1/data/test/partition_complex/19700102_2_2_0/p.bin\n" + "cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700102_2_2_0/metadata_version.txt\n" + "cfcd208495d565ef66e7dff9f98764da\tshadow/1/data/test/partition_complex/19700201_1_1_0/metadata_version.txt\n" + "e2af3bef1fd129aea73a890ede1e7a30\tshadow/1/data/test/partition_complex/19700201_1_1_0/k.bin\n" + "f2312862cc01adf34a93151377be2ddf\tshadow/1/data/test/partition_complex/19700201_1_1_0/minmax_p.idx\n" + ) assert TSV(instance.exec_in_container(cmd).replace(" ", "\t")) == TSV(checksums) @@ -134,7 +165,7 @@ def test_partition_complex(partition_table_complex): q("ALTER TABLE test.partition_complex FREEZE") - partition_complex_assert_checksums() + partition_complex_assert_checksums(True) q("ALTER TABLE test.partition_complex DETACH PARTITION 197001") q("ALTER TABLE test.partition_complex ATTACH PARTITION 197001") @@ -144,7 +175,7 @@ def test_partition_complex(partition_table_complex): q("ALTER TABLE test.partition_complex MODIFY COLUMN v1 Int8") # Check the backup hasn't changed - partition_complex_assert_checksums() + partition_complex_assert_checksums(True) q("OPTIMIZE TABLE test.partition_complex") From c6f91afad799b72141564cd0d4d84a1d77b9846e Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 28 Apr 2023 14:03:49 +0200 Subject: [PATCH 34/67] Add more debug to test (it's not flaky on my setup) --- .../integration/test_merge_tree_hdfs/test.py | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 26788d832b4..878c5254c11 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -355,7 +355,14 @@ def test_move_replace_partition_to_another_table(cluster): # Number 
of objects in HDFS should be unchanged. hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 + for obj in hdfs_objects: + print("Object in HDFS after move", obj) + assert ( + len(hdfs_objects) + == FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2 + ) # Add new partitions to source table, but with different values and replace them from copied table. node.query( @@ -370,7 +377,20 @@ def test_move_replace_partition_to_another_table(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)" hdfs_objects = fs.listdir("/clickhouse") +<<<<<<< HEAD assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 +======= + hdfs_objects = fs.listdir("/clickhouse") + for obj in hdfs_objects: + print("Object in HDFS after insert", obj) + + assert ( + len(hdfs_objects) + == FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 6 + - FILES_OVERHEAD_METADATA_VERSION * 2 + ) +>>>>>>> 870d328ff4d (Add more debug to test (it's not flaky on my setup)) node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-03' FROM hdfs_clone") node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-05' FROM hdfs_clone") @@ -390,4 +410,17 @@ def test_move_replace_partition_to_another_table(cluster): # Data should remain in hdfs hdfs_objects = fs.listdir("/clickhouse") +<<<<<<< HEAD assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 +======= + + for obj in hdfs_objects: + print("Object in HDFS after drop", obj) + + assert ( + len(hdfs_objects) + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2 + ) +>>>>>>> 870d328ff4d (Add more debug to test (it's not flaky on my setup)) From 6281606fcfcefa493e8dd5be913e49a07c50b067 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 30 Apr 2023 18:43:56 +0200 Subject: [PATCH 35/67] More debug info --- tests/integration/test_merge_tree_s3/test.py | 47 ++++++++++++++++---- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index d9d00800688..369406a72b4 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -444,8 +444,13 @@ def test_move_replace_partition_to_another_table(cluster, node_name): ) assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" + + s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + for obj in s3_objects: + print("Object at start", obj.object_name) + assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(s3_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 ) @@ -457,10 +462,16 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert node.query("SELECT sum(id) FROM s3_clone FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)" + s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + for obj in s3_objects: + print("Object after move partition", obj.object_name) + # Number of objects in S3 should be unchanged. 
assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 + len(s3_objects) + == FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2 ) # Add new partitions to source table, but with different values and replace them from copied table. @@ -472,9 +483,15 @@ def test_move_replace_partition_to_another_table(cluster, node_name): ) assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" + s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + for obj in s3_objects: + print("Object after insert", obj.object_name) + assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 + len(s3_objects) + == FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 6 + - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-03' FROM s3_clone") @@ -492,17 +509,29 @@ def test_move_replace_partition_to_another_table(cluster, node_name): node.query("DROP TABLE s3_clone NO DELAY") assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" + s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + for obj in s3_objects: + print("Object after drop", obj.object_name) + # Data should remain in S3 assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 + len(s3_objects) + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("ALTER TABLE s3_test FREEZE") # Number S3 objects should be unchanged. 
+ s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) + for obj in s3_objects: + print("Object after freeze", obj.object_name) + assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 + len(s3_objects) + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2 ) node.query("DROP TABLE s3_test NO DELAY") From b22a54d6cc08f335c62df70f6d6c139478bcd4e1 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sun, 30 Apr 2023 19:10:12 +0200 Subject: [PATCH 36/67] fix --- tests/integration/test_merge_tree_hdfs/test.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 878c5254c11..d8699d7cb93 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -377,10 +377,6 @@ def test_move_replace_partition_to_another_table(cluster): assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(16384)" hdfs_objects = fs.listdir("/clickhouse") -<<<<<<< HEAD - assert len(hdfs_objects) == FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 -======= - hdfs_objects = fs.listdir("/clickhouse") for obj in hdfs_objects: print("Object in HDFS after insert", obj) @@ -390,7 +386,6 @@ def test_move_replace_partition_to_another_table(cluster): + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 ) ->>>>>>> 870d328ff4d (Add more debug to test (it's not flaky on my setup)) node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-03' FROM hdfs_clone") node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-05' FROM hdfs_clone") @@ -410,9 +405,6 @@ def test_move_replace_partition_to_another_table(cluster): # Data should remain in hdfs hdfs_objects = fs.listdir("/clickhouse") -<<<<<<< HEAD - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 -======= for obj in hdfs_objects: print("Object in HDFS after drop", obj) @@ -423,4 +415,3 @@ def test_move_replace_partition_to_another_table(cluster): + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) ->>>>>>> 870d328ff4d (Add more debug to test (it's not flaky on my setup)) From ab97dbcf1e59e6ea59158ef31fb7d4c88b41d735 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Sun, 30 Apr 2023 17:27:15 +0000 Subject: [PATCH 37/67] Automatic style fix --- .../integration/test_merge_tree_hdfs/test.py | 17 ++++++++-- tests/integration/test_merge_tree_s3/test.py | 32 ++++++++++++------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index d8699d7cb93..d454df72a37 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -224,14 +224,22 @@ def test_attach_detach_partition(cluster): wait_for_delete_empty_parts(node, "hdfs_test") wait_for_delete_inactive_parts(node, "hdfs_test") wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION + cluster, + FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION, ) node.query("ALTER TABLE hdfs_test ATTACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM 
hdfs_test FORMAT Values") == "(8192)" hdfs_objects = fs.listdir("/clickhouse") - assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION + assert ( + len(hdfs_objects) + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION + ) node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" @@ -396,7 +404,10 @@ def test_move_replace_partition_to_another_table(cluster): # Wait for outdated partitions deletion. wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("DROP TABLE hdfs_clone NO DELAY") diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 369406a72b4..68a780c87fe 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -312,14 +312,18 @@ def test_attach_detach_partition(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE s3_test ATTACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 - FILES_OVERHEAD_METADATA_VERSION + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 2 + - FILES_OVERHEAD_METADATA_VERSION ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") @@ -337,7 +341,9 @@ def test_attach_detach_partition(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( len(list_objects(cluster, "data/")) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 1 - FILES_OVERHEAD_METADATA_VERSION + == FILES_OVERHEAD + + FILES_OVERHEAD_PER_PART_WIDE * 1 + - FILES_OVERHEAD_METADATA_VERSION ) node.query( "ALTER TABLE s3_test DROP DETACHED PARTITION '2020-01-04'", @@ -449,10 +455,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): for obj in s3_objects: print("Object at start", obj.object_name) - assert ( - len(s3_objects) - == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - ) + assert len(s3_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 create_table(node, "s3_clone") @@ -503,7 +506,10 @@ def test_move_replace_partition_to_another_table(cluster, node_name): # Wait for outdated partitions deletion. wait_for_delete_s3_objects( - cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 + cluster, + FILES_OVERHEAD * 2 + + FILES_OVERHEAD_PER_PART_WIDE * 4 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("DROP TABLE s3_clone NO DELAY") @@ -537,7 +543,9 @@ def test_move_replace_partition_to_another_table(cluster, node_name): node.query("DROP TABLE s3_test NO DELAY") # Backup data should remain in S3. 
- wait_for_delete_s3_objects(cluster, FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 4) + wait_for_delete_s3_objects( + cluster, FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 4 + ) for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): minio.remove_object(cluster.minio_bucket, obj.object_name) @@ -563,7 +571,8 @@ def test_freeze_unfreeze(cluster, node_name): wait_for_delete_inactive_parts(node, "s3_test") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 + == FILES_OVERHEAD + + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) # Unfreeze single partition from backup1. @@ -604,7 +613,8 @@ def test_freeze_system_unfreeze(cluster, node_name): node.query("DROP TABLE s3_test_removed NO DELAY") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 + == FILES_OVERHEAD + + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) # Unfreeze all data from backup3. From 2bce8a1b44b3e5d18ff685a6fc0a8ead458a85ea Mon Sep 17 00:00:00 2001 From: Raqbit Date: Mon, 1 May 2023 09:53:09 +0200 Subject: [PATCH 38/67] Fix typo "nullbale" in data-types reference doc --- docs/en/sql-reference/data-types/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index c61a3069db6..2ad8ac4bb23 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -27,7 +27,7 @@ ClickHouse data types include: - **Aggregation function types**: use [`SimpleAggregateFunction`](./simpleaggregatefunction.md) and [`AggregateFunction`](./aggregatefunction.md) for storing the intermediate status of aggregate function results - **Nested data structures**: A [`Nested` data structure](./nested-data-structures/index.md) is like a table inside a cell - **Tuples**: A [`Tuple` of elements](./tuple.md), each having an individual type. 
-- **Nullable**: [`Nullbale`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type) +- **Nullable**: [`Nullable`](./nullable.md) allows you to store a value as `NULL` when a value is "missing" (instead of the column gettings its default value for the data type) - **IP addresses**: use [`IPv4`](./domains/ipv4.md) and [`IPv6`](./domains/ipv6.md) to efficiently store IP addresses - **Geo types**: for[ geographical data](./geo.md), including `Point`, `Ring`, `Polygon` and `MultiPolygon` - **Special data types**: including [`Expression`](./special-data-types/expression.md), [`Set`](./special-data-types/set.md), [`Nothing`](./special-data-types/nothing.md) and [`Interval`](./special-data-types/interval.md) \ No newline at end of file From 35a736f811626a6b9db84bdc64bda551d7da3fa0 Mon Sep 17 00:00:00 2001 From: ongkong Date: Thu, 20 Apr 2023 23:12:37 -0700 Subject: [PATCH 39/67] use sanitizeBlock --- src/Interpreters/ExpressionAnalyzer.cpp | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index cc54e7620f6..e444a9b3a2b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1066,13 +1066,6 @@ static std::shared_ptr chooseJoinAlgorithm( { const auto & settings = context->getSettings(); - Block left_sample_block(left_sample_columns); - for (auto & column : left_sample_block) - { - if (!column.column) - column.column = column.type->createColumn(); - } - Block right_sample_block = joined_plan->getCurrentDataStream().header; std::vector tried_algorithms; @@ -1115,7 +1108,7 @@ static std::shared_ptr chooseJoinAlgorithm( return std::make_shared(analyzed_join, right_sample_block); } - if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH)) + if (Block left_sample_block(left_sample_columns); analyzed_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) && sanitizeBlock(left_sample_block, false)) { tried_algorithms.push_back(toString(JoinAlgorithm::GRACE_HASH)); if (GraceHashJoin::isSupported(analyzed_join)) From 1bffb28adc995f828998df8c51841cf763ab6b11 Mon Sep 17 00:00:00 2001 From: ongkong Date: Tue, 25 Apr 2023 00:40:54 -0700 Subject: [PATCH 40/67] add comment, change check location --- src/Interpreters/ExpressionAnalyzer.cpp | 7 +++++-- ...2724_function_in_left_table_clause_asof_join.reference | 1 + .../02724_function_in_left_table_clause_asof_join.sql | 8 ++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference create mode 100644 tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index e444a9b3a2b..9e3951e80a4 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1108,10 +1108,13 @@ static std::shared_ptr chooseJoinAlgorithm( return std::make_shared(analyzed_join, right_sample_block); } - if (Block left_sample_block(left_sample_columns); analyzed_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH) && sanitizeBlock(left_sample_block, false)) + if (analyzed_join->isEnabledAlgorithm(JoinAlgorithm::GRACE_HASH)) { tried_algorithms.push_back(toString(JoinAlgorithm::GRACE_HASH)); - if (GraceHashJoin::isSupported(analyzed_join)) + + // Grace hash join requires that columns exist in 
left_sample_block. + Block left_sample_block(left_sample_columns); + if (sanitizeBlock(left_sample_block, false) && GraceHashJoin::isSupported(analyzed_join)) return std::make_shared(context, analyzed_join, left_sample_block, right_sample_block, context->getTempDataOnDisk()); } diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql new file mode 100644 index 00000000000..13dfb5debe7 --- /dev/null +++ b/tests/queries/0_stateless/02724_function_in_left_table_clause_asof_join.sql @@ -0,0 +1,8 @@ +select count(*) +from ( + select 1 as id, [1, 2, 3] as arr +) as sessions +ASOF LEFT JOIN ( + select 1 as session_id, 4 as id +) as visitors +ON visitors.session_id <= sessions.id AND arrayFirst(a -> a, arrayMap((a) -> a, sessions.arr)) = visitors.id From abe0cfd10f913211059038f67761c5ce633e0b2d Mon Sep 17 00:00:00 2001 From: Duc Canh Le Date: Mon, 1 May 2023 18:51:22 +0800 Subject: [PATCH 41/67] fix wrong query result when using nullable primary key (#49172) --- src/Storages/MergeTree/KeyCondition.cpp | 4 ++-- ...79_nullable_primary_key_non_first_column.reference | 2 ++ .../02479_nullable_primary_key_non_first_column.sql | 11 +++++++++++ ...02479_nullable_primary_key_second_column.reference | 1 - .../02479_nullable_primary_key_second_column.sql | 9 --------- 5 files changed, 15 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference create mode 100644 tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql delete mode 100644 tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference delete mode 100644 tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 5d961425469..ed686a0b38b 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1999,9 +1999,9 @@ static BoolMask forAnyHyperrectangle( if (left_bounded && right_bounded) hyperrectangle[prefix_size] = Range(left_keys[prefix_size], true, right_keys[prefix_size], true); else if (left_bounded) - hyperrectangle[prefix_size] = Range::createLeftBounded(left_keys[prefix_size], true); + hyperrectangle[prefix_size] = Range::createLeftBounded(left_keys[prefix_size], true, data_types[prefix_size]->isNullable()); else if (right_bounded) - hyperrectangle[prefix_size] = Range::createRightBounded(right_keys[prefix_size], true); + hyperrectangle[prefix_size] = Range::createRightBounded(right_keys[prefix_size], true, data_types[prefix_size]->isNullable()); return callback(hyperrectangle); } diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference new file mode 100644 index 00000000000..ed6ac232d9c --- /dev/null +++ b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.reference @@ -0,0 +1,2 @@ +a \N +1 1 \N diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql 
b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql new file mode 100644 index 00000000000..2d56e315bd1 --- /dev/null +++ b/tests/queries/0_stateless/02479_nullable_primary_key_non_first_column.sql @@ -0,0 +1,11 @@ +drop table if exists test_table; +create table test_table (A Nullable(String), B Nullable(String)) engine MergeTree order by (A,B) settings index_granularity = 1, allow_nullable_key=1; +insert into test_table values ('a', 'b'), ('a', null), (null, 'b'); +select * from test_table where B is null; +drop table test_table; + +DROP TABLE IF EXISTS dm_metric_small2; +CREATE TABLE dm_metric_small2 (`x` Nullable(Int64), `y` Nullable(Int64), `z` Nullable(Int64)) ENGINE = MergeTree() ORDER BY (x, y, z) SETTINGS index_granularity = 1, allow_nullable_key = 1; +INSERT INTO dm_metric_small2 VALUES (1,1,NULL) (1,1,1) (1,2,0) (1,2,1) (1,2,NULL) (1,2,NULL); +SELECT * FROM dm_metric_small2 WHERE (x = 1) AND (y = 1) AND z IS NULL; +DROP TABLE dm_metric_small2; \ No newline at end of file diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference b/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference deleted file mode 100644 index f0227e1a41e..00000000000 --- a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.reference +++ /dev/null @@ -1 +0,0 @@ -a \N diff --git a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql b/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql deleted file mode 100644 index ad0c09222c2..00000000000 --- a/tests/queries/0_stateless/02479_nullable_primary_key_second_column.sql +++ /dev/null @@ -1,9 +0,0 @@ -drop table if exists test_table; - -create table test_table (A Nullable(String), B Nullable(String)) engine MergeTree order by (A,B) settings index_granularity = 1, allow_nullable_key=1; - -insert into test_table values ('a', 'b'), ('a', null), (null, 'b'); - -select * from test_table where B is null; - -drop table test_table; From 242bf034f583be51c7b7e415beb8b95a0c135ea7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 1 May 2023 12:58:10 +0200 Subject: [PATCH 42/67] Update 01600_parts_states_metrics_long.sh --- tests/queries/0_stateless/01600_parts_states_metrics_long.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 50abd6ade90..815d24d293b 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From 698c3d876bbe666497db79b1c063c486674a0b94 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 1 May 2023 12:58:24 +0200 Subject: [PATCH 43/67] Update 01600_parts_types_metrics_long.sh --- tests/queries/0_stateless/01600_parts_types_metrics_long.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh index dcac6dcab39..65ded439412 100755 --- a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: long, no-s3-storage +# Tags: no-s3-storage CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck 
source=../shell_config.sh From 21f58df9f7520bbb3adcfdfdf1a8d62a627ffb55 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 1 May 2023 13:58:10 +0200 Subject: [PATCH 44/67] Finally fix flaky test --- tests/integration/test_merge_tree_s3/test.py | 22 +++++++++----------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 68a780c87fe..1721f8423ae 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -470,9 +470,8 @@ def test_move_replace_partition_to_another_table(cluster, node_name): print("Object after move partition", obj.object_name) # Number of objects in S3 should be unchanged. - assert ( - len(s3_objects) - == FILES_OVERHEAD * 2 + wait_for_delete_s3_objects( + cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) @@ -490,9 +489,8 @@ def test_move_replace_partition_to_another_table(cluster, node_name): for obj in s3_objects: print("Object after insert", obj.object_name) - assert ( - len(s3_objects) - == FILES_OVERHEAD * 2 + wait_for_delete_s3_objects( + cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 ) @@ -520,9 +518,9 @@ def test_move_replace_partition_to_another_table(cluster, node_name): print("Object after drop", obj.object_name) # Data should remain in S3 - assert ( - len(s3_objects) - == FILES_OVERHEAD + + wait_for_delete_s3_objects( + cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) @@ -533,9 +531,9 @@ def test_move_replace_partition_to_another_table(cluster, node_name): for obj in s3_objects: print("Object after freeze", obj.object_name) - assert ( - len(s3_objects) - == FILES_OVERHEAD + wait_for_delete_s3_objects( + cluster, + FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) From 17effd4f6d9cbe96b2b4c04fdbdd360e84025772 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 1 May 2023 14:01:42 +0200 Subject: [PATCH 45/67] Fix black --- tests/integration/test_merge_tree_hdfs/test.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index d454df72a37..8ea5362b440 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -365,9 +365,8 @@ def test_move_replace_partition_to_another_table(cluster): hdfs_objects = fs.listdir("/clickhouse") for obj in hdfs_objects: print("Object in HDFS after move", obj) - assert ( - len(hdfs_objects) - == FILES_OVERHEAD * 2 + wait_for_delete_hdfs_objects( + cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) @@ -388,9 +387,8 @@ def test_move_replace_partition_to_another_table(cluster): for obj in hdfs_objects: print("Object in HDFS after insert", obj) - assert ( - len(hdfs_objects) - == FILES_OVERHEAD * 2 + wait_for_delete_hdfs_objects( + cluster, FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - FILES_OVERHEAD_METADATA_VERSION * 2 ) @@ -420,9 +418,8 @@ def test_move_replace_partition_to_another_table(cluster): for obj in hdfs_objects: print("Object in HDFS after drop", obj) - assert ( - len(hdfs_objects) - == FILES_OVERHEAD + wait_for_delete_hdfs_objects( + cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 2 ) From 
5b24a84fc04f3122793aa9567b8d816a88cb7e57 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 1 May 2023 14:02:07 +0200 Subject: [PATCH 46/67] Apply black check --- tests/integration/test_merge_tree_hdfs/test.py | 15 +++++++++------ tests/integration/test_merge_tree_s3/test.py | 17 ++++++++++------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 8ea5362b440..3057e48c753 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -366,9 +366,10 @@ def test_move_replace_partition_to_another_table(cluster): for obj in hdfs_objects: print("Object in HDFS after move", obj) wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD * 2 + cluster, + FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - - FILES_OVERHEAD_METADATA_VERSION * 2 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) # Add new partitions to source table, but with different values and replace them from copied table. @@ -388,9 +389,10 @@ def test_move_replace_partition_to_another_table(cluster): print("Object in HDFS after insert", obj) wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD * 2 + cluster, + FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - - FILES_OVERHEAD_METADATA_VERSION * 2 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("ALTER TABLE hdfs_test REPLACE PARTITION '2020-01-03' FROM hdfs_clone") @@ -419,7 +421,8 @@ def test_move_replace_partition_to_another_table(cluster): print("Object in HDFS after drop", obj) wait_for_delete_hdfs_objects( - cluster, FILES_OVERHEAD + cluster, + FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - - FILES_OVERHEAD_METADATA_VERSION * 2 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 1721f8423ae..76430a42e27 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -471,9 +471,10 @@ def test_move_replace_partition_to_another_table(cluster, node_name): # Number of objects in S3 should be unchanged. wait_for_delete_s3_objects( - cluster, FILES_OVERHEAD * 2 + cluster, + FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 4 - - FILES_OVERHEAD_METADATA_VERSION * 2 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) # Add new partitions to source table, but with different values and replace them from copied table. 
@@ -490,9 +491,10 @@ def test_move_replace_partition_to_another_table(cluster, node_name): print("Object after insert", obj.object_name) wait_for_delete_s3_objects( - cluster, FILES_OVERHEAD * 2 + cluster, + FILES_OVERHEAD * 2 + FILES_OVERHEAD_PER_PART_WIDE * 6 - - FILES_OVERHEAD_METADATA_VERSION * 2 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("ALTER TABLE s3_test REPLACE PARTITION '2020-01-03' FROM s3_clone") @@ -520,9 +522,10 @@ def test_move_replace_partition_to_another_table(cluster, node_name): # Data should remain in S3 wait_for_delete_s3_objects( - cluster, FILES_OVERHEAD + cluster, + FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - - FILES_OVERHEAD_METADATA_VERSION * 2 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("ALTER TABLE s3_test FREEZE") @@ -535,7 +538,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - - FILES_OVERHEAD_METADATA_VERSION * 2 + - FILES_OVERHEAD_METADATA_VERSION * 2, ) node.query("DROP TABLE s3_test NO DELAY") From d36f60bc4a6bb33570191526e4df4a7960034997 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Mon, 1 May 2023 08:50:18 -0400 Subject: [PATCH 47/67] Update settings.md add listen_host to prometheus section --- .../settings.md | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 02145a2fb6c..4d853b30341 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1339,13 +1339,20 @@ Settings: **Example** ``` xml - - /metrics - 9363 - true - true - true - + + 0.0.0.0 + 8123 + 9000 + + + /metrics + 9363 + true + true + true + + + ``` ## query_log {#server_configuration_parameters-query-log} From 1be9371fb5a8beefe5343d3c4d379be52492612f Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 1 May 2023 13:01:23 +0000 Subject: [PATCH 48/67] Better --- src/IO/HadoopSnappyReadBuffer.cpp | 7 +++---- src/IO/HadoopSnappyReadBuffer.h | 3 +++ src/IO/WithFileName.cpp | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/IO/HadoopSnappyReadBuffer.cpp b/src/IO/HadoopSnappyReadBuffer.cpp index 577367e5607..28588854268 100644 --- a/src/IO/HadoopSnappyReadBuffer.cpp +++ b/src/IO/HadoopSnappyReadBuffer.cpp @@ -91,9 +91,8 @@ inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readCompressedLength(siz { auto status = readLength(avail_in, next_in, &compressed_length); if (unlikely(compressed_length > 0 && static_cast(compressed_length) > sizeof(buffer))) - throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, - "Too large snappy compressed block. 
buffer size: {}, compressed block size: {}", - sizeof(buffer), compressed_length); + return Status::TOO_LARGE_COMPRESSED_BLOCK; + return status; } return Status::OK; @@ -225,7 +224,7 @@ bool HadoopSnappyReadBuffer::nextImpl() } return true; } - else if (decoder->result == Status::INVALID_INPUT || decoder->result == Status::BUFFER_TOO_SMALL) + else if (decoder->result != Status::NEEDS_MORE_INPUT) { throw Exception( ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, diff --git a/src/IO/HadoopSnappyReadBuffer.h b/src/IO/HadoopSnappyReadBuffer.h index 6d1b95f6813..b5fb1fec093 100644 --- a/src/IO/HadoopSnappyReadBuffer.h +++ b/src/IO/HadoopSnappyReadBuffer.h @@ -29,6 +29,7 @@ public: INVALID_INPUT = 1, BUFFER_TOO_SMALL = 2, NEEDS_MORE_INPUT = 3, + TOO_LARGE_COMPRESSED_BLOCK = 4, }; HadoopSnappyDecoder() = default; @@ -84,6 +85,8 @@ public: return "BUFFER_TOO_SMALL"; case Status::NEEDS_MORE_INPUT: return "NEEDS_MORE_INPUT"; + case Status::TOO_LARGE_COMPRESSED_BLOCK: + return "TOO_LARGE_COMPRESSED_BLOCK"; } UNREACHABLE(); } diff --git a/src/IO/WithFileName.cpp b/src/IO/WithFileName.cpp index 0ec9ed5dd53..ef4b5fed3b1 100644 --- a/src/IO/WithFileName.cpp +++ b/src/IO/WithFileName.cpp @@ -33,7 +33,7 @@ String getExceptionEntryWithFileName(const ReadBuffer & in) if (filename.empty()) return ""; - return "; While reading from: " + filename; + return fmt::format("; While reading from: {}", filename); } } From 84432b789743fd19b820a3460c9c00ba20fa7f96 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 1 May 2023 13:01:42 +0000 Subject: [PATCH 49/67] Add test --- ...24_decompress_filename_exception.reference | 8 +++++++ .../02724_decompress_filename_exception.sh | 22 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 tests/queries/0_stateless/02724_decompress_filename_exception.reference create mode 100755 tests/queries/0_stateless/02724_decompress_filename_exception.sh diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.reference b/tests/queries/0_stateless/02724_decompress_filename_exception.reference new file mode 100644 index 00000000000..f9c5aacff7b --- /dev/null +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.reference @@ -0,0 +1,8 @@ +Ok +Ok +Ok +Ok +Ok +Ok +Ok +Ok diff --git a/tests/queries/0_stateless/02724_decompress_filename_exception.sh b/tests/queries/0_stateless/02724_decompress_filename_exception.sh new file mode 100755 index 00000000000..bbc2b8d066b --- /dev/null +++ b/tests/queries/0_stateless/02724_decompress_filename_exception.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') +FILENAME="${USER_FILES_PATH}/corrupted_file.tsv.xx" + +echo 'corrupted file' > $FILENAME; + +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'gzip')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'deflate')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'br')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'xz')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'zstd')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'lz4')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'bz2')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; +$CLICKHOUSE_CLIENT --query "SELECT * FROM file('${FILENAME}', 'TSV', 'c UInt32', 'snappy')" 2>&1 | grep -q "While reading from: $FILENAME" && echo 'Ok' || echo 'Fail'; + +rm $FILENAME; From d21cdde371f4bd2ccfdc4c3a8daaf8533ea2537d Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 1 May 2023 09:02:32 -0400 Subject: [PATCH 50/67] add check --- .../operations/server-configuration-parameters/settings.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 4d853b30341..113e42499fe 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1324,7 +1324,7 @@ The trailing slash is mandatory. /var/lib/clickhouse/ ``` -## prometheus {#server_configuration_parameters-prometheus} +## Prometheus {#server_configuration_parameters-prometheus} Exposing metrics data for scraping from [Prometheus](https://prometheus.io). @@ -1355,6 +1355,11 @@ Settings: ``` +Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server): +```bash +curl 127.0.0.1:9363/metrics +``` + ## query_log {#server_configuration_parameters-query-log} Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting. 
From dd7282ea5c4417c70a2d14a93c90271823142b2c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 1 May 2023 16:05:14 +0300
Subject: [PATCH 51/67] Update ci_config.py

---
 tests/ci/ci_config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py
index 9dca9f2d8bd..5f9617e7100 100644
--- a/tests/ci/ci_config.py
+++ b/tests/ci/ci_config.py
@@ -470,7 +470,7 @@ CHECK_DESCRIPTIONS = [
     ),
     CheckDescription(
         "Flaky tests",
-        "Runs a flaky tests from master multiple times to identify if they are stable.",
+        "Checks if newly added or modified tests are flaky by running them repeatedly, in parallel, with more randomization. Functional tests are run 100 times with address sanitizer and additional randomization of thread scheduling. Integration tests are run up to 10 times. If a new test fails at least once, or runs for too long, this check will be red. We don't allow flaky tests; read https://clickhouse.com/blog/decorating-a-christmas-tree-with-the-help-of-flaky-tests/",
         lambda x: "tests flaky check" in x,
     ),
     CheckDescription(

From 266c9d403a8af9578dadcaa07c46a4a788868fc0 Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Mon, 1 May 2023 12:43:43 +0000
Subject: [PATCH 52/67] review fixes

---
 src/Core/Settings.h          | 2 ++
 src/IO/ReadSettings.h        | 2 ++
 src/Interpreters/Context.cpp | 6 ++++--
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index aa5e69ce571..ed5ad0278a2 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -49,6 +49,8 @@ class IColumn;
     M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
     M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
     M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
+    M(UInt64, max_read_buffer_size_local_fs, 128*1024, "The maximum size of the buffer to read from local filesystem. If set to 0 then max_read_buffer_size will be used.", 0) \
+    M(UInt64, max_read_buffer_size_remote_fs, 0, "The maximum size of the buffer to read from remote filesystem. If set to 0 then max_read_buffer_size will be used.", 0) \
     M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
     M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "The maximum number of bytes of a query string parsed by the SQL parser. Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and not affected by this restriction.", 0) \
     M(UInt64, interactive_delay, 100000, "The interval in microseconds to check if the request is cancelled, and to send progress info.", 0) \
diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h
index 50ef5ec2988..59ab402c748 100644
--- a/src/IO/ReadSettings.h
+++ b/src/IO/ReadSettings.h
@@ -68,7 +68,9 @@ struct ReadSettings
     /// Method to use reading from remote filesystem.
RemoteFSReadMethod remote_fs_method = RemoteFSReadMethod::threadpool; + /// https://eklitzke.org/efficient-file-copying-on-linux size_t local_fs_buffer_size = 128 * 1024; + size_t remote_fs_buffer_size = DBMS_DEFAULT_BUFFER_SIZE; size_t prefetch_buffer_size = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index c78d2e8eb7b..944ff6dfa24 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -4288,8 +4288,10 @@ ReadSettings Context::getReadSettings() const "Invalid value '{}' for max_read_buffer_size", settings.max_read_buffer_size); } - /* res.local_fs_buffer_size = settings.max_read_buffer_size; */ - res.remote_fs_buffer_size = settings.max_read_buffer_size; + res.local_fs_buffer_size + = settings.max_read_buffer_size_local_fs ? settings.max_read_buffer_size_local_fs : settings.max_read_buffer_size; + res.remote_fs_buffer_size + = settings.max_read_buffer_size_remote_fs ? settings.max_read_buffer_size_remote_fs : settings.max_read_buffer_size; res.prefetch_buffer_size = settings.prefetch_buffer_size; res.direct_io_threshold = settings.min_bytes_to_use_direct_io; res.mmap_threshold = settings.min_bytes_to_use_mmap_io; From 76a591fa5e1f2e4974af78fb40f5ba04308f496a Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Mon, 1 May 2023 15:16:12 +0000 Subject: [PATCH 53/67] Allow restricted keywords if alias is quoted --- src/Parsers/ExpressionElementParsers.cpp | 4 +++- .../02725_alias_with_restricted_keywords.reference | 1 + .../0_stateless/02725_alias_with_restricted_keywords.sql | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference create mode 100644 tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index a6354cd0e81..28cef51e571 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1429,10 +1429,12 @@ bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!allow_alias_without_as_keyword && !has_as_word) return false; + bool is_quoted = pos->type == TokenType::QuotedIdentifier; + if (!id_p.parse(pos, node, expected)) return false; - if (!has_as_word) + if (!has_as_word && !is_quoted) { /** In this case, the alias can not match the keyword - * so that in the query "SELECT x FROM t", the word FROM was not considered an alias, diff --git a/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference new file mode 100644 index 00000000000..9874d6464ab --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.reference @@ -0,0 +1 @@ +1 2 diff --git a/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql new file mode 100644 index 00000000000..6df0e856061 --- /dev/null +++ b/tests/queries/0_stateless/02725_alias_with_restricted_keywords.sql @@ -0,0 +1 @@ +SELECT 1 `array`, 2 "union"; From 631e81c53f02433f30eb4bc745a2551c4eb560c8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 1 May 2023 16:42:55 +0000 Subject: [PATCH 54/67] Respect projections in 01600_parts --- src/Core/Settings.h | 2 +- .../01600_parts_states_metrics_long.sh | 15 +++------------ .../0_stateless/01600_parts_types_metrics_long.sh | 15 +++------------ 3 files changed, 7 
insertions(+), 25 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 21c3c1c4dbf..26409e98763 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -316,7 +316,7 @@ class IColumn; M(Float, opentelemetry_start_trace_probability, 0., "Probability to start an OpenTelemetry trace for an incoming query.", 0) \ M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \ M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \ - M(Bool, allow_experimental_analyzer, true, "Allow experimental analyzer", 0) \ + M(Bool, allow_experimental_analyzer, false, "Allow experimental analyzer", 0) \ M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \ \ \ diff --git a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh index 815d24d293b..89ce84f6dbc 100755 --- a/tests/queries/0_stateless/01600_parts_states_metrics_long.sh +++ b/tests/queries/0_stateless/01600_parts_states_metrics_long.sh @@ -7,27 +7,18 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # NOTE: database = $CLICKHOUSE_DATABASE is unwanted verify_sql="SELECT (SELECT sumIf(value, metric = 'PartsActive'), sumIf(value, metric = 'PartsOutdated') FROM system.metrics) - = (SELECT sum(active), sum(NOT active) FROM system.parts)" + = (SELECT sum(active), sum(NOT active) FROM + (SELECT active FROM system.parts UNION ALL SELECT active FROM system.projection_parts))" # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time. # So, there is inherent race condition. But it should get expected result eventually. # In case of test failure, this code will do infinite loop and timeout. 
 verify()
 {
-    for i in $(seq 1 3001)
+    while true
     do
         result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" )
         [ "$result" = "1" ] && break
-
-        if [ "$i" = "3000" ]; then
-            echo "======="
-            $CLICKHOUSE_CLIENT --query="SELECT * FROM system.parts format TSVWithNames"
-            echo "======="
-            $CLICKHOUSE_CLIENT --query="SELECT * FROM system.metrics format TSVWithNames"
-            echo "======="
-            return
-        fi
-
         sleep 0.1
     done
     echo 1
diff --git a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh
index 65ded439412..0b9afcf633e 100755
--- a/tests/queries/0_stateless/01600_parts_types_metrics_long.sh
+++ b/tests/queries/0_stateless/01600_parts_types_metrics_long.sh
@@ -11,7 +11,8 @@ set -o pipefail
 # NOTE: database = $CLICKHOUSE_DATABASE is unwanted
 verify_sql="SELECT
     (SELECT sumIf(value, metric = 'PartsInMemory'), sumIf(value, metric = 'PartsCompact'), sumIf(value, metric = 'PartsWide') FROM system.metrics) =
-    (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide') FROM system.parts)"
+    (SELECT countIf(part_type == 'InMemory'), countIf(part_type == 'Compact'), countIf(part_type == 'Wide')
+     FROM (SELECT part_type FROM system.parts UNION ALL SELECT part_type FROM system.projection_parts))"

 # The query is not atomic - it can compare states between system.parts and system.metrics from different points in time.
 # So, there is inherent race condition (especially in fasttest that runs tests in parallel).
@@ -20,22 +21,12 @@ verify_sql="SELECT
 # In case of test failure, this code will do infinite loop and timeout.
 verify()
 {
-    for i in $(seq 1 3001); do
+    while true; do
         result=$( $CLICKHOUSE_CLIENT -m --query="$verify_sql" )
         if [ "$result" = "1" ]; then
             echo 1
             return
         fi
-
-        if [ "$i" = "3000" ]; then
-            echo "======="
-            $CLICKHOUSE_CLIENT --query="SELECT * FROM system.parts format TSVWithNames"
-            echo "======="
-            $CLICKHOUSE_CLIENT --query="SELECT * FROM system.metrics format TSVWithNames"
-            echo "======="
-            return
-        fi
-
         sleep 0.1
     done
 }

From 30dd645b4f2bcffd1fb7d8a40a5040fa71d4a926 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Mon, 1 May 2023 13:02:32 -0400
Subject: [PATCH 55/67] add docs for nested dynamic disks

---
 .../mergetree-family/mergetree.md             | 50 ++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index d5189d4b9d9..a1e6fb90af2 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -731,7 +731,13 @@ The names given to the described entities can be found in the system tables, [sy

 ### Configuration {#table_engine-mergetree-multiple-volumes_configure}

-Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag either in the main file `config.xml` or in a distinct file in the `config.d` directory.
+Disks, volumes and storage policies should be declared inside the `<storage_configuration>` tag in a file in the `config.d` directory.
+
+:::tip
+Disks can also be declared in the `SETTINGS` section of a query. This is useful
+for ad-hoc analysis to temporarily attach a disk that is, for example, hosted at a URL.
+See [nested dynamic storage](#nested-dynamic-storage) for more details.
+:::

 Configuration structure:

@@ -876,6 +882,48 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF

 The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.

+### Nested Dynamic Storage
+
+This example query shows how to use a local disk to cache data from a table stored
+at a URL. Neither the cache disk nor the web storage is configured in the ClickHouse
+configuration files; both are configured in the ATTACH query settings.
+
+In the settings highlighted below notice that the disk of `type=web` is nested within
+the disk of `type=cache`.
+
+```sql
+ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
+(
+    price UInt32,
+    date Date,
+    postcode1 LowCardinality(String),
+    postcode2 LowCardinality(String),
+    type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
+    is_new UInt8,
+    duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
+    addr1 String,
+    addr2 String,
+    street LowCardinality(String),
+    locality LowCardinality(String),
+    town LowCardinality(String),
+    district LowCardinality(String),
+    county LowCardinality(String)
+)
+ENGINE = MergeTree
+ORDER BY (postcode1, postcode2, addr1, addr2)
+ # highlight-start
+ SETTINGS disk = disk(
+    type=cache,
+    max_size='1Gi',
+    path='/var/lib/clickhouse/custom_disk_cache/',
+    disk=disk(
+        type=web,
+        endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
+    )
+  );
+ # highlight-end
+```
+
 ### Details {#details}

 In the case of `MergeTree` tables, data is getting to disk in different ways:

From fa4ab93f21f3566633de8f94cd35b6c9284bd732 Mon Sep 17 00:00:00 2001
From: DanRoscigno
Date: Mon, 1 May 2023 14:06:50 -0400
Subject: [PATCH 56/67] add dynamic and nested dynamic

---
 .../mergetree-family/mergetree.md             | 43 +++++++++++++++++--
 1 file changed, 39 insertions(+), 4 deletions(-)

diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index a1e6fb90af2..12da62a147c 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -736,7 +736,7 @@ Disks, volumes and storage policies should be declared inside the `<storage_configuration>`
Date: Mon, 1 May 2023 20:32:01 +0200
Subject: [PATCH 57/67] Fix bug in removal of existing part directory

---
 src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp | 4 +++-
 src/Storages/StorageReplicatedMergeTree.cpp          | 5 +++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
index 09456088d74..d46f080c19f 100644
--- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
+++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp
@@ -461,6 +461,7 @@ void DataPartStorageOnDiskBase::rename(
     if (volume->getDisk()->exists(to))
     {
+        /// FIXME it should be logical error
         if (remove_new_dir_if_exists)
         {
             Names files;
@@ -471,7 +472,8 @@ void DataPartStorageOnDiskBase::rename(
                     "Part directory {} already exists and contains {} files. 
Removing it.", fullPath(volume->getDisk(), to), files.size()); - executeWriteOperation([&](auto & disk) { disk.removeRecursive(to); }); + /// Do not remove blobs if they exist + executeWriteOperation([&](auto & disk) { disk.removeSharedRecursive(to, true, {}); }); } else { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 1e8c2c583a4..f0d0dec3cc3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1330,6 +1330,11 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) uncovered_unexpected_parts.size(), uncovered_unexpected_parts_rows, unexpected_parts_nonnew, unexpected_parts_nonnew_rows, parts_to_fetch.size(), parts_to_fetch_blocks, covered_unexpected_parts.size(), unexpected_parts_rows - uncovered_unexpected_parts_rows); } + else + { + if (!parts_to_fetch.empty()) + LOG_DEBUG(log, "Found parts to fetch (exist in zookeeper, but not locally): [{}]", fmt::join(parts_to_fetch, ", ")); + } /// Add to the queue jobs to pick up the missing parts from other replicas and remove from ZK the information that we have them. queue.setBrokenPartsToEnqueueFetchesOnLoading(std::move(parts_to_fetch)); From 2b141a195abc581982974253d61ecbbd65a0e71e Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Mon, 1 May 2023 14:38:05 -0400 Subject: [PATCH 58/67] Apply suggestions from code review Co-authored-by: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 12da62a147c..96abfe4b80a 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -886,7 +886,7 @@ The number of threads performing background moves of data parts can be changed b This example query shows how to attach a table stored at a URL and configure the remote storage within the query. The web storage is not configured in the ClickHouse -configuration files; all the settings are in the ATTACH query. +configuration files; all the settings are in the CREATE/ATTACH query. ```sql ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' @@ -921,7 +921,7 @@ ORDER BY (postcode1, postcode2, addr1, addr2) This example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL. Neither the cache disk nor the web storage is configured in the ClickHouse configuration files; both are -configured in the ATTACH query settings. +configured in the CREATE/ATTACH query settings. In the settings highlighted below notice that the disk of `type=web` is nested within the disk of `type=cache`. 
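The nested cache-over-web pattern documented in the two commits above can be exercised end to end through the HTTP interface. The sketch below is illustrative rather than part of the series: it assumes a local server on the default HTTP port 8123, write access to the cache path from the documentation example, network access to the demo endpoint, and that the table does not already exist; the schema, UUID and endpoint are copied verbatim from the example.

```python
#!/usr/bin/env python3
"""Sketch: drive the documented nested cache-over-web ATTACH over HTTP."""
import urllib.request


def run_query(sql: str, url: str = "http://127.0.0.1:8123/") -> str:
    """POST a query to the ClickHouse HTTP interface and return the body."""
    request = urllib.request.Request(url, data=sql.encode("utf-8"))
    with urllib.request.urlopen(request, timeout=60) as response:
        return response.read().decode("utf-8")


# The cache disk wraps the web disk; neither appears in any config file.
ATTACH_SQL = """
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
    price UInt32,
    date Date,
    postcode1 LowCardinality(String),
    postcode2 LowCardinality(String),
    type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
    is_new UInt8,
    duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
    addr1 String,
    addr2 String,
    street LowCardinality(String),
    locality LowCardinality(String),
    town LowCardinality(String),
    district LowCardinality(String),
    county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
SETTINGS disk = disk(
    type=cache,
    max_size='1Gi',
    path='/var/lib/clickhouse/custom_disk_cache/',
    disk=disk(
        type=web,
        endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'))
"""

if __name__ == "__main__":
    run_query(ATTACH_SQL)
    # The first read is served by the web disk and populates the local cache;
    # repeated reads of the same ranges should then hit the cache disk.
    print(run_query("SELECT count() FROM uk_price_paid"))
```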
From 02b7c2fe9095d63a966671b9e5bedb38e792f02f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Tue, 25 Apr 2023 22:22:47 +0200 Subject: [PATCH 59/67] clearing s3 between tests in a robust way --- tests/integration/test_merge_tree_s3/test.py | 120 ++++++------------- 1 file changed, 39 insertions(+), 81 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 76430a42e27..5e0445636a1 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -101,44 +101,34 @@ def run_s3_mocks(cluster): ) -def list_objects(cluster, path="data/"): +def list_objects(cluster, path="data/", hint="list_objects"): minio = cluster.minio_client objects = list(minio.list_objects(cluster.minio_bucket, path, recursive=True)) - logging.info(f"list_objects ({len(objects)}): {[x.object_name for x in objects]}") + logging.info(f"{hint} ({len(objects)}): {[x.object_name for x in objects]}") return objects def wait_for_delete_s3_objects(cluster, expected, timeout=30): - minio = cluster.minio_client while timeout > 0: - if ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == expected - ): + if len(list_objects(cluster, "data/")) == expected: return timeout -= 1 time.sleep(1) - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == expected - ) + assert len(list_objects(cluster, "data/")) == expected -@pytest.fixture(autouse=True) -@pytest.mark.parametrize("node_name", ["node"]) -def drop_table(cluster, node_name): +@pytest.fixture(autouse=True, scope="function") +def clear_minio(cluster): + # CH do some writes to the S3 at start. For example, file data/clickhouse_access_check_{server_uuid}. + # Set the timeout there as 10 sec in order to resolve the race with that file exists. 
+ wait_for_delete_s3_objects(cluster, 0, timeout=10) + yield - node = cluster.instances[node_name] + + # Remove extra objects to prevent tests cascade failing minio = cluster.minio_client - - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") - - try: - wait_for_delete_s3_objects(cluster, 0) - finally: - # Remove extra objects to prevent tests cascade failing - for obj in list_objects(cluster, "data/"): - minio.remove_object(cluster.minio_bucket, obj.object_name) + for obj in list_objects(cluster, "data/"): + minio.remove_object(cluster.minio_bucket, obj.object_name) @pytest.mark.parametrize( @@ -158,10 +148,7 @@ def test_simple_insert_select( values1 = generate_values("2020-01-03", 4096) node.query("INSERT INTO s3_test VALUES {}".format(values1)) assert node.query("SELECT * FROM s3_test order by dt, id FORMAT Values") == values1 - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + files_per_part - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + files_per_part values2 = generate_values("2020-01-04", 4096) node.query("INSERT INTO s3_test VALUES {}".format(values2)) @@ -169,10 +156,7 @@ def test_simple_insert_select( node.query("SELECT * FROM s3_test ORDER BY dt, id FORMAT Values") == values1 + "," + values2 ) - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD + files_per_part * 2 - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + files_per_part * 2 assert ( node.query("SELECT count(*) FROM s3_test where id = 1 FORMAT Values") == "(2)" @@ -214,7 +198,7 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): node.query("SELECT count(distinct(id)) FROM s3_test FORMAT Values") == "(8192)" ) assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD_PER_PART_WIDE * 6 + FILES_OVERHEAD ) @@ -292,7 +276,6 @@ def test_alter_table_columns(cluster, node_name): def test_attach_detach_partition(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -360,7 +343,6 @@ def test_attach_detach_partition(cluster, node_name): def test_move_partition_to_another_disk(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -370,21 +352,21 @@ def test_move_partition_to_another_disk(cluster, node_name): ) assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 'hdd'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE ) node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-04' TO DISK 's3'") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == 
FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) @@ -393,7 +375,6 @@ def test_move_partition_to_another_disk(cluster, node_name): def test_table_manipulations(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -405,9 +386,10 @@ def test_table_manipulations(cluster, node_name): node.query("RENAME TABLE s3_test TO s3_renamed") assert node.query("SELECT count(*) FROM s3_renamed FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) + node.query("RENAME TABLE s3_renamed TO s3_test") assert node.query("CHECK TABLE s3_test FORMAT Values") == "(1)" @@ -416,7 +398,7 @@ def test_table_manipulations(cluster, node_name): node.query("ATTACH TABLE s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) @@ -424,17 +406,13 @@ def test_table_manipulations(cluster, node_name): wait_for_delete_empty_parts(node, "s3_test") wait_for_delete_inactive_parts(node, "s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD @pytest.mark.parametrize("node_name", ["node"]) def test_move_replace_partition_to_another_table(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -451,12 +429,10 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" - s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) - for obj in s3_objects: - print("Object at start", obj.object_name) - - assert len(s3_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 - + assert ( + len(list_objects(cluster, "data/", "Objects at start")) + == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 + ) create_table(node, "s3_clone") node.query("ALTER TABLE s3_test MOVE PARTITION '2020-01-03' TO TABLE s3_clone") @@ -465,10 +441,8 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(8192)" assert node.query("SELECT sum(id) FROM s3_clone FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_clone FORMAT Values") == "(8192)" - s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) - for obj in s3_objects: - print("Object after move partition", obj.object_name) + list_objects(cluster, "data/", "Object after move partition") # Number of objects in S3 should be unchanged. 
wait_for_delete_s3_objects( cluster, @@ -486,10 +460,8 @@ def test_move_replace_partition_to_another_table(cluster, node_name): ) assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" - s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) - for obj in s3_objects: - print("Object after insert", obj.object_name) + list_objects(cluster, "data/", "Object after insert") wait_for_delete_s3_objects( cluster, FILES_OVERHEAD * 2 @@ -515,12 +487,8 @@ def test_move_replace_partition_to_another_table(cluster, node_name): node.query("DROP TABLE s3_clone NO DELAY") assert node.query("SELECT sum(id) FROM s3_test FORMAT Values") == "(0)" assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" - s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) - for obj in s3_objects: - print("Object after drop", obj.object_name) - - # Data should remain in S3 + list_objects(cluster, "data/", "Object after drop") wait_for_delete_s3_objects( cluster, FILES_OVERHEAD @@ -530,10 +498,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): node.query("ALTER TABLE s3_test FREEZE") # Number S3 objects should be unchanged. - s3_objects = list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)) - for obj in s3_objects: - print("Object after freeze", obj.object_name) - + list_objects(cluster, "data/", "Object after freeze") wait_for_delete_s3_objects( cluster, FILES_OVERHEAD @@ -548,7 +513,8 @@ def test_move_replace_partition_to_another_table(cluster, node_name): cluster, FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 4 ) - for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): + minio = cluster.minio_client + for obj in list_objects(cluster, "data/"): minio.remove_object(cluster.minio_bucket, obj.object_name) @@ -556,7 +522,6 @@ def test_move_replace_partition_to_another_table(cluster, node_name): def test_freeze_unfreeze(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-03", 4096)) @@ -571,7 +536,7 @@ def test_freeze_unfreeze(cluster, node_name): wait_for_delete_empty_parts(node, "s3_test") wait_for_delete_inactive_parts(node, "s3_test") assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) @@ -586,10 +551,7 @@ def test_freeze_unfreeze(cluster, node_name): wait_for_delete_s3_objects(cluster, FILES_OVERHEAD) # Data should be removed from S3. 
- assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD @pytest.mark.parametrize("node_name", ["node"]) @@ -597,7 +559,6 @@ def test_freeze_system_unfreeze(cluster, node_name): node = cluster.instances[node_name] create_table(node, "s3_test") create_table(node, "s3_test_removed") - minio = cluster.minio_client node.query( "INSERT INTO s3_test VALUES {}".format(generate_values("2020-01-04", 4096)) @@ -613,7 +574,7 @@ def test_freeze_system_unfreeze(cluster, node_name): wait_for_delete_inactive_parts(node, "s3_test") node.query("DROP TABLE s3_test_removed NO DELAY") assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) + len(list_objects(cluster, "data/")) == FILES_OVERHEAD + (FILES_OVERHEAD_PER_PART_WIDE - FILES_OVERHEAD_METADATA_VERSION) * 2 ) @@ -624,10 +585,7 @@ def test_freeze_system_unfreeze(cluster, node_name): wait_for_delete_s3_objects(cluster, FILES_OVERHEAD) # Data should be removed from S3. - assert ( - len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) - == FILES_OVERHEAD - ) + assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD @pytest.mark.parametrize("node_name", ["node"]) @@ -710,7 +668,7 @@ def test_lazy_seek_optimization_for_async_read(cluster, node_name): node.query("SELECT * FROM s3_test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10") node.query("DROP TABLE IF EXISTS s3_test NO DELAY") minio = cluster.minio_client - for obj in list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True)): + for obj in list_objects(cluster, "data/"): minio.remove_object(cluster.minio_bucket, obj.object_name) From e3647571f237d29c9f47c2db08a74068cb109d17 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 26 Apr 2023 14:24:35 +0200 Subject: [PATCH 60/67] explicit drop table for tests --- tests/integration/test_merge_tree_s3/test.py | 69 +++++++++++++------- 1 file changed, 47 insertions(+), 22 deletions(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 5e0445636a1..cb1848a88fb 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -117,20 +117,31 @@ def wait_for_delete_s3_objects(cluster, expected, timeout=30): assert len(list_objects(cluster, "data/")) == expected -@pytest.fixture(autouse=True, scope="function") -def clear_minio(cluster): - # CH do some writes to the S3 at start. For example, file data/clickhouse_access_check_{server_uuid}. - # Set the timeout there as 10 sec in order to resolve the race with that file exists. - wait_for_delete_s3_objects(cluster, 0, timeout=10) - - yield - - # Remove extra objects to prevent tests cascade failing +def remove_all_s3_objects(cluster): minio = cluster.minio_client for obj in list_objects(cluster, "data/"): minio.remove_object(cluster.minio_bucket, obj.object_name) +@pytest.fixture(autouse=True, scope="function") +def clear_minio(cluster): + try: + # CH do some writes to the S3 at start. For example, file data/clickhouse_access_check_{server_uuid}. + # Set the timeout there as 10 sec in order to resolve the race with that file exists. 
+ wait_for_delete_s3_objects(cluster, 0, timeout=10) + except: + # Remove extra objects to prevent tests cascade failing + remove_all_s3_objects(cluster) + + yield + + +def check_no_objects_after_drop(cluster, table_name="s3_test", node_name="node"): + node = cluster.instances[node_name] + node.query(f"DROP TABLE IF EXISTS {table_name} NO DELAY") + wait_for_delete_s3_objects(cluster, 0, timeout=0) + + @pytest.mark.parametrize( "min_rows_for_wide_part,files_per_part,node_name", [ @@ -162,6 +173,8 @@ def test_simple_insert_select( node.query("SELECT count(*) FROM s3_test where id = 1 FORMAT Values") == "(2)" ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("merge_vertical,node_name", [(True, "node"), (False, "node")]) def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): @@ -172,7 +185,6 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): node = cluster.instances[node_name] create_table(node, "s3_test", **settings) - minio = cluster.minio_client node.query("SYSTEM STOP MERGES s3_test") node.query( @@ -226,6 +238,8 @@ def test_insert_same_partition_and_merge(cluster, merge_vertical, node_name): cluster, FILES_OVERHEAD_PER_PART_WIDE + FILES_OVERHEAD, timeout=45 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_alter_table_columns(cluster, node_name): @@ -271,6 +285,8 @@ def test_alter_table_columns(cluster, node_name): cluster, FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE + 2 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_attach_detach_partition(cluster, node_name): @@ -338,6 +354,8 @@ def test_attach_detach_partition(cluster, node_name): == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 0 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_move_partition_to_another_disk(cluster, node_name): @@ -370,6 +388,8 @@ def test_move_partition_to_another_disk(cluster, node_name): == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 ) + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_table_manipulations(cluster, node_name): @@ -408,6 +428,8 @@ def test_table_manipulations(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_move_replace_partition_to_another_table(cluster, node_name): @@ -513,9 +535,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): cluster, FILES_OVERHEAD_PER_PART_WIDE * 4 - FILES_OVERHEAD_METADATA_VERSION * 4 ) - minio = cluster.minio_client - for obj in list_objects(cluster, "data/"): - minio.remove_object(cluster.minio_bucket, obj.object_name) + remove_all_s3_objects(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -548,10 +568,10 @@ def test_freeze_unfreeze(cluster, node_name): # Unfreeze all partitions from backup2. node.query("ALTER TABLE s3_test UNFREEZE WITH NAME 'backup2'") + # Data should be removed from S3. wait_for_delete_s3_objects(cluster, FILES_OVERHEAD) - # Data should be removed from S3. - assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -582,10 +602,10 @@ def test_freeze_system_unfreeze(cluster, node_name): # Unfreeze all data from backup3. 
node.query("SYSTEM UNFREEZE WITH NAME 'backup3'") + # Data should be removed from S3. wait_for_delete_s3_objects(cluster, FILES_OVERHEAD) - # Data should be removed from S3. - assert len(list_objects(cluster, "data/")) == FILES_OVERHEAD + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -631,6 +651,8 @@ def test_s3_disk_apply_new_settings(cluster, node_name): # There should be 3 times more S3 requests because multi-part upload mode uses 3 requests to upload object. assert get_s3_requests() - s3_requests_before == s3_requests_to_write_partition * 3 + check_no_objects_after_drop(cluster) + @pytest.mark.parametrize("node_name", ["node"]) def test_s3_no_delete_objects(cluster, node_name): @@ -639,6 +661,7 @@ def test_s3_no_delete_objects(cluster, node_name): node, "s3_test_no_delete_objects", storage_policy="no_delete_objects_s3" ) node.query("DROP TABLE s3_test_no_delete_objects SYNC") + remove_all_s3_objects(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -653,6 +676,7 @@ def test_s3_disk_reads_on_unstable_connection(cluster, node_name): assert node.query("SELECT sum(id) FROM s3_test").splitlines() == [ "40499995500000" ] + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -666,10 +690,8 @@ def test_lazy_seek_optimization_for_async_read(cluster, node_name): "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000000" ) node.query("SELECT * FROM s3_test WHERE value LIKE '%abc%' ORDER BY value LIMIT 10") - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") - minio = cluster.minio_client - for obj in list_objects(cluster, "data/"): - minio.remove_object(cluster.minio_bucket, obj.object_name) + + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node_with_limited_disk"]) @@ -697,7 +719,7 @@ def test_cache_with_full_disk_space(cluster, node_name): assert node.contains_in_log( "Insert into cache is skipped due to insufficient disk space" ) - node.query("DROP TABLE IF EXISTS s3_test NO DELAY") + check_no_objects_after_drop(cluster, node_name=node_name) @pytest.mark.parametrize("node_name", ["node"]) @@ -722,6 +744,7 @@ def test_store_cleanup_disk_s3(cluster, node_name): "CREATE TABLE s3_test UUID '00000000-1000-4000-8000-000000000001' (n UInt64) Engine=MergeTree() ORDER BY n SETTINGS storage_policy='s3';" ) node.query("INSERT INTO s3_test SELECT 1") + check_no_objects_after_drop(cluster) @pytest.mark.parametrize("node_name", ["node"]) @@ -798,3 +821,5 @@ def test_cache_setting_compatibility(cluster, node_name): node.query("SELECT * FROM s3_test FORMAT Null") assert not node.contains_in_log("No such file or directory: Cache info:") + + check_no_objects_after_drop(cluster) From 4ae2a467f76827bea025e97137e8545f6cf324db Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Mon, 1 May 2023 14:49:12 -0400 Subject: [PATCH 61/67] add note about other disk types --- docs/en/engines/table-engines/mergetree-family/mergetree.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 96abfe4b80a..543b0c58dc1 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -888,6 +888,10 @@ This example query shows how to attach a table stored at a URL and configure the remote storage within the query. 
The web storage is not configured in the ClickHouse configuration files; all the settings are in the CREATE/ATTACH query. +:::note +The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. +::: + ```sql ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' ( From ff648b7b36aa2d45f52c0e2cd28c90810a502c69 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Mon, 1 May 2023 13:13:57 +0200 Subject: [PATCH 62/67] mute the bug, will open new pr with fix --- tests/integration/test_merge_tree_s3/test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index cb1848a88fb..9e9903c36c7 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -452,7 +452,7 @@ def test_move_replace_partition_to_another_table(cluster, node_name): assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(16384)" assert ( - len(list_objects(cluster, "data/", "Objects at start")) + len(list_objects(cluster, "data/", "Objects at start")) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 4 ) create_table(node, "s3_clone") @@ -686,6 +686,7 @@ def test_lazy_seek_optimization_for_async_read(cluster, node_name): node.query( "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3';" ) + node.query("SYSTEM STOP MERGES s3_test") node.query( "INSERT INTO s3_test SELECT * FROM generateRandom('key UInt32, value String') LIMIT 10000000" ) @@ -701,6 +702,7 @@ def test_cache_with_full_disk_space(cluster, node_name): node.query( "CREATE TABLE s3_test (key UInt32, value String) Engine=MergeTree() ORDER BY value SETTINGS storage_policy='s3_with_cache_and_jbod';" ) + node.query("SYSTEM STOP MERGES s3_test") node.query( "INSERT INTO s3_test SELECT number, toString(number) FROM numbers(100000000)" ) From 044cfe5a2234acdc53087cc8fe3cb4f4b2431e41 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 1 May 2023 22:32:31 +0200 Subject: [PATCH 63/67] Remove wrong assertion --- src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 4114ffcc522..91a04bde0bf 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -493,11 +493,6 @@ bool CachedOnDiskReadBufferFromFile::completeFileSegmentAndGetNext() chassert(file_offset_of_buffer_end > completed_range.right); - if (read_type == ReadType::CACHED) - { - chassert(current_file_segment->getDownloadedSize(true) == current_file_segment->range().size()); - } - file_segments->popFront(); if (file_segments->empty()) return false; From fdaed706a793c55f43e0873b98f6997e5bde5b53 Mon Sep 17 00:00:00 2001 From: HarryLeeIBM Date: Mon, 1 May 2023 18:54:43 -0700 Subject: [PATCH 64/67] Fix decimal aggregates test for s390x --- tests/queries/0_stateless/00700_decimal_aggregates.reference | 2 +- tests/queries/0_stateless/00700_decimal_aggregates.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/00700_decimal_aggregates.reference 
b/tests/queries/0_stateless/00700_decimal_aggregates.reference index acf41546f5c..79195312867 100644 --- a/tests/queries/0_stateless/00700_decimal_aggregates.reference +++ b/tests/queries/0_stateless/00700_decimal_aggregates.reference @@ -5,7 +5,7 @@ -1275 -424.99999983 -255 -1275 -424.99999983 -255 101 101 101 101 101 101 -101 -101 -101 -101 -101 -101 -(101,101,101) (101,101,101) (101,101,101) (101,101,101) (102,100,101) +(101,101,101) (101,101,101) (101,101,101) (101,101,101) (1,1,1,1,1,1) 5 5 5 10 10 10 -50 -50 -16.66666666 -16.66666666 -10 -10 diff --git a/tests/queries/0_stateless/00700_decimal_aggregates.sql b/tests/queries/0_stateless/00700_decimal_aggregates.sql index a1814fc866f..6ca37e06918 100644 --- a/tests/queries/0_stateless/00700_decimal_aggregates.sql +++ b/tests/queries/0_stateless/00700_decimal_aggregates.sql @@ -24,7 +24,7 @@ SELECT (uniq(a), uniq(b), uniq(c)), (uniqCombined(a), uniqCombined(b), uniqCombined(c)), (uniqCombined(17)(a), uniqCombined(17)(b), uniqCombined(17)(c)), (uniqExact(a), uniqExact(b), uniqExact(c)), - (uniqHLL12(a), uniqHLL12(b), uniqHLL12(c)) + (102 - uniqHLL12(a) >= 0, 102 - uniqHLL12(b) >= 0, 102 - uniqHLL12(c) >= 0, uniqHLL12(a) - 99 >= 0, uniqHLL12(b) - 99 >= 0, uniqHLL12(c) - 99 >= 0) FROM (SELECT * FROM decimal ORDER BY a); SELECT uniqUpTo(10)(a), uniqUpTo(10)(b), uniqUpTo(10)(c) FROM decimal WHERE a >= 0 AND a < 5; From 3ec53152e5513933dc97b038b78f3cef91196255 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 2 May 2023 12:02:54 +0200 Subject: [PATCH 65/67] Update Metadata.cpp --- src/Interpreters/Cache/Metadata.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index ac16d0ef9da..d97417dd290 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -278,8 +278,8 @@ void CacheMetadata::doCleanup() } catch (...) 
{ - chassert(false); tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); } } } From aa4c5fe958159a102d785ba7f72bc59b93297586 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 May 2023 13:43:59 +0200 Subject: [PATCH 66/67] Enhancements for background merges (#49313) --- src/Core/Defines.h | 6 ---- src/Interpreters/SortedBlocksWriter.cpp | 4 +++ .../Merges/AggregatingSortedTransform.h | 10 +++++-- .../Algorithms/AggregatingSortedAlgorithm.cpp | 15 +++++++--- .../Algorithms/AggregatingSortedAlgorithm.h | 13 +++++++-- .../Algorithms/CollapsingSortedAlgorithm.cpp | 5 ++-- .../Algorithms/CollapsingSortedAlgorithm.h | 4 +-- .../FinishAggregatingInOrderAlgorithm.cpp | 8 +++--- .../FinishAggregatingInOrderAlgorithm.h | 8 +++--- .../GraphiteRollupSortedAlgorithm.cpp | 5 ++-- .../GraphiteRollupSortedAlgorithm.h | 10 +++++-- src/Processors/Merges/Algorithms/MergedData.h | 19 ++++++++++--- .../Algorithms/MergingSortedAlgorithm.cpp | 5 ++-- .../Algorithms/MergingSortedAlgorithm.h | 3 +- .../Algorithms/ReplacingSortedAlgorithm.cpp | 5 ++-- .../Algorithms/ReplacingSortedAlgorithm.h | 3 +- .../Algorithms/SummingSortedAlgorithm.cpp | 9 +++--- .../Algorithms/SummingSortedAlgorithm.h | 5 ++-- .../VersionedCollapsingAlgorithm.cpp | 7 +++-- .../Algorithms/VersionedCollapsingAlgorithm.h | 3 +- .../Merges/CollapsingSortedTransform.h | 6 ++-- .../FinishAggregatingInOrderTransform.h | 8 +++--- .../Merges/GraphiteRollupSortedTransform.h | 13 ++++++--- .../Merges/MergingSortedTransform.cpp | 6 ++-- .../Merges/MergingSortedTransform.h | 3 +- .../Merges/ReplacingSortedTransform.h | 6 ++-- .../Merges/SummingSortedTransform.h | 7 +++-- .../Merges/VersionedCollapsingTransform.h | 6 ++-- .../QueryPlan/ReadFromMergeTree.cpp | 16 +++++------ src/Processors/QueryPlan/SortingStep.cpp | 2 ++ .../Transforms/MergeSortingTransform.cpp | 1 + .../gtest_blocks_size_merging_streams.cpp | 4 +-- src/Storages/MergeTree/MergeList.cpp | 2 ++ src/Storages/MergeTree/MergeList.h | 2 ++ src/Storages/MergeTree/MergeTask.cpp | 28 +++++++++++-------- .../MergeTree/MergeTreeDataWriter.cpp | 12 ++++---- src/Storages/MergeTree/MergeTreeSettings.h | 6 ++-- src/Storages/System/StorageSystemMerges.cpp | 2 ++ .../02117_show_create_table_system.reference | 1 + .../02725_memory-for-merges.reference | 1 + .../0_stateless/02725_memory-for-merges.sql | 27 ++++++++++++++++++ 41 files changed, 205 insertions(+), 101 deletions(-) create mode 100644 tests/queries/0_stateless/02725_memory-for-merges.reference create mode 100644 tests/queries/0_stateless/02725_memory-for-merges.sql diff --git a/src/Core/Defines.h b/src/Core/Defines.h index 3fae123fb6b..e9b84b71cae 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -29,11 +29,6 @@ #define DEFAULT_INSERT_BLOCK_SIZE \ 1048449 /// 1048576 - PADDING_FOR_SIMD - (PADDING_FOR_SIMD - 1) bytes padding that we usually have in arrays -/** The same, but for merge operations. Less DEFAULT_BLOCK_SIZE for saving RAM (since all the columns are read). - * Significantly less, since there are 10-way mergers. 
- */ -#define DEFAULT_MERGE_BLOCK_SIZE 8192 - #define DEFAULT_PERIODIC_LIVE_VIEW_REFRESH_SEC 60 #define SHOW_CHARS_ON_SYNTAX_ERROR ptrdiff_t(160) #define DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES 3 @@ -83,4 +78,3 @@ #else #define QUERY_PROFILER_DEFAULT_SAMPLE_RATE_NS 0 #endif - diff --git a/src/Interpreters/SortedBlocksWriter.cpp b/src/Interpreters/SortedBlocksWriter.cpp index d8c42cba9c1..e09a66a38e6 100644 --- a/src/Interpreters/SortedBlocksWriter.cpp +++ b/src/Interpreters/SortedBlocksWriter.cpp @@ -165,6 +165,7 @@ SortedBlocksWriter::TmpFilePtr SortedBlocksWriter::flush(const BlocksList & bloc pipeline.getNumStreams(), sort_description, rows_in_block, + /*max_block_size_bytes=*/0, SortingQueueStrategy::Default); pipeline.addTransform(std::move(transform)); @@ -220,6 +221,7 @@ SortedBlocksWriter::PremergedFiles SortedBlocksWriter::premerge() pipeline.getNumStreams(), sort_description, rows_in_block, + /*max_block_size_bytes=*/0, SortingQueueStrategy::Default); pipeline.addTransform(std::move(transform)); @@ -254,6 +256,7 @@ SortedBlocksWriter::SortedFiles SortedBlocksWriter::finishMerge(std::function= max_block_size || accumulated_bytes >= max_block_bytes) + if (accumulated_rows >= max_block_size_rows || accumulated_bytes >= max_block_size_bytes) status.chunk = prepareToMerge(); return status; diff --git a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h index b1a74a09459..13522b70834 100644 --- a/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h +++ b/src/Processors/Merges/Algorithms/FinishAggregatingInOrderAlgorithm.h @@ -42,8 +42,8 @@ public: size_t num_inputs_, AggregatingTransformParamsPtr params_, const SortDescription & description_, - size_t max_block_size_, - size_t max_block_bytes_); + size_t max_block_size_rows_, + size_t max_block_size_bytes_); void initialize(Inputs inputs) override; void consume(Input & input, size_t source_num) override; @@ -79,8 +79,8 @@ private: size_t num_inputs; AggregatingTransformParamsPtr params; SortDescriptionWithPositions description; - size_t max_block_size; - size_t max_block_bytes; + size_t max_block_size_rows; + size_t max_block_size_bytes; Inputs current_inputs; diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index 123748f9b43..814625d7aee 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -42,11 +42,12 @@ GraphiteRollupSortedAlgorithm::GraphiteRollupSortedAlgorithm( const Block & header_, size_t num_inputs, SortDescription description_, - size_t max_block_size, + size_t max_block_size_rows_, + size_t max_block_size_bytes_, Graphite::Params params_, time_t time_of_merge_) : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), nullptr, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), false, max_block_size) + , merged_data(header_.cloneEmptyColumns(), false, max_block_size_rows_, max_block_size_bytes_) , params(std::move(params_)) , time_of_merge(time_of_merge_) { diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h index d6d2f66fb82..f920d623b1f 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.h @@ 
-22,9 +22,13 @@ class GraphiteRollupSortedAlgorithm final : public IMergingAlgorithmWithSharedCh { public: GraphiteRollupSortedAlgorithm( - const Block & header, size_t num_inputs, - SortDescription description_, size_t max_block_size, - Graphite::Params params_, time_t time_of_merge_); + const Block & header, + size_t num_inputs, + SortDescription description_, + size_t max_block_size_rows_, + size_t max_block_size_bytes_, + Graphite::Params params_, + time_t time_of_merge_); Status merge() override; diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index f4ef0b77c53..f92d20d22e1 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -19,8 +19,8 @@ namespace ErrorCodes class MergedData { public: - explicit MergedData(MutableColumns columns_, bool use_average_block_size_, UInt64 max_block_size_) - : columns(std::move(columns_)), max_block_size(max_block_size_), use_average_block_size(use_average_block_size_) + explicit MergedData(MutableColumns columns_, bool use_average_block_size_, UInt64 max_block_size_, UInt64 max_block_size_bytes_) + : columns(std::move(columns_)), max_block_size(max_block_size_), max_block_size_bytes(max_block_size_bytes_), use_average_block_size(use_average_block_size_) { } @@ -117,6 +117,16 @@ public: if (merged_rows >= max_block_size) return true; + /// Never return more than max_block_size_bytes + if (max_block_size_bytes) + { + size_t merged_bytes = 0; + for (const auto & column : columns) + merged_bytes += column->allocatedBytes(); + if (merged_bytes >= max_block_size_bytes) + return true; + } + if (!use_average_block_size) return false; @@ -143,8 +153,9 @@ protected: UInt64 total_chunks = 0; UInt64 total_allocated_bytes = 0; - const UInt64 max_block_size; - const bool use_average_block_size; + const UInt64 max_block_size = 0; + const UInt64 max_block_size_bytes = 0; + const bool use_average_block_size = false; bool need_flush = false; }; diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp index 77db1e06d06..1debfcec8e0 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.cpp @@ -11,13 +11,14 @@ MergingSortedAlgorithm::MergingSortedAlgorithm( Block header_, size_t num_inputs, const SortDescription & description_, - size_t max_block_size, + size_t max_block_size_, + size_t max_block_size_bytes_, SortingQueueStrategy sorting_queue_strategy_, UInt64 limit_, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes) : header(std::move(header_)) - , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size) + , merged_data(header.cloneEmptyColumns(), use_average_block_sizes, max_block_size_, max_block_size_bytes_) , description(description_) , limit(limit_) , out_row_sources_buf(out_row_sources_buf_) diff --git a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h index 2537c48b128..1357e58f0f1 100644 --- a/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/MergingSortedAlgorithm.h @@ -17,7 +17,8 @@ public: Block header_, size_t num_inputs, const SortDescription & description_, - size_t max_block_size, + size_t max_block_size_, + size_t max_block_size_bytes_, SortingQueueStrategy sorting_queue_strategy_, UInt64 limit_ = 0, WriteBuffer * 
out_row_sources_buf_ = nullptr, diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp index e8d1f836591..db770de858c 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.cpp @@ -17,12 +17,13 @@ ReplacingSortedAlgorithm::ReplacingSortedAlgorithm( SortDescription description_, const String & is_deleted_column, const String & version_column, - size_t max_block_size, + size_t max_block_size_rows, + size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_, bool use_average_block_sizes, bool cleanup_) : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, max_row_refs) - , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size), cleanup(cleanup_) + , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows, max_block_size_bytes), cleanup(cleanup_) { if (!is_deleted_column.empty()) is_deleted_column_number = header_.getPositionByName(is_deleted_column); diff --git a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h index 6b9fb8f98c5..4d8de55b032 100644 --- a/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h @@ -23,7 +23,8 @@ public: SortDescription description_, const String & is_deleted_column, const String & version_column, - size_t max_block_size, + size_t max_block_size_rows, + size_t max_block_size_bytes, WriteBuffer * out_row_sources_buf_ = nullptr, bool use_average_block_sizes = false, bool cleanup = false); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 5b829d6299e..7dac5715f95 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -497,8 +497,8 @@ static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num, SummingSortedAlgorithm::SummingMergedData::SummingMergedData( - MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_) - : MergedData(std::move(columns_), false, max_block_size_) + MutableColumns columns_, UInt64 max_block_size_rows_, UInt64 max_block_size_bytes_, ColumnsDefinition & def_) + : MergedData(std::move(columns_), false, max_block_size_rows_, max_block_size_bytes_) , def(def_) { current_row.resize(def.column_names.size()); @@ -686,10 +686,11 @@ SummingSortedAlgorithm::SummingSortedAlgorithm( SortDescription description_, const Names & column_names_to_sum, const Names & partition_key_columns, - size_t max_block_size) + size_t max_block_size_rows, + size_t max_block_size_bytes) : IMergingAlgorithmWithDelayedChunk(header_, num_inputs, std::move(description_)) , columns_definition(defineColumns(header_, description, column_names_to_sum, partition_key_columns)) - , merged_data(getMergedDataColumns(header_, columns_definition), max_block_size, columns_definition) + , merged_data(getMergedDataColumns(header_, columns_definition), max_block_size_rows, max_block_size_bytes, columns_definition) { } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h index c77bf7c0ba5..8943e235729 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.h +++ 
@@ -22,7 +22,8 @@ public:
         const Names & column_names_to_sum,
         /// List of partition key columns. They have to be excluded.
         const Names & partition_key_columns,
-        size_t max_block_size);
+        size_t max_block_size_rows,
+        size_t max_block_size_bytes);

     void initialize(Inputs inputs) override;
     void consume(Input & input, size_t source_num) override;
@@ -63,7 +64,7 @@ public:
         using MergedData::insertRow;

     public:
-        SummingMergedData(MutableColumns columns_, UInt64 max_block_size_, ColumnsDefinition & def_);
+        SummingMergedData(MutableColumns columns_, UInt64 max_block_size_rows, UInt64 max_block_size_bytes_, ColumnsDefinition & def_);

         void startGroup(ColumnRawPtrs & raw_columns, size_t row);
         void finishGroup();
diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp
index cbafa53d0a3..e7a431dc1d0 100644
--- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp
+++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.cpp
@@ -12,13 +12,14 @@ VersionedCollapsingAlgorithm::VersionedCollapsingAlgorithm(
     size_t num_inputs,
     SortDescription description_,
     const String & sign_column_,
-    size_t max_block_size,
+    size_t max_block_size_rows_,
+    size_t max_block_size_bytes_,
     WriteBuffer * out_row_sources_buf_,
     bool use_average_block_sizes)
     : IMergingAlgorithmWithSharedChunks(header_, num_inputs, std::move(description_), out_row_sources_buf_, MAX_ROWS_IN_MULTIVERSION_QUEUE)
-    , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size)
+    , merged_data(header_.cloneEmptyColumns(), use_average_block_sizes, max_block_size_rows_, max_block_size_bytes_)
     /// -1 for +1 in FixedSizeDequeWithGaps's internal buffer. 3 is a reasonable minimum size to collapse anything.
-    , max_rows_in_queue(std::min(std::max<size_t>(3, max_block_size), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1)
+    , max_rows_in_queue(std::min(std::max<size_t>(3, max_block_size_rows_), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1)
     , current_keys(max_rows_in_queue)
 {
     sign_column_number = header_.getPositionByName(sign_column_);
diff --git a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h
index 2226762d541..578100f080d 100644
--- a/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h
+++ b/src/Processors/Merges/Algorithms/VersionedCollapsingAlgorithm.h
@@ -20,7 +20,8 @@ public:
     VersionedCollapsingAlgorithm(
         const Block & header, size_t num_inputs,
         SortDescription description_, const String & sign_column_,
-        size_t max_block_size,
+        size_t max_block_size_rows,
+        size_t max_block_size_bytes,
         WriteBuffer * out_row_sources_buf_ = nullptr,
         bool use_average_block_sizes = false);
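The queue capacity used by VersionedCollapsingAlgorithm is derived from the row limit with a clamp: at least 3 rows so there is always something to collapse, at most MAX_ROWS_IN_MULTIVERSION_QUEUE, minus one to offset the +1 inside FixedSizeDequeWithGaps. A small worked sketch, where the queue maximum is an assumed illustrative value rather than the real constant:

#include <algorithm>
#include <cstddef>

/// Assumed value for illustration only; the real constant is defined alongside
/// FixedSizeDequeWithGaps in ClickHouse.
constexpr size_t MAX_ROWS_IN_MULTIVERSION_QUEUE = 8192;

size_t maxRowsInQueue(size_t max_block_size_rows)
{
    /// Clamp to [3, MAX_ROWS_IN_MULTIVERSION_QUEUE], then subtract 1.
    return std::min(std::max<size_t>(3, max_block_size_rows), MAX_ROWS_IN_MULTIVERSION_QUEUE) - 1;
}

/// Examples: maxRowsInQueue(1) == 2, maxRowsInQueue(100) == 99, maxRowsInQueue(1000000) == 8191.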
diff --git a/src/Processors/Merges/CollapsingSortedTransform.h b/src/Processors/Merges/CollapsingSortedTransform.h
index abe3eefb401..b0cb6bc6d62 100644
--- a/src/Processors/Merges/CollapsingSortedTransform.h
+++ b/src/Processors/Merges/CollapsingSortedTransform.h
@@ -16,7 +16,8 @@ public:
         SortDescription description_,
         const String & sign_column,
         bool only_positive_sign,
-        size_t max_block_size,
+        size_t max_block_size_rows,
+        size_t max_block_size_bytes,
         WriteBuffer * out_row_sources_buf_ = nullptr,
         bool use_average_block_sizes = false)
         : IMergingTransform(
@@ -26,7 +27,8 @@ public:
             std::move(description_),
             sign_column,
             only_positive_sign,
-            max_block_size,
+            max_block_size_rows,
+            max_block_size_bytes,
             &Poco::Logger::get("CollapsingSortedTransform"),
             out_row_sources_buf_,
             use_average_block_sizes)
diff --git a/src/Processors/Merges/FinishAggregatingInOrderTransform.h b/src/Processors/Merges/FinishAggregatingInOrderTransform.h
index b82a103fee0..0960b9d4127 100644
--- a/src/Processors/Merges/FinishAggregatingInOrderTransform.h
+++ b/src/Processors/Merges/FinishAggregatingInOrderTransform.h
@@ -17,16 +17,16 @@ public:
         size_t num_inputs,
         AggregatingTransformParamsPtr params,
         SortDescription description,
-        size_t max_block_size,
-        size_t max_block_bytes)
+        size_t max_block_size_rows,
+        size_t max_block_size_bytes)
         : IMergingTransform(
             num_inputs, header, {}, /*have_all_inputs_=*/ true, /*limit_hint_=*/ 0, /*always_read_till_end_=*/ false,
             header,
             num_inputs,
             params,
             std::move(description),
-            max_block_size,
-            max_block_bytes)
+            max_block_size_rows,
+            max_block_size_bytes)
     {
     }
diff --git a/src/Processors/Merges/GraphiteRollupSortedTransform.h b/src/Processors/Merges/GraphiteRollupSortedTransform.h
index f3c391c77ce..b69feff1fb6 100644
--- a/src/Processors/Merges/GraphiteRollupSortedTransform.h
+++ b/src/Processors/Merges/GraphiteRollupSortedTransform.h
@@ -11,15 +11,20 @@ class GraphiteRollupSortedTransform final : public IMergingTransform<GraphiteRollupSortedAlgorithm>
diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
--- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp
+++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp
     if (pipe.numOutputPorts() > 1)
     {
         auto transform = std::make_shared<MergingSortedTransform>(
-            pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size, SortingQueueStrategy::Batch);
+            pipe.getHeader(), pipe.numOutputPorts(), sort_description, max_block_size, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch);

         pipe.addTransform(std::move(transform));
     }
@@ -898,31 +898,31 @@ static void addMergingFinal(
     {
         case MergeTreeData::MergingParams::Ordinary:
             return std::make_shared<MergingSortedTransform>(header, num_outputs,
-                sort_description, max_block_size, SortingQueueStrategy::Batch);
+                sort_description, max_block_size, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch);

        case MergeTreeData::MergingParams::Collapsing:
            return std::make_shared<CollapsingSortedTransform>(header, num_outputs,
-                sort_description, merging_params.sign_column, true, max_block_size);
+                sort_description, merging_params.sign_column, true, max_block_size, /*max_block_size_bytes=*/0);

        case MergeTreeData::MergingParams::Summing:
            return std::make_shared<SummingSortedTransform>(header, num_outputs,
-                sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size);
+                sort_description, merging_params.columns_to_sum, partition_key_columns, max_block_size, /*max_block_size_bytes=*/0);

        case MergeTreeData::MergingParams::Aggregating:
            return std::make_shared<AggregatingSortedTransform>(header, num_outputs,
-                sort_description, max_block_size);
+                sort_description, max_block_size, /*max_block_size_bytes=*/0);

        case MergeTreeData::MergingParams::Replacing:
            return std::make_shared<ReplacingSortedTransform>(header, num_outputs,
-                sort_description, merging_params.is_deleted_column, merging_params.version_column, max_block_size, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false, /*cleanup*/ !merging_params.is_deleted_column.empty());
+                sort_description, merging_params.is_deleted_column, merging_params.version_column, max_block_size, /*max_block_size_bytes=*/0, /*out_row_sources_buf_*/ nullptr, /*use_average_block_sizes*/ false, /*cleanup*/ !merging_params.is_deleted_column.empty());

        case MergeTreeData::MergingParams::VersionedCollapsing:
            return std::make_shared<VersionedCollapsingTransform>(header, num_outputs,
-                sort_description, merging_params.sign_column, max_block_size);
+                sort_description, merging_params.sign_column, max_block_size, /*max_block_size_bytes=*/0);

        case MergeTreeData::MergingParams::Graphite:
            return std::make_shared<GraphiteRollupSortedTransform>(header, num_outputs,
-                sort_description, max_block_size, merging_params.graphite_params, now);
+                sort_description, max_block_size, /*max_block_size_bytes=*/0, merging_params.graphite_params, now);
     }

     UNREACHABLE();
diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp
index db44da5a0fc..55ce763575e 100644
--- a/src/Processors/QueryPlan/SortingStep.cpp
+++ b/src/Processors/QueryPlan/SortingStep.cpp
@@ -176,6 +176,7 @@ void SortingStep::mergingSorted(QueryPipelineBuilder & pipeline, const SortDescr
             pipeline.getNumStreams(),
             result_sort_desc,
             sort_settings.max_block_size,
+            /*max_block_size_bytes=*/0,
             SortingQueueStrategy::Batch,
             limit_,
             always_read_till_end);
@@ -269,6 +270,7 @@ void SortingStep::fullSort(
             pipeline.getNumStreams(),
             result_sort_desc,
             sort_settings.max_block_size,
+            /*max_block_size_bytes=*/0,
             SortingQueueStrategy::Batch,
             limit_,
             always_read_till_end);
diff --git a/src/Processors/Transforms/MergeSortingTransform.cpp b/src/Processors/Transforms/MergeSortingTransform.cpp
index ecf14a81c00..de77711d129 100644
--- a/src/Processors/Transforms/MergeSortingTransform.cpp
+++ b/src/Processors/Transforms/MergeSortingTransform.cpp
@@ -186,6 +186,7 @@ void MergeSortingTransform::consume(Chunk chunk)
             0,
             description,
             max_merged_block_size,
+            /*max_merged_block_size_bytes*/0,
             SortingQueueStrategy::Batch,
             limit,
             /*always_read_till_end_=*/ false,
diff --git a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp
index d968dae3ff8..bc22f249f97 100644
--- a/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp
+++ b/src/QueryPipeline/tests/gtest_blocks_size_merging_streams.cpp
@@ -83,7 +83,7 @@ TEST(MergingSortedTest, SimpleBlockSizeTest)
     EXPECT_EQ(pipe.numOutputPorts(), 3);

     auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
-        DEFAULT_MERGE_BLOCK_SIZE, SortingQueueStrategy::Batch, 0, false, nullptr, false, true);
+        8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, false, true);

     pipe.addTransform(std::move(transform));

@@ -125,7 +125,7 @@ TEST(MergingSortedTest, MoreInterestingBlockSizes)
     EXPECT_EQ(pipe.numOutputPorts(), 3);

     auto transform = std::make_shared<MergingSortedTransform>(pipe.getHeader(), pipe.numOutputPorts(), sort_description,
-        DEFAULT_MERGE_BLOCK_SIZE, SortingQueueStrategy::Batch, 0, false, nullptr, false, true);
+        8192, /*max_block_size_bytes=*/0, SortingQueueStrategy::Batch, 0, false, nullptr, false, true);

     pipe.addTransform(std::move(transform));

diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp
index 0bf662921ad..6812ef93a78 100644
--- a/src/Storages/MergeTree/MergeList.cpp
+++ b/src/Storages/MergeTree/MergeList.cpp
@@ -31,6 +31,7 @@ MergeListElement::MergeListElement(
         source_part_paths.emplace_back(source_part->getDataPartStorage().getFullPath());

         total_size_bytes_compressed += source_part->getBytesOnDisk();
+        total_size_bytes_uncompressed += source_part->getTotalColumnsSize().data_uncompressed;
         total_size_marks += source_part->getMarksCount();
         total_rows_count += source_part->index_granularity.getTotalRows();
     }
@@ -57,6 +58,7 @@ MergeInfo MergeListElement::getInfo() const
     res.progress = progress.load(std::memory_order_relaxed);
     res.num_parts = num_parts;
     res.total_size_bytes_compressed = total_size_bytes_compressed;
+    res.total_size_bytes_uncompressed = total_size_bytes_uncompressed;
     res.total_size_marks = total_size_marks;
     res.total_rows_count = total_rows_count;
     res.bytes_read_uncompressed = bytes_read_uncompressed.load(std::memory_order_relaxed);
diff --git a/src/Storages/MergeTree/MergeList.h b/src/Storages/MergeTree/MergeList.h
index 9c8c2ebd1e4..045b4015c8e 100644
--- a/src/Storages/MergeTree/MergeList.h
+++ b/src/Storages/MergeTree/MergeList.h
@@ -40,6 +40,7 @@ struct MergeInfo
     Float64 progress;
     UInt64 num_parts;
     UInt64 total_size_bytes_compressed;
+    UInt64 total_size_bytes_uncompressed;
     UInt64 total_size_marks;
     UInt64 total_rows_count;
     UInt64 bytes_read_uncompressed;
@@ -82,6 +83,7 @@ struct MergeListElement : boost::noncopyable
     std::atomic<bool> is_cancelled{};

     UInt64 total_size_bytes_compressed{};
+    UInt64 total_size_bytes_uncompressed{};
     UInt64 total_size_marks{};
     UInt64 total_rows_count{};
     std::atomic<UInt64> bytes_read_uncompressed{};
diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp
index df759b3bd45..eee550f8dd6 100644
--- a/src/Storages/MergeTree/MergeTask.cpp
+++ b/src/Storages/MergeTree/MergeTask.cpp
@@ -921,7 +921,9 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
     /// If merge is vertical we cannot calculate it
     ctx->blocks_are_granules_size = (global_ctx->chosen_merge_algorithm == MergeAlgorithm::Vertical);

-    UInt64 merge_block_size = data_settings->merge_max_block_size;
+    /// There is no sense to have the block size bigger than one granule for merge operations.
+    const UInt64 merge_block_size_rows = data_settings->merge_max_block_size;
+    const UInt64 merge_block_size_bytes = data_settings->merge_max_block_size_bytes;

     switch (ctx->merging_params.mode)
     {
@@ -930,7 +932,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
             header,
             pipes.size(),
             sort_description,
-            merge_block_size,
+            merge_block_size_rows,
+            merge_block_size_bytes,
             SortingQueueStrategy::Default,
             /* limit_= */0,
             /* always_read_till_end_= */false,
@@ -942,35 +945,35 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
         case MergeTreeData::MergingParams::Collapsing:
             merged_transform = std::make_shared<CollapsingSortedTransform>(
                 header, pipes.size(), sort_description, ctx->merging_params.sign_column, false,
-                merge_block_size, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size);
+                merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size);
             break;

        case MergeTreeData::MergingParams::Summing:
            merged_transform = std::make_shared<SummingSortedTransform>(
-                header, pipes.size(), sort_description, ctx->merging_params.columns_to_sum, partition_key_columns, merge_block_size);
+                header, pipes.size(), sort_description, ctx->merging_params.columns_to_sum, partition_key_columns, merge_block_size_rows, merge_block_size_bytes);
            break;

        case MergeTreeData::MergingParams::Aggregating:
-            merged_transform = std::make_shared<AggregatingSortedTransform>(header, pipes.size(), sort_description, merge_block_size);
+            merged_transform = std::make_shared<AggregatingSortedTransform>(header, pipes.size(), sort_description, merge_block_size_rows, merge_block_size_bytes);
            break;

        case MergeTreeData::MergingParams::Replacing:
            merged_transform = std::make_shared<ReplacingSortedTransform>(
                 header, pipes.size(), sort_description, ctx->merging_params.is_deleted_column, ctx->merging_params.version_column,
-                merge_block_size, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size,
+                merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size,
                 (data_settings->clean_deleted_rows != CleanDeletedRows::Never) || global_ctx->cleanup);
            break;

        case MergeTreeData::MergingParams::Graphite:
            merged_transform = std::make_shared<GraphiteRollupSortedTransform>(
-                header, pipes.size(), sort_description, merge_block_size,
+                header, pipes.size(), sort_description, merge_block_size_rows, merge_block_size_bytes,
                 ctx->merging_params.graphite_params, global_ctx->time_of_merge);
            break;

        case MergeTreeData::MergingParams::VersionedCollapsing:
            merged_transform = std::make_shared<VersionedCollapsingTransform>(
                 header, pipes.size(), sort_description, ctx->merging_params.sign_column,
-                merge_block_size, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size);
+                merge_block_size_rows, merge_block_size_bytes, ctx->rows_sources_write_buf.get(), ctx->blocks_are_granules_size);
            break;
     }

@@ -1011,7 +1014,8 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()

 MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm() const
 {
-    const size_t sum_rows_upper_bound = global_ctx->merge_list_element_ptr->total_rows_count;
+    const size_t total_rows_count = global_ctx->merge_list_element_ptr->total_rows_count;
+    const size_t total_size_bytes_uncompressed = global_ctx->merge_list_element_ptr->total_size_bytes_uncompressed;
     const auto data_settings = global_ctx->data->getSettings();

     if (global_ctx->deduplicate)
@@ -1042,11 +1046,13 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm

     bool enough_ordinary_cols = global_ctx->gathering_columns.size() >= data_settings->vertical_merge_algorithm_min_columns_to_activate;

-    bool enough_total_rows = sum_rows_upper_bound >= data_settings->vertical_merge_algorithm_min_rows_to_activate;
+    bool enough_total_rows = total_rows_count >= data_settings->vertical_merge_algorithm_min_rows_to_activate;
+
+    bool enough_total_bytes = total_size_bytes_uncompressed >= data_settings->vertical_merge_algorithm_min_bytes_to_activate;

     bool no_parts_overflow = global_ctx->future_part->parts.size() <= RowSourcePart::MAX_PARTS;

-    auto merge_alg = (is_supported_storage && enough_total_rows && enough_ordinary_cols && no_parts_overflow) ?
+    auto merge_alg = (is_supported_storage && enough_total_rows && enough_total_bytes && enough_ordinary_cols && no_parts_overflow) ?
                         MergeAlgorithm::Vertical : MergeAlgorithm::Horizontal;

     return merge_alg;
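With the hunks above, chooseMergeAlgorithm() now also requires the merge to be big enough in uncompressed bytes before switching to the Vertical algorithm. A minimal sketch of the combined predicate, using the default thresholds from the MergeTreeSettings.h diff below; the MAX_PARTS value is a hypothetical stand-in for RowSourcePart::MAX_PARTS, and the is_supported_storage check is omitted:

#include <cstddef>

/// Sketch only: mirrors the vertical-merge activation condition in chooseMergeAlgorithm().
bool chooseVerticalMerge(
    size_t total_rows_count,
    size_t total_size_bytes_uncompressed,
    size_t gathering_columns,
    size_t num_parts)
{
    const size_t min_rows_to_activate = 16 * 8192;  /// vertical_merge_algorithm_min_rows_to_activate
    const size_t min_bytes_to_activate = 0;         /// vertical_merge_algorithm_min_bytes_to_activate; 0 always passes
    const size_t min_columns_to_activate = 11;      /// vertical_merge_algorithm_min_columns_to_activate
    const size_t max_parts = 255;                   /// hypothetical stand-in for RowSourcePart::MAX_PARTS

    bool enough_total_rows = total_rows_count >= min_rows_to_activate;
    bool enough_total_bytes = total_size_bytes_uncompressed >= min_bytes_to_activate;
    bool enough_ordinary_cols = gathering_columns >= min_columns_to_activate;
    bool no_parts_overflow = num_parts <= max_parts;

    return enough_total_rows && enough_total_bytes && enough_ordinary_cols && no_parts_overflow;
}

Since min_bytes_to_activate defaults to 0, the new condition is a no-op unless the user raises it, which keeps the default behaviour unchanged.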
diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
index adb7505a8ba..dd7a0fcea24 100644
--- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp
@@ -280,23 +280,23 @@ Block MergeTreeDataWriter::mergeBlock(
             return nullptr;
         case MergeTreeData::MergingParams::Replacing:
             return std::make_shared<ReplacingSortedAlgorithm>(
-                block, 1, sort_description, merging_params.is_deleted_column, merging_params.version_column, block_size + 1);
+                block, 1, sort_description, merging_params.is_deleted_column, merging_params.version_column, block_size + 1, /*block_size_bytes=*/0);
         case MergeTreeData::MergingParams::Collapsing:
             return std::make_shared<CollapsingSortedAlgorithm>(
                 block, 1, sort_description, merging_params.sign_column,
-                false, block_size + 1, &Poco::Logger::get("MergeTreeDataWriter"));
+                false, block_size + 1, /*block_size_bytes=*/0, &Poco::Logger::get("MergeTreeDataWriter"));
         case MergeTreeData::MergingParams::Summing:
             return std::make_shared<SummingSortedAlgorithm>(
                 block, 1, sort_description, merging_params.columns_to_sum,
-                partition_key_columns, block_size + 1);
+                partition_key_columns, block_size + 1, /*block_size_bytes=*/0);
         case MergeTreeData::MergingParams::Aggregating:
-            return std::make_shared<AggregatingSortedAlgorithm>(block, 1, sort_description, block_size + 1);
+            return std::make_shared<AggregatingSortedAlgorithm>(block, 1, sort_description, block_size + 1, /*block_size_bytes=*/0);
         case MergeTreeData::MergingParams::VersionedCollapsing:
             return std::make_shared<VersionedCollapsingAlgorithm>(
-                block, 1, sort_description, merging_params.sign_column, block_size + 1);
+                block, 1, sort_description, merging_params.sign_column, block_size + 1, /*block_size_bytes=*/0);
         case MergeTreeData::MergingParams::Graphite:
             return std::make_shared<GraphiteRollupSortedAlgorithm>(
-                block, 1, sort_description, block_size + 1, merging_params.graphite_params, time(nullptr));
+                block, 1, sort_description, block_size + 1, /*block_size_bytes=*/0, merging_params.graphite_params, time(nullptr));
     }

     UNREACHABLE();
diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h
index ba98fca2f50..7ea7fab6e5d 100644
--- a/src/Storages/MergeTree/MergeTreeSettings.h
+++ b/src/Storages/MergeTree/MergeTreeSettings.h
@@ -40,7 +40,8 @@ struct Settings;
     M(Float, ratio_of_defaults_for_sparse_serialization, 1.0, "Minimal ratio of number of default values to number of all values in column to store it in sparse serializations. If >= 1, columns will be always written in full serialization.", 0) \
     \
     /** Merge settings. */ \
-    M(UInt64, merge_max_block_size, DEFAULT_MERGE_BLOCK_SIZE, "How many rows in blocks should be formed for merge operations.", 0) \
+    M(UInt64, merge_max_block_size, 8192, "How many rows in blocks should be formed for merge operations. By default has the same value as `index_granularity`.", 0) \
+    M(UInt64, merge_max_block_size_bytes, 10 * 1024 * 1024, "How many bytes in blocks should be formed for merge operations. By default has the same value as `index_granularity_bytes`.", 0) \
     M(UInt64, max_bytes_to_merge_at_max_space_in_pool, 150ULL * 1024 * 1024 * 1024, "Maximum in total size of parts to merge, when there are maximum free threads in background pool (or entries in replication queue).", 0) \
     M(UInt64, max_bytes_to_merge_at_min_space_in_pool, 1024 * 1024, "Maximum in total size of parts to merge, when there are minimum free threads in background pool (or entries in replication queue).", 0) \
     M(UInt64, max_replicated_merges_in_queue, 1000, "How many tasks of merging and mutating parts are allowed simultaneously in ReplicatedMergeTree queue.", 0) \
@@ -126,7 +127,8 @@ struct Settings;
     M(UInt64, min_relative_delay_to_close, 300, "Minimal delay from other replicas to close, stop serving requests and not return Ok during status check.", 0) \
     M(UInt64, min_absolute_delay_to_close, 0, "Minimal absolute delay to close, stop serving requests and not return Ok during status check.", 0) \
     M(UInt64, enable_vertical_merge_algorithm, 1, "Enable usage of Vertical merge algorithm.", 0) \
-    M(UInt64, vertical_merge_algorithm_min_rows_to_activate, 16 * DEFAULT_MERGE_BLOCK_SIZE, "Minimal (approximate) sum of rows in merging parts to activate Vertical merge algorithm.", 0) \
+    M(UInt64, vertical_merge_algorithm_min_rows_to_activate, 16 * 8192, "Minimal (approximate) sum of rows in merging parts to activate Vertical merge algorithm.", 0) \
+    M(UInt64, vertical_merge_algorithm_min_bytes_to_activate, 0, "Minimal (approximate) uncompressed size in bytes in merging parts to activate Vertical merge algorithm.", 0) \
     M(UInt64, vertical_merge_algorithm_min_columns_to_activate, 11, "Minimal amount of non-PK columns to activate Vertical merge algorithm.", 0) \
     \
     /** Compatibility settings */ \
diff --git a/src/Storages/System/StorageSystemMerges.cpp b/src/Storages/System/StorageSystemMerges.cpp
index b29836206d0..1f32a0ff700 100644
--- a/src/Storages/System/StorageSystemMerges.cpp
+++ b/src/Storages/System/StorageSystemMerges.cpp
@@ -22,6 +22,7 @@ NamesAndTypesList StorageSystemMerges::getNamesAndTypes()
         {"partition_id", std::make_shared<DataTypeString>()},
         {"is_mutation", std::make_shared<DataTypeUInt8>()},
         {"total_size_bytes_compressed", std::make_shared<DataTypeUInt64>()},
+        {"total_size_bytes_uncompressed", std::make_shared<DataTypeUInt64>()},
         {"total_size_marks", std::make_shared<DataTypeUInt64>()},
         {"bytes_read_uncompressed", std::make_shared<DataTypeUInt64>()},
         {"rows_read", std::make_shared<DataTypeUInt64>()},
@@ -59,6 +60,7 @@ void StorageSystemMerges::fillData(MutableColumns & res_columns, ContextPtr cont
         res_columns[i++]->insert(merge.partition_id);
         res_columns[i++]->insert(merge.is_mutation);
         res_columns[i++]->insert(merge.total_size_bytes_compressed);
+        res_columns[i++]->insert(merge.total_size_bytes_uncompressed);
         res_columns[i++]->insert(merge.total_size_marks);
         res_columns[i++]->insert(merge.bytes_read_uncompressed);
         res_columns[i++]->insert(merge.rows_read);
diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference
index 56330ea8bb9..3589fe4c632 100644
--- a/tests/queries/0_stateless/02117_show_create_table_system.reference
+++ b/tests/queries/0_stateless/02117_show_create_table_system.reference
@@ -361,6 +361,7 @@ CREATE TABLE system.merges
     `partition_id` String,
     `is_mutation` UInt8,
     `total_size_bytes_compressed` UInt64,
+    `total_size_bytes_uncompressed` UInt64,
     `total_size_marks` UInt64,
     `bytes_read_uncompressed` UInt64,
     `rows_read` UInt64,
diff --git a/tests/queries/0_stateless/02725_memory-for-merges.reference b/tests/queries/0_stateless/02725_memory-for-merges.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/tests/queries/0_stateless/02725_memory-for-merges.reference
@@ -0,0 +1 @@
+1
diff --git a/tests/queries/0_stateless/02725_memory-for-merges.sql b/tests/queries/0_stateless/02725_memory-for-merges.sql
new file mode 100644
index 00000000000..b6ae7af7f1a
--- /dev/null
+++ b/tests/queries/0_stateless/02725_memory-for-merges.sql
@@ -0,0 +1,27 @@
+-- Tags: no-s3-storage
+-- We allocate a lot of memory for buffers when reading or writing to S3
+
+DROP TABLE IF EXISTS 02725_memory_for_merges SYNC;
+
+CREATE TABLE 02725_memory_for_merges
+( n UInt64,
+    s String
+)
+ENGINE = MergeTree
+ORDER BY n
+SETTINGS merge_max_block_size_bytes=1024, index_granularity_bytes=1024;
+
+INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100);
+INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100);
+INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100);
+INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100);
+INSERT INTO 02725_memory_for_merges SELECT number, randomPrintableASCII(1000000) FROM numbers(100);
+
+OPTIMIZE TABLE 02725_memory_for_merges FINAL;
+
+SYSTEM FLUSH LOGS;
+
+WITH (SELECT uuid FROM system.tables WHERE table='02725_memory_for_merges' and database=currentDatabase()) as uuid
+SELECT sum(peak_memory_usage) < 1024 * 1024 * 200 from system.part_log where table_uuid=uuid and event_type='MergeParts';
+
+DROP TABLE IF EXISTS 02725_memory_for_merges SYNC;

From f6704205ff76571a5b17ee9fb01fe98ab999d297 Mon Sep 17 00:00:00 2001
From: Nikolay Degterinsky <43110995+evillique@users.noreply.github.com>
Date: Tue, 2 May 2023 16:05:18 +0200
Subject: [PATCH 67/67] Update WithFileName.cpp

---
 src/IO/WithFileName.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/IO/WithFileName.cpp b/src/IO/WithFileName.cpp
index ef4b5fed3b1..9d9f264c861 100644
--- a/src/IO/WithFileName.cpp
+++ b/src/IO/WithFileName.cpp
@@ -33,7 +33,7 @@ String getExceptionEntryWithFileName(const ReadBuffer & in)
     if (filename.empty())
         return "";

-    return fmt::format("; While reading from: {}", filename);
+    return fmt::format(": While reading from: {}", filename);
 }

 }
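The closing one-character patch only swaps the separator that introduces the file-name fragment, so chained exception text reads uniformly with colons. A toy sketch of the resulting message shape, where the base error text and path are made up for illustration:

#include <fmt/format.h>
#include <iostream>
#include <string>

int main()
{
    const std::string base_error = "Cannot parse input";    // hypothetical exception text
    const std::string filename = "/var/lib/data/file.csv";  // hypothetical file name

    /// Before the patch the fragment began with "; While reading from: ...";
    /// after it, the fragment starts with ':', matching how other context is appended.
    std::cout << base_error + fmt::format(": While reading from: {}", filename) << '\n';
    return 0;
}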