From 9246f258e82ca511adadad7f1160821e10b1002b Mon Sep 17 00:00:00 2001 From: millb Date: Mon, 30 Sep 2019 18:29:05 +0300 Subject: [PATCH 01/62] Created ability to configure allowed URL in config.xml without tests. --- dbms/programs/server/Server.cpp | 17 +++++++++++++++++ dbms/programs/server/config.xml | 19 +++++++++++++++++++ dbms/src/Common/ErrorCodes.cpp | 1 + dbms/src/Interpreters/Context.cpp | 26 +++++++++++++++++++++++++- dbms/src/Interpreters/Context.h | 6 ++++++ dbms/src/Storages/StorageURL.cpp | 25 +++++++++++++++++++++++++ dbms/src/Storages/StorageURL.h | 2 ++ 7 files changed, 95 insertions(+), 1 deletion(-) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 2f4e5039399..d50ce95affb 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -507,6 +507,23 @@ int Server::main(const std::vector & /*args*/) } global_context->setMarkCache(mark_cache_size); + if (config().has("remote_url_allow_hosts")) + { + std::vector keys; + config().keys("remote_url_allow_hosts", keys); + std::unordered_set primary_hosts; + std::vector regexp_hosts; + for (auto key : keys) + { + if (startsWith(key, "host_regexp")) + regexp_hosts.push_back(config().getString("remote_url_allow_hosts." + key)); + else if (startsWith(key, "host")) + primary_hosts.insert(config().getString("remote_url_allow_hosts." + key)); + } + global_context->setAllowedPrimaryUrlHosts(primary_hosts); + global_context->setAllowedRegexpUrlHosts(regexp_hosts); + } + #if USE_EMBEDDED_COMPILER size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500); if (compiled_expression_cache_size) diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index 34fe98b0e31..e97ad712040 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -3,6 +3,25 @@ NOTE: User and query level settings are set up in "users.xml" file. --> + + + + + + + trace diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index e2ee9720d68..49cb78003eb 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -457,6 +457,7 @@ namespace ErrorCodes extern const int UNKNOWN_PROTOCOL = 480; extern const int PATH_ACCESS_DENIED = 481; extern const int DICTIONARY_ACCESS_DENIED = 482; + extern const int UNACCEPTABLE_URL = 483; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 086d060c171..f57e795142d 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -156,7 +156,8 @@ struct ContextShared std::optional system_logs; /// Used to log queries and operations on parts std::unique_ptr trace_collector; /// Thread collecting traces from threads executing queries - + std::unordered_set allowed_primary_url_hosts; /// Allowed primary () URL from config.xml + std::vector allowed_regexp_url_hosts; /// Allowed regexp () URL from config.xml /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. class SessionKeyHash @@ -539,6 +540,8 @@ String Context::getUserFilesPath() const return shared->user_files_path; } + + void Context::setPath(const String & path) { auto lock = getLock(); @@ -593,6 +596,27 @@ void Context::setUsersConfig(const ConfigurationPtr & config) shared->quotas.loadFromConfig(*shared->users_config); } + +std::unordered_set & Context::getAllowedPrimaryUrlHosts() const +{ + return shared->allowed_primary_url_hosts; +} + +void Context::setAllowedPrimaryUrlHosts(const std::unordered_set & url_set) +{ + shared->allowed_primary_url_hosts = url_set; +} + +std::vector & Context::getAllowedRegexpUrlHosts() const +{ + return shared->allowed_regexp_url_hosts; +} + +void Context::setAllowedRegexpUrlHosts(const std::vector &url_vec) +{ + shared->allowed_regexp_url_hosts = url_vec; +} + ConfigurationPtr Context::getUsersConfig() { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 9c001916347..a192afe468c 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -181,6 +181,12 @@ public: Context & operator=(const Context &); ~Context(); + /// allowed URL from config.xml + std::unordered_set & getAllowedPrimaryUrlHosts() const; + void setAllowedPrimaryUrlHosts(const std::unordered_set & url_set); + std::vector & getAllowedRegexpUrlHosts() const; + void setAllowedRegexpUrlHosts(const std::vector & url_vec); + String getPath() const; String getTemporaryPath() const; String getFlagsPath() const; diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 4f3d41604f5..320e9ac2c0f 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -15,6 +15,9 @@ #include #include +#include + +#include <../../contrib/re2/re2/re2.h> namespace DB @@ -22,6 +25,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int UNACCEPTABLE_URL; } IStorageURLBase::IStorageURLBase( @@ -34,6 +38,9 @@ IStorageURLBase::IStorageURLBase( const ConstraintsDescription & constraints_) : uri(uri_), context_global(context_), format_name(format_name_), table_name(table_name_), database_name(database_name_) { + if (!checkHostWhitelist(uri.getHost()) && + !checkHostWhitelist(uri.getHost() + ":" + Poco::NumberFormatter::format(uri.getPort()))) + throw Exception("Unacceptable URL.", ErrorCodes::UNACCEPTABLE_URL); setColumns(columns_); setConstraints(constraints_); } @@ -221,4 +228,22 @@ void registerStorageURL(StorageFactory & factory) return StorageURL::create(uri, args.database_name, args.table_name, format_name, args.columns, args.constraints, args.context); }); } + +bool IStorageURLBase::checkHostWhitelist(const std::string & host) { + const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); + const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); + if (!(primary_hosts.empty() && regexp_hosts.empty())) + { + if (primary_hosts.find(host) == primary_hosts.end()) + { + bool flag = false; + for (size_t i = 0; i < regexp_hosts.size() && !flag; ++i) + if (re2::RE2::FullMatch(host, regexp_hosts[i])) + flag = true; + return flag; + } + return true; + } + return true; +} } diff --git a/dbms/src/Storages/StorageURL.h b/dbms/src/Storages/StorageURL.h index cdd78c7b60f..e7555ad503b 100644 --- a/dbms/src/Storages/StorageURL.h +++ b/dbms/src/Storages/StorageURL.h @@ -66,6 +66,8 @@ private: size_t max_block_size) const; virtual Block getHeaderBlock(const Names & column_names) const = 0; + + bool checkHostWhitelist(const std::string & host); ///return true if host allowed }; From 16645bfe53f3447ad9dc01932441d907da32b754 Mon Sep 17 00:00:00 2001 From: millb Date: Wed, 2 Oct 2019 19:33:50 +0300 Subject: [PATCH 02/62] Fixed StorageUrl.cpp --- dbms/src/Storages/StorageURL.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 320e9ac2c0f..3fa5d610c5a 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -236,11 +236,10 @@ bool IStorageURLBase::checkHostWhitelist(const std::string & host) { { if (primary_hosts.find(host) == primary_hosts.end()) { - bool flag = false; - for (size_t i = 0; i < regexp_hosts.size() && !flag; ++i) + for (size_t i = 0; i < regexp_hosts.size(); ++i) if (re2::RE2::FullMatch(host, regexp_hosts[i])) - flag = true; - return flag; + return true; + return false; } return true; } From 7caae87e74e427ebf70166ef3947fc518a6fbcff Mon Sep 17 00:00:00 2001 From: millb Date: Wed, 2 Oct 2019 20:17:09 +0300 Subject: [PATCH 03/62] Fixed StorageURL.cpp --- dbms/src/Storages/StorageURL.cpp | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 3fa5d610c5a..609cfaf3aab 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -17,7 +17,7 @@ #include #include -#include <../../contrib/re2/re2/re2.h> +#include namespace DB @@ -40,7 +40,28 @@ IStorageURLBase::IStorageURLBase( { if (!checkHostWhitelist(uri.getHost()) && !checkHostWhitelist(uri.getHost() + ":" + Poco::NumberFormatter::format(uri.getPort()))) - throw Exception("Unacceptable URL.", ErrorCodes::UNACCEPTABLE_URL); + { + const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); + const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); + std::string string_error = "\nUnacceptable URL. You can use "; + if (!primary_hosts.empty()) + { + string_error += "URL like\n\n"; + for (auto host : primary_hosts) + string_error += host + "\n"; + string_error += '\n'; + if (!regexp_hosts.empty()) + string_error += "Or "; + } + if (!regexp_hosts.empty()) + { + string_error += "URL that match the following regular expressions\n\n"; + for (auto reg_host : regexp_hosts) + string_error += reg_host + "\n"; + } + string_error += "\nIf you want to change this look at config.xml"; + throw Exception(string_error, ErrorCodes::UNACCEPTABLE_URL); + } setColumns(columns_); setConstraints(constraints_); } @@ -232,7 +253,7 @@ void registerStorageURL(StorageFactory & factory) bool IStorageURLBase::checkHostWhitelist(const std::string & host) { const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); - if (!(primary_hosts.empty() && regexp_hosts.empty())) + if (!primary_hosts.empty() || !regexp_hosts.empty()) { if (primary_hosts.find(host) == primary_hosts.end()) { From 5c272df15f90458df74e4b8261a87a41fec22ea2 Mon Sep 17 00:00:00 2001 From: millb Date: Sun, 6 Oct 2019 17:52:41 +0300 Subject: [PATCH 04/62] Add integration tests. --- .../test_allowed_url_from_config/__init__.py | 0 .../configs/config_with_hosts.xml | 7 +++ .../config_with_only_primary_hosts.xml | 10 ++++ .../configs/config_with_only_regexp_hosts.xml | 7 +++ .../configs/config_without_allowed_hosts.xml | 7 +++ .../config_without_block_of_allowed_hosts.xml | 3 ++ .../test_allowed_url_from_config/test.py | 53 +++++++++++++++++++ 7 files changed, 87 insertions(+) create mode 100644 dbms/tests/integration/test_allowed_url_from_config/__init__.py create mode 100644 dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml create mode 100644 dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml create mode 100644 dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml create mode 100644 dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml create mode 100644 dbms/tests/integration/test_allowed_url_from_config/configs/config_without_block_of_allowed_hosts.xml create mode 100644 dbms/tests/integration/test_allowed_url_from_config/test.py diff --git a/dbms/tests/integration/test_allowed_url_from_config/__init__.py b/dbms/tests/integration/test_allowed_url_from_config/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml new file mode 100644 index 00000000000..e56ea12b6d0 --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml @@ -0,0 +1,7 @@ + + + host:80 + ^[a-z]*\.ru$ + + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml new file mode 100644 index 00000000000..d5dd56f224d --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml @@ -0,0 +1,10 @@ + + + + host:80 + host:123 + yandex.ru + + + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml new file mode 100644 index 00000000000..5ff8263bec7 --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml @@ -0,0 +1,7 @@ + + + ^[a-z]*:80$ + ^[a-z]*\.ru$ + + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml new file mode 100644 index 00000000000..d9c0bb90d2d --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_block_of_allowed_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_block_of_allowed_hosts.xml new file mode 100644 index 00000000000..4b8b51c1db2 --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_block_of_allowed_hosts.xml @@ -0,0 +1,3 @@ + + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/test.py b/dbms/tests/integration/test_allowed_url_from_config/test.py new file mode 100644 index 00000000000..d04c1d4ea43 --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/test.py @@ -0,0 +1,53 @@ +import time +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance('node1', main_configs=['configs/config_with_hosts.xml']) +node2 = cluster.add_instance('node2', main_configs=['configs/config_with_only_primary_hosts.xml']) +node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_regexp_hosts.xml']) +node4 = cluster.add_instance('node4', main_configs=['configs/config_without_allowed_hosts.xml']) +node5 = cluster.add_instance('node5', main_configs=['configs/config_without_block_of_allowed_hosts.xml']) + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + +def test_config_with_hosts(start_cluster): + assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', CSV)") == "" + assert node1.query("CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" + assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', CSV)") + assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)") + +def test_config_with_only_primary_hosts(start_cluster): + assert node2.query("CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)") == "" + assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', CSV)") == "" + assert node2.query("CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)") == "" + assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', CSV)") == "" + assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', CSV)") + assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)") + assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', CSV)") + + +def test_config_with_only_regexp_hosts(start_cluster): + assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', CSV)") == "" + assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" + assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host', CSV)") + assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)") + +def test_config_without_allowed_hosts(start_cluster): + assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == "" + assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=URL('https://host', CSV)") == "" + assert node4.query("CREATE TABLE table_test_4_3 (word String) Engine=URL('https://yandex.ru', CSV)") == "" + assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('ftp://something.com', CSV)") == "" + +def test_config_without_block_of_allowed_hosts(start_cluster): + assert node5.query("CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)") == "" + assert node5.query("CREATE TABLE table_test_5_2 (word String) Engine=URL('https://host', CSV)") == "" + assert node5.query("CREATE TABLE table_test_5_3 (word String) Engine=URL('https://yandex.ru', CSV)") == "" + assert node5.query("CREATE TABLE table_test_5_4 (word String) Engine=URL('ftp://something.com', CSV)") == "" From 31a422fdff4164d4d01179544c649bb828b7926c Mon Sep 17 00:00:00 2001 From: millb Date: Sun, 6 Oct 2019 18:01:05 +0300 Subject: [PATCH 05/62] Fixed test codestyle --- .../configs/config_with_hosts.xml | 5 ++++- .../configs/config_with_only_regexp_hosts.xml | 2 ++ .../configs/config_without_allowed_hosts.xml | 5 ++--- dbms/tests/integration/test_allowed_url_from_config/test.py | 1 - 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml index e56ea12b6d0..c332d7ef4de 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml @@ -1,7 +1,10 @@ - + + host:80 + ^[a-z]*\.ru$ + diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml index 5ff8263bec7..3f7bea8d35c 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml @@ -1,7 +1,9 @@ + ^[a-z]*:80$ ^[a-z]*\.ru$ + diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml index d9c0bb90d2d..62d8d30d1e0 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml @@ -1,7 +1,6 @@ - - - + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/test.py b/dbms/tests/integration/test_allowed_url_from_config/test.py index d04c1d4ea43..6d4381d80bb 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/test.py +++ b/dbms/tests/integration/test_allowed_url_from_config/test.py @@ -33,7 +33,6 @@ def test_config_with_only_primary_hosts(start_cluster): assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)") assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', CSV)") - def test_config_with_only_regexp_hosts(start_cluster): assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', CSV)") == "" assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" From 7ab1804ebb059a4261aeca32ea339b211d2c0259 Mon Sep 17 00:00:00 2001 From: millb Date: Sun, 6 Oct 2019 18:33:25 +0300 Subject: [PATCH 06/62] Fixed codestyle and changed Error message. --- dbms/src/Storages/StorageURL.cpp | 50 +++++++++++++++++--------------- dbms/src/Storages/StorageURL.h | 4 ++- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 609cfaf3aab..aede71fdba0 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -38,29 +38,10 @@ IStorageURLBase::IStorageURLBase( const ConstraintsDescription & constraints_) : uri(uri_), context_global(context_), format_name(format_name_), table_name(table_name_), database_name(database_name_) { - if (!checkHostWhitelist(uri.getHost()) && - !checkHostWhitelist(uri.getHost() + ":" + Poco::NumberFormatter::format(uri.getPort()))) + if (!checkHost(uri.getHost()) && + !checkHost(uri.getHost() + ":" + Poco::NumberFormatter::format(uri.getPort()))) { - const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); - const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); - std::string string_error = "\nUnacceptable URL. You can use "; - if (!primary_hosts.empty()) - { - string_error += "URL like\n\n"; - for (auto host : primary_hosts) - string_error += host + "\n"; - string_error += '\n'; - if (!regexp_hosts.empty()) - string_error += "Or "; - } - if (!regexp_hosts.empty()) - { - string_error += "URL that match the following regular expressions\n\n"; - for (auto reg_host : regexp_hosts) - string_error += reg_host + "\n"; - } - string_error += "\nIf you want to change this look at config.xml"; - throw Exception(string_error, ErrorCodes::UNACCEPTABLE_URL); + throwUnacceptableURLException(); } setColumns(columns_); setConstraints(constraints_); @@ -250,7 +231,8 @@ void registerStorageURL(StorageFactory & factory) }); } -bool IStorageURLBase::checkHostWhitelist(const std::string & host) { +bool IStorageURLBase::checkHost(const std::string & host) +{ const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); if (!primary_hosts.empty() || !regexp_hosts.empty()) @@ -266,4 +248,26 @@ bool IStorageURLBase::checkHostWhitelist(const std::string & host) { } return true; } + +void IStorageURLBase::throwUnacceptableURLException() +{ + const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); + const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); + std::string string_error = "Unacceptable URL. You can use "; + if (!primary_hosts.empty()) + { + string_error += "URL like: "; + for (auto host : primary_hosts) + string_error += host + ", "; + if (!regexp_hosts.empty()) + string_error += "or "; + } + if (!regexp_hosts.empty()) + { + string_error += "URL that match the following regular expressions: "; + for (auto reg_host : regexp_hosts) + string_error += reg_host + ", "; + } + throw Exception(string_error.substr(0, string_error.size() - 2), ErrorCodes::UNACCEPTABLE_URL); +} } diff --git a/dbms/src/Storages/StorageURL.h b/dbms/src/Storages/StorageURL.h index e7555ad503b..6532b8b193e 100644 --- a/dbms/src/Storages/StorageURL.h +++ b/dbms/src/Storages/StorageURL.h @@ -67,7 +67,9 @@ private: virtual Block getHeaderBlock(const Names & column_names) const = 0; - bool checkHostWhitelist(const std::string & host); ///return true if host allowed + bool checkHost(const std::string & host); ///return true if host allowed + + void throwUnacceptableURLException(); }; From c5ba58f24347a0002ad67a10e0c5e560ade8b68f Mon Sep 17 00:00:00 2001 From: millb Date: Sun, 6 Oct 2019 18:49:51 +0300 Subject: [PATCH 07/62] Fixed tests --- dbms/tests/integration/test_allowed_url_from_config/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/test.py b/dbms/tests/integration/test_allowed_url_from_config/test.py index 6d4381d80bb..995fbc27789 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/test.py +++ b/dbms/tests/integration/test_allowed_url_from_config/test.py @@ -36,8 +36,8 @@ def test_config_with_only_primary_hosts(start_cluster): def test_config_with_only_regexp_hosts(start_cluster): assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', CSV)") == "" assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" - assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host', CSV)") - assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)") + assert "Unacceptable URL." in node3.query_and_get_error("CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)") + assert "Unacceptable URL." in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', CSV)") def test_config_without_allowed_hosts(start_cluster): assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == "" From fec1c6deba96078cb608e3e6cc27114912233e53 Mon Sep 17 00:00:00 2001 From: millb Date: Wed, 9 Oct 2019 14:16:14 +0300 Subject: [PATCH 08/62] Fixed Error message and error code --- dbms/src/Common/ErrorCodes.cpp | 2 +- dbms/src/Storages/StorageURL.cpp | 26 ++------------------------ dbms/src/Storages/StorageURL.h | 5 ++--- 3 files changed, 5 insertions(+), 28 deletions(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 49cb78003eb..7c342ddc102 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -457,7 +457,7 @@ namespace ErrorCodes extern const int UNKNOWN_PROTOCOL = 480; extern const int PATH_ACCESS_DENIED = 481; extern const int DICTIONARY_ACCESS_DENIED = 482; - extern const int UNACCEPTABLE_URL = 483; + extern const int UNACCEPTABLE_URL = 485; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index aede71fdba0..e42fdf1f0c8 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -39,9 +39,9 @@ IStorageURLBase::IStorageURLBase( : uri(uri_), context_global(context_), format_name(format_name_), table_name(table_name_), database_name(database_name_) { if (!checkHost(uri.getHost()) && - !checkHost(uri.getHost() + ":" + Poco::NumberFormatter::format(uri.getPort()))) + !checkHost(uri.getHost() + ":" + toString(uri.getPort()))) { - throwUnacceptableURLException(); + throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); } setColumns(columns_); setConstraints(constraints_); @@ -248,26 +248,4 @@ bool IStorageURLBase::checkHost(const std::string & host) } return true; } - -void IStorageURLBase::throwUnacceptableURLException() -{ - const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); - const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); - std::string string_error = "Unacceptable URL. You can use "; - if (!primary_hosts.empty()) - { - string_error += "URL like: "; - for (auto host : primary_hosts) - string_error += host + ", "; - if (!regexp_hosts.empty()) - string_error += "or "; - } - if (!regexp_hosts.empty()) - { - string_error += "URL that match the following regular expressions: "; - for (auto reg_host : regexp_hosts) - string_error += reg_host + ", "; - } - throw Exception(string_error.substr(0, string_error.size() - 2), ErrorCodes::UNACCEPTABLE_URL); -} } diff --git a/dbms/src/Storages/StorageURL.h b/dbms/src/Storages/StorageURL.h index 6532b8b193e..43964e91273 100644 --- a/dbms/src/Storages/StorageURL.h +++ b/dbms/src/Storages/StorageURL.h @@ -67,9 +67,8 @@ private: virtual Block getHeaderBlock(const Names & column_names) const = 0; - bool checkHost(const std::string & host); ///return true if host allowed - - void throwUnacceptableURLException(); + /// Return true if host allowed + bool checkHost(const std::string & host); }; From a97739f67fcb92438aed1b5b8d809bb08336a0b4 Mon Sep 17 00:00:00 2001 From: millb Date: Wed, 9 Oct 2019 23:29:41 +0300 Subject: [PATCH 09/62] Created class StorageOfAllowedURL Created test for table function remote and remoteSecure Created check allowed url in remote and remoteSecure Fixed tests --- dbms/programs/server/Server.cpp | 19 +----- dbms/src/Common/StorageOfAllowedURL.cpp | 62 +++++++++++++++++ dbms/src/Common/StorageOfAllowedURL.h | 26 +++++++ dbms/src/Interpreters/Context.cpp | 37 ++++------ dbms/src/Interpreters/Context.h | 12 ++-- dbms/src/Storages/StorageURL.cpp | 27 +------- .../TableFunctions/TableFunctionRemote.cpp | 17 ++++- .../configs/config_for_remote.xml | 10 +++ .../test_allowed_url_from_config/test.py | 68 +++++++++++++++---- 9 files changed, 188 insertions(+), 90 deletions(-) create mode 100644 dbms/src/Common/StorageOfAllowedURL.cpp create mode 100644 dbms/src/Common/StorageOfAllowedURL.h create mode 100644 dbms/tests/integration/test_allowed_url_from_config/configs/config_for_remote.xml diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index d50ce95affb..56e0b5edba7 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -373,6 +373,8 @@ int Server::main(const std::vector & /*args*/) if (config().has("interserver_http_port") && config().has("interserver_https_port")) throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + global_context->setStorageOfAllowedURL(config()); + static const auto interserver_tags = { std::make_tuple("interserver_http_host", "interserver_http_port", "http"), @@ -507,23 +509,6 @@ int Server::main(const std::vector & /*args*/) } global_context->setMarkCache(mark_cache_size); - if (config().has("remote_url_allow_hosts")) - { - std::vector keys; - config().keys("remote_url_allow_hosts", keys); - std::unordered_set primary_hosts; - std::vector regexp_hosts; - for (auto key : keys) - { - if (startsWith(key, "host_regexp")) - regexp_hosts.push_back(config().getString("remote_url_allow_hosts." + key)); - else if (startsWith(key, "host")) - primary_hosts.insert(config().getString("remote_url_allow_hosts." + key)); - } - global_context->setAllowedPrimaryUrlHosts(primary_hosts); - global_context->setAllowedRegexpUrlHosts(regexp_hosts); - } - #if USE_EMBEDDED_COMPILER size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", 500); if (compiled_expression_cache_size) diff --git a/dbms/src/Common/StorageOfAllowedURL.cpp b/dbms/src/Common/StorageOfAllowedURL.cpp new file mode 100644 index 00000000000..fb3c4fc60dc --- /dev/null +++ b/dbms/src/Common/StorageOfAllowedURL.cpp @@ -0,0 +1,62 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int UNACCEPTABLE_URL; +} + +void StorageOfAllowedURL::checkURL(const Poco::URI & uri) +{ + if (!checkString(uri.getHost()) && + !checkString(uri.getHost() + ":" + toString(uri.getPort()))) + throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); +} + +void StorageOfAllowedURL::checkHostAndPort(const std::string & host, const std::string & port) +{ + if (!checkString(host) && + !checkString(host + ":" + port)) + throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); +} + +void StorageOfAllowedURL::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config) +{ + if (config.has("remote_url_allow_hosts")) + { + std::vector keys; + config.keys("remote_url_allow_hosts", keys); + for (auto key : keys) + { + if (startsWith(key, "host_regexp")) + regexp_hosts.push_back(config.getString("remote_url_allow_hosts." + key)); + else if (startsWith(key, "host")) + primary_hosts.insert(config.getString("remote_url_allow_hosts." + key)); + } + } +} + +bool StorageOfAllowedURL::checkString(const std::string &host) +{ + if (!primary_hosts.empty() || !regexp_hosts.empty()) + { + if (primary_hosts.find(host) == primary_hosts.end()) + { + for (size_t i = 0; i < regexp_hosts.size(); ++i) + if (re2::RE2::FullMatch(host, regexp_hosts[i])) + return true; + return false; + } + return true; + } + return true; +} +} \ No newline at end of file diff --git a/dbms/src/Common/StorageOfAllowedURL.h b/dbms/src/Common/StorageOfAllowedURL.h new file mode 100644 index 00000000000..546998c6835 --- /dev/null +++ b/dbms/src/Common/StorageOfAllowedURL.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ +class StorageOfAllowedURL +{ +public: + void checkURL(const Poco::URI &uri); /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception + + void setValuesFromConfig(const Poco::Util::AbstractConfiguration &config); + + void checkHostAndPort(const std::string & host, const std::string & port); + +private: + std::unordered_set primary_hosts; /// Allowed primary () URL from config.xml + std::vector regexp_hosts; /// Allowed regexp () URL from config.xml + + bool checkString(const std::string &host); +}; +} \ No newline at end of file diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index f57e795142d..17a51d352a9 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -51,7 +51,7 @@ #include #include #include - +#include namespace ProfileEvents { @@ -155,9 +155,9 @@ struct ContextShared ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers std::optional system_logs; /// Used to log queries and operations on parts + StorageOfAllowedURL storage_of_allowed_url; /// Allowed URL from config.xml + std::unique_ptr trace_collector; /// Thread collecting traces from threads executing queries - std::unordered_set allowed_primary_url_hosts; /// Allowed primary () URL from config.xml - std::vector allowed_regexp_url_hosts; /// Allowed regexp () URL from config.xml /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. class SessionKeyHash @@ -596,27 +596,6 @@ void Context::setUsersConfig(const ConfigurationPtr & config) shared->quotas.loadFromConfig(*shared->users_config); } - -std::unordered_set & Context::getAllowedPrimaryUrlHosts() const -{ - return shared->allowed_primary_url_hosts; -} - -void Context::setAllowedPrimaryUrlHosts(const std::unordered_set & url_set) -{ - shared->allowed_primary_url_hosts = url_set; -} - -std::vector & Context::getAllowedRegexpUrlHosts() const -{ - return shared->allowed_regexp_url_hosts; -} - -void Context::setAllowedRegexpUrlHosts(const std::vector &url_vec) -{ - shared->allowed_regexp_url_hosts = url_vec; -} - ConfigurationPtr Context::getUsersConfig() { auto lock = getLock(); @@ -1586,6 +1565,16 @@ String Context::getInterserverScheme() const return shared->interserver_scheme; } +void Context::setStorageOfAllowedURL(const Poco::Util::AbstractConfiguration & config) +{ + shared->storage_of_allowed_url.setValuesFromConfig(config); +} + +StorageOfAllowedURL & Context::getStorageOfAllowedURL() const +{ + return shared->storage_of_allowed_url; +} + UInt16 Context::getTCPPort() const { auto lock = getLock(); diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index a192afe468c..0a65ac587fe 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -22,6 +22,7 @@ #include #include #include +#include namespace Poco @@ -78,6 +79,7 @@ using ActionLocksManagerPtr = std::shared_ptr; class ShellCommand; class ICompressionCodec; class SettingsConstraints; +class StorageOfAllowedURL; class IOutputFormat; using OutputFormatPtr = std::shared_ptr; @@ -181,12 +183,6 @@ public: Context & operator=(const Context &); ~Context(); - /// allowed URL from config.xml - std::unordered_set & getAllowedPrimaryUrlHosts() const; - void setAllowedPrimaryUrlHosts(const std::unordered_set & url_set); - std::vector & getAllowedRegexpUrlHosts() const; - void setAllowedRegexpUrlHosts(const std::vector & url_vec); - String getPath() const; String getTemporaryPath() const; String getFlagsPath() const; @@ -354,6 +350,10 @@ public: void setInterserverScheme(const String & scheme); String getInterserverScheme() const; + /// Storage of allowed hosts from config.xml + void setStorageOfAllowedURL(const Poco::Util::AbstractConfiguration & config); + StorageOfAllowedURL & getStorageOfAllowedURL() const; + /// The port that the server listens for executing SQL queries. UInt16 getTCPPort() const; diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index e42fdf1f0c8..9349bd33c6e 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -15,9 +15,6 @@ #include #include -#include - -#include namespace DB @@ -38,11 +35,7 @@ IStorageURLBase::IStorageURLBase( const ConstraintsDescription & constraints_) : uri(uri_), context_global(context_), format_name(format_name_), table_name(table_name_), database_name(database_name_) { - if (!checkHost(uri.getHost()) && - !checkHost(uri.getHost() + ":" + toString(uri.getPort()))) - { - throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); - } + context_global.getStorageOfAllowedURL().checkURL(uri); setColumns(columns_); setConstraints(constraints_); } @@ -230,22 +223,4 @@ void registerStorageURL(StorageFactory & factory) return StorageURL::create(uri, args.database_name, args.table_name, format_name, args.columns, args.constraints, args.context); }); } - -bool IStorageURLBase::checkHost(const std::string & host) -{ - const std::unordered_set primary_hosts = context_global.getAllowedPrimaryUrlHosts(); - const std::vector regexp_hosts = context_global.getAllowedRegexpUrlHosts(); - if (!primary_hosts.empty() || !regexp_hosts.empty()) - { - if (primary_hosts.find(host) == primary_hosts.end()) - { - for (size_t i = 0; i < regexp_hosts.size(); ++i) - if (re2::RE2::FullMatch(host, regexp_hosts[i])) - return true; - return false; - } - return true; - } - return true; -} } diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index 9d0a8024c0e..68eadc4082f 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -141,8 +141,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C /// Use an existing cluster from the main config cluster = context.getCluster(cluster_name); } - else - { + else { /// Create new cluster from the scratch size_t max_addresses = context.getSettingsRef().table_function_remote_max_addresses; std::vector shards = parseRemoteDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses); @@ -155,6 +154,20 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS); auto maybe_secure_port = context.getTCPPortSecure(); + + /// Check host and port on affiliation allowed hosts. + for (auto hosts : names) + { + for (auto host : hosts) + { + size_t colon = host.find(':'); + if (colon == String::npos) + context.getStorageOfAllowedURL().checkHostAndPort(host, toString((secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()))); + else + context.getStorageOfAllowedURL().checkHostAndPort(host.substr(0, colon), host.substr(colon + 1)); + } + } + cluster = std::make_shared(context.getSettings(), names, username, password, (secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()), false, secure); } diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_for_remote.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_for_remote.xml new file mode 100644 index 00000000000..b6074fc8a2d --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_for_remote.xml @@ -0,0 +1,10 @@ + + + localhost:9000 + localhost:9440 + example01-01-1 + example01-01-2 + example01-02-1 + example01-02-2 + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/test.py b/dbms/tests/integration/test_allowed_url_from_config/test.py index 995fbc27789..2a3f00ebf1a 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/test.py +++ b/dbms/tests/integration/test_allowed_url_from_config/test.py @@ -9,6 +9,7 @@ node2 = cluster.add_instance('node2', main_configs=['configs/config_with_only_pr node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_regexp_hosts.xml']) node4 = cluster.add_instance('node4', main_configs=['configs/config_without_allowed_hosts.xml']) node5 = cluster.add_instance('node5', main_configs=['configs/config_without_block_of_allowed_hosts.xml']) +node6 = cluster.add_instance('node6', main_configs=['configs/config_for_remote.xml']) @pytest.fixture(scope="module") def start_cluster(): @@ -19,34 +20,71 @@ def start_cluster(): cluster.shutdown() def test_config_with_hosts(start_cluster): - assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', CSV)") == "" + assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)") == "" assert node1.query("CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" - assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', CSV)") - assert "Unacceptable URL." in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)") + assert "not allowed" in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)") + assert "not allowed" in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)") + assert "not allowed in config.xml" not in node1.query_and_get_error("SELECT * FROM remoteSecure('host:80', system, events)") + assert "not allowed in config.xml" not in node1.query_and_get_error("SELECT * FROM remote('yandex.ru', system, events)") + assert "not allowed" in node1.query_and_get_error("SELECT * FROM remoteSecure('host:123', system, events)") + assert "not allowed" in node1.query_and_get_error("SELECT * FROM remoteSecure('yandex2.ru', system, events)") + def test_config_with_only_primary_hosts(start_cluster): assert node2.query("CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)") == "" - assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', CSV)") == "" + assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)") == "" assert node2.query("CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)") == "" - assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', CSV)") == "" - assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', CSV)") - assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)") - assert "Unacceptable URL." in node2.query_and_get_error("CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', CSV)") + assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)") == "" + assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)") + assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)") + assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)") + assert "not allowed in config.xml" not in node2.query_and_get_error("SELECT * FROM remoteSecure('host:80', system, events)") + assert "not allowed in config.xml" not in node2.query_and_get_error("SELECT * FROM remoteSecure('yandex.ru', system, events)") + assert "not allowed in config.xml" not in node2.query_and_get_error("SELECT * FROM remoteSecure('host:123', system, events)") + assert "not allowed in config.xml" not in node2.query_and_get_error("SELECT * FROM remoteSecure('yandex.ru:87', system, events)") + assert "not allowed" in node2.query_and_get_error("SELECT * FROM remote('host', system, events)") + assert "not allowed" in node2.query_and_get_error("SELECT * FROM remote('host:234', system, metrics)") + assert "not allowed" in node2.query_and_get_error("SELECT * FROM remoteSecure('yandex2.ru', system, events)") def test_config_with_only_regexp_hosts(start_cluster): - assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', CSV)") == "" + assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == "" assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == "" - assert "Unacceptable URL." in node3.query_and_get_error("CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)") - assert "Unacceptable URL." in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', CSV)") + assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)") + assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)") + assert "not allowed in config.xml" not in node3.query_and_get_error("SELECT * FROM remote('host:80', system, events)") + assert "not allowed in config.xml" not in node3.query_and_get_error("SELECT * FROM remoteSecure('yandex.ru', system, metrics)") + assert "not allowed" in node3.query_and_get_error("SELECT * FROM remote('host', system, events)") + assert "not allowed" in node3.query_and_get_error("SELECT * FROM remoteSecure('yandex2.ru', system, events)") + def test_config_without_allowed_hosts(start_cluster): assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == "" - assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=URL('https://host', CSV)") == "" + assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=URL('https://host', HDFS)") == "" assert node4.query("CREATE TABLE table_test_4_3 (word String) Engine=URL('https://yandex.ru', CSV)") == "" - assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('ftp://something.com', CSV)") == "" + assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('ftp://something.com', S3)") == "" + assert "not allowed in config.xml" not in node4.query_and_get_error("SELECT * FROM remote('host:80', system, events)") + assert "not allowed in config.xml" not in node4.query_and_get_error("SELECT * FROM remoteSecure('something.com', system, metrics)") + assert "not allowed in config.xml" not in node4.query_and_get_error("SELECT * FROM remote('yandex.ru', system, numbers(100))") def test_config_without_block_of_allowed_hosts(start_cluster): - assert node5.query("CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', CSV)") == "" + assert node5.query("CREATE TABLE table_test_5_1 (word String) Engine=URL('https://host:80', HDFS)") == "" assert node5.query("CREATE TABLE table_test_5_2 (word String) Engine=URL('https://host', CSV)") == "" - assert node5.query("CREATE TABLE table_test_5_3 (word String) Engine=URL('https://yandex.ru', CSV)") == "" + assert node5.query("CREATE TABLE table_test_5_3 (word String) Engine=URL('https://yandex.ru', S3)") == "" assert node5.query("CREATE TABLE table_test_5_4 (word String) Engine=URL('ftp://something.com', CSV)") == "" + assert "not allowed in config.xml" not in node5.query_and_get_error("SELECT * FROM remoteSecure('host:80', system, events)") + assert "not allowed in config.xml" not in node5.query_and_get_error("SELECT * FROM remote('something.com', system, metrics)") + assert "not allowed in config.xml" not in node5.query_and_get_error("SELECT * FROM remote('yandex.ru', system, events)") + +def test_table_function_remote_on_config_for_remote(start_cluster): + assert node6.query("SELECT * FROM remote('localhost', system, events)") != "" + assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != "" + assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remoteSecure('localhost:800', system, events)") + assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remote('localhost:800', system, metrics)") + assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)") + assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-1', system, events") + assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)") + assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)") + assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)") + assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)") + assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)") + assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)") From 769ec9ea6c7ced627d5a557757be4f6dc39044d0 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 01:22:05 +0300 Subject: [PATCH 10/62] Update config.xml --- dbms/programs/server/config.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index e97ad712040..c65a20d4735 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -9,9 +9,9 @@ From 56cdc03b0b395f05ce06df3202902a4303b38bf5 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 01:23:12 +0300 Subject: [PATCH 11/62] Update StorageOfAllowedURL.cpp --- dbms/src/Common/StorageOfAllowedURL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/StorageOfAllowedURL.cpp b/dbms/src/Common/StorageOfAllowedURL.cpp index fb3c4fc60dc..4d57b67dcf2 100644 --- a/dbms/src/Common/StorageOfAllowedURL.cpp +++ b/dbms/src/Common/StorageOfAllowedURL.cpp @@ -59,4 +59,4 @@ bool StorageOfAllowedURL::checkString(const std::string &host) } return true; } -} \ No newline at end of file +} From af8b20f0af78d8b80ce165ee3bd40b1afe1a5dc5 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 01:23:31 +0300 Subject: [PATCH 12/62] Update StorageOfAllowedURL.h --- dbms/src/Common/StorageOfAllowedURL.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/StorageOfAllowedURL.h b/dbms/src/Common/StorageOfAllowedURL.h index 546998c6835..8a4639f4bd4 100644 --- a/dbms/src/Common/StorageOfAllowedURL.h +++ b/dbms/src/Common/StorageOfAllowedURL.h @@ -23,4 +23,4 @@ private: bool checkString(const std::string &host); }; -} \ No newline at end of file +} From 4710d2184cce53554e56635691319047fae20ed6 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 01:24:27 +0300 Subject: [PATCH 13/62] Update Context.cpp --- dbms/src/Interpreters/Context.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 17a51d352a9..62b818b4e5c 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -540,8 +540,6 @@ String Context::getUserFilesPath() const return shared->user_files_path; } - - void Context::setPath(const String & path) { auto lock = getLock(); From b1c6fb571e450026f35069437ee112b7887e64c1 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 01:25:17 +0300 Subject: [PATCH 14/62] Update TableFunctionRemote.cpp --- dbms/src/TableFunctions/TableFunctionRemote.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index 68eadc4082f..7296e4c5c49 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -141,7 +141,8 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C /// Use an existing cluster from the main config cluster = context.getCluster(cluster_name); } - else { + else + { /// Create new cluster from the scratch size_t max_addresses = context.getSettingsRef().table_function_remote_max_addresses; std::vector shards = parseRemoteDescription(cluster_description, 0, cluster_description.size(), ',', max_addresses); From 8d675ad2b43720f4e5390b75048755f57fd1620a Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 01:25:39 +0300 Subject: [PATCH 15/62] Update TableFunctionRemote.cpp --- dbms/src/TableFunctions/TableFunctionRemote.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index 7296e4c5c49..18f3658e483 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -141,7 +141,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C /// Use an existing cluster from the main config cluster = context.getCluster(cluster_name); } - else + else { /// Create new cluster from the scratch size_t max_addresses = context.getSettingsRef().table_function_remote_max_addresses; From a00089f0b46ced3776e57dbef126f54048f3109a Mon Sep 17 00:00:00 2001 From: millb Date: Thu, 10 Oct 2019 15:58:06 +0300 Subject: [PATCH 16/62] Renamed StorageOfAllowedURL -> RemoteHostFilter --- dbms/programs/server/Server.cpp | 4 ++-- ...{StorageOfAllowedURL.cpp => RemoteHostFilter.cpp} | 10 +++++----- .../{StorageOfAllowedURL.h => RemoteHostFilter.h} | 2 +- dbms/src/Interpreters/Context.cpp | 12 ++++++------ dbms/src/Interpreters/Context.h | 8 ++++---- dbms/src/Storages/StorageURL.cpp | 2 +- dbms/src/TableFunctions/TableFunctionRemote.cpp | 4 ++-- 7 files changed, 21 insertions(+), 21 deletions(-) rename dbms/src/Common/{StorageOfAllowedURL.cpp => RemoteHostFilter.cpp} (81%) rename dbms/src/Common/{StorageOfAllowedURL.h => RemoteHostFilter.h} (96%) diff --git a/dbms/programs/server/Server.cpp b/dbms/programs/server/Server.cpp index 56e0b5edba7..8be6c060c19 100644 --- a/dbms/programs/server/Server.cpp +++ b/dbms/programs/server/Server.cpp @@ -242,6 +242,8 @@ int Server::main(const std::vector & /*args*/) } #endif + global_context->setRemoteHostFilter(config()); + std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); std::string default_database = config().getString("default_database", "default"); @@ -373,8 +375,6 @@ int Server::main(const std::vector & /*args*/) if (config().has("interserver_http_port") && config().has("interserver_https_port")) throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); - global_context->setStorageOfAllowedURL(config()); - static const auto interserver_tags = { std::make_tuple("interserver_http_host", "interserver_http_port", "http"), diff --git a/dbms/src/Common/StorageOfAllowedURL.cpp b/dbms/src/Common/RemoteHostFilter.cpp similarity index 81% rename from dbms/src/Common/StorageOfAllowedURL.cpp rename to dbms/src/Common/RemoteHostFilter.cpp index fb3c4fc60dc..a70b30d5648 100644 --- a/dbms/src/Common/StorageOfAllowedURL.cpp +++ b/dbms/src/Common/RemoteHostFilter.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -14,21 +14,21 @@ namespace ErrorCodes extern const int UNACCEPTABLE_URL; } -void StorageOfAllowedURL::checkURL(const Poco::URI & uri) +void RemoteHostFilter::checkURL(const Poco::URI & uri) { if (!checkString(uri.getHost()) && !checkString(uri.getHost() + ":" + toString(uri.getPort()))) throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); } -void StorageOfAllowedURL::checkHostAndPort(const std::string & host, const std::string & port) +void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) { if (!checkString(host) && !checkString(host + ":" + port)) throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); } -void StorageOfAllowedURL::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config) +void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config) { if (config.has("remote_url_allow_hosts")) { @@ -44,7 +44,7 @@ void StorageOfAllowedURL::setValuesFromConfig(const Poco::Util::AbstractConfigur } } -bool StorageOfAllowedURL::checkString(const std::string &host) +bool RemoteHostFilter::checkString(const std::string &host) { if (!primary_hosts.empty() || !regexp_hosts.empty()) { diff --git a/dbms/src/Common/StorageOfAllowedURL.h b/dbms/src/Common/RemoteHostFilter.h similarity index 96% rename from dbms/src/Common/StorageOfAllowedURL.h rename to dbms/src/Common/RemoteHostFilter.h index 546998c6835..9efd699685f 100644 --- a/dbms/src/Common/StorageOfAllowedURL.h +++ b/dbms/src/Common/RemoteHostFilter.h @@ -8,7 +8,7 @@ namespace DB { -class StorageOfAllowedURL +class RemoteHostFilter { public: void checkURL(const Poco::URI &uri); /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 17a51d352a9..4a87141a687 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -51,7 +51,7 @@ #include #include #include -#include +#include namespace ProfileEvents { @@ -155,7 +155,7 @@ struct ContextShared ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers std::optional system_logs; /// Used to log queries and operations on parts - StorageOfAllowedURL storage_of_allowed_url; /// Allowed URL from config.xml + RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml std::unique_ptr trace_collector; /// Thread collecting traces from threads executing queries /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests. @@ -1565,14 +1565,14 @@ String Context::getInterserverScheme() const return shared->interserver_scheme; } -void Context::setStorageOfAllowedURL(const Poco::Util::AbstractConfiguration & config) +void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config) { - shared->storage_of_allowed_url.setValuesFromConfig(config); + shared->remote_host_filter.setValuesFromConfig(config); } -StorageOfAllowedURL & Context::getStorageOfAllowedURL() const +RemoteHostFilter & Context::getRemoteHostFilter() const { - return shared->storage_of_allowed_url; + return shared->remote_host_filter; } UInt16 Context::getTCPPort() const diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index 0a65ac587fe..8f3107ef9c0 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -22,7 +22,7 @@ #include #include #include -#include +#include namespace Poco @@ -79,7 +79,7 @@ using ActionLocksManagerPtr = std::shared_ptr; class ShellCommand; class ICompressionCodec; class SettingsConstraints; -class StorageOfAllowedURL; +class RemoteHostFilter; class IOutputFormat; using OutputFormatPtr = std::shared_ptr; @@ -351,8 +351,8 @@ public: String getInterserverScheme() const; /// Storage of allowed hosts from config.xml - void setStorageOfAllowedURL(const Poco::Util::AbstractConfiguration & config); - StorageOfAllowedURL & getStorageOfAllowedURL() const; + void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); + RemoteHostFilter & getRemoteHostFilter() const; /// The port that the server listens for executing SQL queries. UInt16 getTCPPort() const; diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 9349bd33c6e..5ca1170cf21 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -35,7 +35,7 @@ IStorageURLBase::IStorageURLBase( const ConstraintsDescription & constraints_) : uri(uri_), context_global(context_), format_name(format_name_), table_name(table_name_), database_name(database_name_) { - context_global.getStorageOfAllowedURL().checkURL(uri); + context_global.getRemoteHostFilter().checkURL(uri); setColumns(columns_); setConstraints(constraints_); } diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp index 68eadc4082f..26117303733 100644 --- a/dbms/src/TableFunctions/TableFunctionRemote.cpp +++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp @@ -162,9 +162,9 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C { size_t colon = host.find(':'); if (colon == String::npos) - context.getStorageOfAllowedURL().checkHostAndPort(host, toString((secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()))); + context.getRemoteHostFilter().checkHostAndPort(host, toString((secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()))); else - context.getStorageOfAllowedURL().checkHostAndPort(host.substr(0, colon), host.substr(colon + 1)); + context.getRemoteHostFilter().checkHostAndPort(host.substr(0, colon), host.substr(colon + 1)); } } From 7f73af0cc979072b65f7ff986037437f727ca6f0 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 16:13:33 +0300 Subject: [PATCH 17/62] Delete StorageOfAllowedURL.cpp --- dbms/src/Common/StorageOfAllowedURL.cpp | 62 ------------------------- 1 file changed, 62 deletions(-) delete mode 100644 dbms/src/Common/StorageOfAllowedURL.cpp diff --git a/dbms/src/Common/StorageOfAllowedURL.cpp b/dbms/src/Common/StorageOfAllowedURL.cpp deleted file mode 100644 index 4d57b67dcf2..00000000000 --- a/dbms/src/Common/StorageOfAllowedURL.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int UNACCEPTABLE_URL; -} - -void StorageOfAllowedURL::checkURL(const Poco::URI & uri) -{ - if (!checkString(uri.getHost()) && - !checkString(uri.getHost() + ":" + toString(uri.getPort()))) - throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); -} - -void StorageOfAllowedURL::checkHostAndPort(const std::string & host, const std::string & port) -{ - if (!checkString(host) && - !checkString(host + ":" + port)) - throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); -} - -void StorageOfAllowedURL::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config) -{ - if (config.has("remote_url_allow_hosts")) - { - std::vector keys; - config.keys("remote_url_allow_hosts", keys); - for (auto key : keys) - { - if (startsWith(key, "host_regexp")) - regexp_hosts.push_back(config.getString("remote_url_allow_hosts." + key)); - else if (startsWith(key, "host")) - primary_hosts.insert(config.getString("remote_url_allow_hosts." + key)); - } - } -} - -bool StorageOfAllowedURL::checkString(const std::string &host) -{ - if (!primary_hosts.empty() || !regexp_hosts.empty()) - { - if (primary_hosts.find(host) == primary_hosts.end()) - { - for (size_t i = 0; i < regexp_hosts.size(); ++i) - if (re2::RE2::FullMatch(host, regexp_hosts[i])) - return true; - return false; - } - return true; - } - return true; -} -} From 70234975d11b5f36ff2264c28c40230912d08036 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 16:13:44 +0300 Subject: [PATCH 18/62] Delete StorageOfAllowedURL.h --- dbms/src/Common/StorageOfAllowedURL.h | 26 -------------------------- 1 file changed, 26 deletions(-) delete mode 100644 dbms/src/Common/StorageOfAllowedURL.h diff --git a/dbms/src/Common/StorageOfAllowedURL.h b/dbms/src/Common/StorageOfAllowedURL.h deleted file mode 100644 index 8a4639f4bd4..00000000000 --- a/dbms/src/Common/StorageOfAllowedURL.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ -class StorageOfAllowedURL -{ -public: - void checkURL(const Poco::URI &uri); /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception - - void setValuesFromConfig(const Poco::Util::AbstractConfiguration &config); - - void checkHostAndPort(const std::string & host, const std::string & port); - -private: - std::unordered_set primary_hosts; /// Allowed primary () URL from config.xml - std::vector regexp_hosts; /// Allowed regexp () URL from config.xml - - bool checkString(const std::string &host); -}; -} From 375866936d6f4dd012240263147d207b2a9e1c0b Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 16:53:26 +0300 Subject: [PATCH 19/62] Update RemoteHostFilter.h --- dbms/src/Common/RemoteHostFilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/RemoteHostFilter.h b/dbms/src/Common/RemoteHostFilter.h index 9efd699685f..be9b55dc888 100644 --- a/dbms/src/Common/RemoteHostFilter.h +++ b/dbms/src/Common/RemoteHostFilter.h @@ -23,4 +23,4 @@ private: bool checkString(const std::string &host); }; -} \ No newline at end of file +} From 7b62fc68cf413b91b3a9d5d4a131acd390c9b1e1 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Thu, 10 Oct 2019 16:53:52 +0300 Subject: [PATCH 20/62] Update RemoteHostFilter.cpp --- dbms/src/Common/RemoteHostFilter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Common/RemoteHostFilter.cpp b/dbms/src/Common/RemoteHostFilter.cpp index a70b30d5648..c6e9495230f 100644 --- a/dbms/src/Common/RemoteHostFilter.cpp +++ b/dbms/src/Common/RemoteHostFilter.cpp @@ -59,4 +59,4 @@ bool RemoteHostFilter::checkString(const std::string &host) } return true; } -} \ No newline at end of file +} From e6d0c2a66c4c6ffc3719a9cdb4fa397653b0c8f5 Mon Sep 17 00:00:00 2001 From: millb Date: Mon, 21 Oct 2019 12:42:28 +0300 Subject: [PATCH 21/62] Redirect support added --- dbms/src/Storages/StorageURL.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index dda2e2959a5..cc409230373 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -56,6 +56,7 @@ namespace const ConnectionTimeouts & timeouts) : name(name_) { + context.getRemoteHostFilter().checkURL(uri); read_buf = std::make_unique(uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects); reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); } From 52d7e9a6679920d2e1349ea5fb1c6ca202df6c7e Mon Sep 17 00:00:00 2001 From: millb Date: Mon, 21 Oct 2019 17:36:24 +0300 Subject: [PATCH 22/62] Support redirect improvement --- dbms/src/Common/RemoteHostFilter.cpp | 6 +++--- dbms/src/Common/RemoteHostFilter.h | 8 +++++--- dbms/src/Interpreters/Context.cpp | 2 +- dbms/src/Interpreters/Context.h | 2 +- dbms/src/Storages/StorageURL.cpp | 6 +++--- dbms/src/Storages/StorageURL.h | 3 --- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/dbms/src/Common/RemoteHostFilter.cpp b/dbms/src/Common/RemoteHostFilter.cpp index c6e9495230f..2b0b2245c6d 100644 --- a/dbms/src/Common/RemoteHostFilter.cpp +++ b/dbms/src/Common/RemoteHostFilter.cpp @@ -14,14 +14,14 @@ namespace ErrorCodes extern const int UNACCEPTABLE_URL; } -void RemoteHostFilter::checkURL(const Poco::URI & uri) +void RemoteHostFilter::checkURL(const Poco::URI & uri) const { if (!checkString(uri.getHost()) && !checkString(uri.getHost() + ":" + toString(uri.getPort()))) throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); } -void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) +void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) const { if (!checkString(host) && !checkString(host + ":" + port)) @@ -44,7 +44,7 @@ void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati } } -bool RemoteHostFilter::checkString(const std::string &host) +bool RemoteHostFilter::checkString(const std::string &host) const { if (!primary_hosts.empty() || !regexp_hosts.empty()) { diff --git a/dbms/src/Common/RemoteHostFilter.h b/dbms/src/Common/RemoteHostFilter.h index be9b55dc888..859b92a2985 100644 --- a/dbms/src/Common/RemoteHostFilter.h +++ b/dbms/src/Common/RemoteHostFilter.h @@ -11,16 +11,18 @@ namespace DB class RemoteHostFilter { public: - void checkURL(const Poco::URI &uri); /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception + void checkURL(const Poco::URI &uri) const; /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception void setValuesFromConfig(const Poco::Util::AbstractConfiguration &config); - void checkHostAndPort(const std::string & host, const std::string & port); + void checkHostAndPort(const std::string & host, const std::string & port) const; + + RemoteHostFilter() {} private: std::unordered_set primary_hosts; /// Allowed primary () URL from config.xml std::vector regexp_hosts; /// Allowed regexp () URL from config.xml - bool checkString(const std::string &host); + bool checkString(const std::string &host) const; }; } diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 0159b90ceed..b6f35c80252 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1562,7 +1562,7 @@ void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & conf shared->remote_host_filter.setValuesFromConfig(config); } -RemoteHostFilter & Context::getRemoteHostFilter() const +const RemoteHostFilter & Context::getRemoteHostFilter() const { return shared->remote_host_filter; } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index ebfe8a77f80..9bd1f564f6c 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -350,7 +350,7 @@ public: /// Storage of allowed hosts from config.xml void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); - RemoteHostFilter & getRemoteHostFilter() const; + const RemoteHostFilter & getRemoteHostFilter() const; /// The port that the server listens for executing SQL queries. UInt16 getTCPPort() const; diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index cc409230373..3fbf8d7fb63 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -57,7 +57,7 @@ namespace : name(name_) { context.getRemoteHostFilter().checkURL(uri); - read_buf = std::make_unique(uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects); + read_buf = std::make_unique(uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects, context.getRemoteHostFilter()); reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); } @@ -88,7 +88,7 @@ namespace private: String name; - std::unique_ptr read_buf; + std::unique_ptr read_buf; BlockInputStreamPtr reader; }; diff --git a/dbms/src/Storages/StorageURL.h b/dbms/src/Storages/StorageURL.h index 43964e91273..cdd78c7b60f 100644 --- a/dbms/src/Storages/StorageURL.h +++ b/dbms/src/Storages/StorageURL.h @@ -66,9 +66,6 @@ private: size_t max_block_size) const; virtual Block getHeaderBlock(const Names & column_names) const = 0; - - /// Return true if host allowed - bool checkHost(const std::string & host); }; From 8b8ca9a141fbe5459a1a41053e42da3f5b26d8e8 Mon Sep 17 00:00:00 2001 From: millb Date: Mon, 21 Oct 2019 17:36:24 +0300 Subject: [PATCH 23/62] Support redirect improvement --- dbms/src/Common/RemoteHostFilter.cpp | 6 +- dbms/src/Common/RemoteHostFilter.h | 8 +- .../ReadWriteBufferFromHTTPWithHostFilter.cpp | 1 + .../ReadWriteBufferFromHTTPWithHostFilter.h | 80 +++++++++++++++++++ dbms/src/Interpreters/Context.cpp | 2 +- dbms/src/Interpreters/Context.h | 2 +- dbms/src/Storages/StorageURL.cpp | 6 +- dbms/src/Storages/StorageURL.h | 3 - 8 files changed, 94 insertions(+), 14 deletions(-) create mode 100644 dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp create mode 100644 dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h diff --git a/dbms/src/Common/RemoteHostFilter.cpp b/dbms/src/Common/RemoteHostFilter.cpp index c6e9495230f..2b0b2245c6d 100644 --- a/dbms/src/Common/RemoteHostFilter.cpp +++ b/dbms/src/Common/RemoteHostFilter.cpp @@ -14,14 +14,14 @@ namespace ErrorCodes extern const int UNACCEPTABLE_URL; } -void RemoteHostFilter::checkURL(const Poco::URI & uri) +void RemoteHostFilter::checkURL(const Poco::URI & uri) const { if (!checkString(uri.getHost()) && !checkString(uri.getHost() + ":" + toString(uri.getPort()))) throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); } -void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) +void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) const { if (!checkString(host) && !checkString(host + ":" + port)) @@ -44,7 +44,7 @@ void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati } } -bool RemoteHostFilter::checkString(const std::string &host) +bool RemoteHostFilter::checkString(const std::string &host) const { if (!primary_hosts.empty() || !regexp_hosts.empty()) { diff --git a/dbms/src/Common/RemoteHostFilter.h b/dbms/src/Common/RemoteHostFilter.h index be9b55dc888..859b92a2985 100644 --- a/dbms/src/Common/RemoteHostFilter.h +++ b/dbms/src/Common/RemoteHostFilter.h @@ -11,16 +11,18 @@ namespace DB class RemoteHostFilter { public: - void checkURL(const Poco::URI &uri); /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception + void checkURL(const Poco::URI &uri) const; /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception void setValuesFromConfig(const Poco::Util::AbstractConfiguration &config); - void checkHostAndPort(const std::string & host, const std::string & port); + void checkHostAndPort(const std::string & host, const std::string & port) const; + + RemoteHostFilter() {} private: std::unordered_set primary_hosts; /// Allowed primary () URL from config.xml std::vector regexp_hosts; /// Allowed regexp () URL from config.xml - bool checkString(const std::string &host); + bool checkString(const std::string &host) const; }; } diff --git a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp new file mode 100644 index 00000000000..22acdf8dd6e --- /dev/null +++ b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp @@ -0,0 +1 @@ +#include \ No newline at end of file diff --git a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h new file mode 100644 index 00000000000..653ed0f70c0 --- /dev/null +++ b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h @@ -0,0 +1,80 @@ +#pragma once + +#include +#include + +namespace DB +{ + +namespace detail +{ + template + class ReadWriteBufferFromHTTPWithHostFilterBase : public ReadWriteBufferFromHTTPBase + { + protected: + RemoteHostFilter remote_host_filter; + public: + using HTTPHeaderEntry = std::tuple; + using HTTPHeaderEntries = std::vector; + using OutStreamCallback = std::function; + using parent = ReadWriteBufferFromHTTPBase; + + explicit ReadWriteBufferFromHTTPWithHostFilterBase(UpdatableSessionPtr session_, + Poco::URI uri_, + const std::string & method_ = {}, + OutStreamCallback out_stream_callback_ = {}, + const Poco::Net::HTTPBasicCredentials & credentials_ = {}, + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + HTTPHeaderEntries http_header_entries_ = {}, + const RemoteHostFilter & remote_host_filter_ = {}) + : parent(session_, uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_) + , remote_host_filter {remote_host_filter_} + { + Poco::Net::HTTPResponse response; + + parent::istr = parent::call(parent::uri, response); + + while (isRedirect(response.getStatus())) + { + Poco::URI uri_redirect(response.get("Location")); + remote_host_filter.checkURL(uri_redirect); + + parent::session->updateSession(uri_redirect); + + parent::istr = parent::call(uri_redirect,response); + } + + try + { + parent::impl = std::make_unique(*parent::istr, buffer_size_); + } + catch (const Poco::Exception & e) + { + auto sess = parent::session->getSession(); + sess->attachSessionData(e.message()); + throw; + } + } + }; +} + + +class ReadWriteBufferFromHTTPWithHostFilter : public detail::ReadWriteBufferFromHTTPWithHostFilterBase> +{ + using Parent = detail::ReadWriteBufferFromHTTPWithHostFilterBase>; + +public: + explicit ReadWriteBufferFromHTTPWithHostFilter(Poco::URI uri_, + const std::string & method_ = {}, + OutStreamCallback out_stream_callback_ = {}, + const ConnectionTimeouts & timeouts = {}, + const DB::SettingUInt64 max_redirects = 0, + const RemoteHostFilter & remote_host_filter_ = {}, + const Poco::Net::HTTPBasicCredentials & credentials_ = {}, + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + const HTTPHeaderEntries & http_header_entries_ = {}) + : Parent(std::make_shared(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_) + { + } +}; +} diff --git a/dbms/src/Interpreters/Context.cpp b/dbms/src/Interpreters/Context.cpp index 0159b90ceed..b6f35c80252 100644 --- a/dbms/src/Interpreters/Context.cpp +++ b/dbms/src/Interpreters/Context.cpp @@ -1562,7 +1562,7 @@ void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & conf shared->remote_host_filter.setValuesFromConfig(config); } -RemoteHostFilter & Context::getRemoteHostFilter() const +const RemoteHostFilter & Context::getRemoteHostFilter() const { return shared->remote_host_filter; } diff --git a/dbms/src/Interpreters/Context.h b/dbms/src/Interpreters/Context.h index ebfe8a77f80..9bd1f564f6c 100644 --- a/dbms/src/Interpreters/Context.h +++ b/dbms/src/Interpreters/Context.h @@ -350,7 +350,7 @@ public: /// Storage of allowed hosts from config.xml void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config); - RemoteHostFilter & getRemoteHostFilter() const; + const RemoteHostFilter & getRemoteHostFilter() const; /// The port that the server listens for executing SQL queries. UInt16 getTCPPort() const; diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index cc409230373..3fbf8d7fb63 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -57,7 +57,7 @@ namespace : name(name_) { context.getRemoteHostFilter().checkURL(uri); - read_buf = std::make_unique(uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects); + read_buf = std::make_unique(uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects, context.getRemoteHostFilter()); reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); } @@ -88,7 +88,7 @@ namespace private: String name; - std::unique_ptr read_buf; + std::unique_ptr read_buf; BlockInputStreamPtr reader; }; diff --git a/dbms/src/Storages/StorageURL.h b/dbms/src/Storages/StorageURL.h index 43964e91273..cdd78c7b60f 100644 --- a/dbms/src/Storages/StorageURL.h +++ b/dbms/src/Storages/StorageURL.h @@ -66,9 +66,6 @@ private: size_t max_block_size) const; virtual Block getHeaderBlock(const Names & column_names) const = 0; - - /// Return true if host allowed - bool checkHost(const std::string & host); }; From 5d195ad2338aee82d9b8fd5c4407e5e05980ccc3 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Mon, 21 Oct 2019 19:24:39 +0300 Subject: [PATCH 24/62] Update ReadWriteBufferFromHTTPWithHostFilter.cpp --- dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp index 22acdf8dd6e..816e3d27887 100644 --- a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp +++ b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp @@ -1 +1 @@ -#include \ No newline at end of file +#include From eca81dc32e30f06e83110100b163dc6b438ba528 Mon Sep 17 00:00:00 2001 From: millb Date: Wed, 23 Oct 2019 14:58:35 +0300 Subject: [PATCH 25/62] Fixed bugs --- dbms/src/IO/ReadWriteBufferFromHTTP.h | 11 ++++++++--- dbms/src/Storages/StorageURL.cpp | 16 ++++++++++++---- .../configs/config_for_redirect.xml | 5 +++++ .../test_allowed_url_from_config/test.py | 9 +++++++++ 4 files changed, 34 insertions(+), 7 deletions(-) create mode 100644 dbms/tests/integration/test_allowed_url_from_config/configs/config_for_redirect.xml diff --git a/dbms/src/IO/ReadWriteBufferFromHTTP.h b/dbms/src/IO/ReadWriteBufferFromHTTP.h index 6b408568800..7b456e03378 100644 --- a/dbms/src/IO/ReadWriteBufferFromHTTP.h +++ b/dbms/src/IO/ReadWriteBufferFromHTTP.h @@ -101,6 +101,7 @@ namespace detail const Poco::Net::HTTPBasicCredentials & credentials; std::vector cookies; HTTPHeaderEntries http_header_entries; + RemoteHostFilter remote_host_filter; std::istream * call(const Poco::URI uri_, Poco::Net::HTTPResponse & response) { @@ -157,7 +158,8 @@ namespace detail OutStreamCallback out_stream_callback_ = {}, const Poco::Net::HTTPBasicCredentials & credentials_ = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - HTTPHeaderEntries http_header_entries_ = {}) + HTTPHeaderEntries http_header_entries_ = {}, + const RemoteHostFilter & remote_host_filter_ = {}) : ReadBuffer(nullptr, 0) , uri {uri_} , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET} @@ -165,6 +167,7 @@ namespace detail , out_stream_callback {out_stream_callback_} , credentials {credentials_} , http_header_entries {http_header_entries_} + , remote_host_filter {remote_host_filter_} { Poco::Net::HTTPResponse response; @@ -173,6 +176,7 @@ namespace detail while (isRedirect(response.getStatus())) { Poco::URI uri_redirect(response.get("Location")); + remote_host_filter.checkURL(uri_redirect); session->updateSession(uri_redirect); @@ -243,8 +247,9 @@ public: const DB::SettingUInt64 max_redirects = 0, const Poco::Net::HTTPBasicCredentials & credentials_ = {}, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const HTTPHeaderEntries & http_header_entries_ = {}) - : Parent(std::make_shared(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_) + const HTTPHeaderEntries & http_header_entries_ = {}, + const RemoteHostFilter & remote_host_filter_ = {}) + : Parent(std::make_shared(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_) { } }; diff --git a/dbms/src/Storages/StorageURL.cpp b/dbms/src/Storages/StorageURL.cpp index 3fbf8d7fb63..3bf9d8c0835 100644 --- a/dbms/src/Storages/StorageURL.cpp +++ b/dbms/src/Storages/StorageURL.cpp @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include @@ -56,8 +56,16 @@ namespace const ConnectionTimeouts & timeouts) : name(name_) { - context.getRemoteHostFilter().checkURL(uri); - read_buf = std::make_unique(uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects, context.getRemoteHostFilter()); + read_buf = std::make_unique(uri, + method, + callback, + timeouts, + context.getSettingsRef().max_http_get_redirects, + Poco::Net::HTTPBasicCredentials(), + DBMS_DEFAULT_BUFFER_SIZE, + std::vector>(), + context.getRemoteHostFilter()); + reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); } @@ -88,7 +96,7 @@ namespace private: String name; - std::unique_ptr read_buf; + std::unique_ptr read_buf; BlockInputStreamPtr reader; }; diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_for_redirect.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_for_redirect.xml new file mode 100644 index 00000000000..eceaf7a7838 --- /dev/null +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_for_redirect.xml @@ -0,0 +1,5 @@ + + + hdfs1:50070 + + diff --git a/dbms/tests/integration/test_allowed_url_from_config/test.py b/dbms/tests/integration/test_allowed_url_from_config/test.py index 2a3f00ebf1a..f44ee22e3bf 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/test.py +++ b/dbms/tests/integration/test_allowed_url_from_config/test.py @@ -1,6 +1,7 @@ import time import pytest +from helpers.hdfs_api import HDFSApi from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) @@ -10,6 +11,7 @@ node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_re node4 = cluster.add_instance('node4', main_configs=['configs/config_without_allowed_hosts.xml']) node5 = cluster.add_instance('node5', main_configs=['configs/config_without_block_of_allowed_hosts.xml']) node6 = cluster.add_instance('node6', main_configs=['configs/config_for_remote.xml']) +node7 = cluster.add_instance('node7', main_configs=['configs/config_for_redirect.xml'], with_hdfs=True) @pytest.fixture(scope="module") def start_cluster(): @@ -88,3 +90,10 @@ def test_table_function_remote_on_config_for_remote(start_cluster): assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)") assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)") assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)") + +def test_redirect(start_cluster): + hdfs_api = HDFSApi("root") + hdfs_api.write_data("/simple_storage", "1\t\n") + assert hdfs_api.read_data("/simple_storage") == "1\t\n" + node7.query("CREATE TABLE table_test_7 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)") + assert "not allowed" in node7.query_and_get_error("SET max_http_get_redirects=1; SELECT * from table_test_7") From 717d88df45eb646bf1974223cbfe1ebbf6a011c4 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:00:04 +0300 Subject: [PATCH 26/62] Delete ReadWriteBufferFromHTTPWithHostFilter.cpp --- dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp | 1 - 1 file changed, 1 deletion(-) delete mode 100644 dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp diff --git a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp deleted file mode 100644 index 816e3d27887..00000000000 --- a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.cpp +++ /dev/null @@ -1 +0,0 @@ -#include From 20caa8724c4c857d047f6e494f96b5e738e11bfe Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:00:43 +0300 Subject: [PATCH 27/62] Delete ReadWriteBufferFromHTTPWithHostFilter.h --- .../ReadWriteBufferFromHTTPWithHostFilter.h | 80 ------------------- 1 file changed, 80 deletions(-) delete mode 100644 dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h diff --git a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h b/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h deleted file mode 100644 index 653ed0f70c0..00000000000 --- a/dbms/src/IO/ReadWriteBufferFromHTTPWithHostFilter.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include -#include - -namespace DB -{ - -namespace detail -{ - template - class ReadWriteBufferFromHTTPWithHostFilterBase : public ReadWriteBufferFromHTTPBase - { - protected: - RemoteHostFilter remote_host_filter; - public: - using HTTPHeaderEntry = std::tuple; - using HTTPHeaderEntries = std::vector; - using OutStreamCallback = std::function; - using parent = ReadWriteBufferFromHTTPBase; - - explicit ReadWriteBufferFromHTTPWithHostFilterBase(UpdatableSessionPtr session_, - Poco::URI uri_, - const std::string & method_ = {}, - OutStreamCallback out_stream_callback_ = {}, - const Poco::Net::HTTPBasicCredentials & credentials_ = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - HTTPHeaderEntries http_header_entries_ = {}, - const RemoteHostFilter & remote_host_filter_ = {}) - : parent(session_, uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_) - , remote_host_filter {remote_host_filter_} - { - Poco::Net::HTTPResponse response; - - parent::istr = parent::call(parent::uri, response); - - while (isRedirect(response.getStatus())) - { - Poco::URI uri_redirect(response.get("Location")); - remote_host_filter.checkURL(uri_redirect); - - parent::session->updateSession(uri_redirect); - - parent::istr = parent::call(uri_redirect,response); - } - - try - { - parent::impl = std::make_unique(*parent::istr, buffer_size_); - } - catch (const Poco::Exception & e) - { - auto sess = parent::session->getSession(); - sess->attachSessionData(e.message()); - throw; - } - } - }; -} - - -class ReadWriteBufferFromHTTPWithHostFilter : public detail::ReadWriteBufferFromHTTPWithHostFilterBase> -{ - using Parent = detail::ReadWriteBufferFromHTTPWithHostFilterBase>; - -public: - explicit ReadWriteBufferFromHTTPWithHostFilter(Poco::URI uri_, - const std::string & method_ = {}, - OutStreamCallback out_stream_callback_ = {}, - const ConnectionTimeouts & timeouts = {}, - const DB::SettingUInt64 max_redirects = 0, - const RemoteHostFilter & remote_host_filter_ = {}, - const Poco::Net::HTTPBasicCredentials & credentials_ = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, - const HTTPHeaderEntries & http_header_entries_ = {}) - : Parent(std::make_shared(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_) - { - } -}; -} From c8220b7876c03594cb2f744c02ab87a4770fa0b7 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:02:19 +0300 Subject: [PATCH 28/62] Update config_with_only_primary_hosts.xml --- .../configs/config_with_only_primary_hosts.xml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml index d5dd56f224d..a84d864bd0d 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_primary_hosts.xml @@ -1,10 +1,8 @@ - host:80 host:123 yandex.ru - - + From b8763763f74c5f5e55b190669f55828d2f4b0ec4 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:02:50 +0300 Subject: [PATCH 29/62] Update config_with_only_regexp_hosts.xml --- .../configs/config_with_only_regexp_hosts.xml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml index 3f7bea8d35c..baec87b0735 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml @@ -1,9 +1,7 @@ - - ^[a-z]*:80$ - ^[a-z]*\.ru$ - + ^[a-z]*:80$ + ^[a-z]*\.ru$ From 895392668a2fa93926ebb3c06a1479cbfd13a650 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:03:10 +0300 Subject: [PATCH 30/62] Update config_without_allowed_hosts.xml --- .../configs/config_without_allowed_hosts.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml index 62d8d30d1e0..63559a2c6f6 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml @@ -1,6 +1,5 @@ - From 3e53a9e78b098d54c8ef8d299e8630b5d4e111aa Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:04:17 +0300 Subject: [PATCH 31/62] Update config_with_hosts.xml --- .../configs/config_with_hosts.xml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml index c332d7ef4de..3fcb0d62501 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml @@ -1,10 +1,7 @@ - - host:80 - - ^[a-z]*\.ru$ - + host:80 + ^[a-z]*\.ru$ From bdd505acbdd487243741bce5b6ba7872985a6dbb Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:05:00 +0300 Subject: [PATCH 32/62] Update config_with_hosts.xml --- .../configs/config_with_hosts.xml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml index 3fcb0d62501..b35fa733ae6 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_hosts.xml @@ -1,7 +1,7 @@ - - host:80 - ^[a-z]*\.ru$ + + host:80 + ^[a-z]*\.ru$ From 868407d83f91b600f248d382da2632bae48855a0 Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:05:32 +0300 Subject: [PATCH 33/62] Update config_with_only_regexp_hosts.xml --- .../configs/config_with_only_regexp_hosts.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml index baec87b0735..b807672c2c1 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_with_only_regexp_hosts.xml @@ -1,7 +1,7 @@ - - ^[a-z]*:80$ - ^[a-z]*\.ru$ - + + ^[a-z]*:80$ + ^[a-z]*\.ru$ + From 09847b21f2d003d2ee03ca27ff39e0ce6cdc9b5e Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Wed, 23 Oct 2019 15:05:53 +0300 Subject: [PATCH 34/62] Update config_without_allowed_hosts.xml --- .../configs/config_without_allowed_hosts.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml index 63559a2c6f6..1855c56fc03 100644 --- a/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml +++ b/dbms/tests/integration/test_allowed_url_from_config/configs/config_without_allowed_hosts.xml @@ -1,5 +1,5 @@ - - + + From 57f6780136f7836f454fdd8566c9c7fd65c368df Mon Sep 17 00:00:00 2001 From: Mikhail Korotov <55493615+millb@users.noreply.github.com> Date: Tue, 29 Oct 2019 14:39:57 +0300 Subject: [PATCH 35/62] Update ErrorCodes.cpp --- dbms/src/Common/ErrorCodes.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index eabeb894cd9..8ced7b75516 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -459,7 +459,12 @@ namespace ErrorCodes extern const int DICTIONARY_ACCESS_DENIED = 482; extern const int TOO_MANY_REDIRECTS = 483; extern const int INTERNAL_REDIS_ERROR = 484; - extern const int UNACCEPTABLE_URL = 485; + extern const int SCALAR_ALREADY_EXISTS = 485; + extern const int UNKNOWN_SCALAR = 486; + extern const int CANNOT_GET_CREATE_DICTIONARY_QUERY = 487; + extern const int UNKNOWN_DICTIONARY = 488; + extern const int INCORRECT_DICTIONARY_DEFINITION = 489; + extern const int UNACCEPTABLE_URL = 490; extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; From c154675a1766255bd2509cc0d6b3ab47609065a3 Mon Sep 17 00:00:00 2001 From: millb Date: Tue, 29 Oct 2019 13:30:47 +0300 Subject: [PATCH 36/62] fixed_merge_conflict:wq --- dbms/src/Common/ErrorCodes.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/dbms/src/Common/ErrorCodes.cpp b/dbms/src/Common/ErrorCodes.cpp index 8ced7b75516..6db3816e670 100644 --- a/dbms/src/Common/ErrorCodes.cpp +++ b/dbms/src/Common/ErrorCodes.cpp @@ -466,6 +466,7 @@ namespace ErrorCodes extern const int INCORRECT_DICTIONARY_DEFINITION = 489; extern const int UNACCEPTABLE_URL = 490; + extern const int KEEPER_EXCEPTION = 999; extern const int POCO_EXCEPTION = 1000; extern const int STD_EXCEPTION = 1001; From a03f8145bb8c52c5c8747d790c543c4ade519da2 Mon Sep 17 00:00:00 2001 From: millb Date: Tue, 5 Nov 2019 15:40:49 +0300 Subject: [PATCH 37/62] Created check for HDFS and S3 storages : --- dbms/src/Common/RemoteHostFilter.cpp | 14 +++++++------- dbms/src/Common/RemoteHostFilter.h | 14 ++++++++------ dbms/src/IO/ReadBufferFromS3.cpp | 5 ++++- dbms/src/IO/ReadBufferFromS3.h | 5 ++++- dbms/src/IO/WriteBufferFromS3.cpp | 5 ++++- dbms/src/IO/WriteBufferFromS3.h | 4 +++- dbms/src/Storages/StorageHDFS.cpp | 2 +- dbms/src/Storages/StorageS3.cpp | 4 ++-- 8 files changed, 33 insertions(+), 20 deletions(-) diff --git a/dbms/src/Common/RemoteHostFilter.cpp b/dbms/src/Common/RemoteHostFilter.cpp index 2b0b2245c6d..16aaac35dbe 100644 --- a/dbms/src/Common/RemoteHostFilter.cpp +++ b/dbms/src/Common/RemoteHostFilter.cpp @@ -16,15 +16,15 @@ namespace ErrorCodes void RemoteHostFilter::checkURL(const Poco::URI & uri) const { - if (!checkString(uri.getHost()) && - !checkString(uri.getHost() + ":" + toString(uri.getPort()))) + if (!checkForDirectEntry(uri.getHost()) && + !checkForDirectEntry(uri.getHost() + ":" + toString(uri.getPort()))) throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); } void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) const { - if (!checkString(host) && - !checkString(host + ":" + port)) + if (!checkForDirectEntry(host) && + !checkForDirectEntry(host + ":" + port)) throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL); } @@ -44,14 +44,14 @@ void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfigurati } } -bool RemoteHostFilter::checkString(const std::string &host) const +bool RemoteHostFilter::checkForDirectEntry(const std::string & str) const { if (!primary_hosts.empty() || !regexp_hosts.empty()) { - if (primary_hosts.find(host) == primary_hosts.end()) + if (primary_hosts.find(str) == primary_hosts.end()) { for (size_t i = 0; i < regexp_hosts.size(); ++i) - if (re2::RE2::FullMatch(host, regexp_hosts[i])) + if (re2::RE2::FullMatch(str, regexp_hosts[i])) return true; return false; } diff --git a/dbms/src/Common/RemoteHostFilter.h b/dbms/src/Common/RemoteHostFilter.h index 859b92a2985..86743891051 100644 --- a/dbms/src/Common/RemoteHostFilter.h +++ b/dbms/src/Common/RemoteHostFilter.h @@ -10,19 +10,21 @@ namespace DB { class RemoteHostFilter { +/** + * This class checks if url is allowed. + * If primary_hosts and regexp_hosts are empty all urls are allowed. + */ public: - void checkURL(const Poco::URI &uri) const; /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception + void checkURL(const Poco::URI & uri) const; /// If URL not allowed in config.xml throw UNACCEPTABLE_URL Exception - void setValuesFromConfig(const Poco::Util::AbstractConfiguration &config); + void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config); - void checkHostAndPort(const std::string & host, const std::string & port) const; - - RemoteHostFilter() {} + void checkHostAndPort(const std::string & host, const std::string & port) const; /// Does the same as checkURL, but for host and port. private: std::unordered_set primary_hosts; /// Allowed primary () URL from config.xml std::vector regexp_hosts; /// Allowed regexp () URL from config.xml - bool checkString(const std::string &host) const; + bool checkForDirectEntry(const std::string & str) const; /// Checks if the primary_hosts and regexp_hosts contain str. If primary_hosts and regexp_hosts are empty return true. }; } diff --git a/dbms/src/IO/ReadBufferFromS3.cpp b/dbms/src/IO/ReadBufferFromS3.cpp index ae09f0fb189..fef458bf70d 100644 --- a/dbms/src/IO/ReadBufferFromS3.cpp +++ b/dbms/src/IO/ReadBufferFromS3.cpp @@ -13,11 +13,13 @@ const int DEFAULT_S3_MAX_FOLLOW_GET_REDIRECT = 2; ReadBufferFromS3::ReadBufferFromS3(Poco::URI uri_, const ConnectionTimeouts & timeouts, const Poco::Net::HTTPBasicCredentials & credentials, - size_t buffer_size_) + size_t buffer_size_, + const RemoteHostFilter & remote_host_filter_) : ReadBuffer(nullptr, 0) , uri {uri_} , method {Poco::Net::HTTPRequest::HTTP_GET} , session {makeHTTPSession(uri_, timeouts)} + , remote_host_filter {remote_host_filter_} { Poco::Net::HTTPResponse response; std::unique_ptr request; @@ -50,6 +52,7 @@ ReadBufferFromS3::ReadBufferFromS3(Poco::URI uri_, break; uri = location_iterator->second; + remote_host_filter.checkURL(uri); session = makeHTTPSession(uri, timeouts); } diff --git a/dbms/src/IO/ReadBufferFromS3.h b/dbms/src/IO/ReadBufferFromS3.h index ffc0c5c0ab1..48d073e743e 100644 --- a/dbms/src/IO/ReadBufferFromS3.h +++ b/dbms/src/IO/ReadBufferFromS3.h @@ -23,11 +23,14 @@ protected: std::istream * istr; /// owned by session std::unique_ptr impl; + RemoteHostFilter remote_host_filter; + public: explicit ReadBufferFromS3(Poco::URI uri_, const ConnectionTimeouts & timeouts = {}, const Poco::Net::HTTPBasicCredentials & credentials = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + const RemoteHostFilter & remote_host_filter_ = {}); bool nextImpl() override; }; diff --git a/dbms/src/IO/WriteBufferFromS3.cpp b/dbms/src/IO/WriteBufferFromS3.cpp index 9604b6ce199..e18270946c5 100644 --- a/dbms/src/IO/WriteBufferFromS3.cpp +++ b/dbms/src/IO/WriteBufferFromS3.cpp @@ -32,7 +32,8 @@ WriteBufferFromS3::WriteBufferFromS3( const Poco::URI & uri_, size_t minimum_upload_part_size_, const ConnectionTimeouts & timeouts_, - const Poco::Net::HTTPBasicCredentials & credentials, size_t buffer_size_ + const Poco::Net::HTTPBasicCredentials & credentials, size_t buffer_size_, + const RemoteHostFilter & remote_host_filter_ ) : BufferWithOwnMemory(buffer_size_, nullptr, 0) , uri {uri_} @@ -41,6 +42,7 @@ WriteBufferFromS3::WriteBufferFromS3( , auth_request {Poco::Net::HTTPRequest::HTTP_PUT, uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1} , temporary_buffer {std::make_unique(buffer_string)} , last_part_size {0} + , remote_host_filter(remote_host_filter_) { if (!credentials.getUsername().empty()) credentials.authenticate(auth_request); @@ -137,6 +139,7 @@ void WriteBufferFromS3::initiate() break; initiate_uri = location_iterator->second; + remote_host_filter.checkURL(initiate_uri); } assertResponseIsOk(*request_ptr, response, *istr); diff --git a/dbms/src/IO/WriteBufferFromS3.h b/dbms/src/IO/WriteBufferFromS3.h index 9afda1d14e2..e563bb652ed 100644 --- a/dbms/src/IO/WriteBufferFromS3.h +++ b/dbms/src/IO/WriteBufferFromS3.h @@ -27,6 +27,7 @@ private: String buffer_string; std::unique_ptr temporary_buffer; size_t last_part_size; + RemoteHostFilter remote_host_filter; /// Upload in S3 is made in parts. /// We initiate upload, then upload each part and get ETag as a response, and then finish upload with listing all our parts. @@ -38,7 +39,8 @@ public: size_t minimum_upload_part_size_, const ConnectionTimeouts & timeouts = {}, const Poco::Net::HTTPBasicCredentials & credentials = {}, - size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE); + size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, + const RemoteHostFilter & remote_host_filter_ = {}); void nextImpl() override; diff --git a/dbms/src/Storages/StorageHDFS.cpp b/dbms/src/Storages/StorageHDFS.cpp index 27e5aa67398..546772d8b52 100644 --- a/dbms/src/Storages/StorageHDFS.cpp +++ b/dbms/src/Storages/StorageHDFS.cpp @@ -43,7 +43,7 @@ StorageHDFS::StorageHDFS(const String & uri_, , database_name(database_name_) , context(context_) { - context.getRemoteHostFilter().checkURL(); + context.getRemoteHostFilter().checkURL(Poco::URI(uri)); setColumns(columns_); setConstraints(constraints_); } diff --git a/dbms/src/Storages/StorageS3.cpp b/dbms/src/Storages/StorageS3.cpp index 83caaf893e2..1a1274ef9d1 100644 --- a/dbms/src/Storages/StorageS3.cpp +++ b/dbms/src/Storages/StorageS3.cpp @@ -38,7 +38,7 @@ namespace const ConnectionTimeouts & timeouts) : name(name_) { - read_buf = std::make_unique(uri, timeouts); + read_buf = std::make_unique(uri, timeouts, Poco::Net::HTTPBasicCredentials(), DBMS_DEFAULT_BUFFER_SIZE, context.getRemoteHostFilter()); reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); } @@ -85,7 +85,7 @@ namespace const ConnectionTimeouts & timeouts) : sample_block(sample_block_) { - write_buf = std::make_unique(uri, min_upload_part_size, timeouts); + write_buf = std::make_unique(uri, min_upload_part_size, timeouts, Poco::Net::HTTPBasicCredentials(), DBMS_DEFAULT_BUFFER_SIZE, context.getRemoteHostFilter()); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); } From 82d09a8df0727d9eeb2da73067164c77c7433cda Mon Sep 17 00:00:00 2001 From: Mikhail Korotov Date: Tue, 5 Nov 2019 15:45:43 +0300 Subject: [PATCH 38/62] working commit --- dbms/programs/server/config.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/dbms/programs/server/config.xml b/dbms/programs/server/config.xml index 6e9bb527c97..6ce79dafe31 100644 --- a/dbms/programs/server/config.xml +++ b/dbms/programs/server/config.xml @@ -15,7 +15,6 @@ 8123 9000 - diff --git a/docs/en/query_language/functions/introspection.md b/docs/en/query_language/functions/introspection.md new file mode 100644 index 00000000000..4a7b26c2b8d --- /dev/null +++ b/docs/en/query_language/functions/introspection.md @@ -0,0 +1,70 @@ +# Introspection Functions + +You can use functions described in this chapter to introspect ELF and DWARF for query profiling. These functions are slow and may impose security considerations. + +For proper operation of introspection functions: + +- Install the `clickhouse-common-static-dbg` package. +- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) to 1. + + +## addressToLine {#addresstoline} + +Shows the filename and the number of a string in ClickHouse sources, using the debug information installed by the `clickhouse-common-static-dbg` package. + +**Syntax** + +```sql +addressToLine(address_of_binary_instruction) +``` + +**Parameters** + +- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of code instruction that have been executed at the moment of sampling. + +**Returned value** + +- Source code filename and the number of string in this file. + +Type: [String](../../data_types/string.md). + +**Example** + +Enabling introspection functions: + +```sql +SET allow_introspection_functions=1 +``` + +Selecting the first string from the `trace_log` system table: + +```sql +SELECT + *, + arrayStringConcat(arrayMap(x -> addressToLine(x), trace), '\n') AS trace_source_code_lines +FROM system.trace_log +LIMIT 1 +\G +``` + +The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToLine` function. The result of this processing you see in the `trace_source_code_lines` column of output. + +```text +Row 1: +────── +event_date: 2019-11-19 +event_time: 2019-11-19 18:57:23 +revision: 54429 +timer_type: Real +thread_number: 48 +query_id: 421b6855-1858-45a5-8f37-f383409d6d72 +trace: [140658411141617,94784174532828,94784076370703,94784076372094,94784076361020,94784175007680,140658411116251,140658403895439] +trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so +/usr/lib/debug/usr/bin/clickhouse +/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199 +/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.h:155 +/usr/include/c++/9/bits/atomic_base.h:551 +/usr/lib/debug/usr/bin/clickhouse +/lib/x86_64-linux-gnu/libpthread-2.27.so +/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97 +``` diff --git a/docs/toc_en.yml b/docs/toc_en.yml index 356a256e2d0..3435782a329 100644 --- a/docs/toc_en.yml +++ b/docs/toc_en.yml @@ -119,6 +119,7 @@ nav: - 'Working with geographical coordinates': 'query_language/functions/geo.md' - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md' - 'Machine Learning Functions': 'query_language/functions/machine_learning_functions.md' + - 'Introspection': 'query_language/functions/introspection.md' - 'Other': 'query_language/functions/other_functions.md' - 'Aggregate Functions': - 'Introduction': 'query_language/agg_functions/index.md' From d45898a75974022f73679f41100898af3d2671b1 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Thu, 21 Nov 2019 08:58:57 +0300 Subject: [PATCH 54/62] CLICKHOUSEDOCS-121: Introspection functions description. --- .../query_language/functions/introspection.md | 160 +++++++++++++++++- 1 file changed, 157 insertions(+), 3 deletions(-) diff --git a/docs/en/query_language/functions/introspection.md b/docs/en/query_language/functions/introspection.md index 4a7b26c2b8d..31c08cf5eb0 100644 --- a/docs/en/query_language/functions/introspection.md +++ b/docs/en/query_language/functions/introspection.md @@ -1,16 +1,22 @@ # Introspection Functions -You can use functions described in this chapter to introspect ELF and DWARF for query profiling. These functions are slow and may impose security considerations. +You can use functions described in this chapter to introspect ELF and DWARF for query profiling. + +!!! warning "Warning" + These functions are slow and may impose security considerations. For proper operation of introspection functions: - Install the `clickhouse-common-static-dbg` package. -- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) to 1. +- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1. + For security reasons introspection functions are disabled by default. + +ClickHouse saves profiler reports to the [trace_log](../../operations/system_tables.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly. ## addressToLine {#addresstoline} -Shows the filename and the number of a string in ClickHouse sources, using the debug information installed by the `clickhouse-common-static-dbg` package. +Converts virtual memory address inside ClickHouse server process to the filename and the number of a string in ClickHouse source code, using the debug information installed by the `clickhouse-common-static-dbg` package. **Syntax** @@ -68,3 +74,151 @@ trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so /lib/x86_64-linux-gnu/libpthread-2.27.so /build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97 ``` + +## addressToSymbol {#addresstosymbol} + +Converts virtual memory address inside ClickHouse server process to the symbol from ClickHouse object files. + + +**Syntax** + +```sql +addressToSymbol(address_of_binary_instruction) +``` + +**Parameters** + +- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of code instruction that have been executed at the moment of sampling. + +**Returned value** + +- Symbol from ClickHouse object files. + +Type: [String](../../data_types/string.md). + +**Example** + +Enabling introspection functions: + +```sql +SET allow_introspection_functions=1 +``` + +Selecting the first string from the `trace_log` system table: + +```sql +SELECT + *, + arrayStringConcat(arrayMap(x -> addressToSymbol(x), trace), '\n') AS trace_symbols +FROM system.trace_log +LIMIT 1 +\G +``` + +The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `addressToSymbols` function. The result of this processing you see in the `trace_symbols` column of output. + + +```text +Row 1: +────── +event_date: 2019-11-20 +event_time: 2019-11-20 16:57:59 +revision: 54429 +timer_type: Real +thread_number: 48 +query_id: 724028bf-f550-45aa-910d-2af6212b94ac +trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583] +trace_symbols: _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE +_ZNK2DB10Aggregator21executeWithoutKeyImplERPcmPNS0_28AggregateFunctionInstructionEPNS_5ArenaE +_ZN2DB10Aggregator14executeOnBlockESt6vectorIN3COWINS_7IColumnEE13immutable_ptrIS3_EESaIS6_EEmRNS_22AggregatedDataVariantsERS1_IPKS3_SaISC_EERS1_ISE_SaISE_EERb +_ZN2DB10Aggregator14executeOnBlockERKNS_5BlockERNS_22AggregatedDataVariantsERSt6vectorIPKNS_7IColumnESaIS9_EERS6_ISB_SaISB_EERb +_ZN2DB10Aggregator7executeERKSt10shared_ptrINS_17IBlockInputStreamEERNS_22AggregatedDataVariantsE +_ZN2DB27AggregatingBlockInputStream8readImplEv +_ZN2DB17IBlockInputStream4readEv +_ZN2DB26ExpressionBlockInputStream8readImplEv +_ZN2DB17IBlockInputStream4readEv +_ZN2DB26ExpressionBlockInputStream8readImplEv +_ZN2DB17IBlockInputStream4readEv +_ZN2DB28AsynchronousBlockInputStream9calculateEv +_ZNSt17_Function_handlerIFvvEZN2DB28AsynchronousBlockInputStream4nextEvEUlvE_E9_M_invokeERKSt9_Any_data +_ZN14ThreadPoolImplI20ThreadFromGlobalPoolE6workerESt14_List_iteratorIS0_E +_ZZN20ThreadFromGlobalPoolC4IZN14ThreadPoolImplIS_E12scheduleImplIvEET_St8functionIFvvEEiSt8optionalImEEUlvE1_JEEEOS4_DpOT0_ENKUlvE_clEv +_ZN14ThreadPoolImplISt6threadE6workerESt14_List_iteratorIS0_E +execute_native_thread_routine +start_thread +clone +``` + +## demangle {#demangle} + +Converts a symbol that you can get by the [addressToSymbol](#addresstosymbol) function to the C++ function name. + + +**Syntax** + +```sql +demangle(symbol) +``` + +**Parameters** + +- `symbol` ([String](../../data_types/string.md)) — Symbol from an object file. + +**Returned value** + +- Name of the C++ function. + +Type: [String](../../data_types/string.md). + +**Example** + +Enabling introspection functions: + +```sql +SET allow_introspection_functions=1 +``` + +Selecting the first string from the `trace_log` system table: + +```sql +SELECT + *, + arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS trace_functions +FROM system.trace_log +LIMIT 1 +\G +``` + +The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `demangle` function. The result of this processing you see in the `trace_functions` column of output. + + +```text +Row 1: +────── +event_date: 2019-11-20 +event_time: 2019-11-20 16:57:59 +revision: 54429 +timer_type: Real +thread_number: 48 +query_id: 724028bf-f550-45aa-910d-2af6212b94ac +trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583] +trace_functions: DB::IAggregateFunctionHelper > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const +DB::Aggregator::executeWithoutKeyImpl(char*&, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, DB::Arena*) const +DB::Aggregator::executeOnBlock(std::vector::immutable_ptr, std::allocator::immutable_ptr > >, unsigned long, DB::AggregatedDataVariants&, std::vector >&, std::vector >, std::allocator > > >&, bool&) +DB::Aggregator::executeOnBlock(DB::Block const&, DB::AggregatedDataVariants&, std::vector >&, std::vector >, std::allocator > > >&, bool&) +DB::Aggregator::execute(std::shared_ptr const&, DB::AggregatedDataVariants&) +DB::AggregatingBlockInputStream::readImpl() +DB::IBlockInputStream::read() +DB::ExpressionBlockInputStream::readImpl() +DB::IBlockInputStream::read() +DB::ExpressionBlockInputStream::readImpl() +DB::IBlockInputStream::read() +DB::AsynchronousBlockInputStream::calculate() +std::_Function_handler::_M_invoke(std::_Any_data const&) +ThreadPoolImpl::worker(std::_List_iterator) +ThreadFromGlobalPool::ThreadFromGlobalPool::scheduleImpl(std::function, int, std::optional)::{lambda()#3}>(ThreadPoolImpl::scheduleImpl(std::function, int, std::optional)::{lambda()#3}&&)::{lambda()#1}::operator()() const +ThreadPoolImpl::worker(std::_List_iterator) +execute_native_thread_routine +start_thread +clone +``` From 4205fe8c9b8905a3e303df140691805e01a40809 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Tue, 3 Dec 2019 18:00:45 +0300 Subject: [PATCH 55/62] Update docs/en/query_language/functions/introspection.md Co-Authored-By: Ivan Blinkov --- docs/en/query_language/functions/introspection.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/query_language/functions/introspection.md b/docs/en/query_language/functions/introspection.md index 31c08cf5eb0..694685649cb 100644 --- a/docs/en/query_language/functions/introspection.md +++ b/docs/en/query_language/functions/introspection.md @@ -151,7 +151,7 @@ clone ## demangle {#demangle} -Converts a symbol that you can get by the [addressToSymbol](#addresstosymbol) function to the C++ function name. +Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) function to the C++ function name. **Syntax** From 22399ad7bec6d090956542d49c852ef084d1e825 Mon Sep 17 00:00:00 2001 From: Sergei Shtykov Date: Tue, 3 Dec 2019 19:52:28 +0300 Subject: [PATCH 56/62] CLICKHOUSEDOCS-121: Update by the comments. --- docs/en/operations/settings/settings.md | 3 - .../query_language/functions/introspection.md | 134 ++++++++++++++---- 2 files changed, 104 insertions(+), 33 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 107813f2f63..e9b0b7972cc 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1011,7 +1011,6 @@ Lower values mean higher priority. Threads with low `nice` priority values are e Default value: 0. -<<<<<<< HEAD ## allow_introspection_functions {#settings-allow_introspection_functions} Enables of disables [introspections functions](../../query_language/functions/introspection.md) for query profiling. @@ -1023,7 +1022,6 @@ Possible values: Default value: 0. -======= ## input_format_parallel_parsing - Type: bool @@ -1037,6 +1035,5 @@ Enable order-preserving parallel parsing of data formats. Supported only for TSV - Default value: 1 MiB The minimum chunk size in bytes, which each thread will parse in parallel. ->>>>>>> da754943d87c78168b9c2f5510b727def69b600c [Original article](https://clickhouse.yandex/docs/en/operations/settings/settings/) diff --git a/docs/en/query_language/functions/introspection.md b/docs/en/query_language/functions/introspection.md index 694685649cb..520c89feaeb 100644 --- a/docs/en/query_language/functions/introspection.md +++ b/docs/en/query_language/functions/introspection.md @@ -1,6 +1,6 @@ # Introspection Functions -You can use functions described in this chapter to introspect ELF and DWARF for query profiling. +You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling. !!! warning "Warning" These functions are slow and may impose security considerations. @@ -16,7 +16,9 @@ ClickHouse saves profiler reports to the [trace_log](../../operations/system_tab ## addressToLine {#addresstoline} -Converts virtual memory address inside ClickHouse server process to the filename and the number of a string in ClickHouse source code, using the debug information installed by the `clickhouse-common-static-dbg` package. +Converts virtual memory address inside ClickHouse server process to the filename and the line number in ClickHouse source code. + +If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package. **Syntax** @@ -26,11 +28,16 @@ addressToLine(address_of_binary_instruction) **Parameters** -- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of code instruction that have been executed at the moment of sampling. +- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of instruction in a running process. **Returned value** -- Source code filename and the number of string in this file. +- Source code filename and the line number in this file delimited by colon. + + For example, `/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199`, where `199` is a line number. + +- Name of a binary, if the function couldn't find the debug information. +- Empty string, if the address is not valid. Type: [String](../../data_types/string.md). @@ -44,9 +51,38 @@ SET allow_introspection_functions=1 Selecting the first string from the `trace_log` system table: +```sql +SELECT * FROM system.trace_log LIMIT 1 \G +``` +```text +Row 1: +────── +event_date: 2019-11-19 +event_time: 2019-11-19 18:57:23 +revision: 54429 +timer_type: Real +thread_number: 48 +query_id: 421b6855-1858-45a5-8f37-f383409d6d72 +trace: [140658411141617,94784174532828,94784076370703,94784076372094,94784076361020,94784175007680,140658411116251,140658403895439] +``` + +The `trace` field contains the stack trace at the moment of sampling. + +Getting the source code filename and the line number for a single address: + +```sql +SELECT addressToLine(94784076370703) \G +``` +```text +Row 1: +────── +addressToLine(94784076370703): /build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199 +``` + +Applying the function to the whole stack trace: + ```sql SELECT - *, arrayStringConcat(arrayMap(x -> addressToLine(x), trace), '\n') AS trace_source_code_lines FROM system.trace_log LIMIT 1 @@ -58,13 +94,6 @@ The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) funct ```text Row 1: ────── -event_date: 2019-11-19 -event_time: 2019-11-19 18:57:23 -revision: 54429 -timer_type: Real -thread_number: 48 -query_id: 421b6855-1858-45a5-8f37-f383409d6d72 -trace: [140658411141617,94784174532828,94784076370703,94784076372094,94784076361020,94784175007680,140658411116251,140658403895439] trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so /usr/lib/debug/usr/bin/clickhouse /build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199 @@ -88,11 +117,12 @@ addressToSymbol(address_of_binary_instruction) **Parameters** -- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of code instruction that have been executed at the moment of sampling. +- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of instruction in a running process. **Returned value** - Symbol from ClickHouse object files. +- Empty string, if the address is not valid. Type: [String](../../data_types/string.md). @@ -106,9 +136,38 @@ SET allow_introspection_functions=1 Selecting the first string from the `trace_log` system table: +```sql +SELECT * FROM system.trace_log LIMIT 1 \G +``` +```text +Row 1: +────── +event_date: 2019-11-20 +event_time: 2019-11-20 16:57:59 +revision: 54429 +timer_type: Real +thread_number: 48 +query_id: 724028bf-f550-45aa-910d-2af6212b94ac +trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583] +``` + +The `trace` field contains the stack trace at the moment of sampling. + +Getting a symbol for a single address: + +```sql +SELECT addressToSymbol(94138803686098) \G +``` +```text +Row 1: +────── +addressToSymbol(94138803686098): _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE +``` + +Applying the function to the whole stack trace: + ```sql SELECT - *, arrayStringConcat(arrayMap(x -> addressToSymbol(x), trace), '\n') AS trace_symbols FROM system.trace_log LIMIT 1 @@ -121,13 +180,6 @@ The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) funct ```text Row 1: ────── -event_date: 2019-11-20 -event_time: 2019-11-20 16:57:59 -revision: 54429 -timer_type: Real -thread_number: 48 -query_id: 724028bf-f550-45aa-910d-2af6212b94ac -trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583] trace_symbols: _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE _ZNK2DB10Aggregator21executeWithoutKeyImplERPcmPNS0_28AggregateFunctionInstructionEPNS_5ArenaE _ZN2DB10Aggregator14executeOnBlockESt6vectorIN3COWINS_7IColumnEE13immutable_ptrIS3_EESaIS6_EEmRNS_22AggregatedDataVariantsERS1_IPKS3_SaISC_EERS1_ISE_SaISE_EERb @@ -167,6 +219,7 @@ demangle(symbol) **Returned value** - Name of the C++ function. +- Empty string if a symbol is not valid. Type: [String](../../data_types/string.md). @@ -180,9 +233,38 @@ SET allow_introspection_functions=1 Selecting the first string from the `trace_log` system table: +```sql +SELECT * FROM system.trace_log LIMIT 1 \G +``` +```text +Row 1: +────── +event_date: 2019-11-20 +event_time: 2019-11-20 16:57:59 +revision: 54429 +timer_type: Real +thread_number: 48 +query_id: 724028bf-f550-45aa-910d-2af6212b94ac +trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583] +``` + +The `trace` field contains the stack trace at the moment of sampling. + +Getting a function name for a single address: + +```sql +SELECT demangle(addressToSymbol(94138803686098)) \G +``` +```text +Row 1: +────── +demangle(addressToSymbol(94138803686098)): DB::IAggregateFunctionHelper > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const +``` + +Applying the function to the whole stack trace: + ```sql SELECT - *, arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS trace_functions FROM system.trace_log LIMIT 1 @@ -191,17 +273,9 @@ LIMIT 1 The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows to process each individual element of the `trace` array by the `demangle` function. The result of this processing you see in the `trace_functions` column of output. - ```text Row 1: ────── -event_date: 2019-11-20 -event_time: 2019-11-20 16:57:59 -revision: 54429 -timer_type: Real -thread_number: 48 -query_id: 724028bf-f550-45aa-910d-2af6212b94ac -trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583] trace_functions: DB::IAggregateFunctionHelper > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const DB::Aggregator::executeWithoutKeyImpl(char*&, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, DB::Arena*) const DB::Aggregator::executeOnBlock(std::vector::immutable_ptr, std::allocator::immutable_ptr > >, unsigned long, DB::AggregatedDataVariants&, std::vector >&, std::vector >, std::allocator > > >&, bool&) From f0e9715f108380687e04fb14b7bf2632adec8174 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Dec 2019 20:36:02 +0300 Subject: [PATCH 57/62] Fix test --- dbms/tests/integration/test_storage_s3/test.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/dbms/tests/integration/test_storage_s3/test.py b/dbms/tests/integration/test_storage_s3/test.py index 27280c4784d..6fbf25f8fa8 100644 --- a/dbms/tests/integration/test_storage_s3/test.py +++ b/dbms/tests/integration/test_storage_s3/test.py @@ -86,6 +86,7 @@ def get_nginx_access_logs(): def cluster(): try: cluster = ClickHouseCluster(__file__) + cluster.add_instance("restricted_dummy", main_configs=["configs/config_for_test_remote_host_filter.xml"], with_minio=True) cluster.add_instance("dummy", with_minio=True) logging.info("Starting cluster...") cluster.start() @@ -233,14 +234,14 @@ def test_multipart_put(cluster, maybe_auth, positive): assert csv_data == get_s3_file_content(cluster, bucket, filename) -def test_remote_host_filter(started_cluster): - instance = started_cluster.instances["dummy"] + +def test_remote_host_filter(cluster): + instance = cluster.instances["restricted_dummy"] format = "column1 UInt32, column2 UInt32, column3 UInt32" - put_communication_data(started_cluster, "=== RemoteHostFilter test ===") - query = "select *, column1*column2*column3 from s3('http://{}:{}/', 'CSV', '{}')".format("invalid_host", started_cluster.redirecting_to_http_port, format) + query = "select *, column1*column2*column3 from s3('http://{}:{}/', 'CSV', '{}')".format("invalid_host", cluster.minio_redirect_port, format) assert "not allowed in config.xml" in instance.query_and_get_error(query) other_values = "(1, 1, 1), (1, 1, 1), (11, 11, 11)" - query = "insert into table function s3('http://{}:{}/{}/test.csv', 'CSV', '{}') values {}".format("invalid_host", started_cluster.redirecting_preserving_data_port, started_cluster.bucket, format, other_values) + query = "insert into table function s3('http://{}:{}/{}/test.csv', 'CSV', '{}') values {}".format("invalid_host", cluster.minio_port, cluster.minio_bucket, format, other_values) assert "not allowed in config.xml" in instance.query_and_get_error(query) From fa4728d44f0ed5027d5176833dc395f1dd1eae15 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 3 Dec 2019 20:57:00 +0300 Subject: [PATCH 58/62] Fix style --- dbms/src/IO/ReadBufferFromS3.h | 2 +- dbms/src/Storages/StorageS3.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/IO/ReadBufferFromS3.h b/dbms/src/IO/ReadBufferFromS3.h index 0e9dcb95a62..debe5211a0c 100644 --- a/dbms/src/IO/ReadBufferFromS3.h +++ b/dbms/src/IO/ReadBufferFromS3.h @@ -28,7 +28,7 @@ public: const String & access_key_id_, const String & secret_access_key_, const ConnectionTimeouts & timeouts = {}, - const RemoteHostFilter & remote_host_filter_ = {}); + const RemoteHostFilter & remote_host_filter_ = {}); bool nextImpl() override; }; diff --git a/dbms/src/Storages/StorageS3.cpp b/dbms/src/Storages/StorageS3.cpp index 65d9f41d23c..e9dacf4ff4d 100644 --- a/dbms/src/Storages/StorageS3.cpp +++ b/dbms/src/Storages/StorageS3.cpp @@ -99,7 +99,7 @@ namespace secret_access_key, min_upload_part_size, timeouts, - context.getRemoteHostFilter()); + context.getRemoteHostFilter()); writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context); } From f737f222b2cceb7f4d57ddb2faa8d0274c234f64 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 4 Dec 2019 12:03:50 +0300 Subject: [PATCH 59/62] Remove link to past meetup --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index ae4abb10941..5d9faa11fbe 100644 --- a/README.md +++ b/README.md @@ -14,5 +14,4 @@ ClickHouse is an open-source column-oriented database management system that all ## Upcoming Events -* [ClickHouse Meetup in San Francisco](https://www.eventbrite.com/e/clickhouse-december-meetup-registration-78642047481) on December 3. * [ClickHouse Meetup in Moscow](https://yandex.ru/promo/clickhouse/moscow-december-2019) on December 11. From ace44e07170a94641690e3904edf230d3789ef42 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 4 Dec 2019 12:34:05 +0300 Subject: [PATCH 60/62] Add test for incorrect settings --- .../queries/0_stateless/01039_test_setting_parse.reference | 2 ++ .../tests/queries/0_stateless/01039_test_setting_parse.sql | 7 +++++++ 2 files changed, 9 insertions(+) create mode 100644 dbms/tests/queries/0_stateless/01039_test_setting_parse.reference create mode 100644 dbms/tests/queries/0_stateless/01039_test_setting_parse.sql diff --git a/dbms/tests/queries/0_stateless/01039_test_setting_parse.reference b/dbms/tests/queries/0_stateless/01039_test_setting_parse.reference new file mode 100644 index 00000000000..30237035c2c --- /dev/null +++ b/dbms/tests/queries/0_stateless/01039_test_setting_parse.reference @@ -0,0 +1,2 @@ +10000000001 +10000000001 diff --git a/dbms/tests/queries/0_stateless/01039_test_setting_parse.sql b/dbms/tests/queries/0_stateless/01039_test_setting_parse.sql new file mode 100644 index 00000000000..494e43b001f --- /dev/null +++ b/dbms/tests/queries/0_stateless/01039_test_setting_parse.sql @@ -0,0 +1,7 @@ +SET max_memory_usage = 10000000001; + +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; + +SET max_memory_usage = '1G'; -- { serverError 27 } + +SELECT value FROM system.settings WHERE name = 'max_memory_usage'; From d1d3bac7441f8d8be45303f74d4ca0766ed3d2df Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 4 Dec 2019 12:48:08 +0300 Subject: [PATCH 61/62] Better --- dbms/src/IO/ReadHelpers.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index c7568fff4ba..eea3498e39e 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -884,23 +884,18 @@ inline T parse(const char * data, size_t size) return res; } +/// Read something from text format, but expect complete parse of given text +/// For example: 723145 -- ok, 213MB -- not ok template -std::enable_if_t, T> -inline completeParse(const char * data, size_t size) +inline T completeParse(const char * data, size_t size) { T res; ReadBufferFromMemory buf(data, size); - completeReadIntTextImpl(res, buf); + readText(res, buf); + assertEOF(buf); return res; } -template -std::enable_if_t, T> -inline completeParse(const char * data, size_t size) -{ - return parse(data, size); -} - template inline T completeParse(const String & s) { From eb17d51dd44caf008b77494a8f76c2d023bcb473 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 4 Dec 2019 12:49:54 +0300 Subject: [PATCH 62/62] Remove unused function --- dbms/src/IO/ReadHelpers.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dbms/src/IO/ReadHelpers.h b/dbms/src/IO/ReadHelpers.h index eea3498e39e..19c33f5a83d 100644 --- a/dbms/src/IO/ReadHelpers.h +++ b/dbms/src/IO/ReadHelpers.h @@ -308,13 +308,6 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) return ReturnType(true); } -template -void completeReadIntTextImpl(T & x, ReadBuffer & buf) -{ - readIntTextImpl(x, buf); - assertEOF(buf); -} - template void readIntText(T & x, ReadBuffer & buf) {