Merge branch 'master' of github.com:ClickHouse/ClickHouse

This commit is contained in:
Sergei Shtykov 2019-12-04 16:38:19 +03:00
commit b7794a1d36
34 changed files with 673 additions and 21 deletions

View File

@ -14,5 +14,4 @@ ClickHouse is an open-source column-oriented database management system that all
## Upcoming Events
* [ClickHouse Meetup in Moscow](https://yandex.ru/promo/clickhouse/moscow-december-2019) on December 11.

View File

@ -243,6 +243,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
    }
#endif

    global_context->setRemoteHostFilter(config());

    std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
    std::string default_database = config().getString("default_database", "default");

View File

@ -3,6 +3,25 @@
NOTE: User and query level settings are set up in "users.xml" file.
-->
<yandex>
    <!-- The list of hosts allowed for use in URL-related storage engines and table functions.
         If this section is not present in the configuration, all hosts are allowed.
    -->
    <remote_url_allow_hosts>
        <!-- The host should be specified exactly as in the URL. The name is checked before DNS resolution.
             Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
             If the port is explicitly specified in the URL, the host:port pair is checked as a whole.
             If a host is specified here without a port, any port for this host is allowed:
             "yandex.ru" allows "yandex.ru:443", "yandex.ru:80" etc., but "yandex.ru:80" allows only "yandex.ru:80".
             If the host is specified as an IP address, it is checked as written in the URL. Example: "[2a02:6b8:a::a]".
             If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
        -->
        <!-- A regular expression can be specified. The RE2 engine is used for regexps.
             Regexps are not anchored: don't forget to add ^ and $. Also don't forget to escape the dot (.) metacharacter
             (forgetting to do so is a common source of error).
        -->
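        <!-- Illustrative entries (an editor's example, not shipped defaults): uncommented, these would
             allow yandex.ru on any port, hdfs1:50070 exactly as written, and any lowercase .ru host.
        <host>yandex.ru</host>
        <host>hdfs1:50070</host>
        <host_regexp>^[a-z]+\.ru$</host_regexp>
        -->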
    </remote_url_allow_hosts>

    <logger>
        <!-- Possible levels: https://github.com/pocoproject/poco/blob/develop/Foundation/include/Poco/Logger.h#L105 -->
        <level>trace</level>
@ -15,7 +34,6 @@
    <!--display_name>production</display_name--> <!-- It is the name that will be shown in the client. -->

    <http_port>8123</http_port>
    <tcp_port>9000</tcp_port>
    <!-- For HTTPS and SSL over native protocol. -->
    <!--
    <https_port>8443</https_port>

View File

@ -465,6 +465,7 @@ namespace ErrorCodes
    extern const int UNKNOWN_DICTIONARY = 488;
    extern const int INCORRECT_DICTIONARY_DEFINITION = 489;
    extern const int CANNOT_FORMAT_DATETIME = 490;
    extern const int UNACCEPTABLE_URL = 491;
    extern const int KEEPER_EXCEPTION = 999;
    extern const int POCO_EXCEPTION = 1000;

View File

@ -0,0 +1,62 @@
#include <re2/re2.h>
#include <Common/RemoteHostFilter.h>
#include <Poco/URI.h>
#include <Formats/FormatFactory.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int UNACCEPTABLE_URL;
}

void RemoteHostFilter::checkURL(const Poco::URI & uri) const
{
    /// The host is checked both bare and together with its port.
    if (!checkForDirectEntry(uri.getHost()) &&
        !checkForDirectEntry(uri.getHost() + ":" + toString(uri.getPort())))
        throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
}

void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) const
{
    if (!checkForDirectEntry(host) &&
        !checkForDirectEntry(host + ":" + port))
        throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
}

void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    if (config.has("remote_url_allow_hosts"))
    {
        std::vector<std::string> keys;
        config.keys("remote_url_allow_hosts", keys);
        for (const auto & key : keys)
        {
            /// Poco suffixes repeated tags (host, host[1], ...), hence startsWith.
            /// host_regexp must be tested first: startsWith(key, "host") would also match it.
            if (startsWith(key, "host_regexp"))
                regexp_hosts.push_back(config.getString("remote_url_allow_hosts." + key));
            else if (startsWith(key, "host"))
                primary_hosts.insert(config.getString("remote_url_allow_hosts." + key));
        }
    }
}

bool RemoteHostFilter::checkForDirectEntry(const std::string & str) const
{
    /// An empty filter (no entries configured) allows everything.
    if (primary_hosts.empty() && regexp_hosts.empty())
        return true;

    if (primary_hosts.count(str))
        return true;

    for (const auto & regexp : regexp_hosts)
        if (re2::RE2::FullMatch(str, regexp))
            return true;

    return false;
}
}
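
As a quick orientation for reviewers, here is a minimal usage sketch of the new class. It is not part of the commit: the `main` wrapper, the inline XML snippet, and the include set are illustrative assumptions based on the code above.

```cpp
#include <iostream>
#include <sstream>
#include <Poco/AutoPtr.h>
#include <Poco/URI.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Common/Exception.h>
#include <Common/RemoteHostFilter.h>

int main()
{
    /// Illustrative config: one exact host entry plus one anchored regexp entry.
    std::istringstream xml(
        "<yandex><remote_url_allow_hosts>"
        "<host>yandex.ru</host>"
        "<host_regexp>^[a-z]+\\.ru$</host_regexp>"
        "</remote_url_allow_hosts></yandex>");
    Poco::AutoPtr<Poco::Util::XMLConfiguration> config(new Poco::Util::XMLConfiguration(xml));

    DB::RemoteHostFilter filter;
    filter.setValuesFromConfig(*config);

    filter.checkHostAndPort("yandex.ru", "443");    /// passes: exact <host> entry, any port
    filter.checkHostAndPort("mail.ru", "80");       /// passes: matches the <host_regexp> entry

    try
    {
        filter.checkURL(Poco::URI("https://example.com/"));    /// throws UNACCEPTABLE_URL
    }
    catch (const DB::Exception & e)
    {
        std::cerr << e.message() << std::endl;
    }
}
```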

View File

@ -0,0 +1,30 @@
#pragma once

#include <vector>
#include <unordered_set>
#include <Poco/URI.h>
#include <Poco/Util/AbstractConfiguration.h>

namespace DB
{
class RemoteHostFilter
{
    /**
     * This class checks whether a URL is allowed.
     * If both primary_hosts and regexp_hosts are empty, all URLs are allowed.
     */
public:
    void checkURL(const Poco::URI & uri) const; /// If the URL is not allowed in config.xml, throws an UNACCEPTABLE_URL exception.

    void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config);

    void checkHostAndPort(const std::string & host, const std::string & port) const; /// Does the same as checkURL, but for a host and port given separately.

private:
    std::unordered_set<std::string> primary_hosts;  /// Hosts allowed by exact (<host>) entries in config.xml
    std::vector<std::string> regexp_hosts;          /// Hosts allowed by regexp (<host_regexp>) entries in config.xml

    bool checkForDirectEntry(const std::string & str) const; /// Checks whether str is in primary_hosts or matches regexp_hosts. If both are empty, returns true.
};
}

View File

@ -62,7 +62,7 @@ void SettingNumber<Type>::set(const Field & x)
template <typename Type>
void SettingNumber<Type>::set(const String & x)
{
    set(completeParse<Type>(x));
}
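
/// Behavioral note (editor's sketch, grounded in the 01039_test_setting_parse test added
/// in this commit, shown below): with completeParse, a value like '1G' for a numeric
/// setting is now rejected with an error instead of being silently read as 1.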
template <>

View File

@ -14,10 +14,12 @@ const int DEFAULT_S3_MAX_FOLLOW_GET_REDIRECT = 2;
ReadBufferFromS3::ReadBufferFromS3(const Poco::URI & uri_,
    const String & access_key_id_,
    const String & secret_access_key_,
    const ConnectionTimeouts & timeouts,
    const RemoteHostFilter & remote_host_filter_)
    : ReadBuffer(nullptr, 0)
    , uri {uri_}
    , session {makeHTTPSession(uri_, timeouts)}
    , remote_host_filter {remote_host_filter_}
{
    Poco::Net::HTTPResponse response;
    std::unique_ptr<Poco::Net::HTTPRequest> request;
@ -52,6 +54,7 @@ ReadBufferFromS3::ReadBufferFromS3(const Poco::URI & uri_,
            break;

        uri = location_iterator->second;
        remote_host_filter.checkURL(uri);   /// Each S3 redirect target is re-checked against the allow-list.
        session = makeHTTPSession(uri, timeouts);
    }

View File

@ -21,11 +21,14 @@ protected:
    std::istream * istr;    /// owned by session
    std::unique_ptr<ReadBuffer> impl;
    RemoteHostFilter remote_host_filter;

public:
    explicit ReadBufferFromS3(const Poco::URI & uri_,
        const String & access_key_id_,
        const String & secret_access_key_,
        const ConnectionTimeouts & timeouts = {},
        const RemoteHostFilter & remote_host_filter_ = {});

    bool nextImpl() override;
};

View File

@ -877,6 +877,30 @@ inline T parse(const char * data, size_t size)
    return res;
}

/// Read something from text format, but expect a complete parse of the given text.
/// For example: "723145" is ok, "213MB" is not.
template <typename T>
inline T completeParse(const char * data, size_t size)
{
    T res;
    ReadBufferFromMemory buf(data, size);
    readText(res, buf);
    assertEOF(buf);     /// Throws if any unconsumed input remains.
    return res;
}

template <typename T>
inline T completeParse(const String & s)
{
    return completeParse<T>(s.data(), s.size());
}

template <typename T>
inline T completeParse(const char * data)
{
    return completeParse<T>(data, strlen(data));
}
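
/// Contrast with plain parse() below (editor's sketch; the values are assumed):
///     parse<UInt64>("123MB")          returns 123 -- the trailing "MB" is silently ignored
///     completeParse<UInt64>("123")    returns 123
///     completeParse<UInt64>("123MB")  throws      -- assertEOF() finds the unread "MB"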
template <typename T>
inline T parse(const char * data)
{
@ -916,12 +940,12 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
template <class TReadBuffer, class... Types>
std::unique_ptr<ReadBuffer> getReadBuffer(const DB::CompressionMethod method, Types &&... args)
{
    if (method == DB::CompressionMethod::Gzip)
    {
        auto read_buf = std::make_unique<TReadBuffer>(std::forward<Types>(args)...);
        return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method);
    }
    return std::make_unique<TReadBuffer>(args...);
}
/** This function just copies the data from buffer's internal position (in.position())

View File

@ -101,6 +101,7 @@ namespace detail
        const Poco::Net::HTTPBasicCredentials & credentials;
        std::vector<Poco::Net::HTTPCookie> cookies;
        HTTPHeaderEntries http_header_entries;
        RemoteHostFilter remote_host_filter;

        std::istream * call(const Poco::URI uri_, Poco::Net::HTTPResponse & response)
        {
@ -157,7 +158,8 @@ namespace detail
            OutStreamCallback out_stream_callback_ = {},
            const Poco::Net::HTTPBasicCredentials & credentials_ = {},
            size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
            HTTPHeaderEntries http_header_entries_ = {},
            const RemoteHostFilter & remote_host_filter_ = {})
            : ReadBuffer(nullptr, 0)
            , uri {uri_}
            , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
@ -165,6 +167,7 @@
            , out_stream_callback {out_stream_callback_}
            , credentials {credentials_}
            , http_header_entries {http_header_entries_}
            , remote_host_filter {remote_host_filter_}
        {
            Poco::Net::HTTPResponse response;
@ -173,6 +176,7 @@ namespace detail
            while (isRedirect(response.getStatus()))
            {
                Poco::URI uri_redirect(response.get("Location"));
                remote_host_filter.checkURL(uri_redirect);  /// Every HTTP redirect target is re-checked against the allow-list.
                session->updateSession(uri_redirect);
@ -243,8 +247,9 @@ public:
        const DB::SettingUInt64 max_redirects = 0,
        const Poco::Net::HTTPBasicCredentials & credentials_ = {},
        size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
        const HTTPHeaderEntries & http_header_entries_ = {},
        const RemoteHostFilter & remote_host_filter_ = {})
        : Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_)
{
}
};

View File

@ -34,7 +34,8 @@ WriteBufferFromS3::WriteBufferFromS3(
    const String & access_key_id_,
    const String & secret_access_key_,
    size_t minimum_upload_part_size_,
    const ConnectionTimeouts & timeouts_,
    const RemoteHostFilter & remote_host_filter_)
    : BufferWithOwnMemory<WriteBuffer>(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
    , uri {uri_}
    , access_key_id {access_key_id_}
@ -43,6 +44,7 @@ WriteBufferFromS3::WriteBufferFromS3(
    , timeouts {timeouts_}
    , temporary_buffer {std::make_unique<WriteBufferFromString>(buffer_string)}
    , last_part_size {0}
    , remote_host_filter(remote_host_filter_)
{
    initiate();
@ -134,6 +136,7 @@ void WriteBufferFromS3::initiate()
            break;

        initiate_uri = location_iterator->second;
        remote_host_filter.checkURL(initiate_uri);
    }

    assertResponseIsOk(*request_ptr, response, *istr);

View File

@ -28,6 +28,7 @@ private:
    String buffer_string;
    std::unique_ptr<WriteBufferFromString> temporary_buffer;
    size_t last_part_size;
    RemoteHostFilter remote_host_filter;

    /// Upload to S3 is made in parts.
    /// We initiate the upload, then upload each part and get an ETag as a response, and then finish the upload by listing all our parts.
@ -39,7 +40,8 @@ public:
        const String & access_key_id,
        const String & secret_access_key,
        size_t minimum_upload_part_size_,
        const ConnectionTimeouts & timeouts = {},
        const RemoteHostFilter & remote_host_filter_ = {});

    void nextImpl() override;

View File

@ -53,7 +53,7 @@
#include <Common/ShellCommand.h>
#include <Common/TraceCollector.h>
#include <common/logger_useful.h>
#include <Common/RemoteHostFilter.h>
namespace ProfileEvents
{
@ -158,8 +158,9 @@ struct ContextShared
    ActionLocksManagerPtr action_locks_manager;         /// Set of storages' action lockers
    std::optional<SystemLogs> system_logs;              /// Used to log queries and operations on parts
    RemoteHostFilter remote_host_filter;                /// Allowed URLs from config.xml
    std::unique_ptr<TraceCollector> trace_collector;    /// Thread collecting traces from threads executing queries
/// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests.
class SessionKeyHash
@ -1583,6 +1584,16 @@ String Context::getInterserverScheme() const
return shared->interserver_scheme;
}
void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config)
{
    shared->remote_host_filter.setValuesFromConfig(config);
}

const RemoteHostFilter & Context::getRemoteHostFilter() const
{
    return shared->remote_host_filter;
}
UInt16 Context::getTCPPort() const
{
auto lock = getLock();

View File

@ -22,6 +22,7 @@
#include <mutex>
#include <optional>
#include <thread>
#include <Common/RemoteHostFilter.h>
namespace Poco
@ -77,6 +78,7 @@ using ActionLocksManagerPtr = std::shared_ptr<ActionLocksManager>;
class ShellCommand;
class ICompressionCodec;
class SettingsConstraints;
class RemoteHostFilter;
class IOutputFormat;
using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
@ -354,6 +356,10 @@ public:
    void setInterserverScheme(const String & scheme);
    String getInterserverScheme() const;

    /// Storage of allowed hosts from config.xml.
    void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config);
    const RemoteHostFilter & getRemoteHostFilter() const;

    /// The port on which the server listens for SQL queries.
    UInt16 getTCPPort() const;

View File

@ -49,6 +49,7 @@ StorageHDFS::StorageHDFS(const String & uri_,
    , context(context_)
    , compression_method(compression_method_)
{
    context.getRemoteHostFilter().checkURL(Poco::URI(uri));
    setColumns(columns_);
    setConstraints(constraints_);
}

View File

@ -43,7 +43,7 @@ namespace
            const CompressionMethod compression_method)
            : name(name_)
        {
            read_buf = getReadBuffer<ReadBufferFromS3>(compression_method, uri, access_key_id, secret_access_key, timeouts, context.getRemoteHostFilter());
            reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
        }
@ -98,7 +98,8 @@ namespace
                access_key_id,
                secret_access_key,
                min_upload_part_size,
                timeouts,
                context.getRemoteHostFilter());
            writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
        }
@ -155,6 +156,7 @@ StorageS3::StorageS3(
    , min_upload_part_size(min_upload_part_size_)
    , compression_method(compression_method_)
{
    context_global.getRemoteHostFilter().checkURL(uri_);
    setColumns(columns_);
    setConstraints(constraints_);
}

View File

@ -24,6 +24,7 @@ namespace DB
namespace ErrorCodes
{
    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
    extern const int UNACCEPTABLE_URL;
}

IStorageURLBase::IStorageURLBase(
@ -37,6 +38,7 @@ IStorageURLBase::IStorageURLBase(
    const String & compression_method_)
    : uri(uri_), context_global(context_), compression_method(compression_method_), format_name(format_name_), table_name(table_name_), database_name(database_name_)
{
    context_global.getRemoteHostFilter().checkURL(uri);
    setColumns(columns_);
    setConstraints(constraints_);
}
@ -58,7 +60,18 @@ namespace
            const CompressionMethod compression_method)
            : name(name_)
        {
            read_buf = getReadBuffer<ReadWriteBufferFromHTTP>(
                compression_method,
                uri,
                method,
                callback,
                timeouts,
                context.getSettingsRef().max_http_get_redirects,
                Poco::Net::HTTPBasicCredentials{},
                DBMS_DEFAULT_BUFFER_SIZE,
                ReadWriteBufferFromHTTP::HTTPHeaderEntries{},
                context.getRemoteHostFilter());
            reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
        }

View File

@ -155,6 +155,20 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS);
auto maybe_secure_port = context.getTCPPortSecure();
/// Check host and port on affiliation allowed hosts.
for (auto hosts : names)
{
for (auto host : hosts)
{
size_t colon = host.find(':');
if (colon == String::npos)
context.getRemoteHostFilter().checkHostAndPort(host, toString((secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort())));
else
context.getRemoteHostFilter().checkHostAndPort(host.substr(0, colon), host.substr(colon + 1));
}
}
cluster = std::make_shared<Cluster>(context.getSettings(), names, username, password, (secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()), false, secure);
}
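
/// For intuition (editor's sketch; 9000 and 9440 are the default plain/secure TCP ports,
/// matching the test config config_for_remote.xml below):
///     remote('example01-01-1', ...)        -> checkHostAndPort("example01-01-1", "9000")
///     remoteSecure('example01-01-1', ...)  -> checkHostAndPort("example01-01-1", "9440")
///     remote('localhost:800', ...)         -> checkHostAndPort("localhost", "800")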

View File

@ -0,0 +1,5 @@
<yandex>
    <remote_url_allow_hosts>
        <host>hdfs1:50070</host>
    </remote_url_allow_hosts>
</yandex>

View File

@ -0,0 +1,10 @@
<yandex>
    <remote_url_allow_hosts>
        <host>localhost:9000</host>
        <host>localhost:9440</host>
        <host>example01-01-1</host>
        <host>example01-01-2</host>
        <host>example01-02-1</host>
        <host>example01-02-2</host>
    </remote_url_allow_hosts>
</yandex>

View File

@ -0,0 +1,7 @@
<yandex>
    <remote_url_allow_hosts>
        <host>host:80</host>
        <host_regexp>^[a-z]*\.ru$</host_regexp>
    </remote_url_allow_hosts>
</yandex>
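<!-- Editor's note, grounded in test_config_with_hosts below: with this config, "host:80" and any
     lowercase ".ru" host (e.g. "yandex.ru") are allowed, while "host:123" and "yandex2.ru" are rejected. -->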

View File

@ -0,0 +1,8 @@
<yandex>
    <remote_url_allow_hosts>
        <host>host:80</host>
        <host>host:123</host>
        <host>yandex.ru</host>
    </remote_url_allow_hosts>
</yandex>

View File

@ -0,0 +1,7 @@
<yandex>
    <remote_url_allow_hosts>
        <host_regexp>^[a-z]*:80$</host_regexp>
        <host_regexp>^[a-z]*\.ru$</host_regexp>
    </remote_url_allow_hosts>
</yandex>

View File

@ -0,0 +1,5 @@
<yandex>
    <remote_url_allow_hosts>
    </remote_url_allow_hosts>
</yandex>
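<!-- Editor's note: an empty section leaves both primary_hosts and regexp_hosts empty, so
     RemoteHostFilter::checkForDirectEntry() returns true and every host is allowed, as
     test_config_without_allowed_hosts below confirms. -->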

View File

@ -0,0 +1,74 @@
import time
import pytest

from helpers.hdfs_api import HDFSApi
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/config_with_hosts.xml'])
node2 = cluster.add_instance('node2', main_configs=['configs/config_with_only_primary_hosts.xml'])
node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_regexp_hosts.xml'])
node4 = cluster.add_instance('node4', main_configs=['configs/config_without_allowed_hosts.xml'])
node6 = cluster.add_instance('node6', main_configs=['configs/config_for_remote.xml'])
node7 = cluster.add_instance('node7', main_configs=['configs/config_for_redirect.xml'], with_hdfs=True)

@pytest.fixture(scope="module")
def start_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()

def test_config_with_hosts(start_cluster):
    assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)") == ""
    assert node1.query("CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert "not allowed" in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)")
    assert "not allowed" in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)")

def test_config_with_only_primary_hosts(start_cluster):
    assert node2.query("CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)") == ""
    assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)") == ""
    assert node2.query("CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)") == ""
    assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)")
    assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)")
    assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)")

def test_config_with_only_regexp_hosts(start_cluster):
    assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == ""
    assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)")
    assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")

def test_config_without_allowed_hosts(start_cluster):
    assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == ""
    assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=URL('https://host', HDFS)") == ""
    assert node4.query("CREATE TABLE table_test_4_3 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('ftp://something.com', S3)") == ""

def test_table_function_remote(start_cluster):
    assert node6.query("SELECT * FROM remote('localhost', system, events)") != ""
    assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != ""
    assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remoteSecure('localhost:800', system, events)")
    assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remote('localhost:800', system, metrics)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-1', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)")
    assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)")
    assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)")
    assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)")

def test_redirect(start_cluster):
    hdfs_api = HDFSApi("root")
    hdfs_api.write_data("/simple_storage", "1\t\n")
    assert hdfs_api.read_data("/simple_storage") == "1\t\n"
    node7.query("CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)")
    assert "not allowed" in node7.query_and_get_error("SET max_http_get_redirects=1; SELECT * from table_test_7_1")

def test_HDFS(start_cluster):
    assert "not allowed" in node7.query_and_get_error("CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')")
    assert "not allowed" in node7.query_and_get_error("SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')")

View File

@ -0,0 +1,5 @@
<yandex>
    <remote_url_allow_hosts>
        <host_regexp>^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.|$)){4}$</host_regexp>
    </remote_url_allow_hosts>
</yandex>

View File

@ -86,6 +86,7 @@ def get_nginx_access_logs():
def cluster():
    try:
        cluster = ClickHouseCluster(__file__)
        cluster.add_instance("restricted_dummy", main_configs=["configs/config_for_test_remote_host_filter.xml"], with_minio=True)
        cluster.add_instance("dummy", with_minio=True)
        logging.info("Starting cluster...")
        cluster.start()
@ -232,3 +233,15 @@ def test_multipart_put(cluster, maybe_auth, positive):
    assert uploaded_parts > 1
    assert csv_data == get_s3_file_content(cluster, bucket, filename)

def test_remote_host_filter(cluster):
    instance = cluster.instances["restricted_dummy"]
    format = "column1 UInt32, column2 UInt32, column3 UInt32"

    query = "select *, column1*column2*column3 from s3('http://{}:{}/', 'CSV', '{}')".format("invalid_host", cluster.minio_redirect_port, format)
    assert "not allowed in config.xml" in instance.query_and_get_error(query)

    other_values = "(1, 1, 1), (1, 1, 1), (11, 11, 11)"
    query = "insert into table function s3('http://{}:{}/{}/test.csv', 'CSV', '{}') values {}".format("invalid_host", cluster.minio_port, cluster.minio_bucket, format, other_values)
    assert "not allowed in config.xml" in instance.query_and_get_error(query)

View File

@ -0,0 +1,2 @@
10000000001
10000000001

View File

@ -0,0 +1,7 @@
SET max_memory_usage = 10000000001;
SELECT value FROM system.settings WHERE name = 'max_memory_usage';

SET max_memory_usage = '1G'; -- { serverError 27 }
SELECT value FROM system.settings WHERE name = 'max_memory_usage';

View File

@ -1022,6 +1022,17 @@ Lower values mean higher priority. Threads with low `nice` priority values are e
Default value: 0.
## allow_introspection_functions {#settings-allow_introspection_functions}

Enables or disables [introspection functions](../../query_language/functions/introspection.md) for query profiling.

Possible values:

- 1 — Introspection functions enabled.
- 0 — Introspection functions disabled.

Default value: 0.
## input_format_parallel_parsing
- Type: bool

View File

@ -0,0 +1,298 @@
# Introspection Functions
You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling.
!!! warning "Warning"
    These functions are slow and may have security implications.

For proper operation of introspection functions:

- Install the `clickhouse-common-static-dbg` package.
- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1. For security reasons, introspection functions are disabled by default.

ClickHouse saves profiler reports to the [trace_log](../../operations/system_tables.md#system_tables-trace_log) system table. Make sure the table and the profiler are configured properly.
## addressToLine {#addresstoline}
Converts a virtual memory address inside the ClickHouse server process to a filename and line number in the ClickHouse source code.

If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.
**Syntax**
```sql
addressToLine(address_of_binary_instruction)
```
**Parameters**
- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of instruction in a running process.
**Returned value**

- Source code filename and the line number in this file, delimited by a colon.
    For example, `/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199`, where `199` is a line number.
- Name of a binary, if the function couldn't find the debug information.
- Empty string, if the address is not valid.

Type: [String](../../data_types/string.md).
**Example**
Enabling introspection functions:
```sql
SET allow_introspection_functions=1
```
Selecting the first row from the `trace_log` system table:
```sql
SELECT * FROM system.trace_log LIMIT 1 \G
```
```text
Row 1:
──────
event_date: 2019-11-19
event_time: 2019-11-19 18:57:23
revision: 54429
timer_type: Real
thread_number: 48
query_id: 421b6855-1858-45a5-8f37-f383409d6d72
trace: [140658411141617,94784174532828,94784076370703,94784076372094,94784076361020,94784175007680,140658411116251,140658403895439]
```
The `trace` field contains the stack trace at the moment of sampling.
Getting the source code filename and the line number for a single address:
```sql
SELECT addressToLine(94784076370703) \G
```
```text
Row 1:
──────
addressToLine(94784076370703): /build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199
```
Applying the function to the whole stack trace:
```sql
SELECT
arrayStringConcat(arrayMap(x -> addressToLine(x), trace), '\n') AS trace_source_code_lines
FROM system.trace_log
LIMIT 1
\G
```
The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows processing each individual element of the `trace` array with the `addressToLine` function. The result of this processing appears in the `trace_source_code_lines` column of the output.
```text
Row 1:
──────
trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
/usr/lib/debug/usr/bin/clickhouse
/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199
/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.h:155
/usr/include/c++/9/bits/atomic_base.h:551
/usr/lib/debug/usr/bin/clickhouse
/lib/x86_64-linux-gnu/libpthread-2.27.so
/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97
```
## addressToSymbol {#addresstosymbol}
Converts a virtual memory address inside the ClickHouse server process to a symbol from the ClickHouse object files.
**Syntax**
```sql
addressToSymbol(address_of_binary_instruction)
```
**Parameters**
- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of instruction in a running process.
**Returned value**
- Symbol from ClickHouse object files.
- Empty string, if the address is not valid.
Type: [String](../../data_types/string.md).
**Example**
Enabling introspection functions:
```sql
SET allow_introspection_functions=1
```
Selecting the first row from the `trace_log` system table:
```sql
SELECT * FROM system.trace_log LIMIT 1 \G
```
```text
Row 1:
──────
event_date: 2019-11-20
event_time: 2019-11-20 16:57:59
revision: 54429
timer_type: Real
thread_number: 48
query_id: 724028bf-f550-45aa-910d-2af6212b94ac
trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583]
```
The `trace` field contains the stack trace at the moment of sampling.
Getting a symbol for a single address:
```sql
SELECT addressToSymbol(94138803686098) \G
```
```text
Row 1:
──────
addressToSymbol(94138803686098): _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE
```
Applying the function to the whole stack trace:
```sql
SELECT
arrayStringConcat(arrayMap(x -> addressToSymbol(x), trace), '\n') AS trace_symbols
FROM system.trace_log
LIMIT 1
\G
```
The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows processing each individual element of the `trace` array with the `addressToSymbol` function. The result of this processing appears in the `trace_symbols` column of the output.
```text
Row 1:
──────
trace_symbols: _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE
_ZNK2DB10Aggregator21executeWithoutKeyImplERPcmPNS0_28AggregateFunctionInstructionEPNS_5ArenaE
_ZN2DB10Aggregator14executeOnBlockESt6vectorIN3COWINS_7IColumnEE13immutable_ptrIS3_EESaIS6_EEmRNS_22AggregatedDataVariantsERS1_IPKS3_SaISC_EERS1_ISE_SaISE_EERb
_ZN2DB10Aggregator14executeOnBlockERKNS_5BlockERNS_22AggregatedDataVariantsERSt6vectorIPKNS_7IColumnESaIS9_EERS6_ISB_SaISB_EERb
_ZN2DB10Aggregator7executeERKSt10shared_ptrINS_17IBlockInputStreamEERNS_22AggregatedDataVariantsE
_ZN2DB27AggregatingBlockInputStream8readImplEv
_ZN2DB17IBlockInputStream4readEv
_ZN2DB26ExpressionBlockInputStream8readImplEv
_ZN2DB17IBlockInputStream4readEv
_ZN2DB26ExpressionBlockInputStream8readImplEv
_ZN2DB17IBlockInputStream4readEv
_ZN2DB28AsynchronousBlockInputStream9calculateEv
_ZNSt17_Function_handlerIFvvEZN2DB28AsynchronousBlockInputStream4nextEvEUlvE_E9_M_invokeERKSt9_Any_data
_ZN14ThreadPoolImplI20ThreadFromGlobalPoolE6workerESt14_List_iteratorIS0_E
_ZZN20ThreadFromGlobalPoolC4IZN14ThreadPoolImplIS_E12scheduleImplIvEET_St8functionIFvvEEiSt8optionalImEEUlvE1_JEEEOS4_DpOT0_ENKUlvE_clEv
_ZN14ThreadPoolImplISt6threadE6workerESt14_List_iteratorIS0_E
execute_native_thread_routine
start_thread
clone
```
## demangle {#demangle}
Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) function to the C++ function name.
**Syntax**
```sql
demangle(symbol)
```
**Parameters**
- `symbol` ([String](../../data_types/string.md)) — Symbol from an object file.
**Returned value**
- Name of the C++ function.
- Empty string if a symbol is not valid.
Type: [String](../../data_types/string.md).
**Example**
Enabling introspection functions:
```sql
SET allow_introspection_functions=1
```
Selecting the first row from the `trace_log` system table:
```sql
SELECT * FROM system.trace_log LIMIT 1 \G
```
```text
Row 1:
──────
event_date: 2019-11-20
event_time: 2019-11-20 16:57:59
revision: 54429
timer_type: Real
thread_number: 48
query_id: 724028bf-f550-45aa-910d-2af6212b94ac
trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583]
```
The `trace` field contains the stack trace at the moment of sampling.
Getting a function name for a single address:
```sql
SELECT demangle(addressToSymbol(94138803686098)) \G
```
```text
Row 1:
──────
demangle(addressToSymbol(94138803686098)): DB::IAggregateFunctionHelper<DB::AggregateFunctionSum<unsigned long, unsigned long, DB::AggregateFunctionSumData<unsigned long> > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const
```
Applying the function to the whole stack trace:
```sql
SELECT
arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS trace_functions
FROM system.trace_log
LIMIT 1
\G
```
The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function allows processing each individual element of the `trace` array with the `demangle` function. The result of this processing appears in the `trace_functions` column of the output.
```text
Row 1:
──────
trace_functions: DB::IAggregateFunctionHelper<DB::AggregateFunctionSum<unsigned long, unsigned long, DB::AggregateFunctionSumData<unsigned long> > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const
DB::Aggregator::executeWithoutKeyImpl(char*&, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, DB::Arena*) const
DB::Aggregator::executeOnBlock(std::vector<COW<DB::IColumn>::immutable_ptr<DB::IColumn>, std::allocator<COW<DB::IColumn>::immutable_ptr<DB::IColumn> > >, unsigned long, DB::AggregatedDataVariants&, std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >&, std::vector<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >, std::allocator<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> > > >&, bool&)
DB::Aggregator::executeOnBlock(DB::Block const&, DB::AggregatedDataVariants&, std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >&, std::vector<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >, std::allocator<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> > > >&, bool&)
DB::Aggregator::execute(std::shared_ptr<DB::IBlockInputStream> const&, DB::AggregatedDataVariants&)
DB::AggregatingBlockInputStream::readImpl()
DB::IBlockInputStream::read()
DB::ExpressionBlockInputStream::readImpl()
DB::IBlockInputStream::read()
DB::ExpressionBlockInputStream::readImpl()
DB::IBlockInputStream::read()
DB::AsynchronousBlockInputStream::calculate()
std::_Function_handler<void (), DB::AsynchronousBlockInputStream::next()::{lambda()#1}>::_M_invoke(std::_Any_data const&)
ThreadPoolImpl<ThreadFromGlobalPool>::worker(std::_List_iterator<ThreadFromGlobalPool>)
ThreadFromGlobalPool::ThreadFromGlobalPool<ThreadPoolImpl<ThreadFromGlobalPool>::scheduleImpl<void>(std::function<void ()>, int, std::optional<unsigned long>)::{lambda()#3}>(ThreadPoolImpl<ThreadFromGlobalPool>::scheduleImpl<void>(std::function<void ()>, int, std::optional<unsigned long>)::{lambda()#3}&&)::{lambda()#1}::operator()() const
ThreadPoolImpl<std::thread>::worker(std::_List_iterator<std::thread>)
execute_native_thread_routine
start_thread
clone
```

View File

@ -119,6 +119,7 @@ nav:
    - 'Working with geographical coordinates': 'query_language/functions/geo.md'
    - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md'
    - 'Machine Learning Functions': 'query_language/functions/machine_learning_functions.md'
    - 'Introspection': 'query_language/functions/introspection.md'
    - 'Other': 'query_language/functions/other_functions.md'
- 'Aggregate Functions':
    - 'Introduction': 'query_language/agg_functions/index.md'