Merge branch 'master' of github.com:ClickHouse/ClickHouse
commit b7794a1d36
@@ -14,5 +14,4 @@ ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.

 ## Upcoming Events

-* [ClickHouse Meetup in San Francisco](https://www.eventbrite.com/e/clickhouse-december-meetup-registration-78642047481) on December 3.
 * [ClickHouse Meetup in Moscow](https://yandex.ru/promo/clickhouse/moscow-december-2019) on December 11.
@@ -243,6 +243,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
     }
 #endif

+    global_context->setRemoteHostFilter(config());
+
     std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
     std::string default_database = config().getString("default_database", "default");

@@ -3,6 +3,25 @@
  NOTE: User and query level settings are set up in "users.xml" file.
 -->
 <yandex>
+    <!-- The list of hosts allowed for use in URL-related storage engines and table functions.
+         If this section is not present in the configuration, all hosts are allowed.
+    -->
+    <remote_url_allow_hosts>
+        <!-- The host should be specified exactly as in the URL. The name is checked before DNS resolution.
+             Example: "yandex.ru", "yandex.ru." and "www.yandex.ru" are different hosts.
+             If a port is explicitly specified in the URL, the host:port pair is checked as a whole.
+             If a host is specified here without a port, any port for that host is allowed:
+             "yandex.ru" allows "yandex.ru:443", "yandex.ru:80" etc., but "yandex.ru:80" allows only "yandex.ru:80".
+             If the host is specified as an IP address, it is checked as written in the URL. Example: "[2a02:6b8:a::a]".
+             If there are redirects and support for redirects is enabled, every redirect (the Location field) is checked.
+        -->
+
+        <!-- A regular expression can be specified. The RE2 engine is used for regexps.
+             Regexps are not anchored implicitly: don't forget to add ^ and $, and to escape the dot (.) metacharacter
+             (forgetting to do so is a common source of error).
+        -->
+    </remote_url_allow_hosts>
+
     <logger>
         <!-- Possible levels: https://github.com/pocoproject/poco/blob/develop/Foundation/include/Poco/Logger.h#L105 -->
         <level>trace</level>
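A filled-in allow list looks like this (illustrative values; the integration-test configs later in this diff use the same shape):

```xml
<yandex>
    <remote_url_allow_hosts>
        <!-- exact host (any port) and exact host:port entries -->
        <host>yandex.ru</host>
        <host>localhost:9000</host>
        <!-- RE2 regular expression, anchored explicitly with ^ and $ -->
        <host_regexp>^[a-z]*\.ru$</host_regexp>
    </remote_url_allow_hosts>
</yandex>
```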
@@ -15,7 +34,6 @@
     <!--display_name>production</display_name--> <!-- It is the name that will be shown in the client -->
     <http_port>8123</http_port>
     <tcp_port>9000</tcp_port>
-
     <!-- For HTTPS and SSL over native protocol. -->
     <!--
     <https_port>8443</https_port>
@@ -465,6 +465,7 @@ namespace ErrorCodes
     extern const int UNKNOWN_DICTIONARY = 488;
     extern const int INCORRECT_DICTIONARY_DEFINITION = 489;
     extern const int CANNOT_FORMAT_DATETIME = 490;
+    extern const int UNACCEPTABLE_URL = 491;

     extern const int KEEPER_EXCEPTION = 999;
     extern const int POCO_EXCEPTION = 1000;
dbms/src/Common/RemoteHostFilter.cpp (new file, 62 lines)
@@ -0,0 +1,62 @@
#include <re2/re2.h>
#include <Common/RemoteHostFilter.h>
#include <Poco/URI.h>
#include <Formats/FormatFactory.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/Exception.h>
#include <IO/WriteHelpers.h>

namespace DB
{
namespace ErrorCodes
{
    extern const int UNACCEPTABLE_URL;
}

void RemoteHostFilter::checkURL(const Poco::URI & uri) const
{
    if (!checkForDirectEntry(uri.getHost()) &&
        !checkForDirectEntry(uri.getHost() + ":" + toString(uri.getPort())))
        throw Exception("URL \"" + uri.toString() + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
}

void RemoteHostFilter::checkHostAndPort(const std::string & host, const std::string & port) const
{
    if (!checkForDirectEntry(host) &&
        !checkForDirectEntry(host + ":" + port))
        throw Exception("URL \"" + host + ":" + port + "\" is not allowed in config.xml", ErrorCodes::UNACCEPTABLE_URL);
}

void RemoteHostFilter::setValuesFromConfig(const Poco::Util::AbstractConfiguration & config)
{
    if (config.has("remote_url_allow_hosts"))
    {
        std::vector<std::string> keys;
        config.keys("remote_url_allow_hosts", keys);
        for (auto key : keys)
        {
            if (startsWith(key, "host_regexp"))
                regexp_hosts.push_back(config.getString("remote_url_allow_hosts." + key));
            else if (startsWith(key, "host"))
                primary_hosts.insert(config.getString("remote_url_allow_hosts." + key));
        }
    }
}

bool RemoteHostFilter::checkForDirectEntry(const std::string & str) const
{
    if (!primary_hosts.empty() || !regexp_hosts.empty())
    {
        if (primary_hosts.find(str) == primary_hosts.end())
        {
            for (size_t i = 0; i < regexp_hosts.size(); ++i)
                if (re2::RE2::FullMatch(str, regexp_hosts[i]))
                    return true;
            return false;
        }
        return true;
    }
    return true;
}
}
dbms/src/Common/RemoteHostFilter.h (new file, 30 lines)
@@ -0,0 +1,30 @@
#pragma once

#include <vector>
#include <unordered_set>
#include <Poco/URI.h>
#include <Poco/Util/AbstractConfiguration.h>


namespace DB
{
class RemoteHostFilter
{
/**
 * This class checks if a URL is allowed.
 * If primary_hosts and regexp_hosts are empty, all URLs are allowed.
 */
public:
    void checkURL(const Poco::URI & uri) const; /// If the URL is not allowed in config.xml, throws an UNACCEPTABLE_URL exception.

    void setValuesFromConfig(const Poco::Util::AbstractConfiguration & config);

    void checkHostAndPort(const std::string & host, const std::string & port) const; /// Does the same as checkURL, but for a host and port taken separately.

private:
    std::unordered_set<std::string> primary_hosts;  /// Allowed primary (<host>) entries from config.xml
    std::vector<std::string> regexp_hosts;          /// Allowed regexp (<host_regexp>) entries from config.xml

    bool checkForDirectEntry(const std::string & str) const; /// Checks whether primary_hosts or regexp_hosts contain str. If both are empty, returns true.
};
}
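To make the intended usage concrete, here is a minimal sketch of driving the filter directly, assuming the ClickHouse source-tree headers; the config path and the hosts in it are illustrative, not part of the commit:

```cpp
#include <Poco/AutoPtr.h>
#include <Poco/URI.h>
#include <Poco/Util/XMLConfiguration.h>
#include <Common/RemoteHostFilter.h>

int main()
{
    /// Hypothetical server config containing a <remote_url_allow_hosts> section
    /// with, say, <host>yandex.ru</host> and <host_regexp>^[a-z]*\.ru$</host_regexp>.
    Poco::AutoPtr<Poco::Util::XMLConfiguration> config(new Poco::Util::XMLConfiguration("config.xml"));

    DB::RemoteHostFilter filter;
    filter.setValuesFromConfig(*config);

    filter.checkURL(Poco::URI("https://yandex.ru/path"));   /// passes: a direct entry matches
    filter.checkURL(Poco::URI("https://example.com/path")); /// throws UNACCEPTABLE_URL
}
```

Note the lookup order in checkForDirectEntry: exact `<host>` entries are consulted first, then each `<host_regexp>` via RE2::FullMatch; an empty config means everything passes.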
@@ -62,7 +62,7 @@ void SettingNumber<Type>::set(const Field & x)
 template <typename Type>
 void SettingNumber<Type>::set(const String & x)
 {
-    set(parse<Type>(x));
+    set(completeParse<Type>(x));
 }

 template <>
@@ -14,10 +14,12 @@ const int DEFAULT_S3_MAX_FOLLOW_GET_REDIRECT = 2;
 ReadBufferFromS3::ReadBufferFromS3(const Poco::URI & uri_,
     const String & access_key_id_,
     const String & secret_access_key_,
-    const ConnectionTimeouts & timeouts)
+    const ConnectionTimeouts & timeouts,
+    const RemoteHostFilter & remote_host_filter_)
     : ReadBuffer(nullptr, 0)
     , uri {uri_}
     , session {makeHTTPSession(uri_, timeouts)}
+    , remote_host_filter {remote_host_filter_}
 {
     Poco::Net::HTTPResponse response;
     std::unique_ptr<Poco::Net::HTTPRequest> request;
@@ -52,6 +54,7 @@ ReadBufferFromS3::ReadBufferFromS3(const Poco::URI & uri_,
             break;

         uri = location_iterator->second;
+        remote_host_filter.checkURL(uri);
         session = makeHTTPSession(uri, timeouts);
     }

@@ -21,11 +21,14 @@ protected:
     std::istream * istr; /// owned by session
     std::unique_ptr<ReadBuffer> impl;

+    RemoteHostFilter remote_host_filter;
+
 public:
     explicit ReadBufferFromS3(const Poco::URI & uri_,
         const String & access_key_id_,
         const String & secret_access_key_,
-        const ConnectionTimeouts & timeouts = {});
+        const ConnectionTimeouts & timeouts = {},
+        const RemoteHostFilter & remote_host_filter_ = {});

     bool nextImpl() override;
 };
@@ -877,6 +877,30 @@ inline T parse(const char * data, size_t size)
     return res;
 }

+/// Read something from text format, but expect complete parse of given text
+/// For example: 723145 -- ok, 213MB -- not ok
+template <typename T>
+inline T completeParse(const char * data, size_t size)
+{
+    T res;
+    ReadBufferFromMemory buf(data, size);
+    readText(res, buf);
+    assertEOF(buf);
+    return res;
+}
+
+template <typename T>
+inline T completeParse(const String & s)
+{
+    return completeParse<T>(s.data(), s.size());
+}
+
+template <typename T>
+inline T completeParse(const char * data)
+{
+    return completeParse<T>(data, strlen(data));
+}
+
 template <typename T>
 inline T parse(const char * data)
 {
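The behavioural difference from `parse` is the trailing `assertEOF`: leftover characters now raise an exception instead of being silently dropped. A minimal sketch of the contract, assuming the ClickHouse ReadHelpers header (values taken from the comment above):

```cpp
#include <IO/ReadHelpers.h>

void example()
{
    auto a = DB::parse<UInt64>("213MB");          /// 213: trailing "MB" is silently ignored
    auto b = DB::completeParse<UInt64>("723145"); /// 723145: the whole string is consumed
    auto c = DB::completeParse<UInt64>("213MB");  /// throws: assertEOF sees the unparsed "MB"
}
```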
@@ -916,12 +940,12 @@ void skipToUnescapedNextLineOrEOF(ReadBuffer & buf);
 template <class TReadBuffer, class... Types>
 std::unique_ptr<ReadBuffer> getReadBuffer(const DB::CompressionMethod method, Types&&... args)
 {
     if (method == DB::CompressionMethod::Gzip)
     {
         auto read_buf = std::make_unique<TReadBuffer>(std::forward<Types>(args)...);
         return std::make_unique<ZlibInflatingReadBuffer>(std::move(read_buf), method);
     }
     return std::make_unique<TReadBuffer>(args...);
 }

 /** This function just copies the data from buffer's internal position (in.position())
@@ -101,6 +101,7 @@ namespace detail
     const Poco::Net::HTTPBasicCredentials & credentials;
     std::vector<Poco::Net::HTTPCookie> cookies;
     HTTPHeaderEntries http_header_entries;
+    RemoteHostFilter remote_host_filter;

     std::istream * call(const Poco::URI uri_, Poco::Net::HTTPResponse & response)
     {
@@ -157,7 +158,8 @@
         OutStreamCallback out_stream_callback_ = {},
         const Poco::Net::HTTPBasicCredentials & credentials_ = {},
         size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
-        HTTPHeaderEntries http_header_entries_ = {})
+        HTTPHeaderEntries http_header_entries_ = {},
+        const RemoteHostFilter & remote_host_filter_ = {})
         : ReadBuffer(nullptr, 0)
         , uri {uri_}
         , method {!method_.empty() ? method_ : out_stream_callback_ ? Poco::Net::HTTPRequest::HTTP_POST : Poco::Net::HTTPRequest::HTTP_GET}
@@ -165,6 +167,7 @@
         , out_stream_callback {out_stream_callback_}
         , credentials {credentials_}
         , http_header_entries {http_header_entries_}
+        , remote_host_filter {remote_host_filter_}
     {
         Poco::Net::HTTPResponse response;

@@ -173,6 +176,7 @@
         while (isRedirect(response.getStatus()))
         {
             Poco::URI uri_redirect(response.get("Location"));
+            remote_host_filter.checkURL(uri_redirect);

             session->updateSession(uri_redirect);

@@ -243,8 +247,9 @@ public:
         const DB::SettingUInt64 max_redirects = 0,
         const Poco::Net::HTTPBasicCredentials & credentials_ = {},
         size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
-        const HTTPHeaderEntries & http_header_entries_ = {})
-        : Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_)
+        const HTTPHeaderEntries & http_header_entries_ = {},
+        const RemoteHostFilter & remote_host_filter_ = {})
+        : Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects), uri_, method_, out_stream_callback_, credentials_, buffer_size_, http_header_entries_, remote_host_filter_)
     {
     }
 };
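Because every redirect target is re-checked, an allow-listed URL cannot be used as a springboard to a forbidden host. An illustrative session, mirroring the `test_redirect` case later in this diff (the table name comes from that test):

```sql
SET max_http_get_redirects = 1;
-- The table's initial URL is in <remote_url_allow_hosts>, but the HTTP
-- redirect Location points at a host that is not: the query fails with UNACCEPTABLE_URL.
SELECT * FROM table_test_7_1;
```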
@@ -34,7 +34,8 @@ WriteBufferFromS3::WriteBufferFromS3(
     const String & access_key_id_,
     const String & secret_access_key_,
     size_t minimum_upload_part_size_,
-    const ConnectionTimeouts & timeouts_)
+    const ConnectionTimeouts & timeouts_,
+    const RemoteHostFilter & remote_host_filter_)
     : BufferWithOwnMemory<WriteBuffer>(DBMS_DEFAULT_BUFFER_SIZE, nullptr, 0)
     , uri {uri_}
     , access_key_id {access_key_id_}
@@ -43,6 +44,7 @@ WriteBufferFromS3::WriteBufferFromS3(
     , timeouts {timeouts_}
     , temporary_buffer {std::make_unique<WriteBufferFromString>(buffer_string)}
     , last_part_size {0}
+    , remote_host_filter(remote_host_filter_)
 {
     initiate();

@@ -134,6 +136,7 @@ void WriteBufferFromS3::initiate()
             break;

         initiate_uri = location_iterator->second;
+        remote_host_filter.checkURL(initiate_uri);
     }
     assertResponseIsOk(*request_ptr, response, *istr);

@@ -28,6 +28,7 @@ private:
     String buffer_string;
     std::unique_ptr<WriteBufferFromString> temporary_buffer;
     size_t last_part_size;
+    RemoteHostFilter remote_host_filter;

     /// Upload in S3 is made in parts.
     /// We initiate upload, then upload each part and get ETag as a response, and then finish upload with listing all our parts.
@@ -39,7 +40,8 @@ public:
         const String & access_key_id,
         const String & secret_access_key,
         size_t minimum_upload_part_size_,
-        const ConnectionTimeouts & timeouts = {});
+        const ConnectionTimeouts & timeouts = {},
+        const RemoteHostFilter & remote_host_filter_ = {});

     void nextImpl() override;

@@ -53,7 +53,7 @@
 #include <Common/ShellCommand.h>
 #include <Common/TraceCollector.h>
 #include <common/logger_useful.h>
+#include <Common/RemoteHostFilter.h>

 namespace ProfileEvents
 {
@@ -158,8 +158,9 @@ struct ContextShared
     ActionLocksManagerPtr action_locks_manager;     /// Set of storages' action lockers
     std::optional<SystemLogs> system_logs;          /// Used to log queries and operations on parts

-    std::unique_ptr<TraceCollector> trace_collector;    /// Thread collecting traces from threads executing queries
+    RemoteHostFilter remote_host_filter;                /// Allowed URL from config.xml

+    std::unique_ptr<TraceCollector> trace_collector;    /// Thread collecting traces from threads executing queries
     /// Named sessions. The user could specify session identifier to reuse settings and temporary tables in subsequent requests.

     class SessionKeyHash
@@ -1583,6 +1584,16 @@ String Context::getInterserverScheme() const
     return shared->interserver_scheme;
 }

+void Context::setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config)
+{
+    shared->remote_host_filter.setValuesFromConfig(config);
+}
+
+const RemoteHostFilter & Context::getRemoteHostFilter() const
+{
+    return shared->remote_host_filter;
+}
+
 UInt16 Context::getTCPPort() const
 {
     auto lock = getLock();
@@ -22,6 +22,7 @@
 #include <mutex>
 #include <optional>
 #include <thread>
+#include <Common/RemoteHostFilter.h>


 namespace Poco
@@ -77,6 +78,7 @@ using ActionLocksManagerPtr = std::shared_ptr<ActionLocksManager>;
 class ShellCommand;
 class ICompressionCodec;
 class SettingsConstraints;
+class RemoteHostFilter;

 class IOutputFormat;
 using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
@@ -354,6 +356,10 @@ public:
     void setInterserverScheme(const String & scheme);
     String getInterserverScheme() const;

+    /// Storage of allowed hosts from config.xml
+    void setRemoteHostFilter(const Poco::Util::AbstractConfiguration & config);
+    const RemoteHostFilter & getRemoteHostFilter() const;
+
     /// The port that the server listens for executing SQL queries.
     UInt16 getTCPPort() const;

@@ -49,6 +49,7 @@ StorageHDFS::StorageHDFS(const String & uri_,
     , context(context_)
     , compression_method(compression_method_)
 {
+    context.getRemoteHostFilter().checkURL(Poco::URI(uri));
     setColumns(columns_);
     setConstraints(constraints_);
 }
@@ -43,7 +43,7 @@ namespace
         const CompressionMethod compression_method)
         : name(name_)
     {
-        read_buf = getReadBuffer<ReadBufferFromS3>(compression_method, uri, access_key_id, secret_access_key, timeouts);
+        read_buf = getReadBuffer<ReadBufferFromS3>(compression_method, uri, access_key_id, secret_access_key, timeouts, context.getRemoteHostFilter());
         reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
     }

@@ -98,7 +98,8 @@ namespace
             access_key_id,
             secret_access_key,
             min_upload_part_size,
-            timeouts);
+            timeouts,
+            context.getRemoteHostFilter());
         writer = FormatFactory::instance().getOutput(format, *write_buf, sample_block, context);
     }

@@ -155,6 +156,7 @@ StorageS3::StorageS3(
     , min_upload_part_size(min_upload_part_size_)
     , compression_method(compression_method_)
 {
+    context_global.getRemoteHostFilter().checkURL(uri_);
     setColumns(columns_);
     setConstraints(constraints_);
 }
@@ -24,6 +24,7 @@ namespace DB
 namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int UNACCEPTABLE_URL;
 }

 IStorageURLBase::IStorageURLBase(
@@ -37,6 +38,7 @@ IStorageURLBase::IStorageURLBase(
     const String & compression_method_)
     : uri(uri_), context_global(context_), compression_method(compression_method_), format_name(format_name_), table_name(table_name_), database_name(database_name_)
 {
+    context_global.getRemoteHostFilter().checkURL(uri);
     setColumns(columns_);
     setConstraints(constraints_);
 }
@@ -58,7 +60,18 @@ namespace
         const CompressionMethod compression_method)
         : name(name_)
     {
-        read_buf = getReadBuffer<ReadWriteBufferFromHTTP>(compression_method, uri, method, callback, timeouts, context.getSettingsRef().max_http_get_redirects);
+        read_buf = getReadBuffer<ReadWriteBufferFromHTTP>(
+            compression_method,
+            uri,
+            method,
+            callback,
+            timeouts,
+            context.getSettingsRef().max_http_get_redirects,
+            Poco::Net::HTTPBasicCredentials{},
+            DBMS_DEFAULT_BUFFER_SIZE,
+            ReadWriteBufferFromHTTP::HTTPHeaderEntries{},
+            context.getRemoteHostFilter());
+
         reader = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
     }

@@ -155,6 +155,20 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const Context & context, ...)
     throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS);

     auto maybe_secure_port = context.getTCPPortSecure();

+    /// Check that every host and port belongs to the allowed hosts.
+    for (auto hosts : names)
+    {
+        for (auto host : hosts)
+        {
+            size_t colon = host.find(':');
+            if (colon == String::npos)
+                context.getRemoteHostFilter().checkHostAndPort(host, toString((secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort())));
+            else
+                context.getRemoteHostFilter().checkHostAndPort(host.substr(0, colon), host.substr(colon + 1));
+        }
+    }
+
     cluster = std::make_shared<Cluster>(context.getSettings(), names, username, password, (secure ? (maybe_secure_port ? *maybe_secure_port : DBMS_DEFAULT_SECURE_PORT) : context.getTCPPort()), false, secure);
 }

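The same filter now guards the `remote` and `remoteSecure` table functions: a host given without a port is checked against the default TCP (or secure TCP) port. For example, mirroring the integration test below:

```sql
SELECT * FROM remote('localhost', system, events);      -- allowed: localhost:9000 is listed
SELECT * FROM remote('localhost:800', system, metrics); -- fails: "localhost:800" is not allowed in config.xml
```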
@@ -0,0 +1,5 @@ (new file; referenced as configs/config_for_redirect.xml in test.py below)
<yandex>
    <remote_url_allow_hosts>
        <host>hdfs1:50070</host>
    </remote_url_allow_hosts>
</yandex>
@@ -0,0 +1,10 @@ (new file; referenced as configs/config_for_remote.xml in test.py below)
<yandex>
    <remote_url_allow_hosts>
        <host>localhost:9000</host>
        <host>localhost:9440</host>
        <host>example01-01-1</host>
        <host>example01-01-2</host>
        <host>example01-02-1</host>
        <host>example01-02-2</host>
    </remote_url_allow_hosts>
</yandex>
@@ -0,0 +1,7 @@ (new file; referenced as configs/config_with_hosts.xml in test.py below)
<yandex>
    <remote_url_allow_hosts>
        <host>host:80</host>
        <host_regexp>^[a-z]*\.ru$</host_regexp>
    </remote_url_allow_hosts>
</yandex>
@@ -0,0 +1,8 @@ (new file; referenced as configs/config_with_only_primary_hosts.xml in test.py below)
<yandex>
    <remote_url_allow_hosts>
        <host>host:80</host>
        <host>host:123</host>
        <host>yandex.ru</host>
    </remote_url_allow_hosts>
</yandex>
@@ -0,0 +1,7 @@ (new file; referenced as configs/config_with_only_regexp_hosts.xml in test.py below)
<yandex>
    <remote_url_allow_hosts>
        <host_regexp>^[a-z]*:80$</host_regexp>
        <host_regexp>^[a-z]*\.ru$</host_regexp>
    </remote_url_allow_hosts>
</yandex>
@@ -0,0 +1,5 @@ (new file; referenced as configs/config_without_allowed_hosts.xml in test.py below)
<yandex>
    <remote_url_allow_hosts>
    </remote_url_allow_hosts>
</yandex>
dbms/tests/integration/test_allowed_url_from_config/test.py (new file, 74 lines)
@@ -0,0 +1,74 @@
import time
import pytest

from helpers.hdfs_api import HDFSApi
from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', main_configs=['configs/config_with_hosts.xml'])
node2 = cluster.add_instance('node2', main_configs=['configs/config_with_only_primary_hosts.xml'])
node3 = cluster.add_instance('node3', main_configs=['configs/config_with_only_regexp_hosts.xml'])
node4 = cluster.add_instance('node4', main_configs=['configs/config_without_allowed_hosts.xml'])
node6 = cluster.add_instance('node6', main_configs=['configs/config_for_remote.xml'])
node7 = cluster.add_instance('node7', main_configs=['configs/config_for_redirect.xml'], with_hdfs=True)


@pytest.fixture(scope="module")
def start_cluster():
    try:
        cluster.start()
        yield cluster
    finally:
        cluster.shutdown()

def test_config_with_hosts(start_cluster):
    assert node1.query("CREATE TABLE table_test_1_1 (word String) Engine=URL('http://host:80', HDFS)") == ""
    assert node1.query("CREATE TABLE table_test_1_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert "not allowed" in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://host:123', S3)")
    assert "not allowed" in node1.query_and_get_error("CREATE TABLE table_test_1_4 (word String) Engine=URL('https://yandex2.ru', CSV)")

def test_config_with_only_primary_hosts(start_cluster):
    assert node2.query("CREATE TABLE table_test_2_1 (word String) Engine=URL('https://host:80', CSV)") == ""
    assert node2.query("CREATE TABLE table_test_2_2 (word String) Engine=URL('https://host:123', S3)") == ""
    assert node2.query("CREATE TABLE table_test_2_3 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert node2.query("CREATE TABLE table_test_2_4 (word String) Engine=URL('https://yandex.ru:87', HDFS)") == ""
    assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host', HDFS)")
    assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_5 (word String) Engine=URL('https://host:234', CSV)")
    assert "not allowed" in node2.query_and_get_error("CREATE TABLE table_test_2_6 (word String) Engine=URL('https://yandex2.ru', S3)")

def test_config_with_only_regexp_hosts(start_cluster):
    assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == ""
    assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)")
    assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")

def test_config_without_allowed_hosts(start_cluster):
    assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == ""
    assert node4.query("CREATE TABLE table_test_4_2 (word String) Engine=URL('https://host', HDFS)") == ""
    assert node4.query("CREATE TABLE table_test_4_3 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
    assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('ftp://something.com', S3)") == ""

def test_table_function_remote(start_cluster):
    assert node6.query("SELECT * FROM remote('localhost', system, events)") != ""
    assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != ""
    assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remoteSecure('localhost:800', system, events)")
    assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remote('localhost:800', system, metrics)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-1', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)")
    assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)")
    assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)")
    assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)")
    assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)")

def test_redirect(start_cluster):
    hdfs_api = HDFSApi("root")
    hdfs_api.write_data("/simple_storage", "1\t\n")
    assert hdfs_api.read_data("/simple_storage") == "1\t\n"
    node7.query("CREATE TABLE table_test_7_1 (word String) ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)")
    assert "not allowed" in node7.query_and_get_error("SET max_http_get_redirects=1; SELECT * from table_test_7_1")

def test_HDFS(start_cluster):
    assert "not allowed" in node7.query_and_get_error("CREATE TABLE table_test_7_2 (word String) ENGINE=HDFS('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'CSV')")
    assert "not allowed" in node7.query_and_get_error("SELECT * FROM hdfs('http://hdfs1:50075/webhdfs/v1/simple_storage?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV', 'word String')")
@@ -0,0 +1,5 @@ (new file; referenced as configs/config_for_test_remote_host_filter.xml in the test_storage_s3 changes below)
<yandex>
    <remote_url_allow_hosts>
        <host_regexp>^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.|$)){4}$</host_regexp>
    </remote_url_allow_hosts>
</yandex>
@@ -86,6 +86,7 @@ def get_nginx_access_logs():
 def cluster():
     try:
         cluster = ClickHouseCluster(__file__)
+        cluster.add_instance("restricted_dummy", main_configs=["configs/config_for_test_remote_host_filter.xml"], with_minio=True)
         cluster.add_instance("dummy", with_minio=True)
         logging.info("Starting cluster...")
         cluster.start()
@@ -232,3 +233,15 @@ def test_multipart_put(cluster, maybe_auth, positive):
     assert uploaded_parts > 1

     assert csv_data == get_s3_file_content(cluster, bucket, filename)
+
+
+def test_remote_host_filter(cluster):
+    instance = cluster.instances["restricted_dummy"]
+    format = "column1 UInt32, column2 UInt32, column3 UInt32"
+
+    query = "select *, column1*column2*column3 from s3('http://{}:{}/', 'CSV', '{}')".format("invalid_host", cluster.minio_redirect_port, format)
+    assert "not allowed in config.xml" in instance.query_and_get_error(query)
+
+    other_values = "(1, 1, 1), (1, 1, 1), (11, 11, 11)"
+    query = "insert into table function s3('http://{}:{}/{}/test.csv', 'CSV', '{}') values {}".format("invalid_host", cluster.minio_port, cluster.minio_bucket, format, other_values)
+    assert "not allowed in config.xml" in instance.query_and_get_error(query)
@@ -0,0 +1,2 @@ (new test reference file)
10000000001
10000000001
@@ -0,0 +1,7 @@ (new test file)
SET max_memory_usage = 10000000001;

SELECT value FROM system.settings WHERE name = 'max_memory_usage';

SET max_memory_usage = '1G'; -- { serverError 27 }

SELECT value FROM system.settings WHERE name = 'max_memory_usage';
@@ -1022,6 +1022,17 @@ Lower values mean higher priority. Threads with low `nice` priority values are executed more frequently than threads with high values.

 Default value: 0.

+## allow_introspection_functions {#settings-allow_introspection_functions}
+
+Enables or disables [introspection functions](../../query_language/functions/introspection.md) for query profiling.
+
+Possible values:
+
+- 1 — Introspection functions enabled.
+- 0 — Introspection functions disabled.
+
+Default value: 0.
+
 ## input_format_parallel_parsing

 - Type: bool
docs/en/query_language/functions/introspection.md (new file, 298 lines)
@@ -0,0 +1,298 @@
# Introspection Functions

You can use functions described in this chapter to introspect [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) and [DWARF](https://en.wikipedia.org/wiki/DWARF) for query profiling.

!!! warning "Warning"
    These functions are slow and may have security implications.

For proper operation of introspection functions:

- Install the `clickhouse-common-static-dbg` package.
- Set the [allow_introspection_functions](../../operations/settings/settings.md#settings-allow_introspection_functions) setting to 1.

For security reasons, introspection functions are disabled by default.

ClickHouse saves profiler reports to the [trace_log](../../operations/system_tables.md#system_tables-trace_log) system table. Make sure the table and profiler are configured properly.
## addressToLine {#addresstoline}

Converts a virtual memory address inside the ClickHouse server process to the filename and line number in the ClickHouse source code.

If you use official ClickHouse packages, you need to install the `clickhouse-common-static-dbg` package.

**Syntax**

```sql
addressToLine(address_of_binary_instruction)
```

**Parameters**

- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of an instruction in a running process.

**Returned value**

- Source code filename and line number in this file, delimited by a colon.

    For example, `/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199`, where `199` is a line number.

- Name of a binary, if the function couldn't find the debug information.
- Empty string, if the address is not valid.

Type: [String](../../data_types/string.md).

**Example**

Enabling introspection functions:

```sql
SET allow_introspection_functions=1
```

Selecting the first row from the `trace_log` system table:

```sql
SELECT * FROM system.trace_log LIMIT 1 \G
```
```text
Row 1:
──────
event_date: 2019-11-19
event_time: 2019-11-19 18:57:23
revision: 54429
timer_type: Real
thread_number: 48
query_id: 421b6855-1858-45a5-8f37-f383409d6d72
trace: [140658411141617,94784174532828,94784076370703,94784076372094,94784076361020,94784175007680,140658411116251,140658403895439]
```

The `trace` field contains the stack trace at the moment of sampling.

Getting the source code filename and line number for a single address:

```sql
SELECT addressToLine(94784076370703) \G
```
```text
Row 1:
──────
addressToLine(94784076370703): /build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199
```

Applying the function to the whole stack trace:

```sql
SELECT
    arrayStringConcat(arrayMap(x -> addressToLine(x), trace), '\n') AS trace_source_code_lines
FROM system.trace_log
LIMIT 1
\G
```

The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function applies `addressToLine` to each element of the `trace` array. The result appears in the `trace_source_code_lines` column of the output.

```text
Row 1:
──────
trace_source_code_lines: /lib/x86_64-linux-gnu/libpthread-2.27.so
/usr/lib/debug/usr/bin/clickhouse
/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.cpp:199
/build/obj-x86_64-linux-gnu/../dbms/src/Common/ThreadPool.h:155
/usr/include/c++/9/bits/atomic_base.h:551
/usr/lib/debug/usr/bin/clickhouse
/lib/x86_64-linux-gnu/libpthread-2.27.so
/build/glibc-OTsEL5/glibc-2.27/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:97
```
## addressToSymbol {#addresstosymbol}

Converts a virtual memory address inside the ClickHouse server process to a symbol from the ClickHouse object files.

**Syntax**

```sql
addressToSymbol(address_of_binary_instruction)
```

**Parameters**

- `address_of_binary_instruction` ([UInt64](../../data_types/int_uint.md)) — Address of an instruction in a running process.

**Returned value**

- Symbol from ClickHouse object files.
- Empty string, if the address is not valid.

Type: [String](../../data_types/string.md).

**Example**

Enabling introspection functions:

```sql
SET allow_introspection_functions=1
```

Selecting the first row from the `trace_log` system table:

```sql
SELECT * FROM system.trace_log LIMIT 1 \G
```
```text
Row 1:
──────
event_date: 2019-11-20
event_time: 2019-11-20 16:57:59
revision: 54429
timer_type: Real
thread_number: 48
query_id: 724028bf-f550-45aa-910d-2af6212b94ac
trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583]
```

The `trace` field contains the stack trace at the moment of sampling.

Getting a symbol for a single address:

```sql
SELECT addressToSymbol(94138803686098) \G
```
```text
Row 1:
──────
addressToSymbol(94138803686098): _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE
```

Applying the function to the whole stack trace:

```sql
SELECT
    arrayStringConcat(arrayMap(x -> addressToSymbol(x), trace), '\n') AS trace_symbols
FROM system.trace_log
LIMIT 1
\G
```

The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function applies `addressToSymbol` to each element of the `trace` array. The result appears in the `trace_symbols` column of the output.

```text
Row 1:
──────
trace_symbols: _ZNK2DB24IAggregateFunctionHelperINS_20AggregateFunctionSumImmNS_24AggregateFunctionSumDataImEEEEE19addBatchSinglePlaceEmPcPPKNS_7IColumnEPNS_5ArenaE
_ZNK2DB10Aggregator21executeWithoutKeyImplERPcmPNS0_28AggregateFunctionInstructionEPNS_5ArenaE
_ZN2DB10Aggregator14executeOnBlockESt6vectorIN3COWINS_7IColumnEE13immutable_ptrIS3_EESaIS6_EEmRNS_22AggregatedDataVariantsERS1_IPKS3_SaISC_EERS1_ISE_SaISE_EERb
_ZN2DB10Aggregator14executeOnBlockERKNS_5BlockERNS_22AggregatedDataVariantsERSt6vectorIPKNS_7IColumnESaIS9_EERS6_ISB_SaISB_EERb
_ZN2DB10Aggregator7executeERKSt10shared_ptrINS_17IBlockInputStreamEERNS_22AggregatedDataVariantsE
_ZN2DB27AggregatingBlockInputStream8readImplEv
_ZN2DB17IBlockInputStream4readEv
_ZN2DB26ExpressionBlockInputStream8readImplEv
_ZN2DB17IBlockInputStream4readEv
_ZN2DB26ExpressionBlockInputStream8readImplEv
_ZN2DB17IBlockInputStream4readEv
_ZN2DB28AsynchronousBlockInputStream9calculateEv
_ZNSt17_Function_handlerIFvvEZN2DB28AsynchronousBlockInputStream4nextEvEUlvE_E9_M_invokeERKSt9_Any_data
_ZN14ThreadPoolImplI20ThreadFromGlobalPoolE6workerESt14_List_iteratorIS0_E
_ZZN20ThreadFromGlobalPoolC4IZN14ThreadPoolImplIS_E12scheduleImplIvEET_St8functionIFvvEEiSt8optionalImEEUlvE1_JEEEOS4_DpOT0_ENKUlvE_clEv
_ZN14ThreadPoolImplISt6threadE6workerESt14_List_iteratorIS0_E
execute_native_thread_routine
start_thread
clone
```
## demangle {#demangle}

Converts a symbol that you can get using the [addressToSymbol](#addresstosymbol) function to the C++ function name.

**Syntax**

```sql
demangle(symbol)
```

**Parameters**

- `symbol` ([String](../../data_types/string.md)) — Symbol from an object file.

**Returned value**

- Name of the C++ function.
- Empty string if a symbol is not valid.

Type: [String](../../data_types/string.md).

**Example**

Enabling introspection functions:

```sql
SET allow_introspection_functions=1
```

Selecting the first row from the `trace_log` system table:

```sql
SELECT * FROM system.trace_log LIMIT 1 \G
```
```text
Row 1:
──────
event_date: 2019-11-20
event_time: 2019-11-20 16:57:59
revision: 54429
timer_type: Real
thread_number: 48
query_id: 724028bf-f550-45aa-910d-2af6212b94ac
trace: [94138803686098,94138815010911,94138815096522,94138815101224,94138815102091,94138814222988,94138806823642,94138814457211,94138806823642,94138814457211,94138806823642,94138806795179,94138806796144,94138753770094,94138753771646,94138753760572,94138852407232,140399185266395,140399178045583]
```

The `trace` field contains the stack trace at the moment of sampling.

Getting a function name for a single address:

```sql
SELECT demangle(addressToSymbol(94138803686098)) \G
```
```text
Row 1:
──────
demangle(addressToSymbol(94138803686098)): DB::IAggregateFunctionHelper<DB::AggregateFunctionSum<unsigned long, unsigned long, DB::AggregateFunctionSumData<unsigned long> > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const
```

Applying the function to the whole stack trace:

```sql
SELECT
    arrayStringConcat(arrayMap(x -> demangle(addressToSymbol(x)), trace), '\n') AS trace_functions
FROM system.trace_log
LIMIT 1
\G
```

The [arrayMap](higher_order_functions.md#higher_order_functions-array-map) function applies `demangle(addressToSymbol(...))` to each element of the `trace` array. The result appears in the `trace_functions` column of the output.

```text
Row 1:
──────
trace_functions: DB::IAggregateFunctionHelper<DB::AggregateFunctionSum<unsigned long, unsigned long, DB::AggregateFunctionSumData<unsigned long> > >::addBatchSinglePlace(unsigned long, char*, DB::IColumn const**, DB::Arena*) const
DB::Aggregator::executeWithoutKeyImpl(char*&, unsigned long, DB::Aggregator::AggregateFunctionInstruction*, DB::Arena*) const
DB::Aggregator::executeOnBlock(std::vector<COW<DB::IColumn>::immutable_ptr<DB::IColumn>, std::allocator<COW<DB::IColumn>::immutable_ptr<DB::IColumn> > >, unsigned long, DB::AggregatedDataVariants&, std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >&, std::vector<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >, std::allocator<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> > > >&, bool&)
DB::Aggregator::executeOnBlock(DB::Block const&, DB::AggregatedDataVariants&, std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >&, std::vector<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> >, std::allocator<std::vector<DB::IColumn const*, std::allocator<DB::IColumn const*> > > >&, bool&)
DB::Aggregator::execute(std::shared_ptr<DB::IBlockInputStream> const&, DB::AggregatedDataVariants&)
DB::AggregatingBlockInputStream::readImpl()
DB::IBlockInputStream::read()
DB::ExpressionBlockInputStream::readImpl()
DB::IBlockInputStream::read()
DB::ExpressionBlockInputStream::readImpl()
DB::IBlockInputStream::read()
DB::AsynchronousBlockInputStream::calculate()
std::_Function_handler<void (), DB::AsynchronousBlockInputStream::next()::{lambda()#1}>::_M_invoke(std::_Any_data const&)
ThreadPoolImpl<ThreadFromGlobalPool>::worker(std::_List_iterator<ThreadFromGlobalPool>)
ThreadFromGlobalPool::ThreadFromGlobalPool<ThreadPoolImpl<ThreadFromGlobalPool>::scheduleImpl<void>(std::function<void ()>, int, std::optional<unsigned long>)::{lambda()#3}>(ThreadPoolImpl<ThreadFromGlobalPool>::scheduleImpl<void>(std::function<void ()>, int, std::optional<unsigned long>)::{lambda()#3}&&)::{lambda()#1}::operator()() const
ThreadPoolImpl<std::thread>::worker(std::_List_iterator<std::thread>)
execute_native_thread_routine
start_thread
clone
```
@@ -119,6 +119,7 @@ nav:
         - 'Working with geographical coordinates': 'query_language/functions/geo.md'
         - 'Working with Nullable arguments': 'query_language/functions/functions_for_nulls.md'
         - 'Machine Learning Functions': 'query_language/functions/machine_learning_functions.md'
+        - 'Introspection': 'query_language/functions/introspection.md'
         - 'Other': 'query_language/functions/other_functions.md'
     - 'Aggregate Functions':
         - 'Introduction': 'query_language/agg_functions/index.md'