Merge branch 'master' into DateTime64_extended_range

This commit is contained in:
Alexey Milovidov 2021-03-03 23:43:20 +03:00
commit 4e8239e098
473 changed files with 14779 additions and 4796 deletions

View File

@ -444,8 +444,14 @@ public:
if (offset_is_whole_number_of_hours_everytime)
return (toSecondsSinceTheDayStart(t) / 60) % 60;
UInt32 date = find(t).date;
return (UInt32(t) - date) / 60 % 60;
/// To consider the DST changing situation within this day.
/// also make the special timezones with no whole hour offset such as 'Australia/Lord_Howe' been taken into account
DayNum index = findIndex(t);
UInt32 res = t - lut[index].date;
if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change)
res += lut[index].amount_of_offset_change;
return res / 60 % 60;
}
inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; }

View File

@ -1,5 +1,20 @@
#pragma once
/// __has_feature supported only by clang.
///
/// But libcxx/libcxxabi overrides it to 0,
/// thus the checks for __has_feature will be wrong.
///
/// NOTE:
/// - __has_feature cannot be simply undefined,
/// since this will be broken if some C++ header will be included after
/// including <common/defines.h>
/// - it should not have fallback to 0,
/// since this may create false-positive detection (common problem)
#if defined(__clang__) && defined(__has_feature)
# define ch_has_feature __has_feature
#endif
#if defined(_MSC_VER)
# if !defined(likely)
# define likely(x) (x)
@ -32,8 +47,8 @@
/// Check for presence of address sanitizer
#if !defined(ADDRESS_SANITIZER)
# if defined(__has_feature)
# if __has_feature(address_sanitizer)
# if defined(ch_has_feature)
# if ch_has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1
# endif
# elif defined(__SANITIZE_ADDRESS__)
@ -42,8 +57,8 @@
#endif
#if !defined(THREAD_SANITIZER)
# if defined(__has_feature)
# if __has_feature(thread_sanitizer)
# if defined(ch_has_feature)
# if ch_has_feature(thread_sanitizer)
# define THREAD_SANITIZER 1
# endif
# elif defined(__SANITIZE_THREAD__)
@ -52,8 +67,8 @@
#endif
#if !defined(MEMORY_SANITIZER)
# if defined(__has_feature)
# if __has_feature(memory_sanitizer)
# if defined(ch_has_feature)
# if ch_has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1
# endif
# elif defined(__MEMORY_SANITIZER__)

View File

@ -15,8 +15,8 @@
#endif
#define __msan_unpoison(X, Y) // NOLINT
#if defined(__has_feature)
# if __has_feature(memory_sanitizer)
#if defined(ch_has_feature)
# if ch_has_feature(memory_sanitizer)
# undef __msan_unpoison
# include <sanitizer/msan_interface.h>
# endif

View File

@ -51,10 +51,11 @@ Connection::Connection(
const char* ssl_key,
unsigned timeout,
unsigned rw_timeout,
bool enable_local_infile)
bool enable_local_infile,
bool opt_reconnect)
: Connection()
{
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile);
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile, opt_reconnect);
}
Connection::Connection(const std::string & config_name)
@ -80,7 +81,8 @@ void Connection::connect(const char* db,
const char * ssl_key,
unsigned timeout,
unsigned rw_timeout,
bool enable_local_infile)
bool enable_local_infile,
bool opt_reconnect)
{
if (is_connected)
disconnect();
@ -104,9 +106,8 @@ void Connection::connect(const char* db,
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Enables auto-reconnect.
bool reconnect = true;
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
/// See C API Developer Guide: Automatic Reconnection Control
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&opt_reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Specifies particular ssl key and certificate if it needs

View File

@ -14,6 +14,8 @@
/// Disable LOAD DATA LOCAL INFILE because it is insecure
#define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false
/// See https://dev.mysql.com/doc/c-api/5.7/en/c-api-auto-reconnect.html
#define MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT true
namespace mysqlxx
@ -76,7 +78,8 @@ public:
const char * ssl_key = "",
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
/// Creates connection. Can be used if Poco::Util::Application is using.
/// All settings will be got from config_name section of configuration.
@ -96,7 +99,8 @@ public:
const char* ssl_key,
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
void connect(const std::string & config_name)
{
@ -112,6 +116,7 @@ public:
std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", "");
std::string ssl_key = cfg.getString(config_name + ".ssl_key", "");
bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
unsigned timeout =
cfg.getInt(config_name + ".connect_timeout",
@ -135,7 +140,8 @@ public:
ssl_key.c_str(),
timeout,
rw_timeout,
enable_local_infile);
enable_local_infile,
opt_reconnect);
}
/// If MySQL connection was established.

View File

@ -26,6 +26,15 @@ struct ConnectionFailed : public Exception
};
/// Connection to MySQL server was lost
struct ConnectionLost : public Exception
{
ConnectionLost(const std::string & msg, int code = 0) : Exception(msg, code) {}
const char * name() const throw() override { return "mysqlxx::ConnectionLost"; }
const char * className() const throw() override { return "mysqlxx::ConnectionLost"; }
};
/// Erroneous query.
struct BadQuery : public Exception
{

View File

@ -10,7 +10,6 @@
#include <common/sleep.h>
#include <Poco/Util/Application.h>
#include <Poco/Util/LayeredConfiguration.h>
@ -41,7 +40,9 @@ void Pool::Entry::decrementRefCount()
Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name,
unsigned default_connections_, unsigned max_connections_,
const char * parent_config_name_)
: default_connections(default_connections_), max_connections(max_connections_)
: logger(Poco::Logger::get("mysqlxx::Pool"))
, default_connections(default_connections_)
, max_connections(max_connections_)
{
server = cfg.getString(config_name + ".host");
@ -78,6 +79,9 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
enable_local_infile = cfg.getBool(config_name + ".enable_local_infile",
cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE));
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect",
cfg.getBool(parent_config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT));
}
else
{
@ -96,6 +100,8 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
enable_local_infile = cfg.getBool(
config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
}
connect_timeout = cfg.getInt(config_name + ".connect_timeout",
@ -125,20 +131,30 @@ Pool::Entry Pool::get()
initialize();
for (;;)
{
logger.trace("(%s): Iterating through existing MySQL connections", getDescription());
for (auto & connection : connections)
{
if (connection->ref_count == 0)
return Entry(connection, this);
}
logger.trace("(%s): Trying to allocate a new connection.", getDescription());
if (connections.size() < static_cast<size_t>(max_connections))
{
Connection * conn = allocConnection();
if (conn)
return Entry(conn, this);
logger.trace("(%s): Unable to create a new connection: Allocation failed.", getDescription());
}
else
{
logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription());
}
lock.unlock();
logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
lock.lock();
}
@ -162,8 +178,7 @@ Pool::Entry Pool::tryGet()
if (res.tryForceConnected()) /// Tries to reestablish connection as well
return res;
auto & logger = Poco::Util::Application::instance().logger();
logger.information("Idle connection to mysql server cannot be recovered, dropping it.");
logger.debug("(%s): Idle connection to MySQL server cannot be recovered, dropping it.", getDescription());
/// This one is disconnected, cannot be reestablished and so needs to be disposed of.
connection_it = connections.erase(connection_it);
@ -186,6 +201,8 @@ Pool::Entry Pool::tryGet()
void Pool::removeConnection(Connection* connection)
{
logger.trace("(%s): Removing connection.", getDescription());
std::lock_guard<std::mutex> lock(mutex);
if (connection)
{
@ -210,8 +227,6 @@ void Pool::Entry::forceConnected() const
if (data == nullptr)
throw Poco::RuntimeException("Tried to access NULL database connection.");
Poco::Util::Application & app = Poco::Util::Application::instance();
bool first = true;
while (!tryForceConnected())
{
@ -220,7 +235,7 @@ void Pool::Entry::forceConnected() const
else
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
app.logger().information("MYSQL: Reconnecting to " + pool->description);
pool->logger.debug("Entry: Reconnecting to MySQL server %s", pool->description);
data->conn.connect(
pool->db.c_str(),
pool->server.c_str(),
@ -233,7 +248,8 @@ void Pool::Entry::forceConnected() const
pool->ssl_key.c_str(),
pool->connect_timeout,
pool->rw_timeout,
pool->enable_local_infile);
pool->enable_local_infile,
pool->opt_reconnect);
}
}
@ -242,18 +258,22 @@ bool Pool::Entry::tryForceConnected() const
{
auto * const mysql_driver = data->conn.getDriver();
const auto prev_connection_id = mysql_thread_id(mysql_driver);
pool->logger.trace("Entry(connection %lu): sending PING to check if it is alive.", prev_connection_id);
if (data->conn.ping()) /// Attempts to reestablish lost connection
{
const auto current_connection_id = mysql_thread_id(mysql_driver);
if (prev_connection_id != current_connection_id)
{
auto & logger = Poco::Util::Application::instance().logger();
logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu",
prev_connection_id, current_connection_id);
pool->logger.debug("Entry(connection %lu): Reconnected to MySQL server. Connection id changed: %lu -> %lu",
current_connection_id, prev_connection_id, current_connection_id);
}
pool->logger.trace("Entry(connection %lu): PING ok.", current_connection_id);
return true;
}
pool->logger.trace("Entry(connection %lu): PING failed.", prev_connection_id);
return false;
}
@ -274,15 +294,13 @@ void Pool::initialize()
Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
{
Poco::Util::Application & app = Poco::Util::Application::instance();
std::unique_ptr<Connection> conn(new Connection);
std::unique_ptr<Connection> conn_ptr{new Connection};
try
{
app.logger().information("MYSQL: Connecting to " + description);
logger.debug("Connecting to %s", description);
conn->conn.connect(
conn_ptr->conn.connect(
db.c_str(),
server.c_str(),
user.c_str(),
@ -294,29 +312,29 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
ssl_key.c_str(),
connect_timeout,
rw_timeout,
enable_local_infile);
enable_local_infile,
opt_reconnect);
}
catch (mysqlxx::ConnectionFailed & e)
{
logger.error(e.what());
if ((!was_successful && !dont_throw_if_failed_first_time)
|| e.errnum() == ER_ACCESS_DENIED_ERROR
|| e.errnum() == ER_DBACCESS_DENIED_ERROR
|| e.errnum() == ER_BAD_DB_ERROR)
{
app.logger().error(e.what());
throw;
}
else
{
app.logger().error(e.what());
return nullptr;
}
}
connections.push_back(conn_ptr.get());
was_successful = true;
auto * connection = conn.release();
connections.push_back(connection);
return connection;
return conn_ptr.release();
}
}

View File

@ -6,6 +6,8 @@
#include <atomic>
#include <Poco/Exception.h>
#include <Poco/Logger.h>
#include <mysqlxx/Connection.h>
@ -165,19 +167,21 @@ public:
unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT,
unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS,
unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS,
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE)
: default_connections(default_connections_), max_connections(max_connections_),
db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_),
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_) {}
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT)
: logger(Poco::Logger::get("mysqlxx::Pool")), default_connections(default_connections_),
max_connections(max_connections_), db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_),
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_),
opt_reconnect(opt_reconnect_) {}
Pool(const Pool & other)
: default_connections{other.default_connections},
: logger(other.logger), default_connections{other.default_connections},
max_connections{other.max_connections},
db{other.db}, server{other.server},
user{other.user}, password{other.password},
port{other.port}, socket{other.socket},
connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout},
enable_local_infile{other.enable_local_infile}
enable_local_infile{other.enable_local_infile}, opt_reconnect(other.opt_reconnect)
{}
Pool & operator=(const Pool &) = delete;
@ -201,6 +205,8 @@ public:
void removeConnection(Connection * connection);
protected:
Poco::Logger & logger;
/// Number of MySQL connections which are created at launch.
unsigned default_connections;
/// Maximum possible number of connections
@ -231,6 +237,7 @@ private:
std::string ssl_cert;
std::string ssl_key;
bool enable_local_infile;
bool opt_reconnect;
/// True if connection was established at least once.
bool was_successful{false};

View File

@ -1,3 +1,8 @@
#include <algorithm>
#include <ctime>
#include <random>
#include <thread>
#include <mysqlxx/PoolWithFailover.h>
@ -33,6 +38,19 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & con
std::make_shared<Pool>(config_, replica_name, default_connections_, max_connections_, config_name_.c_str()));
}
}
/// PoolWithFailover objects are stored in a cache inside PoolFactory.
/// This cache is reset by ExternalDictionariesLoader after every SYSTEM RELOAD DICTIONAR{Y|IES}
/// which triggers massive re-constructing of connection pools.
/// The state of PRNGs like std::mt19937 is considered to be quite heavy
/// thus here we attempt to optimize its construction.
static thread_local std::mt19937 rnd_generator(
std::hash<std::thread::id>{}(std::this_thread::get_id()) + std::clock());
for (auto & [_, replicas] : replicas_by_priority)
{
if (replicas.size() > 1)
std::shuffle(replicas.begin(), replicas.end(), rnd_generator);
}
}
else
{

View File

@ -1,11 +1,16 @@
#if __has_include(<mysql.h>)
#include <errmsg.h>
#include <mysql.h>
#else
#include <mysql/errmsg.h>
#include <mysql/mysql.h>
#endif
#include <Poco/Logger.h>
#include <mysqlxx/Connection.h>
#include <mysqlxx/Query.h>
#include <mysqlxx/Types.h>
namespace mysqlxx
@ -57,8 +62,24 @@ void Query::reset()
void Query::executeImpl()
{
std::string query_string = query_buf.str();
if (mysql_real_query(conn->getDriver(), query_string.data(), query_string.size()))
throw BadQuery(errorMessage(conn->getDriver()), mysql_errno(conn->getDriver()));
MYSQL* mysql_driver = conn->getDriver();
auto & logger = Poco::Logger::get("mysqlxx::Query");
logger.trace("Running MySQL query using connection %lu", mysql_thread_id(mysql_driver));
if (mysql_real_query(mysql_driver, query_string.data(), query_string.size()))
{
const auto err_no = mysql_errno(mysql_driver);
switch (err_no)
{
case CR_SERVER_GONE_ERROR:
[[fallthrough]];
case CR_SERVER_LOST:
throw ConnectionLost(errorMessage(mysql_driver), err_no);
default:
throw BadQuery(errorMessage(mysql_driver), err_no);
}
}
}
UseQueryResult Query::use()

View File

@ -32,7 +32,10 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
# debian (debhlpers) set SOURCE_DATE_EPOCH environment variable, that is
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
# filled from the debian/changelog or current time.
#
# - 4.0+ ccache always includes this environment variable into the hash
@ -48,9 +51,6 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
else()
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
endif()
else ()
message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118")

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793
Subproject commit 9a0d78de4b90546368d954b6434f0e9a823e8d80

2
contrib/boost vendored

@ -1 +1 @@
Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79
Subproject commit ee24fa55bc46e4d2ce7d0d052cc5a0d9b1be8c36

2
contrib/brotli vendored

@ -1 +1 @@
Subproject commit 5805f99a533a8f8118699c0100d8c102f3605f65
Subproject commit 63be8a99401992075c23e99f7c84de1c653e39e2

View File

@ -2,6 +2,8 @@ set(BROTLI_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c)
set(BROTLI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/brotli/c)
set(SRCS
${BROTLI_SOURCE_DIR}/enc/command.c
${BROTLI_SOURCE_DIR}/enc/fast_log.c
${BROTLI_SOURCE_DIR}/dec/bit_reader.c
${BROTLI_SOURCE_DIR}/dec/state.c
${BROTLI_SOURCE_DIR}/dec/huffman.c
@ -26,6 +28,9 @@ set(SRCS
${BROTLI_SOURCE_DIR}/enc/memory.c
${BROTLI_SOURCE_DIR}/common/dictionary.c
${BROTLI_SOURCE_DIR}/common/transform.c
${BROTLI_SOURCE_DIR}/common/platform.c
${BROTLI_SOURCE_DIR}/common/context.c
${BROTLI_SOURCE_DIR}/common/constants.c
)
add_library(brotli ${SRCS})

View File

@ -70,6 +70,7 @@ function start_server
--path "$FASTTEST_DATA"
--user_files_path "$FASTTEST_DATA/user_files"
--top_level_domains_path "$FASTTEST_DATA/top_level_domains"
--test_keeper_server.log_storage_path "$FASTTEST_DATA/coordination"
)
clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" &
server_pid=$!
@ -355,7 +356,6 @@ function run_tests
# JSON functions
01666_blns
01674_htm_xml_coarse_parse
)
(time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
@ -375,7 +375,7 @@ function run_tests
stop_server ||:
# Clean the data so that there is no interference from the previous test run.
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files,coordination} ||:
start_server

View File

@ -18,7 +18,8 @@ RUN apt-get update \
curl \
tar \
krb5-user \
iproute2
iproute2 \
lsof
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \

View File

@ -1,11 +1,11 @@
version: '2.3'
services:
zoo1:
image: zookeeper:3.4.12
image: zookeeper:3.6.2
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
ZOO_MY_ID: 1
JVMFLAGS: -Dzookeeper.forceSync=no
volumes:
@ -16,11 +16,11 @@ services:
source: ${ZK_DATA_LOG1:-}
target: /datalog
zoo2:
image: zookeeper:3.4.12
image: zookeeper:3.6.2
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888
ZOO_MY_ID: 2
JVMFLAGS: -Dzookeeper.forceSync=no
volumes:
@ -31,11 +31,11 @@ services:
source: ${ZK_DATA_LOG2:-}
target: /datalog
zoo3:
image: zookeeper:3.4.12
image: zookeeper:3.6.2
restart: always
environment:
ZOO_TICK_TIME: 500
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
ZOO_MY_ID: 3
JVMFLAGS: -Dzookeeper.forceSync=no
volumes:

View File

@ -358,6 +358,8 @@ mkdir analyze analyze/tmp ||:
build_log_column_definitions
# Split the raw test output into files suitable for analysis.
# To debug calculations only for a particular test, substitute a suitable
# wildcard here, e.g. `for test_file in modulo-raw.tsv`.
for test_file in *-raw.tsv
do
test_name=$(basename "$test_file" "-raw.tsv")
@ -467,7 +469,13 @@ create view broken_queries as
create table query_run_metrics_for_stats engine File(
TSV, -- do not add header -- will parse with grep
'analyze/query-run-metrics-for-stats.tsv')
as select test, query_index, 0 run, version, metric_values
as select test, query_index, 0 run, version,
-- For debugging, add a filter for a particular metric like this:
-- arrayFilter(m, n -> n = 'client_time', metric_values, metric_names)
-- metric_values
-- Note that further reporting may break, because the metric names are
-- not filtered.
metric_values
from query_run_metric_arrays
where (test, query_index) not in broken_queries
order by test, query_index, run, version
@ -585,8 +593,19 @@ create view query_metric_stats as
-- Main statistics for queries -- query time as reported in query log.
create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
as select
abs(diff) > report_threshold and abs(diff) > stat_threshold as changed_fail,
abs(diff) > report_threshold - 0.05 and abs(diff) > stat_threshold as changed_show,
-- It is important to have a non-strict inequality with stat_threshold
-- here. The randomization distribution is actually discrete, and when
-- the number of runs is small, the quantile we need (e.g. 0.99) turns
-- out to be the maximum value of the distribution. We can also hit this
-- maximum possible value with our test run, and this obviously means
-- that we have observed the difference to the best precision possible
-- for the given number of runs. If we use a strict equality here, we
-- will miss such cases. This happened in the wild and lead to some
-- uncaught regressions, because for the default 7 runs we do for PRs,
-- the randomization distribution has only 16 values, so the max quantile
-- is actually 0.9375.
abs(diff) > report_threshold and abs(diff) >= stat_threshold as changed_fail,
abs(diff) > report_threshold - 0.05 and abs(diff) >= stat_threshold as changed_show,
not changed_fail and stat_threshold > report_threshold + 0.10 as unstable_fail,
not changed_show and stat_threshold > report_threshold - 0.05 as unstable_show,

View File

@ -0,0 +1,7 @@
<yandex>
<!-- Directory with user provided files that are accessible by 'file' table function. -->
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
<!-- Path to configuration file with users, access rights, profiles of settings, quotas. -->
<users_config>users.xml</users_config>
</yandex>

View File

@ -19,4 +19,9 @@
<max_threads>12</max_threads>
</default>
</profiles>
<users>
<default>
<access_management>1</access_management>
</default>
</users>
</yandex>

View File

@ -1,4 +1,6 @@
-- input is table(test text, query text, run UInt32, version int, metrics Array(float))
-- The input is table(test text, query text, run UInt32, version UInt8, metrics Array(float)).
-- Run like this:
-- clickhouse-local --queries-file eqmed.sql -S 'test text, query text, run UInt32, version UInt8, metrics Array(float)' --file analyze/tmp/modulo_0.tsv
select
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[1] as l) l_rounded,
arrayMap(x -> floor(x, 4), original_medians_array.medians_by_version[2] as r) r_rounded,
@ -8,14 +10,19 @@ select
from
(
-- quantiles of randomization distributions
-- note that for small number of runs, the exact quantile might not make
-- sense, because the last possible value of randomization distribution
-- might take a larger percentage of distirbution (i.e. the distribution
-- actually has discrete values, and the last step can be large).
select quantileExactForEach(0.99)(
arrayMap(x, y -> abs(x - y), metrics_by_label[1], metrics_by_label[2]) as d
) threshold
---- uncomment to see what the distribution is really like
--, uniqExact(d.1) u
---- Uncomment to see what the distribution is really like. This debug
---- code only works for single (the first) metric.
--, uniqExact(d[1]) u
--, arraySort(x->x.1,
-- arrayZip(
-- (sumMap([d.1], [1]) as f).1,
-- (sumMap([d[1]], [1]) as f).1,
-- f.2)) full_histogram
from
(

View File

@ -81,6 +81,8 @@ clickhouse-client --query "SHOW TABLES FROM test"
./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt
stop
# TODO remove me when persistent snapshots will be ready
rm -fr /var/lib/clickhouse/coordination ||:
start
clickhouse-client --query "SELECT 'Server successfuly started'" > /test_output/alive_check.txt || echo 'Server failed to start' > /test_output/alive_check.txt

View File

@ -26,4 +26,4 @@ The name of an additional section can be any, for example, **Usage**.
- [link](#)
[Original article](https://clickhouse.tech/docs/en/data_types/<data-type-name>/) <!--hide-->
[Original article](https://clickhouse.tech/docs/en/data-types/<data-type-name>/) <!--hide-->

View File

@ -38,20 +38,20 @@ SETTINGS
Required parameters:
- `kafka_broker_list` A comma-separated list of brokers (for example, `localhost:9092`).
- `kafka_topic_list` A list of Kafka topics.
- `kafka_group_name` A group of Kafka consumers. Reading margins are tracked for each group separately. If you dont want messages to be duplicated in the cluster, use the same group name everywhere.
- `kafka_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
- `kafka_broker_list` A comma-separated list of brokers (for example, `localhost:9092`).
- `kafka_topic_list` A list of Kafka topics.
- `kafka_group_name` A group of Kafka consumers. Reading margins are tracked for each group separately. If you dont want messages to be duplicated in the cluster, use the same group name everywhere.
- `kafka_format` Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
Optional parameters:
- `kafka_row_delimiter` Delimiter character, which ends the message.
- `kafka_schema` Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `kafka_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
- `kafka_max_block_size` - The maximum batch size (in messages) for poll (default: `max_block_size`).
- `kafka_skip_broken_messages` Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
- `kafka_commit_every_batch` - Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`).
- `kafka_thread_per_consumer` - Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise - rows from several consumers squashed to form one block).
- `kafka_row_delimiter` Delimiter character, which ends the message.
- `kafka_schema` Parameter that must be used if the format requires a schema definition. For example, [Capn Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
- `kafka_num_consumers` The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
- `kafka_max_block_size` The maximum batch size (in messages) for poll (default: `max_block_size`).
- `kafka_skip_broken_messages` Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
- `kafka_commit_every_batch` Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`).
- `kafka_thread_per_consumer` Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise rows from several consumers squashed to form one block).
Examples:

View File

@ -66,7 +66,8 @@ SELECT * FROM file_engine_table
## Usage in ClickHouse-local {#usage-in-clickhouse-local}
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`.
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. It is possible to read and write compressed files based on an additional engine parameter or file extension (`gz`, `br` or `xz`).
**Example:**
``` bash

View File

@ -5,7 +5,7 @@ toc_title: Brown University Benchmark
# Brown University Benchmark
MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
`MgBench` is a new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
Download the data:
```
@ -153,7 +153,7 @@ ORDER BY dt,
hr;
-- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O?
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
SELECT machine_name,
COUNT(*) AS spikes
@ -301,7 +301,7 @@ WHERE event_type = 'temperature'
AND log_time >= '2019-11-29 17:00:00.000';
-- Q3.4: Over the past 6 months, how frequently was each door opened?
-- Q3.4: Over the past 6 months, how frequently were each door opened?
SELECT device_name,
device_floor,
@ -412,3 +412,5 @@ ORDER BY yr,
```
The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/brown-benchmark/) <!--hide-->

File diff suppressed because one or more lines are too long

View File

@ -20,5 +20,6 @@ The list of documented datasets:
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md)
- [Cell Towers](../../getting-started/example-datasets/cell-towers.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -15,17 +15,9 @@ This dataset can be obtained in two ways:
Downloading data:
``` bash
for s in `seq 1987 2018`
do
for m in `seq 1 12`
do
wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip
done
done
echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip | xargs -P10 wget --no-check-certificate --continue
```
(from https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh )
Creating a table:
``` sql
@ -145,12 +137,14 @@ ORDER BY (Carrier, FlightDate)
SETTINGS index_granularity = 8192;
```
Loading data:
Loading data with multiple threads:
``` bash
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
```
(if you will have memory shortage or other issues on your server, remove the `-P $(nproc)` part)
## Download of Prepared Partitions {#download-of-prepared-partitions}
``` bash

View File

@ -148,28 +148,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
For successful requests that dont return a data table, an empty response body is returned.
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
If you specified `compress=1` in the URL, the server compresses the data it sends you.
If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
## Compression {#compression}
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods.
You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed.
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
Examples of sending data with compression:
If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method.
``` bash
#Sending data to the server:
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse supports the following [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
#Sending data to the client:
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
- `gzip`
- `br`
- `deflate`
- `xz`
To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`.
In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods.
!!! note "Note"
Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly.
**Examples**
``` bash
# Sending compressed data to the server
$ echo "SELECT 1" | gzip -c | \
curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
``` bash
# Receiving compressed data from the server
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
0
1
2
```
## Default Database {#default-database}
You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database.
``` bash

View File

@ -8,18 +8,21 @@ toc_title: Caches
When performing queries, ClichHouse uses different caches.
Main cache types:
- `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
- `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
Additional cache types:
- DNS cache
- [regexp](../interfaces/formats.md#data-format-regexp) cache
- compiled expressions cache
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache
- [dictionaries data cache](../sql-reference/dictionaries/index.md)
- DNS cache.
- [Regexp](../interfaces/formats.md#data-format-regexp) cache.
- Compiled expressions cache.
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache.
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
Indirectly used:
- OS page cache
- OS page cache.
To drop cache, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md) statements.

View File

@ -1956,8 +1956,8 @@ Default value: 16.
**See Also**
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine.
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine.
## validate_polygons {#validate_polygons}
@ -2658,8 +2658,6 @@ Result:
Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
Allows to select data from a file engine table without file.
@ -2679,3 +2677,16 @@ Possible values:
- 1 — Enabled.
Default value: `0`.
## allow_experimental_geo_types {#allow-experimental-geo-types}
Allows working with experimental [geo data types](../../sql-reference/data-types/geo.md).
Possible values:
- 0 — Working with geo data types is disabled.
- 1 — Working with geo data types is enabled.
Default value: `0`.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->

View File

@ -14,7 +14,7 @@ Columns:
- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds).
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds).
- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper).
**Example**

View File

@ -20,7 +20,7 @@ System tables:
Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.
Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements can be customized are:
@ -33,7 +33,7 @@ System log tables can be customized by creating a config file with the same name
An example:
```
```xml
<yandex>
<query_log>
<database>system</database>

View File

@ -91,6 +91,8 @@ $ clickhouse-local --query "
Now lets output memory user for each Unix user:
Query:
``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
| clickhouse-local --structure "user String, mem Float64" \
@ -98,6 +100,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
```
Result:
``` text
Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
┏━━━━━━━━━━┳━━━━━━━━━━┓

View File

@ -253,8 +253,8 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
**Parameters**
- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` - It is an optional parameter.
- `window` — Length of the sliding window. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` - It is an optional argument.
- `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
**Returned value**

View File

@ -9,7 +9,7 @@ Calculates the arithmetic mean.
**Syntax**
``` sql
avgWeighted(x)
avg(x)
```
**Arguments**

View File

@ -0,0 +1,106 @@
---
toc_priority: 62
toc_title: Geo
---
# Geo Data Types {#geo-data-types}
Clickhouse supports data types for representing geographical objects — locations, lands, etc.
!!! warning "Warning"
Currently geo data types are an experimental feature. To work with them you must set `allow_experimental_geo_types = 1`.
**See Also**
- [Representing simple geographical features](https://en.wikipedia.org/wiki/GeoJSON).
- [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types) setting.
## Point {#point-data-type}
`Point` is represented by its X and Y coordinates, stored as a [Tuple](tuple.md)([Float64](float.md), [Float64](float.md)).
**Example**
Query:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_point (p Point) ENGINE = Memory();
INSERT INTO geo_point VALUES((10, 10));
SELECT p, toTypeName(p) FROM geo_point;
```
Result:
``` text
┌─p─────┬─toTypeName(p)─┐
│ (10,10) │ Point │
└───────┴───────────────┘
```
## Ring {#ring-data-type}
`Ring` is a simple polygon without holes stored as an array of points: [Array](array.md)([Point](#point-data-type)).
**Example**
Query:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_ring (r Ring) ENGINE = Memory();
INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]);
SELECT r, toTypeName(r) FROM geo_ring;
```
Result:
``` text
┌─r─────────────────────────────┬─toTypeName(r)─┐
│ [(0,0),(10,0),(10,10),(0,10)] │ Ring │
└───────────────────────────────┴───────────────┘
```
## Polygon {#polygon-data-type}
`Polygon` is a polygon with holes stored as an array of rings: [Array](array.md)([Ring](#ring-data-type)). First element of outer array is the outer shape of polygon and all the following elements are holes.
**Example**
This is a polygon with one hole:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory();
INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]);
SELECT pg, toTypeName(pg) FROM geo_polygon;
```
Result:
``` text
┌─pg────────────────────────────────────────────────────────────┬─toTypeName(pg)─┐
│ [[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] │ Polygon │
└───────────────────────────────────────────────────────────────┴────────────────┘
```
## MultiPolygon {#multipolygon-data-type}
`MultiPolygon` consists of multiple polygons and is stored as an array of polygons: [Array](array.md)([Polygon](#polygon-data-type)).
**Example**
This multipolygon consists of two separate polygons — the first one without holes, and the second with one hole:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory();
INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]);
SELECT mpg, toTypeName(mpg) FROM geo_multipolygon;
```
Result:
``` text
┌─mpg─────────────────────────────────────────────────────────────────────────────────────────────┬─toTypeName(mpg)─┐
│ [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] │ MultiPolygon │
└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/data-types/geo/) <!--hide-->

View File

@ -21,7 +21,11 @@ The following aggregate functions are supported:
- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md)
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)
Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function.
!!! note "Note"
Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes.
`SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function.
**Parameters**
@ -31,11 +35,7 @@ Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way
**Example**
``` sql
CREATE TABLE t
(
column1 SimpleAggregateFunction(sum, UInt64),
column2 SimpleAggregateFunction(any, String)
) ENGINE = ...
CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id;
```
[Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) <!--hide-->

View File

@ -61,40 +61,58 @@ int32samoa: 1546300800
Converts a date or date with time to a UInt16 number containing the year number (AD).
Alias: `YEAR`.
## toQuarter {#toquarter}
Converts a date or date with time to a UInt8 number containing the quarter number.
Alias: `QUARTER`.
## toMonth {#tomonth}
Converts a date or date with time to a UInt8 number containing the month number (1-12).
Alias: `MONTH`.
## toDayOfYear {#todayofyear}
Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366).
Alias: `DAYOFYEAR`.
## toDayOfMonth {#todayofmonth}
Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31).
Aliases: `DAYOFMONTH`, `DAY`.
## toDayOfWeek {#todayofweek}
Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7).
Alias: `DAYOFWEEK`.
## toHour {#tohour}
Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23).
This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true even in Moscow the clocks were twice changed at a different time).
Alias: `HOUR`.
## toMinute {#tominute}
Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59).
Alias: `MINUTE`.
## toSecond {#tosecond}
Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59).
Leap seconds are not accounted for.
Alias: `SECOND`.
## toUnixTimestamp {#to-unix-timestamp}
For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
@ -753,7 +771,7 @@ This is necessary for searching for pageviews in the corresponding session.
## formatDateTime {#formatdatetime}
Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column.
Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
**Syntax**
@ -812,31 +830,32 @@ Result:
└────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) <!--hide-->
## FROM\_UNIXTIME {#fromunixfime}
When there is only single argument of integer type, it act in the same way as `toDateTime` and return [DateTime](../../sql-reference/data-types/datetime.md).
type.
Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
For example:
**Example:**
Query:
```sql
SELECT FROM_UNIXTIME(423543535)
SELECT FROM_UNIXTIME(423543535);
```
Result:
```text
┌─FROM_UNIXTIME(423543535)─┐
│ 1983-06-04 10:58:55 │
└──────────────────────────┘
```
When there are two arguments, first is integer or DateTime, second is constant format string, it act in the same way as `formatDateTime` and return `String` type.
When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type.
For example:
```sql
SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime
SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```
```text
@ -988,3 +1007,5 @@ Result:
│ 2020-01-01 │
└────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) <!--hide-->

View File

@ -75,6 +75,8 @@ Result:
Returns a string containing the arguments hexadecimal representation.
Alias: `HEX`.
**Syntax**
``` sql

View File

@ -55,7 +55,7 @@ CREATE TABLE encryption_test
`comment` String,
`secret` String
)
ENGINE = Memory
ENGINE = Memory;
```
Insert some data (please avoid storing the keys/ivs in the database as this undermines the whole concept of encryption), also storing 'hints' is unsafe too and used only for illustrative purposes:
@ -110,7 +110,7 @@ Result:
Compatible with mysql encryption and resulting ciphertext can be decrypted with [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt) function.
Will produce same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `IV`.
Will produce the same ciphertext as `encrypt` on equal inputs. But when `key` or `iv` are longer than they should normally be, `aes_encrypt_mysql` will stick to what MySQL's `aes_encrypt` does: 'fold' `key` and ignore excess bits of `iv`.
Supported encryption modes:
@ -132,13 +132,12 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optinal, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optional, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string).
**Returned value**
- Ciphertext binary string. [String](../../sql-reference/data-types/string.md#string).
**Examples**
Given equal input `encrypt` and `aes_encrypt_mysql` produce the same ciphertext:
@ -157,7 +156,6 @@ Result:
└───────────────────┘
```
But `encrypt` fails when `key` or `iv` is longer than expected:
Query:
@ -252,7 +250,7 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
**Examples**
Re-using table from [encrypt](./encryption-functions.md#encrypt).
Re-using table from [encrypt](#encrypt).
Query:
@ -284,6 +282,7 @@ SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920
```
Result:
``` text
┌─comment─────────────────────────────┬─plaintext─┐
│ aes-256-cfb128 no IV │ Secret │
@ -294,7 +293,7 @@ Result:
└─────────────────────────────────────┴───────────┘
```
Notice how only portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
## aes_decrypt_mysql {#aes_decrypt_mysql}
@ -331,6 +330,7 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
**Examples**
Let's decrypt data we've previously encrypted with MySQL:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
@ -345,11 +345,13 @@ mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviv
```
Query:
``` sql
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext
```
Result:
``` text
┌─plaintext─┐
│ Secret │

View File

@ -13,6 +13,8 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal
isNull(x)
```
Alias: `ISNULL`.
**Arguments**
- `x` — A value with a non-compound data type.

View File

@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem
## halfMD5 {#hash-functions-halfmd5}
[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
``` sql
halfMD5(par1, ...)
@ -54,7 +54,7 @@ sipHash64(par1,...)
This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function.
Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
1. After hashing all the input parameters, the function gets the array of hashes.
2. Function takes the first and the second elements and calculates a hash for the array of them.

View File

@ -9,10 +9,14 @@ toc_title: IP Addresses
Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a string containing the corresponding IPv4 address in the format A.B.C.d (dot-separated numbers in decimal form).
Alias: `INET_NTOA`.
## IPv4StringToNum(s) {#ipv4stringtonums}
The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0.
Alias: `INET_ATON`.
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}
Similar to IPv4NumToString, but using xxx instead of the last octet.
@ -49,7 +53,11 @@ Since using xxx is highly unusual, this may be changed in the future. We r
### IPv6NumToString(x) {#ipv6numtostringx}
Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing this address in text format.
IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44. Examples:
IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44.
Alias: `INET6_NTOA`.
Examples:
``` sql
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
@ -119,6 +127,8 @@ The reverse function of IPv6NumToString. If the IPv6 address has an invalid form
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
HEX can be uppercase or lowercase.
Alias: `INET6_ATON`.
``` sql
SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
```

View File

@ -98,6 +98,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
Repeats a string as many times as specified and concatenates the replicated values as a single string.
Alias: `REPEAT`.
**Syntax**
``` sql
@ -276,10 +278,14 @@ Returns the string s that was converted from the encoding in from to
Encodes s string into base64
Alias: `TO_BASE64`.
## base64Decode(s) {#base64decode}
Decode base64-encoded string s into original string. In case of failure raises an exception.
Alias: `FROM_BASE64`.
## tryBase64Decode(s) {#trybase64decode}
Similar to base64Decode, but in case of error an empty string would be returned.

View File

@ -174,4 +174,129 @@ Result:
└──────────────────────────────┴───────────────────────────────────┘
```
## mapContains {#mapcontains}
Determines whether the `map` contains the `key` parameter.
**Syntax**
``` sql
mapContains(map, key)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `key` — Key. Type matches the type of keys of `map` parameter.
**Returned value**
- `1` if `map` contains `key`, `0` if not.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapContains(a, 'name') FROM test;
```
Result:
```text
┌─mapContains(a, 'name')─┐
│ 1 │
│ 0 │
└────────────────────────┘
```
## mapKeys {#mapkeys}
Returns all keys from the `map` parameter.
**Syntax**
```sql
mapKeys(map)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Array containing all keys from the `map`.
Type: [Array](../../sql-reference/data-types/array.md).
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapKeys(a) FROM test;
```
Result:
```text
┌─mapKeys(a)────────────┐
│ ['name','age'] │
│ ['number','position'] │
└───────────────────────┘
```
## mapValues {#mapvalues}
Returns all values from the `map` parameter.
**Syntax**
```sql
mapKeys(map)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Array containing all the values from `map`.
Type: [Array](../../sql-reference/data-types/array.md).
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapValues(a) FROM test;
```
Result:
```text
┌─mapValues(a)─────┐
│ ['eleven','11'] │
│ ['twelve','6.0'] │
└──────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -36,10 +36,14 @@ The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/f
**Example**
Query:
``` sql
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
```
Result:
``` text
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
│ -9223372036854775808 │ 32 │ 16 │ 8 │
@ -52,10 +56,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
**Example**
Query:
``` sql
select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
```
Result:
``` text
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
│ 123123 │ 0 │
@ -68,10 +76,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
**Example**
Query:
``` sql
select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
```
Result:
``` text
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
│ 123123 │ ᴺᵁᴸᴸ │
@ -102,10 +114,14 @@ The behavior of functions for negative agruments and for the [NaN and Inf](../..
**Example**
Query:
``` sql
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
```
Result:
``` text
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
## toDate {#todate}
Alias: `DATE`.
## toDateOrZero {#todateorzero}
## toDateOrNull {#todateornull}
@ -168,20 +186,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
**Examples**
Query:
``` sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val)
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
```
Result:
``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5)) │
└──────────┴────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
```
Result:
``` text
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │
@ -213,20 +239,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
**Example**
Query:
``` sql
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val)
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
```
Result:
``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5) │
└──────────┴────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val)
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
```
Result:
``` text
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
│ 0.00 │ Decimal(9, 2) │
@ -258,12 +292,18 @@ Conversion between numeric types uses the same rules as assignments between diff
Additionally, the toString function of the DateTime argument can take a second String argument containing the name of the time zone. Example: `Asia/Yekaterinburg` In this case, the time is formatted according to the specified time zone.
**Example**
Query:
``` sql
SELECT
now() AS now_local,
toString(now(), 'Asia/Yekaterinburg') AS now_yekat
toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
```
Result:
``` text
┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
@ -281,36 +321,81 @@ If the string has fewer bytes than N, it is padded with null bytes to the right.
Accepts a String or FixedString argument. Returns the String with the content truncated at the first zero byte found.
Example:
**Example**
Query:
``` sql
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
```
Result:
``` text
┌─s─────────────┬─s_cut─┐
│ foo\0\0\0\0\0 │ foo │
└───────────────┴───────┘
```
Query:
``` sql
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
```
Result:
``` text
┌─s──────────┬─s_cut─┐
│ foo\0bar\0 │ foo │
└────────────┴───────┘
```
## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}
## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264}
## reinterpretAsDate {#reinterpretasdate}
## reinterpretAsDateTime {#reinterpretasdatetime}
These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isnt long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch.
## reinterpretAsString {#type_conversion_functions-reinterpretAsString}
This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
## reinterpretAsFixedString {#reinterpretasfixedstring}
This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
## reinterpretAsUUID {#reinterpretasuuid}
This function accepts 16 bytes string, and returns UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes to the end. If the string longer than 16 bytes, the extra bytes at the end are ignored.
**Syntax**
``` sql
reinterpretAsUUID(fixed_string)
```
**Parameters**
- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
## reinterpret(x, T) {#type_conversion_function-reinterpret}
Performs byte reinterpretation of x as t data type.
**Returned value**
Following reinterpretations are allowed:
1. Any type that has fixed size and value of that type can be represented continuously into FixedString.
2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
3. FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
**Examples**
String to UUID.
Query:
``` sql
SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
@ -318,39 +403,45 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
reinterpret('1', 'UInt32') as string_to_int;
```
Result:
``` text
┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐
255 │ 1e-45 │ 49
└─────────────┴──────────────┴───────────────┘
┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
08090a0b-0c0d-0e0f-0001-020304050607
└───────────────────────────────────────────────────────────────────────┘
```
## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256}
Going back and forth from String to UUID.
## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256}
Query:
## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256}
``` sql
WITH
generateUUIDv4() AS uuid,
identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
reinterpretAsUUID(reverse(unhex(str))) AS uuid2
SELECT uuid = uuid2;
```
## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat}
Result:
## reinterpretAsDate {#type_conversion_function-reinterpretAsDate}
## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime}
## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64}
## reinterpretAsString {#type_conversion_function-reinterpretAsString}
## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString}
## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID}
These functions are aliases for `reinterpret` function.
``` text
┌─equals(uuid, uuid2)─┐
│ 1 │
└─────────────────────┘
```
## CAST(x, T) {#type_conversion_function-cast}
Converts x to the t data type. The syntax CAST(x AS t) is also supported.
Converts input value `x` to the `T` data type.
Example:
The syntax `CAST(x AS t)` is also supported.
Note, that if value `x` does not fit the bounds of type T, the function overflows. For example, CAST(-1, 'UInt8') returns 255.
**Example**
Query:
``` sql
SELECT
@ -358,9 +449,11 @@ SELECT
CAST(timestamp AS DateTime) AS datetime,
CAST(timestamp AS Date) AS date,
CAST(timestamp, 'String') AS string,
CAST(timestamp, 'FixedString(22)') AS fixed_string
CAST(timestamp, 'FixedString(22)') AS fixed_string;
```
Result:
``` text
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
@ -369,12 +462,18 @@ SELECT
Conversion to FixedString(N) only works for arguments of type String or FixedString(N).
Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. Example:
Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported.
**Example**
Query:
``` sql
SELECT toTypeName(x) FROM t_null
SELECT toTypeName(x) FROM t_null;
```
Result:
``` text
┌─toTypeName(x)─┐
│ Int8 │
@ -382,10 +481,14 @@ SELECT toTypeName(x) FROM t_null
└───────────────┘
```
Query:
``` sql
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
```
Result:
``` text
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
│ Nullable(UInt16) │
@ -399,15 +502,19 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
Converts x to the t data type. The differente from cast(x, T) is that accurateCast
does not allow overflow of numeric types during cast if type value x does not fit
bounds of type T.
Converts `x` to the `T` data type.
The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
**Example**
Query:
Example
``` sql
SELECT cast(-1, 'UInt8') as uint8;
```
Result:
``` text
┌─uint8─┐
@ -415,38 +522,46 @@ SELECT cast(-1, 'UInt8') as uint8;
└───────┘
```
Query:
```sql
SELECT accurateCast(-1, 'UInt8') as uint8;
```
Result:
``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
Converts x to the t data type. Always returns nullable type and returns NULL
if the casted value is not representable in the target type.
Converts input value `x` to the specified data type `T`. Always returns [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type.
Example:
**Syntax**
```sql
accurateCastOrNull(x, T)
```
**Parameters**
- `x` — Input value.
- `T` — The name of the returned data type.
**Returned value**
- The value, converted to the specified data type `T`.
**Example**
Query:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
```
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘┘
```
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
```
Result:
``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
@ -454,6 +569,23 @@ SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
└────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
```
Result:
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
@ -481,6 +613,8 @@ toIntervalYear(number)
**Example**
Query:
``` sql
WITH
toDate('2019-01-01') AS date,
@ -488,9 +622,11 @@ WITH
toIntervalWeek(1) AS interval_to_week
SELECT
date + interval_week,
date + interval_to_week
date + interval_to_week;
```
Result:
``` text
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│ 2019-01-08 │ 2019-01-08 │
@ -506,7 +642,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112
**Syntax**
``` sql
parseDateTimeBestEffort(time_string [, time_zone]);
parseDateTimeBestEffort(time_string [, time_zone])
```
**Arguments**
@ -549,7 +685,7 @@ Query:
``` sql
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
AS parseDateTimeBestEffort
AS parseDateTimeBestEffort;
```
Result:
@ -564,7 +700,7 @@ Query:
``` sql
SELECT parseDateTimeBestEffort('1284101485')
AS parseDateTimeBestEffort
AS parseDateTimeBestEffort;
```
Result:
@ -579,7 +715,7 @@ Query:
``` sql
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
AS parseDateTimeBestEffort
AS parseDateTimeBestEffort;
```
Result:
@ -593,7 +729,7 @@ Result:
Query:
``` sql
SELECT parseDateTimeBestEffort('10 20:19')
SELECT parseDateTimeBestEffort('10 20:19');
```
Result:
@ -613,12 +749,12 @@ Result:
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}
This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity.
This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity.
**Syntax**
``` sql
parseDateTimeBestEffortUS(time_string [, time_zone]);
parseDateTimeBestEffortUS(time_string [, time_zone])
```
**Arguments**
@ -693,6 +829,178 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r
Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed.
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed.
**Syntax**
``` sql
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Supported non-standard formats**
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
**Returned values**
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
- `NULL` if the input string cannot be converted to the `DateTime` data type.
**Examples**
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────────┘
```
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
**Syntax**
``` sql
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Supported non-standard formats**
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
**Returned values**
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
- Zero date or zero date with time if the input string cannot be converted to the `DateTime` data type.
**Examples**
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 1970-01-01 00:00:00 │
└─────────────────────────────────┘
```
## toLowCardinality {#tolowcardinality}
Converts input parameter to the [LowCardianlity](../../sql-reference/data-types/lowcardinality.md) version of same data type.
@ -720,7 +1028,7 @@ Type: `LowCardinality(expr_result_type)`
Query:
``` sql
SELECT toLowCardinality('1')
SELECT toLowCardinality('1');
```
Result:
@ -759,7 +1067,7 @@ Query:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64)
SELECT toUnixTimestamp64Milli(dt64);
```
Result:
@ -772,7 +1080,7 @@ Result:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64)
SELECT toUnixTimestamp64Nano(dt64);
```
Result:
@ -806,13 +1114,17 @@ fromUnixTimestamp64Milli(value [, ti])
- `value` converted to the `DateTime64` data type.
**Examples**
**Example**
Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC')
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
```
Result:
``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
│ 2009-02-13 23:31:31.011 │
@ -844,7 +1156,7 @@ Query:
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3)
FROM numbers(3);
```
Result:
@ -885,7 +1197,7 @@ Query:
``` sql
SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3)
FROM numbers(3);
```
Result:

View File

@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
If the left side is a single column that is in the index, and the right side is a set of constants, the system uses the index for processing the query.
Dont list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section “External data for query processing”), then use a subquery.
Dont list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery.
The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets.
ClickHouse allows types to differ in the left and the right parts of `IN` subquery. In this case it converts the left side value to the type of the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function is applied. That means, that the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, it returns [NULL](../../sql-reference/syntax.md#null-literal).
**Example**
Query:
``` sql
SELECT '1' IN (SELECT 1);
```
Result:
``` text
┌─in('1', _subquery49)─┐
│ 1 │
└──────────────────────┘
```
If the right side of the operator is the name of a table (for example, `UserID IN users`), this is equivalent to the subquery `UserID IN (SELECT * FROM users)`. Use this when working with external data that is sent along with the query. For example, the query can be sent together with a set of user IDs loaded to the users temporary table, which should be filtered.
If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query.

View File

@ -81,5 +81,5 @@ The `TTL` is no longer there, so the second row is not deleted:
### See Also
- More about the [TTL-expression](../../../sql-reference/statements/create/table#ttl-expression).
- Modify column [with TTL](../../../sql-reference/statements/alter/column#alter_modify-column).
- More about the [TTL-expression](../../../../sql-reference/statements/create/table#ttl-expression).
- Modify column [with TTL](../../../../sql-reference/statements/alter/column#alter_modify-column).

View File

@ -4,10 +4,8 @@ toc_title: ALL
# ALL Clause {#select-all}
`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
If there are multiple matching rows in the table, then `ALL` returns all of them. `SELECT ALL` is identical to `SELECT` without `DISTINCT`. If both `ALL` and `DISTINCT` specified, exception will be thrown.
- If `ALL` specified, ignore it.
- If both `ALL` and `DISTINCT` specified, exception will be thrown.
`ALL` can also be specified inside aggregate function with the same effect(noop), for instance:
@ -19,3 +17,5 @@ equals to
```sql
SELECT sum(number) FROM numbers(10);
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/all) <!--hide-->

View File

@ -15,9 +15,9 @@ Creates a table from a file. This table function is similar to [url](../../sql-r
file(path, format, structure)
```
**Input parameters**
**Parameters**
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
@ -39,7 +39,7 @@ $ cat /var/lib/clickhouse/user_files/test.csv
78,43,45
```
Getting data from a table in `test.csv` and selecting first two rows from it:
Getting data from a table in `test.csv` and selecting the first two rows from it:
``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2;
@ -51,7 +51,8 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
│ 3 │ 2 │ 1 │
└─────────┴─────────┴─────────┘
```
Getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file:
Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file:
``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
@ -71,17 +72,16 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
└─────────┴─────────┴─────────┘
```
## Globs in Path {#globs-in-path}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix).
Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix).
- `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
Constructions with `{}` are similar to the [remote](remote.md) table function.
**Example**
@ -94,13 +94,13 @@ Suppose we have several files with the following relative paths:
- 'another_dir/some_file_2'
- 'another_dir/some_file_3'
Query the amount of rows in these files:
Query the number of rows in these files:
``` sql
SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32');
```
Query the amount of rows in all files of these two directories:
Query the number of rows in all files of these two directories:
``` sql
SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
@ -124,6 +124,6 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
**See Also**
- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns)
- [Virtual columns](index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/file/) <!--hide-->

View File

@ -44,7 +44,7 @@ The rest of the conditions and the `LIMIT` sampling constraint are executed in C
A table object with the same columns as the original MySQL table.
!!! info "Note"
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
**Examples**

View File

@ -5,7 +5,7 @@ toc_title: remote
# remote, remoteSecure {#remote-remotesecure}
Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with secured connection.
Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with a secured connection.
Both functions can be used in `SELECT` and `INSERT` queries.
@ -18,9 +18,9 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
```
**Input parameters**
**Parameters**
- `addresses_expr` An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`.
- `addresses_expr` An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`.
The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets.
@ -30,19 +30,19 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
Type: [String](../../sql-reference/data-types/string.md).
- `db` - Database name. Type: [String](../../sql-reference/data-types/string.md).
- `table` - Table name. Type: [String](../../sql-reference/data-types/string.md).
- `user` - User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
- `password` - User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
- `sharding_key` - Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
- `db` Database name. Type: [String](../../sql-reference/data-types/string.md).
- `table` Table name. Type: [String](../../sql-reference/data-types/string.md).
- `user` User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
- `password` User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
- `sharding_key` Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Returned value**
Dataset from remote servers.
The dataset from remote servers.
**Usage**
Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and dont use the `remote` table function.
Using the `remote` table function is less optimal than creating a `Distributed` table because in this case the server connection is re-established for every request. Also, if hostnames are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and dont use the `remote` table function.
The `remote` table function can be useful in the following cases:
@ -62,7 +62,7 @@ localhost
[2a02:6b8:0:1111::11]:9000
```
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data). Example:
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like shards with different data). Example:
``` text
example01-01-1,example01-02-1
@ -82,7 +82,7 @@ example01-{01..02}-1
If you have multiple pairs of curly brackets, it generates the direct product of the corresponding sets.
Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md) setting. This example specifies two shards that each have two replicas:
Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting. This example specifies two shards that each have two replicas:
``` text
example01-{01..02}-{1|2}

View File

@ -15,25 +15,25 @@ toc_title: url
url(URL, format, structure)
```
**Input parameters**
**Parameters**
- `URL` - HTTP or HTTPS server address, which can accept `GET` (for `SELECT`) or `POST` (for `INSERT`) requests. Type: [String](../../sql-reference/data-types/string.md).
- `format` - [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
- `structure` - Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
- `URL` — HTTP or HTTPS server address, which can accept `GET` or `POST` requests (for `SELECT` or `INSERT` queries correspondingly). Type: [String](../../sql-reference/data-types/string.md).
- `format` [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
- `structure` Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
**Returned value**
A table with the specified format and structure and with data from the defined URL.
A table with the specified format and structure and with data from the defined `URL`.
**Examples**
Getting the first 3 lines of a table that contains columns of `String` and `UInt32` type from HTTP-server which answers in `CSV` format.
Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from HTTP-server which answers in [CSV](../../interfaces/formats.md/#csv) format.
``` sql
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
```
Inserting data from a URL into a table:
Inserting data from a `URL` into a table:
``` sql
CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory;

View File

@ -31,21 +31,26 @@ SETTINGS
[kafka_schema = '',]
[kafka_num_consumers = N,]
[kafka_skip_broken_messages = N]
[kafka_commit_every_batch = 0,]
[kafka_thread_per_consumer = 0]
```
Обязательные параметры:
- `kafka_broker_list` перечень брокеров, разделенный запятыми (`localhost:9092`).
- `kafka_topic_list` перечень необходимых топиков Kafka.
- `kafka_group_name` группа потребителя Kafka. Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы.
- `kafka_format` формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../../interfaces/formats.md).
- `kafka_broker_list` перечень брокеров, разделенный запятыми (`localhost:9092`).
- `kafka_topic_list` перечень необходимых топиков Kafka.
- `kafka_group_name` группа потребителя Kafka. Отступы для чтения отслеживаются для каждой группы отдельно. Если необходимо, чтобы сообщения не повторялись на кластере, используйте везде одно имя группы.
- `kafka_format` формат сообщений. Названия форматов должны быть теми же, что можно использовать в секции `FORMAT`, например, `JSONEachRow`. Подробнее читайте в разделе [Форматы](../../../interfaces/formats.md).
Опциональные параметры:
- `kafka_row_delimiter` символ-разделитель записей (строк), которым завершается сообщение.
- `kafka_schema` опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Capn Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`.
- `kafka_num_consumers` количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя.
- `kafka_skip_broken_messages` максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию 0.
- `kafka_row_delimiter` — символ-разделитель записей (строк), которым завершается сообщение.
- `kafka_schema` — опциональный параметр, необходимый, если используется формат, требующий определения схемы. Например, [Capn Proto](https://capnproto.org/) требует путь к файлу со схемой и название корневого объекта `schema.capnp:Message`.
- `kafka_num_consumers` — количество потребителей (consumer) на таблицу. По умолчанию: `1`. Укажите больше потребителей, если пропускная способность одного потребителя недостаточна. Общее число потребителей не должно превышать количество партиций в топике, так как на одну партицию может быть назначено не более одного потребителя.
- `kafka_max_block_size` — максимальный размер пачек (в сообщениях) для poll (по умолчанию `max_block_size`).
- `kafka_skip_broken_messages` — максимальное количество некорректных сообщений в блоке. Если `kafka_skip_broken_messages = N`, то движок отбрасывает `N` сообщений Кафки, которые не получилось обработать. Одно сообщение в точности соответствует одной записи (строке). Значение по умолчанию 0.
- `kafka_commit_every_batch` — включает или отключает режим записи каждой принятой и обработанной пачки по отдельности вместо единой записи целого блока (по умолчанию `0`).
- `kafka_thread_per_consumer` — включает или отключает предоставление отдельного потока каждому потребителю (по умолчанию `0`). При включенном режиме каждый потребитель сбрасывает данные независимо и параллельно, при отключённом — строки с данными от нескольких потребителей собираются в один блок.
Примеры

View File

@ -63,7 +63,7 @@ SELECT * FROM file_engine_table
## Использование движка в Clickhouse-local {#ispolzovanie-dvizhka-v-clickhouse-local}
В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`.
В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`. Можно записывать и читать сжатые файлы. Для этого нужно задать дополнительный параметр движка или расширение файла (`gz`, `br` или `xz`).
**Пример:**

View File

@ -0,0 +1,416 @@
---
toc_priority: 20
toc_title: Brown University Benchmark
---
# Brown University Benchmark
`MgBench` — это аналитический тест производительности для данных журнала событий, сгенерированных машиной. Бенчмарк разработан [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
Скачать данные:
```
wget https://datasets.clickhouse.tech/mgbench{1..3}.csv.xz
```
Распаковать данные:
```
xz -v -d mgbench{1..3}.csv.xz
```
Создание таблиц:
```
CREATE DATABASE mgbench;
CREATE TABLE mgbench.logs1 (
log_time DateTime,
machine_name LowCardinality(String),
machine_group LowCardinality(String),
cpu_idle Nullable(Float32),
cpu_nice Nullable(Float32),
cpu_system Nullable(Float32),
cpu_user Nullable(Float32),
cpu_wio Nullable(Float32),
disk_free Nullable(Float32),
disk_total Nullable(Float32),
part_max_used Nullable(Float32),
load_fifteen Nullable(Float32),
load_five Nullable(Float32),
load_one Nullable(Float32),
mem_buffers Nullable(Float32),
mem_cached Nullable(Float32),
mem_free Nullable(Float32),
mem_shared Nullable(Float32),
swap_free Nullable(Float32),
bytes_in Nullable(Float32),
bytes_out Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (machine_group, machine_name, log_time);
CREATE TABLE mgbench.logs2 (
log_time DateTime,
client_ip IPv4,
request String,
status_code UInt16,
object_size UInt64
)
ENGINE = MergeTree()
ORDER BY log_time;
CREATE TABLE mgbench.logs3 (
log_time DateTime64,
device_id FixedString(15),
device_name LowCardinality(String),
device_type LowCardinality(String),
device_floor UInt8,
event_type LowCardinality(String),
event_unit FixedString(1),
event_value Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (event_type, log_time);
```
Вставка данных:
```
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
```
Запуск тестов производительности:
```
-- Q1.1: What is the CPU/network utilization for each web server since midnight?
SELECT machine_name,
MIN(cpu) AS cpu_min,
MAX(cpu) AS cpu_max,
AVG(cpu) AS cpu_avg,
MIN(net_in) AS net_in_min,
MAX(net_in) AS net_in_max,
AVG(net_in) AS net_in_avg,
MIN(net_out) AS net_out_min,
MAX(net_out) AS net_out_max,
AVG(net_out) AS net_out_avg
FROM (
SELECT machine_name,
COALESCE(cpu_user, 0.0) AS cpu,
COALESCE(bytes_in, 0.0) AS net_in,
COALESCE(bytes_out, 0.0) AS net_out
FROM logs1
WHERE machine_name IN ('anansi','aragog','urd')
AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
) AS r
GROUP BY machine_name;
-- Q1.2: Which computer lab machines have been offline in the past day?
SELECT machine_name,
log_time
FROM logs1
WHERE (machine_name LIKE 'cslab%' OR
machine_name LIKE 'mslab%')
AND load_one IS NULL
AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
ORDER BY machine_name,
log_time;
-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation?
SELECT dt,
hr,
AVG(load_fifteen) AS load_fifteen_avg,
AVG(load_five) AS load_five_avg,
AVG(load_one) AS load_one_avg,
AVG(mem_free) AS mem_free_avg,
AVG(swap_free) AS swap_free_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
load_fifteen,
load_five,
load_one,
mem_free,
swap_free
FROM logs1
WHERE machine_name = 'babbage'
AND load_fifteen IS NOT NULL
AND load_five IS NOT NULL
AND load_one IS NOT NULL
AND mem_free IS NOT NULL
AND swap_free IS NOT NULL
AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
) AS r
GROUP BY dt,
hr
ORDER BY dt,
hr;
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
SELECT machine_name,
COUNT(*) AS spikes
FROM logs1
WHERE machine_group = 'Servers'
AND cpu_wio > 0.99
AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
AND log_time < TIMESTAMP '2017-01-01 00:00:00'
GROUP BY machine_name
ORDER BY spikes DESC
LIMIT 10;
-- Q1.5: Which externally reachable VMs have run low on memory?
SELECT machine_name,
dt,
MIN(mem_free) AS mem_free_min
FROM (
SELECT machine_name,
CAST(log_time AS DATE) AS dt,
mem_free
FROM logs1
WHERE machine_group = 'DMZ'
AND mem_free IS NOT NULL
) AS r
GROUP BY machine_name,
dt
HAVING MIN(mem_free) < 10000
ORDER BY machine_name,
dt;
-- Q1.6: What is the total hourly network traffic across all file servers?
SELECT dt,
hr,
SUM(net_in) AS net_in_sum,
SUM(net_out) AS net_out_sum,
SUM(net_in) + SUM(net_out) AS both_sum
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
FROM logs1
WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
'poprocks','razzles','runts','smarties','smuggler','spree','stride',
'tootsie','trident','wrigley','york')
) AS r
GROUP BY dt,
hr
ORDER BY both_sum DESC
LIMIT 10;
-- Q2.1: Which requests have caused server errors within the past 2 weeks?
SELECT *
FROM logs2
WHERE status_code >= 500
AND log_time >= TIMESTAMP '2012-12-18 00:00:00'
ORDER BY log_time;
-- Q2.2: During a specific 2-week period, was the user password file leaked?
SELECT *
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND request LIKE '%/etc/passwd%'
AND log_time >= TIMESTAMP '2012-05-06 00:00:00'
AND log_time < TIMESTAMP '2012-05-20 00:00:00';
-- Q2.3: What was the average path depth for top-level requests in the past month?
SELECT top_level,
AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg
FROM (
SELECT SUBSTRING(request FROM 1 FOR len) AS top_level,
request
FROM (
SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len,
request
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND log_time >= TIMESTAMP '2012-12-01 00:00:00'
) AS r
WHERE len > 0
) AS s
WHERE top_level IN ('/about','/courses','/degrees','/events',
'/grad','/industry','/news','/people',
'/publications','/research','/teaching','/ugrad')
GROUP BY top_level
ORDER BY top_level;
-- Q2.4: During the last 3 months, which clients have made an excessive number of requests?
SELECT client_ip,
COUNT(*) AS num_requests
FROM logs2
WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00'
GROUP BY client_ip
HAVING COUNT(*) >= 100000
ORDER BY num_requests DESC;
-- Q2.5: What are the daily unique visitors?
SELECT dt,
COUNT(DISTINCT client_ip)
FROM (
SELECT CAST(log_time AS DATE) AS dt,
client_ip
FROM logs2
) AS r
GROUP BY dt
ORDER BY dt;
-- Q2.6: What are the average and maximum data transfer rates (Gbps)?
SELECT AVG(transfer) / 125000000.0 AS transfer_avg,
MAX(transfer) / 125000000.0 AS transfer_max
FROM (
SELECT log_time,
SUM(object_size) AS transfer
FROM logs2
GROUP BY log_time
) AS r;
-- Q3.1: Did the indoor temperature reach freezing over the weekend?
SELECT *
FROM logs3
WHERE event_type = 'temperature'
AND event_value <= 32.0
AND log_time >= '2019-11-29 17:00:00.000';
-- Q3.4: Over the past 6 months, how frequently were each door opened?
SELECT device_name,
device_floor,
COUNT(*) AS ct
FROM logs3
WHERE event_type = 'door_open'
AND log_time >= '2019-06-01 00:00:00.000'
GROUP BY device_name,
device_floor
ORDER BY ct DESC;
-- Q3.5: Where in the building do large temperature variations occur in winter and summer?
WITH temperature AS (
SELECT dt,
device_name,
device_type,
device_floor
FROM (
SELECT dt,
hr,
device_name,
device_type,
device_floor,
AVG(event_value) AS temperature_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
device_name,
device_type,
device_floor,
event_value
FROM logs3
WHERE event_type = 'temperature'
) AS r
GROUP BY dt,
hr,
device_name,
device_type,
device_floor
) AS s
GROUP BY dt,
device_name,
device_type,
device_floor
HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0
)
SELECT DISTINCT device_name,
device_type,
device_floor,
'WINTER'
FROM temperature
WHERE dt >= DATE '2018-12-01'
AND dt < DATE '2019-03-01'
UNION
SELECT DISTINCT device_name,
device_type,
device_floor,
'SUMMER'
FROM temperature
WHERE dt >= DATE '2019-06-01'
AND dt < DATE '2019-09-01';
-- Q3.6: For each device category, what are the monthly power consumption metrics?
SELECT yr,
mo,
SUM(coffee_hourly_avg) AS coffee_monthly_sum,
AVG(coffee_hourly_avg) AS coffee_monthly_avg,
SUM(printer_hourly_avg) AS printer_monthly_sum,
AVG(printer_hourly_avg) AS printer_monthly_avg,
SUM(projector_hourly_avg) AS projector_monthly_sum,
AVG(projector_hourly_avg) AS projector_monthly_avg,
SUM(vending_hourly_avg) AS vending_monthly_sum,
AVG(vending_hourly_avg) AS vending_monthly_avg
FROM (
SELECT dt,
yr,
mo,
hr,
AVG(coffee) AS coffee_hourly_avg,
AVG(printer) AS printer_hourly_avg,
AVG(projector) AS projector_hourly_avg,
AVG(vending) AS vending_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(YEAR FROM log_time) AS yr,
EXTRACT(MONTH FROM log_time) AS mo,
EXTRACT(HOUR FROM log_time) AS hr,
CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee,
CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer,
CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector,
CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending
FROM logs3
WHERE device_type = 'meter'
) AS r
GROUP BY dt,
yr,
mo,
hr
) AS s
GROUP BY yr,
mo
ORDER BY yr,
mo;
```
Данные также доступны для работы с интерактивными запросами через [Playground](https://gh-api.clickhouse.tech/play?user=play), [пример](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
[Оригинальная статья](https://clickhouse.tech/docs/ru/getting_started/example_datasets/brown-benchmark/) <!--hide-->

View File

@ -149,28 +149,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа.
Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor` (устанавливается вместе с пакетом `clickhouse-client`). Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки[http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress).
Если вы указали `compress = 1` в URL, то сервер сжимает данные, которые он отправляет.
Если вы указали `decompress = 1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`.
## Сжатие {#compression}
Также, можно использовать [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`. Чтобы ClickHouse сжимал ответ, добавьте заголовок `Accept-Encoding: compression_method`. ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens): `gzip`, `br`, and `deflate`. Чтобы включить HTTP compression, используйте настройку ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression). Уровень сжатия данных для всех методов сжатия можно настроить с помощью настройки [http_zlib_compression_level](#settings-http_zlib_compression_level).
Сжатие можно использовать для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов.
Это может быть использовано для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов.
Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor`. Она устанавливается вместе с пакетом `clickhouse-client`. Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress).
Примеры отправки данных со сжатием:
Если вы указали `compress=1` в URL, то сервер сжимает данные, которые он отправляет. Если вы указали `decompress=1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`.
``` bash
$ #Отправка данных на сервер:
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
Также можно использовать [сжатие HTTP](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
$ #Отправка данных клиенту:
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
- `gzip`
- `br`
- `deflate`
- `xz`
Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`.
Чтобы ClickHouse сжимал ответ, разрешите сжатие настройкой [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) и добавьте заголовок `Accept-Encoding: compression_method`. Уровень сжатия данных для всех методов сжатия можно задать с помощью настройки [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level).
!!! note "Примечание"
Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия.
**Примеры**
``` bash
# Отправка сжатых данных на сервер
$ echo "SELECT 1" | gzip -c | \
curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
``` bash
# Получение сжатых данных с сервера
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
0
1
2
```
## База данных по умолчанию {#default-database}
Вы можете использовать параметр URL `database` или заголовок `X-ClickHouse-Database`, чтобы указать БД по умолчанию.
``` bash

View File

@ -22,6 +22,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u
- [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client)
- [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php)
- Go
- [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -0,0 +1,29 @@
---
toc_priority: 65
toc_title: Кеши
---
# Типы кеша {#cache-types}
При выполнении запросов ClickHouse использует различные типы кеша.
Основные типы кеша:
- `mark_cache` — кеш засечек, используемых движками таблиц семейства [MergeTree](../engines/table-engines/mergetree-family/mergetree.md).
- `uncompressed_cache` — кеш несжатых данных, используемых движками таблиц семейства [MergeTree](../engines/table-engines/mergetree-family/mergetree.md).
Дополнительные типы кеша:
- DNS-кеш.
- Кеш данных формата [regexp](../interfaces/formats.md#data-format-regexp).
- Кеш скомпилированных выражений.
- Кеш схем формата [Avro](../interfaces/formats.md#data-format-avro).
- Кеш данных в [словарях](../sql-reference/dictionaries/index.md).
Непрямое использование:
- Кеш страницы ОС.
Чтобы очистить кеш, используйте выражение [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md).
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/caches/) <!--hide-->

View File

@ -1937,6 +1937,21 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
Значение по умолчанию: 16.
## background_message_broker_schedule_pool_size {#background_message_broker_schedule_pool_size}
Задает количество потоков для фонового потокового вывода сообщений. Настройка применяется при запуске сервера ClickHouse и не может быть изменена в пользовательском сеансе.
Допустимые значения:
- Положительное целое число.
Значение по умолчанию: 16.
**Смотрите также**
- Движок [Kafka](../../engines/table-engines/integrations/kafka.md#kafka).
- Движок [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine).
## format_avro_schema_registry_url {#format_avro_schema_registry_url}
Задает URL реестра схем [Confluent](https://docs.confluent.io/current/schema-registry/index.html) для использования с форматом [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent).
@ -2537,4 +2552,15 @@ SELECT * FROM test2;
Обратите внимание на то, что эта настройка влияет на поведение [материализованных представлений](../../sql-reference/statements/create/view.md#materialized) и БД [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md).
## allow_experimental_geo_types {#allow-experimental-geo-types}
Разрешает использование экспериментальных типов данных для работы с [географическими структурами](../../sql-reference/data-types/geo.md).
Возможные значения:
- 0 — Использование типов данных для работы с географическими структурами не поддерживается.
- 1 — Использование типов данных для работы с географическими структурами поддерживается.
Значение по умолчанию: `0`.
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->

View File

@ -14,7 +14,7 @@
- `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — продолжительность выполнения запроса (в миллисекундах).
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — продолжительность выполнения запроса (в миллисекундах).
- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper).
**Пример**

View File

@ -9,25 +9,54 @@ toc_title: "\u0421\u0438\u0441\u0442\u0435\u043c\u043d\u044b\u0435\u0020\u0442\u
Системные таблицы содержат информацию о:
- Состоянии сервера, процессов и окружении.
- Внутренних процессах сервера.
- состоянии сервера, процессов и окружении.
- внутренних процессах сервера.
Системные таблицы:
- Находятся в базе данных `system`.
- Доступны только для чтения данных.
- Не могут быть удалены или изменены, но их можно отсоединить.
- находятся в базе данных `system`.
- доступны только для чтения данных.
- не могут быть удалены или изменены, но их можно отсоединить.
Системные таблицы `metric_log`, `query_log`, `query_thread_log`, `trace_log` системные таблицы хранят данные в файловой системе. Остальные системные таблицы хранят свои данные в оперативной памяти. Сервер ClickHouse создает такие системные таблицы при запуске.
Большинство системных таблиц хранят свои данные в оперативной памяти. Сервер ClickHouse создает эти системные таблицы при старте.
В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) и [text_log](../../operations/system-tables/text_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую.
Таблицы с системными логами `log` можно настроить, создав конфигурационный файл с тем же именем, что и таблица в разделе `/etc/clickhouse-server/config.d/`, или указав соответствующие элементы в `/etc/clickhouse-server/config.xml`. Настраиваться могут следующие элементы:
- `database` — база данных, к которой принадлежит системная таблица. Эта опция на текущий момент устарела. Все системные таблицы находятся в базе данных `system`.
- `table` — таблица для добавления данных.
- `partition_by` — [ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md).
- `ttl` — [время жизни](../../sql-reference/statements/alter/ttl.md) таблицы.
- `flush_interval_milliseconds` — интервал сброса данных на диск, в миллисекундах.
- `engine` — полное имя движка (начиная с `ENGINE =` ) с параметрами. Эта опция противоречит `partition_by` и `ttl`. Если указать оба параметра вместе, сервер вернет ошибку и завершит работу.
Пример:
```xml
<yandex>
<query_log>
<database>system</database>
<table>query_log</table>
<partition_by>toYYYYMM(event_date)</partition_by>
<ttl>event_date + INTERVAL 30 DAY DELETE</ttl>
<!--
<engine>ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024</engine>
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
```
По умолчанию размер таблицы не ограничен. Управлять размером таблицы можно используя [TTL](../../sql-reference/statements/alter/ttl.md#manipuliatsii-s-ttl-tablitsy) для удаления устаревших записей журнала. Также вы можете использовать функцию партиционирования для таблиц `MergeTree`.
### Источники системных показателей
Для сбора системных показателей сервер ClickHouse использует:
- Возможности `CAP_NET_ADMIN`.
- возможности `CAP_NET_ADMIN`.
- [procfs](https://ru.wikipedia.org/wiki/Procfs) (только Linux).
**procfs**
Если для сервера ClickHouse не включено `CAP_NET_ADMIN`, он пытается обратиться к `ProcfsMetricsProvider`. `ProcfsMetricsProvider` позволяет собирать системные показатели для каждого запроса (для CPU и I/O).

View File

@ -21,7 +21,8 @@ toc_title: clickhouse-local
Основной формат вызова:
``` bash
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query"
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \
--query "query"
```
Ключи команды:
@ -76,7 +77,9 @@ $ clickhouse-local --query "
1 2
```
А теперь давайте выведем на экран объём оперативной памяти, занимаемой пользователями (Unix):
Объём оперативной памяти, занимаемой процессами, которые запустил пользователь (Unix):
Запрос:
``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
@ -85,6 +88,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
```
Результат:
``` text
Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
┏━━━━━━━━━━┳━━━━━━━━━━┓

View File

@ -239,7 +239,7 @@ windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
**Параметры**
- `window` — ширина скользящего окна по времени в секундах. [UInt](../../sql-reference/aggregate-functions/parametric-functions.md).
- `window` — ширина скользящего окна по времени. Единица измерения зависит от `timestamp` и может варьироваться. Должно соблюдаться условие `timestamp события cond2 <= timestamp события cond1 + window`.
- `mode` - необязательный параметр. Если установлено значение `'strict'`, то функция `windowFunnel()` применяет условия только для уникальных значений.
- `timestamp` — имя столбца, содержащего временные отметки. [Date](../../sql-reference/aggregate-functions/parametric-functions.md), [DateTime](../../sql-reference/aggregate-functions/parametric-functions.md#data_type-datetime) и другие параметры с типом `Integer`. В случае хранения меток времени в столбцах с типом `UInt64`, максимально допустимое значение соответствует ограничению для типа `Int64`, т.е. равно `2^63-1`.
- `cond` — условия или данные, описывающие цепочку событий. [UInt8](../../sql-reference/aggregate-functions/parametric-functions.md).

View File

@ -0,0 +1,106 @@
---
toc_priority: 62
toc_title: Географические структуры
---
# Типы данных для работы с географическими структурами {#geo-data-types}
ClickHouse поддерживает типы данных для отображения географических объектов — точек (местоположений), территорий и т.п.
!!! warning "Предупреждение"
Сейчас использование типов данных для работы с географическими структурами является экспериментальной возможностью. Чтобы использовать эти типы данных, включите настройку `allow_experimental_geo_types = 1`.
**См. также**
- [Хранение географических структур данных](https://ru.wikipedia.org/wiki/GeoJSON).
- Настройка [allow_experimental_geo_types](../../operations/settings/settings.md#allow-experimental-geo-types).
## Point {#point-data-type}
Тип `Point` (точка) определяется парой координат X и Y и хранится в виде кортежа [Tuple](tuple.md)([Float64](float.md), [Float64](float.md)).
**Пример**
Запрос:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_point (p Point) ENGINE = Memory();
INSERT INTO geo_point VALUES((10, 10));
SELECT p, toTypeName(p) FROM geo_point;
```
Результат:
``` text
┌─p─────┬─toTypeName(p)─┐
│ (10,10) │ Point │
└───────┴───────────────┘
```
## Ring {#ring-data-type}
Тип `Ring` описывает простой многоугольник без внутренних областей (дыр) и хранится в виде массива точек: [Array](array.md)([Point](#point-data-type)).
**Пример**
Запрос:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_ring (r Ring) ENGINE = Memory();
INSERT INTO geo_ring VALUES([(0, 0), (10, 0), (10, 10), (0, 10)]);
SELECT r, toTypeName(r) FROM geo_ring;
```
Результат:
``` text
┌─r─────────────────────────────┬─toTypeName(r)─┐
│ [(0,0),(10,0),(10,10),(0,10)] │ Ring │
└───────────────────────────────┴───────────────┘
```
## Polygon {#polygon-data-type}
Тип `Polygon` описывает многоугольник с внутренними областями (дырами) и хранится в виде массива: [Array](array.md)([Ring](#ring-data-type)). Первый элемент массива описывает внешний многоугольник (контур), а остальные элементы описывают дыры.
**Пример**
Запись в этой таблице описывает многоугольник с одной дырой:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_polygon (pg Polygon) ENGINE = Memory();
INSERT INTO geo_polygon VALUES([[(20, 20), (50, 20), (50, 50), (20, 50)], [(30, 30), (50, 50), (50, 30)]]);
SELECT pg, toTypeName(pg) FROM geo_polygon;
```
Результат:
``` text
┌─pg────────────────────────────────────────────────────────────┬─toTypeName(pg)─┐
│ [[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]] │ Polygon │
└───────────────────────────────────────────────────────────────┴────────────────┘
```
## MultiPolygon {#multipolygon-data-type}
Тип `MultiPolygon` описывает элемент, состоящий из нескольких простых многоугольников (полигональную сетку). Он хранится в виде массива многоугольников: [Array](array.md)([Polygon](#polygon-data-type)).
**Пример**
Запись в этой таблице описывает элемент, состоящий из двух многоугольников — первый без дыр, а второй с одной дырой:
```sql
SET allow_experimental_geo_types = 1;
CREATE TABLE geo_multipolygon (mpg MultiPolygon) ENGINE = Memory();
INSERT INTO geo_multipolygon VALUES([[[(0, 0), (10, 0), (10, 10), (0, 10)]], [[(20, 20), (50, 20), (50, 50), (20, 50)],[(30, 30), (50, 50), (50, 30)]]]);
SELECT mpg, toTypeName(mpg) FROM geo_multipolygon;
```
Result:
``` text
┌─mpg─────────────────────────────────────────────────────────────────────────────────────────────┬─toTypeName(mpg)─┐
│ [[[(0,0),(10,0),(10,10),(0,10)]],[[(20,20),(50,20),(50,50),(20,50)],[(30,30),(50,50),(50,30)]]] │ MultiPolygon │
└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/data-types/geo/) <!--hide-->

View File

@ -1,8 +1,9 @@
# SimpleAggregateFunction {#data-type-simpleaggregatefunction}
# SimpleAggregateFunction(func, type) {#data-type-simpleaggregatefunction}
`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we dont have to store and process any extra data.
Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк,
а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому хранить и обрабатывать какие-либо дополнительные данные не требуется.
The following aggregate functions are supported:
Поддерживаются следующие агрегатные функции:
- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any)
- [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx)
@ -15,22 +16,24 @@ The following aggregate functions are supported:
- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor)
- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray)
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray)
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function.
!!! note "Примечание"
Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State]((../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются.
**Parameters**
`SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией.
- Name of the aggregate function.
- Types of the aggregate function arguments.
**Параметры**
**Example**
- `func` — имя агрегатной функции.
- `type` — типы аргументов агрегатной функции.
**Пример**
``` sql
CREATE TABLE t
(
column1 SimpleAggregateFunction(sum, UInt64),
column2 SimpleAggregateFunction(any, String)
) ENGINE = ...
CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id;
```
[Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/en/sql-reference/data-types/simpleaggregatefunction/) <!--hide-->

View File

@ -572,7 +572,7 @@ SOURCE(CLICKHOUSE(
или
``` sql
SOURCE(MONGO(
SOURCE(MONGODB(
host 'localhost'
port 27017
user ''

View File

@ -1355,6 +1355,52 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res;
└─────┘
```
**Синтаксис**
``` sql
arraySum(arr)
```
**Возвращаемое значение**
- Число.
Тип: [Int](../../sql-reference/data-types/int-uint.md) или [Float](../../sql-reference/data-types/float.md).
**Параметры**
- `arr` — [Массив](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
```sql
SELECT arraySum([2,3]) AS res;
```
Результат:
``` text
┌─res─┐
│ 5 │
└─────┘
```
Запрос:
``` sql
SELECT arraySum(x -> x*x, [2, 3]) AS res;
```
Результат:
``` text
┌─res─┐
│ 13 │
└─────┘
```
## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием.

View File

@ -63,40 +63,58 @@ int32samoa: 1546300800
Переводит дату или дату-с-временем в число типа UInt16, содержащее номер года (AD).
Синоним: `YEAR`.
## toQuarter {#toquarter}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер квартала.
Синоним: `QUARTER`.
## toMonth {#tomonth}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер месяца (1-12).
Синоним: `MONTH`.
## toDayOfYear {#todayofyear}
Переводит дату или дату-с-временем в число типа UInt16, содержащее номер дня года (1-366).
Синоним: `DAYOFYEAR`.
## toDayOfMonth {#todayofmonth}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в месяце (1-31).
Синонимы: `DAYOFMONTH`, `DAY`.
## toDayOfWeek {#todayofweek}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в неделе (понедельник - 1, воскресенье - 7).
Синоним: `DAYOFWEEK`.
## toHour {#tohour}
Переводит дату-с-временем в число типа UInt8, содержащее номер часа в сутках (0-23).
Функция исходит из допущения, что перевод стрелок вперёд, если осуществляется, то на час, в два часа ночи, а перевод стрелок назад, если осуществляется, то на час, в три часа ночи (что, в общем, не верно - даже в Москве два раза перевод стрелок был осуществлён в другое время).
Синоним: `HOUR`.
## toMinute {#tominute}
Переводит дату-с-временем в число типа UInt8, содержащее номер минуты в часе (0-59).
Синоним: `MINUTE`.
## toSecond {#tosecond}
Переводит дату-с-временем в число типа UInt8, содержащее номер секунды в минуте (0-59).
Секунды координации не учитываются.
Синоним: `SECOND`.
## toUnixTimestamp {#to-unix-timestamp}
Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
@ -305,7 +323,9 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d
Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января.
Пример:
**Пример**
Запрос:
```sql
SELECT
@ -313,6 +333,9 @@ SELECT
toYear(date),
toISOYear(date)
```
Результат:
```text
┌───────date─┬─toYear(toDate('2017-01-01'))─┬─toISOYear(toDate('2017-01-01'))─┐
│ 2017-01-01 │ 2017 │ 2016 │
@ -326,12 +349,18 @@ SELECT
1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года.
**Пример**
Запрос:
```sql
SELECT
toISOWeek(toDate('2017-01-01')) AS ISOWeek20170101,
toISOWeek(toDate('2017-01-02')) AS ISOWeek20170102
```
Результат:
```text
┌─ISOWeek20170101─┬─ISOWeek20170102─┐
│ 52 │ 1 │
@ -368,10 +397,14 @@ SELECT
**Пример**
Запрос:
```sql
SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS week1, toWeek(date,9) AS week9;
```
Результат:
```text
┌───────date─┬─week0─┬─week1─┬─week9─┐
│ 2016-12-27 │ 52 │ 52 │ 1 │
@ -387,10 +420,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we
**Пример**
Запрос:
```sql
SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9;
```
Результат:
```text
┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐
│ 2016-12-27 │ 201652 │ 201652 │ 201701 │
@ -573,7 +610,7 @@ dateDiff('unit', startdate, enddate, [timezone])
SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
```
Ответ:
Результат:
``` text
┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
@ -654,10 +691,10 @@ formatDateTime(Time, Format\[, Timezone\])
Запрос:
``` sql
SELECT formatDateTime(toDate('2010-01-04'), '%g')
SELECT formatDateTime(toDate('2010-01-04'), '%g');
```
Ответ:
Результат:
```
┌─formatDateTime(toDate('2010-01-04'), '%g')─┐
@ -665,4 +702,43 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g')
└────────────────────────────────────────────┘
```
## FROM\_UNIXTIME {#fromunixtime}
Функция преобразует Unix timestamp в календарную дату и время.
**Примеры**
Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md).
Запрос:
```sql
SELECT FROM_UNIXTIME(423543535);
```
Результат:
```text
┌─FROM_UNIXTIME(423543535)─┐
│ 1983-06-04 10:58:55 │
└──────────────────────────┘
```
В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает также, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string).
Запрос:
```sql
SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```
Результат:
```text
┌─DateTime────────────┐
│ 2009-02-11 14:42:23 │
└─────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) <!--hide-->

View File

@ -75,6 +75,8 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
Returns a string containing the arguments hexadecimal representation.
Синоним: `HEX`.
**Syntax**
``` sql

View File

@ -11,7 +11,7 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438 \u0434\u043b\u044f \u0448
Длина инициализирующего вектора всегда 16 байт (лишнии байты игнорируются).
Обратите внимание, что эти функции работают медленно.
Обратите внимание, что до версии Clickhouse 21.1 эти функции работали медленно.
## encrypt {#encrypt}
@ -41,7 +41,7 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
**Возвращаемое значение**
- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
- Бинарная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
**Примеры**
@ -52,57 +52,38 @@ encrypt('mode', 'plaintext', 'key' [, iv, aad])
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
`comment` String,
`secret` String
)
ENGINE = Memory;
```
Вставим эти данные:
Вставим некоторые данные (замечание: не храните ключи или инициализирующие векторы в базе данных, так как это компрометирует всю концепцию шифрования), также хранение "подсказок" небезопасно и используется только для наглядности:
Запрос:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
INSERT INTO encryption_test VALUES('aes-256-cfb128 no IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212')),\
('aes-256-cfb128 no IV, different key', encrypt('aes-256-cfb128', 'Secret', 'keykeykeykeykeykeykeykeykeykeyke')),\
('aes-256-cfb128 with IV', encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')),\
('aes-256-cbc no IV', encrypt('aes-256-cbc', 'Secret', '12345678910121314151617181920212'));
```
Пример без `iv`:
Запрос:
``` sql
SELECT 'aes-128-ecb' AS mode, hex(encrypt(mode, input, key16)) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Результат:
``` text
┌─mode────────┬─hex(encrypt('aes-128-ecb', input, key16))────────────────────────┐
│ aes-128-ecb │ 4603E6862B0D94BBEC68E0B0DF51D60F │
│ aes-128-ecb │ 3004851B86D3F3950672DE7085D27C03 │
│ aes-128-ecb │ E807F8C8D40A11F65076361AFC7D8B68D8658C5FAA6457985CAA380F16B3F7E4 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
```
Пример с `iv`:
Запрос:
``` sql
SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
```
Результат:
``` text
┌─mode────────┬─hex(encrypt('aes-256-ctr', input, key32, iv))─┐
│ aes-256-ctr │ │
│ aes-256-ctr │ 7FB039F7 │
│ aes-256-ctr │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2B325949 │
└─────────────┴───────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Пример в режиме `-gcm`:
@ -110,41 +91,27 @@ SELECT 'aes-256-ctr' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encrypti
Запрос:
``` sql
SELECT 'aes-256-gcm' AS mode, hex(encrypt(mode, input, key32, iv)) FROM encryption_test;
INSERT INTO encryption_test VALUES('aes-256-gcm', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv')), \
('aes-256-gcm with AAD', encrypt('aes-256-gcm', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv', 'aad'));
SELECT comment, hex(secret) FROM encryption_test WHERE comment LIKE '%gcm%';
```
Результат:
``` text
┌─mode────────┬─hex(encrypt('aes-256-gcm', input, key32, iv))──────────────────────────┐
│ aes-256-gcm │ E99DBEBC01F021758352D7FBD9039EFA │
│ aes-256-gcm │ 8742CE3A7B0595B281C712600D274CA881F47414 │
│ aes-256-gcm │ A44FD73ACEB1A64BDE2D03808A2576EDBB60764CC6982DB9AF2C33C893D91B00C60DC5 │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
```
Пример в режиме `-gcm` и с `aad`:
Запрос:
``` sql
SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM encryption_test;
```
Результат:
``` text
┌─mode────────┬─hex(encrypt('aes-192-gcm', input, key24, iv, 'AAD'))───────────────────┐
│ aes-192-gcm │ 04C13E4B1D62481ED22B3644595CB5DB │
│ aes-192-gcm │ 9A6CF0FD2B329B04EAD18301818F016DF8F77447 │
│ aes-192-gcm │ B961E9FD9B940EBAD7ADDA75C9F198A40797A5EA1722D542890CC976E21113BBB8A7AA │
└─────────────┴────────────────────────────────────────────────────────────────────────┘
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
```
## aes_encrypt_mysql {#aes_encrypt_mysql}
Совместима с шифрованием myqsl, результат может быть расшифрован функцией [AES_DECRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-decrypt).
При одинаковых входящих значениях зашифрованный текст будет совпадать с результатом, возвращаемым функцией `encrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_encrypt_mysql` будет работать аналогично функции `aes_encrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`.
Функция поддерживает шифрофание данных следующими режимами:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
@ -156,7 +123,7 @@ SELECT 'aes-192-gcm' AS mode, hex(encrypt(mode, input, key24, iv, 'AAD')) FROM e
**Синтаксис**
```sql
``` sql
aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
```
@ -164,78 +131,96 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
- `mode` — режим шифрования. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — текст, который будет зашифрован. [String](../../sql-reference/data-types/string.md#string).
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
- `key` — ключ шифрования. Если ключ длиннее, чем требует режим шифрования, производится специфичная для MySQL свертка ключа. [String](../../sql-reference/data-types/string.md#string).
- `iv` — инициализирующий вектор. Необязателен, учитываются только первые 16 байтов. [String](../../sql-reference/data-types/string.md#string).
**Возвращаемое значение**
- Зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
- Бинарная зашифрованная строка. [String](../../sql-reference/data-types/string.md#string).
**Примеры**
Создадим такую таблицу:
При одинаковых входящих значениях результаты шифрования у функций `encrypt` и `aes_encrypt_mysql` совпадают.
Запрос:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Вставим эти данные:
Запрос:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Пример без `iv`:
Запрос:
``` sql
SELECT 'aes-128-cbc' AS mode, hex(aes_encrypt_mysql(mode, input, key32)) FROM encryption_test;
SELECT encrypt('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') = aes_encrypt_mysql('aes-256-cfb128', 'Secret', '12345678910121314151617181920212', 'iviviviviviviviv') AS ciphertexts_equal;
```
Результат:
``` text
┌─mode────────┬─hex(aes_encrypt_mysql('aes-128-cbc', input, key32))──────────────┐
│ aes-128-cbc │ FEA8CFDE6EE2C6E7A2CC6ADDC9F62C83 │
│ aes-128-cbc │ 78B16CD4BE107660156124C5FEE6454A │
│ aes-128-cbc │ 67C0B119D96F18E2823968D42871B3D179221B1E7EE642D628341C2B29BA2E18 │
└─────────────┴──────────────────────────────────────────────────────────────────┘
┌─ciphertexts_equal─┐
│ 1 │
└───────────────────┘
```
Пример с `iv`:
Функция `encrypt` генерирует исключение, если `key` или `iv` длиннее чем нужно:
Запрос:
``` sql
SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv)) FROM encryption_test;
SELECT encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123');
```
Результат:
``` text
┌─mode───────────┬─hex(aes_encrypt_mysql('aes-256-cfb128', input, key32, iv))─┐
│ aes-256-cfb128 │ │
│ aes-256-cfb128 │ 7FB039F7 │
│ aes-256-cfb128 │ 5CBD20F7ABD3AC41FCAA1A5C0E119E2BB5174F │
└────────────────┴────────────────────────────────────────────────────────────┘
Received exception from server (version 21.1.2):
Code: 36. DB::Exception: Received from localhost:9000. DB::Exception: Invalid key size: 33 expected 32: While processing encrypt('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123').
```
Однако функция `aes_encrypt_mysql` в аналогичном случае возвращает результат, который может быть обработан MySQL:
Запрос:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123')) AS ciphertext;
```
Результат:
```text
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
Если передать `iv` еще длиннее, результат останется таким же:
Запрос:
``` sql
SELECT hex(aes_encrypt_mysql('aes-256-cfb128', 'Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456')) AS ciphertext
```
Результат:
``` text
┌─ciphertext───┐
│ 24E9E4966469 │
└──────────────┘
```
Это совпадает с результатом, возвращаемым MySQL при таких же входящих значениях:
``` sql
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
## decrypt {#decrypt}
Функция поддерживает расшифровку данных следующими режимами:
Функция расшифровывает зашифрованный текст и может работать в следующих режимах:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
@ -247,7 +232,7 @@ SELECT 'aes-256-cfb128' AS mode, hex(aes_encrypt_mysql(mode, input, key32, iv))
**Синтаксис**
```sql
``` sql
decrypt('mode', 'ciphertext', 'key' [, iv, aad])
```
@ -265,52 +250,58 @@ decrypt('mode', 'ciphertext', 'key' [, iv, aad])
**Примеры**
Создадим такую таблицу:
Рассмотрим таблицу из примера для функции [encrypt](#encrypt).
Запрос:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
Вставим эти данные:
Запрос:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
```
Запрос:
``` sql
SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16) FROM encryption_test;
SELECT comment, hex(secret) FROM encryption_test;
```
Результат:
```text
┌─mode────────┬─decrypt('aes-128-ecb', encrypt('aes-128-ecb', input, key16), key16)─┐
│ aes-128-ecb │ │
│ aes-128-ecb │ text │
│ aes-128-ecb │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────┘
``` text
┌─comment──────────────┬─hex(secret)──────────────────────────────────┐
│ aes-256-gcm │ A8A3CCBC6426CFEEB60E4EAE03D3E94204C1B09E0254 │
│ aes-256-gcm with AAD │ A8A3CCBC6426D9A1017A0A932322F1852260A4AD6837 │
└──────────────────────┴──────────────────────────────────────────────┘
┌─comment─────────────────────────────┬─hex(secret)──────────────────────┐
│ aes-256-cfb128 no IV │ B4972BDC4459 │
│ aes-256-cfb128 no IV, different key │ 2FF57C092DC9 │
│ aes-256-cfb128 with IV │ 5E6CB398F653 │
│ aes-256-cbc no IV │ 1BC0629A92450D9E73A00E7D02CF4142 │
└─────────────────────────────────────┴──────────────────────────────────┘
```
Теперь попытаемся расшифровать эти данные:
Запрос:
``` sql
SELECT comment, decrypt('aes-256-cfb128', secret, '12345678910121314151617181920212') as plaintext FROM encryption_test;
```
Результат:
``` text
┌─comment─────────────────────────────┬─plaintext─┐
│ aes-256-cfb128 no IV │ Secret │
│ aes-256-cfb128 no IV, different key │ <20>4<EFBFBD>
<20>
│ aes-256-cfb128 with IV │ <20><><EFBFBD>6<EFBFBD>~ │
│aes-256-cbc no IV │ <20>2*4<>h3c<33>4w<34><77>@
└─────────────────────────────────────┴───────────┘
```
Обратите внимание, что только часть данных была расшифрована верно. Оставшаяся часть расшифрована некорректно, так как при шифровании использовались другие значения `mode`, `key`, или `iv`.
## aes_decrypt_mysql {#aes_decrypt_mysql}
Совместима с шифрованием myqsl и может расшифровать данные, зашифрованные функцией [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt).
Функция поддерживает расшифровку данных следующими режимами:
При одинаковых входящих значениях расшифрованный текст будет совпадать с результатом, возвращаемым функцией `decrypt`. Однако если `key` или `iv` длиннее, чем должны быть, `aes_decrypt_mysql` будет работать аналогично функции `aes_decrypt` в MySQL: свернет ключ и проигнорирует лишнюю часть `iv`.
Функция поддерживает расшифровку данных в следующих режимах:
- aes-128-ecb, aes-192-ecb, aes-256-ecb
- aes-128-cbc, aes-192-cbc, aes-256-cbc
@ -321,7 +312,7 @@ SELECT 'aes-128-ecb' AS mode, decrypt(mode, encrypt(mode, input, key16), key16)
**Синтаксис**
```sql
``` sql
aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
```
@ -332,51 +323,39 @@ aes_decrypt_mysql('mode', 'ciphertext', 'key' [, iv])
- `key` — ключ шифрования. [String](../../sql-reference/data-types/string.md#string).
- `iv` — инициализирующий вектор. Необязателен. [String](../../sql-reference/data-types/string.md#string).
**Возвращаемое значение**
- Расшифрованная строка. [String](../../sql-reference/data-types/string.md#string).
**Примеры**
Создадим такую таблицу:
Расшифруем данные, которые до этого были зашифрованы в MySQL:
Запрос:
``` sql
CREATE TABLE encryption_test
(
input String,
key String DEFAULT unhex('fb9958e2e897ef3fdb49067b51a24af645b3626eed2f9ea1dc7fd4dd71b7e38f9a68db2a3184f952382c783785f9d77bf923577108a88adaacae5c141b1576b0'),
iv String DEFAULT unhex('8CA3554377DFF8A369BC50A89780DD85'),
key32 String DEFAULT substring(key, 1, 32),
key24 String DEFAULT substring(key, 1, 24),
key16 String DEFAULT substring(key, 1, 16)
) Engine = Memory;
```
mysql> SET block_encryption_mode='aes-256-cfb128';
Query OK, 0 rows affected (0.00 sec)
Вставим эти данные:
Запрос:
``` sql
INSERT INTO encryption_test (input) VALUES (''), ('text'), ('What Is ClickHouse?');
mysql> SELECT aes_encrypt('Secret', '123456789101213141516171819202122', 'iviviviviviviviv123456') as ciphertext;
+------------------------+
| ciphertext |
+------------------------+
| 0x24E9E4966469 |
+------------------------+
1 row in set (0.00 sec)
```
Запрос:
``` sql
SELECT 'aes-128-cbc' AS mode, aes_decrypt_mysql(mode, aes_encrypt_mysql(mode, input, key), key) FROM encryption_test;
SELECT aes_decrypt_mysql('aes-256-cfb128', unhex('24E9E4966469'), '123456789101213141516171819202122', 'iviviviviviviviv123456') AS plaintext;
```
Результат:
``` text
┌─mode────────┬─aes_decrypt_mysql('aes-128-cbc', aes_encrypt_mysql('aes-128-cbc', input, key), key)─┐
│ aes-128-cbc │ │
│ aes-128-cbc │ text │
│ aes-128-cbc │ What Is ClickHouse? │
└─────────────┴─────────────────────────────────────────────────────────────────────────────────────┘
┌─plaintext─┐
│ Secret │
└───────────┘
```
[Original article](https://clickhouse.tech/docs/ru/sql-reference/functions/encryption_functions/) <!--hide-->

View File

@ -13,6 +13,8 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
isNull(x)
```
Синоним: `ISNULL`.
**Параметры**
- `x` — значение с не составным типом данных.

View File

@ -9,10 +9,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
Принимает число типа UInt32. Интерпретирует его, как IPv4-адрес в big endian. Возвращает строку, содержащую соответствующий IPv4-адрес в формате A.B.C.D (числа в десятичной форме через точки).
Синоним: `INET_NTOA`.
## IPv4StringToNum(s) {#ipv4stringtonums}
Функция, обратная к IPv4NumToString. Если IPv4 адрес в неправильном формате, то возвращает 0.
Синоним: `INET_ATON`.
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}
Похоже на IPv4NumToString, но вместо последнего октета используется xxx.
@ -49,7 +53,11 @@ LIMIT 10
### IPv6NumToString(x) {#ipv6numtostringx}
Принимает значение типа FixedString(16), содержащее IPv6-адрес в бинарном виде. Возвращает строку, содержащую этот адрес в текстовом виде.
IPv6-mapped IPv4 адреса выводится в формате ::ffff:111.222.33.44. Примеры:
IPv6-mapped IPv4 адреса выводится в формате ::ffff:111.222.33.44.
Примеры: `INET6_NTOA`.
Примеры:
``` sql
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
@ -118,6 +126,8 @@ LIMIT 10
Функция, обратная к IPv6NumToString. Если IPv6 адрес в неправильном формате, то возвращает строку из нулевых байт.
HEX может быть в любом регистре.
Alias: `INET6_ATON`.
## IPv4ToIPv6(x) {#ipv4toipv6x}
Принимает число типа `UInt32`. Интерпретирует его, как IPv4-адрес в [big endian](https://en.wikipedia.org/wiki/Endianness). Возвращает значение `FixedString(16)`, содержащее адрес IPv6 в двоичном формате. Примеры:

View File

@ -95,6 +95,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
Повторяет строку определенное количество раз и объединяет повторяемые значения в одну строку.
Синоним: `REPEAT`.
**Синтаксис**
``` sql
@ -273,10 +275,14 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2)
Производит кодирование строки s в base64-представление.
Синоним: `TO_BASE64`.
## base64Decode(s) {#base64decode}
Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение
Синоним: `FROM_BASE64`.
## tryBase64Decode(s) {#trybase64decode}
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.

View File

@ -176,4 +176,129 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type
└──────────────────────────────┴───────────────────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) <!--hide-->
## mapContains {#mapcontains}
Определяет, содержит ли контейнер `map` ключ `key`.
**Синтаксис**
``` sql
mapContains(map, key)
```
**Параметры**
- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md).
- `key` — ключ. Тип соответстует типу ключей параметра `map`.
**Возвращаемое значение**
- `1` если `map` включает `key`, иначе `0`.
Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapContains(a, 'name') FROM test;
```
Результат:
```text
┌─mapContains(a, 'name')─┐
│ 1 │
│ 0 │
└────────────────────────┘
```
## mapKeys {#mapkeys}
Возвращает все ключи контейнера `map`.
**Синтаксис**
```sql
mapKeys(map)
```
**Параметры**
- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md).
**Возвращаемое значение**
- Массив со всеми ключами контейнера `map`.
Тип: [Array](../../sql-reference/data-types/array.md).
**Пример**
Запрос:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapKeys(a) FROM test;
```
Результат:
```text
┌─mapKeys(a)────────────┐
│ ['name','age'] │
│ ['number','position'] │
└───────────────────────┘
```
## mapValues {#mapvalues}
Возвращает все значения контейнера `map`.
**Синтаксис**
```sql
mapKeys(map)
```
**Параметры**
- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md).
**Возвращаемое значение**
- Массив со всеми значениями контейнера `map`.
Тип: [Array](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapValues(a) FROM test;
```
Результат:
```text
┌─mapValues(a)─────┐
│ ['eleven','11'] │
│ ['twelve','6.0'] │
└──────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -36,10 +36,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u0440\u0435\u
**Пример**
Запрос:
``` sql
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
```
Результат:
``` text
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
│ -9223372036854775808 │ 32 │ 16 │ 8 │
@ -52,10 +56,14 @@ SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
**Пример**
Запрос:
``` sql
select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
```
Результат:
``` text
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
│ 123123 │ 0 │
@ -68,10 +76,14 @@ select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
**Пример**
Запрос:
``` sql
select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
```
Результат:
``` text
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
│ 123123 │ ᴺᵁᴸᴸ │
@ -102,10 +114,14 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
**Пример**
Запрос:
``` sql
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
```
Результат:
``` text
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
## toDate {#todate}
Cиноним: `DATE`.
## toDateOrZero {#todateorzero}
## toDateOrNull {#todateornull}
@ -168,20 +186,28 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
**Примеры**
Запрос:
``` sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val)
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
```
Результат:
``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5)) │
└──────────┴────────────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
```
Результат:
``` text
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │
@ -213,20 +239,28 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
**Пример**
Запрос:
``` sql
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val)
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
```
Результат:
``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5) │
└──────────┴────────────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val)
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
```
Результат:
``` text
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
│ 0.00 │ Decimal(9, 2) │
@ -258,12 +292,18 @@ YYYY-MM-DD hh:mm:ss
Дополнительно, функция toString от аргумента типа DateTime может принимать второй аргумент String - имя тайм-зоны. Пример: `Asia/Yekaterinburg` В этом случае, форматирование времени производится согласно указанной тайм-зоне.
**Пример**
Запрос:
``` sql
SELECT
now() AS now_local,
toString(now(), 'Asia/Yekaterinburg') AS now_yekat
toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
```
Результат:
``` text
┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
@ -281,22 +321,30 @@ SELECT
Принимает аргумент типа String или FixedString. Возвращает String, вырезая содержимое строки до первого найденного нулевого байта.
Пример:
**Примеры**
Запрос:
``` sql
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
```
Результат:
``` text
┌─s─────────────┬─s_cut─┐
│ foo\0\0\0\0\0 │ foo │
└───────────────┴───────┘
```
Запрос:
``` sql
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
```
Результат:
``` text
┌─s──────────┬─s_cut─┐
│ foo\0bar\0 │ foo │
@ -344,7 +392,7 @@ reinterpretAsUUID(fixed_string)
Запрос:
``` sql
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')));
```
Результат:
@ -377,10 +425,15 @@ SELECT uuid = uuid2;
## CAST(x, T) {#type_conversion_function-cast}
Преобразует x в тип данных t.
Поддерживается также синтаксис CAST(x AS t).
Преобразует входное значение `x` в указанный тип данных `T`.
Пример:
Поддерживается также синтаксис `CAST(x AS t)`.
Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255.
**Пример**
Запрос:
``` sql
SELECT
@ -388,9 +441,11 @@ SELECT
CAST(timestamp AS DateTime) AS datetime,
CAST(timestamp AS Date) AS date,
CAST(timestamp, 'String') AS string,
CAST(timestamp, 'FixedString(22)') AS fixed_string
CAST(timestamp, 'FixedString(22)') AS fixed_string;
```
Результат:
``` text
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
@ -399,12 +454,18 @@ SELECT
Преобразование в FixedString(N) работает только для аргументов типа String или FixedString(N).
Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. Пример:
Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно.
**Примеры**
Запрос:
``` sql
SELECT toTypeName(x) FROM t_null
SELECT toTypeName(x) FROM t_null;
```
Результат:
``` text
┌─toTypeName(x)─┐
│ Int8 │
@ -412,10 +473,14 @@ SELECT toTypeName(x) FROM t_null
└───────────────┘
```
Запрос:
``` sql
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
```
Результат:
``` text
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
│ Nullable(UInt16) │
@ -427,6 +492,93 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
- Настройка [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable)
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
Преобразует входное значение `x` в указанный тип данных `T`.
В отличие от функции [cast(x, T)](#type_conversion_function-cast), `accurateCast` не допускает переполнения при преобразовании числовых типов. Например, `accurateCast(-1, 'UInt8')` вызовет исключение.
**Примеры**
Запрос:
``` sql
SELECT cast(-1, 'UInt8') as uint8;
```
Результат:
``` text
┌─uint8─┐
│ 255 │
└─────
Запрос:
```sql
SELECT accurateCast(-1, 'UInt8') as uint8;
```
Результат:
``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
Преобразует входное значение `x` в указанный тип данных `T`.
Всегда возвращает тип [Nullable](../../sql-reference/data-types/nullable.md). Если исходное значение не может быть преобразовано к целевому типу, возвращает [NULL](../../sql-reference/syntax.md#null-literal).
**Синтаксис**
```sql
accurateCastOrNull(x, T)
```
**Параметры**
- `x` — входное значение.
- `T` — имя возвращаемого типа данных.
**Возвращаемое значение**
- Значение, преобразованное в указанный тип `T`.
**Примеры**
Запрос:
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
```
Результат:
``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
│ Nullable(UInt8) │
└────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
```
Результат:
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
Приводит аргумент из числового типа данных к типу данных [IntervalType](../../sql-reference/data-types/special-data-types/interval.md).
@ -454,6 +606,8 @@ toIntervalYear(number)
**Пример**
Запрос:
``` sql
WITH
toDate('2019-01-01') AS date,
@ -461,9 +615,11 @@ WITH
toIntervalWeek(1) AS interval_to_week
SELECT
date + interval_week,
date + interval_to_week
date + interval_to_week;
```
Результат:
``` text
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│ 2019-01-08 │ 2019-01-08 │
@ -479,7 +635,7 @@ SELECT
**Синтаксис**
``` sql
parseDateTimeBestEffort(time_string[, time_zone]);
parseDateTimeBestEffort(time_string[, time_zone])
```
**Параметры**
@ -522,7 +678,7 @@ AS parseDateTimeBestEffort;
``` sql
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
AS parseDateTimeBestEffort
AS parseDateTimeBestEffort;
```
Результат:
@ -537,7 +693,7 @@ AS parseDateTimeBestEffort
``` sql
SELECT parseDateTimeBestEffort('1284101485')
AS parseDateTimeBestEffort
AS parseDateTimeBestEffort;
```
Результат:
@ -552,7 +708,7 @@ AS parseDateTimeBestEffort
``` sql
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
AS parseDateTimeBestEffort
AS parseDateTimeBestEffort;
```
Результат:
@ -566,7 +722,7 @@ AS parseDateTimeBestEffort
Запрос:
``` sql
SELECT parseDateTimeBestEffort('10 20:19')
SELECT parseDateTimeBestEffort('10 20:19');
```
Результат:
@ -591,7 +747,7 @@ SELECT parseDateTimeBestEffort('10 20:19')
**Синтаксис**
``` sql
parseDateTimeBestEffortUS(time_string [, time_zone]);
parseDateTimeBestEffortUS(time_string [, time_zone])
```
**Параметры**
@ -620,7 +776,7 @@ SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57')
AS parseDateTimeBestEffortUS;
```
Ответ:
Результат:
``` text
┌─parseDateTimeBestEffortUS─┐
@ -635,7 +791,7 @@ SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57')
AS parseDateTimeBestEffortUS;
```
Ответ:
Результат:
``` text
┌─parseDateTimeBestEffortUS─┐
@ -650,7 +806,7 @@ SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57')
AS parseDateTimeBestEffortUS;
```
Ответ:
Результат:
``` text
┌─parseDateTimeBestEffortUS─┐
@ -658,6 +814,178 @@ AS parseDateTimeBestEffortUS;
└─────────────────────────——┘
```
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает `NULL`, если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md).
**Синтаксис**
``` sql
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
```
**Параметры**
- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md).
**Поддерживаемые нестандартные форматы**
- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр.
- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д.
- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д.
- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`.
- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`.
**Возвращаемые значения**
- `time_string`, преобразованная в тип данных `DateTime`.
- `NULL`, если входная строка не может быть преобразована в тип данных `DateTime`.
**Примеры**
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────────┘
```
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает нулевую дату (`1970-01-01`) или нулевую дату со временем (`1970-01-01 00:00:00`), если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md).
**Синтаксис**
``` sql
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
```
**Параметры**
- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md).
**Поддерживаемые нестандартные форматы**
- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр.
- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д.
- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д.
- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`.
- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`.
**Возвращаемые значения**
- `time_string`, преобразованная в тип данных `DateTime`.
- Нулевая дата или нулевая дата со временем, если входная строка не может быть преобразована в тип данных `DateTime`.
**Примеры**
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Запрос:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
```
Результат:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 1970-01-01 00:00:00 │
└─────────────────────────────────┘
```
## toUnixTimestamp64Milli
## toUnixTimestamp64Micro
## toUnixTimestamp64Nano
@ -685,10 +1013,10 @@ toUnixTimestamp64Milli(value)
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64)
SELECT toUnixTimestamp64Milli(dt64);
```
Ответ:
Результат:
``` text
┌─toUnixTimestamp64Milli(dt64)─┐
@ -700,10 +1028,10 @@ SELECT toUnixTimestamp64Milli(dt64)
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64)
SELECT toUnixTimestamp64Nano(dt64);
```
Ответ:
Результат:
``` text
┌─toUnixTimestamp64Nano(dt64)─┐
@ -738,10 +1066,10 @@ fromUnixTimestamp64Milli(value [, ti])
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC')
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
```
Ответ:
Результат:
``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
@ -772,12 +1100,12 @@ toLowCardinality(expr)
Тип: `LowCardinality(expr_result_type)`
**Example**
**Пример**
Запрос:
```sql
SELECT toLowCardinality('1')
SELECT toLowCardinality('1');
```
Результат:
@ -813,10 +1141,10 @@ formatRow(format, x, y, ...)
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3)
FROM numbers(3);
```
Ответ:
Результат:
``` text
┌─formatRow('CSV', number, 'good')─┐
@ -854,10 +1182,10 @@ formatRowNoNewline(format, x, y, ...)
``` sql
SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3)
FROM numbers(3);
```
Ответ:
Результат:
``` text
┌─formatRowNoNewline('CSV', number, 'good')─┐

View File

@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
Если слева стоит один столбец, входящий в индекс, а справа - множество констант, то при выполнении запроса, система воспользуется индексом.
Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел «Внешние данные для обработки запроса»), и затем воспользоваться подзапросом.
Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел [Внешние данные для обработки запроса](../../engines/table-engines/special/external-data.md)), и затем воспользоваться подзапросом.
В качестве правой части оператора может быть множество константных выражений, множество кортежей с константными выражениями (показано в примерах выше), а также имя таблицы или подзапрос SELECT в скобках.
Если типы данных в левой и правой частях подзапроса `IN` различаются, ClickHouse преобразует значение в левой части к типу данных из правой части. Преобразование выполняется по аналогии с функцией [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null), т.е. тип данных становится [Nullable](../../sql-reference/data-types/nullable.md), а если преобразование не может быть выполнено, возвращается значение [NULL](../../sql-reference/syntax.md#null-literal).
**Пример**
Запрос:
``` sql
SELECT '1' IN (SELECT 1);
```
Результат:
``` text
┌─in('1', _subquery49)─┐
│ 1 │
└──────────────────────┘
```
Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемым вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию.
Если в качестве правой части оператора, указано имя таблицы, имеющий движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе.

View File

@ -0,0 +1,22 @@
---
toc_title: ALL
---
# Секция ALL {#select-all}
Если в таблице несколько совпадающих строк, то `ALL` возвращает все из них. Поведение запроса `SELECT ALL` точно такое же, как и `SELECT` без аргумента `DISTINCT`. Если указаны оба аргумента: `ALL` и `DISTINCT`, функция вернет исключение.
`ALL` может быть указан внутри агрегатной функции, например, результат выполнения запроса:
```sql
SELECT sum(ALL number) FROM numbers(10);
```
равен результату выполнения запроса:
```sql
SELECT sum(number) FROM numbers(10);
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/select/all) <!--hide-->

View File

@ -5,23 +5,27 @@ toc_title: file
# file {#file}
Создаёт таблицу из файла. Данная табличная функция похожа на табличные функции [file](file.md) и [hdfs](hdfs.md).
Создаёт таблицу из файла. Данная табличная функция похожа на табличные функции [url](../../sql-reference/table-functions/url.md) и [hdfs](../../sql-reference/table-functions/hdfs.md).
Функция `file` может использоваться в запросах `SELECT` и `INSERT` при работе с движком таблиц [File](../../engines/table-engines/special/file.md).
**Синтаксис**
``` sql
file(path, format, structure)
```
**Входные параметры**
**Параметры**
- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, \``'abc', 'def'` — строки.
- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки.
- `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат `'colunmn1_name column1_ype, column2_name column2_type, ...'`.
- `structure` — структура таблицы. Формат: `'colunmn1_name column1_ype, column2_name column2_type, ...'`.
**Возвращаемое значение**
Таблица с указанной структурой, предназначенная для чтения или записи данных в указанном файле.
**Пример**
**Примеры**
Настройка `user_files_path` и содержимое файла `test.csv`:
@ -35,12 +39,10 @@ $ cat /var/lib/clickhouse/user_files/test.csv
78,43,45
```
Таблица из `test.csv` и выборка первых двух строк из неё:
Получение данных из таблицы в файле `test.csv` и выборка первых двух строк из неё:
``` sql
SELECT *
FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2;
```
``` text
@ -50,45 +52,61 @@ LIMIT 2
└─────────┴─────────┴─────────┘
```
Шаблоны могут содержаться в нескольких компонентах пути. Обрабатываются только существующие файлы, название которых целиком удовлетворяет шаблону (не только суффиксом или префиксом).
Получение первых 10 строк таблицы, содержащей 3 столбца типа [UInt32](../../sql-reference/data-types/int-uint.md), из CSV-файла:
- `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
- `?` — Заменяет ровно один любой символ.
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
```
Вставка данных из файла в таблицу:
``` sql
INSERT INTO FUNCTION file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') VALUES (1, 2, 3), (3, 2, 1);
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
```
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
│ 3 │ 2 │ 1 │
└─────────┴─────────┴─────────┘
```
## Шаблоны поиска в компонентах пути {#globs-in-path}
При описании пути к файлу могут использоваться шаблоны поиска. Обрабатываются только те файлы, у которых путь и название соответствуют шаблону полностью (а не только префикс или суффикс).
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
- `?` — заменяет ровно один любой символ.
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
Конструкция с `{}` аналогична табличной функции [remote](remote.md).
**Пример**
1. Предположим у нас есть несколько файлов со следующими относительными путями:
Предположим, у нас есть несколько файлов со следующими относительными путями:
- some_dir/some_file_1
- some_dir/some_file_2
- some_dir/some_file_3
- another_dir/some_file_1
- another_dir/some_file_2
- another_dir/some_file_3
- 'some_dir/some_file_1'
- 'some_dir/some_file_2'
- 'some_dir/some_file_3'
- 'another_dir/some_file_1'
- 'another_dir/some_file_2'
- 'another_dir/some_file_3'
1. Запросим количество строк в этих файлах:
<!-- -->
Запросим количество строк в этих файлах:
``` sql
SELECT count(*)
FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32');
```
1. Запросим количество строк во всех файлах этих двух директорий:
<!-- -->
Запросим количество строк во всех файлах этих двух директорий:
``` sql
SELECT count(*)
FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
```
!!! warning "Warning"
!!! warning "Предупреждение"
Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`.
**Пример**
@ -96,17 +114,16 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
Запрос данных из файлов с именами `file000`, `file001`, … , `file999`:
``` sql
SELECT count(*)
FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32')
SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32');
```
## Виртуальные столбцы {#virtualnye-stolbtsy}
- `_path`Путь к файлу.
- `_file`Имя файла.
- `_path`путь к файлу.
- `_file`имя файла.
**Смотрите также**
- [Виртуальные столбцы](index.md#table_engines-virtual_columns)
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/file/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/file/) <!--hide-->

View File

@ -7,6 +7,8 @@ toc_title: mysql
Позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом MySQL сервере.
**Синтаксис**
``` sql
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```
@ -23,13 +25,13 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
- `password` — пароль пользователя.
- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Если `replace_query=1`, то запрос заменяется.
- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Возможные значения:
- `0` - выполняется запрос `INSERT INTO`.
- `1` - выполняется запрос `REPLACE INTO`.
- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`.
- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`. Может быть передано только с помощью `replace_query = 0` (если вы одновременно передадите `replace_query = 1` и `on_duplicate_clause`, будет сгенерировано исключение).
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1`. Чтобы узнать какие `on_duplicate_clause` можно использовать с секцией `ON DUPLICATE KEY` обратитесь к документации MySQL.
Чтобы указать `'on_duplicate_clause'` необходимо передать `0` в параметр `replace_query`. Если одновременно передать `replace_query = 1` и `'on_duplicate_clause'`, то ClickHouse сгенерирует исключение.
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1;`
Простые условия `WHERE` такие как `=, !=, >, >=, <, =` выполняются на стороне сервера MySQL.
@ -39,46 +41,59 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
Объект таблицы с теми же столбцами, что и в исходной таблице MySQL.
## Пример использования {#primer-ispolzovaniia}
!!! note "Примечание"
Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком имен столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже.
**Примеры**
Таблица в MySQL:
``` text
mysql> CREATE TABLE `test`.`test` (
-> `int_id` INT NOT NULL AUTO_INCREMENT,
-> `int_nullable` INT NULL DEFAULT NULL,
-> `float` FLOAT NOT NULL,
-> `float_nullable` FLOAT NULL DEFAULT NULL,
-> PRIMARY KEY (`int_id`));
Query OK, 0 rows affected (0,09 sec)
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
Query OK, 1 row affected (0,00 sec)
mysql> INSERT INTO test (`int_id`, `float`) VALUES (1,2);
mysql> select * from test;
+--------+--------------+-------+----------------+
| int_id | int_nullable | float | float_nullable |
+--------+--------------+-------+----------------+
| 1 | NULL | 2 | NULL |
+--------+--------------+-------+----------------+
1 row in set (0,00 sec)
mysql> SELECT * FROM test;
+--------+-------+
| int_id | float |
+--------+-------+
| 1 | 2 |
+--------+-------+
```
Получение данных в ClickHouse:
``` sql
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
```
``` text
┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐
│ 1 │ ᴺᵁᴸᴸ │ 2 │ ᴺᵁᴸᴸ │
└────────┴──────────────┴───────┴────────────────
┌─int_id─┬─float─┐
│ 1 │ 2 │
└────────┴───────┘
```
## Смотрите также {#smotrite-takzhe}
Замена и вставка:
```sql
INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3);
INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4);
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
```
``` text
┌─int_id─┬─float─┐
│ 1 │ 3 │
│ 2 │ 4 │
└────────┴───────┘
```
**Смотрите также**
- [Движок таблиц MySQL](../../sql-reference/table-functions/mysql.md)
- [Использование MySQL как источника данных для внешнего словаря](../../sql-reference/table-functions/mysql.md#dicts-external_dicts_dict_sources-mysql)
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/mysql/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table_functions/mysql/) <!--hide-->

View File

@ -5,9 +5,11 @@ toc_title: remote
# remote, remoteSecure {#remote-remotesecure}
Позволяет обратиться к удалённым серверам без создания таблицы типа `Distributed`.
Позволяет обратиться к удалённым серверам без создания таблицы типа [Distributed](../../engines/table-engines/special/distributed.md). Функция `remoteSecure` работает аналогично `remote`, но использует защищенное соединение.
Сигнатуры:
Обе функции могут использоваться в запросах `SELECT` и `INSERT`.
**Синтаксис**
``` sql
remote('addresses_expr', db, table[, 'user'[, 'password']])
@ -16,12 +18,40 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password']])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password']])
```
`addresses_expr` - выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера - это `хост:порт`, или только `хост`. Хост может быть указан в виде имени сервера, или в виде IPv4 или IPv6 адреса. IPv6 адрес указывается в квадратных скобках. Порт - TCP-порт удалённого сервера. Если порт не указан, используется `tcp_port` из конфигурационного файла сервера (по умолчанию - 9000).
**Параметры**
- `addresses_expr` — выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера — это `host:port` или только `host`.
Вместо параметра `host` может быть указано имя сервера или его адрес в формате IPv4 или IPv6. IPv6 адрес указывается в квадратных скобках.
`port` — TCP-порт удалённого сервера. Если порт не указан, используется [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) из конфигурационного файла сервера, к которому обратились через функцию `remote` (по умолчанию - 9000), и [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure), к которому обратились через функцию `remoteSecure` (по умолчанию — 9440).
!!! important "Важно"
С IPv6-адресом обязательно нужно указывать порт.
Примеры:
Тип: [String](../../sql-reference/data-types/string.md).
- `db` — имя базы данных. Тип: [String](../../sql-reference/data-types/string.md).
- `table` — имя таблицы. Тип: [String](../../sql-reference/data-types/string.md).
- `user` — имя пользователя. Если пользователь не указан, то по умолчанию `default`. Тип: [String](../../sql-reference/data-types/string.md).
- `password` — пароль. Если пароль не указан, то используется пустой пароль. Тип: [String](../../sql-reference/data-types/string.md).
- `sharding_key` — ключ шардирования для поддержки распределения данных между узлами. Например: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Тип: [UInt32](../../sql-reference/data-types/int-uint.md).
**Возвращаемое значение**
Набор данных с удаленных серверов.
**Использование**
Использование табличной функции `remote` менее оптимально, чем создание таблицы типа `Distributed`, так как в этом случае соединения с серверами устанавливаются заново при каждом запросе. Если указываются имена серверов, то приходится также выполнять поиск сервера по имени. Кроме того, не ведётся сквозной подсчёт ошибок при работе с разными репликами. При обработке большого количества запросов всегда создавайте таблицу типа `Distributed`, использовать табличную функцию `remote` в таких случаях не рекомендуется.
Табличная функция `remote` может быть полезна в следующих случаях:
- Обращение на конкретный сервер для сравнения данных, отладки и тестирования.
- Запросы между разными кластерами ClickHouse для исследований.
- Нечастые распределённые запросы, задаваемые вручную.
- Распределённые запросы, где набор серверов определяется каждый раз заново.
**Адреса**
``` text
example01-01-1
@ -32,9 +62,7 @@ localhost
[2a02:6b8:0:1111::11]:9000
```
Адреса можно указать через запятую, в этом случае ClickHouse обработает запрос как распределённый, т.е. отправит его по всем указанным адресам как на шарды с разными данными.
Пример:
Адреса можно указать через запятую. В этом случае ClickHouse обработает запрос как распределённый, т.е. отправит его по всем указанным адресам как на шарды с разными данными. Пример:
``` text
example01-01-1,example01-02-1
@ -46,38 +74,36 @@ example01-01-1,example01-02-1
example01-0{1,2}-1
```
В фигурных скобках может быть указан диапазон (неотрицательных целых) чисел через две точки. В этом случае, диапазон раскрывается в множество значений, генерирующих адреса шардов. Если запись первого числа начинается с нуля, то значения формируются с таким же выравниванием нулями. Предыдущий пример может быть записан следующим образом:
В фигурных скобках может быть указан диапазон (неотрицательных целых) чисел через две точки. В этом случае диапазон раскрывается в множество значений, генерирующих адреса шардов. Если запись первого числа начинается с нуля, то значения формируются с таким же выравниванием нулями. Предыдущий пример может быть записан следующим образом:
``` text
example01-{01..02}-1
```
При наличии нескольких пар фигурных скобок, генерируется прямое произведение соответствующих множеств.
При наличии нескольких пар фигурных скобок генерируется прямое произведение соответствующих множеств.
Адреса или их фрагменты в фигурных скобках можно указать через символ \|. В этом случае, соответствующие множества адресов понимаются как реплики - запрос будет отправлен на первую живую реплику. При этом, реплики перебираются в порядке, согласно текущей настройке [load_balancing](../../operations/settings/settings.md).
Пример:
Адреса или их фрагменты в фигурных скобках можно указать через символ \|. В этом случае соответствующие множества адресов понимаются как реплики — запрос будет отправлен на первую живую реплику. При этом реплики перебираются в порядке, согласно текущей настройке [load_balancing](../../operations/settings/settings.md#settings-load_balancing). В этом примере указаны два шарда, в каждом из которых имеются две реплики:
``` text
example01-{01..02}-{1|2}
```
В этом примере указано два шарда, в каждом из которых имеется две реплики.
Количество генерируемых адресов ограничено константой. Сейчас это 1000 адресов.
Количество генерируемых адресов ограничено константой - сейчас это 1000 штук.
**Примеры**
Использование табличной функции `remote` менее оптимально, чем создание таблицы типа `Distributed`, так как в этом случае, соединения с серверами устанавливаются заново при каждом запросе, в случае задания имён хостов, делается резолвинг имён, а также не ведётся подсчёт ошибок при работе с разными репликами. При обработке большого количества запросов, всегда создавайте `Distributed` таблицу заранее, не используйте табличную функцию `remote`.
Выборка данных с удаленного сервера:
Табличная функция `remote` может быть полезна для следующих случаях:
``` sql
SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3;
```
- обращение на конкретный сервер в целях сравнения данных, отладки и тестирования;
- запросы между разными кластерами ClickHouse в целях исследований;
- нечастых распределённых запросов, задаваемых вручную;
- распределённых запросов, где набор серверов определяется каждый раз заново.
Вставка данных с удаленного сервера в таблицу:
Если пользователь не задан,то используется `default`.
Если пароль не задан, то используется пустой пароль.
``` sql
CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory;
INSERT INTO FUNCTION remote('127.0.0.1', currentDatabase(), 'remote_table') VALUES ('test', 42);
SELECT * FROM remote_table;
```
`remoteSecure` - аналогично функции `remote`, но с соединением по шифрованному каналу. Порт по умолчанию - `tcp_port_secure` из конфига или 9440.
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/remote/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/remote/) <!--hide-->

View File

@ -5,21 +5,40 @@ toc_title: url
# url {#url}
`url(URL, format, structure)` - возвращает таблицу со столбцами, указанными в
`structure`, созданную из данных находящихся по `URL` в формате `format`.
Функция `url` берет данные по указанному адресу `URL` и создает из них таблицу указанной структуры со столбцами указанного формата.
URL - адрес, по которому сервер принимает `GET` и/или `POST` запросы по
протоколу HTTP или HTTPS.
Функция `url` может быть использована в запросах `SELECT` и `INSERT` с таблицами на движке [URL](../../engines/table-engines/special/url.md).
format - [формат](../../interfaces/formats.md#formats) данных.
structure - структура таблицы в форме `'UserID UInt64, Name String'`. Определяет имена и типы столбцов.
**Пример**
**Синтаксис**
``` sql
-- получение 3-х строк таблицы, состоящей из двух колонк типа String и UInt32 от сервера, отдающего данные в формате CSV
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3
url(URL, format, structure)
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/url/) <!--hide-->
**Параметры**
- `URL` — HTTP или HTTPS-адрес сервера, который может принимать запросы `GET` или `POST` (для запросов `SELECT` или `INSERT` соответственно). Тип: [String](../../sql-reference/data-types/string.md).
- `format` — [формат](../../interfaces/formats.md#formats) данных. Тип: [String](../../sql-reference/data-types/string.md).
- `structure` — структура таблицы в формате `'UserID UInt64, Name String'`. Определяет имена и типы столбцов. Тип: [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
Таблица с указанными форматом и структурой, а также с данными, полученными из указанного адреса `URL`.
**Примеры**
Получение с HTTP-сервера первых 3 строк таблицы с данными в формате [CSV](../../interfaces/formats.md/#csv), содержащей столбцы типа [String](../../sql-reference/data-types/string.md) и [UInt32](../../sql-reference/data-types/int-uint.md).
``` sql
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
```
Вставка данных в таблицу:
``` sql
CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory;
INSERT INTO FUNCTION url('http://127.0.0.1:8123/?query=INSERT+INTO+test_table+FORMAT+CSV', 'CSV', 'column1 String, column2 UInt32') VALUES ('http interface', 42);
SELECT * FROM test_table;
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/url/) <!--hide-->

View File

@ -66,6 +66,7 @@ namespace ErrorCodes
extern const int CANNOT_OPEN_FILE;
extern const int SYSTEM_ERROR;
extern const int NOT_ENOUGH_SPACE;
extern const int CANNOT_KILL;
}
}
@ -886,6 +887,27 @@ namespace
fmt::print("Sent kill signal.\n", pid);
else
throwFromErrno("Cannot send kill signal", ErrorCodes::SYSTEM_ERROR);
/// Wait for the process (100 seconds).
constexpr size_t num_kill_check_tries = 1000;
constexpr size_t kill_check_delay_ms = 100;
for (size_t i = 0; i < num_kill_check_tries; ++i)
{
fmt::print("Waiting for server to be killed\n");
if (!isRunning(pid_file))
{
fmt::print("Server exited\n");
break;
}
sleepForMilliseconds(kill_check_delay_ms);
}
if (isRunning(pid_file))
{
throw Exception(ErrorCodes::CANNOT_KILL,
"The server process still exists after %zu ms",
num_kill_check_tries, kill_check_delay_ms);
}
}
return 0;

View File

@ -82,6 +82,7 @@
#if defined(OS_LINUX)
# include <sys/mman.h>
# include <sys/ptrace.h>
# include <Common/hasLinuxCapability.h>
#endif
@ -479,6 +480,15 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Calculated checksum of the binary: {}, integrity check passed.", calculated_binary_hash);
}
else
{
/// If program is run under debugger, ptrace will fail.
if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1)
{
/// Program is run under debugger. Modification of it's binary image is ok for breakpoints.
LOG_WARNING(log, "Server is run under debugger and its binary image is modified (most likely with breakpoints).",
calculated_binary_hash);
}
else
{
throw Exception(ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the ClickHouse binary ({0}) does not correspond"
@ -492,6 +502,7 @@ int Server::main(const std::vector<std::string> & /*args*/)
, calculated_binary_hash, stored_binary_hash, executable_path);
}
}
}
else
executable_path = "/usr/bin/clickhouse"; /// It is used for information messages.
@ -1006,17 +1017,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they require PHDR cache to be created"
" (otherwise the function 'dl_iterate_phdr' is not lock free and not async-signal safe).");
if (has_zookeeper && config().has("distributed_ddl"))
{
/// DDL worker should be started after all tables were loaded
String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
int pool_size = config().getInt("distributed_ddl.pool_size", 1);
if (pool_size < 1)
throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(),
"distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID));
}
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
if (config().has("disable_internal_dns_cache") && config().getInt("disable_internal_dns_cache"))
{
@ -1298,6 +1298,37 @@ int Server::main(const std::vector<std::string> & /*args*/)
std::thread::hardware_concurrency());
}
/// try to load dictionaries immediately, throw on error and die
ext::scope_guard dictionaries_xmls, models_xmls;
try
{
if (!config().getBool("dictionaries_lazy_load", true))
{
global_context->tryCreateEmbeddedDictionaries();
global_context->getExternalDictionariesLoader().enableAlwaysLoadEverything(true);
}
dictionaries_xmls = global_context->getExternalDictionariesLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "dictionaries_config"));
models_xmls = global_context->getExternalModelsLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "models_config"));
}
catch (...)
{
LOG_ERROR(log, "Caught exception while loading dictionaries.");
throw;
}
if (has_zookeeper && config().has("distributed_ddl"))
{
/// DDL worker should be started after all tables were loaded
String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/");
int pool_size = config().getInt("distributed_ddl.pool_size", 1);
if (pool_size < 1)
throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(),
"distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID));
}
LOG_INFO(log, "Ready for connections.");
SCOPE_EXIT({
@ -1347,26 +1378,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
}
});
/// try to load dictionaries immediately, throw on error and die
ext::scope_guard dictionaries_xmls, models_xmls;
try
{
if (!config().getBool("dictionaries_lazy_load", true))
{
global_context->tryCreateEmbeddedDictionaries();
global_context->getExternalDictionariesLoader().enableAlwaysLoadEverything(true);
}
dictionaries_xmls = global_context->getExternalDictionariesLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "dictionaries_config"));
models_xmls = global_context->getExternalModelsLoader().addConfigRepository(
std::make_unique<ExternalLoaderXMLConfigRepository>(config(), "models_config"));
}
catch (...)
{
LOG_ERROR(log, "Caught exception while loading dictionaries.");
throw;
}
std::vector<std::unique_ptr<MetricsTransmitter>> metrics_transmitters;
for (const auto & graphite_key : DB::getMultipleKeysFromConfig(config(), "", "graphite"))
{

View File

@ -892,6 +892,19 @@
<!-- Controls how much ON CLUSTER queries can be run simultaneously. -->
<!-- <pool_size>1</pool_size> -->
<!--
Cleanup settings (active tasks will not be removed)
-->
<!-- Controls task TTL (default 1 week) -->
<!-- <task_max_lifetime>604800</task_max_lifetime> -->
<!-- Controls how often cleanup should be performed (in seconds) -->
<!-- <cleanup_delay_period>60</cleanup_delay_period> -->
<!-- Controls how many tasks could be in the queue -->
<!-- <max_tasks_in_queue>1000</max_tasks_in_queue> -->
</distributed_ddl>
<!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h -->

View File

@ -106,6 +106,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
bool has_null_arguments) const
{
String name = getAliasToOrName(name_param);
bool is_case_insensitive = false;
Value found;
/// Find by exact match.
@ -115,7 +116,10 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
{
found = jt->second;
is_case_insensitive = true;
}
const Context * query_context = nullptr;
if (CurrentThread::isInitialized())
@ -126,7 +130,8 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
out_properties = found.properties;
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunction, name);
query_context->addQueryFactoriesInfo(
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
if (!out_properties.returns_default_when_only_null && has_null_arguments)

View File

@ -118,6 +118,8 @@ public:
WhichDataType value_type_to_check(value_type);
/// Do not promote decimal because of implementation issues of this function design
/// Currently we cannot get result column type in case of decimal we cannot get decimal scale
/// in method void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
/// If we decide to make this function more efficient we should promote decimal type during summ
if (value_type_to_check.isDecimal())
result_type = value_type_without_nullable;

View File

@ -56,7 +56,7 @@ class ReservoirSamplerDeterministic
{
bool good(const UInt32 hash)
{
return hash == ((hash >> skip_degree) << skip_degree);
return !(hash & skip_mask);
}
public:
@ -135,11 +135,8 @@ public:
throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different max sample size");
sorted = false;
if (b.skip_degree > skip_degree)
{
skip_degree = b.skip_degree;
thinOut();
}
if (skip_degree < b.skip_degree)
setSkipDegree(b.skip_degree);
for (const auto & sample : b.samples)
if (good(sample.second))
@ -184,22 +181,39 @@ private:
size_t total_values = 0; /// How many values were inserted (regardless if they remain in sample or not).
bool sorted = false;
Array samples;
UInt8 skip_degree = 0; /// The number N determining that we save only one per 2^N elements in average.
/// The number N determining that we store only one per 2^N elements in average.
UInt8 skip_degree = 0;
/// skip_mask is calculated as (2 ^ skip_degree - 1). We store an element only if (hash & skip_mask) == 0.
/// For example, if skip_degree==0 then skip_mask==0 means we store each element;
/// if skip_degree==1 then skip_mask==0b0001 means we store one per 2 elements in average;
/// if skip_degree==4 then skip_mask==0b1111 means we store one per 16 elements in average.
UInt32 skip_mask = 0;
void insertImpl(const T & v, const UInt32 hash)
{
/// Make a room for plus one element.
while (samples.size() >= max_sample_size)
{
++skip_degree;
if (skip_degree > detail::MAX_SKIP_DEGREE)
throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
thinOut();
}
setSkipDegree(skip_degree + 1);
samples.emplace_back(v, hash);
}
void setSkipDegree(UInt8 skip_degree_)
{
if (skip_degree_ == skip_degree)
return;
if (skip_degree_ > detail::MAX_SKIP_DEGREE)
throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
skip_degree = skip_degree_;
if (skip_degree == detail::MAX_SKIP_DEGREE)
skip_mask = static_cast<UInt32>(-1);
else
skip_mask = (1 << skip_degree) - 1;
thinOut();
}
void thinOut()
{
samples.resize(std::distance(samples.begin(),

View File

@ -101,8 +101,8 @@ endif()
list (APPEND clickhouse_common_io_sources ${CONFIG_BUILD})
list (APPEND clickhouse_common_io_headers ${CONFIG_VERSION} ${CONFIG_COMMON})
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp)
list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h)
list (APPEND dbms_sources Functions/IFunction.cpp Functions/FunctionFactory.cpp Functions/FunctionHelpers.cpp Functions/extractTimeZoneFromFunctionArguments.cpp Functions/replicate.cpp Functions/FunctionsLogical.cpp)
list (APPEND dbms_headers Functions/IFunctionImpl.h Functions/FunctionFactory.h Functions/FunctionHelpers.h Functions/extractTimeZoneFromFunctionArguments.h Functions/replicate.h Functions/FunctionsLogical.h)
list (APPEND dbms_sources
AggregateFunctions/AggregateFunctionFactory.cpp

View File

@ -109,6 +109,8 @@ void Connection::connect(const ConnectionTimeouts & timeouts)
}
in = std::make_shared<ReadBufferFromPocoSocket>(*socket);
in->setAsyncCallback(std::move(async_callback));
out = std::make_shared<WriteBufferFromPocoSocket>(*socket);
connected = true;
@ -753,15 +755,8 @@ std::optional<UInt64> Connection::checkPacket(size_t timeout_microseconds)
}
Packet Connection::receivePacket(std::function<void(Poco::Net::Socket &)> async_callback)
Packet Connection::receivePacket()
{
in->setAsyncCallback(std::move(async_callback));
SCOPE_EXIT({
/// disconnect() will reset "in".
if (in)
in->setAsyncCallback({});
});
try
{
Packet res;

View File

@ -27,7 +27,6 @@
#include <atomic>
#include <optional>
namespace DB
{
@ -175,8 +174,7 @@ public:
std::optional<UInt64> checkPacket(size_t timeout_microseconds = 0);
/// Receive packet from server.
/// Each time read blocks and async_callback is set, it will be called. You can poll socket inside it.
Packet receivePacket(std::function<void(Poco::Net::Socket &)> async_callback = {});
Packet receivePacket();
/// If not connected yet, or if connection is broken - then connect. If cannot connect - throw an exception.
void forceConnected(const ConnectionTimeouts & timeouts);
@ -195,6 +193,16 @@ public:
size_t outBytesCount() const { return out ? out->count() : 0; }
size_t inBytesCount() const { return in ? in->count() : 0; }
Poco::Net::Socket * getSocket() { return socket.get(); }
/// Each time read from socket blocks and async_callback is set, it will be called. You can poll socket inside it.
void setAsyncCallback(AsyncCallback async_callback_)
{
async_callback = std::move(async_callback_);
if (in)
in->setAsyncCallback(std::move(async_callback));
}
private:
String host;
UInt16 port;
@ -282,6 +290,8 @@ private:
LoggerWrapper log_wrapper;
AsyncCallback async_callback = {};
void connect(const ConnectionTimeouts & timeouts);
void sendHello();
void receiveHello();
@ -307,4 +317,20 @@ private:
[[noreturn]] void throwUnexpectedPacket(UInt64 packet_type, const char * expected) const;
};
class AsyncCallbackSetter
{
public:
AsyncCallbackSetter(Connection * connection_, AsyncCallback async_callback) : connection(connection_)
{
connection->setAsyncCallback(std::move(async_callback));
}
~AsyncCallbackSetter()
{
connection->setAsyncCallback({});
}
private:
Connection * connection;
};
}

View File

@ -0,0 +1,239 @@
#include <Client/ConnectionEstablisher.h>
#include <Common/quoteString.h>
#include <Common/ProfileEvents.h>
namespace ProfileEvents
{
extern const Event DistributedConnectionMissingTable;
extern const Event DistributedConnectionStaleReplica;
}
namespace DB
{
namespace ErrorCodes
{
extern const int ATTEMPT_TO_READ_AFTER_EOF;
extern const int NETWORK_ERROR;
extern const int SOCKET_TIMEOUT;
}
ConnectionEstablisher::ConnectionEstablisher(
IConnectionPool * pool_,
const ConnectionTimeouts * timeouts_,
const Settings * settings_,
Poco::Logger * log_,
const QualifiedTableName * table_to_check_)
: pool(pool_), timeouts(timeouts_), settings(settings_), log(log_), table_to_check(table_to_check_), is_finished(false)
{
}
void ConnectionEstablisher::run(ConnectionEstablisher::TryResult & result, std::string & fail_message)
{
is_finished = false;
SCOPE_EXIT(is_finished = true);
try
{
result.entry = pool->get(*timeouts, settings, /* force_connected = */ false);
AsyncCallbackSetter async_setter(&*result.entry, std::move(async_callback));
UInt64 server_revision = 0;
if (table_to_check)
server_revision = result.entry->getServerRevision(*timeouts);
if (!table_to_check || server_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
{
result.entry->forceConnected(*timeouts);
result.is_usable = true;
result.is_up_to_date = true;
return;
}
/// Only status of the remote table corresponding to the Distributed table is taken into account.
/// TODO: request status for joined tables also.
TablesStatusRequest status_request;
status_request.tables.emplace(*table_to_check);
TablesStatusResponse status_response = result.entry->getTablesStatus(*timeouts, status_request);
auto table_status_it = status_response.table_states_by_id.find(*table_to_check);
if (table_status_it == status_response.table_states_by_id.end())
{
const char * message_pattern = "There is no table {}.{} on server: {}";
fail_message = fmt::format(message_pattern, backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription());
LOG_WARNING(log, fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable);
return;
}
result.is_usable = true;
UInt64 max_allowed_delay = settings ? UInt64(settings->max_replica_delay_for_distributed_queries) : 0;
if (!max_allowed_delay)
{
result.is_up_to_date = true;
return;
}
UInt32 delay = table_status_it->second.absolute_delay;
if (delay < max_allowed_delay)
result.is_up_to_date = true;
else
{
result.is_up_to_date = false;
result.staleness = delay;
LOG_TRACE(log, "Server {} has unacceptable replica delay for table {}.{}: {}", result.entry->getDescription(), table_to_check->database, table_to_check->table, delay);
ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica);
}
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT
&& e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
throw;
fail_message = getCurrentExceptionMessage(/* with_stacktrace = */ false);
if (!result.entry.isNull())
{
result.entry->disconnect();
result.reset();
}
}
}
#if defined(OS_LINUX)
ConnectionEstablisherAsync::ConnectionEstablisherAsync(
IConnectionPool * pool_,
const ConnectionTimeouts * timeouts_,
const Settings * settings_,
Poco::Logger * log_,
const QualifiedTableName * table_to_check_)
: connection_establisher(pool_, timeouts_, settings_, log_, table_to_check_)
{
epoll.add(receive_timeout.getDescriptor());
}
void ConnectionEstablisherAsync::Routine::ReadCallback::operator()(int fd, const Poco::Timespan & timeout, const std::string &)
{
/// Check if it's the first time and we need to add socket fd to epoll.
if (connection_establisher_async.socket_fd == -1)
{
connection_establisher_async.epoll.add(fd);
connection_establisher_async.socket_fd = fd;
}
connection_establisher_async.receive_timeout.setRelative(timeout);
fiber = std::move(fiber).resume();
connection_establisher_async.receive_timeout.reset();
}
Fiber ConnectionEstablisherAsync::Routine::operator()(Fiber && sink)
{
try
{
connection_establisher_async.connection_establisher.setAsyncCallback(ReadCallback{connection_establisher_async, sink});
connection_establisher_async.connection_establisher.run(connection_establisher_async.result, connection_establisher_async.fail_message);
}
catch (const boost::context::detail::forced_unwind &)
{
/// This exception is thrown by fiber implementation in case if fiber is being deleted but hasn't exited
/// It should not be caught or it will segfault.
/// Other exceptions must be caught
throw;
}
catch (...)
{
connection_establisher_async.exception = std::current_exception();
}
return std::move(sink);
}
std::variant<int, ConnectionEstablisher::TryResult> ConnectionEstablisherAsync::resume()
{
if (!fiber_created)
{
reset();
fiber = boost::context::fiber(std::allocator_arg_t(), fiber_stack, Routine{*this});
fiber_created = true;
} else if (!checkReceiveTimeout())
return result;
fiber = std::move(fiber).resume();
if (exception)
std::rethrow_exception(std::move(exception));
if (connection_establisher.isFinished())
{
destroyFiber();
return result;
}
return epoll.getFileDescriptor();
}
bool ConnectionEstablisherAsync::checkReceiveTimeout()
{
bool is_socket_ready = false;
bool is_receive_timeout_alarmed = false;
epoll_event events[2];
events[0].data.fd = events[1].data.fd = -1;
size_t ready_count = epoll.getManyReady(2, events, false);
for (size_t i = 0; i != ready_count; ++i)
{
if (events[i].data.fd == socket_fd)
is_socket_ready = true;
if (events[i].data.fd == receive_timeout.getDescriptor())
is_receive_timeout_alarmed = true;
}
if (is_receive_timeout_alarmed && !is_socket_ready)
{
destroyFiber();
/// In not async case this exception would be thrown and caught in ConnectionEstablisher::run,
/// but in async case we process timeout outside and cannot throw exception. So, we just save fail message.
fail_message = "Timeout exceeded while reading from socket (" + result.entry->getDescription() + ")";
epoll.remove(socket_fd);
resetResult();
return false;
}
return true;
}
void ConnectionEstablisherAsync::cancel()
{
destroyFiber();
reset();
}
void ConnectionEstablisherAsync::reset()
{
resetResult();
fail_message.clear();
socket_fd = -1;
}
void ConnectionEstablisherAsync::resetResult()
{
if (!result.entry.isNull())
{
result.entry->disconnect();
result.reset();
}
}
void ConnectionEstablisherAsync::destroyFiber()
{
Fiber to_destroy = std::move(fiber);
fiber_created = false;
}
#endif
}

View File

@ -0,0 +1,131 @@
#pragma once
#include <variant>
#include <Common/Epoll.h>
#include <Common/Fiber.h>
#include <Common/FiberStack.h>
#include <Common/TimerDescriptor.h>
#include <Common/PoolWithFailoverBase.h>
#include <Client/ConnectionPool.h>
namespace DB
{
/// Class for establishing connection to the replica. It supports setting up
/// an async callback that will be called when reading from socket blocks.
class ConnectionEstablisher
{
public:
using TryResult = PoolWithFailoverBase<IConnectionPool>::TryResult;
ConnectionEstablisher(IConnectionPool * pool_,
const ConnectionTimeouts * timeouts_,
const Settings * settings_,
Poco::Logger * log,
const QualifiedTableName * table_to_check = nullptr);
/// Establish connection and save it in result, write possible exception message in fail_message.
void run(TryResult & result, std::string & fail_message);
/// Set async callback that will be called when reading from socket blocks.
void setAsyncCallback(AsyncCallback async_callback_) { async_callback = std::move(async_callback_); }
bool isFinished() const { return is_finished; }
private:
IConnectionPool * pool;
const ConnectionTimeouts * timeouts;
const Settings * settings;
Poco::Logger * log;
const QualifiedTableName * table_to_check;
bool is_finished;
AsyncCallback async_callback = {};
};
#if defined(OS_LINUX)
/// Class for nonblocking establishing connection to the replica.
/// It runs establishing connection process in fiber and sets special
/// read callback which is called when reading from socket blocks.
/// When read callback is called, socket and receive timeout are added in epoll
/// and execution returns to the main program.
/// So, you can poll this epoll file descriptor to determine when to resume.
class ConnectionEstablisherAsync
{
public:
using TryResult = PoolWithFailoverBase<IConnectionPool>::TryResult;
ConnectionEstablisherAsync(IConnectionPool * pool_,
const ConnectionTimeouts * timeouts_,
const Settings * settings_,
Poco::Logger * log_,
const QualifiedTableName * table_to_check = nullptr);
/// Resume establishing connection. If the process was not finished,
/// return file descriptor (you can add it in epoll and poll it,
/// when this fd become ready, call resume again),
/// if the process was failed or finished, return it's result,
std::variant<int, TryResult> resume();
/// Cancel establishing connections. Fiber will be destroyed,
/// class will be set in initial stage.
void cancel();
TryResult getResult() const { return result; }
const std::string & getFailMessage() const { return fail_message; }
private:
/// When epoll file descriptor is ready, check if it's an expired timeout.
/// Return false if receive timeout expired and socket is not ready, return true otherwise.
bool checkReceiveTimeout();
struct Routine
{
ConnectionEstablisherAsync & connection_establisher_async;
struct ReadCallback
{
ConnectionEstablisherAsync & connection_establisher_async;
Fiber & fiber;
void operator()(int fd, const Poco::Timespan & timeout, const std::string &);
};
Fiber operator()(Fiber && sink);
};
void reset();
void resetResult();
void destroyFiber();
ConnectionEstablisher connection_establisher;
TryResult result;
std::string fail_message;
Fiber fiber;
FiberStack fiber_stack;
/// We use timer descriptor for checking socket receive timeout.
TimerDescriptor receive_timeout;
/// In read callback we add socket file descriptor and timer descriptor with receive timeout
/// in epoll, so we can return epoll file descriptor outside for polling.
Epoll epoll;
int socket_fd = -1;
std::string socket_description;
/// If and exception occurred in fiber resume, we save it and rethrow.
std::exception_ptr exception;
bool fiber_created = false;
};
#endif
}

View File

@ -1,4 +1,5 @@
#include <Client/ConnectionPoolWithFailover.h>
#include <Client/ConnectionEstablisher.h>
#include <Poco/Net/NetException.h>
#include <Poco/Net/DNS.h>
@ -23,9 +24,6 @@ namespace DB
namespace ErrorCodes
{
extern const int ATTEMPT_TO_READ_AFTER_EOF;
extern const int NETWORK_ERROR;
extern const int SOCKET_TIMEOUT;
extern const int LOGICAL_ERROR;
}
@ -172,31 +170,12 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
return getManyImpl(settings, pool_mode, try_get_entry);
}
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(
const Settings * settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry)
ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings)
{
size_t min_entries = (settings && settings->skip_unavailable_shards) ? 0 : 1;
size_t max_tries = (settings ?
size_t{settings->connections_with_failover_max_tries} :
size_t{DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES});
size_t max_entries;
if (pool_mode == PoolMode::GET_ALL)
{
min_entries = nested_pools.size();
max_entries = nested_pools.size();
}
else if (pool_mode == PoolMode::GET_ONE)
max_entries = 1;
else if (pool_mode == PoolMode::GET_MANY)
max_entries = settings ? size_t(settings->max_parallel_replicas) : 1;
else
throw DB::Exception("Unknown pool allocation mode", DB::ErrorCodes::LOGICAL_ERROR);
size_t offset = 0;
if (settings)
offset = settings->load_balancing_first_offset % nested_pools.size();
GetPriorityFunc get_priority;
switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
{
@ -225,6 +204,33 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
break;
}
return get_priority;
}
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(
const Settings * settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry)
{
size_t min_entries = (settings && settings->skip_unavailable_shards) ? 0 : 1;
size_t max_tries = (settings ?
size_t{settings->connections_with_failover_max_tries} :
size_t{DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES});
size_t max_entries;
if (pool_mode == PoolMode::GET_ALL)
{
min_entries = nested_pools.size();
max_entries = nested_pools.size();
}
else if (pool_mode == PoolMode::GET_ONE)
max_entries = 1;
else if (pool_mode == PoolMode::GET_MANY)
max_entries = settings ? size_t(settings->max_parallel_replicas) : 1;
else
throw DB::Exception("Unknown pool allocation mode", DB::ErrorCodes::LOGICAL_ERROR);
GetPriorityFunc get_priority = makeGetPriorityFunc(settings);
UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;
@ -241,77 +247,17 @@ ConnectionPoolWithFailover::tryGetEntry(
const Settings * settings,
const QualifiedTableName * table_to_check)
{
ConnectionEstablisher connection_establisher(&pool, &timeouts, settings, log, table_to_check);
TryResult result;
try
{
result.entry = pool.get(timeouts, settings, /* force_connected = */ false);
UInt64 server_revision = 0;
if (table_to_check)
server_revision = result.entry->getServerRevision(timeouts);
if (!table_to_check || server_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS)
{
result.entry->forceConnected(timeouts);
result.is_usable = true;
result.is_up_to_date = true;
return result;
}
/// Only status of the remote table corresponding to the Distributed table is taken into account.
/// TODO: request status for joined tables also.
TablesStatusRequest status_request;
status_request.tables.emplace(*table_to_check);
TablesStatusResponse status_response = result.entry->getTablesStatus(timeouts, status_request);
auto table_status_it = status_response.table_states_by_id.find(*table_to_check);
if (table_status_it == status_response.table_states_by_id.end())
{
const char * message_pattern = "There is no table {}.{} on server: {}";
fail_message = fmt::format(message_pattern, backQuote(table_to_check->database), backQuote(table_to_check->table), result.entry->getDescription());
LOG_WARNING(log, fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionMissingTable);
return result;
}
result.is_usable = true;
UInt64 max_allowed_delay = settings ? UInt64(settings->max_replica_delay_for_distributed_queries) : 0;
if (!max_allowed_delay)
{
result.is_up_to_date = true;
return result;
}
UInt32 delay = table_status_it->second.absolute_delay;
if (delay < max_allowed_delay)
result.is_up_to_date = true;
else
{
result.is_up_to_date = false;
result.staleness = delay;
LOG_TRACE(log, "Server {} has unacceptable replica delay for table {}.{}: {}", result.entry->getDescription(), table_to_check->database, table_to_check->table, delay);
ProfileEvents::increment(ProfileEvents::DistributedConnectionStaleReplica);
}
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::NETWORK_ERROR && e.code() != ErrorCodes::SOCKET_TIMEOUT
&& e.code() != ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF)
throw;
fail_message = getCurrentExceptionMessage(/* with_stacktrace = */ false);
if (!result.entry.isNull())
{
result.entry->disconnect();
result.reset();
}
}
connection_establisher.run(result, fail_message);
return result;
}
std::vector<ConnectionPoolWithFailover::Base::ShuffledPool> ConnectionPoolWithFailover::getShuffledPools(const Settings * settings)
{
GetPriorityFunc get_priority = makeGetPriorityFunc(settings);
UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
return Base::getShuffledPools(max_ignored_errors, get_priority);
}
}

View File

@ -80,6 +80,15 @@ public:
using Status = std::vector<NestedPoolStatus>;
Status getStatus() const;
std::vector<Base::ShuffledPool> getShuffledPools(const Settings * settings);
size_t getMaxErrorCup() const { return Base::max_error_cap; }
void updateSharedError(std::vector<ShuffledPool> & shuffled_pools)
{
Base::updateSharedErrorCounts(shuffled_pools);
}
private:
/// Get the values of relevant settings and call Base::getMany()
std::vector<TryResult> getManyImpl(
@ -97,6 +106,8 @@ private:
const Settings * settings,
const QualifiedTableName * table_to_check = nullptr);
GetPriorityFunc makeGetPriorityFunc(const Settings * settings);
private:
std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
size_t last_used = 0; /// Last used for round_robin policy.

View File

@ -0,0 +1,524 @@
#if defined(OS_LINUX)
#include <Client/HedgedConnections.h>
#include <Interpreters/ClientInfo.h>
namespace DB
{
namespace ErrorCodes
{
extern const int MISMATCH_REPLICAS_DATA_SOURCES;
extern const int LOGICAL_ERROR;
extern const int SOCKET_TIMEOUT;
extern const int ALL_CONNECTION_TRIES_FAILED;
}
HedgedConnections::HedgedConnections(
const ConnectionPoolWithFailoverPtr & pool_,
const Settings & settings_,
const ConnectionTimeouts & timeouts_,
const ThrottlerPtr & throttler_,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_)
: hedged_connections_factory(pool_, &settings_, timeouts_, table_to_check_)
, settings(settings_)
, throttler(throttler_)
{
std::vector<Connection *> connections = hedged_connections_factory.getManyConnections(pool_mode);
if (connections.empty())
return;
offset_states.reserve(connections.size());
for (size_t i = 0; i != connections.size(); ++i)
{
offset_states.emplace_back();
offset_states[i].replicas.emplace_back(connections[i]);
offset_states[i].active_connection_count = 1;
ReplicaState & replica = offset_states[i].replicas.back();
replica.connection->setThrottler(throttler_);
epoll.add(replica.packet_receiver->getFileDescriptor());
fd_to_replica_location[replica.packet_receiver->getFileDescriptor()] = ReplicaLocation{i, 0};
epoll.add(replica.change_replica_timeout.getDescriptor());
timeout_fd_to_replica_location[replica.change_replica_timeout.getDescriptor()] = ReplicaLocation{i, 0};
}
active_connection_count = connections.size();
offsets_with_disabled_changing_replica = 0;
pipeline_for_new_replicas.add([throttler_](ReplicaState & replica_) { replica_.connection->setThrottler(throttler_); });
}
void HedgedConnections::Pipeline::add(std::function<void(ReplicaState & replica)> send_function)
{
pipeline.push_back(send_function);
}
void HedgedConnections::Pipeline::run(ReplicaState & replica)
{
for (auto & send_func : pipeline)
send_func(replica);
}
void HedgedConnections::sendScalarsData(Scalars & data)
{
std::lock_guard lock(cancel_mutex);
if (!sent_query)
throw Exception("Cannot send scalars data: query not yet sent.", ErrorCodes::LOGICAL_ERROR);
auto send_scalars_data = [&data](ReplicaState & replica) { replica.connection->sendScalarsData(data); };
for (auto & offset_state : offset_states)
for (auto & replica : offset_state.replicas)
if (replica.connection)
send_scalars_data(replica);
pipeline_for_new_replicas.add(send_scalars_data);
}
void HedgedConnections::sendExternalTablesData(std::vector<ExternalTablesData> & data)
{
std::lock_guard lock(cancel_mutex);
if (!sent_query)
throw Exception("Cannot send external tables data: query not yet sent.", ErrorCodes::LOGICAL_ERROR);
if (data.size() != size())
throw Exception("Mismatch between replicas and data sources", ErrorCodes::MISMATCH_REPLICAS_DATA_SOURCES);
auto send_external_tables_data = [&data](ReplicaState & replica) { replica.connection->sendExternalTablesData(data[0]); };
for (auto & offset_state : offset_states)
for (auto & replica : offset_state.replicas)
if (replica.connection)
send_external_tables_data(replica);
pipeline_for_new_replicas.add(send_external_tables_data);
}
void HedgedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
{
std::lock_guard lock(cancel_mutex);
if (sent_query)
throw Exception("Cannot send uuids after query is sent.", ErrorCodes::LOGICAL_ERROR);
auto send_ignored_part_uuids = [&uuids](ReplicaState & replica) { replica.connection->sendIgnoredPartUUIDs(uuids); };
for (auto & offset_state : offset_states)
for (auto & replica : offset_state.replicas)
if (replica.connection)
send_ignored_part_uuids(replica);
pipeline_for_new_replicas.add(send_ignored_part_uuids);
}
void HedgedConnections::sendQuery(
const ConnectionTimeouts & timeouts,
const String & query,
const String & query_id,
UInt64 stage,
const ClientInfo & client_info,
bool with_pending_data)
{
std::lock_guard lock(cancel_mutex);
if (sent_query)
throw Exception("Query already sent.", ErrorCodes::LOGICAL_ERROR);
for (auto & offset_state : offset_states)
{
for (auto & replica : offset_state.replicas)
{
if (replica.connection->getServerRevision(timeouts) < DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD)
{
disable_two_level_aggregation = true;
break;
}
}
if (disable_two_level_aggregation)
break;
}
if (!disable_two_level_aggregation)
{
/// Tell hedged_connections_factory to skip replicas that doesn't support two-level aggregation.
hedged_connections_factory.skipReplicasWithTwoLevelAggregationIncompatibility();
}
auto send_query = [this, timeouts, query, query_id, stage, client_info, with_pending_data](ReplicaState & replica)
{
Settings modified_settings = settings;
if (disable_two_level_aggregation)
{
/// Disable two-level aggregation due to version incompatibility.
modified_settings.group_by_two_level_threshold = 0;
modified_settings.group_by_two_level_threshold_bytes = 0;
}
if (offset_states.size() > 1)
{
modified_settings.parallel_replicas_count = offset_states.size();
modified_settings.parallel_replica_offset = fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset;
}
replica.connection->sendQuery(timeouts, query, query_id, stage, &modified_settings, &client_info, with_pending_data);
replica.change_replica_timeout.setRelative(timeouts.receive_data_timeout);
};
for (auto & offset_status : offset_states)
for (auto & replica : offset_status.replicas)
send_query(replica);
pipeline_for_new_replicas.add(send_query);
sent_query = true;
}
void HedgedConnections::disconnect()
{
std::lock_guard lock(cancel_mutex);
for (auto & offset_status : offset_states)
for (auto & replica : offset_status.replicas)
if (replica.connection)
finishProcessReplica(replica, true);
if (hedged_connections_factory.hasEventsInProcess())
{
if (hedged_connections_factory.numberOfProcessingReplicas() > 0)
epoll.remove(hedged_connections_factory.getFileDescriptor());
hedged_connections_factory.stopChoosingReplicas();
}
}
std::string HedgedConnections::dumpAddresses() const
{
std::lock_guard lock(cancel_mutex);
std::string addresses;
bool is_first = true;
for (const auto & offset_state : offset_states)
{
for (const auto & replica : offset_state.replicas)
{
if (replica.connection)
{
addresses += (is_first ? "" : "; ") + replica.connection->getDescription();
is_first = false;
}
}
}
return addresses;
}
void HedgedConnections::sendCancel()
{
std::lock_guard lock(cancel_mutex);
if (!sent_query || cancelled)
throw Exception("Cannot cancel. Either no query sent or already cancelled.", ErrorCodes::LOGICAL_ERROR);
for (auto & offset_status : offset_states)
for (auto & replica : offset_status.replicas)
if (replica.connection)
replica.connection->sendCancel();
cancelled = true;
}
Packet HedgedConnections::drain()
{
std::lock_guard lock(cancel_mutex);
if (!cancelled)
throw Exception("Cannot drain connections: cancel first.", ErrorCodes::LOGICAL_ERROR);
Packet res;
res.type = Protocol::Server::EndOfStream;
while (!epoll.empty())
{
ReplicaLocation location = getReadyReplicaLocation();
Packet packet = receivePacketFromReplica(location);
switch (packet.type)
{
case Protocol::Server::PartUUIDs:
case Protocol::Server::Data:
case Protocol::Server::Progress:
case Protocol::Server::ProfileInfo:
case Protocol::Server::Totals:
case Protocol::Server::Extremes:
case Protocol::Server::EndOfStream:
break;
case Protocol::Server::Exception:
default:
/// If we receive an exception or an unknown packet, we save it.
res = std::move(packet);
break;
}
}
return res;
}
Packet HedgedConnections::receivePacket()
{
std::lock_guard lock(cancel_mutex);
return receivePacketUnlocked({});
}
Packet HedgedConnections::receivePacketUnlocked(AsyncCallback async_callback)
{
if (!sent_query)
throw Exception("Cannot receive packets: no query sent.", ErrorCodes::LOGICAL_ERROR);
if (!hasActiveConnections())
throw Exception("No more packets are available.", ErrorCodes::LOGICAL_ERROR);
if (epoll.empty())
throw Exception("No pending events in epoll.", ErrorCodes::LOGICAL_ERROR);
ReplicaLocation location = getReadyReplicaLocation(std::move(async_callback));
return receivePacketFromReplica(location);
}
HedgedConnections::ReplicaLocation HedgedConnections::getReadyReplicaLocation(AsyncCallback async_callback)
{
/// Firstly, resume replica with the last received packet if it has pending data.
if (replica_with_last_received_packet)
{
ReplicaLocation location = replica_with_last_received_packet.value();
replica_with_last_received_packet.reset();
if (offset_states[location.offset].replicas[location.index].connection->hasReadPendingData() && resumePacketReceiver(location))
return location;
}
int event_fd;
while (true)
{
/// Get ready file descriptor from epoll and process it.
event_fd = getReadyFileDescriptor(async_callback);
if (event_fd == hedged_connections_factory.getFileDescriptor())
checkNewReplica();
else if (fd_to_replica_location.contains(event_fd))
{
ReplicaLocation location = fd_to_replica_location[event_fd];
if (resumePacketReceiver(location))
return location;
}
else if (timeout_fd_to_replica_location.contains(event_fd))
{
ReplicaLocation location = timeout_fd_to_replica_location[event_fd];
offset_states[location.offset].replicas[location.index].change_replica_timeout.reset();
offset_states[location.offset].replicas[location.index].is_change_replica_timeout_expired = true;
offset_states[location.offset].next_replica_in_process = true;
offsets_queue.push(location.offset);
startNewReplica();
}
else
throw Exception("Unknown event from epoll", ErrorCodes::LOGICAL_ERROR);
}
};
bool HedgedConnections::resumePacketReceiver(const HedgedConnections::ReplicaLocation & location)
{
ReplicaState & replica_state = offset_states[location.offset].replicas[location.index];
auto res = replica_state.packet_receiver->resume();
if (std::holds_alternative<Packet>(res))
{
last_received_packet = std::move(std::get<Packet>(res));
return true;
}
else if (std::holds_alternative<Poco::Timespan>(res))
{
finishProcessReplica(replica_state, true);
/// Check if there is no more active connections with the same offset and there is no new replica in process.
if (offset_states[location.offset].active_connection_count == 0 && !offset_states[location.offset].next_replica_in_process)
throw NetException("Receive timeout expired", ErrorCodes::SOCKET_TIMEOUT);
}
return false;
}
int HedgedConnections::getReadyFileDescriptor(AsyncCallback async_callback)
{
epoll_event event;
event.data.fd = -1;
size_t events_count = 0;
while (events_count == 0)
{
events_count = epoll.getManyReady(1, &event, false);
if (!events_count && async_callback)
async_callback(epoll.getFileDescriptor(), 0, epoll.getDescription());
}
return event.data.fd;
}
Packet HedgedConnections::receivePacketFromReplica(const ReplicaLocation & replica_location)
{
ReplicaState & replica = offset_states[replica_location.offset].replicas[replica_location.index];
Packet packet = std::move(last_received_packet);
switch (packet.type)
{
case Protocol::Server::Data:
/// If we received the first not empty data packet and still can change replica,
/// disable changing replica with this offset.
if (offset_states[replica_location.offset].can_change_replica && packet.block.rows() > 0)
disableChangingReplica(replica_location);
replica_with_last_received_packet = replica_location;
break;
case Protocol::Server::Progress:
/// Check if we have made some progress and still can change replica.
if (offset_states[replica_location.offset].can_change_replica && packet.progress.read_bytes > 0)
{
/// If we are allowed to change replica until the first data packet,
/// just restart timeout (if it hasn't expired yet). Otherwise disable changing replica with this offset.
if (settings.allow_changing_replica_until_first_data_packet && !replica.is_change_replica_timeout_expired)
replica.change_replica_timeout.setRelative(hedged_connections_factory.getConnectionTimeouts().receive_data_timeout);
else
disableChangingReplica(replica_location);
}
replica_with_last_received_packet = replica_location;
break;
case Protocol::Server::PartUUIDs:
case Protocol::Server::ProfileInfo:
case Protocol::Server::Totals:
case Protocol::Server::Extremes:
case Protocol::Server::Log:
replica_with_last_received_packet = replica_location;
break;
case Protocol::Server::EndOfStream:
finishProcessReplica(replica, false);
break;
case Protocol::Server::Exception:
default:
finishProcessReplica(replica, true);
break;
}
return packet;
}
void HedgedConnections::disableChangingReplica(const ReplicaLocation & replica_location)
{
/// Stop working with replicas, that are responsible for the same offset.
OffsetState & offset_state = offset_states[replica_location.offset];
offset_state.replicas[replica_location.index].change_replica_timeout.reset();
++offsets_with_disabled_changing_replica;
offset_state.can_change_replica = false;
for (size_t i = 0; i != offset_state.replicas.size(); ++i)
{
if (i != replica_location.index && offset_state.replicas[i].connection)
{
offset_state.replicas[i].connection->sendCancel();
finishProcessReplica(offset_state.replicas[i], true);
}
}
/// If we disabled changing replica with all offsets, we need to stop choosing new replicas.
if (hedged_connections_factory.hasEventsInProcess() && offsets_with_disabled_changing_replica == offset_states.size())
{
if (hedged_connections_factory.numberOfProcessingReplicas() > 0)
epoll.remove(hedged_connections_factory.getFileDescriptor());
hedged_connections_factory.stopChoosingReplicas();
}
}
void HedgedConnections::startNewReplica()
{
Connection * connection = nullptr;
HedgedConnectionsFactory::State state = hedged_connections_factory.startNewConnection(connection);
/// Check if we need to add hedged_connections_factory file descriptor to epoll.
if (state == HedgedConnectionsFactory::State::NOT_READY && hedged_connections_factory.numberOfProcessingReplicas() == 1)
epoll.add(hedged_connections_factory.getFileDescriptor());
processNewReplicaState(state, connection);
}
void HedgedConnections::checkNewReplica()
{
Connection * connection = nullptr;
HedgedConnectionsFactory::State state = hedged_connections_factory.waitForReadyConnections(connection);
processNewReplicaState(state, connection);
/// Check if we don't need to listen hedged_connections_factory file descriptor in epoll anymore.
if (hedged_connections_factory.numberOfProcessingReplicas() == 0)
epoll.remove(hedged_connections_factory.getFileDescriptor());
}
void HedgedConnections::processNewReplicaState(HedgedConnectionsFactory::State state, Connection * connection)
{
switch (state)
{
case HedgedConnectionsFactory::State::READY:
{
size_t offset = offsets_queue.front();
offsets_queue.pop();
offset_states[offset].replicas.emplace_back(connection);
++offset_states[offset].active_connection_count;
offset_states[offset].next_replica_in_process = false;
++active_connection_count;
ReplicaState & replica = offset_states[offset].replicas.back();
epoll.add(replica.packet_receiver->getFileDescriptor());
fd_to_replica_location[replica.packet_receiver->getFileDescriptor()] = ReplicaLocation{offset, offset_states[offset].replicas.size() - 1};
epoll.add(replica.change_replica_timeout.getDescriptor());
timeout_fd_to_replica_location[replica.change_replica_timeout.getDescriptor()] = ReplicaLocation{offset, offset_states[offset].replicas.size() - 1};
pipeline_for_new_replicas.run(replica);
break;
}
case HedgedConnectionsFactory::State::CANNOT_CHOOSE:
{
while (!offsets_queue.empty())
{
/// Check if there is no active replica with needed offsets.
if (offset_states[offsets_queue.front()].active_connection_count == 0)
throw Exception("Cannot find enough connections to replicas", ErrorCodes::ALL_CONNECTION_TRIES_FAILED);
offset_states[offsets_queue.front()].next_replica_in_process = false;
offsets_queue.pop();
}
break;
}
case HedgedConnectionsFactory::State::NOT_READY:
break;
}
}
void HedgedConnections::finishProcessReplica(ReplicaState & replica, bool disconnect)
{
replica.packet_receiver->cancel();
replica.change_replica_timeout.reset();
epoll.remove(replica.packet_receiver->getFileDescriptor());
--offset_states[fd_to_replica_location[replica.packet_receiver->getFileDescriptor()].offset].active_connection_count;
fd_to_replica_location.erase(replica.packet_receiver->getFileDescriptor());
epoll.remove(replica.change_replica_timeout.getDescriptor());
timeout_fd_to_replica_location.erase(replica.change_replica_timeout.getDescriptor());
--active_connection_count;
if (disconnect)
replica.connection->disconnect();
replica.connection = nullptr;
}
}
#endif

View File

@ -0,0 +1,189 @@
#pragma once
#if defined(OS_LINUX)
#include <functional>
#include <queue>
#include <optional>
#include <Client/HedgedConnectionsFactory.h>
#include <Client/IConnections.h>
#include <Client/PacketReceiver.h>
#include <Common/FiberStack.h>
#include <Common/Fiber.h>
namespace DB
{
/** To receive data from multiple replicas (connections) from one shard asynchronously.
* The principe of Hedged Connections is used to reduce tail latency:
* if we don't receive data from replica and there is no progress in query execution
* for a long time, we try to get new replica and send query to it,
* without cancelling working with previous replica. This class
* supports all functionality that MultipleConnections has.
*/
class HedgedConnections : public IConnections
{
public:
using PacketReceiverPtr = std::unique_ptr<PacketReceiver>;
struct ReplicaState
{
explicit ReplicaState(Connection * connection_) : connection(connection_), packet_receiver(std::make_unique<PacketReceiver>(connection_))
{
}
Connection * connection = nullptr;
PacketReceiverPtr packet_receiver;
TimerDescriptor change_replica_timeout;
bool is_change_replica_timeout_expired = false;
};
struct OffsetState
{
/// Replicas with the same offset.
std::vector<ReplicaState> replicas;
/// An amount of active replicas. When can_change_replica is false,
/// active_connection_count is always <= 1 (because we stopped working with
/// other replicas with the same offset)
size_t active_connection_count = 0;
bool can_change_replica = true;
/// This flag is true when this offset is in queue for
/// new replicas. It's needed to process receive timeout
/// (throw an exception when receive timeout expired and there is no
/// new replica in process)
bool next_replica_in_process = false;
};
/// We process events in epoll, so we need to determine replica by it's
/// file descriptor. We store map fd -> replica location. To determine
/// where replica is, we need a replica offset
/// (the same as parallel_replica_offset), and index, which is needed because
/// we can have many replicas with same offset (when receive_data_timeout has expired).
struct ReplicaLocation
{
size_t offset;
size_t index;
};
HedgedConnections(const ConnectionPoolWithFailoverPtr & pool_,
const Settings & settings_,
const ConnectionTimeouts & timeouts_,
const ThrottlerPtr & throttler,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr);
void sendScalarsData(Scalars & data) override;
void sendExternalTablesData(std::vector<ExternalTablesData> & data) override;
void sendQuery(
const ConnectionTimeouts & timeouts,
const String & query,
const String & query_id,
UInt64 stage,
const ClientInfo & client_info,
bool with_pending_data) override;
Packet receivePacket() override;
Packet receivePacketUnlocked(AsyncCallback async_callback) override;
void disconnect() override;
void sendCancel() override;
void sendIgnoredPartUUIDs(const std::vector<UUID> & uuids) override;
Packet drain() override;
std::string dumpAddresses() const override;
size_t size() const override { return offset_states.size(); }
bool hasActiveConnections() const override { return active_connection_count > 0; }
private:
/// If we don't receive data from replica and there is no progress in query
/// execution for receive_data_timeout, we are trying to get new
/// replica and send query to it. Beside sending query, there are some
/// additional actions like sendScalarsData or sendExternalTablesData and we need
/// to perform these actions in the same order on the new replica. So, we will
/// save actions with replicas in pipeline to perform them on the new replicas.
class Pipeline
{
public:
void add(std::function<void(ReplicaState &)> send_function);
void run(ReplicaState & replica);
private:
std::vector<std::function<void(ReplicaState &)>> pipeline;
};
Packet receivePacketFromReplica(const ReplicaLocation & replica_location);
ReplicaLocation getReadyReplicaLocation(AsyncCallback async_callback = {});
bool resumePacketReceiver(const ReplicaLocation & replica_location);
void disableChangingReplica(const ReplicaLocation & replica_location);
void startNewReplica();
void checkNewReplica();
void processNewReplicaState(HedgedConnectionsFactory::State state, Connection * connection);
void finishProcessReplica(ReplicaState & replica, bool disconnect);
int getReadyFileDescriptor(AsyncCallback async_callback = {});
HedgedConnectionsFactory hedged_connections_factory;
/// All replicas in offset_states[offset] is responsible for process query
/// with setting parallel_replica_offset = offset. In common situations
/// replica_states[offset].replicas.size() = 1 (like in MultiplexedConnections).
std::vector<OffsetState> offset_states;
/// Map socket file descriptor to replica location (it's offset and index in OffsetState.replicas).
std::unordered_map<int, ReplicaLocation> fd_to_replica_location;
/// Map receive data timeout file descriptor to replica location.
std::unordered_map<int, ReplicaLocation> timeout_fd_to_replica_location;
/// A queue of offsets for new replicas. When we get RECEIVE_DATA_TIMEOUT from
/// the replica, we push it's offset to this queue and start trying to get
/// new replica.
std::queue<int> offsets_queue;
/// The current number of valid connections to the replicas of this shard.
size_t active_connection_count;
/// We count offsets in which we can't change replica anymore,
/// it's needed to cancel choosing new replicas when we
/// disabled replica changing in all offsets.
size_t offsets_with_disabled_changing_replica;
Pipeline pipeline_for_new_replicas;
/// New replica may not support two-level aggregation due to version incompatibility.
/// If we didn't disabled it, we need to skip this replica.
bool disable_two_level_aggregation = false;
/// We will save replica with last received packet
/// (except cases when packet type is EndOfStream or Exception)
/// to resume it's packet receiver when new packet is needed.
std::optional<ReplicaLocation> replica_with_last_received_packet;
Packet last_received_packet;
Epoll epoll;
const Settings & settings;
ThrottlerPtr throttler;
bool sent_query = false;
bool cancelled = false;
mutable std::mutex cancel_mutex;
};
}
#endif

Some files were not shown because too many files have changed in this diff Show More