Merge branch 'master' into persistent_nukeeper_snapshot_storage

This commit is contained in:
alesapin 2021-03-01 13:18:35 +03:00
commit 91bc4478d7
167 changed files with 3486 additions and 1983 deletions

View File

@ -322,8 +322,14 @@ public:
if (offset_is_whole_number_of_hours_everytime) if (offset_is_whole_number_of_hours_everytime)
return (UInt32(t) / 60) % 60; return (UInt32(t) / 60) % 60;
UInt32 date = find(t).date; /// To consider the DST changing situation within this day.
return (UInt32(t) - date) / 60 % 60; /// also make the special timezones with no whole hour offset such as 'Australia/Lord_Howe' been taken into account
DayNum index = findIndex(t);
UInt32 res = t - lut[index].date;
if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change)
res += lut[index].amount_of_offset_change;
return res / 60 % 60;
} }
inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; } inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; }

View File

@ -1,5 +1,20 @@
#pragma once #pragma once
/// __has_feature is supported only by clang.
///
/// However, libcxx/libcxxabi override it to 0,
/// so checks written directly against __has_feature give wrong results.
///
/// NOTE:
/// - __has_feature cannot simply be undefined,
///   since that would break any C++ header included after
///   <common/defines.h>
/// - ch_has_feature must not fall back to 0 (it is left undefined instead),
///   since that may cause false-positive detection (a common problem)
#if defined(__clang__) && defined(__has_feature)
# define ch_has_feature __has_feature
#endif
#if defined(_MSC_VER) #if defined(_MSC_VER)
# if !defined(likely) # if !defined(likely)
# define likely(x) (x) # define likely(x) (x)
@ -32,8 +47,8 @@
/// Check for presence of address sanitizer /// Check for presence of address sanitizer
#if !defined(ADDRESS_SANITIZER) #if !defined(ADDRESS_SANITIZER)
# if defined(__has_feature) # if defined(ch_has_feature)
# if __has_feature(address_sanitizer) # if ch_has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1 # define ADDRESS_SANITIZER 1
# endif # endif
# elif defined(__SANITIZE_ADDRESS__) # elif defined(__SANITIZE_ADDRESS__)
@ -42,8 +57,8 @@
#endif #endif
#if !defined(THREAD_SANITIZER) #if !defined(THREAD_SANITIZER)
# if defined(__has_feature) # if defined(ch_has_feature)
# if __has_feature(thread_sanitizer) # if ch_has_feature(thread_sanitizer)
# define THREAD_SANITIZER 1 # define THREAD_SANITIZER 1
# endif # endif
# elif defined(__SANITIZE_THREAD__) # elif defined(__SANITIZE_THREAD__)
@ -52,8 +67,8 @@
#endif #endif
#if !defined(MEMORY_SANITIZER) #if !defined(MEMORY_SANITIZER)
# if defined(__has_feature) # if defined(ch_has_feature)
# if __has_feature(memory_sanitizer) # if ch_has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1 # define MEMORY_SANITIZER 1
# endif # endif
# elif defined(__MEMORY_SANITIZER__) # elif defined(__MEMORY_SANITIZER__)

View File

@ -15,11 +15,11 @@
#endif #endif
#define __msan_unpoison(X, Y) // NOLINT #define __msan_unpoison(X, Y) // NOLINT
#if defined(__has_feature) #if defined(ch_has_feature)
# if __has_feature(memory_sanitizer) # if ch_has_feature(memory_sanitizer)
# undef __msan_unpoison # undef __msan_unpoison
# include <sanitizer/msan_interface.h> # include <sanitizer/msan_interface.h>
# endif # endif
#endif #endif
#include <link.h> #include <link.h>

View File

@ -51,10 +51,11 @@ Connection::Connection(
const char* ssl_key, const char* ssl_key,
unsigned timeout, unsigned timeout,
unsigned rw_timeout, unsigned rw_timeout,
bool enable_local_infile) bool enable_local_infile,
bool opt_reconnect)
: Connection() : Connection()
{ {
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile); connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile, opt_reconnect);
} }
Connection::Connection(const std::string & config_name) Connection::Connection(const std::string & config_name)
@ -80,7 +81,8 @@ void Connection::connect(const char* db,
const char * ssl_key, const char * ssl_key,
unsigned timeout, unsigned timeout,
unsigned rw_timeout, unsigned rw_timeout,
bool enable_local_infile) bool enable_local_infile,
bool opt_reconnect)
{ {
if (is_connected) if (is_connected)
disconnect(); disconnect();
@ -104,9 +106,8 @@ void Connection::connect(const char* db,
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg)) if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Enables auto-reconnect. /// See C API Developer Guide: Automatic Reconnection Control
bool reconnect = true; if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&opt_reconnect)))
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Specifies particular ssl key and certificate if it needs /// Specifies particular ssl key and certificate if it needs

View File

@ -14,6 +14,8 @@
/// Disable LOAD DATA LOCAL INFILE because it is insecure /// Disable LOAD DATA LOCAL INFILE because it is insecure
#define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false #define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false
/// See https://dev.mysql.com/doc/c-api/5.7/en/c-api-auto-reconnect.html
#define MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT true
namespace mysqlxx namespace mysqlxx
@ -76,7 +78,8 @@ public:
const char * ssl_key = "", const char * ssl_key = "",
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT, unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT, unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
/// Creates connection. Can be used if Poco::Util::Application is using. /// Creates connection. Can be used if Poco::Util::Application is using.
/// All settings will be got from config_name section of configuration. /// All settings will be got from config_name section of configuration.
@ -96,7 +99,8 @@ public:
const char* ssl_key, const char* ssl_key,
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT, unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT, unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
void connect(const std::string & config_name) void connect(const std::string & config_name)
{ {
@ -112,6 +116,7 @@ public:
std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", ""); std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", "");
std::string ssl_key = cfg.getString(config_name + ".ssl_key", ""); std::string ssl_key = cfg.getString(config_name + ".ssl_key", "");
bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
unsigned timeout = unsigned timeout =
cfg.getInt(config_name + ".connect_timeout", cfg.getInt(config_name + ".connect_timeout",
@ -135,7 +140,8 @@ public:
ssl_key.c_str(), ssl_key.c_str(),
timeout, timeout,
rw_timeout, rw_timeout,
enable_local_infile); enable_local_infile,
opt_reconnect);
} }
/// If MySQL connection was established. /// If MySQL connection was established.

View File

@ -26,6 +26,15 @@ struct ConnectionFailed : public Exception
}; };
/// Connection to MySQL server was lost.
/// Query::executeImpl() throws this instead of BadQuery when mysql_real_query()
/// fails with CR_SERVER_GONE_ERROR or CR_SERVER_LOST.
struct ConnectionLost : public Exception
{
/// msg and code are forwarded unchanged to the Exception base; code defaults to 0.
ConnectionLost(const std::string & msg, int code = 0) : Exception(msg, code) {}
/// Both accessors return the same fully qualified type name.
const char * name() const throw() override { return "mysqlxx::ConnectionLost"; }
const char * className() const throw() override { return "mysqlxx::ConnectionLost"; }
};
/// Erroneous query. /// Erroneous query.
struct BadQuery : public Exception struct BadQuery : public Exception
{ {

View File

@ -10,7 +10,6 @@
#include <common/sleep.h> #include <common/sleep.h>
#include <Poco/Util/Application.h>
#include <Poco/Util/LayeredConfiguration.h> #include <Poco/Util/LayeredConfiguration.h>
@ -41,7 +40,9 @@ void Pool::Entry::decrementRefCount()
Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name, Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name,
unsigned default_connections_, unsigned max_connections_, unsigned default_connections_, unsigned max_connections_,
const char * parent_config_name_) const char * parent_config_name_)
: default_connections(default_connections_), max_connections(max_connections_) : logger(Poco::Logger::get("mysqlxx::Pool"))
, default_connections(default_connections_)
, max_connections(max_connections_)
{ {
server = cfg.getString(config_name + ".host"); server = cfg.getString(config_name + ".host");
@ -78,6 +79,9 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", enable_local_infile = cfg.getBool(config_name + ".enable_local_infile",
cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE)); cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE));
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect",
cfg.getBool(parent_config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT));
} }
else else
{ {
@ -96,6 +100,8 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
enable_local_infile = cfg.getBool( enable_local_infile = cfg.getBool(
config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
} }
connect_timeout = cfg.getInt(config_name + ".connect_timeout", connect_timeout = cfg.getInt(config_name + ".connect_timeout",
@ -125,20 +131,30 @@ Pool::Entry Pool::get()
initialize(); initialize();
for (;;) for (;;)
{ {
logger.trace("(%s): Iterating through existing MySQL connections", getDescription());
for (auto & connection : connections) for (auto & connection : connections)
{ {
if (connection->ref_count == 0) if (connection->ref_count == 0)
return Entry(connection, this); return Entry(connection, this);
} }
logger.trace("(%s): Trying to allocate a new connection.", getDescription());
if (connections.size() < static_cast<size_t>(max_connections)) if (connections.size() < static_cast<size_t>(max_connections))
{ {
Connection * conn = allocConnection(); Connection * conn = allocConnection();
if (conn) if (conn)
return Entry(conn, this); return Entry(conn, this);
logger.trace("(%s): Unable to create a new connection: Allocation failed.", getDescription());
}
else
{
logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription());
} }
lock.unlock(); lock.unlock();
logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
lock.lock(); lock.lock();
} }
@ -162,8 +178,7 @@ Pool::Entry Pool::tryGet()
if (res.tryForceConnected()) /// Tries to reestablish connection as well if (res.tryForceConnected()) /// Tries to reestablish connection as well
return res; return res;
auto & logger = Poco::Util::Application::instance().logger(); logger.debug("(%s): Idle connection to MySQL server cannot be recovered, dropping it.", getDescription());
logger.information("Idle connection to mysql server cannot be recovered, dropping it.");
/// This one is disconnected, cannot be reestablished and so needs to be disposed of. /// This one is disconnected, cannot be reestablished and so needs to be disposed of.
connection_it = connections.erase(connection_it); connection_it = connections.erase(connection_it);
@ -186,6 +201,8 @@ Pool::Entry Pool::tryGet()
void Pool::removeConnection(Connection* connection) void Pool::removeConnection(Connection* connection)
{ {
logger.trace("(%s): Removing connection.", getDescription());
std::lock_guard<std::mutex> lock(mutex); std::lock_guard<std::mutex> lock(mutex);
if (connection) if (connection)
{ {
@ -210,8 +227,6 @@ void Pool::Entry::forceConnected() const
if (data == nullptr) if (data == nullptr)
throw Poco::RuntimeException("Tried to access NULL database connection."); throw Poco::RuntimeException("Tried to access NULL database connection.");
Poco::Util::Application & app = Poco::Util::Application::instance();
bool first = true; bool first = true;
while (!tryForceConnected()) while (!tryForceConnected())
{ {
@ -220,7 +235,7 @@ void Pool::Entry::forceConnected() const
else else
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL); sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
app.logger().information("MYSQL: Reconnecting to " + pool->description); pool->logger.debug("Entry: Reconnecting to MySQL server %s", pool->description);
data->conn.connect( data->conn.connect(
pool->db.c_str(), pool->db.c_str(),
pool->server.c_str(), pool->server.c_str(),
@ -233,7 +248,8 @@ void Pool::Entry::forceConnected() const
pool->ssl_key.c_str(), pool->ssl_key.c_str(),
pool->connect_timeout, pool->connect_timeout,
pool->rw_timeout, pool->rw_timeout,
pool->enable_local_infile); pool->enable_local_infile,
pool->opt_reconnect);
} }
} }
@ -242,18 +258,22 @@ bool Pool::Entry::tryForceConnected() const
{ {
auto * const mysql_driver = data->conn.getDriver(); auto * const mysql_driver = data->conn.getDriver();
const auto prev_connection_id = mysql_thread_id(mysql_driver); const auto prev_connection_id = mysql_thread_id(mysql_driver);
pool->logger.trace("Entry(connection %lu): sending PING to check if it is alive.", prev_connection_id);
if (data->conn.ping()) /// Attempts to reestablish lost connection if (data->conn.ping()) /// Attempts to reestablish lost connection
{ {
const auto current_connection_id = mysql_thread_id(mysql_driver); const auto current_connection_id = mysql_thread_id(mysql_driver);
if (prev_connection_id != current_connection_id) if (prev_connection_id != current_connection_id)
{ {
auto & logger = Poco::Util::Application::instance().logger(); pool->logger.debug("Entry(connection %lu): Reconnected to MySQL server. Connection id changed: %lu -> %lu",
logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu", current_connection_id, prev_connection_id, current_connection_id);
prev_connection_id, current_connection_id);
} }
pool->logger.trace("Entry(connection %lu): PING ok.", current_connection_id);
return true; return true;
} }
pool->logger.trace("Entry(connection %lu): PING failed.", prev_connection_id);
return false; return false;
} }
@ -274,15 +294,13 @@ void Pool::initialize()
Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time) Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
{ {
Poco::Util::Application & app = Poco::Util::Application::instance(); std::unique_ptr<Connection> conn_ptr{new Connection};
std::unique_ptr<Connection> conn(new Connection);
try try
{ {
app.logger().information("MYSQL: Connecting to " + description); logger.debug("Connecting to %s", description);
conn->conn.connect( conn_ptr->conn.connect(
db.c_str(), db.c_str(),
server.c_str(), server.c_str(),
user.c_str(), user.c_str(),
@ -294,29 +312,29 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
ssl_key.c_str(), ssl_key.c_str(),
connect_timeout, connect_timeout,
rw_timeout, rw_timeout,
enable_local_infile); enable_local_infile,
opt_reconnect);
} }
catch (mysqlxx::ConnectionFailed & e) catch (mysqlxx::ConnectionFailed & e)
{ {
logger.error(e.what());
if ((!was_successful && !dont_throw_if_failed_first_time) if ((!was_successful && !dont_throw_if_failed_first_time)
|| e.errnum() == ER_ACCESS_DENIED_ERROR || e.errnum() == ER_ACCESS_DENIED_ERROR
|| e.errnum() == ER_DBACCESS_DENIED_ERROR || e.errnum() == ER_DBACCESS_DENIED_ERROR
|| e.errnum() == ER_BAD_DB_ERROR) || e.errnum() == ER_BAD_DB_ERROR)
{ {
app.logger().error(e.what());
throw; throw;
} }
else else
{ {
app.logger().error(e.what());
return nullptr; return nullptr;
} }
} }
connections.push_back(conn_ptr.get());
was_successful = true; was_successful = true;
auto * connection = conn.release(); return conn_ptr.release();
connections.push_back(connection);
return connection;
} }
} }

View File

@ -6,6 +6,8 @@
#include <atomic> #include <atomic>
#include <Poco/Exception.h> #include <Poco/Exception.h>
#include <Poco/Logger.h>
#include <mysqlxx/Connection.h> #include <mysqlxx/Connection.h>
@ -165,19 +167,21 @@ public:
unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT, unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT,
unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS,
unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS,
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE) unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
: default_connections(default_connections_), max_connections(max_connections_), bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT)
db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_), : logger(Poco::Logger::get("mysqlxx::Pool")), default_connections(default_connections_),
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_) {} max_connections(max_connections_), db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_),
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_),
opt_reconnect(opt_reconnect_) {}
Pool(const Pool & other) Pool(const Pool & other)
: default_connections{other.default_connections}, : logger(other.logger), default_connections{other.default_connections},
max_connections{other.max_connections}, max_connections{other.max_connections},
db{other.db}, server{other.server}, db{other.db}, server{other.server},
user{other.user}, password{other.password}, user{other.user}, password{other.password},
port{other.port}, socket{other.socket}, port{other.port}, socket{other.socket},
connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout}, connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout},
enable_local_infile{other.enable_local_infile} enable_local_infile{other.enable_local_infile}, opt_reconnect(other.opt_reconnect)
{} {}
Pool & operator=(const Pool &) = delete; Pool & operator=(const Pool &) = delete;
@ -201,6 +205,8 @@ public:
void removeConnection(Connection * connection); void removeConnection(Connection * connection);
protected: protected:
Poco::Logger & logger;
/// Number of MySQL connections which are created at launch. /// Number of MySQL connections which are created at launch.
unsigned default_connections; unsigned default_connections;
/// Maximum possible number of connections /// Maximum possible number of connections
@ -231,6 +237,7 @@ private:
std::string ssl_cert; std::string ssl_cert;
std::string ssl_key; std::string ssl_key;
bool enable_local_infile; bool enable_local_infile;
bool opt_reconnect;
/// True if connection was established at least once. /// True if connection was established at least once.
bool was_successful{false}; bool was_successful{false};

View File

@ -1,3 +1,8 @@
#include <algorithm>
#include <ctime>
#include <random>
#include <thread>
#include <mysqlxx/PoolWithFailover.h> #include <mysqlxx/PoolWithFailover.h>
@ -33,6 +38,19 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & con
std::make_shared<Pool>(config_, replica_name, default_connections_, max_connections_, config_name_.c_str())); std::make_shared<Pool>(config_, replica_name, default_connections_, max_connections_, config_name_.c_str()));
} }
} }
/// PoolWithFailover objects are stored in a cache inside PoolFactory.
/// This cache is reset by ExternalDictionariesLoader after every SYSTEM RELOAD DICTIONAR{Y|IES}
/// which triggers massive re-constructing of connection pools.
/// The state of PRNGs like std::mt19937 is considered to be quite heavy
/// thus here we attempt to optimize its construction.
static thread_local std::mt19937 rnd_generator(
std::hash<std::thread::id>{}(std::this_thread::get_id()) + std::clock());
for (auto & [_, replicas] : replicas_by_priority)
{
if (replicas.size() > 1)
std::shuffle(replicas.begin(), replicas.end(), rnd_generator);
}
} }
else else
{ {

View File

@ -1,11 +1,16 @@
#if __has_include(<mysql.h>) #if __has_include(<mysql.h>)
#include <errmsg.h>
#include <mysql.h> #include <mysql.h>
#else #else
#include <mysql/errmsg.h>
#include <mysql/mysql.h> #include <mysql/mysql.h>
#endif #endif
#include <Poco/Logger.h>
#include <mysqlxx/Connection.h> #include <mysqlxx/Connection.h>
#include <mysqlxx/Query.h> #include <mysqlxx/Query.h>
#include <mysqlxx/Types.h>
namespace mysqlxx namespace mysqlxx
@ -57,8 +62,24 @@ void Query::reset()
void Query::executeImpl() void Query::executeImpl()
{ {
std::string query_string = query_buf.str(); std::string query_string = query_buf.str();
if (mysql_real_query(conn->getDriver(), query_string.data(), query_string.size()))
throw BadQuery(errorMessage(conn->getDriver()), mysql_errno(conn->getDriver())); MYSQL* mysql_driver = conn->getDriver();
auto & logger = Poco::Logger::get("mysqlxx::Query");
logger.trace("Running MySQL query using connection %lu", mysql_thread_id(mysql_driver));
if (mysql_real_query(mysql_driver, query_string.data(), query_string.size()))
{
const auto err_no = mysql_errno(mysql_driver);
switch (err_no)
{
case CR_SERVER_GONE_ERROR:
[[fallthrough]];
case CR_SERVER_LOST:
throw ConnectionLost(errorMessage(mysql_driver), err_no);
default:
throw BadQuery(errorMessage(mysql_driver), err_no);
}
}
} }
UseQueryResult Query::use() UseQueryResult Query::use()

View File

@ -32,7 +32,10 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang") if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}") message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
# debian (debhlpers) set SOURCE_DATE_EPOCH environment variable, that is set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
# filled from the debian/changelog or current time. # filled from the debian/changelog or current time.
# #
# - 4.0+ ccache always includes this environment variable into the hash # - 4.0+ ccache always includes this environment variable into the hash
@ -48,9 +51,6 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache") message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}") set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
else()
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
endif() endif()
else () else ()
message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118") message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118")

2
contrib/brotli vendored

@ -1 +1 @@
Subproject commit 5805f99a533a8f8118699c0100d8c102f3605f65 Subproject commit 63be8a99401992075c23e99f7c84de1c653e39e2

View File

@ -2,6 +2,8 @@ set(BROTLI_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c)
set(BROTLI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/brotli/c) set(BROTLI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/brotli/c)
set(SRCS set(SRCS
${BROTLI_SOURCE_DIR}/enc/command.c
${BROTLI_SOURCE_DIR}/enc/fast_log.c
${BROTLI_SOURCE_DIR}/dec/bit_reader.c ${BROTLI_SOURCE_DIR}/dec/bit_reader.c
${BROTLI_SOURCE_DIR}/dec/state.c ${BROTLI_SOURCE_DIR}/dec/state.c
${BROTLI_SOURCE_DIR}/dec/huffman.c ${BROTLI_SOURCE_DIR}/dec/huffman.c
@ -26,6 +28,9 @@ set(SRCS
${BROTLI_SOURCE_DIR}/enc/memory.c ${BROTLI_SOURCE_DIR}/enc/memory.c
${BROTLI_SOURCE_DIR}/common/dictionary.c ${BROTLI_SOURCE_DIR}/common/dictionary.c
${BROTLI_SOURCE_DIR}/common/transform.c ${BROTLI_SOURCE_DIR}/common/transform.c
${BROTLI_SOURCE_DIR}/common/platform.c
${BROTLI_SOURCE_DIR}/common/context.c
${BROTLI_SOURCE_DIR}/common/constants.c
) )
add_library(brotli ${SRCS}) add_library(brotli ${SRCS})

View File

@ -356,7 +356,6 @@ function run_tests
# JSON functions # JSON functions
01666_blns 01666_blns
01674_htm_xml_coarse_parse
) )
(time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

View File

@ -18,7 +18,8 @@ RUN apt-get update \
curl \ curl \
tar \ tar \
krb5-user \ krb5-user \
iproute2 iproute2 \
lsof
RUN rm -rf \ RUN rm -rf \
/var/lib/apt/lists/* \ /var/lib/apt/lists/* \
/var/cache/debconf \ /var/cache/debconf \

View File

@ -1,11 +1,11 @@
version: '2.3' version: '2.3'
services: services:
zoo1: zoo1:
image: zookeeper:3.4.12 image: zookeeper:3.6.2
restart: always restart: always
environment: environment:
ZOO_TICK_TIME: 500 ZOO_TICK_TIME: 500
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888 ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
ZOO_MY_ID: 1 ZOO_MY_ID: 1
JVMFLAGS: -Dzookeeper.forceSync=no JVMFLAGS: -Dzookeeper.forceSync=no
volumes: volumes:
@ -16,11 +16,11 @@ services:
source: ${ZK_DATA_LOG1:-} source: ${ZK_DATA_LOG1:-}
target: /datalog target: /datalog
zoo2: zoo2:
image: zookeeper:3.4.12 image: zookeeper:3.6.2
restart: always restart: always
environment: environment:
ZOO_TICK_TIME: 500 ZOO_TICK_TIME: 500
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888 ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
ZOO_MY_ID: 2 ZOO_MY_ID: 2
JVMFLAGS: -Dzookeeper.forceSync=no JVMFLAGS: -Dzookeeper.forceSync=no
volumes: volumes:
@ -31,11 +31,11 @@ services:
source: ${ZK_DATA_LOG2:-} source: ${ZK_DATA_LOG2:-}
target: /datalog target: /datalog
zoo3: zoo3:
image: zookeeper:3.4.12 image: zookeeper:3.6.2
restart: always restart: always
environment: environment:
ZOO_TICK_TIME: 500 ZOO_TICK_TIME: 500
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888 ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
ZOO_MY_ID: 3 ZOO_MY_ID: 3
JVMFLAGS: -Dzookeeper.forceSync=no JVMFLAGS: -Dzookeeper.forceSync=no
volumes: volumes:

View File

@ -81,6 +81,8 @@ clickhouse-client --query "SHOW TABLES FROM test"
./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt ./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt
stop stop
# TODO remove me when persistent snapshots are ready
rm -fr /var/lib/clickhouse/coordination ||:
start start
clickhouse-client --query "SELECT 'Server successfuly started'" > /test_output/alive_check.txt || echo 'Server failed to start' > /test_output/alive_check.txt clickhouse-client --query "SELECT 'Server successfuly started'" > /test_output/alive_check.txt || echo 'Server failed to start' > /test_output/alive_check.txt

View File

@ -66,7 +66,8 @@ SELECT * FROM file_engine_table
## Usage in ClickHouse-local {#usage-in-clickhouse-local} ## Usage in ClickHouse-local {#usage-in-clickhouse-local}
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. It is possible to read and write compressed files based on an additional engine parameter or file extension (`gz`, `br` or `xz`).
**Example:** **Example:**
``` bash ``` bash

File diff suppressed because one or more lines are too long

View File

@ -20,5 +20,6 @@ The list of documented datasets:
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) - [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md) - [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md) - [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md)
- [Cell Towers](../../getting-started/example-datasets/cell-towers.md)
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide--> [Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->

View File

@ -15,17 +15,9 @@ This dataset can be obtained in two ways:
Downloading data: Downloading data:
``` bash ``` bash
for s in `seq 1987 2018` echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip | xargs -P10 wget --no-check-certificate --continue
do
for m in `seq 1 12`
do
wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip
done
done
``` ```
(from https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh )
Creating a table: Creating a table:
``` sql ``` sql
@ -145,12 +137,14 @@ ORDER BY (Carrier, FlightDate)
SETTINGS index_granularity = 8192; SETTINGS index_granularity = 8192;
``` ```
Loading data: Loading data with multiple threads:
``` bash ``` bash
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
``` ```
(If you run into a memory shortage or other issues on your server, remove the `-P $(nproc)` part.)
## Download of Prepared Partitions {#download-of-prepared-partitions} ## Download of Prepared Partitions {#download-of-prepared-partitions}
``` bash ``` bash

View File

@ -148,28 +148,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
For successful requests that don't return a data table, an empty response body is returned. For successful requests that don't return a data table, an empty response body is returned.
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
If you specified `compress=1` in the URL, the server compresses the data it sends you. ## Compression {#compression}
If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods. You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed.
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed. You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
Examples of sending data with compression: If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method.
``` bash You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse supports the following [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
#Sending data to the server:
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
#Sending data to the client: - `gzip`
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' - `br`
``` - `deflate`
- `xz`
To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`.
In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods.
!!! note "Note" !!! note "Note"
Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly. Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly.
**Examples**
``` bash
# Sending compressed data to the server
$ echo "SELECT 1" | gzip -c | \
curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
``` bash
# Receiving compressed data from the server
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
0
1
2
```
## Default Database {#default-database}
You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database. You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database.
``` bash ``` bash

View File

@ -8,18 +8,21 @@ toc_title: Caches
When performing queries, ClickHouse uses different caches. When performing queries, ClickHouse uses different caches.
Main cache types: Main cache types:
- `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family. - `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
- `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family. - `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
Additional cache types: Additional cache types:
- DNS cache
- [regexp](../interfaces/formats.md#data-format-regexp) cache - DNS cache.
- compiled expressions cache - [Regexp](../interfaces/formats.md#data-format-regexp) cache.
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache - Compiled expressions cache.
- [dictionaries data cache](../sql-reference/dictionaries/index.md) - [Avro format](../interfaces/formats.md#data-format-avro) schemas cache.
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
Indirectly used: Indirectly used:
- OS page cache
- OS page cache.
To drop cache, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md) statements. To drop cache, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md) statements.

View File

@ -14,7 +14,7 @@ Columns:
- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query. - `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time. - `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds). - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds).
- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper). - `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper).
**Example** **Example**

View File

@ -91,6 +91,8 @@ $ clickhouse-local --query "
Now let's output memory usage for each Unix user: Now let's output memory usage for each Unix user:
Query:
``` bash ``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
| clickhouse-local --structure "user String, mem Float64" \ | clickhouse-local --structure "user String, mem Float64" \
@ -98,6 +100,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
``` ```
Result:
``` text ``` text
Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
┏━━━━━━━━━━┳━━━━━━━━━━┓ ┏━━━━━━━━━━┳━━━━━━━━━━┓

View File

@ -21,7 +21,11 @@ The following aggregate functions are supported:
- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md) - [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md)
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md) - [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)
Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function.
!!! note "Note"
Values of the `SimpleAggregateFunction(func, Type)` look and are stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes.
`SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function.
**Parameters** **Parameters**
@ -31,11 +35,7 @@ Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way
**Example** **Example**
``` sql ``` sql
CREATE TABLE t CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id;
(
column1 SimpleAggregateFunction(sum, UInt64),
column2 SimpleAggregateFunction(any, String)
) ENGINE = ...
``` ```
[Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) <!--hide-->

View File

@ -61,40 +61,58 @@ int32samoa: 1546300800
Converts a date or date with time to a UInt16 number containing the year number (AD). Converts a date or date with time to a UInt16 number containing the year number (AD).
Alias: `YEAR`.
## toQuarter {#toquarter} ## toQuarter {#toquarter}
Converts a date or date with time to a UInt8 number containing the quarter number. Converts a date or date with time to a UInt8 number containing the quarter number.
Alias: `QUARTER`.
## toMonth {#tomonth} ## toMonth {#tomonth}
Converts a date or date with time to a UInt8 number containing the month number (1-12). Converts a date or date with time to a UInt8 number containing the month number (1-12).
Alias: `MONTH`.
## toDayOfYear {#todayofyear} ## toDayOfYear {#todayofyear}
Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366). Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366).
Alias: `DAYOFYEAR`.
## toDayOfMonth {#todayofmonth} ## toDayOfMonth {#todayofmonth}
Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31). Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31).
Aliases: `DAYOFMONTH`, `DAY`.
## toDayOfWeek {#todayofweek} ## toDayOfWeek {#todayofweek}
Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7).
Alias: `DAYOFWEEK`.
## toHour {#tohour} ## toHour {#tohour}
Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23). Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23).
This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true even in Moscow the clocks were twice changed at a different time). This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true even in Moscow the clocks were twice changed at a different time).
Alias: `HOUR`.
## toMinute {#tominute} ## toMinute {#tominute}
Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59). Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59).
Alias: `MINUTE`.
## toSecond {#tosecond} ## toSecond {#tosecond}
Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59). Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59).
Leap seconds are not accounted for. Leap seconds are not accounted for.
Alias: `SECOND`.
## toUnixTimestamp {#to-unix-timestamp} ## toUnixTimestamp {#to-unix-timestamp}
For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
@ -753,7 +771,7 @@ This is necessary for searching for pageviews in the corresponding session.
## formatDateTime {#formatdatetime} ## formatDateTime {#formatdatetime}
Function formats a Time according given Format string. N.B.: Format is a constant expression, e.g. you can not have multiple formats for single result column. Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column.
**Syntax** **Syntax**
@ -812,31 +830,32 @@ Result:
└────────────────────────────────────────────┘ └────────────────────────────────────────────┘
``` ```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) <!--hide-->
## FROM\_UNIXTIME {#fromunixfime} ## FROM\_UNIXTIME {#fromunixfime}
When there is only single argument of integer type, it act in the same way as `toDateTime` and return [DateTime](../../sql-reference/data-types/datetime.md). Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and returns [DateTime](../../sql-reference/data-types/datetime.md) type.
type.
For example: **Example:**
Query:
```sql ```sql
SELECT FROM_UNIXTIME(423543535) SELECT FROM_UNIXTIME(423543535);
``` ```
Result:
```text ```text
┌─FROM_UNIXTIME(423543535)─┐ ┌─FROM_UNIXTIME(423543535)─┐
│ 1983-06-04 10:58:55 │ │ 1983-06-04 10:58:55 │
└──────────────────────────┘ └──────────────────────────┘
``` ```
When there are two arguments, first is integer or DateTime, second is constant format string, it act in the same way as `formatDateTime` and return `String` type. When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and returns [String](../../sql-reference/data-types/string.md#string) type.
For example: For example:
```sql ```sql
SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
``` ```
```text ```text
@ -988,3 +1007,5 @@ Result:
│ 2020-01-01 │ │ 2020-01-01 │
└────────────────────────────────────┘ └────────────────────────────────────┘
``` ```
[Original article](https://clickhouse.tech/docs/en/query_language/functions/date_time_functions/) <!--hide-->

View File

@ -75,6 +75,8 @@ Result:
Returns a string containing the argument's hexadecimal representation. Returns a string containing the argument's hexadecimal representation.
Alias: `HEX`.
**Syntax** **Syntax**
``` sql ``` sql

View File

@ -132,7 +132,7 @@ aes_encrypt_mysql('mode', 'plaintext', 'key' [, iv])
- `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string). - `mode` — Encryption mode. [String](../../sql-reference/data-types/string.md#string).
- `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string). - `plaintext` — Text that needs to be encrypted. [String](../../sql-reference/data-types/string.md#string).
- `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string). - `key` — Encryption key. If key is longer than required by mode, MySQL-specific key folding is performed. [String](../../sql-reference/data-types/string.md#string).
- `iv` — Initialization vector. Optinal, only first 16 bytes are taken into account [String](../../sql-reference/data-types/string.md#string). - `iv` — Initialization vector. Optional, only first 16 bytes are taken into account. [String](../../sql-reference/data-types/string.md#string).
**Returned value** **Returned value**

View File

@ -13,6 +13,8 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal
isNull(x) isNull(x)
``` ```
Alias: `ISNULL`.
**Arguments** **Arguments**
- `x` — A value with a non-compound data type. - `x` — A value with a non-compound data type.

View File

@ -9,7 +9,7 @@ Hash functions can be used for the deterministic pseudo-random shuffling of elem
## halfMD5 {#hash-functions-halfmd5} ## halfMD5 {#hash-functions-halfmd5}
[Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order. [Interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the [MD5](https://en.wikipedia.org/wiki/MD5) hash value for each of them. Then combines hashes, takes the first 8 bytes of the hash of the resulting string, and interprets them as `UInt64` in big-endian byte order.
``` sql ``` sql
halfMD5(par1, ...) halfMD5(par1, ...)
@ -54,7 +54,7 @@ sipHash64(par1,...)
This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function. This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function.
Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm: Function [interprets](../../sql-reference/functions/type-conversion-functions.md#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
1. After hashing all the input parameters, the function gets the array of hashes. 1. After hashing all the input parameters, the function gets the array of hashes.
2. Function takes the first and the second elements and calculates a hash for the array of them. 2. Function takes the first and the second elements and calculates a hash for the array of them.

View File

@ -9,10 +9,14 @@ toc_title: IP Addresses
Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a string containing the corresponding IPv4 address in the format A.B.C.d (dot-separated numbers in decimal form). Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a string containing the corresponding IPv4 address in the format A.B.C.d (dot-separated numbers in decimal form).
Alias: `INET_NTOA`.
## IPv4StringToNum(s) {#ipv4stringtonums} ## IPv4StringToNum(s) {#ipv4stringtonums}
The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0. The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0.
Alias: `INET_ATON`.
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum} ## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}
Similar to IPv4NumToString, but using xxx instead of the last octet. Similar to IPv4NumToString, but using xxx instead of the last octet.
@ -49,7 +53,11 @@ Since using xxx is highly unusual, this may be changed in the future. We r
### IPv6NumToString(x) {#ipv6numtostringx} ### IPv6NumToString(x) {#ipv6numtostringx}
Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing this address in text format. Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing this address in text format.
IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44. Examples: IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44.
Alias: `INET6_NTOA`.
Examples:
``` sql ``` sql
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
@ -119,6 +127,8 @@ The reverse function of IPv6NumToString. If the IPv6 address has an invalid form
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned. If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
HEX can be uppercase or lowercase. HEX can be uppercase or lowercase.
Alias: `INET6_ATON`.
``` sql ``` sql
SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0); SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
``` ```

View File

@ -98,6 +98,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
Repeats a string as many times as specified and concatenates the replicated values as a single string. Repeats a string as many times as specified and concatenates the replicated values as a single string.
Alias: `REPEAT`.
**Syntax** **Syntax**
``` sql ``` sql
@ -276,10 +278,14 @@ Returns the string s that was converted from the encoding in from to
Encodes s string into base64 Encodes s string into base64
Alias: `TO_BASE64`.
## base64Decode(s) {#base64decode} ## base64Decode(s) {#base64decode}
Decode base64-encoded string s into original string. In case of failure raises an exception. Decode base64-encoded string s into original string. In case of failure raises an exception.
Alias: `FROM_BASE64`.
## tryBase64Decode(s) {#trybase64decode} ## tryBase64Decode(s) {#trybase64decode}
Similar to base64Decode, but in case of error an empty string would be returned. Similar to base64Decode, but in case of error an empty string would be returned.

View File

@ -174,4 +174,129 @@ Result:
└──────────────────────────────┴───────────────────────────────────┘ └──────────────────────────────┴───────────────────────────────────┘
``` ```
## mapContains {#mapcontains}
Determines whether the `map` contains the `key` parameter.
**Syntax**
``` sql
mapContains(map, key)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `key` — Key. Type matches the type of keys of `map` parameter.
**Returned value**
- `1` if `map` contains `key`, `0` if not.
Type: [UInt8](../../sql-reference/data-types/int-uint.md).
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapContains(a, 'name') FROM test;
```
Result:
```text
┌─mapContains(a, 'name')─┐
│ 1 │
│ 0 │
└────────────────────────┘
```
## mapKeys {#mapkeys}
Returns all keys from the `map` parameter.
**Syntax**
```sql
mapKeys(map)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Array containing all keys from the `map`.
Type: [Array](../../sql-reference/data-types/array.md).
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapKeys(a) FROM test;
```
Result:
```text
┌─mapKeys(a)────────────┐
│ ['name','age'] │
│ ['number','position'] │
└───────────────────────┘
```
## mapValues {#mapvalues}
Returns all values from the `map` parameter.
**Syntax**
```sql
mapValues(map)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Array containing all the values from `map`.
Type: [Array](../../sql-reference/data-types/array.md).
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapValues(a) FROM test;
```
Result:
```text
┌─mapValues(a)─────┐
│ ['eleven','11'] │
│ ['twelve','6.0'] │
└──────────────────┘
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -36,10 +36,14 @@ The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/f
**Example** **Example**
Query:
``` sql ``` sql
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8) SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
``` ```
Result:
``` text ``` text
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐ ┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
│ -9223372036854775808 │ 32 │ 16 │ 8 │ │ -9223372036854775808 │ 32 │ 16 │ 8 │
@ -52,10 +56,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
**Example** **Example**
Query:
``` sql ``` sql
select toInt64OrZero('123123'), toInt8OrZero('123qwe123') SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
``` ```
Result:
``` text ``` text
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐ ┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
│ 123123 │ 0 │ │ 123123 │ 0 │
@ -68,10 +76,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
**Example** **Example**
Query:
``` sql ``` sql
select toInt64OrNull('123123'), toInt8OrNull('123qwe123') SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
``` ```
Result:
``` text ``` text
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐ ┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
│ 123123 │ ᴺᵁᴸᴸ │ │ 123123 │ ᴺᵁᴸᴸ │
@ -102,10 +114,14 @@ The behavior of functions for negative agruments and for the [NaN and Inf](../..
**Example** **Example**
Query:
``` sql ``` sql
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
``` ```
Result:
``` text ``` text
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐ ┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │ │ 9223372036854775808 │ 4294967264 │ 16 │ 8 │
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
## toDate {#todate} ## toDate {#todate}
Alias: `DATE`.
## toDateOrZero {#todateorzero} ## toDateOrZero {#todateorzero}
## toDateOrNull {#todateornull} ## toDateOrNull {#todateornull}
@ -168,20 +186,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
**Examples** **Examples**
Query:
``` sql ``` sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val) SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
``` ```
Result:
``` text ``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐ ┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5)) │ │ -1.11100 │ Nullable(Decimal(9, 5)) │
└──────────┴────────────────────────────────────────────────────┘ └──────────┴────────────────────────────────────────────────────┘
``` ```
Query:
``` sql ``` sql
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
``` ```
Result:
``` text ``` text
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐ ┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │ │ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │
@ -213,20 +239,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
**Example** **Example**
Query:
``` sql ``` sql
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val) SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
``` ```
Result:
``` text ``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐ ┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5) │ │ -1.11100 │ Decimal(9, 5) │
└──────────┴────────────────────────────────────────────────────┘ └──────────┴────────────────────────────────────────────────────┘
``` ```
Query:
``` sql ``` sql
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val) SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
``` ```
Result:
``` text ``` text
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐ ┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
│ 0.00 │ Decimal(9, 2) │ │ 0.00 │ Decimal(9, 2) │
@ -258,12 +292,18 @@ Conversion between numeric types uses the same rules as assignments between diff
Additionally, the toString function of the DateTime argument can take a second String argument containing the name of the time zone. Example: `Asia/Yekaterinburg` In this case, the time is formatted according to the specified time zone. Additionally, the toString function of the DateTime argument can take a second String argument containing the name of the time zone. Example: `Asia/Yekaterinburg` In this case, the time is formatted according to the specified time zone.
**Example**
Query:
``` sql ``` sql
SELECT SELECT
now() AS now_local, now() AS now_local,
toString(now(), 'Asia/Yekaterinburg') AS now_yekat toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
``` ```
Result:
``` text ``` text
┌───────────now_local─┬─now_yekat───────────┐ ┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │ │ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
@ -281,36 +321,81 @@ If the string has fewer bytes than N, it is padded with null bytes to the right.
Accepts a String or FixedString argument. Returns the String with the content truncated at the first zero byte found. Accepts a String or FixedString argument. Returns the String with the content truncated at the first zero byte found.
Example: **Example**
Query:
``` sql ``` sql
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
``` ```
Result:
``` text ``` text
┌─s─────────────┬─s_cut─┐ ┌─s─────────────┬─s_cut─┐
│ foo\0\0\0\0\0 │ foo │ │ foo\0\0\0\0\0 │ foo │
└───────────────┴───────┘ └───────────────┴───────┘
``` ```
Query:
``` sql ``` sql
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
``` ```
Result:
``` text ``` text
┌─s──────────┬─s_cut─┐ ┌─s──────────┬─s_cut─┐
│ foo\0bar\0 │ foo │ │ foo\0bar\0 │ foo │
└────────────┴───────┘ └────────────┴───────┘
``` ```
## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}
## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264}
## reinterpretAsDate {#reinterpretasdate}
## reinterpretAsDateTime {#reinterpretasdatetime}
These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn't long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch.
## reinterpretAsString {#type_conversion_functions-reinterpretAsString}
This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
## reinterpretAsFixedString {#reinterpretasfixedstring}
This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
## reinterpretAsUUID {#reinterpretasuuid}
This function accepts a 16-byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes at the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
**Syntax**
``` sql
reinterpretAsUUID(fixed_string)
```
**Parameters**
- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
## reinterpret(x, T) {#type_conversion_function-reinterpret} ## reinterpret(x, T) {#type_conversion_function-reinterpret}
Performs byte reinterpretation of x as t data type. **Returned value**
Following reinterpretations are allowed: - The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
1. Any type that has fixed size and value of that type can be represented continuously into FixedString.
2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long. **Examples**
3. FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
String to UUID.
Query:
``` sql ``` sql
SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint, SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
@ -318,39 +403,45 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
reinterpret('1', 'UInt32') as string_to_int; reinterpret('1', 'UInt32') as string_to_int;
``` ```
Result:
``` text ``` text
┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐ ┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
255 │ 1e-45 │ 49 08090a0b-0c0d-0e0f-0001-020304050607
└─────────────┴──────────────┴───────────────┘ └───────────────────────────────────────────────────────────────────────┘
``` ```
## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256} Going back and forth from String to UUID.
## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256} Query:
## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256} ``` sql
WITH
generateUUIDv4() AS uuid,
identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
reinterpretAsUUID(reverse(unhex(str))) AS uuid2
SELECT uuid = uuid2;
```
## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat} Result:
## reinterpretAsDate {#type_conversion_function-reinterpretAsDate} ``` text
┌─equals(uuid, uuid2)─┐
## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime} │ 1 │
└─────────────────────┘
## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64} ```
## reinterpretAsString {#type_conversion_function-reinterpretAsString}
## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString}
## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID}
These functions are aliases for `reinterpret` function.
## CAST(x, T) {#type_conversion_function-cast} ## CAST(x, T) {#type_conversion_function-cast}
Converts x to the t data type. The syntax CAST(x AS t) is also supported. Converts input value `x` to the `T` data type.
Example: The syntax `CAST(x AS t)` is also supported.
Note that if the value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns 255.
**Example**
Query:
``` sql ``` sql
SELECT SELECT
@ -358,9 +449,11 @@ SELECT
CAST(timestamp AS DateTime) AS datetime, CAST(timestamp AS DateTime) AS datetime,
CAST(timestamp AS Date) AS date, CAST(timestamp AS Date) AS date,
CAST(timestamp, 'String') AS string, CAST(timestamp, 'String') AS string,
CAST(timestamp, 'FixedString(22)') AS fixed_string CAST(timestamp, 'FixedString(22)') AS fixed_string;
``` ```
Result:
``` text ``` text
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐ ┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │ │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
@ -369,12 +462,18 @@ SELECT
Conversion to FixedString(N) only works for arguments of type String or FixedString(N). Conversion to FixedString(N) only works for arguments of type String or FixedString(N).
Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. Example: Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported.
**Example**
Query:
``` sql ``` sql
SELECT toTypeName(x) FROM t_null SELECT toTypeName(x) FROM t_null;
``` ```
Result:
``` text ``` text
┌─toTypeName(x)─┐ ┌─toTypeName(x)─┐
│ Int8 │ │ Int8 │
@ -382,10 +481,14 @@ SELECT toTypeName(x) FROM t_null
└───────────────┘ └───────────────┘
``` ```
Query:
``` sql ``` sql
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
``` ```
Result:
``` text ``` text
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐ ┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
│ Nullable(UInt16) │ │ Nullable(UInt16) │
@ -399,15 +502,19 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
## accurateCast(x, T) {#type_conversion_function-accurate-cast} ## accurateCast(x, T) {#type_conversion_function-accurate-cast}
Converts x to the t data type. The differente from cast(x, T) is that accurateCast Converts `x` to the `T` data type.
does not allow overflow of numeric types during cast if type value x does not fit
bounds of type T. The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
**Example**
Query:
Example
``` sql ``` sql
SELECT cast(-1, 'UInt8') as uint8; SELECT cast(-1, 'UInt8') as uint8;
``` ```
Result:
``` text ``` text
┌─uint8─┐ ┌─uint8─┐
@ -415,38 +522,46 @@ SELECT cast(-1, 'UInt8') as uint8;
└───────┘ └───────┘
``` ```
Query:
```sql ```sql
SELECT accurateCast(-1, 'UInt8') as uint8; SELECT accurateCast(-1, 'UInt8') as uint8;
``` ```
Result:
``` text ``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8. Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
``` ```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null} ## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
Converts x to the t data type. Always returns nullable type and returns NULL Converts input value `x` to the specified data type `T`. Always returns [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type.
if the casted value is not representable in the target type.
Example: **Syntax**
```sql
accurateCastOrNull(x, T)
```
**Parameters**
- `x` — Input value.
- `T` — The name of the returned data type.
**Returned value**
- The value, converted to the specified data type `T`.
**Example**
Query:
``` sql ``` sql
SELECT SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string
``` ```
``` text Result:
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘┘
```
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
```
``` text ``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐ ┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
@ -454,6 +569,23 @@ SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
└────────────────────────────────────────────┘ └────────────────────────────────────────────┘
``` ```
Query:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
```
Result:
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval} ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type. Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
@ -481,6 +613,8 @@ toIntervalYear(number)
**Example** **Example**
Query:
``` sql ``` sql
WITH WITH
toDate('2019-01-01') AS date, toDate('2019-01-01') AS date,
@ -488,9 +622,11 @@ WITH
toIntervalWeek(1) AS interval_to_week toIntervalWeek(1) AS interval_to_week
SELECT SELECT
date + interval_week, date + interval_week,
date + interval_to_week date + interval_to_week;
``` ```
Result:
``` text ``` text
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ ┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│ 2019-01-08 │ 2019-01-08 │ │ 2019-01-08 │ 2019-01-08 │
@ -506,7 +642,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112
**Syntax** **Syntax**
``` sql ``` sql
parseDateTimeBestEffort(time_string [, time_zone]); parseDateTimeBestEffort(time_string [, time_zone])
``` ```
**Arguments** **Arguments**
@ -549,7 +685,7 @@ Query:
``` sql ``` sql
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
AS parseDateTimeBestEffort AS parseDateTimeBestEffort;
``` ```
Result: Result:
@ -564,7 +700,7 @@ Query:
``` sql ``` sql
SELECT parseDateTimeBestEffort('1284101485') SELECT parseDateTimeBestEffort('1284101485')
AS parseDateTimeBestEffort AS parseDateTimeBestEffort;
``` ```
Result: Result:
@ -579,7 +715,7 @@ Query:
``` sql ``` sql
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12') SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
AS parseDateTimeBestEffort AS parseDateTimeBestEffort;
``` ```
Result: Result:
@ -593,7 +729,7 @@ Result:
Query: Query:
``` sql ``` sql
SELECT parseDateTimeBestEffort('10 20:19') SELECT parseDateTimeBestEffort('10 20:19');
``` ```
Result: Result:
@ -613,12 +749,12 @@ Result:
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS} ## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}
This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity. This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity.
**Syntax** **Syntax**
``` sql ``` sql
parseDateTimeBestEffortUS(time_string [, time_zone]); parseDateTimeBestEffortUS(time_string [, time_zone])
``` ```
**Arguments** **Arguments**
@ -892,7 +1028,7 @@ Type: `LowCardinality(expr_result_type)`
Query: Query:
``` sql ``` sql
SELECT toLowCardinality('1') SELECT toLowCardinality('1');
``` ```
Result: Result:
@ -931,7 +1067,7 @@ Query:
``` sql ``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64) SELECT toUnixTimestamp64Milli(dt64);
``` ```
Result: Result:
@ -944,7 +1080,7 @@ Result:
``` sql ``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64) SELECT toUnixTimestamp64Nano(dt64);
``` ```
Result: Result:
@ -978,13 +1114,17 @@ fromUnixTimestamp64Milli(value [, ti])
- `value` converted to the `DateTime64` data type. - `value` converted to the `DateTime64` data type.
**Examples** **Example**
Query:
``` sql ``` sql
WITH CAST(1234567891011, 'Int64') AS i64 WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC') SELECT fromUnixTimestamp64Milli(i64, 'UTC');
``` ```
Result:
``` text ``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ ┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
│ 2009-02-13 23:31:31.011 │ │ 2009-02-13 23:31:31.011 │
@ -1016,7 +1156,7 @@ Query:
``` sql ``` sql
SELECT formatRow('CSV', number, 'good') SELECT formatRow('CSV', number, 'good')
FROM numbers(3) FROM numbers(3);
``` ```
Result: Result:
@ -1057,7 +1197,7 @@ Query:
``` sql ``` sql
SELECT formatRowNoNewline('CSV', number, 'good') SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3) FROM numbers(3);
``` ```
Result: Result:

View File

@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
If the left side is a single column that is in the index, and the right side is a set of constants, the system uses the index for processing the query. If the left side is a single column that is in the index, and the right side is a set of constants, the system uses the index for processing the query.
Don't list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section “External data for query processing”), then use a subquery. Don't list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery.
The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets. The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets.
ClickHouse allows types to differ in the left and the right parts of `IN` subquery. In this case it converts the left side value to the type of the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function is applied. That means, that the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, it returns [NULL](../../sql-reference/syntax.md#null-literal).
**Example**
Query:
``` sql
SELECT '1' IN (SELECT 1);
```
Result:
``` text
┌─in('1', _subquery49)─┐
│ 1 │
└──────────────────────┘
```
If the right side of the operator is the name of a table (for example, `UserID IN users`), this is equivalent to the subquery `UserID IN (SELECT * FROM users)`. Use this when working with external data that is sent along with the query. For example, the query can be sent together with a set of user IDs loaded to the users temporary table, which should be filtered. If the right side of the operator is the name of a table (for example, `UserID IN users`), this is equivalent to the subquery `UserID IN (SELECT * FROM users)`. Use this when working with external data that is sent along with the query. For example, the query can be sent together with a set of user IDs loaded to the users temporary table, which should be filtered.
If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query. If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query.

View File

@ -81,5 +81,5 @@ The `TTL` is no longer there, so the second row is not deleted:
### See Also ### See Also
- More about the [TTL-expression](../../../sql-reference/statements/create/table#ttl-expression). - More about the [TTL-expression](../../../../sql-reference/statements/create/table#ttl-expression).
- Modify column [with TTL](../../../sql-reference/statements/alter/column#alter_modify-column). - Modify column [with TTL](../../../../sql-reference/statements/alter/column#alter_modify-column).

View File

@ -5,7 +5,7 @@ toc_title: file
# file {#file} # file {#file}
Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones. Creates a table from a file. This table function is similar to [url](../../sql-reference/table-functions/url.md) and [hdfs](../../sql-reference/table-functions/hdfs.md) ones.
`file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables. `file` function can be used in `SELECT` and `INSERT` queries on data in [File](../../engines/table-engines/special/file.md) tables.
@ -15,9 +15,9 @@ Creates a table from a file. This table function is similar to [url](../../sql-r
file(path, format, structure) file(path, format, structure)
``` ```
**Input parameters** **Parameters**
- `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. - `path` — The relative path to the file from [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings.
- `format` — The [format](../../interfaces/formats.md#formats) of the file. - `format` — The [format](../../interfaces/formats.md#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
@ -39,7 +39,7 @@ $ cat /var/lib/clickhouse/user_files/test.csv
78,43,45 78,43,45
``` ```
Getting data from a table in `test.csv` and selecting first two rows from it: Getting data from a table in `test.csv` and selecting the first two rows from it:
``` sql ``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2; SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2;
@ -51,7 +51,8 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
│ 3 │ 2 │ 1 │ │ 3 │ 2 │ 1 │
└─────────┴─────────┴─────────┘ └─────────┴─────────┴─────────┘
``` ```
Getting the first 10 lines of a table that contains 3 columns of UInt32 type from a CSV file:
Getting the first 10 lines of a table that contains 3 columns of [UInt32](../../sql-reference/data-types/int-uint.md) type from a CSV file:
``` sql ``` sql
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10; SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
@ -71,17 +72,16 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U
└─────────┴─────────┴─────────┘ └─────────┴─────────┴─────────┘
``` ```
## Globs in Path {#globs-in-path} ## Globs in Path {#globs-in-path}
Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). Multiple path components can have globs. To be processed, a file must exist and match the whole path pattern (not only a suffix or prefix).
- `*` — Substitutes any number of any characters except `/` including empty string. - `*` — Substitutes any number of any characters except `/` including empty string.
- `?` — Substitutes any single character. - `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. - `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. - `{N..M}` — Substitutes any number in range from N to M including both borders.
Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)). Constructions with `{}` are similar to the [remote](remote.md) table function.
**Example** **Example**
@ -94,13 +94,13 @@ Suppose we have several files with the following relative paths:
- 'another_dir/some_file_2' - 'another_dir/some_file_2'
- 'another_dir/some_file_3' - 'another_dir/some_file_3'
Query the amount of rows in these files: Query the number of rows in these files:
``` sql ``` sql
SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32'); SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32');
``` ```
Query the amount of rows in all files of these two directories: Query the number of rows in all files of these two directories:
``` sql ``` sql
SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
@ -124,6 +124,6 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String,
**See Also** **See Also**
- [Virtual columns](https://clickhouse.tech/docs/en/operations/table_engines/#table_engines-virtual_columns) - [Virtual columns](index.md#table_engines-virtual_columns)
[Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/file/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/sql-reference/table-functions/file/) <!--hide-->

View File

@ -5,7 +5,7 @@ toc_title: remote
# remote, remoteSecure {#remote-remotesecure} # remote, remoteSecure {#remote-remotesecure}
Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with secured connection. Allows to access remote servers without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. `remoteSecure` - same as `remote` but with a secured connection.
Both functions can be used in `SELECT` and `INSERT` queries. Both functions can be used in `SELECT` and `INSERT` queries.
@ -18,31 +18,31 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
``` ```
**Input parameters** **Parameters**
- `addresses_expr` — An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. - `addresses_expr` — An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`.
The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets.
The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server's config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440). The port is the TCP port on the remote server. If the port is omitted, it uses [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) from the server's config file in `remote` (by default, 9000) and [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) in `remoteSecure` (by default, 9440).
The port is required for an IPv6 address. The port is required for an IPv6 address.
Type: [String](../../sql-reference/data-types/string.md). Type: [String](../../sql-reference/data-types/string.md).
- `db` - Database name. Type: [String](../../sql-reference/data-types/string.md). - `db` — Database name. Type: [String](../../sql-reference/data-types/string.md).
- `table` - Table name. Type: [String](../../sql-reference/data-types/string.md). - `table` — Table name. Type: [String](../../sql-reference/data-types/string.md).
- `user` - User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md). - `user` — User name. If the user is not specified, `default` is used. Type: [String](../../sql-reference/data-types/string.md).
- `password` - User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md). - `password` — User password. If the password is not specified, an empty password is used. Type: [String](../../sql-reference/data-types/string.md).
- `sharding_key` - Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md). - `sharding_key` — Sharding key to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Type: [UInt32](../../sql-reference/data-types/int-uint.md).
**Returned value** **Returned value**
Dataset from remote servers. The dataset from remote servers.
**Usage** **Usage**
Using the `remote` table function is less optimal than creating a `Distributed` table, because in this case the server connection is re-established for every request. In addition, if host names are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and don't use the `remote` table function. Using the `remote` table function is less optimal than creating a `Distributed` table because in this case the server connection is re-established for every request. Also, if hostnames are set, the names are resolved, and errors are not counted when working with various replicas. When processing a large number of queries, always create the `Distributed` table ahead of time, and don't use the `remote` table function.
The `remote` table function can be useful in the following cases: The `remote` table function can be useful in the following cases:
@ -62,7 +62,7 @@ localhost
[2a02:6b8:0:1111::11]:9000 [2a02:6b8:0:1111::11]:9000
``` ```
Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like to shards with different data). Example: Multiple addresses can be comma-separated. In this case, ClickHouse will use distributed processing, so it will send the query to all specified addresses (like shards with different data). Example:
``` text ``` text
example01-01-1,example01-02-1 example01-01-1,example01-02-1
@ -82,7 +82,7 @@ example01-{01..02}-1
If you have multiple pairs of curly brackets, it generates the direct product of the corresponding sets. If you have multiple pairs of curly brackets, it generates the direct product of the corresponding sets.
Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md) setting. This example specifies two shards that each have two replicas: Addresses and parts of addresses in curly brackets can be separated by the pipe symbol (\|). In this case, the corresponding sets of addresses are interpreted as replicas, and the query will be sent to the first healthy replica. However, the replicas are iterated in the order currently set in the [load_balancing](../../operations/settings/settings.md#settings-load_balancing) setting. This example specifies two shards that each have two replicas:
``` text ``` text
example01-{01..02}-{1|2} example01-{01..02}-{1|2}

View File

@ -15,25 +15,25 @@ toc_title: url
url(URL, format, structure) url(URL, format, structure)
``` ```
**Input parameters** **Parameters**
- `URL` - HTTP or HTTPS server address, which can accept `GET` (for `SELECT`) or `POST` (for `INSERT`) requests. Type: [String](../../sql-reference/data-types/string.md). - `URL` — HTTP or HTTPS server address, which can accept `GET` or `POST` requests (for `SELECT` or `INSERT` queries correspondingly). Type: [String](../../sql-reference/data-types/string.md).
- `format` - [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md). - `format` [Format](../../interfaces/formats.md#formats) of the data. Type: [String](../../sql-reference/data-types/string.md).
- `structure` - Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md). - `structure` Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
**Returned value** **Returned value**
A table with the specified format and structure and with data from the defined URL. A table with the specified format and structure and with data from the defined `URL`.
**Examples** **Examples**
Getting the first 3 lines of a table that contains columns of `String` and `UInt32` type from HTTP-server which answers in `CSV` format. Getting the first 3 lines of a table that contains columns of `String` and [UInt32](../../sql-reference/data-types/int-uint.md) type from HTTP-server which answers in [CSV](../../interfaces/formats.md#csv) format.
``` sql ``` sql
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3; SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
``` ```
Inserting data from a URL into a table: Inserting data from a `URL` into a table:
``` sql ``` sql
CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory; CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory;

View File

@ -63,7 +63,7 @@ SELECT * FROM file_engine_table
## Использование движка в Clickhouse-local {#ispolzovanie-dvizhka-v-clickhouse-local} ## Использование движка в Clickhouse-local {#ispolzovanie-dvizhka-v-clickhouse-local}
В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`. В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`. Можно записывать и читать сжатые файлы. Для этого нужно задать дополнительный параметр движка или расширение файла (`gz`, `br` или `xz`).
**Пример:** **Пример:**

View File

@ -149,28 +149,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа. Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа.
Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor` (устанавливается вместе с пакетом `clickhouse-client`). Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки[http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress).
Если вы указали `compress = 1` в URL, то сервер сжимает данные, которые он отправляет. ## Сжатие {#compression}
Если вы указали `decompress = 1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`.
Также, можно использовать [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`. Чтобы ClickHouse сжимал ответ, добавьте заголовок `Accept-Encoding: compression_method`. ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens): `gzip`, `br`, and `deflate`. Чтобы включить HTTP compression, используйте настройку ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression). Уровень сжатия данных для всех методов сжатия можно настроить с помощью настройки [http_zlib_compression_level](#settings-http_zlib_compression_level). Сжатие можно использовать для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов.
Это может быть использовано для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов. Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor`. Она устанавливается вместе с пакетом `clickhouse-client`. Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress).
Примеры отправки данных со сжатием: Если вы указали `compress=1` в URL, то сервер сжимает данные, которые он отправляет. Если вы указали `decompress=1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`.
``` bash Также можно использовать [сжатие HTTP](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
$ #Отправка данных на сервер:
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
$ #Отправка данных клиенту: - `gzip`
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/' - `br`
``` - `deflate`
- `xz`
Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`.
Чтобы ClickHouse сжимал ответ, разрешите сжатие настройкой [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) и добавьте заголовок `Accept-Encoding: compression_method`. Уровень сжатия данных для всех методов сжатия можно задать с помощью настройки [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level).
!!! note "Примечание" !!! note "Примечание"
Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия. Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия.
**Примеры**
``` bash
# Отправка сжатых данных на сервер
$ echo "SELECT 1" | gzip -c | \
curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```
``` bash
# Получение сжатых данных с сервера
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
0
1
2
```
## База данных по умолчанию {#default-database}
Вы можете использовать параметр URL `database` или заголовок `X-ClickHouse-Database`, чтобы указать БД по умолчанию. Вы можете использовать параметр URL `database` или заголовок `X-ClickHouse-Database`, чтобы указать БД по умолчанию.
``` bash ``` bash

View File

@ -22,6 +22,7 @@ toc_title: "\u041a\u043b\u0438\u0435\u043d\u0442\u0441\u043a\u0438\u0435\u0020\u
- [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client)
- [SeasClick C++ client](https://github.com/SeasX/SeasClick) - [SeasClick C++ client](https://github.com/SeasX/SeasClick)
- [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel)
- [kolya7k ClickHouse PHP extension](https://github.com/kolya7k/clickhouse-php)
- Go - Go
- [clickhouse](https://github.com/kshvakov/clickhouse/) - [clickhouse](https://github.com/kshvakov/clickhouse/)
- [go-clickhouse](https://github.com/roistat/go-clickhouse) - [go-clickhouse](https://github.com/roistat/go-clickhouse)

View File

@ -0,0 +1,29 @@
---
toc_priority: 65
toc_title: Кеши
---
# Типы кеша {#cache-types}
При выполнении запросов ClickHouse использует различные типы кеша.
Основные типы кеша:
- `mark_cache` — кеш засечек, используемых движками таблиц семейства [MergeTree](../engines/table-engines/mergetree-family/mergetree.md).
- `uncompressed_cache` — кеш несжатых данных, используемых движками таблиц семейства [MergeTree](../engines/table-engines/mergetree-family/mergetree.md).
Дополнительные типы кеша:
- DNS-кеш.
- Кеш данных формата [regexp](../interfaces/formats.md#data-format-regexp).
- Кеш скомпилированных выражений.
- Кеш схем формата [Avro](../interfaces/formats.md#data-format-avro).
- Кеш данных в [словарях](../sql-reference/dictionaries/index.md).
Непрямое использование:
- Кеш страницы ОС.
Чтобы очистить кеш, используйте выражение [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md).
[Оригинальная статья](https://clickhouse.tech/docs/ru/operations/caches/) <!--hide-->

View File

@ -14,7 +14,7 @@
- `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос. - `initiator` ([String](../../sql-reference/data-types/string.md)) — узел, выполнивший запрос.
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса. - `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время начала запроса.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса. - `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время окончания запроса.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — продолжительность выполнения запроса (в миллисекундах). - `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — продолжительность выполнения запроса (в миллисекундах).
- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper). - `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — код исключения из [ZooKeeper](../../operations/tips.md#zookeeper).
**Пример** **Пример**

View File

@ -21,7 +21,8 @@ toc_title: clickhouse-local
Основной формат вызова: Основной формат вызова:
``` bash ``` bash
$ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" -q "query" $ clickhouse-local --structure "table_structure" --input-format "format_of_incoming_data" \
--query "query"
``` ```
Ключи команды: Ключи команды:
@ -76,7 +77,9 @@ $ clickhouse-local --query "
1 2 1 2
``` ```
А теперь давайте выведем на экран объём оперативной памяти, занимаемой пользователями (Unix): Объём оперативной памяти, занимаемой процессами, которые запустил пользователь (Unix):
Запрос:
``` bash ``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
@ -85,6 +88,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty" FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
``` ```
Результат:
``` text ``` text
Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec. Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
┏━━━━━━━━━━┳━━━━━━━━━━┓ ┏━━━━━━━━━━┳━━━━━━━━━━┓

View File

@ -1,8 +1,9 @@
# SimpleAggregateFunction {#data-type-simpleaggregatefunction} # SimpleAggregateFunction(func, type) {#data-type-simpleaggregatefunction}
`SimpleAggregateFunction(name, types_of_arguments…)` data type stores current value of the aggregate function, and does not store its full state as [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md) does. This optimization can be applied to functions for which the following property holds: the result of applying a function `f` to a row set `S1 UNION ALL S2` can be obtained by applying `f` to parts of the row set separately, and then again applying `f` to the results: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. This property guarantees that partial aggregation results are enough to compute the combined one, so we dont have to store and process any extra data. Хранит только текущее значение агрегатной функции и не сохраняет ее полное состояние, как это делает [`AggregateFunction`](../../sql-reference/data-types/aggregatefunction.md). Такая оптимизация может быть применена к функциям, которые обладают следующим свойством: результат выполнения функции `f` к набору строк `S1 UNION ALL S2` может быть получен путем выполнения `f` к отдельным частям набора строк,
а затем повторного выполнения `f` к результатам: `f(S1 UNION ALL S2) = f(f(S1) UNION ALL f(S2))`. Это свойство гарантирует, что результатов частичной агрегации достаточно для вычисления комбинированной, поэтому хранить и обрабатывать какие-либо дополнительные данные не требуется.
The following aggregate functions are supported: Поддерживаются следующие агрегатные функции:
- [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any) - [`any`](../../sql-reference/aggregate-functions/reference/any.md#agg_function-any)
- [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx) - [`anyLast`](../../sql-reference/aggregate-functions/reference/anylast.md#anylastx)
@ -15,22 +16,24 @@ The following aggregate functions are supported:
- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor) - [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor)
- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray) - [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray)
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray) - [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray)
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way as `Type`, so you do not need to apply functions with `-Merge`/`-State` suffixes. `SimpleAggregateFunction` has better performance than `AggregateFunction` with same aggregation function. !!! note "Примечание"
Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются.
`SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией.
**Parameters** **Параметры**
- Name of the aggregate function. - `func` — имя агрегатной функции.
- Types of the aggregate function arguments. - `type` — типы аргументов агрегатной функции.
**Example** **Пример**
``` sql ``` sql
CREATE TABLE t CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id;
(
column1 SimpleAggregateFunction(sum, UInt64),
column2 SimpleAggregateFunction(any, String)
) ENGINE = ...
``` ```
[Original article](https://clickhouse.tech/docs/en/data_types/simpleaggregatefunction/) <!--hide--> [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/data-types/simpleaggregatefunction/) <!--hide-->

View File

@ -63,40 +63,58 @@ int32samoa: 1546300800
Переводит дату или дату-с-временем в число типа UInt16, содержащее номер года (AD). Переводит дату или дату-с-временем в число типа UInt16, содержащее номер года (AD).
Синоним: `YEAR`.
## toQuarter {#toquarter} ## toQuarter {#toquarter}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер квартала. Переводит дату или дату-с-временем в число типа UInt8, содержащее номер квартала.
Синоним: `QUARTER`.
## toMonth {#tomonth} ## toMonth {#tomonth}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер месяца (1-12). Переводит дату или дату-с-временем в число типа UInt8, содержащее номер месяца (1-12).
Синоним: `MONTH`.
## toDayOfYear {#todayofyear} ## toDayOfYear {#todayofyear}
Переводит дату или дату-с-временем в число типа UInt16, содержащее номер дня года (1-366). Переводит дату или дату-с-временем в число типа UInt16, содержащее номер дня года (1-366).
Синоним: `DAYOFYEAR`.
## toDayOfMonth {#todayofmonth} ## toDayOfMonth {#todayofmonth}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в месяце (1-31). Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в месяце (1-31).
Синонимы: `DAYOFMONTH`, `DAY`.
## toDayOfWeek {#todayofweek} ## toDayOfWeek {#todayofweek}
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в неделе (понедельник - 1, воскресенье - 7). Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в неделе (понедельник - 1, воскресенье - 7).
Синоним: `DAYOFWEEK`.
## toHour {#tohour} ## toHour {#tohour}
Переводит дату-с-временем в число типа UInt8, содержащее номер часа в сутках (0-23). Переводит дату-с-временем в число типа UInt8, содержащее номер часа в сутках (0-23).
Функция исходит из допущения, что перевод стрелок вперёд, если осуществляется, то на час, в два часа ночи, а перевод стрелок назад, если осуществляется, то на час, в три часа ночи (что, в общем, не верно - даже в Москве два раза перевод стрелок был осуществлён в другое время). Функция исходит из допущения, что перевод стрелок вперёд, если осуществляется, то на час, в два часа ночи, а перевод стрелок назад, если осуществляется, то на час, в три часа ночи (что, в общем, не верно - даже в Москве два раза перевод стрелок был осуществлён в другое время).
Синоним: `HOUR`.
## toMinute {#tominute} ## toMinute {#tominute}
Переводит дату-с-временем в число типа UInt8, содержащее номер минуты в часе (0-59). Переводит дату-с-временем в число типа UInt8, содержащее номер минуты в часе (0-59).
Синоним: `MINUTE`.
## toSecond {#tosecond} ## toSecond {#tosecond}
Переводит дату-с-временем в число типа UInt8, содержащее номер секунды в минуте (0-59). Переводит дату-с-временем в число типа UInt8, содержащее номер секунды в минуте (0-59).
Секунды координации не учитываются. Секунды координации не учитываются.
Синоним: `SECOND`.
## toUnixTimestamp {#to-unix-timestamp} ## toUnixTimestamp {#to-unix-timestamp}
Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time). Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
@ -305,7 +323,9 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d
Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января.
Пример: **Пример**
Запрос:
```sql ```sql
SELECT SELECT
@ -313,6 +333,9 @@ SELECT
toYear(date), toYear(date),
toISOYear(date) toISOYear(date)
``` ```
Результат:
```text ```text
┌───────date─┬─toYear(toDate('2017-01-01'))─┬─toISOYear(toDate('2017-01-01'))─┐ ┌───────date─┬─toYear(toDate('2017-01-01'))─┬─toISOYear(toDate('2017-01-01'))─┐
│ 2017-01-01 │ 2017 │ 2016 │ │ 2017-01-01 │ 2017 │ 2016 │
@ -326,12 +349,18 @@ SELECT
1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года. 1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года.
**Пример**
Запрос:
```sql ```sql
SELECT SELECT
toISOWeek(toDate('2017-01-01')) AS ISOWeek20170101, toISOWeek(toDate('2017-01-01')) AS ISOWeek20170101,
toISOWeek(toDate('2017-01-02')) AS ISOWeek20170102 toISOWeek(toDate('2017-01-02')) AS ISOWeek20170102
``` ```
Результат:
```text ```text
┌─ISOWeek20170101─┬─ISOWeek20170102─┐ ┌─ISOWeek20170101─┬─ISOWeek20170102─┐
│ 52 │ 1 │ │ 52 │ 1 │
@ -368,10 +397,14 @@ SELECT
**Пример** **Пример**
Запрос:
```sql ```sql
SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS week1, toWeek(date,9) AS week9; SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS week1, toWeek(date,9) AS week9;
``` ```
Результат:
```text ```text
┌───────date─┬─week0─┬─week1─┬─week9─┐ ┌───────date─┬─week0─┬─week1─┬─week9─┐
│ 2016-12-27 │ 52 │ 52 │ 1 │ │ 2016-12-27 │ 52 │ 52 │ 1 │
@ -387,10 +420,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we
**Пример** **Пример**
Запрос:
```sql ```sql
SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9; SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9;
``` ```
Результат:
```text ```text
┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐ ┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐
│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ │ 2016-12-27 │ 201652 │ 201652 │ 201701 │
@ -573,7 +610,7 @@ dateDiff('unit', startdate, enddate, [timezone])
SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'));
``` ```
Ответ: Результат:
``` text ``` text
┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ ┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐
@ -654,10 +691,10 @@ formatDateTime(Time, Format\[, Timezone\])
Запрос: Запрос:
``` sql ``` sql
SELECT formatDateTime(toDate('2010-01-04'), '%g') SELECT formatDateTime(toDate('2010-01-04'), '%g');
``` ```
Ответ: Результат:
``` ```
┌─formatDateTime(toDate('2010-01-04'), '%g')─┐ ┌─formatDateTime(toDate('2010-01-04'), '%g')─┐
@ -665,4 +702,43 @@ SELECT formatDateTime(toDate('2010-01-04'), '%g')
└────────────────────────────────────────────┘ └────────────────────────────────────────────┘
``` ```
## FROM\_UNIXTIME {#fromunixtime}
Функция преобразует Unix timestamp в календарную дату и время.
**Примеры**
Если указан только один аргумент типа [Integer](../../sql-reference/data-types/int-uint.md), то функция действует так же, как [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime), и возвращает тип [DateTime](../../sql-reference/data-types/datetime.md).
Запрос:
```sql
SELECT FROM_UNIXTIME(423543535);
```
Результат:
```text
┌─FROM_UNIXTIME(423543535)─┐
│ 1983-06-04 10:58:55 │
└──────────────────────────┘
```
В случае, когда есть два аргумента: первый типа [Integer](../../sql-reference/data-types/int-uint.md) или [DateTime](../../sql-reference/data-types/datetime.md), а второй является строкой постоянного формата — функция работает так же, как [formatDateTime](#formatdatetime), и возвращает значение типа [String](../../sql-reference/data-types/string.md#string).
Запрос:
```sql
SELECT FROM_UNIXTIME(1234334543, '%Y-%m-%d %R:%S') AS DateTime;
```
Результат:
```text
┌─DateTime────────────┐
│ 2009-02-11 14:42:23 │
└─────────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) <!--hide--> [Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/date_time_functions/) <!--hide-->

View File

@ -75,6 +75,8 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
Returns a string containing the argument's hexadecimal representation. Returns a string containing the argument's hexadecimal representation.
Синоним: `HEX`.
**Syntax** **Syntax**
``` sql ``` sql

View File

@ -13,6 +13,8 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
isNull(x) isNull(x)
``` ```
Синоним: `ISNULL`.
**Параметры** **Параметры**
- `x` — значение с не составным типом данных. - `x` — значение с не составным типом данных.

View File

@ -9,10 +9,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
Принимает число типа UInt32. Интерпретирует его, как IPv4-адрес в big endian. Возвращает строку, содержащую соответствующий IPv4-адрес в формате A.B.C.D (числа в десятичной форме через точки). Принимает число типа UInt32. Интерпретирует его, как IPv4-адрес в big endian. Возвращает строку, содержащую соответствующий IPv4-адрес в формате A.B.C.D (числа в десятичной форме через точки).
Синоним: `INET_NTOA`.
## IPv4StringToNum(s) {#ipv4stringtonums} ## IPv4StringToNum(s) {#ipv4stringtonums}
Функция, обратная к IPv4NumToString. Если IPv4 адрес в неправильном формате, то возвращает 0. Функция, обратная к IPv4NumToString. Если IPv4 адрес в неправильном формате, то возвращает 0.
Синоним: `INET_ATON`.
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum} ## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}
Похоже на IPv4NumToString, но вместо последнего октета используется xxx. Похоже на IPv4NumToString, но вместо последнего октета используется xxx.
@ -49,7 +53,11 @@ LIMIT 10
### IPv6NumToString(x) {#ipv6numtostringx} ### IPv6NumToString(x) {#ipv6numtostringx}
Принимает значение типа FixedString(16), содержащее IPv6-адрес в бинарном виде. Возвращает строку, содержащую этот адрес в текстовом виде. Принимает значение типа FixedString(16), содержащее IPv6-адрес в бинарном виде. Возвращает строку, содержащую этот адрес в текстовом виде.
IPv6-mapped IPv4 адреса выводится в формате ::ffff:111.222.33.44. Примеры: IPv6-mapped IPv4 адреса выводится в формате ::ffff:111.222.33.44.
Синоним: `INET6_NTOA`.
Примеры:
``` sql ``` sql
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
@ -118,6 +126,8 @@ LIMIT 10
Функция, обратная к IPv6NumToString. Если IPv6 адрес в неправильном формате, то возвращает строку из нулевых байт. Функция, обратная к IPv6NumToString. Если IPv6 адрес в неправильном формате, то возвращает строку из нулевых байт.
HEX может быть в любом регистре. HEX может быть в любом регистре.
Синоним: `INET6_ATON`.
## IPv4ToIPv6(x) {#ipv4toipv6x} ## IPv4ToIPv6(x) {#ipv4toipv6x}
Принимает число типа `UInt32`. Интерпретирует его, как IPv4-адрес в [big endian](https://en.wikipedia.org/wiki/Endianness). Возвращает значение `FixedString(16)`, содержащее адрес IPv6 в двоичном формате. Примеры: Принимает число типа `UInt32`. Интерпретирует его, как IPv4-адрес в [big endian](https://en.wikipedia.org/wiki/Endianness). Возвращает значение `FixedString(16)`, содержащее адрес IPv6 в двоичном формате. Примеры:

View File

@ -95,6 +95,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
Повторяет строку определенное количество раз и объединяет повторяемые значения в одну строку. Повторяет строку определенное количество раз и объединяет повторяемые значения в одну строку.
Синоним: `REPEAT`.
**Синтаксис** **Синтаксис**
``` sql ``` sql
@ -273,10 +275,14 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2)
Производит кодирование строки s в base64-представление. Производит кодирование строки s в base64-представление.
Синоним: `TO_BASE64`.
## base64Decode(s) {#base64decode} ## base64Decode(s) {#base64decode}
Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение
Синоним: `FROM_BASE64`.
## tryBase64Decode(s) {#trybase64decode} ## tryBase64Decode(s) {#trybase64decode}
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку. Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.

View File

@ -176,4 +176,129 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type
└──────────────────────────────┴───────────────────────────────────┘ └──────────────────────────────┴───────────────────────────────────┘
``` ```
[Оригинальная статья](https://clickhouse.tech/docs/en/query_language/functions/tuple-map-functions/) <!--hide--> ## mapContains {#mapcontains}
Определяет, содержит ли контейнер `map` ключ `key`.
**Синтаксис**
``` sql
mapContains(map, key)
```
**Параметры**
- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md).
- `key` — ключ. Тип соответствует типу ключей параметра `map`.
**Возвращаемое значение**
- `1` если `map` включает `key`, иначе `0`.
Тип: [UInt8](../../sql-reference/data-types/int-uint.md).
**Пример**
Запрос:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapContains(a, 'name') FROM test;
```
Результат:
```text
┌─mapContains(a, 'name')─┐
│ 1 │
│ 0 │
└────────────────────────┘
```
## mapKeys {#mapkeys}
Возвращает все ключи контейнера `map`.
**Синтаксис**
```sql
mapKeys(map)
```
**Параметры**
- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md).
**Возвращаемое значение**
- Массив со всеми ключами контейнера `map`.
Тип: [Array](../../sql-reference/data-types/array.md).
**Пример**
Запрос:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapKeys(a) FROM test;
```
Результат:
```text
┌─mapKeys(a)────────────┐
│ ['name','age'] │
│ ['number','position'] │
└───────────────────────┘
```
## mapValues {#mapvalues}
Возвращает все значения контейнера `map`.
**Синтаксис**
```sql
mapValues(map)
```
**Параметры**
- `map` — контейнер Map. [Map](../../sql-reference/data-types/map.md).
**Возвращаемое значение**
- Массив со всеми значениями контейнера `map`.
Тип: [Array](../../sql-reference/data-types/array.md).
**Примеры**
Запрос:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'name':'eleven','age':'11'}), ({'number':'twelve','position':'6.0'});
SELECT mapValues(a) FROM test;
```
Результат:
```text
┌─mapValues(a)─────┐
│ ['eleven','11'] │
│ ['twelve','6.0'] │
└──────────────────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/functions/tuple-map-functions/) <!--hide-->

View File

@ -36,10 +36,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u0440\u0435\u
**Пример** **Пример**
Запрос:
``` sql ``` sql
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8) SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
``` ```
Результат:
``` text ``` text
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐ ┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
│ -9223372036854775808 │ 32 │ 16 │ 8 │ │ -9223372036854775808 │ 32 │ 16 │ 8 │
@ -52,10 +56,14 @@ SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
**Пример** **Пример**
Запрос:
``` sql ``` sql
select toInt64OrZero('123123'), toInt8OrZero('123qwe123') SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
``` ```
Результат:
``` text ``` text
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐ ┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
│ 123123 │ 0 │ │ 123123 │ 0 │
@ -68,10 +76,14 @@ select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
**Пример** **Пример**
Запрос:
``` sql ``` sql
select toInt64OrNull('123123'), toInt8OrNull('123qwe123') SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
``` ```
Результат:
``` text ``` text
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐ ┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
│ 123123 │ ᴺᵁᴸᴸ │ │ 123123 │ ᴺᵁᴸᴸ │
@ -102,10 +114,14 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
**Пример** **Пример**
Запрос:
``` sql ``` sql
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8) SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
``` ```
Результат:
``` text ``` text
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐ ┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │ │ 9223372036854775808 │ 4294967264 │ 16 │ 8 │
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
## toDate {#todate} ## toDate {#todate}
Синоним: `DATE`.
## toDateOrZero {#todateorzero} ## toDateOrZero {#todateorzero}
## toDateOrNull {#todateornull} ## toDateOrNull {#todateornull}
@ -168,20 +186,28 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
**Примеры** **Примеры**
Запрос:
``` sql ``` sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val) SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
``` ```
Результат:
``` text ``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐ ┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5)) │ │ -1.11100 │ Nullable(Decimal(9, 5)) │
└──────────┴────────────────────────────────────────────────────┘ └──────────┴────────────────────────────────────────────────────┘
``` ```
Запрос:
``` sql ``` sql
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val) SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
``` ```
Результат:
``` text ``` text
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐ ┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │ │ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │
@ -213,20 +239,28 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
**Пример** **Пример**
Запрос:
``` sql ``` sql
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val) SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
``` ```
Результат:
``` text ``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐ ┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5) │ │ -1.11100 │ Decimal(9, 5) │
└──────────┴────────────────────────────────────────────────────┘ └──────────┴────────────────────────────────────────────────────┘
``` ```
Запрос:
``` sql ``` sql
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val) SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
``` ```
Результат:
``` text ``` text
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐ ┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
│ 0.00 │ Decimal(9, 2) │ │ 0.00 │ Decimal(9, 2) │
@ -258,12 +292,18 @@ YYYY-MM-DD hh:mm:ss
Дополнительно, функция toString от аргумента типа DateTime может принимать второй аргумент String - имя тайм-зоны. Пример: `Asia/Yekaterinburg` В этом случае, форматирование времени производится согласно указанной тайм-зоне. Дополнительно, функция toString от аргумента типа DateTime может принимать второй аргумент String - имя тайм-зоны. Пример: `Asia/Yekaterinburg` В этом случае, форматирование времени производится согласно указанной тайм-зоне.
**Пример**
Запрос:
``` sql ``` sql
SELECT SELECT
now() AS now_local, now() AS now_local,
toString(now(), 'Asia/Yekaterinburg') AS now_yekat toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
``` ```
Результат:
``` text ``` text
┌───────────now_local─┬─now_yekat───────────┐ ┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │ │ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
@ -281,22 +321,30 @@ SELECT
Принимает аргумент типа String или FixedString. Возвращает String, вырезая содержимое строки до первого найденного нулевого байта. Принимает аргумент типа String или FixedString. Возвращает String, вырезая содержимое строки до первого найденного нулевого байта.
Пример: **Примеры**
Запрос:
``` sql ``` sql
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
``` ```
Результат:
``` text ``` text
┌─s─────────────┬─s_cut─┐ ┌─s─────────────┬─s_cut─┐
│ foo\0\0\0\0\0 │ foo │ │ foo\0\0\0\0\0 │ foo │
└───────────────┴───────┘ └───────────────┴───────┘
``` ```
Запрос:
``` sql ``` sql
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
``` ```
Результат:
``` text ``` text
┌─s──────────┬─s_cut─┐ ┌─s──────────┬─s_cut─┐
│ foo\0bar\0 │ foo │ │ foo\0bar\0 │ foo │
@ -344,7 +392,7 @@ reinterpretAsUUID(fixed_string)
Запрос: Запрос:
``` sql ``` sql
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f'))) SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')));
``` ```
Результат: Результат:
@ -377,10 +425,15 @@ SELECT uuid = uuid2;
## CAST(x, T) {#type_conversion_function-cast} ## CAST(x, T) {#type_conversion_function-cast}
Преобразует x в тип данных t. Преобразует входное значение `x` в указанный тип данных `T`.
Поддерживается также синтаксис CAST(x AS t).
Пример: Поддерживается также синтаксис `CAST(x AS t)`.
Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255.
**Пример**
Запрос:
``` sql ``` sql
SELECT SELECT
@ -388,9 +441,11 @@ SELECT
CAST(timestamp AS DateTime) AS datetime, CAST(timestamp AS DateTime) AS datetime,
CAST(timestamp AS Date) AS date, CAST(timestamp AS Date) AS date,
CAST(timestamp, 'String') AS string, CAST(timestamp, 'String') AS string,
CAST(timestamp, 'FixedString(22)') AS fixed_string CAST(timestamp, 'FixedString(22)') AS fixed_string;
``` ```
Результат:
``` text ``` text
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐ ┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │ │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
@ -399,12 +454,18 @@ SELECT
Преобразование в FixedString(N) работает только для аргументов типа String или FixedString(N). Преобразование в FixedString(N) работает только для аргументов типа String или FixedString(N).
Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. Пример: Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно.
**Примеры**
Запрос:
``` sql ``` sql
SELECT toTypeName(x) FROM t_null SELECT toTypeName(x) FROM t_null;
``` ```
Результат:
``` text ``` text
┌─toTypeName(x)─┐ ┌─toTypeName(x)─┐
│ Int8 │ │ Int8 │
@ -412,10 +473,14 @@ SELECT toTypeName(x) FROM t_null
└───────────────┘ └───────────────┘
``` ```
Запрос:
``` sql ``` sql
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
``` ```
Результат:
``` text ``` text
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐ ┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
│ Nullable(UInt16) │ │ Nullable(UInt16) │
@ -427,6 +492,93 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
- Настройка [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) - Настройка [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable)
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
Преобразует входное значение `x` в указанный тип данных `T`.
В отличие от функции [cast(x, T)](#type_conversion_function-cast), `accurateCast` не допускает переполнения при преобразовании числовых типов. Например, `accurateCast(-1, 'UInt8')` вызовет исключение.
**Примеры**
Запрос:
``` sql
SELECT cast(-1, 'UInt8') as uint8;
```
Результат:
``` text
┌─uint8─┐
│ 255 │
└───────┘
```
Запрос:
```sql
SELECT accurateCast(-1, 'UInt8') as uint8;
```
Результат:
``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
Преобразует входное значение `x` в указанный тип данных `T`.
Всегда возвращает тип [Nullable](../../sql-reference/data-types/nullable.md). Если исходное значение не может быть преобразовано к целевому типу, возвращает [NULL](../../sql-reference/syntax.md#null-literal).
**Синтаксис**
```sql
accurateCastOrNull(x, T)
```
**Параметры**
- `x` — входное значение.
- `T` — имя возвращаемого типа данных.
**Возвращаемое значение**
- Значение, преобразованное в указанный тип `T`.
**Примеры**
Запрос:
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
```
Результат:
``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
│ Nullable(UInt8) │
└────────────────────────────────────────────┘
```
Запрос:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
```
Результат:
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval} ## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
Приводит аргумент из числового типа данных к типу данных [IntervalType](../../sql-reference/data-types/special-data-types/interval.md). Приводит аргумент из числового типа данных к типу данных [IntervalType](../../sql-reference/data-types/special-data-types/interval.md).
@ -454,6 +606,8 @@ toIntervalYear(number)
**Пример** **Пример**
Запрос:
``` sql ``` sql
WITH WITH
toDate('2019-01-01') AS date, toDate('2019-01-01') AS date,
@ -461,9 +615,11 @@ WITH
toIntervalWeek(1) AS interval_to_week toIntervalWeek(1) AS interval_to_week
SELECT SELECT
date + interval_week, date + interval_week,
date + interval_to_week date + interval_to_week;
``` ```
Результат:
``` text ``` text
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐ ┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│ 2019-01-08 │ 2019-01-08 │ │ 2019-01-08 │ 2019-01-08 │
@ -479,7 +635,7 @@ SELECT
**Синтаксис** **Синтаксис**
``` sql ``` sql
parseDateTimeBestEffort(time_string[, time_zone]); parseDateTimeBestEffort(time_string[, time_zone])
``` ```
**Параметры** **Параметры**
@ -522,7 +678,7 @@ AS parseDateTimeBestEffort;
``` sql ``` sql
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow') SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
AS parseDateTimeBestEffort AS parseDateTimeBestEffort;
``` ```
Результат: Результат:
@ -537,7 +693,7 @@ AS parseDateTimeBestEffort
``` sql ``` sql
SELECT parseDateTimeBestEffort('1284101485') SELECT parseDateTimeBestEffort('1284101485')
AS parseDateTimeBestEffort AS parseDateTimeBestEffort;
``` ```
Результат: Результат:
@ -552,7 +708,7 @@ AS parseDateTimeBestEffort
``` sql ``` sql
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12') SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
AS parseDateTimeBestEffort AS parseDateTimeBestEffort;
``` ```
Результат: Результат:
@ -566,7 +722,7 @@ AS parseDateTimeBestEffort
Запрос: Запрос:
``` sql ``` sql
SELECT parseDateTimeBestEffort('10 20:19') SELECT parseDateTimeBestEffort('10 20:19');
``` ```
Результат: Результат:
@ -591,7 +747,7 @@ SELECT parseDateTimeBestEffort('10 20:19')
**Синтаксис** **Синтаксис**
``` sql ``` sql
parseDateTimeBestEffortUS(time_string [, time_zone]); parseDateTimeBestEffortUS(time_string [, time_zone])
``` ```
**Параметры** **Параметры**
@ -620,7 +776,7 @@ SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57')
AS parseDateTimeBestEffortUS; AS parseDateTimeBestEffortUS;
``` ```
Ответ: Результат:
``` text ``` text
┌─parseDateTimeBestEffortUS─┐ ┌─parseDateTimeBestEffortUS─┐
@ -635,7 +791,7 @@ SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57')
AS parseDateTimeBestEffortUS; AS parseDateTimeBestEffortUS;
``` ```
Ответ: Результат:
``` text ``` text
┌─parseDateTimeBestEffortUS─┐ ┌─parseDateTimeBestEffortUS─┐
@ -650,7 +806,7 @@ SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57')
AS parseDateTimeBestEffortUS; AS parseDateTimeBestEffortUS;
``` ```
Ответ: Результат:
``` text ``` text
┌─parseDateTimeBestEffortUS─┐ ┌─parseDateTimeBestEffortUS─┐
@ -857,10 +1013,10 @@ toUnixTimestamp64Milli(value)
``` sql ``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64) SELECT toUnixTimestamp64Milli(dt64);
``` ```
Ответ: Результат:
``` text ``` text
┌─toUnixTimestamp64Milli(dt64)─┐ ┌─toUnixTimestamp64Milli(dt64)─┐
@ -872,10 +1028,10 @@ SELECT toUnixTimestamp64Milli(dt64)
``` sql ``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64 WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64) SELECT toUnixTimestamp64Nano(dt64);
``` ```
Ответ: Результат:
``` text ``` text
┌─toUnixTimestamp64Nano(dt64)─┐ ┌─toUnixTimestamp64Nano(dt64)─┐
@ -910,10 +1066,10 @@ fromUnixTimestamp64Milli(value [, ti])
``` sql ``` sql
WITH CAST(1234567891011, 'Int64') AS i64 WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC') SELECT fromUnixTimestamp64Milli(i64, 'UTC');
``` ```
Ответ: Результат:
``` text ``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐ ┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
@ -944,12 +1100,12 @@ toLowCardinality(expr)
Тип: `LowCardinality(expr_result_type)` Тип: `LowCardinality(expr_result_type)`
**Example** **Пример**
Запрос: Запрос:
```sql ```sql
SELECT toLowCardinality('1') SELECT toLowCardinality('1');
``` ```
Результат: Результат:
@ -985,10 +1141,10 @@ formatRow(format, x, y, ...)
``` sql ``` sql
SELECT formatRow('CSV', number, 'good') SELECT formatRow('CSV', number, 'good')
FROM numbers(3) FROM numbers(3);
``` ```
Ответ: Результат:
``` text ``` text
┌─formatRow('CSV', number, 'good')─┐ ┌─formatRow('CSV', number, 'good')─┐
@ -1026,10 +1182,10 @@ formatRowNoNewline(format, x, y, ...)
``` sql ``` sql
SELECT formatRowNoNewline('CSV', number, 'good') SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3) FROM numbers(3);
``` ```
Ответ: Результат:
``` text ``` text
┌─formatRowNoNewline('CSV', number, 'good')─┐ ┌─formatRowNoNewline('CSV', number, 'good')─┐

View File

@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
Если слева стоит один столбец, входящий в индекс, а справа - множество констант, то при выполнении запроса, система воспользуется индексом. Если слева стоит один столбец, входящий в индекс, а справа - множество констант, то при выполнении запроса, система воспользуется индексом.
Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел «Внешние данные для обработки запроса»), и затем воспользоваться подзапросом. Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел [Внешние данные для обработки запроса](../../engines/table-engines/special/external-data.md)), и затем воспользоваться подзапросом.
В качестве правой части оператора может быть множество константных выражений, множество кортежей с константными выражениями (показано в примерах выше), а также имя таблицы или подзапрос SELECT в скобках. В качестве правой части оператора может быть множество константных выражений, множество кортежей с константными выражениями (показано в примерах выше), а также имя таблицы или подзапрос SELECT в скобках.
Если типы данных в левой и правой частях подзапроса `IN` различаются, ClickHouse преобразует значение в левой части к типу данных из правой части. Преобразование выполняется по аналогии с функцией [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null), т.е. тип данных становится [Nullable](../../sql-reference/data-types/nullable.md), а если преобразование не может быть выполнено, возвращается значение [NULL](../../sql-reference/syntax.md#null-literal).
**Пример**
Запрос:
``` sql
SELECT '1' IN (SELECT 1);
```
Результат:
``` text
┌─in('1', _subquery49)─┐
│ 1 │
└──────────────────────┘
```
Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемыми вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию. Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемыми вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию.
Если в качестве правой части оператора указано имя таблицы, имеющей движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе. Если в качестве правой части оператора указано имя таблицы, имеющей движок Set (подготовленное множество, постоянно находящееся в оперативке), то множество не будет создаваться заново при каждом запросе.

View File

@ -5,23 +5,27 @@ toc_title: file
# file {#file} # file {#file}
Создаёт таблицу из файла. Данная табличная функция похожа на табличные функции [file](file.md) и [hdfs](hdfs.md). Создаёт таблицу из файла. Данная табличная функция похожа на табличные функции [url](../../sql-reference/table-functions/url.md) и [hdfs](../../sql-reference/table-functions/hdfs.md).
Функция `file` может использоваться в запросах `SELECT` и `INSERT` при работе с движком таблиц [File](../../engines/table-engines/special/file.md).
**Синтаксис**
``` sql ``` sql
file(path, format, structure) file(path, format, structure)
``` ```
**Входные параметры** **Параметры**
- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, \``'abc', 'def'` — строки. - `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки.
- `format` — [формат](../../interfaces/formats.md#formats) файла. - `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат `'column1_name column1_type, column2_name column2_type, ...'`. - `structure` — структура таблицы. Формат: `'column1_name column1_type, column2_name column2_type, ...'`.
**Возвращаемое значение** **Возвращаемое значение**
Таблица с указанной структурой, предназначенная для чтения или записи данных в указанном файле. Таблица с указанной структурой, предназначенная для чтения или записи данных в указанном файле.
**Пример** **Примеры**
Настройка `user_files_path` и содержимое файла `test.csv`: Настройка `user_files_path` и содержимое файла `test.csv`:
@ -35,12 +39,10 @@ $ cat /var/lib/clickhouse/user_files/test.csv
78,43,45 78,43,45
``` ```
Таблица из `test.csv` и выборка первых двух строк из неё: Получение данных из таблицы в файле `test.csv` и выборка первых двух строк из неё:
``` sql ``` sql
SELECT * SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 2;
FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32')
LIMIT 2
``` ```
``` text ``` text
@ -50,45 +52,61 @@ LIMIT 2
└─────────┴─────────┴─────────┘ └─────────┴─────────┴─────────┘
``` ```
Шаблоны могут содержаться в нескольких компонентах пути. Обрабатываются только существующие файлы, название которых целиком удовлетворяет шаблону (не только суффиксом или префиксом). Получение первых 10 строк таблицы, содержащей 3 столбца типа [UInt32](../../sql-reference/data-types/int-uint.md), из CSV-файла:
- `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов. ``` sql
- `?` — Заменяет ровно один любой символ. SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') LIMIT 10;
- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. ```
- `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
Вставка данных в файл с помощью табличной функции и последующее чтение из него:
``` sql
INSERT INTO FUNCTION file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') VALUES (1, 2, 3), (3, 2, 1);
SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32');
```
``` text
┌─column1─┬─column2─┬─column3─┐
│ 1 │ 2 │ 3 │
│ 3 │ 2 │ 1 │
└─────────┴─────────┴─────────┘
```
## Шаблоны поиска в компонентах пути {#globs-in-path}
При описании пути к файлу могут использоваться шаблоны поиска. Обрабатываются только те файлы, у которых путь и название соответствуют шаблону полностью (а не только префикс или суффикс).
- `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов.
- `?` — заменяет ровно один любой символ.
- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули).
Конструкция с `{}` аналогична табличной функции [remote](remote.md). Конструкция с `{}` аналогична табличной функции [remote](remote.md).
**Пример** **Пример**
1. Предположим у нас есть несколько файлов со следующими относительными путями: Предположим, у нас есть несколько файлов со следующими относительными путями:
- some_dir/some_file_1 - 'some_dir/some_file_1'
- some_dir/some_file_2 - 'some_dir/some_file_2'
- some_dir/some_file_3 - 'some_dir/some_file_3'
- another_dir/some_file_1 - 'another_dir/some_file_1'
- another_dir/some_file_2 - 'another_dir/some_file_2'
- another_dir/some_file_3 - 'another_dir/some_file_3'
1. Запросим количество строк в этих файлах: Запросим количество строк в этих файлах:
<!-- -->
``` sql ``` sql
SELECT count(*) SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32');
FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name String, value UInt32')
``` ```
1. Запросим количество строк во всех файлах этих двух директорий: Запросим количество строк во всех файлах этих двух директорий:
<!-- -->
``` sql ``` sql
SELECT count(*) SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32');
FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
``` ```
!!! warning "Warning" !!! warning "Предупреждение"
Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`.
**Пример** **Пример**
@ -96,17 +114,16 @@ FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32')
Запрос данных из файлов с именами `file000`, `file001`, … , `file999`: Запрос данных из файлов с именами `file000`, `file001`, … , `file999`:
``` sql ``` sql
SELECT count(*) SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32');
FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32')
``` ```
## Виртуальные столбцы {#virtualnye-stolbtsy} ## Виртуальные столбцы {#virtualnye-stolbtsy}
- `_path` — Путь к файлу. - `_path` — путь к файлу.
- `_file` — Имя файла. - `_file` — имя файла.
**Смотрите также** **Смотрите также**
- [Виртуальные столбцы](index.md#table_engines-virtual_columns) - [Виртуальные столбцы](index.md#table_engines-virtual_columns)
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/file/) <!--hide--> [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/file/) <!--hide-->

View File

@ -5,9 +5,11 @@ toc_title: remote
# remote, remoteSecure {#remote-remotesecure} # remote, remoteSecure {#remote-remotesecure}
Позволяет обратиться к удалённым серверам без создания таблицы типа `Distributed`. Позволяет обратиться к удалённым серверам без создания таблицы типа [Distributed](../../engines/table-engines/special/distributed.md). Функция `remoteSecure` работает аналогично `remote`, но использует защищенное соединение.
Сигнатуры: Обе функции могут использоваться в запросах `SELECT` и `INSERT`.
**Синтаксис**
``` sql ``` sql
remote('addresses_expr', db, table[, 'user'[, 'password']]) remote('addresses_expr', db, table[, 'user'[, 'password']])
@ -16,12 +18,40 @@ remoteSecure('addresses_expr', db, table[, 'user'[, 'password']])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password']]) remoteSecure('addresses_expr', db.table[, 'user'[, 'password']])
``` ```
`addresses_expr` - выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера - это `хост:порт`, или только `хост`. Хост может быть указан в виде имени сервера, или в виде IPv4 или IPv6 адреса. IPv6 адрес указывается в квадратных скобках. Порт - TCP-порт удалённого сервера. Если порт не указан, используется `tcp_port` из конфигурационного файла сервера (по умолчанию - 9000). **Параметры**
- `addresses_expr` — выражение, генерирующее адреса удалённых серверов. Это может быть просто один адрес сервера. Адрес сервера — это `host:port` или только `host`.
Вместо параметра `host` может быть указано имя сервера или его адрес в формате IPv4 или IPv6. IPv6 адрес указывается в квадратных скобках.
`port` — TCP-порт удалённого сервера. Если порт не указан, то для функции `remote` используется [tcp_port](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port) из конфигурационного файла сервера (по умолчанию — 9000), а для функции `remoteSecure` — [tcp_port_secure](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-tcp_port_secure) (по умолчанию — 9440).
!!! important "Важно"
С IPv6-адресом обязательно нужно указывать порт. С IPv6-адресом обязательно нужно указывать порт.
Примеры: Тип: [String](../../sql-reference/data-types/string.md).
- `db` — имя базы данных. Тип: [String](../../sql-reference/data-types/string.md).
- `table` — имя таблицы. Тип: [String](../../sql-reference/data-types/string.md).
- `user` — имя пользователя. Если пользователь не указан, то по умолчанию `default`. Тип: [String](../../sql-reference/data-types/string.md).
- `password` — пароль. Если пароль не указан, то используется пустой пароль. Тип: [String](../../sql-reference/data-types/string.md).
- `sharding_key` — ключ шардирования для поддержки распределения данных между узлами. Например: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`. Тип: [UInt32](../../sql-reference/data-types/int-uint.md).
**Возвращаемое значение**
Набор данных с удаленных серверов.
**Использование**
Использование табличной функции `remote` менее оптимально, чем создание таблицы типа `Distributed`, так как в этом случае соединения с серверами устанавливаются заново при каждом запросе. Если указываются имена серверов, то приходится также выполнять поиск сервера по имени. Кроме того, не ведётся сквозной подсчёт ошибок при работе с разными репликами. При обработке большого количества запросов всегда создавайте таблицу типа `Distributed`, использовать табличную функцию `remote` в таких случаях не рекомендуется.
Табличная функция `remote` может быть полезна в следующих случаях:
- Обращение к конкретному серверу для сравнения данных, отладки и тестирования.
- Запросы между разными кластерами ClickHouse для исследований.
- Нечастые распределённые запросы, задаваемые вручную.
- Распределённые запросы, где набор серверов определяется каждый раз заново.
**Адреса**
``` text ``` text
example01-01-1 example01-01-1
@ -32,9 +62,7 @@ localhost
[2a02:6b8:0:1111::11]:9000 [2a02:6b8:0:1111::11]:9000
``` ```
Адреса можно указать через запятую, в этом случае ClickHouse обработает запрос как распределённый, т.е. отправит его по всем указанным адресам как на шарды с разными данными. Адреса можно указать через запятую. В этом случае ClickHouse обработает запрос как распределённый, т.е. отправит его по всем указанным адресам как на шарды с разными данными. Пример:
Пример:
``` text ``` text
example01-01-1,example01-02-1 example01-01-1,example01-02-1
@ -46,38 +74,36 @@ example01-01-1,example01-02-1
example01-0{1,2}-1 example01-0{1,2}-1
``` ```
В фигурных скобках может быть указан диапазон (неотрицательных целых) чисел через две точки. В этом случае, диапазон раскрывается в множество значений, генерирующих адреса шардов. Если запись первого числа начинается с нуля, то значения формируются с таким же выравниванием нулями. Предыдущий пример может быть записан следующим образом: В фигурных скобках может быть указан диапазон (неотрицательных целых) чисел через две точки. В этом случае диапазон раскрывается в множество значений, генерирующих адреса шардов. Если запись первого числа начинается с нуля, то значения формируются с таким же выравниванием нулями. Предыдущий пример может быть записан следующим образом:
``` text ``` text
example01-{01..02}-1 example01-{01..02}-1
``` ```
При наличии нескольких пар фигурных скобок, генерируется прямое произведение соответствующих множеств. При наличии нескольких пар фигурных скобок генерируется прямое произведение соответствующих множеств.
Адреса или их фрагменты в фигурных скобках можно указать через символ \|. В этом случае, соответствующие множества адресов понимаются как реплики - запрос будет отправлен на первую живую реплику. При этом, реплики перебираются в порядке, согласно текущей настройке [load_balancing](../../operations/settings/settings.md). Адреса или их фрагменты в фигурных скобках можно указать через символ \|. В этом случае соответствующие множества адресов понимаются как реплики — запрос будет отправлен на первую живую реплику. При этом реплики перебираются в порядке, согласно текущей настройке [load_balancing](../../operations/settings/settings.md#settings-load_balancing). В этом примере указаны два шарда, в каждом из которых имеются две реплики:
Пример:
``` text ``` text
example01-{01..02}-{1|2} example01-{01..02}-{1|2}
``` ```
В этом примере указано два шарда, в каждом из которых имеется две реплики. Количество генерируемых адресов ограничено константой. Сейчас это 1000 адресов.
Количество генерируемых адресов ограничено константой - сейчас это 1000 штук. **Примеры**
Использование табличной функции `remote` менее оптимально, чем создание таблицы типа `Distributed`, так как в этом случае, соединения с серверами устанавливаются заново при каждом запросе, в случае задания имён хостов, делается резолвинг имён, а также не ведётся подсчёт ошибок при работе с разными репликами. При обработке большого количества запросов, всегда создавайте `Distributed` таблицу заранее, не используйте табличную функцию `remote`. Выборка данных с удаленного сервера:
Табличная функция `remote` может быть полезна для следующих случаях: ``` sql
SELECT * FROM remote('127.0.0.1', db.remote_engine_table) LIMIT 3;
```
- обращение на конкретный сервер в целях сравнения данных, отладки и тестирования; Вставка данных с удаленного сервера в таблицу:
- запросы между разными кластерами ClickHouse в целях исследований;
- нечастых распределённых запросов, задаваемых вручную;
- распределённых запросов, где набор серверов определяется каждый раз заново.
Если пользователь не задан,то используется `default`. ``` sql
Если пароль не задан, то используется пустой пароль. CREATE TABLE remote_table (name String, value UInt32) ENGINE=Memory;
INSERT INTO FUNCTION remote('127.0.0.1', currentDatabase(), 'remote_table') VALUES ('test', 42);
SELECT * FROM remote_table;
```
`remoteSecure` - аналогично функции `remote`, но с соединением по шифрованному каналу. Порт по умолчанию - `tcp_port_secure` из конфига или 9440. [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/remote/) <!--hide-->
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/remote/) <!--hide-->

View File

@ -5,21 +5,40 @@ toc_title: url
# url {#url} # url {#url}
`url(URL, format, structure)` - возвращает таблицу со столбцами, указанными в Функция `url` берет данные по указанному адресу `URL` и создает из них таблицу указанной структуры со столбцами указанного формата.
`structure`, созданную из данных находящихся по `URL` в формате `format`.
URL - адрес, по которому сервер принимает `GET` и/или `POST` запросы по Функция `url` может быть использована в запросах `SELECT` и `INSERT` с таблицами на движке [URL](../../engines/table-engines/special/url.md).
протоколу HTTP или HTTPS.
format - [формат](../../interfaces/formats.md#formats) данных. **Синтаксис**
structure - структура таблицы в форме `'UserID UInt64, Name String'`. Определяет имена и типы столбцов.
**Пример**
``` sql ``` sql
-- получение 3-х строк таблицы, состоящей из двух колонк типа String и UInt32 от сервера, отдающего данные в формате CSV url(URL, format, structure)
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3
``` ```
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/url/) <!--hide--> **Параметры**
- `URL` — HTTP или HTTPS-адрес сервера, который может принимать запросы `GET` или `POST` (для запросов `SELECT` или `INSERT` соответственно). Тип: [String](../../sql-reference/data-types/string.md).
- `format` — [формат](../../interfaces/formats.md#formats) данных. Тип: [String](../../sql-reference/data-types/string.md).
- `structure` — структура таблицы в формате `'UserID UInt64, Name String'`. Определяет имена и типы столбцов. Тип: [String](../../sql-reference/data-types/string.md).
**Возвращаемое значение**
Таблица с указанными форматом и структурой, а также с данными, полученными из указанного адреса `URL`.
**Примеры**
Получение с HTTP-сервера первых 3 строк таблицы с данными в формате [CSV](../../interfaces/formats.md#csv), содержащей столбцы типа [String](../../sql-reference/data-types/string.md) и [UInt32](../../sql-reference/data-types/int-uint.md):
``` sql
SELECT * FROM url('http://127.0.0.1:12345/', CSV, 'column1 String, column2 UInt32') LIMIT 3;
```
Вставка данных в таблицу:
``` sql
CREATE TABLE test_table (column1 String, column2 UInt32) ENGINE=Memory;
INSERT INTO FUNCTION url('http://127.0.0.1:8123/?query=INSERT+INTO+test_table+FORMAT+CSV', 'CSV', 'column1 String, column2 UInt32') VALUES ('http interface', 42);
SELECT * FROM test_table;
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/url/) <!--hide-->

View File

@ -82,6 +82,7 @@
#if defined(OS_LINUX) #if defined(OS_LINUX)
# include <sys/mman.h> # include <sys/mman.h>
# include <sys/ptrace.h>
# include <Common/hasLinuxCapability.h> # include <Common/hasLinuxCapability.h>
#endif #endif
@ -480,16 +481,26 @@ int Server::main(const std::vector<std::string> & /*args*/)
} }
else else
{ {
throw Exception(ErrorCodes::CORRUPTED_DATA, /// If program is run under debugger, ptrace will fail.
"Calculated checksum of the ClickHouse binary ({0}) does not correspond" if (ptrace(PTRACE_TRACEME, 0, nullptr, nullptr) == -1)
" to the reference checksum stored in the binary ({1})." {
" It may indicate one of the following:" /// Program is run under debugger. Modification of it's binary image is ok for breakpoints.
" - the file {2} was changed just after startup;" LOG_WARNING(log, "Server is run under debugger and its binary image is modified (most likely with breakpoints).",
" - the file {2} is damaged on disk due to faulty hardware;" calculated_binary_hash);
" - the loaded executable is damaged in memory due to faulty hardware;" }
" - the file {2} was intentionally modified;" else
" - logical error in code." {
, calculated_binary_hash, stored_binary_hash, executable_path); throw Exception(ErrorCodes::CORRUPTED_DATA,
"Calculated checksum of the ClickHouse binary ({0}) does not correspond"
" to the reference checksum stored in the binary ({1})."
" It may indicate one of the following:"
" - the file {2} was changed just after startup;"
" - the file {2} is damaged on disk due to faulty hardware;"
" - the loaded executable is damaged in memory due to faulty hardware;"
" - the file {2} was intentionally modified;"
" - logical error in code."
, calculated_binary_hash, stored_binary_hash, executable_path);
}
} }
} }
else else

View File

@ -1,5 +1,7 @@
#pragma once #pragma once
#include <common/defines.h>
#ifdef __clang__ #ifdef __clang__
#pragma clang diagnostic push #pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wreserved-id-macro" #pragma clang diagnostic ignored "-Wreserved-id-macro"
@ -9,14 +11,15 @@
#define __msan_test_shadow(X, Y) (false) #define __msan_test_shadow(X, Y) (false)
#define __msan_print_shadow(X, Y) #define __msan_print_shadow(X, Y)
#define __msan_unpoison_string(X) #define __msan_unpoison_string(X)
#if defined(__has_feature)
# if __has_feature(memory_sanitizer) #if defined(ch_has_feature)
# undef __msan_unpoison # if ch_has_feature(memory_sanitizer)
# undef __msan_test_shadow # undef __msan_unpoison
# undef __msan_print_shadow # undef __msan_test_shadow
# undef __msan_unpoison_string # undef __msan_print_shadow
# include <sanitizer/msan_interface.h> # undef __msan_unpoison_string
# endif # include <sanitizer/msan_interface.h>
# endif
#endif #endif
#ifdef __clang__ #ifdef __clang__

View File

@ -60,11 +60,11 @@ Otherwise you will get only exported symbols from program headers.
#endif #endif
#define __msan_unpoison_string(X) // NOLINT #define __msan_unpoison_string(X) // NOLINT
#if defined(__has_feature) #if defined(ch_has_feature)
# if __has_feature(memory_sanitizer) # if ch_has_feature(memory_sanitizer)
# undef __msan_unpoison_string # undef __msan_unpoison_string
# include <sanitizer/msan_interface.h> # include <sanitizer/msan_interface.h>
# endif # endif
#endif #endif

View File

@ -391,6 +391,9 @@ public:
virtual void multi( virtual void multi(
const Requests & requests, const Requests & requests,
MultiCallback callback) = 0; MultiCallback callback) = 0;
/// Expire session and finish all pending requests
virtual void finalize() = 0;
}; };
} }

View File

@ -30,7 +30,7 @@ using TestKeeperRequestPtr = std::shared_ptr<TestKeeperRequest>;
* *
* NOTE: You can add various failure modes for better testing. * NOTE: You can add various failure modes for better testing.
*/ */
class TestKeeper : public IKeeper class TestKeeper final : public IKeeper
{ {
public: public:
TestKeeper(const String & root_path_, Poco::Timespan operation_timeout_); TestKeeper(const String & root_path_, Poco::Timespan operation_timeout_);
@ -83,6 +83,7 @@ public:
const Requests & requests, const Requests & requests,
MultiCallback callback) override; MultiCallback callback) override;
void finalize() override;
struct Node struct Node
{ {
@ -130,7 +131,6 @@ private:
void pushRequest(RequestInfo && request); void pushRequest(RequestInfo && request);
void finalize();
ThreadFromGlobalPool processing_thread; ThreadFromGlobalPool processing_thread;

View File

@ -44,7 +44,7 @@ static void check(Coordination::Error code, const std::string & path)
} }
void ZooKeeper::init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_, void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_) int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_)
{ {
log = &Poco::Logger::get("ZooKeeper"); log = &Poco::Logger::get("ZooKeeper");
@ -60,13 +60,16 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
if (hosts.empty()) if (hosts.empty())
throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::Error::ZBADARGUMENTS); throw KeeperException("No hosts passed to ZooKeeper constructor.", Coordination::Error::ZBADARGUMENTS);
std::vector<std::string> hosts_strings;
splitInto<','>(hosts_strings, hosts);
Coordination::ZooKeeper::Nodes nodes; Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(hosts_strings.size()); nodes.reserve(hosts.size());
Strings shuffled_hosts = hosts;
/// Shuffle the hosts to distribute the load among ZooKeeper nodes.
pcg64 generator(randomSeed());
std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator);
bool dns_error = false; bool dns_error = false;
for (auto & host_string : hosts_strings) for (auto & host_string : shuffled_hosts)
{ {
try try
{ {
@ -109,9 +112,9 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
Poco::Timespan(0, operation_timeout_ms_ * 1000)); Poco::Timespan(0, operation_timeout_ms_ * 1000));
if (chroot.empty()) if (chroot.empty())
LOG_TRACE(log, "Initialized, hosts: {}", hosts); LOG_TRACE(log, "Initialized, hosts: {}", fmt::join(hosts, ","));
else else
LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", hosts, chroot); LOG_TRACE(log, "Initialized, hosts: {}, chroot: {}", fmt::join(hosts, ","), chroot);
} }
else if (implementation == "testkeeper") else if (implementation == "testkeeper")
{ {
@ -128,7 +131,16 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
throw KeeperException("Zookeeper root doesn't exist. You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE); throw KeeperException("Zookeeper root doesn't exist. You should create root node " + chroot + " before start.", Coordination::Error::ZNONODE);
} }
ZooKeeper::ZooKeeper(const std::string & hosts_, const std::string & identity_, int32_t session_timeout_ms_, ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_)
{
Strings hosts_strings;
splitInto<','>(hosts_strings, hosts_string);
init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
}
ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_) int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_)
{ {
init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_); init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
@ -141,8 +153,6 @@ struct ZooKeeperArgs
Poco::Util::AbstractConfiguration::Keys keys; Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_name, keys); config.keys(config_name, keys);
std::vector<std::string> hosts_strings;
session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS; session_timeout_ms = Coordination::DEFAULT_SESSION_TIMEOUT_MS;
operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS; operation_timeout_ms = Coordination::DEFAULT_OPERATION_TIMEOUT_MS;
implementation = "zookeeper"; implementation = "zookeeper";
@ -150,7 +160,7 @@ struct ZooKeeperArgs
{ {
if (startsWith(key, "node")) if (startsWith(key, "node"))
{ {
hosts_strings.push_back( hosts.push_back(
(config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") + (config.getBool(config_name + "." + key + ".secure", false) ? "secure://" : "") +
config.getString(config_name + "." + key + ".host") + ":" config.getString(config_name + "." + key + ".host") + ":"
+ config.getString(config_name + "." + key + ".port", "2181") + config.getString(config_name + "." + key + ".port", "2181")
@ -180,17 +190,6 @@ struct ZooKeeperArgs
throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS); throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
} }
/// Shuffle the hosts to distribute the load among ZooKeeper nodes.
pcg64 generator(randomSeed());
std::shuffle(hosts_strings.begin(), hosts_strings.end(), generator);
for (auto & host : hosts_strings)
{
if (!hosts.empty())
hosts += ',';
hosts += host;
}
if (!chroot.empty()) if (!chroot.empty())
{ {
if (chroot.front() != '/') if (chroot.front() != '/')
@ -200,7 +199,7 @@ struct ZooKeeperArgs
} }
} }
std::string hosts; Strings hosts;
std::string identity; std::string identity;
int session_timeout_ms; int session_timeout_ms;
int operation_timeout_ms; int operation_timeout_ms;
@ -922,6 +921,10 @@ Coordination::Error ZooKeeper::tryMultiNoThrow(const Coordination::Requests & re
} }
} }
/// Finalizes the session by forwarding the call to the underlying keeper
/// implementation (`impl->finalize()`); this wrapper adds no logic of its own.
void ZooKeeper::finalize()
{
impl->finalize();
}
size_t KeeperMultiException::getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses) size_t KeeperMultiException::getFailedOpIndex(Coordination::Error exception_code, const Coordination::Responses & responses)
{ {
@ -1000,4 +1003,5 @@ Coordination::RequestPtr makeCheckRequest(const std::string & path, int version)
request->version = version; request->version = version;
return request; return request;
} }
} }

View File

@ -50,7 +50,14 @@ class ZooKeeper
public: public:
using Ptr = std::shared_ptr<ZooKeeper>; using Ptr = std::shared_ptr<ZooKeeper>;
ZooKeeper(const std::string & hosts_, const std::string & identity_ = "", /// hosts_string -- comma separated [secure://]host:port list
ZooKeeper(const std::string & hosts_string, const std::string & identity_ = "",
int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "",
const std::string & implementation_ = "zookeeper");
ZooKeeper(const Strings & hosts_, const std::string & identity_ = "",
int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS, int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS, int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
const std::string & chroot_ = "", const std::string & chroot_ = "",
@ -247,10 +254,12 @@ public:
/// Like the previous one but don't throw any exceptions on future.get() /// Like the previous one but don't throw any exceptions on future.get()
FutureMulti tryAsyncMulti(const Coordination::Requests & ops); FutureMulti tryAsyncMulti(const Coordination::Requests & ops);
void finalize();
private: private:
friend class EphemeralNodeHolder; friend class EphemeralNodeHolder;
void init(const std::string & implementation_, const std::string & hosts_, const std::string & identity_, void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_); int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
/// The following methods don't throw exceptions but return error codes. /// The following methods don't throw exceptions but return error codes.
@ -266,7 +275,7 @@ private:
std::unique_ptr<Coordination::IKeeper> impl; std::unique_ptr<Coordination::IKeeper> impl;
std::string hosts; Strings hosts;
std::string identity; std::string identity;
int32_t session_timeout_ms; int32_t session_timeout_ms;
int32_t operation_timeout_ms; int32_t operation_timeout_ms;

View File

@ -88,7 +88,7 @@ using namespace DB;
/** Usage scenario: look at the documentation for IKeeper class. /** Usage scenario: look at the documentation for IKeeper class.
*/ */
class ZooKeeper : public IKeeper class ZooKeeper final : public IKeeper
{ {
public: public:
struct Node struct Node
@ -167,6 +167,20 @@ public:
const Requests & requests, const Requests & requests,
MultiCallback callback) override; MultiCallback callback) override;
/// Finalizes this session by calling the two-argument finalize overload
/// with both flags set to false.
/// NOTE(review): the meaning of the two flags is defined by that overload,
/// which is not visible here.
///
/// Rationale: without forcefully invalidating (finalizing) a ZooKeeper
/// session before establishing a new one, there was a possibility that the
/// server would use two ZooKeeper sessions simultaneously in different
/// parts of the code. This is a strong antipattern and we have always
/// prevented it.
/// ZooKeeper is linearizable for writes but not for reads; it only
/// maintains "sequential consistency": in every session you observe all
/// events in order, but possibly with some delay. If you perform a write
/// in one session and then notify a different part of the code, which
/// performs a read in another session, that read may not see the
/// already performed write.
void finalize() override { finalize(false, false); }
private: private:
String root_path; String root_path;
ACLs default_acls; ACLs default_acls;

View File

@ -35,11 +35,23 @@ void NuKeeperServer::startup()
{ {
state_manager->loadLogStore(state_machine->last_commit_index()); state_manager->loadLogStore(state_machine->last_commit_index());
bool single_server = state_manager->getTotalServers() == 1;
nuraft::raft_params params; nuraft::raft_params params;
params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds(); if (single_server)
params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds(); {
params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds(); /// Don't make sense in single server mode
params.heart_beat_interval_ = 0;
params.election_timeout_lower_bound_ = 0;
params.election_timeout_upper_bound_ = 0;
}
else
{
params.heart_beat_interval_ = coordination_settings->heart_beat_interval_ms.totalMilliseconds();
params.election_timeout_lower_bound_ = coordination_settings->election_timeout_lower_bound_ms.totalMilliseconds();
params.election_timeout_upper_bound_ = coordination_settings->election_timeout_upper_bound_ms.totalMilliseconds();
}
params.reserved_log_items_ = coordination_settings->reserved_log_items; params.reserved_log_items_ = coordination_settings->reserved_log_items;
params.snapshot_distance_ = coordination_settings->snapshot_distance; params.snapshot_distance_ = coordination_settings->snapshot_distance;
params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds(); params.client_req_timeout_ = coordination_settings->operation_timeout_ms.totalMilliseconds();
@ -161,13 +173,38 @@ bool NuKeeperServer::isLeaderAlive() const
nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */) nuraft::cb_func::ReturnCode NuKeeperServer::callbackFunc(nuraft::cb_func::Type type, nuraft::cb_func::Param * /* param */)
{ {
if ((type == nuraft::cb_func::InitialBatchCommited && isLeader()) || type == nuraft::cb_func::BecomeFresh) /// Only initial record
bool empty_store = state_manager->getLogStore()->size() == 1;
auto set_initialized = [this] ()
{ {
std::unique_lock lock(initialized_mutex); std::unique_lock lock(initialized_mutex);
initialized_flag = true; initialized_flag = true;
initialized_cv.notify_all(); initialized_cv.notify_all();
};
switch (type)
{
case nuraft::cb_func::BecomeLeader:
{
if (empty_store) /// We become leader and store is empty, ready to serve requests
set_initialized();
return nuraft::cb_func::ReturnCode::Ok;
}
case nuraft::cb_func::BecomeFresh:
{
set_initialized(); /// We are fresh follower, ready to serve requests.
return nuraft::cb_func::ReturnCode::Ok;
}
case nuraft::cb_func::InitialBatchCommited:
{
if (isLeader()) /// We have committed our log store and we are leader, ready to serve requests.
set_initialized();
return nuraft::cb_func::ReturnCode::Ok;
}
default: /// ignore other events
return nuraft::cb_func::ReturnCode::Ok;
} }
return nuraft::cb_func::ReturnCode::Ok;
} }
void NuKeeperServer::waitInit() void NuKeeperServer::waitInit()

View File

@ -33,6 +33,7 @@ NuKeeperStateManager::NuKeeperStateManager(
Poco::Util::AbstractConfiguration::Keys keys; Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix + ".raft_configuration", keys); config.keys(config_prefix + ".raft_configuration", keys);
total_servers = keys.size();
for (const auto & server_key : keys) for (const auto & server_key : keys)
{ {

View File

@ -52,9 +52,14 @@ public:
return start_as_follower_servers.count(my_server_id); return start_as_follower_servers.count(my_server_id);
} }
/// Returns the log store pointer held by this state manager (`log_store`).
nuraft::ptr<NuKeeperLogStore> getLogStore() const { return log_store; }
/// Returns `total_servers`, which is zero-initialized and assigned the number
/// of keys found under `<config_prefix>.raft_configuration` in the constructor.
size_t getTotalServers() const { return total_servers; }
private: private:
int my_server_id; int my_server_id;
int my_port; int my_port;
size_t total_servers{0};
std::unordered_set<int> start_as_follower_servers; std::unordered_set<int> start_as_follower_servers;
nuraft::ptr<NuKeeperLogStore> log_store; nuraft::ptr<NuKeeperLogStore> log_store;
nuraft::ptr<nuraft::srv_config> my_server_config; nuraft::ptr<nuraft::srv_config> my_server_config;

View File

@ -197,124 +197,6 @@ TEST(CoordinationTest, TestSummingRaft1)
s1.launcher.shutdown(5); s1.launcher.shutdown(5);
} }
/// Three-node summing-raft scenario:
///  1. start servers s1, s2, s3, each with its own changelog directory;
///  2. add s1 and then s3 to the quorum through s2 and wait until both report 2 as leader;
///  3. replicate the value 1 through the leader and wait until every state machine holds 1;
///  4. check that appends sent to the non-leaders s1 and s3 are not accepted;
///  5. replicate the value 77 through the leader and wait until every state machine holds 78.
TEST(CoordinationTest, TestSummingRaft3)
{
    ChangelogDirTest test1("./logs1");
    SummingRaftServer s1(1, "localhost", 44444, "./logs1");
    ChangelogDirTest test2("./logs2");
    SummingRaftServer s2(2, "localhost", 44445, "./logs2");
    ChangelogDirTest test3("./logs3");
    SummingRaftServer s3(3, "localhost", 44446, "./logs3");

    nuraft::srv_config first_config(1, 0, "localhost:44444", "", false, 0);
    auto ret1 = s2.raft_instance->add_srv(first_config);
    while (!ret1->get_accepted())
    {
        std::cout << "failed to add server: "
                  << ret1->get_result_str() << std::endl;
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        ret1 = s2.raft_instance->add_srv(first_config);
    }

    while (s1.raft_instance->get_leader() != 2)
    {
        std::cout << "Waiting s1 to join to s2 quorum\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    nuraft::srv_config third_config(3, 0, "localhost:44446", "", false, 0);
    auto ret3 = s2.raft_instance->add_srv(third_config);
    /// Retry until the request is accepted, exactly as for the first server.
    /// A single unchecked retry could leave s3 outside the quorum, and the
    /// wait loop below would then never terminate.
    while (!ret3->get_accepted())
    {
        std::cout << "failed to add server: "
                  << ret3->get_result_str() << std::endl;
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
        ret3 = s2.raft_instance->add_srv(third_config);
    }

    while (s3.raft_instance->get_leader() != 2)
    {
        std::cout << "Waiting s3 to join to s2 quorum\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    /// S2 is leader
    EXPECT_EQ(s1.raft_instance->get_leader(), 2);
    EXPECT_EQ(s2.raft_instance->get_leader(), 2);
    EXPECT_EQ(s3.raft_instance->get_leader(), 2);

    std::cerr << "Starting to add entries\n";
    auto entry = getBuffer(1);
    auto ret = s2.raft_instance->append_entries({entry});
    while (!ret->get_accepted() || ret->get_result_code() != nuraft::cmd_result_code::OK)
    {
        std::cerr << ret->get_accepted() << "failed to replicate: entry 1" << ret->get_result_code() << std::endl;
        ret = s2.raft_instance->append_entries({entry});
    }

    while (s1.state_machine->getValue() != 1)
    {
        std::cout << "Waiting s1 to apply entry\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    while (s2.state_machine->getValue() != 1)
    {
        std::cout << "Waiting s2 to apply entry\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    while (s3.state_machine->getValue() != 1)
    {
        std::cout << "Waiting s3 to apply entry\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    EXPECT_EQ(s1.state_machine->getValue(), 1);
    EXPECT_EQ(s2.state_machine->getValue(), 1);
    EXPECT_EQ(s3.state_machine->getValue(), 1);

    /// Appends sent to followers must not be accepted.
    auto non_leader_entry = getBuffer(3);
    auto ret_non_leader1 = s1.raft_instance->append_entries({non_leader_entry});

    EXPECT_FALSE(ret_non_leader1->get_accepted());

    auto ret_non_leader3 = s3.raft_instance->append_entries({non_leader_entry});

    EXPECT_FALSE(ret_non_leader3->get_accepted());

    auto leader_entry = getBuffer(77);
    auto ret_leader = s2.raft_instance->append_entries({leader_entry});
    while (!ret_leader->get_accepted() || ret_leader->get_result_code() != nuraft::cmd_result_code::OK)
    {
        std::cerr << "failed to replicate: entry 78" << ret_leader->get_result_code() << std::endl;
        ret_leader = s2.raft_instance->append_entries({leader_entry});
    }

    while (s1.state_machine->getValue() != 78)
    {
        std::cout << "Waiting s1 to apply entry\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    /// Wait for the leader as well, mirroring the first replication round.
    while (s2.state_machine->getValue() != 78)
    {
        std::cout << "Waiting s2 to apply entry\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    while (s3.state_machine->getValue() != 78)
    {
        std::cout << "Waiting s3 to apply entry\n";
        std::this_thread::sleep_for(std::chrono::milliseconds(100));
    }

    EXPECT_EQ(s1.state_machine->getValue(), 78);
    EXPECT_EQ(s2.state_machine->getValue(), 78);
    EXPECT_EQ(s3.state_machine->getValue(), 78);

    s1.launcher.shutdown(5);
    s2.launcher.shutdown(5);
    s3.launcher.shutdown(5);
}
nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request) nuraft::ptr<nuraft::buffer> getZooKeeperLogEntry(int64_t session_id, const Coordination::ZooKeeperRequestPtr & request)
{ {
DB::WriteBufferFromNuraftBuffer buf; DB::WriteBufferFromNuraftBuffer buf;

View File

@ -1,12 +1,15 @@
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
#include <Parsers/formatAST.h>
#include <Interpreters/ExpressionActions.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnConst.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Common/FieldVisitors.h> #include <Common/FieldVisitors.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnNullable.h>
#include <DataStreams/CheckConstraintsBlockOutputStream.h>
#include <Parsers/formatAST.h>
#include <Interpreters/ExpressionActions.h>
namespace DB namespace DB
@ -15,7 +18,7 @@ namespace DB
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int VIOLATED_CONSTRAINT; extern const int VIOLATED_CONSTRAINT;
extern const int LOGICAL_ERROR; extern const int UNSUPPORTED_METHOD;
} }
@ -48,62 +51,75 @@ void CheckConstraintsBlockOutputStream::write(const Block & block)
ColumnWithTypeAndName res_column = block_to_calculate.getByName(constraint_ptr->expr->getColumnName()); ColumnWithTypeAndName res_column = block_to_calculate.getByName(constraint_ptr->expr->getColumnName());
if (!isUInt8(res_column.type)) auto result_type = removeNullable(removeLowCardinality(res_column.type));
throw Exception(ErrorCodes::LOGICAL_ERROR, "Constraint {} does not return a value of type UInt8",
if (!isUInt8(result_type))
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Constraint {} does not return a value of type UInt8",
backQuote(constraint_ptr->name)); backQuote(constraint_ptr->name));
if (const ColumnConst * res_const = typeid_cast<const ColumnConst *>(res_column.column.get())) auto result_column = res_column.column->convertToFullColumnIfConst()->convertToFullColumnIfLowCardinality();
{
UInt8 value = res_const->getValue<UInt64>();
/// Is violated. if (const auto * column_nullable = checkAndGetColumn<ColumnNullable>(*result_column))
if (!value) {
{ const auto & nested_column = column_nullable->getNestedColumnPtr();
throw Exception(ErrorCodes::VIOLATED_CONSTRAINT,
"Constraint {} for table {} is violated, because it is a constant expression returning 0. " /// Check if constraint value is nullable
"It is most likely an error in table definition.", const auto & null_map = column_nullable->getNullMapColumn();
backQuote(constraint_ptr->name), table_id.getNameForLogs()); const PaddedPODArray<UInt8> & data = null_map.getData();
} bool null_map_contains_null = !memoryIsZero(data.raw_data(), data.size() * sizeof(UInt8));
if (null_map_contains_null)
throw Exception(
ErrorCodes::VIOLATED_CONSTRAINT,
"Constraint {} for table {} is violated. Expression: ({})."\
"Constraint expression returns nullable column that contains null value",
backQuote(constraint_ptr->name),
table_id.getNameForLogs(),
serializeAST(*(constraint_ptr->expr), true));
result_column = nested_column;
} }
else
const ColumnUInt8 & res_column_uint8 = assert_cast<const ColumnUInt8 &>(*result_column);
const UInt8 * data = res_column_uint8.getData().data();
size_t size = res_column_uint8.size();
/// Is violated.
if (!memoryIsByte(data, size, 1))
{ {
const ColumnUInt8 & res_column_uint8 = assert_cast<const ColumnUInt8 &>(*res_column.column); size_t row_idx = 0;
for (; row_idx < size; ++row_idx)
if (data[row_idx] != 1)
break;
const UInt8 * data = res_column_uint8.getData().data(); Names related_columns = constraint_expr->getRequiredColumns();
size_t size = res_column_uint8.size();
/// Is violated. bool first = true;
if (!memoryIsByte(data, size, 1)) String column_values_msg;
constexpr size_t approx_bytes_for_col = 32;
column_values_msg.reserve(approx_bytes_for_col * related_columns.size());
for (const auto & name : related_columns)
{ {
size_t row_idx = 0; const IColumn & column = *block.getByName(name).column;
for (; row_idx < size; ++row_idx) assert(row_idx < column.size());
if (data[row_idx] != 1)
break;
Names related_columns = constraint_expr->getRequiredColumns(); if (!first)
column_values_msg.append(", ");
bool first = true; column_values_msg.append(backQuoteIfNeed(name));
String column_values_msg; column_values_msg.append(" = ");
constexpr size_t approx_bytes_for_col = 32; column_values_msg.append(applyVisitor(FieldVisitorToString(), column[row_idx]));
column_values_msg.reserve(approx_bytes_for_col * related_columns.size()); first = false;
for (const auto & name : related_columns)
{
const IColumn & column = *block.getByName(name).column;
assert(row_idx < column.size());
if (!first)
column_values_msg.append(", ");
column_values_msg.append(backQuoteIfNeed(name));
column_values_msg.append(" = ");
column_values_msg.append(applyVisitor(FieldVisitorToString(), column[row_idx]));
first = false;
}
throw Exception(ErrorCodes::VIOLATED_CONSTRAINT,
"Constraint {} for table {} is violated at row {}. Expression: ({}). Column values: {}",
backQuote(constraint_ptr->name), table_id.getNameForLogs(), rows_written + row_idx + 1,
serializeAST(*(constraint_ptr->expr), true), column_values_msg);
} }
throw Exception(
ErrorCodes::VIOLATED_CONSTRAINT,
"Constraint {} for table {} is violated at row {}. Expression: ({}). Column values: {}",
backQuote(constraint_ptr->name),
table_id.getNameForLogs(),
rows_written + row_idx + 1,
serializeAST(*(constraint_ptr->expr), true),
column_values_msg);
} }
} }
} }

View File

@ -47,11 +47,13 @@ void registerDictionarySourceMysql(DictionarySourceFactory & factory)
# include <common/logger_useful.h> # include <common/logger_useful.h>
# include <Formats/MySQLBlockInputStream.h> # include <Formats/MySQLBlockInputStream.h>
# include "readInvalidateQuery.h" # include "readInvalidateQuery.h"
# include <mysqlxx/Exception.h>
# include <mysqlxx/PoolFactory.h> # include <mysqlxx/PoolFactory.h>
namespace DB namespace DB
{ {
static const UInt64 max_block_size = 8192; static const UInt64 max_block_size = 8192;
static const size_t default_num_tries_on_connection_loss = 3;
MySQLDictionarySource::MySQLDictionarySource( MySQLDictionarySource::MySQLDictionarySource(
@ -72,7 +74,10 @@ MySQLDictionarySource::MySQLDictionarySource(
, query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks} , query_builder{dict_struct, db, "", table, where, IdentifierQuotingStyle::Backticks}
, load_all_query{query_builder.composeLoadAllQuery()} , load_all_query{query_builder.composeLoadAllQuery()}
, invalidate_query{config.getString(config_prefix + ".invalidate_query", "")} , invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
, close_connection{config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false)} , close_connection(
config.getBool(config_prefix + ".close_connection", false) || config.getBool(config_prefix + ".share_connection", false))
, max_tries_for_mysql_block_input_stream(
config.getBool(config_prefix + ".fail_on_connection_loss", false) ? 1 : default_num_tries_on_connection_loss)
{ {
} }
@ -94,6 +99,7 @@ MySQLDictionarySource::MySQLDictionarySource(const MySQLDictionarySource & other
, invalidate_query{other.invalidate_query} , invalidate_query{other.invalidate_query}
, invalidate_query_response{other.invalidate_query_response} , invalidate_query_response{other.invalidate_query_response}
, close_connection{other.close_connection} , close_connection{other.close_connection}
, max_tries_for_mysql_block_input_stream{other.max_tries_for_mysql_block_input_stream}
{ {
} }
@ -114,13 +120,41 @@ std::string MySQLDictionarySource::getUpdateFieldAndDate()
} }
} }
BlockInputStreamPtr MySQLDictionarySource::retriedCreateMySqlBIStream(const std::string & data_fetch_query_str, const size_t max_tries)
{
size_t count_connection_lost = 0;
while (true)
{
auto connection = pool.get();
try
{
return std::make_shared<MySQLBlockInputStream>(
connection, data_fetch_query_str, sample_block, max_block_size, close_connection);
}
catch (const mysqlxx::ConnectionLost & ecl) /// There are two retriable failures: CR_SERVER_GONE_ERROR, CR_SERVER_LOST
{
if (++count_connection_lost < max_tries)
{
LOG_WARNING(log, ecl.displayText());
LOG_WARNING(log, "Lost connection ({}/{}). Trying to reconnect...", count_connection_lost, max_tries);
continue;
}
LOG_ERROR(log, "Failed ({}/{}) to create BlockInputStream for MySQL dictionary source.", count_connection_lost, max_tries);
throw;
}
}
}
BlockInputStreamPtr MySQLDictionarySource::loadAll() BlockInputStreamPtr MySQLDictionarySource::loadAll()
{ {
auto connection = pool.get(); auto connection = pool.get();
last_modification = getLastModification(connection, false); last_modification = getLastModification(connection, false);
LOG_TRACE(log, load_all_query); LOG_TRACE(log, load_all_query);
return std::make_shared<MySQLBlockInputStream>(connection, load_all_query, sample_block, max_block_size, close_connection); return retriedCreateMySqlBIStream(load_all_query, max_tries_for_mysql_block_input_stream);
} }
BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll() BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll()
@ -130,7 +164,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadUpdatedAll()
std::string load_update_query = getUpdateFieldAndDate(); std::string load_update_query = getUpdateFieldAndDate();
LOG_TRACE(log, load_update_query); LOG_TRACE(log, load_update_query);
return std::make_shared<MySQLBlockInputStream>(connection, load_update_query, sample_block, max_block_size, close_connection); return retriedCreateMySqlBIStream(load_update_query, max_tries_for_mysql_block_input_stream);
} }
BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector<UInt64> & ids) BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector<UInt64> & ids)
@ -138,7 +172,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadIds(const std::vector<UInt64> & i
/// We do not log in here and do not update the modification time, as the request can be large, and often called. /// We do not log in here and do not update the modification time, as the request can be large, and often called.
const auto query = query_builder.composeLoadIdsQuery(ids); const auto query = query_builder.composeLoadIdsQuery(ids);
return std::make_shared<MySQLBlockInputStream>(pool.get(), query, sample_block, max_block_size, close_connection); return retriedCreateMySqlBIStream(query, max_tries_for_mysql_block_input_stream);
} }
BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
@ -146,7 +180,7 @@ BlockInputStreamPtr MySQLDictionarySource::loadKeys(const Columns & key_columns,
/// We do not log in here and do not update the modification time, as the request can be large, and often called. /// We do not log in here and do not update the modification time, as the request can be large, and often called.
const auto query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN); const auto query = query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::AND_OR_CHAIN);
return std::make_shared<MySQLBlockInputStream>(pool.get(), query, sample_block, max_block_size, close_connection); return retriedCreateMySqlBIStream(query, max_tries_for_mysql_block_input_stream);
} }
bool MySQLDictionarySource::isModified() const bool MySQLDictionarySource::isModified() const

View File

@ -69,6 +69,9 @@ private:
// execute invalidate_query. expects single cell in result // execute invalidate_query. expects single cell in result
std::string doInvalidateQuery(const std::string & request) const; std::string doInvalidateQuery(const std::string & request) const;
/// A helper method for recovering from "Lost connection to MySQL server during query" errors
BlockInputStreamPtr retriedCreateMySqlBIStream(const std::string & query_str, const size_t max_tries);
Poco::Logger * log; Poco::Logger * log;
std::chrono::time_point<std::chrono::system_clock> update_time; std::chrono::time_point<std::chrono::system_clock> update_time;
@ -86,6 +89,7 @@ private:
std::string invalidate_query; std::string invalidate_query;
mutable std::string invalidate_query_response; mutable std::string invalidate_query_response;
const bool close_connection; const bool close_connection;
const size_t max_tries_for_mysql_block_input_stream;
}; };
} }

View File

@ -25,16 +25,18 @@ ColumnPtr ExecutableFunctionJoinGet<or_null>::execute(const ColumnsWithTypeAndNa
auto key = arguments[i]; auto key = arguments[i];
keys.emplace_back(std::move(key)); keys.emplace_back(std::move(key));
} }
return join->joinGet(keys, result_columns).column; return storage_join->joinGet(keys, result_columns).column;
} }
template <bool or_null> template <bool or_null>
ExecutableFunctionImplPtr FunctionJoinGet<or_null>::prepare(const ColumnsWithTypeAndName &) const ExecutableFunctionImplPtr FunctionJoinGet<or_null>::prepare(const ColumnsWithTypeAndName &) const
{ {
return std::make_unique<ExecutableFunctionJoinGet<or_null>>(join, DB::Block{{return_type->createColumn(), return_type, attr_name}}); Block result_columns {{return_type->createColumn(), return_type, attr_name}};
return std::make_unique<ExecutableFunctionJoinGet<or_null>>(table_lock, storage_join, result_columns);
} }
static auto getJoin(const ColumnsWithTypeAndName & arguments, const Context & context) static std::pair<std::shared_ptr<StorageJoin>, String>
getJoin(const ColumnsWithTypeAndName & arguments, const Context & context)
{ {
String join_name; String join_name;
if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get())) if (const auto * name_col = checkAndGetColumnConst<ColumnString>(arguments[0].column.get()))
@ -87,13 +89,12 @@ FunctionBaseImplPtr JoinGetOverloadResolver<or_null>::build(const ColumnsWithTyp
+ ", should be greater or equal to 3", + ", should be greater or equal to 3",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
auto [storage_join, attr_name] = getJoin(arguments, context); auto [storage_join, attr_name] = getJoin(arguments, context);
auto join = storage_join->getJoin();
DataTypes data_types(arguments.size() - 2); DataTypes data_types(arguments.size() - 2);
for (size_t i = 2; i < arguments.size(); ++i) for (size_t i = 2; i < arguments.size(); ++i)
data_types[i - 2] = arguments[i].type; data_types[i - 2] = arguments[i].type;
auto return_type = join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null); auto return_type = storage_join->joinGetCheckAndGetReturnType(data_types, attr_name, or_null);
auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
return std::make_unique<FunctionJoinGet<or_null>>(table_lock, storage_join, join, attr_name, data_types, return_type); return std::make_unique<FunctionJoinGet<or_null>>(table_lock, storage_join, attr_name, data_types, return_type);
} }
void registerFunctionJoinGet(FunctionFactory & factory) void registerFunctionJoinGet(FunctionFactory & factory)

View File

@ -9,14 +9,20 @@ namespace DB
class Context; class Context;
class HashJoin; class HashJoin;
using HashJoinPtr = std::shared_ptr<HashJoin>; class StorageJoin;
using StorageJoinPtr = std::shared_ptr<StorageJoin>;
template <bool or_null> template <bool or_null>
class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl class ExecutableFunctionJoinGet final : public IExecutableFunctionImpl
{ {
public: public:
ExecutableFunctionJoinGet(HashJoinPtr join_, const DB::Block & result_columns_) ExecutableFunctionJoinGet(TableLockHolder table_lock_,
: join(std::move(join_)), result_columns(result_columns_) {} StorageJoinPtr storage_join_,
const DB::Block & result_columns_)
: table_lock(std::move(table_lock_))
, storage_join(std::move(storage_join_))
, result_columns(result_columns_)
{}
static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet";
@ -29,7 +35,8 @@ public:
String getName() const override { return name; } String getName() const override { return name; }
private: private:
HashJoinPtr join; TableLockHolder table_lock;
StorageJoinPtr storage_join;
DB::Block result_columns; DB::Block result_columns;
}; };
@ -39,12 +46,11 @@ class FunctionJoinGet final : public IFunctionBaseImpl
public: public:
static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet"; static constexpr auto name = or_null ? "joinGetOrNull" : "joinGet";
FunctionJoinGet(TableLockHolder table_lock_, StoragePtr storage_join_, FunctionJoinGet(TableLockHolder table_lock_,
HashJoinPtr join_, String attr_name_, StorageJoinPtr storage_join_, String attr_name_,
DataTypes argument_types_, DataTypePtr return_type_) DataTypes argument_types_, DataTypePtr return_type_)
: table_lock(std::move(table_lock_)) : table_lock(std::move(table_lock_))
, storage_join(std::move(storage_join_)) , storage_join(storage_join_)
, join(std::move(join_))
, attr_name(std::move(attr_name_)) , attr_name(std::move(attr_name_))
, argument_types(std::move(argument_types_)) , argument_types(std::move(argument_types_))
, return_type(std::move(return_type_)) , return_type(std::move(return_type_))
@ -60,8 +66,7 @@ public:
private: private:
TableLockHolder table_lock; TableLockHolder table_lock;
StoragePtr storage_join; StorageJoinPtr storage_join;
HashJoinPtr join;
const String attr_name; const String attr_name;
DataTypes argument_types; DataTypes argument_types;
DataTypePtr return_type; DataTypePtr return_type;

View File

@ -0,0 +1,358 @@
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunctionImpl.h>
#include <common/find_symbols.h>
#include <Common/StringUtils/StringUtils.h>
/** A function to extract text from HTML or XHTML.
* It does not necessarily conform 100% to any of the HTML, XML or XHTML standards,
* but the implementation is reasonably accurate and it is fast.
*
* The rules are the following:
*
* 1. Comments are skipped. Example: <!-- test -->
* Comment must end with -->. Nested comments are not possible.
* Note: constructions like <!--> <!---> are not valid comments in HTML but will be skipped by other rules.
*
* 2. CDATA is pasted verbatim.
* Note: CDATA is XML/XHTML specific. But we still process it for "best-effort" approach.
*
* 3. 'script' and 'style' elements are removed with all their content.
* Note: it's assumed that closing tag cannot appear inside content.
* For example, in a JS string literal it has to be escaped as "<\/script>".
* Note: comments and CDATA are possible inside script or style - then closing tags are not searched inside CDATA.
* Example: <script><![CDATA[</script>]]></script>
* But still searched inside comments. Sometimes it becomes complicated:
* <script>var x = "<!--"; </script> var y = "-->"; alert(x + y);</script>
* Note: script and style can be the names of XML namespaces - then they are not treated like the usual script or style.
* Example: <script:a>Hello</script:a>.
* Note: whitespaces are possible after closing tag name: </script > but not before: < / script>.
*
* 4. Other tags or tag-like elements are skipped without inner content.
* Example: <a>.</a>
* Note: it's expected that this HTML is illegal: <a test=">"></a>
* Note: it will also skip something like tags: <>, <!>, etc.
* Note: tag without end will be skipped to the end of input: <hello
* >
* 5. HTML and XML entities are not decoded.
* It should be processed by separate function.
*
* 6. Whitespaces in text are collapsed or inserted by specific rules.
* Whitespaces at beginning and at the end are removed.
* Consecutive whitespaces are collapsed.
* But if text is separated by other elements and there is no whitespace, it is inserted.
* It may be unnatural, examples: Hello<b>world</b>, Hello<!-- -->world
* - in HTML there will be no whitespace, but the function will insert it.
* But also consider: Hello<p>world</p>, Hello<br>world.
* This behaviour is reasonable for data analysis, e.g. convert HTML to a bag of words.
*
* 7. Also note that correct handling of whitespaces would require
* support of <pre></pre> and CSS display and white-space properties.
*
* Usage example:
*
* SELECT extractTextFromHTML(html) FROM url('https://yandex.ru/', RawBLOB, 'html String')
*
* - ClickHouse has embedded web browser.
*/
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
/// Returns true if the buffer [s, end) begins with the NUL-terminated `prefix`
/// AND at least one more byte follows the prefix inside the buffer.
/// A prefix that ends exactly at `end` does not match (the comparison is deliberately kept strict).
inline bool startsWith(const char * s, const char * end, const char * prefix)
{
    const size_t prefix_size = strlen(prefix);

    /// Compare lengths instead of computing `s + prefix_size`:
    /// that pointer may lie far beyond the end of the buffer, which is undefined behaviour.
    return s < end
        && static_cast<size_t>(end - s) > prefix_size
        && 0 == memcmp(s, prefix, prefix_size);
}
/// If the input at `s` starts with `prefix` (as decided by startsWith), moves `s` past the prefix and returns true.
/// Otherwise returns false and leaves `s` where it was.
inline bool checkAndSkip(const char * __restrict & s, const char * end, const char * prefix)
{
    if (!startsWith(s, end, prefix))
        return false;

    s += strlen(prefix);
    return true;
}
/// Skips a comment located at `src`.
/// Returns false (and does not move `src`) if the input does not start with "<!--".
/// Otherwise returns true:
///  - if the terminating "-->" is found, `src` is placed right after it;
///  - if it is not found, `src` is left after the last '>' that was seen
///    (or right after "<!--" when there is no '>' at all).
bool processComment(const char * __restrict & src, const char * end)
{
    if (!checkAndSkip(src, end, "<!--"))
        return false;

    /// Both dashes of the terminator must belong to the comment body, not to the opening "<!--"
    /// (that is why "<!-->" and "<!--->" are not complete comments).
    /// The distance is measured from the beginning of the body and not from the moving `src`,
    /// otherwise a "-->" that directly follows some '>' inside the comment would be missed.
    const char * const body = src;

    while (true)
    {
        const char * gt = find_first_symbols<'>'>(src, end);
        if (gt >= end)
            break;

        src = gt + 1;

        if (static_cast<size_t>(gt - body) >= strlen("--") && gt[-1] == '-' && gt[-2] == '-')
            break;
    }

    return true;
}
/// Handles a CDATA section located at `src`.
/// Returns false without moving `src` unless the input starts with "<![CDATA[".
/// Otherwise returns true. If the terminating "]]>" is found, the content between the markers
/// is copied verbatim to `dst` (only when `dst` is not null) and `src` is placed after the terminator.
/// If there is no terminator, only the opening marker is consumed.
bool processCDATA(const char * __restrict & src, const char * end, char * __restrict & dst)
{
    if (!checkAndSkip(src, end, "<![CDATA["))
        return false;

    /// Visit every '>' after the opening marker until one is preceded by "]]".
    for (const char * pos = find_first_symbols<'>'>(src, end); pos < end; pos = find_first_symbols<'>'>(pos + 1, end))
    {
        if (pos[-1] != ']' || pos[-2] != ']')
            continue;

        if (dst)
        {
            const size_t content_size = pos - src - strlen("]]");
            memcpy(dst, src, content_size);
            dst += content_size;
        }

        src = pos + 1;
        break;
    }

    return true;
}
/// Skips an element named `tag_name` (e.g. "script") together with all its content.
///
/// Returns false and leaves `src` untouched when the input at `src` is not an opening tag of that element:
/// there is no '<', the name differs, the name is not followed by whitespace or '>'
/// (e.g. it is an XML namespace like <script:a>), or the opening tag has no closing '>'.
/// In the latter case the caller falls back to skipTag, which skips the unterminated tag to the end of input.
///
/// Returns true when the opening tag was recognized. Then `src` is placed after the matching
/// closing tag (whitespace is allowed between the name and '>'), or near the end of input if there is none.
/// CDATA sections inside the content are skipped, so a closing tag inside CDATA is not taken into account.
bool processElementAndSkipContent(const char * __restrict & src, const char * end, const char * tag_name)
{
    const auto * old_src = src;

    if (!(src < end && *src == '<'))
        return false;
    ++src;

    if (!checkAndSkip(src, end, tag_name)
        || src >= end
        || !(isWhitespaceASCII(*src) || *src == '>'))
    {
        src = old_src;
        return false;
    }

    const char * gt = find_first_symbols<'>'>(src, end);
    if (gt >= end)
    {
        /// The position must be restored on every failure path,
        /// otherwise the remaining handlers would see the middle of the tag as plain text.
        src = old_src;
        return false;
    }

    src = gt + 1;

    while (true)
    {
        const char * lt = find_first_symbols<'<'>(src, end);
        src = lt;
        if (src + 1 >= end)
            break;

        ++src;

        /// Skip CDATA
        if (*src == '!')
        {
            --src;
            char * dst = nullptr;
            if (processCDATA(src, end, dst))
                continue;
            ++src;
        }

        if (*src != '/')
            continue;
        ++src;

        if (checkAndSkip(src, end, tag_name))
        {
            while (src < end && isWhitespaceASCII(*src))
                ++src;

            if (src >= end)
                break;

            if (*src == '>')
            {
                ++src;
                break;
            }
        }
    }

    return true;
}
/// Skips a tag or any tag-like construction that begins with '<' at `src`:
/// moves `src` after the nearest '>' or to `end` if there is no '>'.
/// Returns false without moving `src` if the input does not start with '<'.
bool skipTag(const char * __restrict & src, const char * end)
{
    if (src >= end || *src != '<')
        return false;

    src = find_first_symbols<'>'>(src, end);
    if (src < end)
        ++src;

    return true;
}
/// Copies the text between `src` and the nearest '<' (or `end`) to `dst`.
/// Leading and trailing whitespace is dropped, every inner run of whitespace becomes a single space.
/// If `needs_whitespace` is set and there is some text to copy, a space is written before it.
/// On return `src` points to the '<' (or to `end`) and `dst` points after the last written byte.
void copyText(const char * __restrict & src, const char * end, char * __restrict & dst, bool needs_whitespace)
{
    while (src < end && isWhitespaceASCII(*src))
        ++src;

    const char * const text_end = find_first_symbols<'<'>(src, end);

    if (needs_whitespace && src < text_end)
        *dst++ = ' ';

    for (;;)
    {
        /// Copy one word.
        const char * const word_end = find_first_symbols<' ', '\t', '\n', '\r', '\f', '\v'>(src, text_end);
        const size_t word_size = word_end - src;
        memcpy(dst, src, word_size);
        dst += word_size;
        src = word_end;

        /// Skip the whitespace after it.
        while (src < text_end && isWhitespaceASCII(*src))
            ++src;

        /// The separator is written only if another word follows.
        if (src >= text_end)
            break;

        *dst++ = ' ';
    }

    src = text_end;
}
/// Extracts text from `size` bytes of HTML at `src` into `dst` and returns the number of bytes written.
size_t extract(const char * __restrict src, size_t size, char * __restrict dst)
{
    /** There are the following rules:
      * - comments are removed with all their content;
      * - elements 'script' and 'style' are removed with all their content;
      * - for other elements tags are removed but content is processed as text;
      * - CDATA should be copied verbatim;
      */

    const char * const end = src + size;
    char * const dst_begin = dst;

    while (src < end)
    {
        /// Separate this piece of text from the previous output unless the output is empty or already ends with a space.
        const bool needs_whitespace = dst != dst_begin && dst[-1] != ' ';
        copyText(src, end, dst, needs_whitespace);

        /// Try the handlers in order of priority; the first one that recognizes the input wins.
        if (processComment(src, end))
            continue;
        if (processCDATA(src, end, dst))
            continue;
        if (processElementAndSkipContent(src, end, "script"))
            continue;
        if (processElementAndSkipContent(src, end, "style"))
            continue;
        skipTag(src, end);
    }

    return dst - dst_begin;
}
}
/// Function extractTextFromHTML: takes one String argument and returns a String of the same type
/// with the text extracted by `extract` applied to every row.
class FunctionExtractTextFromHTML : public IFunction
{
public:
    static constexpr auto name = "extractTextFromHTML";

    static FunctionPtr create(const Context &) { return std::make_shared<FunctionExtractTextFromHTML>(); }

    String getName() const override { return name; }

    size_t getNumberOfArguments() const override { return 1; }

    bool useDefaultImplementationForConstants() const override { return true; }

    /// The only argument must be a String; the result has the same type.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (isString(arguments[0]))
            return arguments[0];

        throw Exception(
            "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t rows) const override
    {
        const ColumnString * input = checkAndGetColumn<ColumnString>(arguments[0].column.get());
        if (!input)
            throw Exception("First argument for function " + getName() + " must be string.", ErrorCodes::ILLEGAL_COLUMN);

        const ColumnString::Chars & input_chars = input->getChars();
        const ColumnString::Offsets & input_offsets = input->getOffsets();

        auto result = ColumnString::create();
        ColumnString::Chars & output_chars = result->getChars();
        ColumnString::Offsets & output_offsets = result->getOffsets();

        /// The output buffer is sized like the input buffer and shrunk to the actual size at the end.
        output_chars.resize(input_chars.size());
        output_offsets.resize(input_offsets.size());

        ColumnString::Offset row_begin = 0;
        ColumnString::Offset written = 0;

        for (size_t row = 0; row < rows; ++row)
        {
            const auto row_end = input_offsets[row];

            /// The last byte of every row is the terminating zero, it is not passed to `extract`.
            const char * html = reinterpret_cast<const char *>(&input_chars[row_begin]);
            const size_t html_size = row_end - row_begin - 1;
            char * text = reinterpret_cast<char *>(&output_chars[written]);

            written += extract(html, html_size, text);

            output_chars[written] = 0;
            ++written;
            output_offsets[row] = written;

            row_begin = row_end;
        }

        output_chars.resize(written);
        return result;
    }
};
/// Registers FunctionExtractTextFromHTML in the given function factory.
void registerFunctionExtractTextFromHTML(FunctionFactory & factory)
{
factory.registerFunction<FunctionExtractTextFromHTML>();
}
}

View File

@ -1,582 +0,0 @@
#include <Columns/ColumnString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunctionImpl.h>
#include <utility>
#include <vector>
#include <algorithm>
#if USE_HYPERSCAN
# include <hs.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int CANNOT_ALLOCATE_MEMORY;
extern const int NOT_IMPLEMENTED;
}
namespace
{
struct HxCoarseParseImpl
{
private:
// One match: the id of the matched pattern and the [first, second) byte range of the match.
struct SpanInfo
{
    unsigned int id = 0;
    std::pair<unsigned long long, unsigned long long> match_space{0, 0}; // NOLINT

    SpanInfo() = default;
    SpanInfo(unsigned int matchId, std::pair<unsigned long long, unsigned long long> matchSpan) // NOLINT
        : id(matchId), match_space(matchSpan)
    {
    }
    SpanInfo(const SpanInfo& obj) = default;
    SpanInfo& operator=(const SpanInfo& obj) = default;
};
using SpanElement = std::vector<SpanInfo>;
// State accumulated by the match callback while one string is scanned.
struct Span
{
    Span() = default;

    SpanElement copy_stack;         // copy area
    SpanElement tag_stack;          // regexp area
    SpanInfo script_ptr;            // script pointer
    bool set_script = false;        // whether set script
    SpanInfo style_ptr;             // style pointer
    bool set_style = false;         // whether set style
    SpanInfo semi_ptr;              // tag ptr
    bool set_semi = false;          // whether set semi
    bool is_finding_cdata = false;
};
// Appends `bytes_to_copy` bytes of src_chars starting at `current_copy_loc` to dst_chars
// at `current_dst_string_offset`, avoiding a doubled space at the junction.
// If `is_space` is non-zero, a single space is appended afterwards unless the output
// is empty or already ends with a zero byte or a space.
// Both offsets are passed by reference and advanced.
static inline void copyZone(
    ColumnString::Offset& current_dst_string_offset,
    ColumnString::Offset& current_copy_loc,
    ColumnString::Chars& dst_chars,
    const ColumnString::Chars& src_chars,
    size_t bytes_to_copy,
    unsigned is_space
)
{
    // True when nothing was written yet or the last written byte is a zero byte or a space.
    const auto ends_with_separator = [&]
    {
        return current_dst_string_offset == 0
            || dst_chars[current_dst_string_offset - 1] == 0
            || dst_chars[current_dst_string_offset - 1] == ' ';
    };

    const bool is_last_space = ends_with_separator();

    if (bytes_to_copy == 0)
    {
        if (is_space && !is_last_space)
            dst_chars[current_dst_string_offset++] = ' ';
        return;
    }

    // Do not start the chunk with a space if the output already ends with a separator.
    if (is_last_space && src_chars[current_copy_loc] == ' ')
    {
        --bytes_to_copy;
        ++current_copy_loc;
    }

    if (bytes_to_copy > 0)
    {
        memcpySmallAllowReadWriteOverflow15(
            &dst_chars[current_dst_string_offset], &src_chars[current_copy_loc], bytes_to_copy);
        current_dst_string_offset += bytes_to_copy;
    }

    // separator is space and last character is not space.
    if (is_space && !ends_with_separator())
        dst_chars[current_dst_string_offset++] = ' ';
}
// Removes entries from the top of `stack` while the end of the top entry
// lies strictly between `from` and `to`.
static inline void popArea(SpanElement& stack, unsigned long long from, unsigned long long to) //NOLINT
{
    while (!stack.empty())
    {
        const auto top_end = stack.back().match_space.second;
        const bool end_is_inside = from < top_end && top_end < to;
        if (!end_is_inside)
            return;

        stack.pop_back();
    }
}
// Re-processes the matches buffered in matches->tag_stack.
// executeInternal calls it after a scan that finished while is_finding_cdata was still set.
//
// First, entries are removed from copy_stack down to and including the most recent entry with id 10.
// Then every entry of tag_stack is dispatched by its id, similarly to what spanCollect does
// when it is not in the CDATA-search mode.
//
// NOTE(review): the numeric ids index the `patterns` table that is defined outside of this block.
// Presumably id 10 is the opening CDATA marker (spanCollect sets is_finding_cdata on it) - confirm.
static void dealCommonTag(Span* matches)
{
// Drop everything that was pushed to copy_stack after the last id 10 entry ...
while (!matches->copy_stack.empty() && matches->copy_stack.back().id != 10)
{
matches->copy_stack.pop_back();
}
// ... and the id 10 entry itself, if there is one.
if (!matches->copy_stack.empty())
{
matches->copy_stack.pop_back();
}
unsigned long long from; // NOLINT
unsigned long long to; // NOLINT
unsigned id;
for (auto begin = matches->tag_stack.begin(); begin != matches->tag_stack.end(); ++begin)
{
from = begin->match_space.first;
to = begin->match_space.second;
id = begin->id;
switch (id)
{
case 12:
case 13:
{
// Pushed to copy_stack only if it does not start before the end of the current top entry.
popArea(matches->copy_stack, from, to);
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
break;
}
case 0:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
case 10:
{
// Remember the match in semi_ptr if nothing is pending or if it starts at the same position as the pending one.
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
{
matches->set_semi = true;
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
}
break;
}
case 1:
{
// id 1 completes the pending semi_ptr match (if any); what happens depends on the id of the pending match.
if (matches->set_semi)
{
switch (matches->semi_ptr.id)
{
case 0:
case 2:
case 3:
case 6:
case 7:
case 10:
{
if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
{
if (!matches->set_script)
{
matches->set_script = true;
matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
}
}
else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
{
if (!matches->set_style)
{
matches->set_style = true;
matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
}
}
popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
// NOTE(review): id 0 is pushed here, while the corresponding branch of spanCollect pushes semi_ptr.id - confirm that it is intended.
matches->copy_stack.push_back(SpanInfo(0, std::make_pair(matches->semi_ptr.match_space.first, to)));
matches->set_semi = false;
break;
}
case 4:
case 5:
case 8:
case 9:
{
// The pushed zone ends at `to`; it starts at the remembered script/style match if one is open and matches, otherwise at the pending match.
SpanInfo complete_zone;
complete_zone.match_space.second = to;
if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
{
complete_zone.id = matches->script_ptr.id;
complete_zone.match_space.first = matches->script_ptr.match_space.first;
matches->set_script = false;
}
else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
{
complete_zone.id = matches->style_ptr.id;
complete_zone.match_space.first = matches->style_ptr.match_space.first;
matches->set_style = false;
}
else
{
complete_zone.id = matches->semi_ptr.id;
complete_zone.match_space.first = matches->semi_ptr.match_space.first;
}
popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
matches->copy_stack.push_back(complete_zone);
matches->set_semi = false;
break;
}
}
}
break;
}
default:
{
break;
}
}
}
// return;
}
// Match callback that executeInternal passes to hs_scan; `ctx` is the Span that accumulates the state.
// Always returns 0.
//
// While is_finding_cdata is set, matches are buffered in tag_stack (ids 12 and 13 also go to copy_stack)
// until a match with id 11 arrives; that match is pushed to copy_stack, the buffer is cleared and the mode is left.
// Otherwise the match is dispatched by its id; id 10 switches the CDATA-search mode on.
//
// NOTE(review): the numeric ids index the `patterns` table that is defined outside of this block.
// Presumably 10 / 11 are the opening / closing CDATA markers - confirm.
static int spanCollect(unsigned int id,
unsigned long long from, // NOLINT
unsigned long long to, // NOLINT
unsigned int , void * ctx)
{
Span* matches = static_cast<Span*>(ctx);
// For every id except 12 the reported start is replaced with (end of match - stored length of the pattern).
from = id == 12 ? from : to - patterns_length[id];
if (matches->is_finding_cdata)
{
if (id == 11)
{
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
matches->is_finding_cdata = false;
matches->tag_stack.clear();
// A pending match with id 10 is discarded.
if (matches->semi_ptr.id == 10)
{
matches->set_semi = false;
}
}
else if (id == 12 || id == 13)
{
popArea(matches->copy_stack, from, to);
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
popArea(matches->tag_stack, from, to);
if (matches->tag_stack.empty() || from >= matches->tag_stack.back().match_space.second)
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
}
else
{
// Other matches are only buffered; dealCommonTag processes them if id 11 never arrives.
popArea(matches->tag_stack, from, to);
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
}
}
else
{
switch (id)
{
case 12:
case 13:
{
// Pushed to copy_stack only if it does not start before the end of the current top entry.
popArea(matches->copy_stack, from, to);
if (matches->copy_stack.empty() || from >= matches->copy_stack.back().match_space.second)
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
break;
}
case 0:
case 2:
case 3:
case 4:
case 5:
case 6:
case 7:
case 8:
case 9:
{
// Remember the match in semi_ptr if nothing is pending or if it starts at the same position as the pending one.
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
{
matches->set_semi = true;
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
}
break;
}
case 10:
{
if (!matches->set_semi || (matches->set_semi && from == matches->semi_ptr.match_space.first))
{
matches->set_semi = true;
matches->semi_ptr = SpanInfo(id, std::make_pair(from, to));
}
// Enter the CDATA-search mode and record the match on both stacks.
matches->is_finding_cdata = true;
matches->copy_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
matches->tag_stack.push_back(SpanInfo(id, std::make_pair(from, to)));
break;
}
case 1:
{
// id 1 completes the pending semi_ptr match (if any); what happens depends on the id of the pending match.
if (matches->set_semi)
{
switch (matches->semi_ptr.id)
{
case 0:
case 2:
case 3:
case 6:
case 7:
case 10:
{
if (matches->semi_ptr.id == 2 || (matches->semi_ptr.id == 3 && matches->semi_ptr.match_space.second == from))
{
if (!matches->set_script)
{
matches->set_script = true;
matches->script_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
}
}
else if (matches->semi_ptr.id == 6 || (matches->semi_ptr.id == 7 && matches->semi_ptr.match_space.second == from))
{
if (!matches->set_style)
{
matches->set_style = true;
matches->style_ptr = SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to));
}
}
popArea(matches->copy_stack, matches->semi_ptr.match_space.first, to);
matches->copy_stack.push_back(SpanInfo(matches->semi_ptr.id, std::make_pair(matches->semi_ptr.match_space.first, to)));
matches->set_semi = false;
break;
}
case 4:
case 5:
case 8:
case 9:
{
// The pushed zone ends at `to`; it starts at the remembered script/style match if one is open and matches, otherwise at the pending match.
SpanInfo complete_zone;
complete_zone.match_space.second = to;
if (matches->set_script && (matches->semi_ptr.id == 4 || (matches->semi_ptr.id == 5 && matches->semi_ptr.match_space.second == from)))
{
complete_zone.id = matches->script_ptr.id;
complete_zone.match_space.first = matches->script_ptr.match_space.first;
matches->set_script = false;
}
else if (matches->set_style && (matches->semi_ptr.id == 8 || (matches->semi_ptr.id == 9 && matches->semi_ptr.match_space.second == from)))
{
complete_zone.id = matches->style_ptr.id;
complete_zone.match_space.first = matches->style_ptr.match_space.first;
matches->set_style = false;
}
else
{
complete_zone.id = matches->semi_ptr.id;
complete_zone.match_space.first = matches->semi_ptr.match_space.first;
}
popArea(matches->copy_stack, complete_zone.match_space.first, complete_zone.match_space.second);
matches->copy_stack.push_back(complete_zone);
matches->set_semi = false;
break;
}
}
}
break;
}
default:
{
break;
}
}
}
return 0;
}
#if USE_HYPERSCAN
/** Compiles a set of regular expressions into a single Hyperscan database.
  *
  * @param expressions  NUL-terminated pattern strings.
  * @param flags        Per-pattern Hyperscan flags; read element-wise alongside `expressions`.
  * @param id           Per-pattern identifiers reported to the match callback.
  * @param mode         Hyperscan compile mode (the visible caller passes HS_MODE_BLOCK).
  * @return Newly compiled database; the caller is responsible for hs_free_database().
  * @throws Exception if hs_compile_multi() fails. The text reported by the Hyperscan
  *         compiler is appended to the message so that the offending pattern can be found.
  */
static hs_database_t * buildDatabase(
    const std::vector<const char *> & expressions,
    const std::vector<unsigned> & flags,
    const std::vector<unsigned> & id,
    unsigned int mode)
{
    /// Initialised so that neither pointer is ever read as garbage on an error path.
    hs_database_t * db = nullptr;
    hs_compile_error_t * compile_err = nullptr;

    const hs_error_t err = hs_compile_multi(
        expressions.data(), flags.data(), id.data(),
        expressions.size(), mode, nullptr, &db, &compile_err);

    if (err != HS_SUCCESS)
    {
        /// Copy the diagnostic before releasing the error object that owns it.
        String reason = (compile_err && compile_err->message) ? compile_err->message : "unknown error";
        hs_free_compile_error(compile_err);
        /// NOTE(review): CANNOT_ALLOCATE_MEMORY is kept because it is the code callers see today,
        /// although a compilation failure is not necessarily a memory problem.
        throw Exception("Hyper scan database cannot be compiled: " + reason, ErrorCodes::CANNOT_ALLOCATE_MEMORY);
    }
    return db;
}
#endif
/// Parallel per-pattern tables: element i of each vector describes pattern number i.
/// `patterns`, `patterns_flag` and `ids` are handed together to buildDatabase() by executeInternal().
/// Regular expressions to compile.
static std::vector<const char*> patterns;
/// One length value per pattern; not referenced in the code visible here.
/// NOTE(review): presumably the length of the text each pattern matches - confirm against the match callback.
static std::vector<std::size_t> patterns_length;
/// Hyperscan compile flags, one per pattern.
static std::vector<unsigned> patterns_flag;
/// Identifiers under which matches of each pattern are reported.
static std::vector<unsigned> ids;
public:
/** Produces, for every string of the source column, a copy with the matched spans cut out.
  *
  * Each row is scanned with Hyperscan; the `spanCollect` callback accumulates the spans to
  * drop in `Span::copy_stack`. The pieces of the row lying before, between and after those
  * spans are then appended to the destination through copyZone().
  *
  * @param src_chars    Concatenated bytes of the source strings.
  * @param src_offsets  End offset of every source string inside `src_chars`.
  * @param dst_chars    Receives the bytes of the resulting strings.
  * @param dst_offsets  Receives the end offset of every resulting string inside `dst_chars`.
  * @throws Exception if the Hyperscan database or scratch space cannot be created, or
  *         (NOT_IMPLEMENTED) when the build has no Hyperscan support.
  */
static void executeInternal(
    const ColumnString::Chars & src_chars,
    const ColumnString::Offsets & src_offsets,
    ColumnString::Chars & dst_chars,
    ColumnString::Offsets & dst_offsets)
{
#if USE_HYPERSCAN
    hs_database_t * db = buildDatabase(patterns, patterns_flag, ids, HS_MODE_BLOCK);
    hs_scratch_t * scratch = nullptr;
    if (hs_alloc_scratch(db, &scratch) != HS_SUCCESS)
    {
        hs_free_database(db);
        throw Exception("Unable to allocate scratch space.", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
    }

    /// Everything below may throw (at least the resizes can), so the Hyperscan handles
    /// are released on the exception path as well as on the normal one.
    try
    {
        /// Upper bound for the output; trimmed to the real size after the loop.
        dst_chars.resize(src_chars.size());
        dst_offsets.resize(src_offsets.size());

        ColumnString::Offset current_src_string_offset = 0;
        ColumnString::Offset current_dst_string_offset = 0;
        ColumnString::Offset current_copy_loc;
        ColumnString::Offset current_copy_end;
        unsigned is_space;
        size_t bytes_to_copy;
        Span match_zoneall;

        for (size_t off = 0; off < src_offsets.size(); ++off)
        {
            /// NOTE(review): the return code of hs_scan() is not checked, exactly as before.
            hs_scan(db, reinterpret_cast<const char *>(&src_chars[current_src_string_offset]), src_offsets[off] - current_src_string_offset, 0, scratch, spanCollect, &match_zoneall);
            if (match_zoneall.is_finding_cdata)
            {
                dealCommonTag(&match_zoneall);
            }

            SpanElement & match_zone = match_zoneall.copy_stack;

            /// Piece from the beginning of the row up to the first span (or the whole row if there is none).
            current_copy_loc = current_src_string_offset;
            if (match_zone.empty())
            {
                current_copy_end = src_offsets[off];
                is_space = 0;
            }
            else
            {
                current_copy_end = current_src_string_offset + match_zone.begin()->match_space.first;
                /// Ids 12 and 13 are the whitespace patterns; copyZone() is told when the following span is one of them.
                is_space = (match_zone.begin()->id == 12 || match_zone.begin()->id == 13) ? 1 : 0;
            }
            bytes_to_copy = current_copy_end - current_copy_loc;
            copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);

            /// Pieces following each span: up to the next span, or up to the end of the row for the last one.
            for (auto begin = match_zone.begin(); begin != match_zone.end(); ++begin)
            {
                current_copy_loc = current_src_string_offset + begin->match_space.second;
                if (begin + 1 >= match_zone.end())
                {
                    current_copy_end = src_offsets[off];
                    is_space = 0;
                }
                else
                {
                    current_copy_end = current_src_string_offset + (begin + 1)->match_space.first;
                    is_space = ((begin + 1)->id == 12 || (begin + 1)->id == 13) ? 1 : 0;
                }
                bytes_to_copy = current_copy_end - current_copy_loc;
                copyZone(current_dst_string_offset, current_copy_loc, dst_chars, src_chars, bytes_to_copy, is_space);
            }

            /// If the byte before the last written one is a space, overwrite it with zero and
            /// shorten the row by one (presumably the last written byte is the terminating zero).
            if (current_dst_string_offset > 1 && dst_chars[current_dst_string_offset - 2] == ' ')
            {
                dst_chars[current_dst_string_offset - 2] = 0;
                --current_dst_string_offset;
            }

            dst_offsets[off] = current_dst_string_offset;
            current_src_string_offset = src_offsets[off];
            match_zoneall.copy_stack.clear();
            match_zoneall.tag_stack.clear();
        }

        /// Trim the destination to what was actually written. The previous
        /// `resize(dst_chars.size())` was a no-op and left the buffer at the source size.
        dst_chars.resize(current_dst_string_offset);
    }
    catch (...)
    {
        hs_free_scratch(scratch);
        hs_free_database(db);
        throw;
    }

    hs_free_scratch(scratch);
    hs_free_database(db);
#else
    (void)src_chars;
    (void)src_offsets;
    (void)dst_chars;
    (void)dst_offsets;
    throw Exception(
        "htmlOrXmlCoarseParse is not implemented when hyperscan is off (is it x86 processor?)",
        ErrorCodes::NOT_IMPLEMENTED);
#endif
}
};
/// Definitions of the static pattern tables of HxCoarseParseImpl.
/// All four tables have 14 entries; entry i of each one belongs to pattern number i.
/// Regular expressions, in the order of their identifiers.
std::vector<const char*> HxCoarseParseImpl::patterns =
{
"<[^\\s<>]", // 0 "<", except "< ", "<<", "<>"
">", // 1 ">"
"<script\\s", // 2 <script xxxxx>
"<script", // 3 <script>
"</script\\s", // 4 </script xxxx>
"</script", // 5 </script>
"<style\\s", // 6 <style xxxxxx>
"<style", // 7 <style>
"</style\\s", // 8 </style xxxxx>
"</style", // 9 </style>
"<!\\[CDATA\\[", // 10 <![CDATA[xxxxxx]]>
"\\]\\]>", // 11 ]]>
"\\s{2,}", // 12 " ", continuous blanks
"[^\\S ]" // 13 "\n", "\t" and other white space, it does not include single ' '.
};
/// One length value per pattern. The values equal the number of characters each pattern matches
/// (e.g. 8 for "<script" plus one whitespace character); 0 is used for pattern 12, whose match length varies.
std::vector<std::size_t> HxCoarseParseImpl::patterns_length =
{
2, 1, 8, 7, 9, 8, 7, 6, 8, 7, 9, 3, 0, 1
};
#if USE_HYPERSCAN
/// Hyperscan compile flags per pattern. Only pattern 12 sets HS_FLAG_SOM_LEFTMOST
/// (start-of-match reporting); presumably because its match length is not fixed.
/// This table is defined only when Hyperscan is available.
std::vector<unsigned> HxCoarseParseImpl::patterns_flag =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, HS_FLAG_SOM_LEFTMOST, 0
};
#endif
/// Identifiers reported for matches; each identifier equals the pattern's index.
std::vector<unsigned> HxCoarseParseImpl::ids =
{
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13
};
/// Function `htmlOrXmlCoarseParse`: takes a single String argument and returns a String
/// computed by HxCoarseParseImpl::executeInternal().
class FunctionHtmlOrXmlCoarseParse : public IFunction
{
public:
    static constexpr auto name = "htmlOrXmlCoarseParse";

    static FunctionPtr create(const Context &) { return std::make_shared<FunctionHtmlOrXmlCoarseParse>(); }

    String getName() const override { return name; }

    size_t getNumberOfArguments() const override { return 1; }

    bool useDefaultImplementationForConstants() const override { return true; }

    /// The result has the same type as the argument, which must be a String.
    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
        if (isString(arguments[0]))
            return arguments[0];

        throw Exception(
            "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

    /// Processes a full ColumnString; any other column kind is rejected.
    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override
    {
        const auto & input_column = arguments[0].column;
        const ColumnString * source = checkAndGetColumn<ColumnString>(input_column.get());

        if (!source)
            throw Exception("First argument for function " + getName() + " must be string.", ErrorCodes::ILLEGAL_COLUMN);

        auto result = ColumnString::create();
        HxCoarseParseImpl::executeInternal(source->getChars(), source->getOffsets(), result->getChars(), result->getOffsets());
        return result;
    }
};
}
/// Registers FunctionHtmlOrXmlCoarseParse in the given function factory.
void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory & factory)
{
factory.registerFunction<FunctionHtmlOrXmlCoarseParse>();
}
}
#endif

View File

@ -6,9 +6,7 @@ namespace DB
{ {
class FunctionFactory; class FunctionFactory;
#if USE_HYPERSCAN
void registerFunctionHtmlOrXmlCoarseParse(FunctionFactory &);
#endif
void registerFunctionRepeat(FunctionFactory &); void registerFunctionRepeat(FunctionFactory &);
void registerFunctionEmpty(FunctionFactory &); void registerFunctionEmpty(FunctionFactory &);
void registerFunctionNotEmpty(FunctionFactory &); void registerFunctionNotEmpty(FunctionFactory &);
@ -35,8 +33,9 @@ void registerFunctionRegexpQuoteMeta(FunctionFactory &);
void registerFunctionNormalizeQuery(FunctionFactory &); void registerFunctionNormalizeQuery(FunctionFactory &);
void registerFunctionNormalizedQueryHash(FunctionFactory &); void registerFunctionNormalizedQueryHash(FunctionFactory &);
void registerFunctionCountMatches(FunctionFactory &); void registerFunctionCountMatches(FunctionFactory &);
void registerFunctionEncodeXMLComponent(FunctionFactory & factory); void registerFunctionEncodeXMLComponent(FunctionFactory &);
void registerFunctionDecodeXMLComponent(FunctionFactory & factory); void registerFunctionDecodeXMLComponent(FunctionFactory &);
void registerFunctionExtractTextFromHTML(FunctionFactory &);
#if USE_BASE64 #if USE_BASE64
@ -47,9 +46,6 @@ void registerFunctionTryBase64Decode(FunctionFactory &);
void registerFunctionsString(FunctionFactory & factory) void registerFunctionsString(FunctionFactory & factory)
{ {
#if USE_HYPERSCAN
registerFunctionHtmlOrXmlCoarseParse(factory);
#endif
registerFunctionRepeat(factory); registerFunctionRepeat(factory);
registerFunctionEmpty(factory); registerFunctionEmpty(factory);
registerFunctionNotEmpty(factory); registerFunctionNotEmpty(factory);
@ -78,6 +74,7 @@ void registerFunctionsString(FunctionFactory & factory)
registerFunctionCountMatches(factory); registerFunctionCountMatches(factory);
registerFunctionEncodeXMLComponent(factory); registerFunctionEncodeXMLComponent(factory);
registerFunctionDecodeXMLComponent(factory); registerFunctionDecodeXMLComponent(factory);
registerFunctionExtractTextFromHTML(factory);
#if USE_BASE64 #if USE_BASE64
registerFunctionBase64Encode(factory); registerFunctionBase64Encode(factory);
registerFunctionBase64Decode(factory); registerFunctionBase64Decode(factory);

View File

@ -246,6 +246,7 @@ SRCS(
extractAllGroupsHorizontal.cpp extractAllGroupsHorizontal.cpp
extractAllGroupsVertical.cpp extractAllGroupsVertical.cpp
extractGroups.cpp extractGroups.cpp
extractTextFromHTML.cpp
extractTimeZoneFromFunctionArguments.cpp extractTimeZoneFromFunctionArguments.cpp
filesystem.cpp filesystem.cpp
finalizeAggregation.cpp finalizeAggregation.cpp
@ -291,7 +292,6 @@ SRCS(
hasToken.cpp hasToken.cpp
hasTokenCaseInsensitive.cpp hasTokenCaseInsensitive.cpp
hostName.cpp hostName.cpp
htmlOrXmlCoarseParse.cpp
hypot.cpp hypot.cpp
identity.cpp identity.cpp
if.cpp if.cpp

View File

@ -1,71 +0,0 @@
#pragma once
#include <vector>
#include <Common/ThreadPool.h>
#include <Common/MemoryTracker.h>
#include <IO/WriteBuffer.h>
namespace DB
{
/** Writes data asynchronously using double buffering.
*/
/// Wraps another WriteBuffer: callers fill a private memory block while a pool thread
/// calls next() on the wrapped buffer.
class AsynchronousWriteBuffer : public WriteBuffer
{
private:
WriteBuffer & out; /// The main buffer, responsible for writing data.
std::vector <char> memory; /// A piece of memory for duplicating the buffer.
ThreadPool pool; /// For asynchronous data writing.
bool started; /// Has an asynchronous data write started?
/// Swap the main and duplicate buffers.
/// NOTE(review): relies on WriteBuffer::swap exchanging the buffer state of `*this` and `out` - confirm.
void swapBuffers()
{
swap(out);
}
/// Hands the filled block over to the background thread.
/// Nothing is done when no bytes were written since the last call.
void nextImpl() override
{
if (!offset())
return;
/// A previously scheduled write must complete before the buffers are swapped again.
if (started)
pool.wait();
else
started = true;
swapBuffers();
/// The data will be written in a separate thread.
pool.scheduleOrThrowOnError([this] { thread(); });
}
public:
/// `memory` is sized like the wrapped buffer; the pool is constructed with the argument 1.
AsynchronousWriteBuffer(WriteBuffer & out_) : WriteBuffer(nullptr, 0), out(out_), memory(out.buffer().size()), pool(1), started(false)
{
/// Data is written to the duplicate buffer.
set(memory.data(), memory.size());
}
/// Waits for a pending background write, then writes the remaining data in the calling thread.
/// NOTE(review): out.next() is called from a destructor without a try/catch; an exception
/// thrown here would presumably terminate the program - confirm.
~AsynchronousWriteBuffer() override
{
/// FIXME move final flush into the caller
MemoryTracker::LockExceptionInThread lock;
if (started)
pool.wait();
swapBuffers();
out.next();
}
/// That is executed in a separate thread
void thread()
{
out.next();
}
};
}

View File

@ -64,29 +64,38 @@ void BrotliWriteBuffer::nextImpl()
in_data = reinterpret_cast<unsigned char *>(working_buffer.begin()); in_data = reinterpret_cast<unsigned char *>(working_buffer.begin());
in_available = offset(); in_available = offset();
do try
{ {
out->nextIfAtEnd(); do
out_data = reinterpret_cast<unsigned char *>(out->position());
out_capacity = out->buffer().end() - out->position();
int result = BrotliEncoderCompressStream(
brotli->state,
in_available ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
&in_available,
&in_data,
&out_capacity,
&out_data,
nullptr);
out->position() = out->buffer().end() - out_capacity;
if (result == 0)
{ {
throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED); out->nextIfAtEnd();
out_data = reinterpret_cast<unsigned char *>(out->position());
out_capacity = out->buffer().end() - out->position();
int result = BrotliEncoderCompressStream(
brotli->state,
in_available ? BROTLI_OPERATION_PROCESS : BROTLI_OPERATION_FINISH,
&in_available,
&in_data,
&out_capacity,
&out_data,
nullptr);
out->position() = out->buffer().end() - out_capacity;
if (result == 0)
{
throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED);
}
} }
while (in_available > 0);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
} }
while (in_available > 0 || out_capacity == 0);
} }
void BrotliWriteBuffer::finish() void BrotliWriteBuffer::finish()
@ -94,6 +103,23 @@ void BrotliWriteBuffer::finish()
if (finished) if (finished)
return; return;
try
{
finishImpl();
out->next();
finished = true;
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
finished = true;
throw;
}
}
void BrotliWriteBuffer::finishImpl()
{
next(); next();
while (true) while (true)
@ -115,7 +141,6 @@ void BrotliWriteBuffer::finish()
if (BrotliEncoderIsFinished(brotli->state)) if (BrotliEncoderIsFinished(brotli->state))
{ {
finished = true;
return; return;
} }

View File

@ -18,11 +18,14 @@ public:
~BrotliWriteBuffer() override; ~BrotliWriteBuffer() override;
void finish(); void finalize() override { finish(); }
private: private:
void nextImpl() override; void nextImpl() override;
void finish();
void finishImpl();
class BrotliStateWrapper; class BrotliStateWrapper;
std::unique_ptr<BrotliStateWrapper> brotli; std::unique_ptr<BrotliStateWrapper> brotli;

View File

@ -1,30 +0,0 @@
#include <common/types.h>
#include <Common/hex.h>
#include <Common/MemoryTracker.h>
#include <IO/HexWriteBuffer.h>
namespace DB
{
void HexWriteBuffer::nextImpl()
{
if (!offset())
return;
for (Position p = working_buffer.begin(); p != pos; ++p)
{
UInt8 byte = *p;
out.write(hexDigitUppercase(byte / 16));
out.write(hexDigitUppercase(byte % 16));
}
}
/// Writes out whatever is still pending in the working buffer by calling nextImpl() directly.
/// NOTE(review): `lock` presumably keeps the memory tracker from throwing inside the destructor - confirm.
HexWriteBuffer::~HexWriteBuffer()
{
/// FIXME move final flush into the caller
MemoryTracker::LockExceptionInThread lock;
nextImpl();
}
}

View File

@ -1,28 +0,0 @@
#pragma once
#include <IO/WriteBuffer.h>
/// Since HexWriteBuffer is often created in the inner loop, we'll make its buffer size small.
#define DBMS_HEX_WRITE_BUFFER_SIZE 32
namespace DB
{
/** Everything that is written into it, translates to HEX (in capital letters) and writes to another WriteBuffer.
*/
/// WriteBuffer backed by a small in-object array; what it does with the data is defined by nextImpl().
class HexWriteBuffer final : public WriteBuffer
{
protected:
/// Working memory of this buffer (DBMS_HEX_WRITE_BUFFER_SIZE bytes), handed to the base class in the constructor.
char buf[DBMS_HEX_WRITE_BUFFER_SIZE]; //-V730
/// Destination buffer; held by reference, so it must outlive this object.
WriteBuffer & out;
void nextImpl() override;
public:
/// Binds the buffer to `out_` and makes `buf` the working memory.
HexWriteBuffer(WriteBuffer & out_) : WriteBuffer(buf, sizeof(buf)), out(out_) {}
/// Defined out of line.
~HexWriteBuffer() override;
};
}

View File

@ -64,27 +64,36 @@ void LZMADeflatingWriteBuffer::nextImpl()
lstr.next_in = reinterpret_cast<unsigned char *>(working_buffer.begin()); lstr.next_in = reinterpret_cast<unsigned char *>(working_buffer.begin());
lstr.avail_in = offset(); lstr.avail_in = offset();
lzma_action action = LZMA_RUN; try
do
{ {
out->nextIfAtEnd(); lzma_action action = LZMA_RUN;
lstr.next_out = reinterpret_cast<unsigned char *>(out->position()); do
lstr.avail_out = out->buffer().end() - out->position(); {
out->nextIfAtEnd();
lstr.next_out = reinterpret_cast<unsigned char *>(out->position());
lstr.avail_out = out->buffer().end() - out->position();
lzma_ret ret = lzma_code(&lstr, action); lzma_ret ret = lzma_code(&lstr, action);
out->position() = out->buffer().end() - lstr.avail_out; out->position() = out->buffer().end() - lstr.avail_out;
if (ret == LZMA_STREAM_END) if (ret == LZMA_STREAM_END)
return; return;
if (ret != LZMA_OK) if (ret != LZMA_OK)
throw Exception( throw Exception(
ErrorCodes::LZMA_STREAM_ENCODER_FAILED, ErrorCodes::LZMA_STREAM_ENCODER_FAILED,
"lzma stream encoding failed: error code: {}; lzma_version: {}", "lzma stream encoding failed: error code: {}; lzma_version: {}",
ret, ret,
LZMA_VERSION_STRING); LZMA_VERSION_STRING);
} while (lstr.avail_in > 0 || lstr.avail_out == 0); } while (lstr.avail_in > 0 || lstr.avail_out == 0);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
} }
@ -93,6 +102,23 @@ void LZMADeflatingWriteBuffer::finish()
if (finished) if (finished)
return; return;
try
{
finishImpl();
out->next();
finished = true;
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
finished = true;
throw;
}
}
void LZMADeflatingWriteBuffer::finishImpl()
{
next(); next();
do do
@ -106,7 +132,6 @@ void LZMADeflatingWriteBuffer::finish()
if (ret == LZMA_STREAM_END) if (ret == LZMA_STREAM_END)
{ {
finished = true;
return; return;
} }

View File

@ -24,13 +24,16 @@ public:
char * existing_memory = nullptr, char * existing_memory = nullptr,
size_t alignment = 0); size_t alignment = 0);
void finish(); void finalize() override { finish(); }
~LZMADeflatingWriteBuffer() override; ~LZMADeflatingWriteBuffer() override;
private: private:
void nextImpl() override; void nextImpl() override;
void finish();
void finishImpl();
std::unique_ptr<WriteBuffer> out; std::unique_ptr<WriteBuffer> out;
lzma_stream lstr; lzma_stream lstr;
bool finished = false; bool finished = false;

View File

@ -75,19 +75,28 @@ void ZlibDeflatingWriteBuffer::nextImpl()
zstr.next_in = reinterpret_cast<unsigned char *>(working_buffer.begin()); zstr.next_in = reinterpret_cast<unsigned char *>(working_buffer.begin());
zstr.avail_in = offset(); zstr.avail_in = offset();
do try
{ {
out->nextIfAtEnd(); do
zstr.next_out = reinterpret_cast<unsigned char *>(out->position()); {
zstr.avail_out = out->buffer().end() - out->position(); out->nextIfAtEnd();
zstr.next_out = reinterpret_cast<unsigned char *>(out->position());
zstr.avail_out = out->buffer().end() - out->position();
int rc = deflate(&zstr, Z_NO_FLUSH); int rc = deflate(&zstr, Z_NO_FLUSH);
out->position() = out->buffer().end() - zstr.avail_out; out->position() = out->buffer().end() - zstr.avail_out;
if (rc != Z_OK) if (rc != Z_OK)
throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED); throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED);
}
while (zstr.avail_in > 0 || zstr.avail_out == 0);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
} }
while (zstr.avail_in > 0 || zstr.avail_out == 0);
} }
void ZlibDeflatingWriteBuffer::finish() void ZlibDeflatingWriteBuffer::finish()
@ -95,6 +104,23 @@ void ZlibDeflatingWriteBuffer::finish()
if (finished) if (finished)
return; return;
try
{
finishImpl();
out->next();
finished = true;
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
finished = true;
throw;
}
}
void ZlibDeflatingWriteBuffer::finishImpl()
{
next(); next();
/// https://github.com/zlib-ng/zlib-ng/issues/494 /// https://github.com/zlib-ng/zlib-ng/issues/494
@ -123,7 +149,6 @@ void ZlibDeflatingWriteBuffer::finish()
if (rc == Z_STREAM_END) if (rc == Z_STREAM_END)
{ {
finished = true;
return; return;
} }

View File

@ -22,16 +22,19 @@ public:
char * existing_memory = nullptr, char * existing_memory = nullptr,
size_t alignment = 0); size_t alignment = 0);
/// Flush all pending data and write zlib footer to the underlying buffer. void finalize() override { finish(); }
/// After the first call to this function, subsequent calls will have no effect and
/// an attempt to write to this buffer will result in exception.
void finish();
~ZlibDeflatingWriteBuffer() override; ~ZlibDeflatingWriteBuffer() override;
private: private:
void nextImpl() override; void nextImpl() override;
void finishImpl();
/// Flush all pending data and write zlib footer to the underlying buffer.
/// After the first call to this function, subsequent calls will have no effect and
/// an attempt to write to this buffer will result in exception.
void finish();
std::unique_ptr<WriteBuffer> out; std::unique_ptr<WriteBuffer> out;
z_stream zstr; z_stream zstr;
bool finished = false; bool finished = false;

View File

@ -61,28 +61,53 @@ void ZstdDeflatingWriteBuffer::nextImpl()
input.size = offset(); input.size = offset();
input.pos = 0; input.pos = 0;
bool finished = false; try
do
{ {
out->nextIfAtEnd(); bool ended = false;
do
{
out->nextIfAtEnd();
output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin()); output.dst = reinterpret_cast<unsigned char *>(out->buffer().begin());
output.size = out->buffer().size(); output.size = out->buffer().size();
output.pos = out->offset(); output.pos = out->offset();
ZSTD_compressStream2(cctx, &output, &input, mode); ZSTD_compressStream2(cctx, &output, &input, mode);
out->position() = out->buffer().begin() + output.pos; out->position() = out->buffer().begin() + output.pos;
finished = (input.pos == input.size); ended = (input.pos == input.size);
} while (!finished); } while (!ended);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
} }
void ZstdDeflatingWriteBuffer::finish() void ZstdDeflatingWriteBuffer::finish()
{ {
if (flushed) if (finished)
return; return;
try
{
finishImpl();
out->next();
finished = true;
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
finished = true;
throw;
}
}
void ZstdDeflatingWriteBuffer::finishImpl()
{
next(); next();
out->nextIfAtEnd(); out->nextIfAtEnd();
@ -99,7 +124,6 @@ void ZstdDeflatingWriteBuffer::finish()
if (ZSTD_isError(remaining)) if (ZSTD_isError(remaining))
throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder end failed: zstd version: {}", ZSTD_VERSION_STRING); throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder end failed: zstd version: {}", ZSTD_VERSION_STRING);
out->position() = out->buffer().begin() + output.pos; out->position() = out->buffer().begin() + output.pos;
flushed = true;
} }
} }

View File

@ -20,21 +20,24 @@ public:
char * existing_memory = nullptr, char * existing_memory = nullptr,
size_t alignment = 0); size_t alignment = 0);
/// Flush all pending data and write zstd footer to the underlying buffer. void finalize() override { finish(); }
/// After the first call to this function, subsequent calls will have no effect and
/// an attempt to write to this buffer will result in exception.
void finish();
~ZstdDeflatingWriteBuffer() override; ~ZstdDeflatingWriteBuffer() override;
private: private:
void nextImpl() override; void nextImpl() override;
/// Flush all pending data and write zstd footer to the underlying buffer.
/// After the first call to this function, subsequent calls will have no effect and
/// an attempt to write to this buffer will result in exception.
void finish();
void finishImpl();
std::unique_ptr<WriteBuffer> out; std::unique_ptr<WriteBuffer> out;
ZSTD_CCtx * cctx; ZSTD_CCtx * cctx;
ZSTD_inBuffer input; ZSTD_inBuffer input;
ZSTD_outBuffer output; ZSTD_outBuffer output;
bool flushed = false; bool finished = false;
}; };
} }

View File

@ -25,9 +25,6 @@ target_link_libraries (var_uint PRIVATE clickhouse_common_io)
add_executable (read_escaped_string read_escaped_string.cpp) add_executable (read_escaped_string read_escaped_string.cpp)
target_link_libraries (read_escaped_string PRIVATE clickhouse_common_io) target_link_libraries (read_escaped_string PRIVATE clickhouse_common_io)
add_executable (async_write async_write.cpp)
target_link_libraries (async_write PRIVATE dbms)
add_executable (parse_int_perf parse_int_perf.cpp) add_executable (parse_int_perf parse_int_perf.cpp)
target_link_libraries (parse_int_perf PRIVATE clickhouse_common_io) target_link_libraries (parse_int_perf PRIVATE clickhouse_common_io)

View File

@ -1,26 +0,0 @@
#include <iostream>
#include <IO/ReadBufferFromFileDescriptor.h>
#include <IO/WriteBufferFromFileDescriptor.h>
#include <IO/AsynchronousWriteBuffer.h>
#include <IO/copyData.h>
#include <Compression/CompressedWriteBuffer.h>
int main(int, char **)
try
{
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
DB::WriteBufferFromFileDescriptor out1(STDOUT_FILENO);
DB::AsynchronousWriteBuffer out2(out1);
DB::CompressedWriteBuffer out3(out2);
DB::copyData(in1, out3);
return 0;
}
catch (const DB::Exception & e)
{
std::cerr << e.what() << ", " << e.displayText() << std::endl;
return 1;
}

View File

@ -28,7 +28,7 @@ try
DB::writeIntText(i, lzma_buf); DB::writeIntText(i, lzma_buf);
DB::writeChar('\t', lzma_buf); DB::writeChar('\t', lzma_buf);
} }
lzma_buf.finish(); lzma_buf.finalize();
stopwatch.stop(); stopwatch.stop();

View File

@ -62,7 +62,6 @@ int main(int argc, char ** argv)
{ {
DB::WriteBufferFromVector wb(formatted); DB::WriteBufferFromVector wb(formatted);
// DB::CompressedWriteBuffer wb2(wb1); // DB::CompressedWriteBuffer wb2(wb1);
// DB::AsynchronousWriteBuffer wb(wb2);
Stopwatch watch; Stopwatch watch;
UInt64 tsc = rdtsc(); UInt64 tsc = rdtsc();

View File

@ -30,7 +30,7 @@ try
DB::writeIntText(i, deflating_buf); DB::writeIntText(i, deflating_buf);
DB::writeChar('\t', deflating_buf); DB::writeChar('\t', deflating_buf);
} }
deflating_buf.finish(); deflating_buf.finalize();
stopwatch.stop(); stopwatch.stop();
std::cout << "Writing done. Elapsed: " << stopwatch.elapsedSeconds() << " s." std::cout << "Writing done. Elapsed: " << stopwatch.elapsedSeconds() << " s."

View File

@ -30,7 +30,7 @@ try
DB::writeIntText(i, zstd_buf); DB::writeIntText(i, zstd_buf);
DB::writeChar('\t', zstd_buf); DB::writeChar('\t', zstd_buf);
} }
zstd_buf.finish(); zstd_buf.finalize();
stopwatch.stop(); stopwatch.stop();

View File

@ -29,7 +29,6 @@ SRCS(
HTTPChunkedReadBuffer.cpp HTTPChunkedReadBuffer.cpp
HTTPCommon.cpp HTTPCommon.cpp
HashingWriteBuffer.cpp HashingWriteBuffer.cpp
HexWriteBuffer.cpp
LZMADeflatingWriteBuffer.cpp LZMADeflatingWriteBuffer.cpp
LZMAInflatingReadBuffer.cpp LZMAInflatingReadBuffer.cpp
LimitReadBuffer.cpp LimitReadBuffer.cpp

View File

@ -6,7 +6,6 @@
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Common/parseAddress.h> #include <Common/parseAddress.h>
#include <Core/Settings.h> #include <Core/Settings.h>
#include <IO/HexWriteBuffer.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <Poco/Util/AbstractConfiguration.h> #include <Poco/Util/AbstractConfiguration.h>

View File

@ -1661,7 +1661,12 @@ void Context::resetZooKeeper() const
static void reloadZooKeeperIfChangedImpl(const ConfigurationPtr & config, const std::string & config_name, zkutil::ZooKeeperPtr & zk) static void reloadZooKeeperIfChangedImpl(const ConfigurationPtr & config, const std::string & config_name, zkutil::ZooKeeperPtr & zk)
{ {
if (!zk || zk->configChanged(*config, config_name)) if (!zk || zk->configChanged(*config, config_name))
{
if (zk)
zk->finalize();
zk = std::make_shared<zkutil::ZooKeeper>(*config, config_name); zk = std::make_shared<zkutil::ZooKeeper>(*config, config_name);
}
} }
void Context::reloadZooKeeperIfChanged(const ConfigurationPtr & config) const void Context::reloadZooKeeperIfChanged(const ConfigurationPtr & config) const

View File

@ -739,7 +739,7 @@ static JoinPtr tryGetStorageJoin(std::shared_ptr<TableJoin> analyzed_join)
{ {
if (auto * table = analyzed_join->joined_storage.get()) if (auto * table = analyzed_join->joined_storage.get())
if (auto * storage_join = dynamic_cast<StorageJoin *>(table)) if (auto * storage_join = dynamic_cast<StorageJoin *>(table))
return storage_join->getJoin(analyzed_join); return storage_join->getJoinLocked(analyzed_join);
return {}; return {};
} }

Some files were not shown because too many files have changed in this diff Show More