Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into brotli

Author: kssenii
Date: 2021-02-26 14:31:24 +00:00
Commit: 991718d459

418 changed files with 11696 additions and 3586 deletions

.github/codecov.yml

@@ -1,5 +1,5 @@
 codecov:
-  max_report_age: off
+  max_report_age: "off"
   strict_yaml_branch: "master"
   ignore:


@@ -8,7 +8,7 @@
 name: Docker Container Scan (clickhouse-server)
-on:
+"on":
   pull_request:
     paths:
       - docker/server/Dockerfile
@@ -20,20 +20,20 @@ jobs:
   Anchore-Build-Scan:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout the code
         uses: actions/checkout@v2
       - name: Build the Docker image
         run: |
           cd docker/server
           perl -pi -e 's|=\$version||g' Dockerfile
           docker build . --file Dockerfile --tag localbuild/testimage:latest
       - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
         uses: anchore/scan-action@v2
         id: scan
         with:
           image: "localbuild/testimage:latest"
           acs-report-enable: true
       - name: Upload Anchore Scan Report
         uses: github/codeql-action/upload-sarif@v1
         with:
           sarif_file: ${{ steps.scan.outputs.sarif }}


@@ -14,14 +14,14 @@ handlers:
   # The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker.
   github:pullRequest:labeled:
     data:
       # The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
       queue: CLICKHOUSEDOCS
       # The issue title.
       summary: '[Potato] Pull Request #{{pullRequest.number}}'
       # The issue description.
       description: >
         {{pullRequest.description}}
         Ссылка на Pull Request: {{pullRequest.webUrl}}
       # The condition for creating the Yandex.Tracker issue.
       condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length

.yamllint (new file)

@@ -0,0 +1,15 @@
+# vi: ft=yaml
+extends: default
+
+rules:
+  indentation:
+    level: warning
+    indent-sequences: consistent
+  line-length:
+    # there are some bash -c "", so this is OK
+    max: 300
+    level: warning
+  comments:
+    min-spaces-from-content: 1
+  document-start:
+    present: false


@@ -7,6 +7,7 @@
 #include <ctime>
 #include <string>

 #define DATE_LUT_MAX (0xFFFFFFFFU - 86400)
 #define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFFU / 86400)

 /// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows to remove extra check.
@@ -249,7 +250,7 @@ public:
     {
         DayNum index = findIndex(t);

-        if (unlikely(index == 0))
+        if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
             return t + offset_at_start_of_epoch;

         time_t res = t - lut[index].date;
@@ -264,18 +265,18 @@ public:
     {
         DayNum index = findIndex(t);

-        /// If it is not 1970 year (findIndex found nothing appropriate),
-        /// than limit number of hours to avoid insane results like 1970-01-01 89:28:15
-        if (unlikely(index == 0))
+        /// If it is overflow case,
+        /// then limit number of hours to avoid insane results like 1970-01-01 89:28:15
+        if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
             return static_cast<unsigned>((t + offset_at_start_of_epoch) / 3600) % 24;

-        time_t res = t - lut[index].date;
-        /// Data is cleaned to avoid possibility of underflow.
-        if (res >= lut[index].time_at_offset_change)
-            res += lut[index].amount_of_offset_change;
-        return res / 3600;
+        time_t time = t - lut[index].date;
+        if (time >= lut[index].time_at_offset_change)
+            time += lut[index].amount_of_offset_change;
+
+        unsigned res = time / 3600;
+        return res <= 23 ? res : 0;
     }

 /** Calculating offset from UTC in seconds.
@@ -314,12 +315,12 @@ public:
      * each minute, with added or subtracted leap second, spans exactly 60 unix timestamps.
      */
-    inline unsigned toSecond(time_t t) const { return t % 60; }
+    inline unsigned toSecond(time_t t) const { return UInt32(t) % 60; }

     inline unsigned toMinute(time_t t) const
     {
         if (offset_is_whole_number_of_hours_everytime)
-            return (t / 60) % 60;
+            return (UInt32(t) / 60) % 60;

         UInt32 date = find(t).date;
         return (UInt32(t) - date) / 60 % 60;
@@ -555,9 +556,7 @@ public:
         }
     }

-    /*
-     * check and change mode to effective
-     */
+    /// Check and change mode to effective.
     inline UInt8 check_week_mode(UInt8 mode) const
     {
         UInt8 week_format = (mode & 7);
@@ -566,10 +565,9 @@ public:
         return week_format;
     }

-    /*
-     * Calc weekday from d
-     * Returns 0 for monday, 1 for tuesday ...
-     */
+    /** Calculate weekday from d.
+      * Returns 0 for monday, 1 for tuesday...
+      */
     inline unsigned calc_weekday(DayNum d, bool sunday_first_day_of_week) const
     {
         if (!sunday_first_day_of_week)
@@ -578,7 +576,7 @@ public:
         return toDayOfWeek(DayNum(d + 1)) - 1;
     }

-    /* Calc days in one year. */
+    /// Calculate days in one year.
     inline unsigned calc_days_in_year(UInt16 year) const
     {
         return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
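
For context, a minimal standalone sketch of the clamping idea these hunks implement, with simplified types and a hypothetical `toHourClamped` helper (not ClickHouse's actual LUT code):

```cpp
#include <cstdint>
#include <ctime>

/// When the computed LUT index overflows past the table (index > max day
/// number), fall back to plain epoch arithmetic, and clamp the final hour
/// into [0, 23] so an out-of-range timestamp can never render as
/// "1970-01-01 89:28:15".
constexpr uint32_t max_day_num = 0xFFFFFFFFU / 86400;

unsigned toHourClamped(std::time_t t, uint32_t index, std::time_t day_start)
{
    if (index == 0 || index > max_day_num)
        return static_cast<unsigned>(t / 3600) % 24;  /// overflow fallback

    unsigned res = static_cast<unsigned>((t - day_start) / 3600);
    return res <= 23 ? res : 0;  /// clamp instead of propagating garbage
}
```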


@ -6,6 +6,25 @@
namespace common namespace common
{ {
/// Multiply and ignore overflow.
template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
{
return x * y;
}
template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED addIgnoreOverflow(T1 x, T2 y)
{
return x + y;
}
template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED subIgnoreOverflow(T1 x, T2 y)
{
return x - y;
}
template <typename T> template <typename T>
inline bool addOverflow(T x, T y, T & res) inline bool addOverflow(T x, T y, T & res)
{ {
@ -35,14 +54,14 @@ namespace common
{ {
static constexpr __int128 min_int128 = minInt128(); static constexpr __int128 min_int128 = minInt128();
static constexpr __int128 max_int128 = maxInt128(); static constexpr __int128 max_int128 = maxInt128();
res = x + y; res = addIgnoreOverflow(x, y);
return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y); return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y);
} }
template <> template <>
inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res) inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res)
{ {
res = x + y; res = addIgnoreOverflow(x, y);
return (y > 0 && x > std::numeric_limits<wInt256>::max() - y) || return (y > 0 && x > std::numeric_limits<wInt256>::max() - y) ||
(y < 0 && x < std::numeric_limits<wInt256>::min() - y); (y < 0 && x < std::numeric_limits<wInt256>::min() - y);
} }
@ -50,7 +69,7 @@ namespace common
template <> template <>
inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{ {
res = x + y; res = addIgnoreOverflow(x, y);
return x > std::numeric_limits<wUInt256>::max() - y; return x > std::numeric_limits<wUInt256>::max() - y;
} }
@ -83,14 +102,14 @@ namespace common
{ {
static constexpr __int128 min_int128 = minInt128(); static constexpr __int128 min_int128 = minInt128();
static constexpr __int128 max_int128 = maxInt128(); static constexpr __int128 max_int128 = maxInt128();
res = x - y; res = subIgnoreOverflow(x, y);
return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y);
} }
template <> template <>
inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res) inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res)
{ {
res = x - y; res = subIgnoreOverflow(x, y);
return (y < 0 && x > std::numeric_limits<wInt256>::max() + y) || return (y < 0 && x > std::numeric_limits<wInt256>::max() + y) ||
(y > 0 && x < std::numeric_limits<wInt256>::min() + y); (y > 0 && x < std::numeric_limits<wInt256>::min() + y);
} }
@ -98,7 +117,7 @@ namespace common
template <> template <>
inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{ {
res = x - y; res = subIgnoreOverflow(x, y);
return x < y; return x < y;
} }
@ -129,40 +148,33 @@ namespace common
template <> template <>
inline bool mulOverflow(__int128 x, __int128 y, __int128 & res) inline bool mulOverflow(__int128 x, __int128 y, __int128 & res)
{ {
res = static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(y); /// Avoid signed integer overflow. res = mulIgnoreOverflow(x, y);
if (!x || !y) if (!x || !y)
return false; return false;
unsigned __int128 a = (x > 0) ? x : -x; unsigned __int128 a = (x > 0) ? x : -x;
unsigned __int128 b = (y > 0) ? y : -y; unsigned __int128 b = (y > 0) ? y : -y;
return (a * b) / b != a; return mulIgnoreOverflow(a, b) / b != a;
} }
template <> template <>
inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res) inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res)
{ {
res = x * y; res = mulIgnoreOverflow(x, y);
if (!x || !y) if (!x || !y)
return false; return false;
wInt256 a = (x > 0) ? x : -x; wInt256 a = (x > 0) ? x : -x;
wInt256 b = (y > 0) ? y : -y; wInt256 b = (y > 0) ? y : -y;
return (a * b) / b != a; return mulIgnoreOverflow(a, b) / b != a;
} }
template <> template <>
inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res) inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{ {
res = x * y; res = mulIgnoreOverflow(x, y);
if (!x || !y) if (!x || !y)
return false; return false;
return (x * y) / y != x; return res / y != x;
}
/// Multiply and ignore overflow.
template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
{
return x * y;
} }
} }
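
The division trick these `mulOverflow` specializations rely on is easiest to see on ordinary 64-bit integers. A self-contained sketch (using `uint64_t` instead of `__int128`/wide integers, so wraparound is well-defined and no `NO_SANITIZE_UNDEFINED` is needed; `mulOverflowU64` is an illustrative name):

```cpp
#include <cstdint>
#include <cstdio>

/// Let the product wrap, then check whether dividing it back recovers the
/// original factor; if not, the multiplication overflowed.
bool mulOverflowU64(uint64_t x, uint64_t y, uint64_t & res)
{
    res = x * y;            /// wraps modulo 2^64 on overflow
    if (!x || !y)
        return false;       /// 0 * anything never overflows
    return res / y != x;    /// a wrapped product cannot be divided back
}

int main()
{
    uint64_t r;
    std::printf("%d\n", mulOverflowU64(1ULL << 33, 1ULL << 33, r)); /// 1 (overflow)
    std::printf("%d\n", mulOverflowU64(3, 7, r));                   /// 0 (fits, r == 21)
}
```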


@ -1,5 +1,20 @@
#pragma once #pragma once
/// __has_feature supported only by clang.
///
/// But libcxx/libcxxabi overrides it to 0,
/// thus the checks for __has_feature will be wrong.
///
/// NOTE:
/// - __has_feature cannot be simply undefined,
/// since this will be broken if some C++ header will be included after
/// including <common/defines.h>
/// - it should not have fallback to 0,
/// since this may create false-positive detection (common problem)
#if defined(__clang__) && defined(__has_feature)
# define ch_has_feature __has_feature
#endif
#if defined(_MSC_VER) #if defined(_MSC_VER)
# if !defined(likely) # if !defined(likely)
# define likely(x) (x) # define likely(x) (x)
@ -32,8 +47,8 @@
/// Check for presence of address sanitizer /// Check for presence of address sanitizer
#if !defined(ADDRESS_SANITIZER) #if !defined(ADDRESS_SANITIZER)
# if defined(__has_feature) # if defined(ch_has_feature)
# if __has_feature(address_sanitizer) # if ch_has_feature(address_sanitizer)
# define ADDRESS_SANITIZER 1 # define ADDRESS_SANITIZER 1
# endif # endif
# elif defined(__SANITIZE_ADDRESS__) # elif defined(__SANITIZE_ADDRESS__)
@ -42,8 +57,8 @@
#endif #endif
#if !defined(THREAD_SANITIZER) #if !defined(THREAD_SANITIZER)
# if defined(__has_feature) # if defined(ch_has_feature)
# if __has_feature(thread_sanitizer) # if ch_has_feature(thread_sanitizer)
# define THREAD_SANITIZER 1 # define THREAD_SANITIZER 1
# endif # endif
# elif defined(__SANITIZE_THREAD__) # elif defined(__SANITIZE_THREAD__)
@ -52,8 +67,8 @@
#endif #endif
#if !defined(MEMORY_SANITIZER) #if !defined(MEMORY_SANITIZER)
# if defined(__has_feature) # if defined(ch_has_feature)
# if __has_feature(memory_sanitizer) # if ch_has_feature(memory_sanitizer)
# define MEMORY_SANITIZER 1 # define MEMORY_SANITIZER 1
# endif # endif
# elif defined(__MEMORY_SANITIZER__) # elif defined(__MEMORY_SANITIZER__)


@ -15,11 +15,11 @@
#endif #endif
#define __msan_unpoison(X, Y) // NOLINT #define __msan_unpoison(X, Y) // NOLINT
#if defined(__has_feature) #if defined(ch_has_feature)
# if __has_feature(memory_sanitizer) # if ch_has_feature(memory_sanitizer)
# undef __msan_unpoison # undef __msan_unpoison
# include <sanitizer/msan_interface.h> # include <sanitizer/msan_interface.h>
# endif # endif
#endif #endif
#include <link.h> #include <link.h>
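
A minimal sketch of the pattern in the two hunks above: default the MSan hook to a no-op, then enable the real one only when the sanitizer is detected through the `ch_has_feature` shim (the `my_msan_unpoison` and `markInitialized` names are illustrative, not from the sources):

```cpp
#include <cstddef>

/// The shim from common/defines.h: only trust __has_feature under clang,
/// before libc++ has had a chance to redefine it.
#if defined(__clang__) && defined(__has_feature)
#    define ch_has_feature __has_feature
#endif

#define my_msan_unpoison(X, Y)  /// no-op unless MSan is really active

#if defined(ch_has_feature)
#    if ch_has_feature(memory_sanitizer)
#        undef my_msan_unpoison
#        include <sanitizer/msan_interface.h>
#        define my_msan_unpoison(X, Y) __msan_unpoison(X, Y)
#    endif
#endif

void markInitialized(void * buf, size_t n)
{
    my_msan_unpoison(buf, n);  /// tell MSan these bytes are initialized
}
```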


@ -416,7 +416,9 @@ static void sanitizerDeathCallback()
else else
log_message = "Terminate called without an active exception"; log_message = "Terminate called without an active exception";
static const size_t buf_size = 1024; /// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe
/// And the buffer should not be too small because our exception messages can be large.
static constexpr size_t buf_size = PIPE_BUF;
if (log_message.size() > buf_size - 16) if (log_message.size() > buf_size - 16)
log_message.resize(buf_size - 16); log_message.resize(buf_size - 16);
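
A sketch of the invariant the new comment relies on: POSIX guarantees that a single `write(2)` of at most `PIPE_BUF` bytes to a pipe is atomic, so truncating first keeps concurrent crash messages from interleaving (the `writeCrashMessage` helper is hypothetical):

```cpp
#include <climits>      /// PIPE_BUF
#include <string>
#include <unistd.h>

void writeCrashMessage(int pipe_fd, std::string message)
{
    static constexpr size_t buf_size = PIPE_BUF;

    /// Keep the payload under PIPE_BUF (minus some slack for framing) so the
    /// whole message lands in one atomic write, never interleaved.
    if (message.size() > buf_size - 16)
        message.resize(buf_size - 16);

    (void)::write(pipe_fd, message.data(), message.size());
}
```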


@ -51,10 +51,11 @@ Connection::Connection(
const char* ssl_key, const char* ssl_key,
unsigned timeout, unsigned timeout,
unsigned rw_timeout, unsigned rw_timeout,
bool enable_local_infile) bool enable_local_infile,
bool opt_reconnect)
: Connection() : Connection()
{ {
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile); connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile, opt_reconnect);
} }
Connection::Connection(const std::string & config_name) Connection::Connection(const std::string & config_name)
@ -80,7 +81,8 @@ void Connection::connect(const char* db,
const char * ssl_key, const char * ssl_key,
unsigned timeout, unsigned timeout,
unsigned rw_timeout, unsigned rw_timeout,
bool enable_local_infile) bool enable_local_infile,
bool opt_reconnect)
{ {
if (is_connected) if (is_connected)
disconnect(); disconnect();
@ -104,9 +106,8 @@ void Connection::connect(const char* db,
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg)) if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Enables auto-reconnect. /// See C API Developer Guide: Automatic Reconnection Control
bool reconnect = true; if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&opt_reconnect)))
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get())); throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
/// Specifies particular ssl key and certificate if it needs /// Specifies particular ssl key and certificate if it needs
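
Roughly, the client-side call this hunk parameterizes looks like the following sketch against the MySQL C API (`setReconnect` is an illustrative wrapper, not a mysqlxx function):

```cpp
#include <mysql/mysql.h>
#include <stdexcept>

void setReconnect(MYSQL * driver, bool opt_reconnect)
{
    /// MYSQL_OPT_RECONNECT takes a pointer to a bool-like flag; passing the
    /// caller's value instead of a hard-coded `true` is the whole change.
    if (mysql_options(driver, MYSQL_OPT_RECONNECT,
                      reinterpret_cast<const char *>(&opt_reconnect)))
        throw std::runtime_error(mysql_error(driver));
}
```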


@ -14,6 +14,8 @@
/// Disable LOAD DATA LOCAL INFILE because it is insecure /// Disable LOAD DATA LOCAL INFILE because it is insecure
#define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false #define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false
/// See https://dev.mysql.com/doc/c-api/5.7/en/c-api-auto-reconnect.html
#define MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT true
namespace mysqlxx namespace mysqlxx
@ -76,7 +78,8 @@ public:
const char * ssl_key = "", const char * ssl_key = "",
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT, unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT, unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
/// Creates connection. Can be used if Poco::Util::Application is using. /// Creates connection. Can be used if Poco::Util::Application is using.
/// All settings will be got from config_name section of configuration. /// All settings will be got from config_name section of configuration.
@ -96,7 +99,8 @@ public:
const char* ssl_key, const char* ssl_key,
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT, unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT, unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
void connect(const std::string & config_name) void connect(const std::string & config_name)
{ {
@ -112,6 +116,7 @@ public:
std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", ""); std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", "");
std::string ssl_key = cfg.getString(config_name + ".ssl_key", ""); std::string ssl_key = cfg.getString(config_name + ".ssl_key", "");
bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
unsigned timeout = unsigned timeout =
cfg.getInt(config_name + ".connect_timeout", cfg.getInt(config_name + ".connect_timeout",
@ -135,7 +140,8 @@ public:
ssl_key.c_str(), ssl_key.c_str(),
timeout, timeout,
rw_timeout, rw_timeout,
enable_local_infile); enable_local_infile,
opt_reconnect);
} }
/// If MySQL connection was established. /// If MySQL connection was established.


@ -78,6 +78,9 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", enable_local_infile = cfg.getBool(config_name + ".enable_local_infile",
cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE)); cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE));
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect",
cfg.getBool(parent_config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT));
} }
else else
{ {
@ -96,6 +99,8 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
enable_local_infile = cfg.getBool( enable_local_infile = cfg.getBool(
config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE); config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
} }
connect_timeout = cfg.getInt(config_name + ".connect_timeout", connect_timeout = cfg.getInt(config_name + ".connect_timeout",
@ -233,7 +238,8 @@ void Pool::Entry::forceConnected() const
pool->ssl_key.c_str(), pool->ssl_key.c_str(),
pool->connect_timeout, pool->connect_timeout,
pool->rw_timeout, pool->rw_timeout,
pool->enable_local_infile); pool->enable_local_infile,
pool->opt_reconnect);
} }
} }
@ -248,7 +254,7 @@ bool Pool::Entry::tryForceConnected() const
if (prev_connection_id != current_connection_id) if (prev_connection_id != current_connection_id)
{ {
auto & logger = Poco::Util::Application::instance().logger(); auto & logger = Poco::Util::Application::instance().logger();
logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu", logger.information("Reconnected to mysql server. Connection id changed: %lu -> %lu",
prev_connection_id, current_connection_id); prev_connection_id, current_connection_id);
} }
return true; return true;
@ -294,7 +300,8 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
ssl_key.c_str(), ssl_key.c_str(),
connect_timeout, connect_timeout,
rw_timeout, rw_timeout,
enable_local_infile); enable_local_infile,
opt_reconnect);
} }
catch (mysqlxx::ConnectionFailed & e) catch (mysqlxx::ConnectionFailed & e)
{ {
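
The connection-id comparison in `tryForceConnected` can be observed directly through the C API. A hedged sketch (the `pingAndReport` helper is illustrative; `mysql_ping` may silently reconnect when `MYSQL_OPT_RECONNECT` is enabled, which is exactly what changes the server-side connection id):

```cpp
#include <mysql/mysql.h>
#include <cstdio>

bool pingAndReport(MYSQL * driver)
{
    unsigned long prev_id = mysql_thread_id(driver);
    if (mysql_ping(driver))        /// non-zero means the ping (and reconnect) failed
        return false;
    unsigned long cur_id = mysql_thread_id(driver);
    if (cur_id != prev_id)
        std::printf("Reconnected to mysql server. Connection id changed: %lu -> %lu\n",
                    prev_id, cur_id);
    return true;
}
```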


@ -165,10 +165,12 @@ public:
unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT, unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT,
unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS, unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS,
unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS, unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS,
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE) unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT)
: default_connections(default_connections_), max_connections(max_connections_), : default_connections(default_connections_), max_connections(max_connections_),
db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_), db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_),
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_) {} connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_),
opt_reconnect(opt_reconnect_) {}
Pool(const Pool & other) Pool(const Pool & other)
: default_connections{other.default_connections}, : default_connections{other.default_connections},
@ -177,7 +179,7 @@ public:
user{other.user}, password{other.password}, user{other.user}, password{other.password},
port{other.port}, socket{other.socket}, port{other.port}, socket{other.socket},
connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout}, connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout},
enable_local_infile{other.enable_local_infile} enable_local_infile{other.enable_local_infile}, opt_reconnect(other.opt_reconnect)
{} {}
Pool & operator=(const Pool &) = delete; Pool & operator=(const Pool &) = delete;
@ -231,6 +233,7 @@ private:
std::string ssl_cert; std::string ssl_cert;
std::string ssl_key; std::string ssl_key;
bool enable_local_infile; bool enable_local_infile;
bool opt_reconnect;
/// True if connection was established at least once. /// True if connection was established at least once.
bool was_successful{false}; bool was_successful{false};


@ -1,3 +1,8 @@
#include <algorithm>
#include <ctime>
#include <random>
#include <thread>
#include <mysqlxx/PoolWithFailover.h> #include <mysqlxx/PoolWithFailover.h>
@ -33,6 +38,19 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & con
std::make_shared<Pool>(config_, replica_name, default_connections_, max_connections_, config_name_.c_str())); std::make_shared<Pool>(config_, replica_name, default_connections_, max_connections_, config_name_.c_str()));
} }
} }
/// PoolWithFailover objects are stored in a cache inside PoolFactory.
/// This cache is reset by ExternalDictionariesLoader after every SYSTEM RELOAD DICTIONAR{Y|IES}
/// which triggers massive re-constructing of connection pools.
/// The state of PRNGs like std::mt19937 is considered to be quite heavy
/// thus here we attempt to optimize its construction.
static thread_local std::mt19937 rnd_generator(
std::hash<std::thread::id>{}(std::this_thread::get_id()) + std::clock());
for (auto & [_, replicas] : replicas_by_priority)
{
if (replicas.size() > 1)
std::shuffle(replicas.begin(), replicas.end(), rnd_generator);
}
} }
else else
{ {
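
The added shuffle reads fine in isolation; a self-contained sketch with a plain vector standing in for the replica list (`shuffleReplicas` is an illustrative name, not mysqlxx API):

```cpp
#include <algorithm>
#include <ctime>
#include <functional>
#include <random>
#include <string>
#include <thread>
#include <vector>

/// One PRNG per thread, seeded once from the thread id plus the clock:
/// std::mt19937 carries roughly 2.5 KB of state, so re-seeding it on every
/// pool reconstruction (e.g. after SYSTEM RELOAD DICTIONARIES) would be
/// needlessly expensive.
static thread_local std::mt19937 rnd_generator(
    std::hash<std::thread::id>{}(std::this_thread::get_id()) + std::clock());

void shuffleReplicas(std::vector<std::string> & replicas)
{
    if (replicas.size() > 1)
        std::shuffle(replicas.begin(), replicas.end(), rnd_generator);
}
```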

contrib/NuRaft

@@ -1 +1 @@
-Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793
+Subproject commit 9a0d78de4b90546368d954b6434f0e9a823e8d80


@@ -70,6 +70,7 @@ function start_server
     --path "$FASTTEST_DATA"
     --user_files_path "$FASTTEST_DATA/user_files"
     --top_level_domains_path "$FASTTEST_DATA/top_level_domains"
+    --test_keeper_server.log_storage_path "$FASTTEST_DATA/coordination"
 )
 clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" &
 server_pid=$!
@@ -107,6 +108,18 @@ function start_server
 fi

 echo "ClickHouse server pid '$server_pid' started and responded"

+echo "
+handle all noprint
+handle SIGSEGV stop print
+handle SIGBUS stop print
+handle SIGABRT stop print
+continue
+thread apply all backtrace
+continue
+" > script.gdb
+
+gdb -batch -command script.gdb -p "$server_pid" &
 }

 function clone_root
@@ -327,7 +340,7 @@ function run_tests
     # Look at DistributedFilesToInsert, so cannot run in parallel.
     01460_DistributedFilesToInsert

-    01541_max_memory_usage_for_user
+    01541_max_memory_usage_for_user_long

     # Require python libraries like scipy, pandas and numpy
     01322_ttest_scipy
@@ -363,7 +376,7 @@ function run_tests
     stop_server ||:

     # Clean the data so that there is no interference from the previous test run.
-    rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
+    rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files,coordination} ||:

     start_server


@@ -4,4 +4,4 @@ services:
     image: cassandra
     restart: always
     ports:
       - 9043:9042


@@ -5,6 +5,6 @@ services:
     hostname: hdfs1
     restart: always
     ports:
       - 50075:50075
       - 50070:50070
     entrypoint: /etc/bootstrap.sh -d


@@ -5,42 +5,42 @@ services:
     image: zookeeper:3.4.9
     hostname: kafka_zookeeper
     environment:
       ZOO_MY_ID: 1
       ZOO_PORT: 2181
       ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888
     security_opt:
       - label:disable

   kafka1:
     image: confluentinc/cp-kafka:5.2.0
     hostname: kafka1
     ports:
       - "9092:9092"
     environment:
       KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092
       KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
       KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
       KAFKA_BROKER_ID: 1
       KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
       KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
     depends_on:
       - kafka_zookeeper
     security_opt:
       - label:disable

   schema-registry:
     image: confluentinc/cp-schema-registry:5.2.0
     hostname: schema-registry
     ports:
       - "8081:8081"
     environment:
       SCHEMA_REGISTRY_HOST_NAME: schema-registry
       SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
       SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
     depends_on:
       - kafka_zookeeper
       - kafka1
     security_opt:
       - label:disable


@@ -8,22 +8,22 @@ services:
     hostname: kerberizedhdfs1
     restart: always
     volumes:
       - ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
       - ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
       - ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
     ports:
       - 1006:1006
       - 50070:50070
       - 9010:9010
     depends_on:
       - hdfskerberos
     entrypoint: /etc/bootstrap.sh -d

   hdfskerberos:
     image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
     hostname: hdfskerberos
     volumes:
       - ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
       - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
       - /dev/urandom:/dev/random
     ports: [88, 749]


@@ -6,54 +6,54 @@ services:
     # restart: always
     hostname: kafka_kerberized_zookeeper
     environment:
      ZOOKEEPER_SERVER_ID: 1
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
      KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
     volumes:
       - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
       - /dev/urandom:/dev/random
     depends_on:
       - kafka_kerberos
     security_opt:
       - label:disable

   kerberized_kafka1:
     image: confluentinc/cp-kafka:5.2.0
     # restart: always
     hostname: kerberized_kafka1
     ports:
       - "9092:9092"
       - "9093:9093"
     environment:
       KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
       KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
       # KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
       # KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
       KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
       KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
       KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
       KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
       KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
       KAFKA_BROKER_ID: 1
       KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
       KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
       KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
       KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
     volumes:
       - ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
       - /dev/urandom:/dev/random
     depends_on:
       - kafka_kerberized_zookeeper
       - kafka_kerberos
     security_opt:
       - label:disable

   kafka_kerberos:
     image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
     hostname: kafka_kerberos
     volumes:
       - ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
       - ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
       - /dev/urandom:/dev/random
     ports: [88, 749]


@@ -7,5 +7,5 @@ services:
       MONGO_INITDB_ROOT_USERNAME: root
       MONGO_INITDB_ROOT_PASSWORD: clickhouse
     ports:
       - 27018:27017
     command: --profile=2 --verbose


@@ -6,5 +6,5 @@ services:
     environment:
       MYSQL_ROOT_PASSWORD: clickhouse
     ports:
       - 3308:3306
     command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency


@@ -6,5 +6,9 @@ services:
     environment:
       MYSQL_ROOT_PASSWORD: clickhouse
     ports:
       - 3308:3306
-    command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
+    command: --server_id=100 --log-bin='mysql-bin-1.log'
+        --default-time-zone='+3:00'
+        --gtid-mode="ON"
+        --enforce-gtid-consistency
+        --log-error-verbosity=3


@@ -6,5 +6,10 @@ services:
     environment:
       MYSQL_ROOT_PASSWORD: clickhouse
     ports:
       - 33308:3306
-    command: --server_id=100 --log-bin='mysql-bin-1.log' --default_authentication_plugin='mysql_native_password' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
+    command: --server_id=100 --log-bin='mysql-bin-1.log'
+        --default_authentication_plugin='mysql_native_password'
+        --default-time-zone='+3:00'
+        --gtid-mode="ON"
+        --enforce-gtid-consistency
+        --log-error-verbosity=3


@@ -7,7 +7,7 @@ services:
       MYSQL_ALLOW_EMPTY_PASSWORD: 1
     command: --federated --socket /var/run/mysqld/mysqld.sock
     healthcheck:
-      test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
+      test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
       interval: 1s
       timeout: 2s
       retries: 100


@@ -6,8 +6,8 @@ services:
     environment:
       POSTGRES_PASSWORD: mysecretpassword
     ports:
       - 5432:5432
     networks:
       default:
         aliases:
           - postgre-sql.local


@@ -4,5 +4,5 @@ services:
     image: redis
     restart: always
     ports:
       - 6380:6379
     command: redis-server --requirepass "clickhouse" --databases 32


@@ -97,6 +97,7 @@ function configure
     rm -r right/db ||:
     rm -r db0/preprocessed_configs ||:
     rm -r db0/{data,metadata}/system ||:
+    rm db0/status ||:
     cp -al db0/ left/db/
     cp -al db0/ right/db/
 }


@@ -60,4 +60,8 @@ fi
 # more ideologically correct.
 read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"

+if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
+    ADDITIONAL_OPTIONS+=('--replicated-database')
+fi
+
 clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt


@@ -57,6 +57,10 @@ function run_tests()
         ADDITIONAL_OPTIONS+=('4')
     fi

+    if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
+        ADDITIONAL_OPTIONS+=('--replicated-database')
+    fi
+
     clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
         --test-runs "$NUM_TRIES" \
         "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \


@@ -23,12 +23,15 @@ def get_options(i):
     if 0 < i:
         options += " --order=random"

-    if i % 2 == 1:
+    if i % 3 == 1:
         options += " --db-engine=Ordinary"

+    if i % 3 == 2:
+        options += ''' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
+
     # If database name is not specified, new database is created for each functional test.
     # Run some threads with one database for all tests.
-    if i % 3 == 1:
+    if i % 2 == 1:
         options += " --database=test_{}".format(i)

     if i == 13:


@ -1,7 +1,14 @@
# docker build -t yandex/clickhouse-style-test . # docker build -t yandex/clickhouse-style-test .
FROM ubuntu:20.04 FROM ubuntu:20.04
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
shellcheck \
libxml2-utils \
git \
python3-pip \
pylint \
yamllint \
&& pip3 install codespell
# For |& syntax # For |& syntax


@@ -66,7 +66,8 @@ SELECT * FROM file_engine_table

 ## Usage in ClickHouse-local {#usage-in-clickhouse-local}

-In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`.
+In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. It is possible to read and write compressed files based on an additional engine parameter or file extension (`gz`, `br` or `xz`).

 **Example:**

 ``` bash


@@ -5,7 +5,7 @@ toc_title: Brown University Benchmark

 # Brown University Benchmark

-MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
+`MgBench` is a new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).

 Download the data:
 ```
@@ -153,7 +153,7 @@ ORDER BY dt,
        hr;

--- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O?
+-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?

 SELECT machine_name,
        COUNT(*) AS spikes
@@ -301,7 +301,7 @@ WHERE event_type = 'temperature'
   AND log_time >= '2019-11-29 17:00:00.000';

--- Q3.4: Over the past 6 months, how frequently was each door opened?
+-- Q3.4: Over the past 6 months, how frequently were each door opened?

 SELECT device_name,
        device_floor,
@@ -412,3 +412,5 @@ ORDER BY yr,
 ```

 The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
+
+[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/brown-benchmark/) <!--hide-->


@@ -148,28 +148,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-

 For successful requests that don't return a data table, an empty response body is returned.

-You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
-
-If you specified `compress=1` in the URL, the server compresses the data it sends you.
-If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
-
-You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods.
-
-You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
-
-Examples of sending data with compression:
-
-``` bash
-#Sending data to the server:
-$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
-
-#Sending data to the client:
-$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
-```
+## Compression {#compression}
+
+You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed.
+
+You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
+
+If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method.
+
+You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse supports the following [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
+
+- `gzip`
+- `br`
+- `deflate`
+- `xz`
+
+To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`.
+In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods.

 !!! note "Note"
     Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly.

+**Examples**
+
+``` bash
+# Sending compressed data to the server
+$ echo "SELECT 1" | gzip -c | \
+  curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
+```
+
+``` bash
+# Receiving compressed data from the server
+$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
+    -H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
+$ zcat result.gz
+0
+1
+2
+```
+
 ## Default Database {#default-database}

 You can use the database URL parameter or the X-ClickHouse-Database header to specify the default database.

 ``` bash


@@ -139,7 +139,7 @@ You can assign a quotas set for the user. For a detailed description of quotas c

 ### user_name/databases {#user-namedatabases}

-In this section, you can you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
+In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.

 **Example**


@@ -1104,7 +1104,7 @@ The maximum number of replicas for each shard when executing a query. In limited

 - the sampling key is an expression that is expensive to calculate
 - the cluster's latency distribution has a long tail, so that querying more servers increases the query's overall latency

-In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
+In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.

 ## compile {#compile}
@@ -2659,3 +2659,23 @@ Result:

 Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.

 [Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
+
+## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
+
+Allows selecting data from a file engine table when the underlying file is missing.
+
+Possible values:
+
+- 0 — `SELECT` throws an exception.
+- 1 — `SELECT` returns an empty result.
+
+Default value: `0`.
+
+## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}
+
+Enables or disables truncation before an insert in file engine tables.
+
+Possible values:
+
+- 0 — Disabled.
+- 1 — Enabled.
+
+Default value: `0`.


@@ -52,15 +52,15 @@ Input table:

 Query:

 ``` sql
-SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
+SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary;
 ```

 Result:

 ``` text
-┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
-│ director             │ ('director',5000)           │
-└──────────────────────┴─────────────────────────────┘
+┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐
+│ director             │ ('director',5000)                   │ ('director',5000)           │
+└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘
 ```

 [Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->


@@ -9,7 +9,7 @@ Calculates the arithmetic mean.

 **Syntax**

 ``` sql
-avgWeighted(x)
+avg(x)
 ```

 **Arguments**


@@ -32,6 +32,7 @@ The null hypothesis is that two populations are stochastically equal. Also one-s

 **Returned values**

 [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
 - calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
 - calculated p-value. [Float64](../../../sql-reference/data-types/float.md).


@@ -24,6 +24,7 @@ The null hypothesis is that means of populations are equal. Normal distribution

 **Returned values**

 [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
 - calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
 - calculated p-value. [Float64](../../../sql-reference/data-types/float.md).


@@ -24,6 +24,7 @@ The null hypothesis is that means of populations are equal. Normal distribution

 **Returned values**

 [Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
+
 - calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
 - calculated p-value. [Float64](../../../sql-reference/data-types/float.md).


@ -61,40 +61,58 @@ int32samoa: 1546300800
Converts a date or date with time to a UInt16 number containing the year number (AD).

Alias: `YEAR`.

## toQuarter {#toquarter}

Converts a date or date with time to a UInt8 number containing the quarter number.

Alias: `QUARTER`.

## toMonth {#tomonth}

Converts a date or date with time to a UInt8 number containing the month number (1-12).

Alias: `MONTH`.

## toDayOfYear {#todayofyear}

Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366).

Alias: `DAYOFYEAR`.

## toDayOfMonth {#todayofmonth}

Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31).

Aliases: `DAYOFMONTH`, `DAY`.

## toDayOfWeek {#todayofweek}

Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7).

Alias: `DAYOFWEEK`.

## toHour {#tohour}

Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23).
This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true; even in Moscow the clocks were twice changed at a different time).

Alias: `HOUR`.

## toMinute {#tominute}

Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59).

Alias: `MINUTE`.

## toSecond {#tosecond}

Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59).
Leap seconds are not accounted for.

Alias: `SECOND`.
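These aliases let the same extraction be written in SQL-standard style; a quick sketch:

``` sql
SELECT
    toYear(toDate('2021-02-26')) AS via_function,
    YEAR(toDate('2021-02-26')) AS via_alias,
    DAYOFWEEK(toDate('2021-02-26')) AS day_of_week; -- 2021-02-26 is a Friday, so 5
```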
## toUnixTimestamp {#to-unix-timestamp}

For DateTime argument: converts the value to a UInt32 number -- the Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
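For example (a minimal sketch; fixing the timezone makes the result deterministic):

``` sql
SELECT toUnixTimestamp(toDateTime('2021-02-26 12:00:00', 'UTC')) AS ts;
-- ts = 1614340800
```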


@ -75,6 +75,8 @@ Result:
Returns a string containing the argument's hexadecimal representation.

Alias: `HEX`.

**Syntax**

``` sql

@ -13,6 +13,8 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal
isNull(x)
```

Alias: `ISNULL`.

**Arguments**

- `x` — A value with a non-compound data type.
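A quick sketch of the function and its alias:

``` sql
SELECT isNull(NULL) AS is_null, ISNULL(1) AS alias_form; -- returns 1 and 0
```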


@ -9,10 +9,14 @@ toc_title: IP Addresses
Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a string containing the corresponding IPv4 address in the format A.B.C.d (dot-separated numbers in decimal form).

Alias: `INET_NTOA`.

## IPv4StringToNum(s) {#ipv4stringtonums}

The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0.

Alias: `INET_ATON`.
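A round-trip sketch using both aliases:

``` sql
SELECT INET_NTOA(3232235521) AS addr, INET_ATON('192.168.0.1') AS num;
-- addr = '192.168.0.1', num = 3232235521
```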
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}

Similar to IPv4NumToString, but using xxx instead of the last octet.
@ -49,7 +53,11 @@ Since using xxx is highly unusual, this may be changed in the future. We r
### IPv6NumToString(x) {#ipv6numtostringx}

Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing this address in text format.
IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44.

Alias: `INET6_NTOA`.

Examples:
``` sql
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
@ -119,6 +127,8 @@ The reverse function of IPv6NumToString. If the IPv6 address has an invalid form
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
HEX can be uppercase or lowercase.

Alias: `INET6_ATON`.
``` sql
SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
```


@ -98,6 +98,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
Repeats a string as many times as specified and concatenates the replicated values as a single string.

Alias: `REPEAT`.

**Syntax**

``` sql
@ -276,10 +278,14 @@ Returns the string s that was converted from the encoding in from to
Encodes `s` string into base64.

Alias: `TO_BASE64`.

## base64Decode(s) {#base64decode}

Decodes base64-encoded string `s` into the original string. In case of failure, raises an exception.

Alias: `FROM_BASE64`.
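A round-trip sketch with the aliases:

``` sql
SELECT TO_BASE64('ClickHouse') AS enc, FROM_BASE64(TO_BASE64('ClickHouse')) AS dec;
-- enc = 'Q2xpY2tIb3VzZQ==', dec = 'ClickHouse'
```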
## tryBase64Decode(s) {#trybase64decode}

Similar to base64Decode, but returns an empty string in case of error.
@ -600,4 +606,46 @@ Hello, &quot;world&quot;!
&apos;foo&apos;
```
## decodeXMLComponent {#decode-xml-component}

Replaces XML predefined entities with characters. Predefined entities are `&quot;`, `&amp;`, `&apos;`, `&gt;`, and `&lt;`.
This function also replaces numeric character references with Unicode characters. Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported.

**Syntax**

``` sql
decodeXMLComponent(x)
```

**Parameters**

- `x` — A sequence of characters. [String](../../sql-reference/data-types/string.md).

**Returned value**

- The sequence of characters after replacement.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:

``` sql
SELECT decodeXMLComponent('&apos;foo&apos;');
SELECT decodeXMLComponent('&lt; &#x3A3; &gt;');
```

Result:

``` text
'foo'
< Σ >
```

**See Also**

- [List of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references)

[Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) <!--hide-->


@ -36,10 +36,14 @@ The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/f
**Example**

Query:

``` sql
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
```

Result:

``` text
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
│ -9223372036854775808 │          32 │            16 │           8 │
@ -52,10 +56,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
**Example**

Query:

``` sql
SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
```

Result:

``` text
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
│                  123123 │                         0 │
@ -68,10 +76,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
**Example**

Query:

``` sql
SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
```

Result:

``` text
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
│                  123123 │                      ᴺᵁᴸᴸ │
@ -102,10 +114,14 @@ The behavior of functions for negative arguments and for the [NaN and Inf](../..
**Example**

Query:

``` sql
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
```

Result:

``` text
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
│ 9223372036854775808 │    4294967264 │             16 │            8 │
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
## toDate {#todate}

Alias: `DATE`.
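A quick sketch of the function and its alias:

``` sql
SELECT toDate('2021-02-26') AS d, DATE('2021-02-26') AS d_alias;
```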
## toDateOrZero {#todateorzero}

## toDateOrNull {#todateornull}
@ -168,20 +186,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
**Examples**

Query:

``` sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
```

Result:

``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5))                            │
└──────────┴────────────────────────────────────────────────────┘
```

Query:

``` sql
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
```

Result:

``` text
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2))                            │
@ -213,20 +239,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
**Example**

Query:

``` sql
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
```

Result:

``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5)                                      │
└──────────┴────────────────────────────────────────────────────┘
```

Query:

``` sql
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
```

Result:

``` text
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
│ 0.00 │ Decimal(9, 2)                                      │
@ -258,12 +292,18 @@ Conversion between numeric types uses the same rules as assignments between diff
Additionally, the toString function of the DateTime argument can take a second String argument containing the name of the time zone. Example: `Asia/Yekaterinburg`. In this case, the time is formatted according to the specified time zone.

**Example**

Query:

``` sql
SELECT
    now() AS now_local,
    toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
```

Result:

``` text
┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
@ -281,36 +321,81 @@ If the string has fewer bytes than N, it is padded with null bytes to the right.
Accepts a String or FixedString argument. Returns the String with the content truncated at the first zero byte found.

**Example**

Query:

``` sql
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
```

Result:

``` text
┌─s─────────────┬─s_cut─┐
│ foo\0\0\0\0\0 │ foo   │
└───────────────┴───────┘
```

Query:

``` sql
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
```

Result:

``` text
┌─s──────────┬─s_cut─┐
│ foo\0bar\0 │ foo   │
└────────────┴───────┘
```
## reinterpretAsUUID {#reinterpretasuuid}

This function accepts a 16-byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes to the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.

**Syntax**

``` sql
reinterpretAsUUID(fixed_string)
```

**Parameters**

- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).

**Returned value**

- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).

**Examples**

String to UUID.

Query:

``` sql
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')));
```

Result:

``` text
┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
│ 08090a0b-0c0d-0e0f-0001-020304050607                                  │
└───────────────────────────────────────────────────────────────────────┘
```

Going back and forth from String to UUID.

Query:

``` sql
WITH
    generateUUIDv4() AS uuid,
    identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
    reinterpretAsUUID(reverse(unhex(str))) AS uuid2
SELECT uuid = uuid2;
```

Result:

``` text
┌─equals(uuid, uuid2)─┐
│                   1 │
└─────────────────────┘
```

## reinterpret(x, T) {#type_conversion_function-reinterpret}

Performs byte reinterpretation of `x` as the `T` data type.

The following reinterpretations are allowed:

1. Any type that has a fixed size, whose values can be represented contiguously, into FixedString.
2. Any type whose values can be represented contiguously, into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
3. FixedString, String, and types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString, String, or types that can be interpreted as numeric.

Query:

``` sql
SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
    reinterpret(toInt8(1), 'Float32') as int_to_float,
    reinterpret('1', 'UInt32') as string_to_int;
```

Result:

``` text
┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐
│         255 │        1e-45 │            49 │
└─────────────┴──────────────┴───────────────┘
```

## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256}

## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256}

## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256}

## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat}

## reinterpretAsDate {#type_conversion_function-reinterpretAsDate}

## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime}

## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64}

## reinterpretAsString {#type_conversion_function-reinterpretAsString}

## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString}

## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID}

These functions are aliases for the `reinterpret` function.
## CAST(x, T) {#type_conversion_function-cast}

Converts input value `x` to the `T` data type. The syntax `CAST(x AS t)` is also supported.

Note that if value `x` does not fit the bounds of type `T`, the function overflows. For example, `CAST(-1, 'UInt8')` returns 255.

**Example**

Query:
``` sql
SELECT
    '2016-06-15 23:00:00' AS timestamp,
    CAST(timestamp AS DateTime) AS datetime,
    CAST(timestamp AS Date) AS date,
    CAST(timestamp, 'String') AS string,
    CAST(timestamp, 'FixedString(22)') AS fixed_string;
```
Result:
``` text
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
@ -369,12 +462,18 @@ SELECT
Conversion to FixedString(N) only works for arguments of type String or FixedString(N).

Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported.

**Example**

Query:
``` sql
SELECT toTypeName(x) FROM t_null;
```
Result:
``` text
┌─toTypeName(x)─┐
│ Int8          │
@ -382,10 +481,14 @@ SELECT toTypeName(x) FROM t_null
└───────────────┘
```
Query:
``` sql
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
```
Result:
``` text
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
│ Nullable(UInt16)                        │
@ -399,15 +502,19 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
## accurateCast(x, T) {#type_conversion_function-accurate-cast}

Converts `x` to the `T` data type.

The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.

**Example**

Query:
``` sql
SELECT cast(-1, 'UInt8') as uint8;
```
Result:
``` text
┌─uint8─┐
@ -415,38 +522,46 @@ SELECT cast(-1, 'UInt8') as uint8;
└───────┘
```
Query:
```sql
SELECT accurateCast(-1, 'UInt8') as uint8;
```
Result:
``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}

Converts input value `x` to the specified data type `T`. Always returns a [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type.

**Syntax**
```sql
accurateCastOrNull(x, T)
```

**Parameters**

- `x` — Input value.
- `T` — The name of the returned data type.

**Returned value**

- The value, converted to the specified data type `T`.
**Example**
Query:
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
```

Result:

``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
│ Nullable(UInt8)                            │
└────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
    accurateCastOrNull(-1, 'UInt8') as uint8,
    accurateCastOrNull(128, 'Int8') as int8,
    accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
```
Result:
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}

Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
@ -481,6 +613,8 @@ toIntervalYear(number)
**Example**
Query:
``` sql
WITH
    toDate('2019-01-01') AS date,
    INTERVAL 1 WEEK AS interval_week,
    toIntervalWeek(1) AS interval_to_week
SELECT
    date + interval_week,
    date + interval_to_week;
```
Result:
``` text
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│                2019-01-08 │                   2019-01-08 │
@ -506,7 +642,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112
**Syntax**

``` sql
parseDateTimeBestEffort(time_string [, time_zone])
```
**Arguments**
@ -549,7 +685,7 @@ Query:
``` sql
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
AS parseDateTimeBestEffort;
```
Result:
@ -564,7 +700,7 @@ Query:
``` sql
SELECT parseDateTimeBestEffort('1284101485')
AS parseDateTimeBestEffort;
```
Result:
@ -579,7 +715,7 @@ Query:
``` sql
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
AS parseDateTimeBestEffort;
```
Result:
@ -593,7 +729,7 @@ Result:
Query:
``` sql
SELECT parseDateTimeBestEffort('10 20:19');
```
Result:
@ -613,12 +749,12 @@ Result:
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}

This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity.

**Syntax**

``` sql
parseDateTimeBestEffortUS(time_string [, time_zone])
```
**Arguments**
@ -693,6 +829,178 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r
Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed.
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed.
**Syntax**
``` sql
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Supported non-standard formats**
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
**Returned values**
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
- `NULL` if the input string cannot be converted to the `DateTime` data type.
**Examples**
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────────┘
```
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
**Syntax**
``` sql
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
```
**Parameters**
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Supported non-standard formats**
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
**Returned values**
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
- Zero date or zero date with time if the input string cannot be converted to the `DateTime` data type.
**Examples**
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 1970-01-01 00:00:00 │
└─────────────────────────────────┘
```
## toLowCardinality {#tolowcardinality}

Converts input parameter to the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) version of the same data type.
@ -720,7 +1028,7 @@ Type: `LowCardinality(expr_result_type)`
Query:
``` sql
SELECT toLowCardinality('1');
```
Result:
@ -759,7 +1067,7 @@ Query:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64);
```
Result:
@ -772,7 +1080,7 @@ Result:
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64);
```
Result:
@ -806,13 +1114,17 @@ fromUnixTimestamp64Milli(value [, ti])
- `value` converted to the `DateTime64` data type.

**Example**

Query:
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
```
Result:
``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
│              2009-02-13 23:31:31.011 │
@ -844,7 +1156,7 @@ Query:
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3);
```
Result:
@ -885,7 +1197,7 @@ Query:
``` sql
SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3);
```
Result:


@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
If the left side is a single column that is in the index, and the right side is a set of constants, the system uses the index for processing the query.

Don't list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery.

The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets.
ClickHouse allows types to differ in the left and the right parts of the `IN` subquery. In this case it converts the left side value to the type of the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function is applied. That means that the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, it returns [NULL](../../sql-reference/syntax.md#null-literal).
**Example**
Query:
``` sql
SELECT '1' IN (SELECT 1);
```
Result:
``` text
┌─in('1', _subquery49)─┐
│ 1 │
└──────────────────────┘
```
If the right side of the operator is the name of a table (for example, `UserID IN users`), this is equivalent to the subquery `UserID IN (SELECT * FROM users)`. Use this when working with external data that is sent along with the query. For example, the query can be sent together with a set of user IDs loaded to the 'users' temporary table, which should be filtered.

If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query.
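A minimal sketch of that pattern (the `allowed_ids` and `events` tables here are illustrative, not from this page):

``` sql
-- A prepared set kept in RAM; it is rebuilt on inserts, not per query.
CREATE TABLE allowed_ids (id UInt64) ENGINE = Set;
INSERT INTO allowed_ids VALUES (1), (2), (3);

-- The IN check reuses the in-memory set on every run of the query.
SELECT count() FROM events WHERE user_id IN allowed_ids;
```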


@ -41,7 +41,6 @@ SELECT a, b, c FROM (SELECT ...)
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
```
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.

When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE`: the table engine for storing data.
@ -65,4 +64,191 @@ Views look the same as normal tables. For example, they are listed in the result
There isn't a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
## Live View (Experimental) {#live-view}
!!! important "Important"
This is an experimental feature that may change in backwards-incompatible ways in the future releases.
Enable usage of live views and `WATCH` query using `set allow_experimental_live_view = 1`.
```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
```
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
Live views are triggered by an insert into the innermost table specified in the query.
Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery.
!!! info "Limitations"
- [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table.
- Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view.
- Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved.
- Does not work with replicated or distributed tables where inserts are performed on different nodes.
- Can't be triggered by multiple tables.
See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view; in some cases this can be used as a workaround.

You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query.
```sql
WATCH [db.]live_view
```
**Example:**
```sql
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
```
Watch a live view while doing a parallel insert into the source table.
```sql
WATCH lv
```
```bash
┌─sum(x)─┬─_version─┐
│ 1 │ 1 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 2 │ 2 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 6 │ 3 │
└────────┴──────────┘
...
```
```sql
INSERT INTO mt VALUES (1);
INSERT INTO mt VALUES (2);
INSERT INTO mt VALUES (3);
```
Or add the [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to get just the change events.
```sql
WATCH [db.]live_view EVENTS
```
**Example:**
```sql
WATCH lv EVENTS
```
```bash
┌─version─┐
│ 1 │
└─────────┘
┌─version─┐
│ 2 │
└─────────┘
┌─version─┐
│ 3 │
└─────────┘
...
```
You can execute a [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or table. If the query result is cached, it will return the result immediately without running the stored query on the underlying tables.
```sql
SELECT * FROM [db.]live_view WHERE ...
```
### Force Refresh {#live-view-alter-refresh}
You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
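For example, reusing the `lv` view from the examples above:

```sql
ALTER LIVE VIEW lv REFRESH;
```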
### With Timeout {#live-view-with-timeout}
When a live view is created with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
```
If the timeout value is not specified then the value specified by the `temporary_live_view_timeout` setting is used.
**Example:**
```sql
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
```
### With Refresh {#live-view-with-refresh}
When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
```sql
CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
```
If the refresh value is not specified then the value specified by the `periodic_live_view_refresh` setting is used.
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv
```
```bash
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:05 │ 1 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:10 │ 2 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:15 │ 3 │
└─────────────────────┴──────────┘
```
You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
```
After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries.
```sql
WATCH lv
```
```
Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist..
```
### Usage
Most common uses of live view tables include:
- Providing push notifications for query result changes to avoid polling.
- Caching results of most frequent queries to provide immediate query results.
- Watching for table changes and triggering follow-up select queries.
- Watching metrics from system tables using periodic refresh.
### Settings {#live-view-settings}
You can use the following settings to control the behaviour of live views.
- `allow_experimental_live_view` - enable live views. Default is `0`.
- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default is `15` seconds.
- `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which mergeable blocks are dropped and the query is re-executed. Default is `64` inserts.
- `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default is `5` seconds.
- `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default is `60` seconds.
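Put together, a session might start like this (a sketch; the setting values are illustrative):

```sql
SET allow_experimental_live_view = 1;
SET live_view_heartbeat_interval = 15;
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
```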
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide-->


@ -0,0 +1,106 @@
---
toc_priority: 53
toc_title: WATCH
---
# WATCH Statement (Experimental) {#watch}
!!! important "Important"
This is an experimental feature that may change in backwards-incompatible ways in the future releases.
Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`.
``` sql
WATCH [db.]live_view
[EVENTS]
[LIMIT n]
[FORMAT format]
```
The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [live view](./create/view.md#live-view).
```sql
WATCH [db.]live_view
```
The virtual `_version` column in the query result indicates the current result version.
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv
```
```bash
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:21 │ 1 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:26 │ 2 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:31 │ 3 │
└─────────────────────┴──────────┘
...
```
By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table.
```sql
INSERT INTO [db.]table WATCH [db.]live_view ...
```
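As a sketch (the `lv_log` target table is hypothetical; its columns must match what the `WATCH` query produces, here the `now()` value and the virtual `_version` column):

```sql
-- Hypothetical target table matching the WATCH result of `lv`.
CREATE TABLE lv_log (t DateTime, v UInt64) ENGINE = MergeTree ORDER BY t;
INSERT INTO lv_log WATCH lv;
```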
## EVENTS Clause {#events-clause}
The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where instead of the query result you will just get the latest query result version.
```sql
WATCH [db.]live_view EVENTS
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS
```
```bash
┌─version─┐
│ 1 │
└─────────┘
┌─version─┐
│ 2 │
└─────────┘
...
```
## LIMIT Clause {#limit-clause}
The `LIMIT n` clause specifies the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once the query is evaluated.
```sql
WATCH [db.]live_view LIMIT 1
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS LIMIT 1
```
```bash
┌─version─┐
│ 1 │
└─────────┘
```
## FORMAT Clause {#format-clause}
The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause) query.
!!! info "Note"
The [JSONEachRowWithProgress](../../../interfaces/formats/#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
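For example, reusing the `lv` view from above:

```sql
WATCH lv EVENTS LIMIT 1 FORMAT JSONEachRowWithProgress
```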


@ -44,7 +44,7 @@ The rest of the conditions and the `LIMIT` sampling constraint are executed in C
A table object with the same columns as the original MySQL table.

!!! info "Note"
    In the `INSERT` query, to distinguish the table function `mysql(...)` from a table name with a list of column names, you must use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.

**Examples**


@ -63,7 +63,7 @@ SELECT * FROM file_engine_table
## Using the Engine in clickhouse-local {#ispolzovanie-dvizhka-v-clickhouse-local}

In [clickhouse-local](../../../engines/table-engines/special/file.md) the engine accepts not only a format but also a file path as a parameter. Standard input/output streams can be referred to by number or name: `0` or `stdin`, `1` or `stdout`. Compressed files can be read and written as well; for that, pass an additional engine parameter or use a file extension (`gz`, `br`, or `xz`).

**Example:**


@ -0,0 +1,416 @@
---
toc_priority: 20
toc_title: Brown University Benchmark
---
# Brown University Benchmark
`MgBench` is an analytical benchmark for machine-generated event-log data. The benchmark was developed by [Andrew Crotty](http://cs.brown.edu/people/acrotty/).

Download the data:

```bash
wget https://datasets.clickhouse.tech/mgbench{1..3}.csv.xz
```
Unpack the data:

```bash
xz -v -d mgbench{1..3}.csv.xz
```
Create the tables:

```sql
CREATE DATABASE mgbench;
CREATE TABLE mgbench.logs1 (
log_time DateTime,
machine_name LowCardinality(String),
machine_group LowCardinality(String),
cpu_idle Nullable(Float32),
cpu_nice Nullable(Float32),
cpu_system Nullable(Float32),
cpu_user Nullable(Float32),
cpu_wio Nullable(Float32),
disk_free Nullable(Float32),
disk_total Nullable(Float32),
part_max_used Nullable(Float32),
load_fifteen Nullable(Float32),
load_five Nullable(Float32),
load_one Nullable(Float32),
mem_buffers Nullable(Float32),
mem_cached Nullable(Float32),
mem_free Nullable(Float32),
mem_shared Nullable(Float32),
swap_free Nullable(Float32),
bytes_in Nullable(Float32),
bytes_out Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (machine_group, machine_name, log_time);
CREATE TABLE mgbench.logs2 (
log_time DateTime,
client_ip IPv4,
request String,
status_code UInt16,
object_size UInt64
)
ENGINE = MergeTree()
ORDER BY log_time;
CREATE TABLE mgbench.logs3 (
log_time DateTime64,
device_id FixedString(15),
device_name LowCardinality(String),
device_type LowCardinality(String),
device_floor UInt8,
event_type LowCardinality(String),
event_unit FixedString(1),
event_value Nullable(Float32)
)
ENGINE = MergeTree()
ORDER BY (event_type, log_time);
```
Insert the data:

```bash
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
```
Run the benchmark queries:

```sql
-- Q1.1: What is the CPU/network utilization for each web server since midnight?
SELECT machine_name,
MIN(cpu) AS cpu_min,
MAX(cpu) AS cpu_max,
AVG(cpu) AS cpu_avg,
MIN(net_in) AS net_in_min,
MAX(net_in) AS net_in_max,
AVG(net_in) AS net_in_avg,
MIN(net_out) AS net_out_min,
MAX(net_out) AS net_out_max,
AVG(net_out) AS net_out_avg
FROM (
SELECT machine_name,
COALESCE(cpu_user, 0.0) AS cpu,
COALESCE(bytes_in, 0.0) AS net_in,
COALESCE(bytes_out, 0.0) AS net_out
FROM logs1
WHERE machine_name IN ('anansi','aragog','urd')
AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
) AS r
GROUP BY machine_name;
-- Q1.2: Which computer lab machines have been offline in the past day?
SELECT machine_name,
log_time
FROM logs1
WHERE (machine_name LIKE 'cslab%' OR
machine_name LIKE 'mslab%')
AND load_one IS NULL
AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
ORDER BY machine_name,
log_time;
-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation?
SELECT dt,
hr,
AVG(load_fifteen) AS load_fifteen_avg,
AVG(load_five) AS load_five_avg,
AVG(load_one) AS load_one_avg,
AVG(mem_free) AS mem_free_avg,
AVG(swap_free) AS swap_free_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
load_fifteen,
load_five,
load_one,
mem_free,
swap_free
FROM logs1
WHERE machine_name = 'babbage'
AND load_fifteen IS NOT NULL
AND load_five IS NOT NULL
AND load_one IS NOT NULL
AND mem_free IS NOT NULL
AND swap_free IS NOT NULL
AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
) AS r
GROUP BY dt,
hr
ORDER BY dt,
hr;
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
SELECT machine_name,
COUNT(*) AS spikes
FROM logs1
WHERE machine_group = 'Servers'
AND cpu_wio > 0.99
AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
AND log_time < TIMESTAMP '2017-01-01 00:00:00'
GROUP BY machine_name
ORDER BY spikes DESC
LIMIT 10;
-- Q1.5: Which externally reachable VMs have run low on memory?
SELECT machine_name,
dt,
MIN(mem_free) AS mem_free_min
FROM (
SELECT machine_name,
CAST(log_time AS DATE) AS dt,
mem_free
FROM logs1
WHERE machine_group = 'DMZ'
AND mem_free IS NOT NULL
) AS r
GROUP BY machine_name,
dt
HAVING MIN(mem_free) < 10000
ORDER BY machine_name,
dt;
-- Q1.6: What is the total hourly network traffic across all file servers?
SELECT dt,
hr,
SUM(net_in) AS net_in_sum,
SUM(net_out) AS net_out_sum,
SUM(net_in) + SUM(net_out) AS both_sum
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
FROM logs1
WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
'poprocks','razzles','runts','smarties','smuggler','spree','stride',
'tootsie','trident','wrigley','york')
) AS r
GROUP BY dt,
hr
ORDER BY both_sum DESC
LIMIT 10;
-- Q2.1: Which requests have caused server errors within the past 2 weeks?
SELECT *
FROM logs2
WHERE status_code >= 500
AND log_time >= TIMESTAMP '2012-12-18 00:00:00'
ORDER BY log_time;
-- Q2.2: During a specific 2-week period, was the user password file leaked?
SELECT *
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND request LIKE '%/etc/passwd%'
AND log_time >= TIMESTAMP '2012-05-06 00:00:00'
AND log_time < TIMESTAMP '2012-05-20 00:00:00';
-- Q2.3: What was the average path depth for top-level requests in the past month?
SELECT top_level,
AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg
FROM (
SELECT SUBSTRING(request FROM 1 FOR len) AS top_level,
request
FROM (
SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len,
request
FROM logs2
WHERE status_code >= 200
AND status_code < 300
AND log_time >= TIMESTAMP '2012-12-01 00:00:00'
) AS r
WHERE len > 0
) AS s
WHERE top_level IN ('/about','/courses','/degrees','/events',
'/grad','/industry','/news','/people',
'/publications','/research','/teaching','/ugrad')
GROUP BY top_level
ORDER BY top_level;
-- Q2.4: During the last 3 months, which clients have made an excessive number of requests?
SELECT client_ip,
COUNT(*) AS num_requests
FROM logs2
WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00'
GROUP BY client_ip
HAVING COUNT(*) >= 100000
ORDER BY num_requests DESC;
-- Q2.5: What are the daily unique visitors?
SELECT dt,
COUNT(DISTINCT client_ip)
FROM (
SELECT CAST(log_time AS DATE) AS dt,
client_ip
FROM logs2
) AS r
GROUP BY dt
ORDER BY dt;
-- Q2.6: What are the average and maximum data transfer rates (Gbps)?
SELECT AVG(transfer) / 125000000.0 AS transfer_avg,
MAX(transfer) / 125000000.0 AS transfer_max
FROM (
SELECT log_time,
SUM(object_size) AS transfer
FROM logs2
GROUP BY log_time
) AS r;
-- Q3.1: Did the indoor temperature reach freezing over the weekend?
SELECT *
FROM logs3
WHERE event_type = 'temperature'
AND event_value <= 32.0
AND log_time >= '2019-11-29 17:00:00.000';
-- Q3.4: Over the past 6 months, how frequently was each door opened?
SELECT device_name,
device_floor,
COUNT(*) AS ct
FROM logs3
WHERE event_type = 'door_open'
AND log_time >= '2019-06-01 00:00:00.000'
GROUP BY device_name,
device_floor
ORDER BY ct DESC;
-- Q3.5: Where in the building do large temperature variations occur in winter and summer?
WITH temperature AS (
SELECT dt,
device_name,
device_type,
device_floor
FROM (
SELECT dt,
hr,
device_name,
device_type,
device_floor,
AVG(event_value) AS temperature_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(HOUR FROM log_time) AS hr,
device_name,
device_type,
device_floor,
event_value
FROM logs3
WHERE event_type = 'temperature'
) AS r
GROUP BY dt,
hr,
device_name,
device_type,
device_floor
) AS s
GROUP BY dt,
device_name,
device_type,
device_floor
HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0
)
SELECT DISTINCT device_name,
device_type,
device_floor,
'WINTER'
FROM temperature
WHERE dt >= DATE '2018-12-01'
AND dt < DATE '2019-03-01'
UNION
SELECT DISTINCT device_name,
device_type,
device_floor,
'SUMMER'
FROM temperature
WHERE dt >= DATE '2019-06-01'
AND dt < DATE '2019-09-01';
-- Q3.6: For each device category, what are the monthly power consumption metrics?
SELECT yr,
mo,
SUM(coffee_hourly_avg) AS coffee_monthly_sum,
AVG(coffee_hourly_avg) AS coffee_monthly_avg,
SUM(printer_hourly_avg) AS printer_monthly_sum,
AVG(printer_hourly_avg) AS printer_monthly_avg,
SUM(projector_hourly_avg) AS projector_monthly_sum,
AVG(projector_hourly_avg) AS projector_monthly_avg,
SUM(vending_hourly_avg) AS vending_monthly_sum,
AVG(vending_hourly_avg) AS vending_monthly_avg
FROM (
SELECT dt,
yr,
mo,
hr,
AVG(coffee) AS coffee_hourly_avg,
AVG(printer) AS printer_hourly_avg,
AVG(projector) AS projector_hourly_avg,
AVG(vending) AS vending_hourly_avg
FROM (
SELECT CAST(log_time AS DATE) AS dt,
EXTRACT(YEAR FROM log_time) AS yr,
EXTRACT(MONTH FROM log_time) AS mo,
EXTRACT(HOUR FROM log_time) AS hr,
CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee,
CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer,
CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector,
CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending
FROM logs3
WHERE device_type = 'meter'
) AS r
GROUP BY dt,
yr,
mo,
hr
) AS s
GROUP BY yr,
mo
ORDER BY yr,
mo;
```
The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play); see an [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
[Original article](https://clickhouse.tech/docs/ru/getting_started/example_datasets/brown-benchmark/) <!--hide-->


@ -149,28 +149,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
For queries that do not return a data table, an empty response body is returned on success.

## Compression {#compression}

Compression can be used to reduce network traffic when transferring a large amount of data and to create dumps that are compressed right away.

You can use the internal ClickHouse compression format when transferring data. The compressed data format is non-standard, and you need the special `clickhouse-compressor` utility to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification with the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
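For illustration, a dump round trip with this utility might look like the sketch below (assuming `clickhouse-compressor` is on the `PATH`; the file names are made up):

``` bash
# Compress a file into ClickHouse's internal compression format (illustrative file names)
$ clickhouse-compressor < dump.native > dump.native.compressed
# Decompress it back
$ clickhouse-compressor --decompress < dump.native.compressed > dump.native
```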
If you specify `compress=1` in the URL, the server compresses the data it sends to you. If you specify `decompress=1` in the URL, the server decompresses the data that you pass with the `POST` method.

You can also use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse supports the following [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):

- `gzip`
- `br`
- `deflate`
- `xz`

To send a compressed `POST` request, add the `Content-Encoding: compression_method` header.
To have ClickHouse compress the response, enable compression with the [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and add the `Accept-Encoding: compression_method` header. The data compression level for all compression methods can be set with the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting.

!!! note "Note"
    Some HTTP clients may decompress data (`gzip` and `deflate`) from the server by default in the background, so you may get decompressed data even when you use the compression settings correctly.
**Examples**

``` bash
# Sending compressed data to the server
$ echo "SELECT 1" | gzip -c | \
  curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
```

``` bash
# Receiving compressed data from the server
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
    -H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
$ zcat result.gz
0
1
2
```
## Default Database {#default-database}
You can use the `database` URL parameter or the `X-ClickHouse-Database` header to specify the default database.

``` bash
$ echo 'SELECT number FROM numbers LIMIT 10' | curl 'http://localhost:8123/?database=system' --data-binary @-
```


@ -31,6 +31,7 @@ mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_ind
**Returned values**

[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:

- calculated Mann–Whitney U statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).


@ -24,6 +24,7 @@ studentTTest(sample_data, sample_index)
**Returned values**

[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:

- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).


@ -24,6 +24,7 @@ welchTTest(sample_data, sample_index)
**Returned values**

[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:

- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).


@ -23,7 +23,7 @@ LowCardinality(data_type)
The efficiency of the `LowCardinality` data type depends on data diversity. If a dictionary contains fewer than 10,000 distinct values, then ClickHouse mostly shows higher efficiency of data reading and storing. If a dictionary contains more than 100,000 distinct values, then ClickHouse can perform worse than with ordinary data types.

When working with strings, using `LowCardinality` instead of [Enum](enum.md) provides more flexibility in use and often reveals the same or higher efficiency.

## Example
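The original example body is not shown in this hunk; a minimal sketch of a dictionary-encoded column (the table name is ours):

``` sql
-- A low-diversity string column stored with dictionary encoding
CREATE TABLE lc_t
(
    `id` UInt64,
    `strings` LowCardinality(String)
)
ENGINE = MergeTree()
ORDER BY id;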


@ -1355,6 +1355,52 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res;
└─────┘
```

**Syntax**

``` sql
arraySum([func,] arr)
```

**Returned value**

- A number.

Type: [Int](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md).

**Parameters**

- `arr` — [Array](../../sql-reference/data-types/array.md).

**Examples**

Query:

```sql
SELECT arraySum([2,3]) AS res;
```

Result:

``` text
┌─res─┐
│   5 │
└─────┘
```

Query:

``` sql
SELECT arraySum(x -> x*x, [2, 3]) AS res;
```

Result:

``` text
┌─res─┐
│  13 │
└─────┘
```
## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}

Returns an array of the partial sums of the elements of the source array (a running sum). If the `func` function is specified, the array elements are transformed by this function before summing.
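A short illustration of the default behavior:

Query:

``` sql
SELECT arrayCumSum([1, 1, 1, 1]) AS res;
```

Result:

``` text
┌─res───────┐
│ [1,2,3,4] │
└───────────┘
```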


@ -63,40 +63,58 @@ int32samoa: 1546300800
Converts a date or date-with-time to a UInt16 number containing the year number (AD).

Alias: `YEAR`.

## toQuarter {#toquarter}

Converts a date or date-with-time to a UInt8 number containing the quarter number.

Alias: `QUARTER`.

## toMonth {#tomonth}

Converts a date or date-with-time to a UInt8 number containing the month number (1-12).

Alias: `MONTH`.

## toDayOfYear {#todayofyear}

Converts a date or date-with-time to a UInt16 number containing the number of the day of the year (1-366).

Alias: `DAYOFYEAR`.

## toDayOfMonth {#todayofmonth}

Converts a date or date-with-time to a UInt8 number containing the number of the day of the month (1-31).

Aliases: `DAYOFMONTH`, `DAY`.

## toDayOfWeek {#todayofweek}

Converts a date or date-with-time to a UInt8 number containing the number of the day of the week (Monday is 1, Sunday is 7).

Alias: `DAYOFWEEK`.

## toHour {#tohour}

Converts a date-with-time to a UInt8 number containing the hour number in 24-hour time (0-23).
This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always exactly true: even in Moscow the clocks were twice changed at a different time).

Alias: `HOUR`.

## toMinute {#tominute}

Converts a date-with-time to a UInt8 number containing the minute number within the hour (0-59).

Alias: `MINUTE`.

## toSecond {#tosecond}

Converts a date-with-time to a UInt8 number containing the second number within the minute (0-59).
Leap seconds are not taken into account.

Alias: `SECOND`.

## toUnixTimestamp {#to-unix-timestamp}

Converts a date-with-time to a UInt32 number, the Unix timestamp (https://en.wikipedia.org/wiki/Unix_time).
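A quick check (the result for a `DateTime` depends on its time zone; it is pinned to UTC here):

Query:

``` sql
SELECT toUnixTimestamp(toDateTime('2017-11-05 08:07:47', 'UTC')) AS ts;
```

Result:

``` text
┌─────────ts─┐
│ 1509869267 │
└────────────┘
```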


@ -75,6 +75,8 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
Returns a string containing the argument's hexadecimal representation.

Alias: `HEX`.

**Syntax**

``` sql
hex(arg)
```


@ -13,6 +13,8 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
isNull(x)
```

Alias: `ISNULL`.

**Parameters**

- `x` — a value with a non-compound data type.


@ -9,10 +9,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a string containing the corresponding IPv4 address in the format A.B.C.D (dot-separated numbers in decimal form).

Alias: `INET_NTOA`.

## IPv4StringToNum(s) {#ipv4stringtonums}

The inverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0.

Alias: `INET_ATON`.
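As a sanity check, the two functions are inverse to each other:

Query:

``` sql
SELECT IPv4NumToString(IPv4StringToNum('116.106.34.242')) AS addr;
```

Result:

``` text
┌─addr───────────┐
│ 116.106.34.242 │
└────────────────┘
```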
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}

Similar to IPv4NumToString, but using xxx instead of the last octet.
@ -49,7 +53,11 @@ LIMIT 10
### IPv6NumToString(x) {#ipv6numtostringx}

Takes a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing this address in text format.
IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44.

Alias: `INET6_NTOA`.

Examples:

``` sql
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
```
@ -118,6 +126,8 @@ LIMIT 10
The inverse function of IPv6NumToString. If the IPv6 address has an invalid format, it returns a string of zero bytes.
HEX can be uppercase or lowercase.
Alias: `INET6_ATON`.
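For illustration, parsing a textual IPv6 address into its 16-byte binary form:

Query:

``` sql
SELECT hex(IPv6StringToNum('2a02:6b8::11')) AS addr_hex;
```

Result:

``` text
┌─addr_hex─────────────────────────┐
│ 2A0206B8000000000000000000000011 │
└──────────────────────────────────┘
```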
## IPv4ToIPv6(x) {#ipv4toipv6x}

Takes a `UInt32` number. Interprets it as an IPv4 address in [big endian](https://en.wikipedia.org/wiki/Endianness). Returns a `FixedString(16)` value containing the IPv6 address in binary format. Examples:


@ -95,6 +95,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
Repeats a string the specified number of times and concatenates the repeated values into a single string.

Alias: `REPEAT`.
**Syntax**

``` sql
repeat(s, n)
```
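A one-line illustration:

Query:

``` sql
SELECT repeat('ab', 3) AS res;
```

Result:

``` text
┌─res────┐
│ ababab │
└────────┘
```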
@ -273,10 +275,14 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2)
Encodes the string `s` into its base64 representation.

Alias: `TO_BASE64`.

## base64Decode(s) {#base64decode}

Decodes the base64 representation `s` into the original string. Throws an exception if decoding is not possible.

Alias: `FROM_BASE64`.
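A round trip through both functions:

Query:

``` sql
SELECT base64Encode('ClickHouse') AS encoded, base64Decode(base64Encode('ClickHouse')) AS decoded;
```

Result:

``` text
┌─encoded──────────┬─decoded────┐
│ Q2xpY2tIb3VzZQ== │ ClickHouse │
└──────────────────┴────────────┘
```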
## tryBase64Decode(s) {#trybase64decode}

Works like base64Decode, but returns an empty string if decoding is not possible.
@ -597,4 +603,46 @@ Hello, &quot;world&quot;!
&apos;foo&apos;
```
## decodeXMLComponent {#decode-xml-component}
Replaces the predefined XML entities with the corresponding characters: `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`.
This function also replaces numeric character references with the corresponding Unicode characters. Both the decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported.

**Syntax**
``` sql
decodeXMLComponent(x)
```
**Parameters**

- `x` — a sequence of characters. [String](../../sql-reference/data-types/string.md).

**Returned value**

- The string after substitutions.

Type: [String](../../sql-reference/data-types/string.md).

**Example**

Query:
``` sql
SELECT decodeXMLComponent('&apos;foo&apos;');
SELECT decodeXMLComponent('&lt; &#x3A3; &gt;');
```
Result:
``` text
'foo'
< Σ >
```
**See also**

- [HTML character entities](https://ru.wikipedia.org/wiki/%D0%9C%D0%BD%D0%B5%D0%BC%D0%BE%D0%BD%D0%B8%D0%BA%D0%B8_%D0%B2_HTML)

[Original article](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) <!--hide-->


@ -36,10 +36,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u0440\u0435\u
**Example**

Query:

``` sql
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
```

Result:

``` text
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
│ -9223372036854775808 │          32 │            16 │           8 │
└──────────────────────┴─────────────┴───────────────┴─────────────┘
```
@ -52,10 +56,14 @@ SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
**Example**

Query:

``` sql
SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
```

Result:

``` text
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
│                  123123 │                         0 │
└─────────────────────────┴───────────────────────────┘
```
@ -68,10 +76,14 @@ select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
**Example**

Query:

``` sql
SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
```

Result:

``` text
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
│                  123123 │                      ᴺᵁᴸᴸ │
└─────────────────────────┴───────────────────────────┘
```
@ -102,10 +114,14 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
**Example**

Query:

``` sql
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
```

Result:

``` text
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
│ 9223372036854775808 │    4294967264 │             16 │            8 │
└─────────────────────┴───────────────┴────────────────┴──────────────┘
```
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
## toDate {#todate}

Alias: `DATE`.
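For illustration:

Query:

``` sql
SELECT toDate('2021-02-26') AS d, toTypeName(d) AS type;
```

Result:

``` text
┌──────────d─┬─type─┐
│ 2021-02-26 │ Date │
└────────────┴──────┘
```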
## toDateOrZero {#todateorzero}

## toDateOrNull {#todateornull}
@ -168,20 +186,28 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
**Examples**

Query:

``` sql
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
```

Result:

``` text
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
│ -1.11100 │ Nullable(Decimal(9, 5))                             │
└──────────┴────────────────────────────────────────────────────┘
```

Query:

``` sql
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
```

Result:

``` text
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2))                             │
└──────┴─────────────────────────────────────────────────────┘
```
@ -213,20 +239,28 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
**Example**

Query:

``` sql
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
```

Result:

``` text
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
│ -1.11100 │ Decimal(9, 5)                                       │
└──────────┴────────────────────────────────────────────────────┘
```

Query:

``` sql
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
```

Result:

``` text
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
│ 0.00 │ Decimal(9, 2)                                       │
└──────┴─────────────────────────────────────────────────────┘
```
@ -258,12 +292,18 @@ YYYY-MM-DD hh:mm:ss
Additionally, the toString function of a DateTime argument can take a second String argument containing the name of the time zone, e.g. `Asia/Yekaterinburg`. In this case, the time is formatted according to the specified time zone.

**Example**

Query:

``` sql
SELECT
    now() AS now_local,
    toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
```

Result:

``` text
┌───────────now_local─┬─now_yekat───────────┐
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
└─────────────────────┴─────────────────────┘
```
@ -281,22 +321,30 @@ SELECT
Takes a String or FixedString argument. Returns a String with the content truncated at the first zero byte found.

**Examples**

Query:

``` sql
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
```

Result:

``` text
┌─s─────────────┬─s_cut─┐
│ foo\0\0\0\0\0 │ foo   │
└───────────────┴───────┘
```

Query:

``` sql
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
```

Result:

``` text
┌─s──────────┬─s_cut─┐
│ foo\0bar\0 │ foo   │
└────────────┴───────┘
```
@ -344,7 +392,7 @@ reinterpretAsUUID(fixed_string)
Query:

``` sql
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')));
```

Result:
@ -377,10 +425,15 @@ SELECT uuid = uuid2;
## CAST(x, T) {#type_conversion_function-cast}

Converts the input value `x` to the specified data type `T`.

The syntax `CAST(x AS t)` is also supported.

Note that if the value `x` does not fit the range of type `T`, the value overflows. For example, `CAST(-1, 'UInt8')` returns 255.

**Example**

Query:

``` sql
SELECT
@ -388,9 +441,11 @@ SELECT
    '2016-06-15 23:00:00' AS timestamp,
    CAST(timestamp AS DateTime) AS datetime,
    CAST(timestamp AS Date) AS date,
    CAST(timestamp, 'String') AS string,
    CAST(timestamp, 'FixedString(22)') AS fixed_string;
```

Result:

``` text
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
└─────────────────────┴─────────────────────┴────────────┴─────────────────────┴───────────────────────────┘
```
@ -399,12 +454,18 @@ SELECT
Conversion to FixedString(N) only works for arguments of type String or FixedString(N).

Type conversion to [Nullable](../../sql-reference/functions/type-conversion-functions.md) and back is supported.

**Examples**

Query:

``` sql
SELECT toTypeName(x) FROM t_null;
```

Result:

``` text
┌─toTypeName(x)─┐
│ Int8          │
@ -412,10 +473,14 @@ SELECT toTypeName(x) FROM t_null
└───────────────┘
```

Query:

``` sql
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
```

Result:

``` text
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
│ Nullable(UInt16)                        │
└─────────────────────────────────────────┘
```
@ -427,6 +492,93 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
- The [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable) setting
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
Converts the input value `x` to the specified data type `T`.

Unlike the [cast(x, T)](#type_conversion_function-cast) function, `accurateCast` does not allow overflow when converting numeric types. For example, `accurateCast(-1, 'UInt8')` throws an exception.

**Examples**

Query:

``` sql
SELECT cast(-1, 'UInt8') as uint8;
```

Result:

``` text
┌─uint8─┐
│   255 │
└───────┘
```
Query:
```sql
SELECT accurateCast(-1, 'UInt8') as uint8;
```
Result:
``` text
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
```
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
Converts the input value `x` to the specified data type `T`.

Always returns the [Nullable](../../sql-reference/data-types/nullable.md) type, and returns [NULL](../../sql-reference/syntax.md#null-literal) if the source value cannot be converted to the target type.

**Syntax**
```sql
accurateCastOrNull(x, T)
```
**Parameters**

- `x` — the input value.
- `T` — the name of the returned data type.

**Returned value**

- The value converted to the specified data type `T`.

**Examples**

Query:
``` sql
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
```
Result:
``` text
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
│ Nullable(UInt8) │
└────────────────────────────────────────────┘
```
Query:
``` sql
SELECT
accurateCastOrNull(-1, 'UInt8') as uint8,
accurateCastOrNull(128, 'Int8') as int8,
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
```
Result:
``` text
┌─uint8─┬─int8─┬─fixed_string─┐
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└───────┴──────┴──────────────┘
```
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}

Converts a numeric-type argument to an [IntervalType](../../sql-reference/data-types/special-data-types/interval.md) data type.
@ -454,6 +606,8 @@ toIntervalYear(number)
**Example**

Query:

``` sql
WITH
    toDate('2019-01-01') AS date,
@ -461,9 +615,11 @@ WITH
    INTERVAL 1 WEEK AS interval_week,
    toIntervalWeek(1) AS interval_to_week
SELECT
    date + interval_week,
    date + interval_to_week;
```

Result:

``` text
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
│                2019-01-08 │                   2019-01-08 │
└───────────────────────────┴──────────────────────────────┘
```
@ -479,7 +635,7 @@ SELECT
**Syntax**

``` sql
parseDateTimeBestEffort(time_string[, time_zone])
```

**Parameters**
@ -522,7 +678,7 @@ AS parseDateTimeBestEffort;
``` sql
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
AS parseDateTimeBestEffort;
```

Result:
@ -537,7 +693,7 @@ AS parseDateTimeBestEffort
``` sql
SELECT parseDateTimeBestEffort('1284101485')
AS parseDateTimeBestEffort;
```

Result:
@ -552,7 +708,7 @@ AS parseDateTimeBestEffort
``` sql
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
AS parseDateTimeBestEffort;
```

Result:
@ -566,7 +722,7 @@ AS parseDateTimeBestEffort
Query:

``` sql
SELECT parseDateTimeBestEffort('10 20:19');
```

Result:
@ -591,7 +747,7 @@ SELECT parseDateTimeBestEffort('10 20:19')
**Syntax**

``` sql
parseDateTimeBestEffortUS(time_string [, time_zone])
```

**Parameters**
@ -620,7 +776,7 @@ SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57')
AS parseDateTimeBestEffortUS;
```

Result:

``` text
┌─parseDateTimeBestEffortUS─┐
@ -635,7 +791,7 @@ SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57')
AS parseDateTimeBestEffortUS;
```

Result:

``` text
┌─parseDateTimeBestEffortUS─┐
@ -650,7 +806,7 @@ SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57')
AS parseDateTimeBestEffortUS;
```

Result:

``` text
┌─parseDateTimeBestEffortUS─┐
@ -658,6 +814,178 @@ AS parseDateTimeBestEffortUS;
└───────────────────────────┘
```
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
Works like [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), but returns `NULL` if the input string cannot be converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.

**Syntax**
``` sql
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
```
**Parameters**

- `time_string` — a string containing a date or a date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc.). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [time zone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the given time zone. Optional. [String](../../sql-reference/data-types/string.md).

**Supported non-standard formats**

- A string containing a 9-10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string containing a date and time: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string containing a date without time: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string containing a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` is substituted with `2000-01`.
- A string containing a date and time along with time zone information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.

**Returned values**

- `time_string` converted to the `DateTime` data type.
- `NULL`, if the input string cannot be converted to the `DateTime` data type.

**Examples**

Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrNull─┐
│ ᴺᵁᴸᴸ │
└─────────────────────────────────┘
```
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
Works like [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), but returns the zero date (`1970-01-01`) or the zero date with time (`1970-01-01 00:00:00`) if the input string cannot be converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.

**Syntax**
``` sql
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
```
**Parameters**

- `time_string` — a string containing a date or a date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc.). [String](../../sql-reference/data-types/string.md).
- `time_zone` — [time zone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the given time zone. Optional. [String](../../sql-reference/data-types/string.md).

**Supported non-standard formats**

- A string containing a 9-10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
- A string containing a date and time: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
- A string containing a date without time: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
- A string containing a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` is substituted with `2000-01`.
- A string containing a date and time along with time zone information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.

**Returned values**

- `time_string` converted to the `DateTime` data type.
- The zero date or the zero date with time, if the input string cannot be converted to the `DateTime` data type.

**Examples**

Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 21:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-11 00:12:57 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 2021-02-10 00:00:00 │
└─────────────────────────────────┘
```
Query:
``` sql
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
```
Result:
``` text
┌─parseDateTimeBestEffortUSOrZero─┐
│ 1970-01-01 00:00:00 │
└─────────────────────────────────┘
```
## toUnixTimestamp64Milli

## toUnixTimestamp64Micro

## toUnixTimestamp64Nano
@ -685,10 +1013,10 @@ toUnixTimestamp64Milli(value)
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Milli(dt64);
```

Result:

``` text
┌─toUnixTimestamp64Milli(dt64)─┐
@ -700,10 +1028,10 @@ SELECT toUnixTimestamp64Milli(dt64)
``` sql
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
SELECT toUnixTimestamp64Nano(dt64);
```

Result:

``` text
┌─toUnixTimestamp64Nano(dt64)─┐
@ -738,10 +1066,10 @@ fromUnixTimestamp64Milli(value [, ti])
``` sql
WITH CAST(1234567891011, 'Int64') AS i64
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
```

Result:

``` text
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
@ -772,12 +1100,12 @@ toLowCardinality(expr)
Type: `LowCardinality(expr_result_type)`

**Example**

Query:

```sql
SELECT toLowCardinality('1');
```

Result:
@ -813,10 +1141,10 @@ formatRow(format, x, y, ...)
``` sql
SELECT formatRow('CSV', number, 'good')
FROM numbers(3);
```

Result:

``` text
┌─formatRow('CSV', number, 'good')─┐
@ -854,10 +1182,10 @@ formatRowNoNewline(format, x, y, ...)
``` sql
SELECT formatRowNoNewline('CSV', number, 'good')
FROM numbers(3);
```

Result:

``` text
┌─formatRowNoNewline('CSV', number, 'good')─┐


@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
If the left side is a single column that is in the index, and the right side is a set of constants, the system uses the index for query processing.

Don't list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery.

The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or a SELECT subquery in brackets.

If the left and right parts of the `IN` subquery have different data types, ClickHouse converts the value on the left side to the data type on the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function were applied: the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, the result is [NULL](../../sql-reference/syntax.md#null-literal).

**Example**

Query:
``` sql
SELECT '1' IN (SELECT 1);
```
Result:
``` text
┌─in('1', _subquery49)─┐
│ 1 │
└──────────────────────┘
```
If the right side of the operator is the name of a table (for example, `UserID IN users`), this is equivalent to the subquery `UserID IN (SELECT * FROM users)`. Use this when working with external data that is sent along with the query. For example, the query can be sent together with a set of user IDs loaded into the temporary table `users` that should be filtered.

If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query. A sketch of this variant follows (the `visits` table and the values are illustrative).
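``` sql
-- A reusable in-memory set (names are illustrative)
CREATE TABLE userid_set (UserID UInt64) ENGINE = Set;
INSERT INTO userid_set VALUES (1), (2), (3);

-- The set is kept in RAM and is not rebuilt for each query
SELECT count() FROM visits WHERE UserID IN userid_set;
```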


@ -7,6 +7,8 @@ toc_title: mysql
Allows `SELECT` queries to be performed on data that is stored on a remote MySQL server.

**Syntax**

``` sql
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
```
@ -23,13 +25,13 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
- `password` — user password.
- `replace_query` — a flag that converts `INSERT INTO` queries to `REPLACE INTO`. Possible values:
    - `0` - the query is executed as `INSERT INTO`.
    - `1` - the query is executed as `REPLACE INTO`.
- `on_duplicate_clause` — the `ON DUPLICATE KEY on_duplicate_clause` expression added to the `INSERT` query. It can be passed only with `replace_query = 0` (if you pass both `replace_query = 1` and `on_duplicate_clause`, an exception is thrown).

    Example: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, where `on_duplicate_clause` is `UPDATE c2 = c2 + 1`. See the MySQL documentation to find which `on_duplicate_clause` you can use with the `ON DUPLICATE KEY` clause.

Simple `WHERE` clauses such as `=, !=, >, >=, <, <=` are executed on the MySQL server side.
@ -39,46 +41,59 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
A table object with the same columns as the original MySQL table.

!!! note "Note"
    To distinguish the `mysql(...)` table function from a table name with a list of column names in an `INSERT` query, use the keywords `FUNCTION` or `TABLE FUNCTION`. See the examples below.

**Examples**

Table in MySQL:

``` text
mysql> CREATE TABLE `test`.`test` (
    ->   `int_id` INT NOT NULL AUTO_INCREMENT,
    ->   `float` FLOAT NOT NULL,
    ->   PRIMARY KEY (`int_id`));

mysql> INSERT INTO test (`int_id`, `float`) VALUES (1,2);

mysql> SELECT * FROM test;
+--------+-------+
| int_id | float |
+--------+-------+
|      1 |     2 |
+--------+-------+
```
Selecting data from ClickHouse:

``` sql
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
```

``` text
┌─int_id─┬─float─┐
│      1 │     2 │
└────────┴───────┘
```

Replacing and inserting:
```sql
INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3);
INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4);
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
```
``` text
┌─int_id─┬─float─┐
│ 1 │ 3 │
│ 2 │ 4 │
└────────┴───────┘
```
**See also**

- [The MySQL table engine](../../sql-reference/table-functions/mysql.md)
- [Using MySQL as a source of external dictionary](../../sql-reference/table-functions/mysql.md#dicts-external_dicts_dict_sources-mysql)

[Original article](https://clickhouse.tech/docs/ru/sql-reference/table_functions/mysql/) <!--hide-->


@ -160,7 +160,15 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
}

WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{
    writeStringBinary(columns.toString(), out);
    out.finalize();
}
catch (...)
{
    out.finalize();
}
}
catch (...)
{


@ -50,7 +50,15 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
auto identifier = getIdentifierQuote(hdbc);

WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{
    writeStringBinary(identifier, out);
    out.finalize();
}
catch (...)
{
    out.finalize();
}
}
catch (...)
{


@ -187,9 +187,27 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
auto message = getCurrentExceptionMessage(true);
response.setStatusAndReason(
    Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, because of too soon response sending

try
{
    writeStringBinary(message, out);
    out.finalize();
}
catch (...)
{
    tryLogCurrentException(log);
}

tryLogCurrentException(log);
}

try
{
    out.finalize();
}
catch (...)
{
    tryLogCurrentException(log);
}
}


@ -61,7 +61,15 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
bool result = isSchemaAllowed(hdbc);

WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{
    writeBoolText(result, out);
    out.finalize();
}
catch (...)
{
    out.finalize();
}
}
catch (...)
{


@ -103,6 +103,7 @@ namespace CurrentMetrics
extern const Metric Revision;
extern const Metric VersionInteger;
extern const Metric MemoryTracking;
extern const Metric MaxDDLEntryID;
}
@ -1012,7 +1013,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
int pool_size = config().getInt("distributed_ddl.pool_size", 1);
if (pool_size < 1)
    throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(),
    "distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID));
}

std::unique_ptr<DNSCacheUpdater> dns_cache_updater;


@ -284,6 +284,10 @@
In bytes. Cache is single for server. Memory is allocated only on demand.
Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
Uncompressed cache is advantageous only for very short queries and in rare cases.

Note: uncompressed cache can be pointless for lz4, because memory bandwidth
is slower than multi-core decompression on some server configurations.
Enabling it can sometimes paradoxically make queries slower.
-->
<uncompressed_cache_size>8589934592</uncompressed_cache_size>


@ -7,9 +7,6 @@
<!-- Maximum memory usage for processing single query, in bytes. -->
<max_memory_usage>10000000000</max_memory_usage>

<!-- How to choose between replicas during distributed query processing.
     random - choose random replica from set of replicas with minimum number of errors
     nearest_hostname - from set of replicas with minimum number of errors, choose replica

View File

@ -30,6 +30,10 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
{
return AggregateFunctionFactory::instance().getCanonicalNameIfAny(name);
}
void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness) void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness)
{ {
@ -41,10 +45,14 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
throw Exception("AggregateFunctionFactory: the aggregate function name '" + name + "' is not unique", throw Exception("AggregateFunctionFactory: the aggregate function name '" + name + "' is not unique",
ErrorCodes::LOGICAL_ERROR); ErrorCodes::LOGICAL_ERROR);
if (case_sensitiveness == CaseInsensitive
    && !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator_with_properties).second)
    throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique",
        ErrorCodes::LOGICAL_ERROR);
if (case_sensitiveness == CaseInsensitive)
{
    auto key = Poco::toLower(name);
    if (!case_insensitive_aggregate_functions.emplace(key, creator_with_properties).second)
        throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique",
            ErrorCodes::LOGICAL_ERROR);
    case_insensitive_name_mapping[key] = name;
}
} }
static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types) static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
@ -98,6 +106,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
bool has_null_arguments) const bool has_null_arguments) const
{ {
String name = getAliasToOrName(name_param); String name = getAliasToOrName(name_param);
bool is_case_insensitive = false;
Value found; Value found;
/// Find by exact match. /// Find by exact match.
@ -107,7 +116,10 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
} }
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
{
found = jt->second; found = jt->second;
is_case_insensitive = true;
}
const Context * query_context = nullptr; const Context * query_context = nullptr;
if (CurrentThread::isInitialized()) if (CurrentThread::isInitialized())
@ -118,7 +130,8 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
out_properties = found.properties; out_properties = found.properties;
if (query_context && query_context->getSettingsRef().log_queries) if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunction, name);
query_context->addQueryFactoriesInfo(
    Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method. /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
if (!out_properties.returns_default_when_only_null && has_null_arguments) if (!out_properties.returns_default_when_only_null && has_null_arguments)
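
The hunk above registers each case-insensitive name under its lowercased key and records
the canonical spelling in case_insensitive_name_mapping, so that query logging can report
the canonical name. A minimal standalone sketch of the same bookkeeping (simplified types
and a toy creator; not ClickHouse's actual factory):

#include <algorithm>
#include <cctype>
#include <iostream>
#include <stdexcept>
#include <string>
#include <unordered_map>

struct TinyFactory
{
    std::unordered_map<std::string, int> case_insensitive_functions;            /// lowercased name -> creator stub
    std::unordered_map<std::string, std::string> case_insensitive_name_mapping; /// lowercased name -> canonical name

    static std::string toLower(std::string s)
    {
        std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::tolower(c); });
        return s;
    }

    void registerCaseInsensitive(const std::string & name, int creator)
    {
        auto key = toLower(name);
        if (!case_insensitive_functions.emplace(key, creator).second)
            throw std::runtime_error("name is not unique: " + name);
        case_insensitive_name_mapping[key] = name; /// Remember the canonical spelling.
    }

    std::string getCanonicalNameIfAny(const std::string & name) const
    {
        auto it = case_insensitive_name_mapping.find(toLower(name));
        return it == case_insensitive_name_mapping.end() ? name : it->second;
    }
};

int main()
{
    TinyFactory factory;
    factory.registerCaseInsensitive("uniqExact", /* creator = */ 1);
    std::cout << factory.getCanonicalNameIfAny("UNIQEXACT") << '\n'; /// Prints "uniqExact".
}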

View File

@ -52,7 +52,7 @@ struct MovingSumData : public MovingData<T>
{ {
static constexpr auto name = "groupArrayMovingSum"; static constexpr auto name = "groupArrayMovingSum";
T get(size_t idx, UInt64 window_size) const
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{ {
if (idx < window_size) if (idx < window_size)
return this->value[idx]; return this->value[idx];
@ -66,7 +66,7 @@ struct MovingAvgData : public MovingData<T>
{ {
static constexpr auto name = "groupArrayMovingAvg"; static constexpr auto name = "groupArrayMovingAvg";
T get(size_t idx, UInt64 window_size) const
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
{ {
if (idx < window_size) if (idx < window_size)
return this->value[idx] / window_size; return this->value[idx] / window_size;
@ -114,7 +114,7 @@ public:
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeResult>()); return std::make_shared<DataTypeArray>(std::make_shared<DataTypeResult>());
} }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{ {
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num]; auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
this->data(place).add(static_cast<ResultT>(value), arena); this->data(place).add(static_cast<ResultT>(value), arena);

View File

@ -115,7 +115,14 @@ public:
"Values for {} are expected to be Numeric, Float or Decimal, passed type {}", "Values for {} are expected to be Numeric, Float or Decimal, passed type {}",
getName(), value_type->getName()}; getName(), value_type->getName()};
result_type = value_type_without_nullable->promoteNumericType();
WhichDataType value_type_to_check(value_type);

/// Do not promote decimal because of implementation issues in this function's design.
/// If we decide to make this function more efficient, we should promote the decimal type during the sum.
if (value_type_to_check.isDecimal())
    result_type = value_type_without_nullable;
else
    result_type = value_type_without_nullable->promoteNumericType();
} }
types.emplace_back(std::make_shared<DataTypeArray>(result_type)); types.emplace_back(std::make_shared<DataTypeArray>(result_type));

View File

@ -56,7 +56,7 @@ class ReservoirSamplerDeterministic
{ {
bool good(const UInt32 hash) bool good(const UInt32 hash)
{ {
return hash == ((hash >> skip_degree) << skip_degree);
return !(hash & skip_mask);
} }
public: public:
@ -135,11 +135,8 @@ public:
throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different max sample size"); throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different max sample size");
sorted = false; sorted = false;
if (b.skip_degree > skip_degree)
{
    skip_degree = b.skip_degree;
    thinOut();
}
if (skip_degree < b.skip_degree)
    setSkipDegree(b.skip_degree);
for (const auto & sample : b.samples) for (const auto & sample : b.samples)
if (good(sample.second)) if (good(sample.second))
@ -184,22 +181,39 @@ private:
size_t total_values = 0; /// How many values were inserted (regardless if they remain in sample or not). size_t total_values = 0; /// How many values were inserted (regardless if they remain in sample or not).
bool sorted = false; bool sorted = false;
Array samples; Array samples;
UInt8 skip_degree = 0; /// The number N determining that we save only one per 2^N elements on average.

/// The number N determining that we store only one per 2^N elements on average.
UInt8 skip_degree = 0;

/// skip_mask is calculated as (2 ^ skip_degree - 1). We store an element only if (hash & skip_mask) == 0.
/// For example, if skip_degree == 0 then skip_mask == 0 and we store every element;
/// if skip_degree == 1 then skip_mask == 0b0001 and we store one per 2 elements on average;
/// if skip_degree == 4 then skip_mask == 0b1111 and we store one per 16 elements on average.
UInt32 skip_mask = 0;
void insertImpl(const T & v, const UInt32 hash) void insertImpl(const T & v, const UInt32 hash)
{ {
/// Make room for one more element.
while (samples.size() >= max_sample_size)
{
    ++skip_degree;
    if (skip_degree > detail::MAX_SKIP_DEGREE)
        throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
    thinOut();
}
while (samples.size() >= max_sample_size)
    setSkipDegree(skip_degree + 1);
samples.emplace_back(v, hash); samples.emplace_back(v, hash);
} }
void setSkipDegree(UInt8 skip_degree_)
{
if (skip_degree_ == skip_degree)
return;
if (skip_degree_ > detail::MAX_SKIP_DEGREE)
throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
skip_degree = skip_degree_;
if (skip_degree == detail::MAX_SKIP_DEGREE)
skip_mask = static_cast<UInt32>(-1);
else
skip_mask = (1 << skip_degree) - 1;
thinOut();
}
void thinOut() void thinOut()
{ {
samples.resize(std::distance(samples.begin(), samples.resize(std::distance(samples.begin(),
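
The refactoring above relies on the identity that, for skip_mask == (1 << skip_degree) - 1,
the old test hash == ((hash >> skip_degree) << skip_degree) and the new test
!(hash & skip_mask) accept exactly the same hashes: one in 2^skip_degree. A small
self-checking sketch (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>
#include <iostream>

int main()
{
    const uint32_t total = 1u << 20;
    for (unsigned skip_degree = 0; skip_degree <= 8; ++skip_degree)
    {
        const uint32_t skip_mask = (uint32_t(1) << skip_degree) - 1;
        uint32_t kept = 0;
        for (uint32_t hash = 0; hash < total; ++hash)
        {
            const bool old_good = hash == ((hash >> skip_degree) << skip_degree);
            const bool new_good = !(hash & skip_mask);
            assert(old_good == new_good); /// The two predicates agree on every hash.
            kept += new_good;
        }
        /// Over the full range, exactly one hash in 2^skip_degree passes the filter.
        std::cout << skip_degree << ": kept " << kept << " of " << total << '\n';
    }
}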

View File

@ -75,28 +75,8 @@ void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
ColumnAggregateFunction::~ColumnAggregateFunction() ColumnAggregateFunction::~ColumnAggregateFunction()
{ {
if (!func->hasTrivialDestructor() && !src) if (!func->hasTrivialDestructor() && !src)
{
    if (copiedDataInfo.empty())
    {
        for (auto * val : data)
        {
            func->destroy(val);
        }
    }
    else
    {
        size_t pos;
        for (Map::iterator it = copiedDataInfo.begin(), it_end = copiedDataInfo.end(); it != it_end; ++it)
        {
            pos = it->getValue().second;
            if (data[pos] != nullptr)
            {
                func->destroy(data[pos]);
                data[pos] = nullptr;
            }
        }
    }
}
for (auto * val : data)
    func->destroy(val);
} }
void ColumnAggregateFunction::addArena(ConstArenaPtr arena_) void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
@ -475,37 +455,14 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
/// (only as a whole, see comment above). /// (only as a whole, see comment above).
ensureOwnership(); ensureOwnership();
insertDefault(); insertDefault();
insertCopyFrom(assert_cast<const ColumnAggregateFunction &>(from).data[n]);
insertMergeFrom(from, n);
} }
void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place) void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
{ {
ensureOwnership(); ensureOwnership();
insertDefault(); insertDefault();
insertCopyFrom(place);
insertMergeFrom(place);
}
void ColumnAggregateFunction::insertCopyFrom(ConstAggregateDataPtr place)
{
Map::LookupResult result;
result = copiedDataInfo.find(place);
if (result == nullptr)
{
copiedDataInfo[place] = data.size()-1;
func->merge(data.back(), place, &createOrGetArena());
}
else
{
size_t pos = result->getValue().second;
if (pos != data.size() - 1)
{
data[data.size() - 1] = data[pos];
}
else /// insert same data to same pos, merge them.
{
func->merge(data.back(), place, &createOrGetArena());
}
}
} }
void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place) void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place)
@ -740,4 +697,5 @@ MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
return cloned_col; return cloned_col;
} }
} }
} }

View File

@ -13,8 +13,6 @@
#include <Functions/FunctionHelpers.h> #include <Functions/FunctionHelpers.h>
#include <Common/HashTable/HashMap.h>
namespace DB namespace DB
{ {
@ -84,17 +82,6 @@ private:
/// Name of the type to distinguish different aggregation states. /// Name of the type to distinguish different aggregation states.
String type_string; String type_string;
/// MergedData records, used to avoid duplicated data copy.
/// key: src pointer, val: pos in current column.
using Map = HashMap<
ConstAggregateDataPtr,
size_t,
DefaultHash<ConstAggregateDataPtr>,
HashTableGrower<3>,
HashTableAllocatorWithStackMemory<sizeof(std::pair<ConstAggregateDataPtr, size_t>) * (1 << 3)>>;
Map copiedDataInfo;
ColumnAggregateFunction() {} ColumnAggregateFunction() {}
/// Create a new column that has another column as a source. /// Create a new column that has another column as a source.
@ -153,8 +140,6 @@ public:
void insertFrom(ConstAggregateDataPtr place); void insertFrom(ConstAggregateDataPtr place);
void insertCopyFrom(ConstAggregateDataPtr place);
/// Merge state at last row with specified state in another column. /// Merge state at last row with specified state in another column.
void insertMergeFrom(ConstAggregateDataPtr place); void insertMergeFrom(ConstAggregateDataPtr place);

View File

@ -7,6 +7,7 @@
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <common/unaligned.h> #include <common/unaligned.h>
#include <common/sort.h> #include <common/sort.h>
@ -369,8 +370,12 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
compare_results, direction, nan_direction_hint); compare_results, direction, nan_direction_hint);
} }
namespace
{
template <bool positive> template <bool positive>
struct ColumnArray::Cmp
struct Cmp
{ {
const ColumnArray & parent; const ColumnArray & parent;
int nan_direction_hint; int nan_direction_hint;
@ -390,6 +395,9 @@ struct ColumnArray::Cmp
} }
}; };
}
void ColumnArray::reserve(size_t n) void ColumnArray::reserve(size_t n)
{ {
getOffsets().reserve(n); getOffsets().reserve(n);
@ -912,6 +920,21 @@ void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator)); updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
} }
ColumnPtr ColumnArray::compress() const
{
ColumnPtr data_compressed = data->compress();
ColumnPtr offsets_compressed = offsets->compress();
size_t byte_size = data_compressed->byteSize() + offsets_compressed->byteSize();
return ColumnCompressed::create(size(), byte_size,
[data_compressed = std::move(data_compressed), offsets_compressed = std::move(offsets_compressed)]
{
return ColumnArray::create(data_compressed->decompress(), offsets_compressed->decompress());
});
}
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
{ {
if (replicate_offsets.empty()) if (replicate_offsets.empty())

View File

@ -123,6 +123,8 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override; void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override void forEachSubcolumn(ColumnCallback callback) override
{ {
callback(offsets); callback(offsets);
@ -183,9 +185,6 @@ private:
template <typename Comparator> template <typename Comparator>
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const; void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
template <bool positive>
struct Cmp;
}; };

View File

@ -0,0 +1,61 @@
#include <Columns/ColumnCompressed.h>
#pragma GCC diagnostic ignored "-Wold-style-cast"
#include <lz4.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_COMPRESS;
extern const int CANNOT_DECOMPRESS;
}
std::shared_ptr<Memory<>> ColumnCompressed::compressBuffer(const void * data, size_t data_size, bool always_compress)
{
size_t max_dest_size = LZ4_COMPRESSBOUND(data_size);
if (max_dest_size > std::numeric_limits<int>::max())
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(data_size));
Memory<> compressed(max_dest_size);
auto compressed_size = LZ4_compress_default(
reinterpret_cast<const char *>(data),
compressed.data(),
data_size,
max_dest_size);
if (compressed_size <= 0)
throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column");
/// If compression is inefficient.
if (!always_compress && static_cast<size_t>(compressed_size) * 2 > data_size)
return {};
/// Shrink to fit.
auto shrank = std::make_shared<Memory<>>(compressed_size);
memcpy(shrank->data(), compressed.data(), compressed_size);
return shrank;
}
void ColumnCompressed::decompressBuffer(
const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size)
{
auto processed_size = LZ4_decompress_safe(
reinterpret_cast<const char *>(compressed_data),
reinterpret_cast<char *>(decompressed_data),
compressed_size,
decompressed_size);
if (processed_size <= 0)
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress column");
}
}
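
A usage sketch of the two helpers above (illustrative; assumes the DB namespace and the
Memory<> type from IO/BufferWithOwnMemory.h): compress a buffer, then restore it and check
the round trip. A nullptr result means the data was judged not worth compressing.

#include <cassert>
#include <vector>
#include <Columns/ColumnCompressed.h>

void exampleRoundTrip()
{
    std::vector<char> source(100000, 'x'); /// A highly compressible payload.

    auto compressed = DB::ColumnCompressed::compressBuffer(source.data(), source.size(), /* always_compress = */ false);
    if (compressed) /// nullptr would mean "not worth compressing"; never the case here.
    {
        std::vector<char> restored(source.size());
        DB::ColumnCompressed::decompressBuffer(
            compressed->data(), restored.data(), compressed->size(), restored.size());
        assert(restored == source);
    }
}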

View File

@ -0,0 +1,121 @@
#pragma once
#include <optional>
#include <Core/Field.h>
#include <Columns/IColumn.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/** Wrapper for compressed column data.
 * The only supported operations are:
 * - decompress (reconstruct the source column)
 * - get size in rows or bytes.
 *
 * It is needed to implement in-memory compression:
 * keeping compressed data in a Block, or passing it around.
 *
 * It's often beneficial to store compressed data in memory and decompress it on the fly,
 * because this lowers the required memory read throughput. More specifically, if:
 *
 * decompression speed * num CPU cores >= memory read throughput
 *
 * then reading compressed data is not slower than reading uncompressed data.
 * In-memory compression also allows keeping more data in RAM.
 */
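
/// Illustrative numbers (assumed, not part of this patch): if LZ4 decompresses at
/// roughly 3 GB/s per core, 16 cores give ~48 GB/s of aggregate decompression
/// throughput, which exceeds a typical ~30 GB/s of memory read bandwidth, so
/// reading less (compressed) data and decompressing on the fly can be a net win.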
class ColumnCompressed : public COWHelper<IColumn, ColumnCompressed>
{
public:
using Lazy = std::function<ColumnPtr()>;
ColumnCompressed(size_t rows_, size_t bytes_, Lazy lazy_)
: rows(rows_), bytes(bytes_), lazy(lazy_)
{
}
const char * getFamilyName() const override { return "Compressed"; }
size_t size() const override { return rows; }
size_t byteSize() const override { return bytes; }
size_t allocatedBytes() const override { return bytes; }
ColumnPtr decompress() const override
{
return lazy();
}
/** Wrap an uncompressed column without compressing it.
 * This method can be used when compression is not worth doing,
 * but returning a ColumnCompressed is still needed to keep a uniform block structure.
 */
static ColumnPtr wrap(ColumnPtr column)
{
return ColumnCompressed::create(
column->size(),
column->allocatedBytes(),
[column = std::move(column)]{ return column; });
}
/// Helper methods for compression.
/// If the data is not worth compressing and 'always_compress' is false, returns nullptr.
/// Note: shared_ptr is used so the result can be captured by a copyable std::function.
static std::shared_ptr<Memory<>> compressBuffer(const void * data, size_t data_size, bool always_compress);
static void decompressBuffer(
const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size);
/// All other methods throw exception.
TypeIndex getDataType() const override { throwMustBeDecompressed(); }
Field operator[](size_t) const override { throwMustBeDecompressed(); }
void get(size_t, Field &) const override { throwMustBeDecompressed(); }
StringRef getDataAt(size_t) const override { throwMustBeDecompressed(); }
void insert(const Field &) override { throwMustBeDecompressed(); }
void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
void insertDefault() override { throwMustBeDecompressed(); }
void popBack(size_t) override { throwMustBeDecompressed(); }
StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
{
throwMustBeDecompressed();
}
void getPermutation(bool, size_t, int, Permutation &) const override { throwMustBeDecompressed(); }
void updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const override { throwMustBeDecompressed(); }
ColumnPtr replicate(const Offsets &) const override { throwMustBeDecompressed(); }
MutableColumns scatter(ColumnIndex, const Selector &) const override { throwMustBeDecompressed(); }
void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); }
void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); }
size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); }
protected:
size_t rows;
size_t bytes;
Lazy lazy;
private:
[[noreturn]] void throwMustBeDecompressed() const
{
throw Exception("ColumnCompressed must be decompressed before use", ErrorCodes::LOGICAL_ERROR);
}
};
}

View File

@ -14,6 +14,7 @@
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnDecimal.h> #include <Columns/ColumnDecimal.h>
#include <Columns/ColumnCompressed.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
@ -346,6 +347,30 @@ void ColumnDecimal<T>::gather(ColumnGathererStream & gatherer)
gatherer.gather(*this); gatherer.gather(*this);
} }
template <typename T>
ColumnPtr ColumnDecimal<T>::compress() const
{
size_t source_size = data.size() * sizeof(T);
/// Don't compress small blocks.
if (source_size < 4096) /// A wild guess.
return ColumnCompressed::wrap(this->getPtr());
auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false);
if (!compressed)
return ColumnCompressed::wrap(this->getPtr());
return ColumnCompressed::create(data.size(), compressed->size(),
[compressed = std::move(compressed), column_size = data.size(), scale = this->scale]
{
auto res = ColumnDecimal<T>::create(column_size, scale);
ColumnCompressed::decompressBuffer(
compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T));
return res;
});
}
template <typename T> template <typename T>
void ColumnDecimal<T>::getExtremes(Field & min, Field & max) const void ColumnDecimal<T>::getExtremes(Field & min, Field & max) const
{ {

View File

@ -172,6 +172,8 @@ public:
return false; return false;
} }
ColumnPtr compress() const override;
void insertValue(const T value) { data.push_back(value); } void insertValue(const T value) { data.push_back(value); }
Container & getData() { return data; } Container & getData() { return data; }

View File

@ -1,6 +1,7 @@
#include <Columns/ColumnFixedString.h> #include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/Arena.h> #include <Common/Arena.h>
@ -446,18 +447,31 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const
get(max_idx, max); get(max_idx, max);
} }
void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size)
{
    size_t length = data.size() - old_size;
    if (length < n)
    {
        data.resize_fill(old_size + n);
    }
    else if (length > n)
    {
        data.resize_assume_reserved(old_size);
        throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
    }
}
ColumnPtr ColumnFixedString::compress() const
{
    size_t source_size = chars.size();

    /// Don't compress small blocks.
    if (source_size < 4096) /// A wild guess.
        return ColumnCompressed::wrap(this->getPtr());

    auto compressed = ColumnCompressed::compressBuffer(chars.data(), source_size, false);

    if (!compressed)
        return ColumnCompressed::wrap(this->getPtr());

    size_t column_size = size();

    return ColumnCompressed::create(column_size, compressed->size(),
        [compressed = std::move(compressed), column_size, n = n]
        {
            size_t chars_size = n * column_size;
            auto res = ColumnFixedString::create(n);
            res->getChars().resize(chars_size);
            ColumnCompressed::decompressBuffer(
                compressed->data(), res->getChars().data(), compressed->size(), chars_size);
            return res;
        });
}
} }

View File

@ -156,6 +156,8 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override; void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void reserve(size_t size) override void reserve(size_t size) override
{ {
chars.reserve(n * size); chars.reserve(n * size);
@ -182,8 +184,6 @@ public:
const Chars & getChars() const { return chars; } const Chars & getChars() const { return chars; }
size_t getN() const { return n; } size_t getN() const { return n; }
static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size);
}; };
} }

View File

@ -1,4 +1,5 @@
#include <Columns/ColumnMap.h> #include <Columns/ColumnMap.h>
#include <Columns/ColumnCompressed.h>
#include <Columns/IColumnImpl.h> #include <Columns/IColumnImpl.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <IO/WriteBufferFromString.h> #include <IO/WriteBufferFromString.h>
@ -228,7 +229,21 @@ void ColumnMap::protect()
void ColumnMap::getExtremes(Field & min, Field & max) const void ColumnMap::getExtremes(Field & min, Field & max) const
{ {
nested->getExtremes(min, max);
Field nested_min;
Field nested_max;
nested->getExtremes(nested_min, nested_max);
/// Convert the result Array fields to Map fields, because clients expect the min and max fields to have type Map.
Array nested_min_value = nested_min.get<Array>();
Array nested_max_value = nested_max.get<Array>();
Map map_min_value(nested_min_value.begin(), nested_min_value.end());
Map map_max_value(nested_max_value.begin(), nested_max_value.end());
min = std::move(map_min_value);
max = std::move(map_max_value);
} }
void ColumnMap::forEachSubcolumn(ColumnCallback callback) void ColumnMap::forEachSubcolumn(ColumnCallback callback)
@ -243,4 +258,13 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const
return false; return false;
} }
ColumnPtr ColumnMap::compress() const
{
auto compressed = nested->compress();
return ColumnCompressed::create(size(), compressed->byteSize(), [compressed = std::move(compressed)]
{
return ColumnMap::create(compressed->decompress());
});
}
} }

View File

@ -91,6 +91,8 @@ public:
const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); } const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); }
ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); } ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); }
ColumnPtr compress() const override;
}; };
} }

View File

@ -7,6 +7,7 @@
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnCompressed.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
@ -511,6 +512,20 @@ void ColumnNullable::protect()
getNullMapColumn().protect(); getNullMapColumn().protect();
} }
ColumnPtr ColumnNullable::compress() const
{
ColumnPtr nested_compressed = nested_column->compress();
ColumnPtr null_map_compressed = null_map->compress();
size_t byte_size = nested_column->byteSize() + null_map->byteSize();
return ColumnCompressed::create(size(), byte_size,
[nested_column = std::move(nested_compressed), null_map = std::move(null_map_compressed)]
{
return ColumnNullable::create(nested_column->decompress(), null_map->decompress());
});
}
namespace namespace
{ {

View File

@ -117,6 +117,8 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override; void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void forEachSubcolumn(ColumnCallback callback) override void forEachSubcolumn(ColumnCallback callback) override
{ {
callback(nested_column); callback(nested_column);

View File

@ -2,6 +2,7 @@
#include <Columns/Collator.h> #include <Columns/Collator.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <Common/Arena.h> #include <Common/Arena.h>
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
@ -525,6 +526,47 @@ void ColumnString::getExtremes(Field & min, Field & max) const
} }
ColumnPtr ColumnString::compress() const
{
size_t source_chars_size = chars.size();
size_t source_offsets_size = offsets.size() * sizeof(Offset);
/// Don't compress small blocks.
if (source_chars_size < 4096) /// A wild guess.
return ColumnCompressed::wrap(this->getPtr());
auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size, false);
/// Return original column if not compressible.
if (!chars_compressed)
return ColumnCompressed::wrap(this->getPtr());
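
/// Offsets are compressed unconditionally (always_compress = true): the decompression
/// lambda below always calls decompressBuffer on them, so they must be in compressed form.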
auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, true);
return ColumnCompressed::create(offsets.size(), chars_compressed->size() + offsets_compressed->size(),
[
chars_compressed = std::move(chars_compressed),
offsets_compressed = std::move(offsets_compressed),
source_chars_size,
source_offsets_elements = offsets.size()
]
{
auto res = ColumnString::create();
res->getChars().resize(source_chars_size);
res->getOffsets().resize(source_offsets_elements);
ColumnCompressed::decompressBuffer(
chars_compressed->data(), res->getChars().data(), chars_compressed->size(), source_chars_size);
ColumnCompressed::decompressBuffer(
offsets_compressed->data(), res->getOffsets().data(), offsets_compressed->size(), source_offsets_elements * sizeof(Offset));
return res;
});
}
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
{ {
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_); const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);

View File

@ -261,6 +261,8 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override; void gather(ColumnGathererStream & gatherer_stream) override;
ColumnPtr compress() const override;
void reserve(size_t n) override; void reserve(size_t n) override;
void getExtremes(Field & min, Field & max) const override; void getExtremes(Field & min, Field & max) const override;

View File

@ -1,6 +1,7 @@
#include <Columns/ColumnTuple.h> #include <Columns/ColumnTuple.h>
#include <Columns/IColumnImpl.h> #include <Columns/IColumnImpl.h>
#include <Columns/ColumnCompressed.h>
#include <Core/Field.h> #include <Core/Field.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <IO/Operators.h> #include <IO/Operators.h>
@ -486,7 +487,7 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
bool ColumnTuple::isCollationSupported() const bool ColumnTuple::isCollationSupported() const
{ {
for (const auto& column : columns)
for (const auto & column : columns)
{ {
if (column->isCollationSupported()) if (column->isCollationSupported())
return true; return true;
@ -495,4 +496,25 @@ bool ColumnTuple::isCollationSupported() const
} }
ColumnPtr ColumnTuple::compress() const
{
size_t byte_size = 0;
Columns compressed;
compressed.reserve(columns.size());
for (const auto & column : columns)
{
auto compressed_column = column->compress();
byte_size += compressed_column->byteSize();
compressed.emplace_back(std::move(compressed_column));
}
return ColumnCompressed::create(size(), byte_size,
[compressed = std::move(compressed)]() mutable
{
for (auto & column : compressed)
column = column->decompress();
return ColumnTuple::create(compressed);
});
}
} }

View File

@ -89,6 +89,7 @@ public:
void forEachSubcolumn(ColumnCallback callback) override; void forEachSubcolumn(ColumnCallback callback) override;
bool structureEquals(const IColumn & rhs) const override; bool structureEquals(const IColumn & rhs) const override;
bool isCollationSupported() const override; bool isCollationSupported() const override;
ColumnPtr compress() const override;
size_t tupleSize() const { return columns.size(); } size_t tupleSize() const { return columns.size(); }

View File

@ -28,13 +28,18 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
} }
/** Stores another column with unique values
 * and also an index that allows finding a position by value.
 *
 * This column is not used on its own, but only as an implementation detail of ColumnLowCardinality.
 */
template <typename ColumnType> template <typename ColumnType>
class ColumnUnique final : public COWHelper<IColumnUnique, ColumnUnique<ColumnType>> class ColumnUnique final : public COWHelper<IColumnUnique, ColumnUnique<ColumnType>>
{ {
friend class COWHelper<IColumnUnique, ColumnUnique<ColumnType>>; friend class COWHelper<IColumnUnique, ColumnUnique<ColumnType>>;
private: private:
explicit ColumnUnique(MutableColumnPtr && holder, bool is_nullable);
ColumnUnique(MutableColumnPtr && holder, bool is_nullable);
explicit ColumnUnique(const IDataType & type); explicit ColumnUnique(const IDataType & type);
ColumnUnique(const ColumnUnique & other); ColumnUnique(const ColumnUnique & other);

View File

@ -2,6 +2,7 @@
#include <pdqsort.h> #include <pdqsort.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/Arena.h> #include <Common/Arena.h>
@ -520,6 +521,33 @@ void ColumnVector<T>::getExtremes(Field & min, Field & max) const
max = NearestFieldType<T>(cur_max); max = NearestFieldType<T>(cur_max);
} }
#pragma GCC diagnostic ignored "-Wold-style-cast"
template <typename T>
ColumnPtr ColumnVector<T>::compress() const
{
size_t source_size = data.size() * sizeof(T);
/// Don't compress small blocks.
if (source_size < 4096) /// A wild guess.
return ColumnCompressed::wrap(this->getPtr());
auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false);
if (!compressed)
return ColumnCompressed::wrap(this->getPtr());
return ColumnCompressed::create(data.size(), compressed->size(),
[compressed = std::move(compressed), column_size = data.size()]
{
auto res = ColumnVector<T>::create(column_size);
ColumnCompressed::decompressBuffer(
compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T));
return res;
});
}
/// Explicit template instantiations - to avoid code bloat in headers. /// Explicit template instantiations - to avoid code bloat in headers.
template class ColumnVector<UInt8>; template class ColumnVector<UInt8>;
template class ColumnVector<UInt16>; template class ColumnVector<UInt16>;
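
Taken together, these per-type compress() implementations give every column a uniform
in-memory compression interface with lazy decompression. A hedged usage sketch (assumes
the compress()/decompress() pair added to IColumn in this diff):

#include <Columns/ColumnVector.h>

void exampleColumnRoundTrip()
{
    auto col = DB::ColumnVector<UInt64>::create();
    for (UInt64 i = 0; i < 100000; ++i)
        col->insertValue(i % 10); /// Repetitive data compresses well.

    DB::ColumnPtr stored = col->compress();        /// ColumnCompressed, or a cheap wrapper if compression isn't worth it.
    DB::ColumnPtr restored = stored->decompress(); /// Reconstructs the original column on demand.
}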

Some files were not shown because too many files have changed in this diff.