mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into brotli
This commit is contained in:
commit
991718d459
4
.github/codecov.yml
vendored
4
.github/codecov.yml
vendored
@ -1,5 +1,5 @@
|
||||
codecov:
|
||||
max_report_age: off
|
||||
max_report_age: "off"
|
||||
strict_yaml_branch: "master"
|
||||
|
||||
ignore:
|
||||
@ -14,4 +14,4 @@ ignore:
|
||||
comment: false
|
||||
|
||||
github_checks:
|
||||
annotations: false
|
||||
annotations: false
|
||||
|
38
.github/workflows/anchore-analysis.yml
vendored
38
.github/workflows/anchore-analysis.yml
vendored
@ -8,9 +8,9 @@
|
||||
|
||||
name: Docker Container Scan (clickhouse-server)
|
||||
|
||||
on:
|
||||
"on":
|
||||
pull_request:
|
||||
paths:
|
||||
paths:
|
||||
- docker/server/Dockerfile
|
||||
- .github/workflows/anchore-analysis.yml
|
||||
schedule:
|
||||
@ -20,20 +20,20 @@ jobs:
|
||||
Anchore-Build-Scan:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout the code
|
||||
uses: actions/checkout@v2
|
||||
- name: Build the Docker image
|
||||
run: |
|
||||
cd docker/server
|
||||
perl -pi -e 's|=\$version||g' Dockerfile
|
||||
docker build . --file Dockerfile --tag localbuild/testimage:latest
|
||||
- name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
|
||||
uses: anchore/scan-action@v2
|
||||
id: scan
|
||||
with:
|
||||
image: "localbuild/testimage:latest"
|
||||
acs-report-enable: true
|
||||
- name: Upload Anchore Scan Report
|
||||
uses: github/codeql-action/upload-sarif@v1
|
||||
with:
|
||||
sarif_file: ${{ steps.scan.outputs.sarif }}
|
||||
- name: Checkout the code
|
||||
uses: actions/checkout@v2
|
||||
- name: Build the Docker image
|
||||
run: |
|
||||
cd docker/server
|
||||
perl -pi -e 's|=\$version||g' Dockerfile
|
||||
docker build . --file Dockerfile --tag localbuild/testimage:latest
|
||||
- name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
|
||||
uses: anchore/scan-action@v2
|
||||
id: scan
|
||||
with:
|
||||
image: "localbuild/testimage:latest"
|
||||
acs-report-enable: true
|
||||
- name: Upload Anchore Scan Report
|
||||
uses: github/codeql-action/upload-sarif@v1
|
||||
with:
|
||||
sarif_file: ${{ steps.scan.outputs.sarif }}
|
||||
|
16
.potato.yml
16
.potato.yml
@ -14,14 +14,14 @@ handlers:
|
||||
# The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker.
|
||||
github:pullRequest:labeled:
|
||||
data:
|
||||
# The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
|
||||
queue: CLICKHOUSEDOCS
|
||||
# The issue title.
|
||||
summary: '[Potato] Pull Request #{{pullRequest.number}}'
|
||||
# The issue description.
|
||||
description: >
|
||||
# The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
|
||||
queue: CLICKHOUSEDOCS
|
||||
# The issue title.
|
||||
summary: '[Potato] Pull Request #{{pullRequest.number}}'
|
||||
# The issue description.
|
||||
description: >
|
||||
{{pullRequest.description}}
|
||||
|
||||
Ссылка на Pull Request: {{pullRequest.webUrl}}
|
||||
# The condition for creating the Yandex.Tracker issue.
|
||||
condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length
|
||||
# The condition for creating the Yandex.Tracker issue.
|
||||
condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length
|
||||
|
15
.yamllint
Normal file
15
.yamllint
Normal file
@ -0,0 +1,15 @@
|
||||
# vi: ft=yaml
|
||||
extends: default
|
||||
|
||||
rules:
|
||||
indentation:
|
||||
level: warning
|
||||
indent-sequences: consistent
|
||||
line-length:
|
||||
# there are some bash -c "", so this is OK
|
||||
max: 300
|
||||
level: warning
|
||||
comments:
|
||||
min-spaces-from-content: 1
|
||||
document-start:
|
||||
present: false
|
@ -7,6 +7,7 @@
|
||||
#include <ctime>
|
||||
#include <string>
|
||||
|
||||
|
||||
#define DATE_LUT_MAX (0xFFFFFFFFU - 86400)
|
||||
#define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFFU / 86400)
|
||||
/// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows removing an extra check.
|
||||
@ -249,7 +250,7 @@ public:
|
||||
{
|
||||
DayNum index = findIndex(t);
|
||||
|
||||
if (unlikely(index == 0))
|
||||
if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
|
||||
return t + offset_at_start_of_epoch;
|
||||
|
||||
time_t res = t - lut[index].date;
|
||||
@ -264,18 +265,18 @@ public:
|
||||
{
|
||||
DayNum index = findIndex(t);
|
||||
|
||||
/// If it is not 1970 year (findIndex found nothing appropriate),
|
||||
/// than limit number of hours to avoid insane results like 1970-01-01 89:28:15
|
||||
if (unlikely(index == 0))
|
||||
/// If it is overflow case,
|
||||
/// then limit number of hours to avoid insane results like 1970-01-01 89:28:15
|
||||
if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
|
||||
return static_cast<unsigned>((t + offset_at_start_of_epoch) / 3600) % 24;
|
||||
|
||||
time_t res = t - lut[index].date;
|
||||
time_t time = t - lut[index].date;
|
||||
|
||||
/// Data is cleaned to avoid possibility of underflow.
|
||||
if (res >= lut[index].time_at_offset_change)
|
||||
res += lut[index].amount_of_offset_change;
|
||||
if (time >= lut[index].time_at_offset_change)
|
||||
time += lut[index].amount_of_offset_change;
|
||||
|
||||
return res / 3600;
|
||||
unsigned res = time / 3600;
|
||||
return res <= 23 ? res : 0;
|
||||
}
|
||||
|
||||
/** Calculating offset from UTC in seconds.
|
||||
@ -314,12 +315,12 @@ public:
|
||||
* each minute, with added or subtracted leap second, spans exactly 60 unix timestamps.
|
||||
*/
|
||||
|
||||
inline unsigned toSecond(time_t t) const { return t % 60; }
|
||||
inline unsigned toSecond(time_t t) const { return UInt32(t) % 60; }
|
||||
|
||||
inline unsigned toMinute(time_t t) const
|
||||
{
|
||||
if (offset_is_whole_number_of_hours_everytime)
|
||||
return (t / 60) % 60;
|
||||
return (UInt32(t) / 60) % 60;
|
||||
|
||||
UInt32 date = find(t).date;
|
||||
return (UInt32(t) - date) / 60 % 60;
|
||||
@ -555,9 +556,7 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* check and change mode to effective
|
||||
*/
|
||||
/// Check and change mode to effective.
|
||||
inline UInt8 check_week_mode(UInt8 mode) const
|
||||
{
|
||||
UInt8 week_format = (mode & 7);
|
||||
@ -566,10 +565,9 @@ public:
|
||||
return week_format;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calc weekday from d
|
||||
* Returns 0 for monday, 1 for tuesday ...
|
||||
*/
|
||||
/** Calculate weekday from d.
|
||||
* Returns 0 for monday, 1 for tuesday...
|
||||
*/
|
||||
inline unsigned calc_weekday(DayNum d, bool sunday_first_day_of_week) const
|
||||
{
|
||||
if (!sunday_first_day_of_week)
|
||||
@ -578,7 +576,7 @@ public:
|
||||
return toDayOfWeek(DayNum(d + 1)) - 1;
|
||||
}
|
||||
|
||||
/* Calc days in one year. */
|
||||
/// Calculate days in one year.
|
||||
inline unsigned calc_days_in_year(UInt16 year) const
|
||||
{
|
||||
return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
|
||||
|
@ -6,6 +6,25 @@
|
||||
|
||||
namespace common
|
||||
{
|
||||
/// Multiply and ignore overflow.
|
||||
template <typename T1, typename T2>
|
||||
inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
|
||||
{
|
||||
return x * y;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
inline auto NO_SANITIZE_UNDEFINED addIgnoreOverflow(T1 x, T2 y)
|
||||
{
|
||||
return x + y;
|
||||
}
|
||||
|
||||
template <typename T1, typename T2>
|
||||
inline auto NO_SANITIZE_UNDEFINED subIgnoreOverflow(T1 x, T2 y)
|
||||
{
|
||||
return x - y;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool addOverflow(T x, T y, T & res)
|
||||
{
|
||||
@ -35,14 +54,14 @@ namespace common
|
||||
{
|
||||
static constexpr __int128 min_int128 = minInt128();
|
||||
static constexpr __int128 max_int128 = maxInt128();
|
||||
res = x + y;
|
||||
res = addIgnoreOverflow(x, y);
|
||||
return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res)
|
||||
{
|
||||
res = x + y;
|
||||
res = addIgnoreOverflow(x, y);
|
||||
return (y > 0 && x > std::numeric_limits<wInt256>::max() - y) ||
|
||||
(y < 0 && x < std::numeric_limits<wInt256>::min() - y);
|
||||
}
|
||||
@ -50,7 +69,7 @@ namespace common
|
||||
template <>
|
||||
inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
|
||||
{
|
||||
res = x + y;
|
||||
res = addIgnoreOverflow(x, y);
|
||||
return x > std::numeric_limits<wUInt256>::max() - y;
|
||||
}
|
||||
|
||||
@ -83,14 +102,14 @@ namespace common
|
||||
{
|
||||
static constexpr __int128 min_int128 = minInt128();
|
||||
static constexpr __int128 max_int128 = maxInt128();
|
||||
res = x - y;
|
||||
res = subIgnoreOverflow(x, y);
|
||||
return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res)
|
||||
{
|
||||
res = x - y;
|
||||
res = subIgnoreOverflow(x, y);
|
||||
return (y < 0 && x > std::numeric_limits<wInt256>::max() + y) ||
|
||||
(y > 0 && x < std::numeric_limits<wInt256>::min() + y);
|
||||
}
|
||||
@ -98,7 +117,7 @@ namespace common
|
||||
template <>
|
||||
inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
|
||||
{
|
||||
res = x - y;
|
||||
res = subIgnoreOverflow(x, y);
|
||||
return x < y;
|
||||
}
|
||||
|
||||
@ -129,40 +148,33 @@ namespace common
|
||||
template <>
|
||||
inline bool mulOverflow(__int128 x, __int128 y, __int128 & res)
|
||||
{
|
||||
res = static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(y); /// Avoid signed integer overflow.
|
||||
res = mulIgnoreOverflow(x, y);
|
||||
if (!x || !y)
|
||||
return false;
|
||||
|
||||
unsigned __int128 a = (x > 0) ? x : -x;
|
||||
unsigned __int128 b = (y > 0) ? y : -y;
|
||||
return (a * b) / b != a;
|
||||
return mulIgnoreOverflow(a, b) / b != a;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res)
|
||||
{
|
||||
res = x * y;
|
||||
res = mulIgnoreOverflow(x, y);
|
||||
if (!x || !y)
|
||||
return false;
|
||||
|
||||
wInt256 a = (x > 0) ? x : -x;
|
||||
wInt256 b = (y > 0) ? y : -y;
|
||||
return (a * b) / b != a;
|
||||
return mulIgnoreOverflow(a, b) / b != a;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
|
||||
{
|
||||
res = x * y;
|
||||
res = mulIgnoreOverflow(x, y);
|
||||
if (!x || !y)
|
||||
return false;
|
||||
return (x * y) / y != x;
|
||||
}
|
||||
|
||||
/// Multiply and ignore overflow.
|
||||
template <typename T1, typename T2>
|
||||
inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
|
||||
{
|
||||
return x * y;
|
||||
return res / y != x;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,20 @@
|
||||
#pragma once
|
||||
|
||||
/// __has_feature supported only by clang.
|
||||
///
|
||||
/// But libcxx/libcxxabi overrides it to 0,
|
||||
/// thus the checks for __has_feature will be wrong.
|
||||
///
|
||||
/// NOTE:
|
||||
/// - __has_feature cannot be simply undefined,
|
||||
/// since this will be broken if some C++ header will be included after
|
||||
/// including <common/defines.h>
|
||||
/// - it should not have fallback to 0,
|
||||
/// since this may create false-positive detection (common problem)
|
||||
#if defined(__clang__) && defined(__has_feature)
|
||||
# define ch_has_feature __has_feature
|
||||
#endif
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# if !defined(likely)
|
||||
# define likely(x) (x)
|
||||
@ -32,8 +47,8 @@
|
||||
|
||||
/// Check for presence of address sanitizer
|
||||
#if !defined(ADDRESS_SANITIZER)
|
||||
# if defined(__has_feature)
|
||||
# if __has_feature(address_sanitizer)
|
||||
# if defined(ch_has_feature)
|
||||
# if ch_has_feature(address_sanitizer)
|
||||
# define ADDRESS_SANITIZER 1
|
||||
# endif
|
||||
# elif defined(__SANITIZE_ADDRESS__)
|
||||
@ -42,8 +57,8 @@
|
||||
#endif
|
||||
|
||||
#if !defined(THREAD_SANITIZER)
|
||||
# if defined(__has_feature)
|
||||
# if __has_feature(thread_sanitizer)
|
||||
# if defined(ch_has_feature)
|
||||
# if ch_has_feature(thread_sanitizer)
|
||||
# define THREAD_SANITIZER 1
|
||||
# endif
|
||||
# elif defined(__SANITIZE_THREAD__)
|
||||
@ -52,8 +67,8 @@
|
||||
#endif
|
||||
|
||||
#if !defined(MEMORY_SANITIZER)
|
||||
# if defined(__has_feature)
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# if defined(ch_has_feature)
|
||||
# if ch_has_feature(memory_sanitizer)
|
||||
# define MEMORY_SANITIZER 1
|
||||
# endif
|
||||
# elif defined(__MEMORY_SANITIZER__)
|
||||
|
@ -15,11 +15,11 @@
|
||||
#endif
|
||||
|
||||
#define __msan_unpoison(X, Y) // NOLINT
|
||||
#if defined(__has_feature)
|
||||
# if __has_feature(memory_sanitizer)
|
||||
# undef __msan_unpoison
|
||||
# include <sanitizer/msan_interface.h>
|
||||
# endif
|
||||
#if defined(ch_has_feature)
|
||||
# if ch_has_feature(memory_sanitizer)
|
||||
# undef __msan_unpoison
|
||||
# include <sanitizer/msan_interface.h>
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#include <link.h>
|
||||
|
@ -416,7 +416,9 @@ static void sanitizerDeathCallback()
|
||||
else
|
||||
log_message = "Terminate called without an active exception";
|
||||
|
||||
static const size_t buf_size = 1024;
|
||||
/// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe
|
||||
/// And the buffer should not be too small because our exception messages can be large.
|
||||
static constexpr size_t buf_size = PIPE_BUF;
|
||||
|
||||
if (log_message.size() > buf_size - 16)
|
||||
log_message.resize(buf_size - 16);
|
||||
|
@ -51,10 +51,11 @@ Connection::Connection(
|
||||
const char* ssl_key,
|
||||
unsigned timeout,
|
||||
unsigned rw_timeout,
|
||||
bool enable_local_infile)
|
||||
bool enable_local_infile,
|
||||
bool opt_reconnect)
|
||||
: Connection()
|
||||
{
|
||||
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile);
|
||||
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile, opt_reconnect);
|
||||
}
|
||||
|
||||
Connection::Connection(const std::string & config_name)
|
||||
@ -80,7 +81,8 @@ void Connection::connect(const char* db,
|
||||
const char * ssl_key,
|
||||
unsigned timeout,
|
||||
unsigned rw_timeout,
|
||||
bool enable_local_infile)
|
||||
bool enable_local_infile,
|
||||
bool opt_reconnect)
|
||||
{
|
||||
if (is_connected)
|
||||
disconnect();
|
||||
@ -104,9 +106,8 @@ void Connection::connect(const char* db,
|
||||
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
|
||||
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
|
||||
|
||||
/// Enables auto-reconnect.
|
||||
bool reconnect = true;
|
||||
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
|
||||
/// See C API Developer Guide: Automatic Reconnection Control
|
||||
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&opt_reconnect)))
|
||||
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));
|
||||
|
||||
/// Specifies a particular SSL key and certificate, if needed
|
||||
|
@ -14,6 +14,8 @@
|
||||
|
||||
/// Disable LOAD DATA LOCAL INFILE because it is insecure
|
||||
#define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false
|
||||
/// See https://dev.mysql.com/doc/c-api/5.7/en/c-api-auto-reconnect.html
|
||||
#define MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT true
|
||||
|
||||
|
||||
namespace mysqlxx
|
||||
@ -76,7 +78,8 @@ public:
|
||||
const char * ssl_key = "",
|
||||
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
|
||||
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
|
||||
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
|
||||
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
|
||||
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
|
||||
|
||||
/// Creates a connection. Can be used if Poco::Util::Application is in use.
|
||||
/// All settings are read from the config_name section of the configuration.
|
||||
@ -96,7 +99,8 @@ public:
|
||||
const char* ssl_key,
|
||||
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
|
||||
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
|
||||
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
|
||||
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
|
||||
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
|
||||
|
||||
void connect(const std::string & config_name)
|
||||
{
|
||||
@ -112,6 +116,7 @@ public:
|
||||
std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", "");
|
||||
std::string ssl_key = cfg.getString(config_name + ".ssl_key", "");
|
||||
bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
|
||||
bool opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
|
||||
|
||||
unsigned timeout =
|
||||
cfg.getInt(config_name + ".connect_timeout",
|
||||
@ -135,7 +140,8 @@ public:
|
||||
ssl_key.c_str(),
|
||||
timeout,
|
||||
rw_timeout,
|
||||
enable_local_infile);
|
||||
enable_local_infile,
|
||||
opt_reconnect);
|
||||
}
|
||||
|
||||
/// If MySQL connection was established.
|
||||
|
@ -78,6 +78,9 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
|
||||
|
||||
enable_local_infile = cfg.getBool(config_name + ".enable_local_infile",
|
||||
cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE));
|
||||
|
||||
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect",
|
||||
cfg.getBool(parent_config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT));
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -96,6 +99,8 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co
|
||||
|
||||
enable_local_infile = cfg.getBool(
|
||||
config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
|
||||
|
||||
opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
|
||||
}
|
||||
|
||||
connect_timeout = cfg.getInt(config_name + ".connect_timeout",
|
||||
@ -233,7 +238,8 @@ void Pool::Entry::forceConnected() const
|
||||
pool->ssl_key.c_str(),
|
||||
pool->connect_timeout,
|
||||
pool->rw_timeout,
|
||||
pool->enable_local_infile);
|
||||
pool->enable_local_infile,
|
||||
pool->opt_reconnect);
|
||||
}
|
||||
}
|
||||
|
||||
@ -248,7 +254,7 @@ bool Pool::Entry::tryForceConnected() const
|
||||
if (prev_connection_id != current_connection_id)
|
||||
{
|
||||
auto & logger = Poco::Util::Application::instance().logger();
|
||||
logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu",
|
||||
logger.information("Reconnected to mysql server. Connection id changed: %lu -> %lu",
|
||||
prev_connection_id, current_connection_id);
|
||||
}
|
||||
return true;
|
||||
@ -294,7 +300,8 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
|
||||
ssl_key.c_str(),
|
||||
connect_timeout,
|
||||
rw_timeout,
|
||||
enable_local_infile);
|
||||
enable_local_infile,
|
||||
opt_reconnect);
|
||||
}
|
||||
catch (mysqlxx::ConnectionFailed & e)
|
||||
{
|
||||
|
@ -165,10 +165,12 @@ public:
|
||||
unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT,
|
||||
unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS,
|
||||
unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS,
|
||||
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE)
|
||||
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
|
||||
bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT)
|
||||
: default_connections(default_connections_), max_connections(max_connections_),
|
||||
db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_),
|
||||
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_) {}
|
||||
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_),
|
||||
opt_reconnect(opt_reconnect_) {}
|
||||
|
||||
Pool(const Pool & other)
|
||||
: default_connections{other.default_connections},
|
||||
@ -177,7 +179,7 @@ public:
|
||||
user{other.user}, password{other.password},
|
||||
port{other.port}, socket{other.socket},
|
||||
connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout},
|
||||
enable_local_infile{other.enable_local_infile}
|
||||
enable_local_infile{other.enable_local_infile}, opt_reconnect(other.opt_reconnect)
|
||||
{}
|
||||
|
||||
Pool & operator=(const Pool &) = delete;
|
||||
@ -231,6 +233,7 @@ private:
|
||||
std::string ssl_cert;
|
||||
std::string ssl_key;
|
||||
bool enable_local_infile;
|
||||
bool opt_reconnect;
|
||||
|
||||
/// True if connection was established at least once.
|
||||
bool was_successful{false};
|
||||
|
@ -1,3 +1,8 @@
|
||||
#include <algorithm>
|
||||
#include <ctime>
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
||||
#include <mysqlxx/PoolWithFailover.h>
|
||||
|
||||
|
||||
@ -33,6 +38,19 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & con
|
||||
std::make_shared<Pool>(config_, replica_name, default_connections_, max_connections_, config_name_.c_str()));
|
||||
}
|
||||
}
|
||||
|
||||
/// PoolWithFailover objects are stored in a cache inside PoolFactory.
|
||||
/// This cache is reset by ExternalDictionariesLoader after every SYSTEM RELOAD DICTIONAR{Y|IES}
|
||||
/// which triggers massive re-constructing of connection pools.
|
||||
/// The state of PRNGs like std::mt19937 is considered to be quite heavy
|
||||
/// thus here we attempt to optimize its construction.
|
||||
static thread_local std::mt19937 rnd_generator(
|
||||
std::hash<std::thread::id>{}(std::this_thread::get_id()) + std::clock());
|
||||
for (auto & [_, replicas] : replicas_by_priority)
|
||||
{
|
||||
if (replicas.size() > 1)
|
||||
std::shuffle(replicas.begin(), replicas.end(), rnd_generator);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
2
contrib/NuRaft
vendored
2
contrib/NuRaft
vendored
@ -1 +1 @@
|
||||
Subproject commit 7adf7ae33e7d5c307342431b577c8ab1025ee793
|
||||
Subproject commit 9a0d78de4b90546368d954b6434f0e9a823e8d80
|
@ -70,6 +70,7 @@ function start_server
|
||||
--path "$FASTTEST_DATA"
|
||||
--user_files_path "$FASTTEST_DATA/user_files"
|
||||
--top_level_domains_path "$FASTTEST_DATA/top_level_domains"
|
||||
--test_keeper_server.log_storage_path "$FASTTEST_DATA/coordination"
|
||||
)
|
||||
clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" &
|
||||
server_pid=$!
|
||||
@ -107,6 +108,18 @@ function start_server
|
||||
fi
|
||||
|
||||
echo "ClickHouse server pid '$server_pid' started and responded"
|
||||
|
||||
echo "
|
||||
handle all noprint
|
||||
handle SIGSEGV stop print
|
||||
handle SIGBUS stop print
|
||||
handle SIGABRT stop print
|
||||
continue
|
||||
thread apply all backtrace
|
||||
continue
|
||||
" > script.gdb
|
||||
|
||||
gdb -batch -command script.gdb -p "$server_pid" &
|
||||
}
|
||||
|
||||
function clone_root
|
||||
@ -327,7 +340,7 @@ function run_tests
|
||||
# Look at DistributedFilesToInsert, so cannot run in parallel.
|
||||
01460_DistributedFilesToInsert
|
||||
|
||||
01541_max_memory_usage_for_user
|
||||
01541_max_memory_usage_for_user_long
|
||||
|
||||
# Require python libraries like scipy, pandas and numpy
|
||||
01322_ttest_scipy
|
||||
@ -363,7 +376,7 @@ function run_tests
|
||||
stop_server ||:
|
||||
|
||||
# Clean the data so that there is no interference from the previous test run.
|
||||
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
|
||||
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files,coordination} ||:
|
||||
|
||||
start_server
|
||||
|
||||
|
@ -4,4 +4,4 @@ services:
|
||||
image: cassandra
|
||||
restart: always
|
||||
ports:
|
||||
- 9043:9042
|
||||
- 9043:9042
|
||||
|
@ -5,6 +5,6 @@ services:
|
||||
hostname: hdfs1
|
||||
restart: always
|
||||
ports:
|
||||
- 50075:50075
|
||||
- 50070:50070
|
||||
- 50075:50075
|
||||
- 50070:50070
|
||||
entrypoint: /etc/bootstrap.sh -d
|
||||
|
@ -5,42 +5,42 @@ services:
|
||||
image: zookeeper:3.4.9
|
||||
hostname: kafka_zookeeper
|
||||
environment:
|
||||
ZOO_MY_ID: 1
|
||||
ZOO_PORT: 2181
|
||||
ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888
|
||||
ZOO_MY_ID: 1
|
||||
ZOO_PORT: 2181
|
||||
ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888
|
||||
security_opt:
|
||||
- label:disable
|
||||
- label:disable
|
||||
|
||||
kafka1:
|
||||
image: confluentinc/cp-kafka:5.2.0
|
||||
hostname: kafka1
|
||||
ports:
|
||||
- "9092:9092"
|
||||
- "9092:9092"
|
||||
environment:
|
||||
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092
|
||||
KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092
|
||||
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
|
||||
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
|
||||
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092
|
||||
KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092
|
||||
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
|
||||
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
|
||||
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
depends_on:
|
||||
- kafka_zookeeper
|
||||
- kafka_zookeeper
|
||||
security_opt:
|
||||
- label:disable
|
||||
- label:disable
|
||||
|
||||
schema-registry:
|
||||
image: confluentinc/cp-schema-registry:5.2.0
|
||||
hostname: schema-registry
|
||||
ports:
|
||||
- "8081:8081"
|
||||
- "8081:8081"
|
||||
environment:
|
||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
||||
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
|
||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
|
||||
SCHEMA_REGISTRY_HOST_NAME: schema-registry
|
||||
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
|
||||
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
|
||||
depends_on:
|
||||
- kafka_zookeeper
|
||||
- kafka1
|
||||
- kafka_zookeeper
|
||||
- kafka1
|
||||
security_opt:
|
||||
- label:disable
|
||||
- label:disable
|
||||
|
@ -8,22 +8,22 @@ services:
|
||||
hostname: kerberizedhdfs1
|
||||
restart: always
|
||||
volumes:
|
||||
- ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
|
||||
- ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
|
||||
- ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
|
||||
- ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
|
||||
- ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
|
||||
- ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
|
||||
ports:
|
||||
- 1006:1006
|
||||
- 50070:50070
|
||||
- 9010:9010
|
||||
depends_on:
|
||||
- hdfskerberos
|
||||
- hdfskerberos
|
||||
entrypoint: /etc/bootstrap.sh -d
|
||||
|
||||
hdfskerberos:
|
||||
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
|
||||
hostname: hdfskerberos
|
||||
volumes:
|
||||
- ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
|
||||
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
|
||||
- /dev/urandom:/dev/random
|
||||
- ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
|
||||
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
|
||||
- /dev/urandom:/dev/random
|
||||
ports: [88, 749]
|
||||
|
@ -6,54 +6,54 @@ services:
|
||||
# restart: always
|
||||
hostname: kafka_kerberized_zookeeper
|
||||
environment:
|
||||
ZOOKEEPER_SERVER_ID: 1
|
||||
ZOOKEEPER_CLIENT_PORT: 2181
|
||||
ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
|
||||
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
|
||||
ZOOKEEPER_SERVER_ID: 1
|
||||
ZOOKEEPER_CLIENT_PORT: 2181
|
||||
ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
|
||||
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
|
||||
volumes:
|
||||
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
|
||||
- /dev/urandom:/dev/random
|
||||
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
|
||||
- /dev/urandom:/dev/random
|
||||
depends_on:
|
||||
- kafka_kerberos
|
||||
- kafka_kerberos
|
||||
security_opt:
|
||||
- label:disable
|
||||
- label:disable
|
||||
|
||||
kerberized_kafka1:
|
||||
image: confluentinc/cp-kafka:5.2.0
|
||||
# restart: always
|
||||
hostname: kerberized_kafka1
|
||||
ports:
|
||||
- "9092:9092"
|
||||
- "9093:9093"
|
||||
- "9092:9092"
|
||||
- "9093:9093"
|
||||
environment:
|
||||
KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
|
||||
KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
|
||||
# KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
|
||||
# KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
|
||||
KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
|
||||
KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
|
||||
KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
|
||||
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
|
||||
KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
|
||||
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
|
||||
KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
|
||||
KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
|
||||
# KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
|
||||
# KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
|
||||
KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
|
||||
KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
|
||||
KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
|
||||
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
|
||||
KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
|
||||
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
|
||||
volumes:
|
||||
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
|
||||
- /dev/urandom:/dev/random
|
||||
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
|
||||
- /dev/urandom:/dev/random
|
||||
depends_on:
|
||||
- kafka_kerberized_zookeeper
|
||||
- kafka_kerberos
|
||||
- kafka_kerberized_zookeeper
|
||||
- kafka_kerberos
|
||||
security_opt:
|
||||
- label:disable
|
||||
- label:disable
|
||||
|
||||
kafka_kerberos:
|
||||
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
|
||||
hostname: kafka_kerberos
|
||||
volumes:
|
||||
- ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
|
||||
- ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
|
||||
- /dev/urandom:/dev/random
|
||||
- ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
|
||||
- ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
|
||||
- /dev/urandom:/dev/random
|
||||
ports: [88, 749]
|
||||
|
@ -7,5 +7,5 @@ services:
|
||||
MONGO_INITDB_ROOT_USERNAME: root
|
||||
MONGO_INITDB_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 27018:27017
|
||||
- 27018:27017
|
||||
command: --profile=2 --verbose
|
||||
|
@ -6,5 +6,5 @@ services:
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 3308:3306
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
|
@ -6,5 +6,9 @@ services:
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log'
|
||||
--default-time-zone='+3:00'
|
||||
--gtid-mode="ON"
|
||||
--enforce-gtid-consistency
|
||||
--log-error-verbosity=3
|
||||
|
@ -6,5 +6,10 @@ services:
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 33308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default_authentication_plugin='mysql_native_password' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
- 33308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log'
|
||||
--default_authentication_plugin='mysql_native_password'
|
||||
--default-time-zone='+3:00'
|
||||
--gtid-mode="ON"
|
||||
--enforce-gtid-consistency
|
||||
--log-error-verbosity=3
|
||||
|
@ -7,7 +7,7 @@ services:
|
||||
MYSQL_ALLOW_EMPTY_PASSWORD: 1
|
||||
command: --federated --socket /var/run/mysqld/mysqld.sock
|
||||
healthcheck:
|
||||
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
|
||||
test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
|
||||
interval: 1s
|
||||
timeout: 2s
|
||||
retries: 100
|
||||
|
@ -11,4 +11,4 @@ services:
|
||||
ports:
|
||||
- "5433:5433"
|
||||
environment:
|
||||
POSTGRES_HOST_AUTH_METHOD: "trust"
|
||||
POSTGRES_HOST_AUTH_METHOD: "trust"
|
||||
|
@ -6,8 +6,8 @@ services:
|
||||
environment:
|
||||
POSTGRES_PASSWORD: mysecretpassword
|
||||
ports:
|
||||
- 5432:5432
|
||||
- 5432:5432
|
||||
networks:
|
||||
default:
|
||||
aliases:
|
||||
- postgre-sql.local
|
||||
default:
|
||||
aliases:
|
||||
- postgre-sql.local
|
||||
|
@ -4,5 +4,5 @@ services:
|
||||
image: redis
|
||||
restart: always
|
||||
ports:
|
||||
- 6380:6379
|
||||
- 6380:6379
|
||||
command: redis-server --requirepass "clickhouse" --databases 32
|
||||
|
@ -97,6 +97,7 @@ function configure
|
||||
rm -r right/db ||:
|
||||
rm -r db0/preprocessed_configs ||:
|
||||
rm -r db0/{data,metadata}/system ||:
|
||||
rm db0/status ||:
|
||||
cp -al db0/ left/db/
|
||||
cp -al db0/ right/db/
|
||||
}
|
||||
|
@ -60,4 +60,8 @@ fi
|
||||
# more ideologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
ADDITIONAL_OPTIONS+=('--replicated-database')
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
@ -57,6 +57,10 @@ function run_tests()
|
||||
ADDITIONAL_OPTIONS+=('4')
|
||||
fi
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
ADDITIONAL_OPTIONS+=('--replicated-database')
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
|
||||
--test-runs "$NUM_TRIES" \
|
||||
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
|
@ -23,12 +23,15 @@ def get_options(i):
|
||||
if 0 < i:
|
||||
options += " --order=random"
|
||||
|
||||
if i % 2 == 1:
|
||||
if i % 3 == 1:
|
||||
options += " --db-engine=Ordinary"
|
||||
|
||||
if i % 3 == 2:
|
||||
options += ''' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
|
||||
|
||||
# If database name is not specified, new database is created for each functional test.
|
||||
# Run some threads with one database for all tests.
|
||||
if i % 3 == 1:
|
||||
if i % 2 == 1:
|
||||
options += " --database=test_{}".format(i)
|
||||
|
||||
if i == 13:
|
||||
|
@ -1,7 +1,14 @@
|
||||
# docker build -t yandex/clickhouse-style-test .
|
||||
FROM ubuntu:20.04
|
||||
|
||||
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell
|
||||
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
shellcheck \
|
||||
libxml2-utils \
|
||||
git \
|
||||
python3-pip \
|
||||
pylint \
|
||||
yamllint \
|
||||
&& pip3 install codespell
|
||||
|
||||
|
||||
# For |& syntax
|
||||
|
@ -66,7 +66,8 @@ SELECT * FROM file_engine_table
|
||||
|
||||
## Usage in ClickHouse-local {#usage-in-clickhouse-local}
|
||||
|
||||
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`.
|
||||
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. It is possible to read and write compressed files based on an additional engine parameter or file extension (`gz`, `br` or `xz`).
|
||||
|
||||
**Example:**
|
||||
|
||||
``` bash
|
||||
|
@ -5,7 +5,7 @@ toc_title: Brown University Benchmark
|
||||
|
||||
# Brown University Benchmark
|
||||
|
||||
MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
|
||||
`MgBench` is a new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
|
||||
|
||||
Download the data:
|
||||
```
|
||||
@ -153,7 +153,7 @@ ORDER BY dt,
|
||||
hr;
|
||||
|
||||
|
||||
-- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O?
|
||||
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
|
||||
|
||||
SELECT machine_name,
|
||||
COUNT(*) AS spikes
|
||||
@ -301,7 +301,7 @@ WHERE event_type = 'temperature'
|
||||
AND log_time >= '2019-11-29 17:00:00.000';
|
||||
|
||||
|
||||
-- Q3.4: Over the past 6 months, how frequently was each door opened?
|
||||
-- Q3.4: Over the past 6 months, how frequently was each door opened?
|
||||
|
||||
SELECT device_name,
|
||||
device_floor,
|
||||
@ -412,3 +412,5 @@ ORDER BY yr,
|
||||
```
|
||||
|
||||
The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/brown-benchmark/) <!--hide-->
|
||||
|
@ -148,28 +148,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
|
||||
|
||||
For successful requests that don’t return a data table, an empty response body is returned.
|
||||
|
||||
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
|
||||
|
||||
If you specified `compress=1` in the URL, the server compresses the data it sends you.
|
||||
If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
|
||||
## Compression {#compression}
|
||||
|
||||
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods.
|
||||
You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed.
|
||||
|
||||
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
|
||||
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need the `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
|
||||
|
||||
Examples of sending data with compression:
|
||||
If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method.
|
||||
|
||||
``` bash
|
||||
#Sending data to the server:
|
||||
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
|
||||
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse supports the following [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
|
||||
|
||||
#Sending data to the client:
|
||||
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
|
||||
```
|
||||
- `gzip`
|
||||
- `br`
|
||||
- `deflate`
|
||||
- `xz`
|
||||
|
||||
To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`.
|
||||
In order for ClickHouse to compress the response, enable compression with the [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append the `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods.
|
||||
|
||||
!!! note "Note"
|
||||
Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly.
|
||||
|
||||
**Examples**
|
||||
|
||||
``` bash
|
||||
# Sending compressed data to the server
|
||||
$ echo "SELECT 1" | gzip -c | \
|
||||
curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
|
||||
```
|
||||
|
||||
``` bash
|
||||
# Receiving compressed data from the server
|
||||
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
|
||||
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
|
||||
$ zcat result.gz
|
||||
0
|
||||
1
|
||||
2
|
||||
```
|
||||
|
||||
## Default Database {#default-database}
|
||||
|
||||
You can use the ‘database’ URL parameter or the ‘X-ClickHouse-Database’ header to specify the default database.
|
||||
|
||||
``` bash
|
||||
|
@ -139,7 +139,7 @@ You can assign a quotas set for the user. For a detailed description of quotas c
|
||||
|
||||
### user_name/databases {#user-namedatabases}
|
||||
|
||||
In this section, you can you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
|
||||
In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -1104,7 +1104,7 @@ The maximum number of replicas for each shard when executing a query. In limited
|
||||
- the sampling key is an expression that is expensive to calculate
|
||||
- the cluster's latency distribution has a long tail, so that querying more servers increases the query's overall latency
|
||||
|
||||
In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
|
||||
In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
|
||||
|
||||
## compile {#compile}
|
||||
|
||||
@ -2659,3 +2659,23 @@ Result:
|
||||
Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
||||
## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
|
||||
|
||||
Allows selecting data from a File engine table when the underlying file does not exist.
|
||||
|
||||
Possible values:
|
||||
- 0 — `SELECT` throws an exception.
|
||||
- 1 — `SELECT` returns an empty result.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}
|
||||
|
||||
Enables or disables truncating the file before inserting into File engine tables.
|
||||
|
||||
Possible values:
|
||||
- 0 — Disabled.
|
||||
- 1 — Enabled.
|
||||
|
||||
Default value: `0`.
|
||||
|
@ -52,15 +52,15 @@ Input table:
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
|
||||
SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
|
||||
│ director │ ('director',5000) │
|
||||
└──────────────────────┴─────────────────────────────┘
|
||||
┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐
|
||||
│ director │ ('director',5000) │ ('director',5000) │
|
||||
└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘
|
||||
```
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->
|
||||
|
@ -9,7 +9,7 @@ Calculates the arithmetic mean.
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
avgWeighted(x)
|
||||
avg(x)
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
|
@ -32,6 +32,7 @@ The null hypothesis is that two populations are stochastically equal. Also one-s
|
||||
**Returned values**
|
||||
|
||||
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
|
||||
|
||||
- calculated U-statistic. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
@ -24,6 +24,7 @@ The null hypothesis is that means of populations are equal. Normal distribution
|
||||
**Returned values**
|
||||
|
||||
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
|
||||
|
||||
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
@ -24,6 +24,7 @@ The null hypothesis is that means of populations are equal. Normal distribution
|
||||
**Returned values**
|
||||
|
||||
[Tuple](../../../sql-reference/data-types/tuple.md) with two elements:
|
||||
|
||||
- calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
@ -61,40 +61,58 @@ int32samoa: 1546300800
|
||||
|
||||
Converts a date or date with time to a UInt16 number containing the year number (AD).
|
||||
|
||||
Alias: `YEAR`.
|
||||
|
||||
## toQuarter {#toquarter}
|
||||
|
||||
Converts a date or date with time to a UInt8 number containing the quarter number.
|
||||
|
||||
Alias: `QUARTER`.
|
||||
|
||||
## toMonth {#tomonth}
|
||||
|
||||
Converts a date or date with time to a UInt8 number containing the month number (1-12).
|
||||
|
||||
Alias: `MONTH`.
|
||||
|
||||
## toDayOfYear {#todayofyear}
|
||||
|
||||
Converts a date or date with time to a UInt16 number containing the number of the day of the year (1-366).
|
||||
|
||||
Alias: `DAYOFYEAR`.
|
||||
|
||||
## toDayOfMonth {#todayofmonth}
|
||||
|
||||
Converts a date or date with time to a UInt8 number containing the number of the day of the month (1-31).
|
||||
|
||||
Aliases: `DAYOFMONTH`, `DAY`.
|
||||
|
||||
## toDayOfWeek {#todayofweek}
|
||||
|
||||
Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7).
|
||||
|
||||
Alias: `DAYOFWEEK`.
|
||||
|
||||
## toHour {#tohour}
|
||||
|
||||
Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23).
|
||||
This function assumes that if clocks are moved ahead, it is by one hour and occurs at 2 a.m., and if clocks are moved back, it is by one hour and occurs at 3 a.m. (which is not always true – even in Moscow the clocks were twice changed at a different time).
|
||||
|
||||
Alias: `HOUR`.
|
||||
|
||||
## toMinute {#tominute}
|
||||
|
||||
Converts a date with time to a UInt8 number containing the number of the minute of the hour (0-59).
|
||||
|
||||
Alias: `MINUTE`.
|
||||
|
||||
## toSecond {#tosecond}
|
||||
|
||||
Converts a date with time to a UInt8 number containing the number of the second in the minute (0-59).
|
||||
Leap seconds are not accounted for.
|
||||
|
||||
Alias: `SECOND`.
|
||||
|
||||
## toUnixTimestamp {#to-unix-timestamp}
|
||||
|
||||
For DateTime argument: converts value to the number with type UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
|
||||
|
@ -75,6 +75,8 @@ Result:
|
||||
|
||||
Returns a string containing the argument’s hexadecimal representation.
|
||||
|
||||
Alias: `HEX`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
|
@ -13,6 +13,8 @@ Checks whether the argument is [NULL](../../sql-reference/syntax.md#null-literal
|
||||
isNull(x)
|
||||
```
|
||||
|
||||
Alias: `ISNULL`.
|
||||
|
||||
**Arguments**
|
||||
|
||||
- `x` — A value with a non-compound data type.
|
||||
|
@ -9,10 +9,14 @@ toc_title: IP Addresses
|
||||
|
||||
Takes a UInt32 number. Interprets it as an IPv4 address in big endian. Returns a string containing the corresponding IPv4 address in the format A.B.C.d (dot-separated numbers in decimal form).
|
||||
|
||||
Alias: `INET_NTOA`.
|
||||
|
||||
## IPv4StringToNum(s) {#ipv4stringtonums}
|
||||
|
||||
The reverse function of IPv4NumToString. If the IPv4 address has an invalid format, it returns 0.
|
||||
|
||||
Alias: `INET_ATON`.
|
||||
|
||||
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}
|
||||
|
||||
Similar to IPv4NumToString, but using xxx instead of the last octet.
|
||||
@ -49,7 +53,11 @@ Since using ‘xxx’ is highly unusual, this may be changed in the future. We r
|
||||
### IPv6NumToString(x) {#ipv6numtostringx}
|
||||
|
||||
Accepts a FixedString(16) value containing the IPv6 address in binary format. Returns a string containing this address in text format.
|
||||
IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44. Examples:
|
||||
IPv6-mapped IPv4 addresses are output in the format ::ffff:111.222.33.44.
|
||||
|
||||
Alias: `INET6_NTOA`.
|
||||
|
||||
Examples:
|
||||
|
||||
``` sql
|
||||
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
|
||||
@ -119,6 +127,8 @@ The reverse function of IPv6NumToString. If the IPv6 address has an invalid form
|
||||
If the IP address is a valid IPv4 address then the IPv6 equivalent of the IPv4 address is returned.
|
||||
HEX can be uppercase or lowercase.
|
||||
|
||||
Alias: `INET6_ATON`.
|
||||
|
||||
``` sql
|
||||
SELECT cutIPv6(IPv6StringToNum('127.0.0.1'), 0, 0);
|
||||
```
|
||||
|
@ -98,6 +98,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
|
||||
|
||||
Repeats a string as many times as specified and concatenates the replicated values as a single string.
|
||||
|
||||
Alias: `REPEAT`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
@ -276,10 +278,14 @@ Returns the string ‘s’ that was converted from the encoding in ‘from’ to
|
||||
|
||||
Encodes the string ‘s’ into base64.
|
||||
|
||||
Alias: `TO_BASE64`.
|
||||
|
||||
## base64Decode(s) {#base64decode}
|
||||
|
||||
Decode base64-encoded string ‘s’ into original string. In case of failure raises an exception.
|
||||
|
||||
Alias: `FROM_BASE64`.
|
||||
|
||||
## tryBase64Decode(s) {#trybase64decode}
|
||||
|
||||
Similar to base64Decode, but in case of error an empty string would be returned.
|
||||
@ -600,4 +606,46 @@ Hello, "world"!
|
||||
'foo'
|
||||
```
|
||||
|
||||
## decodeXMLComponent {#decode-xml-component}
|
||||
|
||||
Replaces XML predefined entities with characters. Predefined entities are `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`.
|
||||
This function also replaces numeric character references with Unicode characters. Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
decodeXMLComponent(x)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — A sequence of characters. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The sequence of characters after replacement.
|
||||
|
||||
Type: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT decodeXMLComponent('&apos;foo&apos;');
|
||||
SELECT decodeXMLComponent('&lt; &#x3A3; &gt;');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
'foo'
|
||||
< Σ >
|
||||
```
|
||||
|
||||
**See Also**
|
||||
|
||||
- [List of XML and HTML character entity references](https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references)
|
||||
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/query_language/functions/string_functions/) <!--hide-->
|
||||
|
@ -36,10 +36,14 @@ The behavior of functions for the [NaN and Inf](../../sql-reference/data-types/f
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
|
||||
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
|
||||
│ -9223372036854775808 │ 32 │ 16 │ 8 │
|
||||
@ -52,10 +56,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
|
||||
SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
|
||||
│ 123123 │ 0 │
|
||||
@ -68,10 +76,14 @@ It takes an argument of type String and tries to parse it into Int (8 \| 16 \| 3
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
|
||||
SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
|
||||
│ 123123 │ ᴺᵁᴸᴸ │
|
||||
@ -102,10 +114,14 @@ The behavior of functions for negative agruments and for the [NaN and Inf](../..
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
|
||||
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
|
||||
│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │
|
||||
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
|
||||
|
||||
## toDate {#todate}
|
||||
|
||||
Alias: `DATE`.
|
||||
|
||||
## toDateOrZero {#todateorzero}
|
||||
|
||||
## toDateOrNull {#todateornull}
|
||||
@ -168,20 +186,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
|
||||
│ -1.11100 │ Nullable(Decimal(9, 5)) │
|
||||
└──────────┴────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
|
||||
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │
|
||||
@ -213,20 +239,28 @@ A value in the `Nullable(Decimal(P,S))` data type. The value contains:
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
|
||||
│ -1.11100 │ Decimal(9, 5) │
|
||||
└──────────┴────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
|
||||
│ 0.00 │ Decimal(9, 2) │
|
||||
@ -258,12 +292,18 @@ Conversion between numeric types uses the same rules as assignments between diff
|
||||
|
||||
Additionally, the toString function of the DateTime argument can take a second String argument containing the name of the time zone. Example: `Asia/Yekaterinburg` In this case, the time is formatted according to the specified time zone.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
now() AS now_local,
|
||||
toString(now(), 'Asia/Yekaterinburg') AS now_yekat
|
||||
toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌───────────now_local─┬─now_yekat───────────┐
|
||||
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
|
||||
@ -281,36 +321,81 @@ If the string has fewer bytes than N, it is padded with null bytes to the right.
|
||||
|
||||
Accepts a String or FixedString argument. Returns the String with the content truncated at the first zero byte found.
|
||||
|
||||
Example:
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut
|
||||
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─s─────────────┬─s_cut─┐
|
||||
│ foo\0\0\0\0\0 │ foo │
|
||||
└───────────────┴───────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
|
||||
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─s──────────┬─s_cut─┐
|
||||
│ foo\0bar\0 │ foo │
|
||||
└────────────┴───────┘
|
||||
```
|
||||
|
||||
## reinterpretAsUInt(8\|16\|32\|64) {#reinterpretasuint8163264}
|
||||
|
||||
## reinterpretAsInt(8\|16\|32\|64) {#reinterpretasint8163264}
|
||||
|
||||
## reinterpretAsFloat(32\|64) {#reinterpretasfloat3264}
|
||||
|
||||
## reinterpretAsDate {#reinterpretasdate}
|
||||
|
||||
## reinterpretAsDateTime {#reinterpretasdatetime}
|
||||
|
||||
These functions accept a string and interpret the bytes placed at the beginning of the string as a number in host order (little endian). If the string isn’t long enough, the functions work as if the string is padded with the necessary number of null bytes. If the string is longer than needed, the extra bytes are ignored. A date is interpreted as the number of days since the beginning of the Unix Epoch, and a date with time is interpreted as the number of seconds since the beginning of the Unix Epoch.
|
||||
|
||||
## reinterpretAsString {#type_conversion_functions-reinterpretAsString}
|
||||
|
||||
This function accepts a number or date or date with time, and returns a string containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
|
||||
|
||||
## reinterpretAsFixedString {#reinterpretasfixedstring}
|
||||
|
||||
This function accepts a number or date or date with time, and returns a FixedString containing bytes representing the corresponding value in host order (little endian). Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a FixedString that is one byte long.
|
||||
|
||||
## reinterpretAsUUID {#reinterpretasuuid}
|
||||
|
||||
This function accepts a 16-byte string and returns a UUID containing bytes representing the corresponding value in network byte order (big-endian). If the string isn't long enough, the function works as if the string is padded with the necessary number of null bytes at the end. If the string is longer than 16 bytes, the extra bytes at the end are ignored.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
reinterpretAsUUID(fixed_string)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `fixed_string` — Big-endian byte string. [FixedString](../../sql-reference/data-types/fixedstring.md#fixedstring).
|
||||
|
||||
## reinterpret(x, T) {#type_conversion_function-reinterpret}
|
||||
|
||||
Performs byte reinterpretation of ‘x’ as ‘t’ data type.
|
||||
**Returned value**
|
||||
|
||||
Following reinterpretations are allowed:
|
||||
1. Any type that has fixed size and value of that type can be represented continuously into FixedString.
|
||||
2. Any type that if value of that type can be represented continuously into String. Null bytes are dropped from the end. For example, a UInt32 type value of 255 is a string that is one byte long.
|
||||
3. FixedString, String, types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into types that can be interpreted as numeric (Integers, Float, Date, DateTime, UUID) into FixedString,
|
||||
- The UUID type value. [UUID](../../sql-reference/data-types/uuid.md#uuid-data-type).
|
||||
|
||||
**Examples**
|
||||
|
||||
String to UUID.
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
|
||||
@ -318,39 +403,45 @@ SELECT reinterpret(toInt8(-1), 'UInt8') as int_to_uint,
|
||||
reinterpret('1', 'UInt32') as string_to_int;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─int_to_uint─┬─int_to_float─┬─string_to_int─┐
|
||||
│ 255 │ 1e-45 │ 49 │
|
||||
└─────────────┴──────────────┴───────────────┘
|
||||
┌─reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))─┐
|
||||
│ 08090a0b-0c0d-0e0f-0001-020304050607 │
|
||||
└───────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## reinterpretAsUInt(8\|16\|32\|64\|256) {#reinterpretAsUInt8163264256}
|
||||
Going back and forth from String to UUID.
|
||||
|
||||
## reinterpretAsInt(8\|16\|32\|64\|128\|256) {#reinterpretAsInt8163264128256}
|
||||
Query:
|
||||
|
||||
## reinterpretAsDecimal(32\|64\|128\|256) {#reinterpretAsDecimal3264128256}
|
||||
``` sql
|
||||
WITH
|
||||
generateUUIDv4() AS uuid,
|
||||
identity(lower(hex(reverse(reinterpretAsString(uuid))))) AS str,
|
||||
reinterpretAsUUID(reverse(unhex(str))) AS uuid2
|
||||
SELECT uuid = uuid2;
|
||||
```
|
||||
|
||||
## reinterpretAsFloat(32\|64) {#type_conversion_function-reinterpretAsFloat}
|
||||
Result:
|
||||
|
||||
## reinterpretAsDate {#type_conversion_function-reinterpretAsDate}
|
||||
|
||||
## reinterpretAsDateTime {#type_conversion_function-reinterpretAsDateTime}
|
||||
|
||||
## reinterpretAsDateTime64 {#type_conversion_function-reinterpretAsDateTime64}
|
||||
|
||||
## reinterpretAsString {#type_conversion_function-reinterpretAsString}
|
||||
|
||||
## reinterpretAsFixedString {#type_conversion_function-reinterpretAsFixedString}
|
||||
|
||||
## reinterpretAsUUID {#type_conversion_function-reinterpretAsUUID}
|
||||
|
||||
These functions are aliases for `reinterpret` function.
|
||||
``` text
|
||||
┌─equals(uuid, uuid2)─┐
|
||||
│ 1 │
|
||||
└─────────────────────┘
|
||||
```
|
||||
|
||||
## CAST(x, T) {#type_conversion_function-cast}
|
||||
|
||||
Converts ‘x’ to the ‘t’ data type. The syntax CAST(x AS t) is also supported.
|
||||
Converts input value `x` to the `T` data type.
|
||||
|
||||
Example:
|
||||
The syntax `CAST(x AS t)` is also supported.
|
||||
|
||||
Note, that if value `x` does not fit the bounds of type T, the function overflows. For example, CAST(-1, 'UInt8') returns 255.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
@ -358,9 +449,11 @@ SELECT
|
||||
CAST(timestamp AS DateTime) AS datetime,
|
||||
CAST(timestamp AS Date) AS date,
|
||||
CAST(timestamp, 'String') AS string,
|
||||
CAST(timestamp, 'FixedString(22)') AS fixed_string
|
||||
CAST(timestamp, 'FixedString(22)') AS fixed_string;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
|
||||
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
|
||||
@ -369,12 +462,18 @@ SELECT
|
||||
|
||||
Conversion to FixedString(N) only works for arguments of type String or FixedString(N).
|
||||
|
||||
Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported. Example:
|
||||
Type conversion to [Nullable](../../sql-reference/data-types/nullable.md) and back is supported.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(x) FROM t_null
|
||||
SELECT toTypeName(x) FROM t_null;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(x)─┐
|
||||
│ Int8 │
|
||||
@ -382,10 +481,14 @@ SELECT toTypeName(x) FROM t_null
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
|
||||
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
|
||||
│ Nullable(UInt16) │
|
||||
@ -399,15 +502,19 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
|
||||
|
||||
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
|
||||
|
||||
Converts ‘x’ to the ‘t’ data type. The difference from cast(x, T) is that accurateCast
|
||||
does not allow overflow of numeric types during cast if type value x does not fit
|
||||
bounds of type T.
|
||||
Converts `x` to the `T` data type.
|
||||
|
||||
The difference from [cast(x, T)](#type_conversion_function-cast) is that `accurateCast` does not allow overflow of numeric types during cast if type value `x` does not fit the bounds of type `T`. For example, `accurateCast(-1, 'UInt8')` throws an exception.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
Example
|
||||
``` sql
|
||||
SELECT cast(-1, 'UInt8') as uint8;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─uint8─┐
|
||||
@ -415,38 +522,46 @@ SELECT cast(-1, 'UInt8') as uint8;
|
||||
└───────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
```sql
|
||||
SELECT accurateCast(-1, 'UInt8') as uint8;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
|
||||
|
||||
```
|
||||
|
||||
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
|
||||
|
||||
Converts ‘x’ to the ‘t’ data type. Always returns nullable type and returns NULL
|
||||
if the casted value is not representable in the target type.
|
||||
Converts input value `x` to the specified data type `T`. Always returns [Nullable](../../sql-reference/data-types/nullable.md) type and returns [NULL](../../sql-reference/syntax.md#null-literal) if the casted value is not representable in the target type.
|
||||
|
||||
Example:
|
||||
**Syntax**
|
||||
|
||||
```sql
|
||||
accurateCastOrNull(x, T)
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `x` — Input value.
|
||||
- `T` — The name of the returned data type.
|
||||
|
||||
**Returned value**
|
||||
|
||||
- The value, converted to the specified data type `T`.
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
accurateCastOrNull(-1, 'UInt8') as uint8,
|
||||
accurateCastOrNull(128, 'Int8') as int8,
|
||||
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string
|
||||
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─uint8─┬─int8─┬─fixed_string─┐
|
||||
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
|
||||
└───────┴──────┴──────────────┘┘
|
||||
```
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
|
||||
```
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
|
||||
@ -454,6 +569,23 @@ SELECT toTypeName(accurateCastOrNull(5, 'UInt8'))
|
||||
└────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
accurateCastOrNull(-1, 'UInt8') as uint8,
|
||||
accurateCastOrNull(128, 'Int8') as int8,
|
||||
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─uint8─┬─int8─┬─fixed_string─┐
|
||||
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
|
||||
└───────┴──────┴──────────────┘
|
||||
```
|
||||
|
||||
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
|
||||
|
||||
Converts a Number type argument to an [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type.
|
||||
@ -481,6 +613,8 @@ toIntervalYear(number)
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH
|
||||
toDate('2019-01-01') AS date,
|
||||
@ -488,9 +622,11 @@ WITH
|
||||
toIntervalWeek(1) AS interval_to_week
|
||||
SELECT
|
||||
date + interval_week,
|
||||
date + interval_to_week
|
||||
date + interval_to_week;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
|
||||
│ 2019-01-08 │ 2019-01-08 │
|
||||
@ -506,7 +642,7 @@ The function parses [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601), [RFC 112
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffort(time_string [, time_zone]);
|
||||
parseDateTimeBestEffort(time_string [, time_zone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
@ -549,7 +685,7 @@ Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
|
||||
AS parseDateTimeBestEffort
|
||||
AS parseDateTimeBestEffort;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -564,7 +700,7 @@ Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('1284101485')
|
||||
AS parseDateTimeBestEffort
|
||||
AS parseDateTimeBestEffort;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -579,7 +715,7 @@ Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
|
||||
AS parseDateTimeBestEffort
|
||||
AS parseDateTimeBestEffort;
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -593,7 +729,7 @@ Result:
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('10 20:19')
|
||||
SELECT parseDateTimeBestEffort('10 20:19');
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -613,12 +749,12 @@ Result:
|
||||
|
||||
## parseDateTimeBestEffortUS {#parsedatetimebesteffortUS}
|
||||
|
||||
This function is similar to [‘parseDateTimeBestEffort’](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity.
|
||||
This function is similar to [parseDateTimeBestEffort](#parsedatetimebesteffort), the only difference is that this function prefers US date format (`MM/DD/YYYY` etc.) in case of ambiguity.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffortUS(time_string [, time_zone]);
|
||||
parseDateTimeBestEffortUS(time_string [, time_zone])
|
||||
```
|
||||
|
||||
**Arguments**
|
||||
@ -693,6 +829,178 @@ Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it r
|
||||
|
||||
Same as for [parseDateTimeBestEffort](#parsedatetimebesteffort) except that it returns zero date or zero date time when it encounters a date format that cannot be processed.
|
||||
|
||||
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
|
||||
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns `NULL` when it encounters a date format that cannot be processed.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
|
||||
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Supported non-standard formats**
|
||||
|
||||
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
|
||||
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
|
||||
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
|
||||
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
|
||||
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
|
||||
- `NULL` if the input string cannot be converted to the `DateTime` data type.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ 2021-02-10 21:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ 2021-02-11 00:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ 2021-02-10 00:00:00 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
|
||||
|
||||
Same as [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS) function except that it returns zero date (`1970-01-01`) or zero date with time (`1970-01-01 00:00:00`) when it encounters a date format that cannot be processed.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
|
||||
```
|
||||
|
||||
**Parameters**
|
||||
|
||||
- `time_string` — String containing a date or date with time to convert. The date must be in the US date format (`MM/DD/YYYY`, etc). [String](../../sql-reference/data-types/string.md).
|
||||
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Supported non-standard formats**
|
||||
|
||||
- A string containing 9..10 digit [unix timestamp](https://en.wikipedia.org/wiki/Unix_time).
|
||||
- A string with a date and a time components: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss`, etc.
|
||||
- A string with a date, but no time component: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY`, etc.
|
||||
- A string with a day and time: `DD`, `DD hh`, `DD hh:mm`. In this case, `YYYY-MM` are substituted with `2000-01`.
|
||||
- A string that includes date and time along with timezone offset information: `YYYY-MM-DD hh:mm:ss ±h:mm`, etc. For example, `2020-12-12 17:36:00 -5:00`.
|
||||
|
||||
**Returned values**
|
||||
|
||||
- `time_string` converted to the [DateTime](../../sql-reference/data-types/datetime.md) data type.
|
||||
- Zero date or zero date with time if the input string cannot be converted to the `DateTime` data type.
|
||||
|
||||
**Examples**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 2021-02-10 21:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 2021-02-11 00:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 2021-02-10 00:00:00 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 1970-01-01 00:00:00 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toLowCardinality {#tolowcardinality}
|
||||
|
||||
Converts input parameter to the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) version of the same data type.
|
||||
@ -720,7 +1028,7 @@ Type: `LowCardinality(expr_result_type)`
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT toLowCardinality('1')
|
||||
SELECT toLowCardinality('1');
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -759,7 +1067,7 @@ Query:
|
||||
|
||||
``` sql
|
||||
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
|
||||
SELECT toUnixTimestamp64Milli(dt64)
|
||||
SELECT toUnixTimestamp64Milli(dt64);
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -772,7 +1080,7 @@ Result:
|
||||
|
||||
``` sql
|
||||
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
|
||||
SELECT toUnixTimestamp64Nano(dt64)
|
||||
SELECT toUnixTimestamp64Nano(dt64);
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -806,13 +1114,17 @@ fromUnixTimestamp64Milli(value [, ti])
|
||||
|
||||
- `value` converted to the `DateTime64` data type.
|
||||
|
||||
**Examples**
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
WITH CAST(1234567891011, 'Int64') AS i64
|
||||
SELECT fromUnixTimestamp64Milli(i64, 'UTC')
|
||||
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
|
||||
│ 2009-02-13 23:31:31.011 │
|
||||
@ -844,7 +1156,7 @@ Query:
|
||||
|
||||
``` sql
|
||||
SELECT formatRow('CSV', number, 'good')
|
||||
FROM numbers(3)
|
||||
FROM numbers(3);
|
||||
```
|
||||
|
||||
Result:
|
||||
@ -885,7 +1197,7 @@ Query:
|
||||
|
||||
``` sql
|
||||
SELECT formatRowNoNewline('CSV', number, 'good')
|
||||
FROM numbers(3)
|
||||
FROM numbers(3);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
|
||||
|
||||
If the left side is a single column that is in the index, and the right side is a set of constants, the system uses the index for processing the query.
|
||||
|
||||
Don’t list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section “External data for query processing”), then use a subquery.
|
||||
Don’t list too many values explicitly (i.e. millions). If a data set is large, put it in a temporary table (for example, see the section [External data for query processing](../../engines/table-engines/special/external-data.md)), then use a subquery.
|
||||
|
||||
The right side of the operator can be a set of constant expressions, a set of tuples with constant expressions (shown in the examples above), or the name of a database table or SELECT subquery in brackets.
|
||||
|
||||
ClickHouse allows types to differ in the left and the right parts of `IN` subquery. In this case it converts the left side value to the type of the right side, as if the [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null) function is applied. That means, that the data type becomes [Nullable](../../sql-reference/data-types/nullable.md), and if the conversion cannot be performed, it returns [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT '1' IN (SELECT 1);
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
┌─in('1', _subquery49)─┐
|
||||
│ 1 │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
If the right side of the operator is the name of a table (for example, `UserID IN users`), this is equivalent to the subquery `UserID IN (SELECT * FROM users)`. Use this when working with external data that is sent along with the query. For example, the query can be sent together with a set of user IDs loaded to the ‘users’ temporary table, which should be filtered.
|
||||
|
||||
If the right side of the operator is a table name that has the Set engine (a prepared data set that is always in RAM), the data set will not be created over again for each query.
|
||||
|
@ -41,7 +41,6 @@ SELECT a, b, c FROM (SELECT ...)
|
||||
CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]name] [ENGINE = engine] [POPULATE] AS SELECT ...
|
||||
```
|
||||
|
||||
|
||||
Materialized views store data transformed by the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query.
|
||||
|
||||
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE` – the table engine for storing data.
|
||||
@ -65,4 +64,191 @@ Views look the same as normal tables. For example, they are listed in the result
|
||||
|
||||
There isn’t a separate query for deleting views. To delete a view, use [DROP TABLE](../../../sql-reference/statements/drop.md).
|
||||
|
||||
## Live View (Experimental) {#live-view}
|
||||
|
||||
!!! important "Important"
|
||||
This is an experimental feature that may change in backwards-incompatible ways in the future releases.
|
||||
Enable usage of live views and `WATCH` query using `set allow_experimental_live_view = 1`.
|
||||
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
|
||||
```
|
||||
|
||||
Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query.
|
||||
|
||||
Live views are triggered by insert into the innermost table specified in the query.
|
||||
|
||||
Live views work similarly to how a query in a distributed table works. But instead of combining partial results from different servers they combine partial result from current data with partial result from the new data. When a live view query includes a subquery then the cached partial result is only stored for the innermost subquery.
|
||||
|
||||
!!! info "Limitations"
|
||||
- [Table function](../../../sql-reference/table-functions/index.md) is not supported as the innermost table.
|
||||
- Tables that do not have inserts such as a [dictionary](../../../sql-reference/dictionaries/index.md), [system table](../../../operations/system-tables/index.md), a [normal view](#normal), or a [materialized view](#materialized) will not trigger a live view.
|
||||
- Only queries where one can combine partial result from the old data plus partial result from the new data will work. Live view will not work for queries that require the complete data set to compute the final result or aggregations where the state of the aggregation must be preserved.
|
||||
- Does not work with replicated or distributed tables where inserts are performed on different nodes.
|
||||
- Can't be triggered by multiple tables.
|
||||
|
||||
See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround.
|
||||
|
||||
You can watch for changes in the live view query result using the [WATCH](../../../sql-reference/statements/watch.md) query.
|
||||
|
||||
```sql
|
||||
WATCH [db.]live_view
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
|
||||
CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
|
||||
```
|
||||
|
||||
Watch a live view while doing a parallel insert into the source table.
|
||||
|
||||
```sql
|
||||
WATCH lv
|
||||
```
|
||||
|
||||
```bash
|
||||
┌─sum(x)─┬─_version─┐
|
||||
│ 1 │ 1 │
|
||||
└────────┴──────────┘
|
||||
┌─sum(x)─┬─_version─┐
|
||||
│ 2 │ 2 │
|
||||
└────────┴──────────┘
|
||||
┌─sum(x)─┬─_version─┐
|
||||
│ 6 │ 3 │
|
||||
└────────┴──────────┘
|
||||
...
|
||||
```
|
||||
|
||||
```sql
|
||||
INSERT INTO mt VALUES (1);
|
||||
INSERT INTO mt VALUES (2);
|
||||
INSERT INTO mt VALUES (3);
|
||||
```
|
||||
|
||||
or add [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to just get change events.
|
||||
|
||||
```sql
|
||||
WATCH [db.]live_view EVENTS
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
WATCH lv EVENTS
|
||||
```
|
||||
|
||||
```bash
|
||||
┌─version─┐
|
||||
│ 1 │
|
||||
└─────────┘
|
||||
┌─version─┐
|
||||
│ 2 │
|
||||
└─────────┘
|
||||
┌─version─┐
|
||||
│ 3 │
|
||||
└─────────┘
|
||||
...
|
||||
```
|
||||
|
||||
You can execute [SELECT](../../../sql-reference/statements/select/index.md) query on a live view in the same way as for any regular view or a table. If the query result is cached it will return the result immediately without running the stored query on the underlying tables.
|
||||
|
||||
```sql
|
||||
SELECT * FROM [db.]live_view WHERE ...
|
||||
```
|
||||
|
||||
### Force Refresh {#live-view-alter-refresh}
|
||||
|
||||
You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
|
||||
|
||||
### With Timeout {#live-view-with-timeout}
|
||||
|
||||
When a live view is created with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view.
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
If the timeout value is not specified then the value specified by the `temporary_live_view_timeout` setting is used.
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
|
||||
```
|
||||
|
||||
### With Refresh {#live-view-with-refresh}
|
||||
|
||||
When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
If the refresh value is not specified then the value specified by the `periodic_live_view_refresh` setting is used.
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
|
||||
WATCH lv
|
||||
```
|
||||
|
||||
```bash
|
||||
┌───────────────now()─┬─_version─┐
|
||||
│ 2021-02-21 08:47:05 │ 1 │
|
||||
└─────────────────────┴──────────┘
|
||||
┌───────────────now()─┬─_version─┐
|
||||
│ 2021-02-21 08:47:10 │ 2 │
|
||||
└─────────────────────┴──────────┘
|
||||
┌───────────────now()─┬─_version─┐
|
||||
│ 2021-02-21 08:47:15 │ 3 │
|
||||
└─────────────────────┴──────────┘
|
||||
```
|
||||
|
||||
You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause.
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
|
||||
```
|
||||
|
||||
After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries.
|
||||
|
||||
```sql
|
||||
WATCH lv
|
||||
```
|
||||
|
||||
```
|
||||
Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist..
|
||||
```
|
||||
|
||||
### Usage
|
||||
|
||||
Most common uses of live view tables include:
|
||||
|
||||
- Providing push notifications for query result changes to avoid polling.
|
||||
- Caching results of most frequent queries to provide immediate query results.
|
||||
- Watching for table changes and triggering follow-up select queries.
|
||||
- Watching metrics from system tables using periodic refresh.
|
||||
|
||||
### Settings {#live-view-settings}
|
||||
|
||||
You can use the following settings to control the behaviour of live views.
|
||||
|
||||
- `allow_experimental_live_view` - enable live views. Default is `0`.
|
||||
- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default is `15` seconds.
|
||||
- `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which
|
||||
mergeable blocks are dropped and query is re-executed. Default is `64` inserts.
|
||||
- `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default is `5` seconds.
|
||||
- `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default is `60` seconds.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide-->
|
||||
|
106
docs/en/sql-reference/statements/watch.md
Normal file
106
docs/en/sql-reference/statements/watch.md
Normal file
@ -0,0 +1,106 @@
|
||||
---
|
||||
toc_priority: 53
|
||||
toc_title: WATCH
|
||||
---
|
||||
|
||||
# WATCH Statement (Experimental) {#watch}
|
||||
|
||||
!!! important "Important"
|
||||
This is an experimental feature that may change in backwards-incompatible ways in the future releases.
|
||||
Enable live views and `WATCH` query using `set allow_experimental_live_view = 1`.
|
||||
|
||||
|
||||
``` sql
|
||||
WATCH [db.]live_view
|
||||
[EVENTS]
|
||||
[LIMIT n]
|
||||
[FORMAT format]
|
||||
```
|
||||
|
||||
The `WATCH` query performs continuous data retrieval from a [live view](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [live view](./create/view.md#live-view).
|
||||
|
||||
```sql
|
||||
WATCH [db.]live_view
|
||||
```
|
||||
|
||||
The virtual `_version` column in the query result indicates the current result version.
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
|
||||
WATCH lv
|
||||
```
|
||||
|
||||
```bash
|
||||
┌───────────────now()─┬─_version─┐
|
||||
│ 2021-02-21 09:17:21 │ 1 │
|
||||
└─────────────────────┴──────────┘
|
||||
┌───────────────now()─┬─_version─┐
|
||||
│ 2021-02-21 09:17:26 │ 2 │
|
||||
└─────────────────────┴──────────┘
|
||||
┌───────────────now()─┬─_version─┐
|
||||
│ 2021-02-21 09:17:31 │ 3 │
|
||||
└─────────────────────┴──────────┘
|
||||
...
|
||||
```
|
||||
|
||||
By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table.
|
||||
|
||||
```sql
|
||||
INSERT INTO [db.]table WATCH [db.]live_view ...
|
||||
```
|
||||
|
||||
## EVENTS Clause {#events-clause}
|
||||
|
||||
The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where instead of the query result you will just get the latest query result version.
|
||||
|
||||
```sql
|
||||
WATCH [db.]live_view EVENTS
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
|
||||
WATCH lv EVENTS
|
||||
```
|
||||
|
||||
```bash
|
||||
┌─version─┐
|
||||
│ 1 │
|
||||
└─────────┘
|
||||
┌─version─┐
|
||||
│ 2 │
|
||||
└─────────┘
|
||||
...
|
||||
```
|
||||
|
||||
## LIMIT Clause {#limit-clause}
|
||||
|
||||
The `LIMIT n` clause specifies the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once the query is evaluated.
|
||||
|
||||
```sql
|
||||
WATCH [db.]live_view LIMIT 1
|
||||
```
|
||||
|
||||
**Example:**
|
||||
|
||||
```sql
|
||||
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
|
||||
WATCH lv EVENTS LIMIT 1
|
||||
```
|
||||
|
||||
```bash
|
||||
┌─version─┐
|
||||
│ 1 │
|
||||
└─────────┘
|
||||
```
|
||||
|
||||
## FORMAT Clause {#format-clause}
|
||||
|
||||
The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause).
|
||||
|
||||
!!! info "Note"
|
||||
The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [live view](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
|
||||
|
@ -44,7 +44,7 @@ The rest of the conditions and the `LIMIT` sampling constraint are executed in C
|
||||
A table object with the same columns as the original MySQL table.
|
||||
|
||||
!!! info "Note"
|
||||
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
|
||||
In the `INSERT` query to distinguish table function `mysql(...)` from table name with column names list, you must use keywords `FUNCTION` or `TABLE FUNCTION`. See examples below.
|
||||
|
||||
**Examples**
|
||||
|
||||
|
@ -63,7 +63,7 @@ SELECT * FROM file_engine_table
|
||||
|
||||
## Использование движка в Clickhouse-local {#ispolzovanie-dvizhka-v-clickhouse-local}
|
||||
|
||||
В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`.
|
||||
В [clickhouse-local](../../../engines/table-engines/special/file.md) движок в качестве параметра принимает не только формат, но и путь к файлу. В том числе можно указать стандартные потоки ввода/вывода цифровым или буквенным обозначением `0` или `stdin`, `1` или `stdout`. Можно записывать и читать сжатые файлы. Для этого нужно задать дополнительный параметр движка или расширение файла (`gz`, `br` или `xz`).
|
||||
|
||||
**Пример:**
|
||||
|
||||
|
416
docs/ru/getting-started/example-datasets/brown-benchmark.md
Normal file
416
docs/ru/getting-started/example-datasets/brown-benchmark.md
Normal file
@ -0,0 +1,416 @@
|
||||
---
|
||||
toc_priority: 20
|
||||
toc_title: Brown University Benchmark
|
||||
---
|
||||
|
||||
# Brown University Benchmark
|
||||
|
||||
`MgBench` — это аналитический тест производительности для данных журнала событий, сгенерированных машиной. Бенчмарк разработан [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
|
||||
|
||||
Скачать данные:
|
||||
```
|
||||
wget https://datasets.clickhouse.tech/mgbench{1..3}.csv.xz
|
||||
```
|
||||
|
||||
Распаковать данные:
|
||||
```
|
||||
xz -v -d mgbench{1..3}.csv.xz
|
||||
```
|
||||
|
||||
Создание таблиц:
|
||||
```
|
||||
CREATE DATABASE mgbench;
|
||||
|
||||
|
||||
CREATE TABLE mgbench.logs1 (
|
||||
log_time DateTime,
|
||||
machine_name LowCardinality(String),
|
||||
machine_group LowCardinality(String),
|
||||
cpu_idle Nullable(Float32),
|
||||
cpu_nice Nullable(Float32),
|
||||
cpu_system Nullable(Float32),
|
||||
cpu_user Nullable(Float32),
|
||||
cpu_wio Nullable(Float32),
|
||||
disk_free Nullable(Float32),
|
||||
disk_total Nullable(Float32),
|
||||
part_max_used Nullable(Float32),
|
||||
load_fifteen Nullable(Float32),
|
||||
load_five Nullable(Float32),
|
||||
load_one Nullable(Float32),
|
||||
mem_buffers Nullable(Float32),
|
||||
mem_cached Nullable(Float32),
|
||||
mem_free Nullable(Float32),
|
||||
mem_shared Nullable(Float32),
|
||||
swap_free Nullable(Float32),
|
||||
bytes_in Nullable(Float32),
|
||||
bytes_out Nullable(Float32)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (machine_group, machine_name, log_time);
|
||||
|
||||
|
||||
CREATE TABLE mgbench.logs2 (
|
||||
log_time DateTime,
|
||||
client_ip IPv4,
|
||||
request String,
|
||||
status_code UInt16,
|
||||
object_size UInt64
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY log_time;
|
||||
|
||||
|
||||
CREATE TABLE mgbench.logs3 (
|
||||
log_time DateTime64,
|
||||
device_id FixedString(15),
|
||||
device_name LowCardinality(String),
|
||||
device_type LowCardinality(String),
|
||||
device_floor UInt8,
|
||||
event_type LowCardinality(String),
|
||||
event_unit FixedString(1),
|
||||
event_value Nullable(Float32)
|
||||
)
|
||||
ENGINE = MergeTree()
|
||||
ORDER BY (event_type, log_time);
|
||||
```
|
||||
|
||||
Вставка данных:
|
||||
|
||||
```
|
||||
clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
|
||||
clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
|
||||
clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
|
||||
```
|
||||
|
||||
Запуск тестов производительности:
|
||||
```
|
||||
-- Q1.1: What is the CPU/network utilization for each web server since midnight?
|
||||
|
||||
SELECT machine_name,
|
||||
MIN(cpu) AS cpu_min,
|
||||
MAX(cpu) AS cpu_max,
|
||||
AVG(cpu) AS cpu_avg,
|
||||
MIN(net_in) AS net_in_min,
|
||||
MAX(net_in) AS net_in_max,
|
||||
AVG(net_in) AS net_in_avg,
|
||||
MIN(net_out) AS net_out_min,
|
||||
MAX(net_out) AS net_out_max,
|
||||
AVG(net_out) AS net_out_avg
|
||||
FROM (
|
||||
SELECT machine_name,
|
||||
COALESCE(cpu_user, 0.0) AS cpu,
|
||||
COALESCE(bytes_in, 0.0) AS net_in,
|
||||
COALESCE(bytes_out, 0.0) AS net_out
|
||||
FROM logs1
|
||||
WHERE machine_name IN ('anansi','aragog','urd')
|
||||
AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
|
||||
) AS r
|
||||
GROUP BY machine_name;
|
||||
|
||||
|
||||
-- Q1.2: Which computer lab machines have been offline in the past day?
|
||||
|
||||
SELECT machine_name,
|
||||
log_time
|
||||
FROM logs1
|
||||
WHERE (machine_name LIKE 'cslab%' OR
|
||||
machine_name LIKE 'mslab%')
|
||||
AND load_one IS NULL
|
||||
AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
|
||||
ORDER BY machine_name,
|
||||
log_time;
|
||||
|
||||
|
||||
-- Q1.3: What are the hourly average metrics during the past 10 days for a specific workstation?
|
||||
|
||||
SELECT dt,
|
||||
hr,
|
||||
AVG(load_fifteen) AS load_fifteen_avg,
|
||||
AVG(load_five) AS load_five_avg,
|
||||
AVG(load_one) AS load_one_avg,
|
||||
AVG(mem_free) AS mem_free_avg,
|
||||
AVG(swap_free) AS swap_free_avg
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
load_fifteen,
|
||||
load_five,
|
||||
load_one,
|
||||
mem_free,
|
||||
swap_free
|
||||
FROM logs1
|
||||
WHERE machine_name = 'babbage'
|
||||
AND load_fifteen IS NOT NULL
|
||||
AND load_five IS NOT NULL
|
||||
AND load_one IS NOT NULL
|
||||
AND mem_free IS NOT NULL
|
||||
AND swap_free IS NOT NULL
|
||||
AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
hr
|
||||
ORDER BY dt,
|
||||
hr;
|
||||
|
||||
|
||||
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
|
||||
|
||||
SELECT machine_name,
|
||||
COUNT(*) AS spikes
|
||||
FROM logs1
|
||||
WHERE machine_group = 'Servers'
|
||||
AND cpu_wio > 0.99
|
||||
AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
|
||||
AND log_time < TIMESTAMP '2017-01-01 00:00:00'
|
||||
GROUP BY machine_name
|
||||
ORDER BY spikes DESC
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
-- Q1.5: Which externally reachable VMs have run low on memory?
|
||||
|
||||
SELECT machine_name,
|
||||
dt,
|
||||
MIN(mem_free) AS mem_free_min
|
||||
FROM (
|
||||
SELECT machine_name,
|
||||
CAST(log_time AS DATE) AS dt,
|
||||
mem_free
|
||||
FROM logs1
|
||||
WHERE machine_group = 'DMZ'
|
||||
AND mem_free IS NOT NULL
|
||||
) AS r
|
||||
GROUP BY machine_name,
|
||||
dt
|
||||
HAVING MIN(mem_free) < 10000
|
||||
ORDER BY machine_name,
|
||||
dt;
|
||||
|
||||
|
||||
-- Q1.6: What is the total hourly network traffic across all file servers?
|
||||
|
||||
SELECT dt,
|
||||
hr,
|
||||
SUM(net_in) AS net_in_sum,
|
||||
SUM(net_out) AS net_out_sum,
|
||||
SUM(net_in) + SUM(net_out) AS both_sum
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
|
||||
COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
|
||||
FROM logs1
|
||||
WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
|
||||
'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
|
||||
'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
|
||||
'poprocks','razzles','runts','smarties','smuggler','spree','stride',
|
||||
'tootsie','trident','wrigley','york')
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
hr
|
||||
ORDER BY both_sum DESC
|
||||
LIMIT 10;
|
||||
|
||||
|
||||
-- Q2.1: Which requests have caused server errors within the past 2 weeks?
|
||||
|
||||
SELECT *
|
||||
FROM logs2
|
||||
WHERE status_code >= 500
|
||||
AND log_time >= TIMESTAMP '2012-12-18 00:00:00'
|
||||
ORDER BY log_time;
|
||||
|
||||
|
||||
-- Q2.2: During a specific 2-week period, was the user password file leaked?
|
||||
|
||||
SELECT *
|
||||
FROM logs2
|
||||
WHERE status_code >= 200
|
||||
AND status_code < 300
|
||||
AND request LIKE '%/etc/passwd%'
|
||||
AND log_time >= TIMESTAMP '2012-05-06 00:00:00'
|
||||
AND log_time < TIMESTAMP '2012-05-20 00:00:00';
|
||||
|
||||
|
||||
-- Q2.3: What was the average path depth for top-level requests in the past month?
|
||||
|
||||
SELECT top_level,
|
||||
AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg
|
||||
FROM (
|
||||
SELECT SUBSTRING(request FROM 1 FOR len) AS top_level,
|
||||
request
|
||||
FROM (
|
||||
SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len,
|
||||
request
|
||||
FROM logs2
|
||||
WHERE status_code >= 200
|
||||
AND status_code < 300
|
||||
AND log_time >= TIMESTAMP '2012-12-01 00:00:00'
|
||||
) AS r
|
||||
WHERE len > 0
|
||||
) AS s
|
||||
WHERE top_level IN ('/about','/courses','/degrees','/events',
|
||||
'/grad','/industry','/news','/people',
|
||||
'/publications','/research','/teaching','/ugrad')
|
||||
GROUP BY top_level
|
||||
ORDER BY top_level;
|
||||
|
||||
|
||||
-- Q2.4: During the last 3 months, which clients have made an excessive number of requests?
|
||||
|
||||
SELECT client_ip,
|
||||
COUNT(*) AS num_requests
|
||||
FROM logs2
|
||||
WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00'
|
||||
GROUP BY client_ip
|
||||
HAVING COUNT(*) >= 100000
|
||||
ORDER BY num_requests DESC;
|
||||
|
||||
|
||||
-- Q2.5: What are the daily unique visitors?
|
||||
|
||||
SELECT dt,
|
||||
COUNT(DISTINCT client_ip)
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
client_ip
|
||||
FROM logs2
|
||||
) AS r
|
||||
GROUP BY dt
|
||||
ORDER BY dt;
|
||||
|
||||
|
||||
-- Q2.6: What are the average and maximum data transfer rates (Gbps)?
|
||||
|
||||
SELECT AVG(transfer) / 125000000.0 AS transfer_avg,
|
||||
MAX(transfer) / 125000000.0 AS transfer_max
|
||||
FROM (
|
||||
SELECT log_time,
|
||||
SUM(object_size) AS transfer
|
||||
FROM logs2
|
||||
GROUP BY log_time
|
||||
) AS r;
|
||||
|
||||
|
||||
-- Q3.1: Did the indoor temperature reach freezing over the weekend?
|
||||
|
||||
SELECT *
|
||||
FROM logs3
|
||||
WHERE event_type = 'temperature'
|
||||
AND event_value <= 32.0
|
||||
AND log_time >= '2019-11-29 17:00:00.000';
|
||||
|
||||
|
||||
-- Q3.4: Over the past 6 months, how frequently were each door opened?
|
||||
|
||||
SELECT device_name,
|
||||
device_floor,
|
||||
COUNT(*) AS ct
|
||||
FROM logs3
|
||||
WHERE event_type = 'door_open'
|
||||
AND log_time >= '2019-06-01 00:00:00.000'
|
||||
GROUP BY device_name,
|
||||
device_floor
|
||||
ORDER BY ct DESC;
|
||||
|
||||
|
||||
-- Q3.5: Where in the building do large temperature variations occur in winter and summer?
|
||||
|
||||
WITH temperature AS (
|
||||
SELECT dt,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor
|
||||
FROM (
|
||||
SELECT dt,
|
||||
hr,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
AVG(event_value) AS temperature_hourly_avg
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
event_value
|
||||
FROM logs3
|
||||
WHERE event_type = 'temperature'
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
hr,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor
|
||||
) AS s
|
||||
GROUP BY dt,
|
||||
device_name,
|
||||
device_type,
|
||||
device_floor
|
||||
HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0
|
||||
)
|
||||
SELECT DISTINCT device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
'WINTER'
|
||||
FROM temperature
|
||||
WHERE dt >= DATE '2018-12-01'
|
||||
AND dt < DATE '2019-03-01'
|
||||
UNION
|
||||
SELECT DISTINCT device_name,
|
||||
device_type,
|
||||
device_floor,
|
||||
'SUMMER'
|
||||
FROM temperature
|
||||
WHERE dt >= DATE '2019-06-01'
|
||||
AND dt < DATE '2019-09-01';
|
||||
|
||||
|
||||
-- Q3.6: For each device category, what are the monthly power consumption metrics?
|
||||
|
||||
SELECT yr,
|
||||
mo,
|
||||
SUM(coffee_hourly_avg) AS coffee_monthly_sum,
|
||||
AVG(coffee_hourly_avg) AS coffee_monthly_avg,
|
||||
SUM(printer_hourly_avg) AS printer_monthly_sum,
|
||||
AVG(printer_hourly_avg) AS printer_monthly_avg,
|
||||
SUM(projector_hourly_avg) AS projector_monthly_sum,
|
||||
AVG(projector_hourly_avg) AS projector_monthly_avg,
|
||||
SUM(vending_hourly_avg) AS vending_monthly_sum,
|
||||
AVG(vending_hourly_avg) AS vending_monthly_avg
|
||||
FROM (
|
||||
SELECT dt,
|
||||
yr,
|
||||
mo,
|
||||
hr,
|
||||
AVG(coffee) AS coffee_hourly_avg,
|
||||
AVG(printer) AS printer_hourly_avg,
|
||||
AVG(projector) AS projector_hourly_avg,
|
||||
AVG(vending) AS vending_hourly_avg
|
||||
FROM (
|
||||
SELECT CAST(log_time AS DATE) AS dt,
|
||||
EXTRACT(YEAR FROM log_time) AS yr,
|
||||
EXTRACT(MONTH FROM log_time) AS mo,
|
||||
EXTRACT(HOUR FROM log_time) AS hr,
|
||||
CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee,
|
||||
CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer,
|
||||
CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector,
|
||||
CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending
|
||||
FROM logs3
|
||||
WHERE device_type = 'meter'
|
||||
) AS r
|
||||
GROUP BY dt,
|
||||
yr,
|
||||
mo,
|
||||
hr
|
||||
) AS s
|
||||
GROUP BY yr,
|
||||
mo
|
||||
ORDER BY yr,
|
||||
mo;
|
||||
```
|
||||
|
||||
Данные также доступны для работы с интерактивными запросами через [Playground](https://gh-api.clickhouse.tech/play?user=play), [пример](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/getting_started/example_datasets/brown-benchmark/) <!--hide-->
|
@ -149,28 +149,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
|
||||
|
||||
Для запросов, которые не возвращают таблицу с данными, в случае успеха, выдаётся пустое тело ответа.
|
||||
|
||||
Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor` (устанавливается вместе с пакетом `clickhouse-client`). Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки[http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress).
|
||||
|
||||
Если вы указали `compress = 1` в URL, то сервер сжимает данные, которые он отправляет.
|
||||
Если вы указали `decompress = 1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`.
|
||||
## Сжатие {#compression}
|
||||
|
||||
Также, можно использовать [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`. Чтобы ClickHouse сжимал ответ, добавьте заголовок `Accept-Encoding: compression_method`. ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens): `gzip`, `br`, and `deflate`. Чтобы включить HTTP compression, используйте настройку ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression). Уровень сжатия данных для всех методов сжатия можно настроить с помощью настройки [http_zlib_compression_level](#settings-http_zlib_compression_level).
|
||||
Сжатие можно использовать для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов.
|
||||
|
||||
Это может быть использовано для уменьшения трафика по сети при передаче большого количества данных, а также для создания сразу сжатых дампов.
|
||||
Вы можете использовать внутренний формат сжатия Clickhouse при передаче данных. Формат сжатых данных нестандартный, и вам придётся использовать для работы с ним специальную программу `clickhouse-compressor`. Она устанавливается вместе с пакетом `clickhouse-client`. Для повышения эффективности вставки данных можно отключить проверку контрольной суммы на стороне сервера с помощью настройки [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress).
|
||||
|
||||
Примеры отправки данных со сжатием:
|
||||
Если вы указали `compress=1` в URL, то сервер сжимает данные, которые он отправляет. Если вы указали `decompress=1` в URL, сервер распаковывает те данные, которые вы передаёте методом `POST`.
|
||||
|
||||
``` bash
|
||||
$ #Отправка данных на сервер:
|
||||
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
|
||||
Также можно использовать [сжатие HTTP](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse поддерживает следующие [методы сжатия](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
|
||||
|
||||
$ #Отправка данных клиенту:
|
||||
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
|
||||
```
|
||||
- `gzip`
|
||||
- `br`
|
||||
- `deflate`
|
||||
- `xz`
|
||||
|
||||
Для отправки сжатого запроса `POST`, добавьте заголовок `Content-Encoding: compression_method`.
|
||||
Чтобы ClickHouse сжимал ответ, разрешите сжатие настройкой [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) и добавьте заголовок `Accept-Encoding: compression_method`. Уровень сжатия данных для всех методов сжатия можно задать с помощью настройки [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level).
|
||||
|
||||
!!! note "Примечание"
|
||||
Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия.
|
||||
|
||||
**Примеры**
|
||||
|
||||
``` bash
|
||||
# Отправка сжатых данных на сервер
|
||||
$ echo "SELECT 1" | gzip -c | \
|
||||
curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
|
||||
```
|
||||
|
||||
``` bash
|
||||
# Получение сжатых данных с сервера
|
||||
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
|
||||
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
|
||||
$ zcat result.gz
|
||||
0
|
||||
1
|
||||
2
|
||||
```
|
||||
|
||||
## База данных по умолчанию {#default-database}
|
||||
|
||||
Вы можете использовать параметр URL `database` или заголовок `X-ClickHouse-Database`, чтобы указать БД по умолчанию.
|
||||
|
||||
``` bash
|
||||
|
@ -31,6 +31,7 @@ mannWhitneyUTest[(alternative[, continuity_correction])](sample_data, sample_ind
|
||||
**Возвращаемые значения**
|
||||
|
||||
[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
|
||||
|
||||
- вычисленное значение критерия Манна — Уитни. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
@ -24,6 +24,7 @@ studentTTest(sample_data, sample_index)
|
||||
**Возвращаемые значения**
|
||||
|
||||
[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
|
||||
|
||||
- вычисленное значение критерия Стьюдента. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
@ -24,6 +24,7 @@ welchTTest(sample_data, sample_index)
|
||||
**Возвращаемые значения**
|
||||
|
||||
[Кортеж](../../../sql-reference/data-types/tuple.md) с двумя элементами:
|
||||
|
||||
- вычисленное значение критерия Уэлча. [Float64](../../../sql-reference/data-types/float.md).
|
||||
- вычисленное p-значение. [Float64](../../../sql-reference/data-types/float.md).
|
||||
|
||||
|
@ -23,7 +23,7 @@ LowCardinality(data_type)
|
||||
|
||||
Эффективность использования типа данных `LowCardinality` зависит от разнообразия данных. Если словарь содержит менее 10 000 различных значений, ClickHouse в основном показывает более высокую эффективность чтения и хранения данных. Если же словарь содержит более 100 000 различных значений, ClickHouse может работать хуже, чем при использовании обычных типов данных.
|
||||
|
||||
При работе со строками, использование `LowCardinality` вместо [Enum](enum.md). `LowCardinality` обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
|
||||
При работе со строками, использование `LowCardinality` вместо [Enum](enum.md) обеспечивает большую гибкость в использовании и часто показывает такую же или более высокую эффективность.
|
||||
|
||||
## Пример
|
||||
|
||||
|
@ -1355,6 +1355,52 @@ SELECT arrayAvg(x -> (x * x), [2, 4]) AS res;
|
||||
└─────┘
|
||||
```
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
arraySum(arr)
|
||||
```
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Число.
|
||||
|
||||
Тип: [Int](../../sql-reference/data-types/int-uint.md) или [Float](../../sql-reference/data-types/float.md).
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `arr` — [Массив](../../sql-reference/data-types/array.md).
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT arraySum([2,3]) AS res;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 5 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT arraySum(x -> x*x, [2, 3]) AS res;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─res─┐
|
||||
│ 13 │
|
||||
└─────┘
|
||||
```
|
||||
|
||||
## arrayCumSum(\[func,\] arr1, …) {#arraycumsumfunc-arr1}
|
||||
|
||||
Возвращает массив из частичных сумм элементов исходного массива (сумма с накоплением). Если указана функция `func`, то значения элементов массива преобразуются этой функцией перед суммированием.
|
||||
|
@ -63,40 +63,58 @@ int32samoa: 1546300800
|
||||
|
||||
Переводит дату или дату-с-временем в число типа UInt16, содержащее номер года (AD).
|
||||
|
||||
Синоним: `YEAR`.
|
||||
|
||||
## toQuarter {#toquarter}
|
||||
|
||||
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер квартала.
|
||||
|
||||
Синоним: `QUARTER`.
|
||||
|
||||
## toMonth {#tomonth}
|
||||
|
||||
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер месяца (1-12).
|
||||
|
||||
Синоним: `MONTH`.
|
||||
|
||||
## toDayOfYear {#todayofyear}
|
||||
|
||||
Переводит дату или дату-с-временем в число типа UInt16, содержащее номер дня года (1-366).
|
||||
|
||||
Синоним: `DAYOFYEAR`.
|
||||
|
||||
## toDayOfMonth {#todayofmonth}
|
||||
|
||||
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в месяце (1-31).
|
||||
|
||||
Синонимы: `DAYOFMONTH`, `DAY`.
|
||||
|
||||
## toDayOfWeek {#todayofweek}
|
||||
|
||||
Переводит дату или дату-с-временем в число типа UInt8, содержащее номер дня в неделе (понедельник - 1, воскресенье - 7).
|
||||
|
||||
Синоним: `DAYOFWEEK`.
|
||||
|
||||
## toHour {#tohour}
|
||||
|
||||
Переводит дату-с-временем в число типа UInt8, содержащее номер часа в сутках (0-23).
|
||||
Функция исходит из допущения, что перевод стрелок вперёд, если осуществляется, то на час, в два часа ночи, а перевод стрелок назад, если осуществляется, то на час, в три часа ночи (что, в общем, не верно - даже в Москве два раза перевод стрелок был осуществлён в другое время).
|
||||
|
||||
Синоним: `HOUR`.
|
||||
|
||||
## toMinute {#tominute}
|
||||
|
||||
Переводит дату-с-временем в число типа UInt8, содержащее номер минуты в часе (0-59).
|
||||
|
||||
Синоним: `MINUTE`.
|
||||
|
||||
## toSecond {#tosecond}
|
||||
|
||||
Переводит дату-с-временем в число типа UInt8, содержащее номер секунды в минуте (0-59).
|
||||
Секунды координации не учитываются.
|
||||
|
||||
Синоним: `SECOND`.
|
||||
|
||||
## toUnixTimestamp {#to-unix-timestamp}
|
||||
|
||||
Переводит дату-с-временем в число типа UInt32 -- Unix Timestamp (https://en.wikipedia.org/wiki/Unix_time).
|
||||
|
@ -75,6 +75,8 @@ SELECT char(0xE4, 0xBD, 0xA0, 0xE5, 0xA5, 0xBD) AS hello;
|
||||
|
||||
Returns a string containing the argument’s hexadecimal representation.
|
||||
|
||||
Синоним: `HEX`.
|
||||
|
||||
**Syntax**
|
||||
|
||||
``` sql
|
||||
|
@ -13,6 +13,8 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
|
||||
isNull(x)
|
||||
```
|
||||
|
||||
Синоним: `ISNULL`.
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `x` — значение с не составным типом данных.
|
||||
|
@ -9,10 +9,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u0434\u043b\u044f\u
|
||||
|
||||
Принимает число типа UInt32. Интерпретирует его, как IPv4-адрес в big endian. Возвращает строку, содержащую соответствующий IPv4-адрес в формате A.B.C.D (числа в десятичной форме через точки).
|
||||
|
||||
Синоним: `INET_NTOA`.
|
||||
|
||||
## IPv4StringToNum(s) {#ipv4stringtonums}
|
||||
|
||||
Функция, обратная к IPv4NumToString. Если IPv4 адрес в неправильном формате, то возвращает 0.
|
||||
|
||||
Синоним: `INET_ATON`.
|
||||
|
||||
## IPv4NumToStringClassC(num) {#ipv4numtostringclasscnum}
|
||||
|
||||
Похоже на IPv4NumToString, но вместо последнего октета используется xxx.
|
||||
@ -49,7 +53,11 @@ LIMIT 10
|
||||
### IPv6NumToString(x) {#ipv6numtostringx}
|
||||
|
||||
Принимает значение типа FixedString(16), содержащее IPv6-адрес в бинарном виде. Возвращает строку, содержащую этот адрес в текстовом виде.
|
||||
IPv6-mapped IPv4 адреса выводится в формате ::ffff:111.222.33.44. Примеры:
|
||||
IPv6-mapped IPv4 адреса выводятся в формате ::ffff:111.222.33.44.
|
||||
|
||||
Синоним: `INET6_NTOA`.
|
||||
|
||||
Примеры:
|
||||
|
||||
``` sql
|
||||
SELECT IPv6NumToString(toFixedString(unhex('2A0206B8000000000000000000000011'), 16)) AS addr
|
||||
@ -118,6 +126,8 @@ LIMIT 10
|
||||
Функция, обратная к IPv6NumToString. Если IPv6 адрес в неправильном формате, то возвращает строку из нулевых байт.
|
||||
HEX может быть в любом регистре.
|
||||
|
||||
Синоним: `INET6_ATON`.
|
||||
|
||||
## IPv4ToIPv6(x) {#ipv4toipv6x}
|
||||
|
||||
Принимает число типа `UInt32`. Интерпретирует его, как IPv4-адрес в [big endian](https://en.wikipedia.org/wiki/Endianness). Возвращает значение `FixedString(16)`, содержащее адрес IPv6 в двоичном формате. Примеры:
|
||||
|
@ -95,6 +95,8 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b')
|
||||
|
||||
Повторяет строку определенное количество раз и объединяет повторяемые значения в одну строку.
|
||||
|
||||
Синоним: `REPEAT`.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
@ -273,10 +275,14 @@ SELECT concat(key1, key2), sum(value) FROM key_val GROUP BY (key1, key2)
|
||||
|
||||
Производит кодирование строки s в base64-представление.
|
||||
|
||||
Синоним: `TO_BASE64`.
|
||||
|
||||
## base64Decode(s) {#base64decode}
|
||||
|
||||
Декодирует base64-представление s в исходную строку. При невозможности декодирования выбрасывает исключение.
|
||||
|
||||
Синоним: `FROM_BASE64`.
|
||||
|
||||
## tryBase64Decode(s) {#trybase64decode}
|
||||
|
||||
Функционал аналогичен base64Decode, но при невозможности декодирования возвращает пустую строку.
|
||||
@ -597,4 +603,46 @@ Hello, "world"!
|
||||
'foo'
|
||||
```
|
||||
|
||||
|
||||
## decodeXMLComponent {#decode-xml-component}
|
||||
|
||||
Заменяет символами предопределенные мнемоники XML: `&quot;` `&amp;` `&apos;` `&gt;` `&lt;`
|
||||
Также эта функция заменяет числовые ссылки соответствующими символами юникод. Поддерживаются десятичная (например, `&#10003;`) и шестнадцатеричная (`&#x2713;`) формы.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
decodeXMLComponent(x)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `x` — последовательность символов. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Строка с произведенными заменами.
|
||||
|
||||
Тип: [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT decodeXMLComponent('&apos;foo&apos;');
|
||||
SELECT decodeXMLComponent('&lt; &#x3A3; &gt;');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
'foo'
|
||||
< Σ >
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Мнемоники в HTML](https://ru.wikipedia.org/wiki/%D0%9C%D0%BD%D0%B5%D0%BC%D0%BE%D0%BD%D0%B8%D0%BA%D0%B8_%D0%B2_HTML)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/functions/string_functions/) <!--hide-->
|
||||
|
@ -36,10 +36,14 @@ toc_title: "\u0424\u0443\u043d\u043a\u0446\u0438\u0438\u0020\u043f\u0440\u0435\u
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
|
||||
SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─────────toInt64(nan)─┬─toInt32(32)─┬─toInt16('16')─┬─toInt8(8.8)─┐
|
||||
│ -9223372036854775808 │ 32 │ 16 │ 8 │
|
||||
@ -52,10 +56,14 @@ SELECT toInt64(nan), toInt32(32), toInt16('16'), toInt8(8.8)
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
|
||||
SELECT toInt64OrZero('123123'), toInt8OrZero('123qwe123');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toInt64OrZero('123123')─┬─toInt8OrZero('123qwe123')─┐
|
||||
│ 123123 │ 0 │
|
||||
@ -68,10 +76,14 @@ select toInt64OrZero('123123'), toInt8OrZero('123qwe123')
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
|
||||
SELECT toInt64OrNull('123123'), toInt8OrNull('123qwe123');
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toInt64OrNull('123123')─┬─toInt8OrNull('123qwe123')─┐
|
||||
│ 123123 │ ᴺᵁᴸᴸ │
|
||||
@ -102,10 +114,14 @@ select toInt64OrNull('123123'), toInt8OrNull('123qwe123')
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
|
||||
SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌───────toUInt64(nan)─┬─toUInt32(-32)─┬─toUInt16('16')─┬─toUInt8(8.8)─┐
|
||||
│ 9223372036854775808 │ 4294967264 │ 16 │ 8 │
|
||||
@ -124,6 +140,8 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
|
||||
|
||||
## toDate {#todate}
|
||||
|
||||
Синоним: `DATE`.
|
||||
|
||||
## toDateOrZero {#todateorzero}
|
||||
|
||||
## toDateOrNull {#todateornull}
|
||||
@ -168,20 +186,28 @@ SELECT toUInt64(nan), toUInt32(-32), toUInt16('16'), toUInt8(8.8)
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 5) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌──────val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 5))─┐
|
||||
│ -1.11100 │ Nullable(Decimal(9, 5)) │
|
||||
└──────────┴────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌──val─┬─toTypeName(toDecimal32OrNull(toString(-1.111), 2))─┐
|
||||
│ ᴺᵁᴸᴸ │ Nullable(Decimal(9, 2)) │
|
||||
@ -213,20 +239,28 @@ SELECT toDecimal32OrNull(toString(-1.111), 2) AS val, toTypeName(val)
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 5) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌──────val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 5))─┐
|
||||
│ -1.11100 │ Decimal(9, 5) │
|
||||
└──────────┴────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val)
|
||||
SELECT toDecimal32OrZero(toString(-1.111), 2) AS val, toTypeName(val);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌──val─┬─toTypeName(toDecimal32OrZero(toString(-1.111), 2))─┐
|
||||
│ 0.00 │ Decimal(9, 2) │
|
||||
@ -258,12 +292,18 @@ YYYY-MM-DD hh:mm:ss
|
||||
|
||||
Дополнительно, функция toString от аргумента типа DateTime может принимать второй аргумент String - имя тайм-зоны. Пример: `Asia/Yekaterinburg`. В этом случае форматирование времени производится согласно указанной тайм-зоне.
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
now() AS now_local,
|
||||
toString(now(), 'Asia/Yekaterinburg') AS now_yekat
|
||||
toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌───────────now_local─┬─now_yekat───────────┐
|
||||
│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
|
||||
@ -281,22 +321,30 @@ SELECT
|
||||
|
||||
Принимает аргумент типа String или FixedString. Возвращает String, вырезая содержимое строки до первого найденного нулевого байта.
|
||||
|
||||
Пример:
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut
|
||||
SELECT toFixedString('foo', 8) AS s, toStringCutToZero(s) AS s_cut;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─s─────────────┬─s_cut─┐
|
||||
│ foo\0\0\0\0\0 │ foo │
|
||||
└───────────────┴───────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut
|
||||
SELECT toFixedString('foo\0bar', 8) AS s, toStringCutToZero(s) AS s_cut;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─s──────────┬─s_cut─┐
|
||||
│ foo\0bar\0 │ foo │
|
||||
@ -344,7 +392,7 @@ reinterpretAsUUID(fixed_string)
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')))
|
||||
SELECT reinterpretAsUUID(reverse(unhex('000102030405060708090a0b0c0d0e0f')));
|
||||
```
|
||||
|
||||
Результат:
|
||||
@ -377,10 +425,15 @@ SELECT uuid = uuid2;
|
||||
|
||||
## CAST(x, T) {#type_conversion_function-cast}
|
||||
|
||||
Преобразует x в тип данных t.
|
||||
Поддерживается также синтаксис CAST(x AS t).
|
||||
Преобразует входное значение `x` в указанный тип данных `T`.
|
||||
|
||||
Пример:
|
||||
Поддерживается также синтаксис `CAST(x AS T)`.
|
||||
|
||||
Обратите внимание, что если значение `x` не может быть преобразовано к типу `T`, возникает переполнение. Например, `CAST(-1, 'UInt8')` возвращает 255.
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
@ -388,9 +441,11 @@ SELECT
|
||||
CAST(timestamp AS DateTime) AS datetime,
|
||||
CAST(timestamp AS Date) AS date,
|
||||
CAST(timestamp, 'String') AS string,
|
||||
CAST(timestamp, 'FixedString(22)') AS fixed_string
|
||||
CAST(timestamp, 'FixedString(22)') AS fixed_string;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string──────────────┐
|
||||
│ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00 │ 2016-06-15 │ 2016-06-15 23:00:00 │ 2016-06-15 23:00:00\0\0\0 │
|
||||
@ -399,12 +454,18 @@ SELECT
|
||||
|
||||
Преобразование в FixedString(N) работает только для аргументов типа String или FixedString(N).
|
||||
|
||||
Поддержано преобразование к типу [Nullable](../../sql-reference/functions/type-conversion-functions.md) и обратно. Пример:
|
||||
Поддержано преобразование к типу [Nullable](../../sql-reference/data-types/nullable.md) и обратно.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(x) FROM t_null
|
||||
SELECT toTypeName(x) FROM t_null;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(x)─┐
|
||||
│ Int8 │
|
||||
@ -412,10 +473,14 @@ SELECT toTypeName(x) FROM t_null
|
||||
└───────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
|
||||
SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(CAST(x, 'Nullable(UInt16)'))─┐
|
||||
│ Nullable(UInt16) │
|
||||
@ -427,6 +492,93 @@ SELECT toTypeName(CAST(x, 'Nullable(UInt16)')) FROM t_null
|
||||
|
||||
- Настройка [cast_keep_nullable](../../operations/settings/settings.md#cast_keep_nullable)
|
||||
|
||||
## accurateCast(x, T) {#type_conversion_function-accurate-cast}
|
||||
|
||||
Преобразует входное значение `x` в указанный тип данных `T`.
|
||||
|
||||
В отличие от функции [cast(x, T)](#type_conversion_function-cast), `accurateCast` не допускает переполнения при преобразовании числовых типов. Например, `accurateCast(-1, 'UInt8')` вызовет исключение.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT cast(-1, 'UInt8') as uint8;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─uint8─┐
|
||||
│ 255 │
|
||||
└───────┘
```
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT accurateCast(-1, 'UInt8') as uint8;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
Code: 70. DB::Exception: Received from localhost:9000. DB::Exception: Value in column Int8 cannot be safely converted into type UInt8: While processing accurateCast(-1, 'UInt8') AS uint8.
|
||||
```
|
||||
|
||||
## accurateCastOrNull(x, T) {#type_conversion_function-accurate-cast_or_null}
|
||||
|
||||
Преобразует входное значение `x` в указанный тип данных `T`.
|
||||
|
||||
Всегда возвращает тип [Nullable](../../sql-reference/data-types/nullable.md). Если исходное значение не может быть преобразовано к целевому типу, возвращает [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
```sql
|
||||
accurateCastOrNull(x, T)
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `x` — входное значение.
|
||||
- `T` — имя возвращаемого типа данных.
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
- Значение, преобразованное в указанный тип `T`.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT toTypeName(accurateCastOrNull(5, 'UInt8'));
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toTypeName(accurateCastOrNull(5, 'UInt8'))─┐
|
||||
│ Nullable(UInt8) │
|
||||
└────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
accurateCastOrNull(-1, 'UInt8') as uint8,
|
||||
accurateCastOrNull(128, 'Int8') as int8,
|
||||
accurateCastOrNull('Test', 'FixedString(2)') as fixed_string;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─uint8─┬─int8─┬─fixed_string─┐
|
||||
│ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
|
||||
└───────┴──────┴──────────────┘
|
||||
```
|
||||
|
||||
## toInterval(Year\|Quarter\|Month\|Week\|Day\|Hour\|Minute\|Second) {#function-tointerval}
|
||||
|
||||
Приводит аргумент из числового типа данных к типу данных [IntervalType](../../sql-reference/data-types/special-data-types/interval.md).
|
||||
@ -454,6 +606,8 @@ toIntervalYear(number)
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
WITH
|
||||
toDate('2019-01-01') AS date,
|
||||
@ -461,9 +615,11 @@ WITH
|
||||
toIntervalWeek(1) AS interval_to_week
|
||||
SELECT
|
||||
date + interval_week,
|
||||
date + interval_to_week
|
||||
date + interval_to_week;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┐
|
||||
│ 2019-01-08 │ 2019-01-08 │
|
||||
@ -479,7 +635,7 @@ SELECT
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffort(time_string[, time_zone]);
|
||||
parseDateTimeBestEffort(time_string[, time_zone])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
@ -522,7 +678,7 @@ AS parseDateTimeBestEffort;
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('Sat, 18 Aug 2018 07:22:16 GMT', 'Europe/Moscow')
|
||||
AS parseDateTimeBestEffort
|
||||
AS parseDateTimeBestEffort;
|
||||
```
|
||||
|
||||
Результат:
|
||||
@ -537,7 +693,7 @@ AS parseDateTimeBestEffort
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('1284101485')
|
||||
AS parseDateTimeBestEffort
|
||||
AS parseDateTimeBestEffort;
|
||||
```
|
||||
|
||||
Результат:
|
||||
@ -552,7 +708,7 @@ AS parseDateTimeBestEffort
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('2018-12-12 10:12:12')
|
||||
AS parseDateTimeBestEffort
|
||||
AS parseDateTimeBestEffort;
|
||||
```
|
||||
|
||||
Результат:
|
||||
@ -566,7 +722,7 @@ AS parseDateTimeBestEffort
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffort('10 20:19')
|
||||
SELECT parseDateTimeBestEffort('10 20:19');
|
||||
```
|
||||
|
||||
Результат:
|
||||
@ -591,7 +747,7 @@ SELECT parseDateTimeBestEffort('10 20:19')
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffortUS(time_string [, time_zone]);
|
||||
parseDateTimeBestEffortUS(time_string [, time_zone])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
@ -620,7 +776,7 @@ SELECT parseDateTimeBestEffortUS('09/12/2020 12:12:57')
|
||||
AS parseDateTimeBestEffortUS;
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUS─┐
|
||||
@ -635,7 +791,7 @@ SELECT parseDateTimeBestEffortUS('09-12-2020 12:12:57')
|
||||
AS parseDateTimeBestEffortUS;
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUS─┐
|
||||
@ -650,7 +806,7 @@ SELECT parseDateTimeBestEffortUS('09.12.2020 12:12:57')
|
||||
AS parseDateTimeBestEffortUS;
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUS─┐
|
||||
@ -658,6 +814,178 @@ AS parseDateTimeBestEffortUS;
|
||||
└───────────────────────────┘
|
||||
```
|
||||
|
||||
## parseDateTimeBestEffortUSOrNull {#parsedatetimebesteffortusornull}
|
||||
|
||||
Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает `NULL`, если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffortUSOrNull(time_string[, time_zone])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md).
|
||||
- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Поддерживаемые нестандартные форматы**
|
||||
|
||||
- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр.
|
||||
- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д.
|
||||
- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д.
|
||||
- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`.
|
||||
- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- `time_string`, преобразованная в тип данных `DateTime`.
|
||||
- `NULL`, если входная строка не может быть преобразована в тип данных `DateTime`.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ 2021-02-10 21:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ 2021-02-11 00:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('02.10.2021') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ 2021-02-10 00:00:00 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrNull('10.2021') AS parseDateTimeBestEffortUSOrNull;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrNull─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## parseDateTimeBestEffortUSOrZero {#parsedatetimebesteffortusorzero}
|
||||
|
||||
Работает аналогично функции [parseDateTimeBestEffortUS](#parsedatetimebesteffortUS), но в отличие от нее возвращает нулевую дату (`1970-01-01`) или нулевую дату со временем (`1970-01-01 00:00:00`), если входная строка не может быть преобразована в тип данных [DateTime](../../sql-reference/data-types/datetime.md).
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
parseDateTimeBestEffortUSOrZero(time_string[, time_zone])
|
||||
```
|
||||
|
||||
**Параметры**
|
||||
|
||||
- `time_string` — строка, содержащая дату или дату со временем для преобразования. Дата должна быть в американском формате (`MM/DD/YYYY` и т.д.). [String](../../sql-reference/data-types/string.md).
|
||||
- `time_zone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция анализирует `time_string` в соответствии с заданным часовым поясом. Опциональный параметр. [String](../../sql-reference/data-types/string.md).
|
||||
|
||||
**Поддерживаемые нестандартные форматы**
|
||||
|
||||
- Строка в формате [unix timestamp](https://en.wikipedia.org/wiki/Unix_time), содержащая 9-10 цифр.
|
||||
- Строка, содержащая дату и время: `YYYYMMDDhhmmss`, `MM/DD/YYYY hh:mm:ss`, `MM-DD-YY hh:mm`, `YYYY-MM-DD hh:mm:ss` и т.д.
|
||||
- Строка, содержащая дату без времени: `YYYY`, `YYYYMM`, `YYYY*MM`, `MM/DD/YYYY`, `MM-DD-YY` и т.д.
|
||||
- Строка, содержащая день и время: `DD`, `DD hh`, `DD hh:mm`. В этом случае `YYYY-MM` заменяется на `2000-01`.
|
||||
- Строка, содержащая дату и время, а также информацию о часовом поясе: `YYYY-MM-DD hh:mm:ss ±h:mm` и т.д. Например, `2020-12-12 17:36:00 -5:00`.
|
||||
|
||||
**Возвращаемые значения**
|
||||
|
||||
- `time_string`, преобразованная в тип данных `DateTime`.
|
||||
- Нулевая дата или нулевая дата со временем, если входная строка не может быть преобразована в тип данных `DateTime`.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02/10/2021 21:12:57') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 2021-02-10 21:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02-10-2021 21:12:57 GMT', 'Europe/Moscow') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 2021-02-11 00:12:57 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02.10.2021') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 2021-02-10 00:00:00 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT parseDateTimeBestEffortUSOrZero('02.2021') AS parseDateTimeBestEffortUSOrZero;
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─parseDateTimeBestEffortUSOrZero─┐
|
||||
│ 1970-01-01 00:00:00 │
|
||||
└─────────────────────────────────┘
|
||||
```
|
||||
|
||||
## toUnixTimestamp64Milli
|
||||
## toUnixTimestamp64Micro
|
||||
## toUnixTimestamp64Nano
|
||||
@ -685,10 +1013,10 @@ toUnixTimestamp64Milli(value)
|
||||
|
||||
``` sql
|
||||
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
|
||||
SELECT toUnixTimestamp64Milli(dt64)
|
||||
SELECT toUnixTimestamp64Milli(dt64);
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toUnixTimestamp64Milli(dt64)─┐
|
||||
@ -700,10 +1028,10 @@ SELECT toUnixTimestamp64Milli(dt64)
|
||||
|
||||
``` sql
|
||||
WITH toDateTime64('2019-09-16 19:20:12.345678910', 6) AS dt64
|
||||
SELECT toUnixTimestamp64Nano(dt64)
|
||||
SELECT toUnixTimestamp64Nano(dt64);
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─toUnixTimestamp64Nano(dt64)─┐
|
||||
@ -738,10 +1066,10 @@ fromUnixTimestamp64Milli(value [, ti])
|
||||
|
||||
``` sql
|
||||
WITH CAST(1234567891011, 'Int64') AS i64
|
||||
SELECT fromUnixTimestamp64Milli(i64, 'UTC')
|
||||
SELECT fromUnixTimestamp64Milli(i64, 'UTC');
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─fromUnixTimestamp64Milli(i64, 'UTC')─┐
|
||||
@ -772,12 +1100,12 @@ toLowCardinality(expr)
|
||||
|
||||
Тип: `LowCardinality(expr_result_type)`
|
||||
|
||||
**Example**
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
```sql
|
||||
SELECT toLowCardinality('1')
|
||||
SELECT toLowCardinality('1');
|
||||
```
|
||||
|
||||
Результат:
|
||||
@ -813,10 +1141,10 @@ formatRow(format, x, y, ...)
|
||||
|
||||
``` sql
|
||||
SELECT formatRow('CSV', number, 'good')
|
||||
FROM numbers(3)
|
||||
FROM numbers(3);
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─formatRow('CSV', number, 'good')─┐
|
||||
@ -854,10 +1182,10 @@ formatRowNoNewline(format, x, y, ...)
|
||||
|
||||
``` sql
|
||||
SELECT formatRowNoNewline('CSV', number, 'good')
|
||||
FROM numbers(3)
|
||||
FROM numbers(3);
|
||||
```
|
||||
|
||||
Ответ:
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─formatRowNoNewline('CSV', number, 'good')─┐
|
||||
|
@ -13,10 +13,28 @@ SELECT (CounterID, UserID) IN ((34, 123), (101500, 456)) FROM ...
|
||||
|
||||
Если слева стоит один столбец, входящий в индекс, а справа - множество констант, то при выполнении запроса, система воспользуется индексом.
|
||||
|
||||
Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел «Внешние данные для обработки запроса»), и затем воспользоваться подзапросом.
|
||||
Не перечисляйте слишком большое количество значений (миллионы) явно. Если множество большое - лучше загрузить его во временную таблицу (например, смотрите раздел [Внешние данные для обработки запроса](../../engines/table-engines/special/external-data.md)), и затем воспользоваться подзапросом.
|
||||
|
||||
В качестве правой части оператора может быть множество константных выражений, множество кортежей с константными выражениями (показано в примерах выше), а также имя таблицы или подзапрос SELECT в скобках.
|
||||
|
||||
Если типы данных в левой и правой частях подзапроса `IN` различаются, ClickHouse преобразует значение в левой части к типу данных из правой части. Преобразование выполняется по аналогии с функцией [accurateCastOrNull](../functions/type-conversion-functions.md#type_conversion_function-accurate-cast_or_null), т.е. тип данных становится [Nullable](../../sql-reference/data-types/nullable.md), а если преобразование не может быть выполнено, возвращается значение [NULL](../../sql-reference/syntax.md#null-literal).
|
||||
|
||||
**Пример**
|
||||
|
||||
Запрос:
|
||||
|
||||
``` sql
|
||||
SELECT '1' IN (SELECT 1);
|
||||
```
|
||||
|
||||
Результат:
|
||||
|
||||
``` text
|
||||
┌─in('1', _subquery49)─┐
|
||||
│ 1 │
|
||||
└──────────────────────┘
|
||||
```
|
||||
|
||||
Если в качестве правой части оператора указано имя таблицы (например, `UserID IN users`), то это эквивалентно подзапросу `UserID IN (SELECT * FROM users)`. Это используется при работе с внешними данными, отправляемыми вместе с запросом. Например, вместе с запросом может быть отправлено множество идентификаторов посетителей, загруженное во временную таблицу users, по которому следует выполнить фильтрацию.
|
||||
|
||||
Если в качестве правой части оператора указано имя таблицы, имеющей движок Set (подготовленное множество, постоянно находящееся в оперативной памяти), то множество не будет создаваться заново при каждом запросе.
|
||||
|
@ -7,6 +7,8 @@ toc_title: mysql
|
||||
|
||||
Позволяет выполнять запросы `SELECT` над данными, хранящимися на удалённом MySQL сервере.
|
||||
|
||||
**Синтаксис**
|
||||
|
||||
``` sql
|
||||
mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_duplicate_clause']);
|
||||
```
|
||||
@ -23,13 +25,13 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
|
||||
|
||||
- `password` — пароль пользователя.
|
||||
|
||||
- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Если `replace_query=1`, то запрос заменяется.
|
||||
- `replace_query` — флаг, отвечающий за преобразование запросов `INSERT INTO` в `REPLACE INTO`. Возможные значения:
|
||||
- `0` - выполняется запрос `INSERT INTO`.
|
||||
- `1` - выполняется запрос `REPLACE INTO`.
|
||||
|
||||
- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`.
|
||||
- `on_duplicate_clause` — выражение `ON DUPLICATE KEY on_duplicate_clause`, добавляемое в запрос `INSERT`. Может быть передано только при `replace_query = 0` (если вы одновременно передадите `replace_query = 1` и `on_duplicate_clause`, будет сгенерировано исключение).
|
||||
|
||||
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1`. Чтобы узнать какие `on_duplicate_clause` можно использовать с секцией `ON DUPLICATE KEY` обратитесь к документации MySQL.
|
||||
|
||||
Чтобы указать `'on_duplicate_clause'` необходимо передать `0` в параметр `replace_query`. Если одновременно передать `replace_query = 1` и `'on_duplicate_clause'`, то ClickHouse сгенерирует исключение.
|
||||
Пример: `INSERT INTO t (c1,c2) VALUES ('a', 2) ON DUPLICATE KEY UPDATE c2 = c2 + 1`, где `on_duplicate_clause` это `UPDATE c2 = c2 + 1`.
|
||||
|
||||
Простые условия `WHERE`, такие как `=, !=, >, >=, <, <=`, выполняются на стороне сервера MySQL.
|
||||
|
||||
@ -39,46 +41,59 @@ mysql('host:port', 'database', 'table', 'user', 'password'[, replace_query, 'on_
|
||||
|
||||
Объект таблицы с теми же столбцами, что и в исходной таблице MySQL.
|
||||
|
||||
## Пример использования {#primer-ispolzovaniia}
|
||||
!!! note "Примечание"
|
||||
Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком имен столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже.
|
||||
|
||||
**Примеры**
|
||||
|
||||
Таблица в MySQL:
|
||||
|
||||
``` text
|
||||
mysql> CREATE TABLE `test`.`test` (
|
||||
-> `int_id` INT NOT NULL AUTO_INCREMENT,
|
||||
-> `int_nullable` INT NULL DEFAULT NULL,
|
||||
-> `float` FLOAT NOT NULL,
|
||||
-> `float_nullable` FLOAT NULL DEFAULT NULL,
|
||||
-> PRIMARY KEY (`int_id`));
|
||||
Query OK, 0 rows affected (0,09 sec)
|
||||
|
||||
mysql> insert into test (`int_id`, `float`) VALUES (1,2);
|
||||
Query OK, 1 row affected (0,00 sec)
|
||||
mysql> INSERT INTO test (`int_id`, `float`) VALUES (1,2);
|
||||
|
||||
mysql> select * from test;
|
||||
+--------+--------------+-------+----------------+
|
||||
| int_id | int_nullable | float | float_nullable |
|
||||
+--------+--------------+-------+----------------+
|
||||
| 1 | NULL | 2 | NULL |
|
||||
+--------+--------------+-------+----------------+
|
||||
1 row in set (0,00 sec)
|
||||
mysql> SELECT * FROM test;
|
||||
+--------+-------+
|
||||
| int_id | float |
|
||||
+--------+-------+
|
||||
| 1 | 2 |
|
||||
+--------+-------+
|
||||
```
|
||||
|
||||
Получение данных в ClickHouse:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123')
|
||||
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─int_id─┬─int_nullable─┬─float─┬─float_nullable─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │ 2 │ ᴺᵁᴸᴸ │
|
||||
└────────┴──────────────┴───────┴────────────────┘
|
||||
┌─int_id─┬─float─┐
|
||||
│ 1 │ 2 │
|
||||
└────────┴───────┘
|
||||
```
|
||||
|
||||
## Смотрите также {#smotrite-takzhe}
|
||||
Замена и вставка:
|
||||
|
||||
```sql
|
||||
INSERT INTO FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 1) (int_id, float) VALUES (1, 3);
|
||||
INSERT INTO TABLE FUNCTION mysql('localhost:3306', 'test', 'test', 'bayonet', '123', 0, 'UPDATE int_id = int_id + 1') (int_id, float) VALUES (1, 4);
|
||||
SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─int_id─┬─float─┐
|
||||
│ 1 │ 3 │
|
||||
│ 2 │ 4 │
|
||||
└────────┴───────┘
|
||||
```
|
||||
|
||||
**Смотрите также**
|
||||
|
||||
- [Движок таблиц ‘MySQL’](../../engines/table-engines/integrations/mysql.md)
|
||||
- [Использование MySQL как источника данных для внешнего словаря](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
|
||||
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/table_functions/mysql/) <!--hide-->
|
||||
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/table-functions/mysql/) <!--hide-->
|
||||
|
@ -160,7 +160,15 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
}
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
writeStringBinary(columns.toString(), out);
|
||||
try
|
||||
{
|
||||
writeStringBinary(columns.toString(), out);
|
||||
out.finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
out.finalize();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -50,7 +50,15 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
|
||||
auto identifier = getIdentifierQuote(hdbc);
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
writeStringBinary(identifier, out);
|
||||
try
|
||||
{
|
||||
writeStringBinary(identifier, out);
|
||||
out.finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
out.finalize();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -187,9 +187,27 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
|
||||
auto message = getCurrentExceptionMessage(true);
|
||||
response.setStatusAndReason(
|
||||
Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); // can't call process_error, because of too soon response sending
|
||||
writeStringBinary(message, out);
|
||||
tryLogCurrentException(log);
|
||||
|
||||
try
|
||||
{
|
||||
writeStringBinary(message, out);
|
||||
out.finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
out.finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -61,7 +61,15 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
|
||||
bool result = isSchemaAllowed(hdbc);
|
||||
|
||||
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
|
||||
writeBoolText(result, out);
|
||||
try
|
||||
{
|
||||
writeBoolText(result, out);
|
||||
out.finalize();
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
out.finalize();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
|
@ -103,6 +103,7 @@ namespace CurrentMetrics
|
||||
extern const Metric Revision;
|
||||
extern const Metric VersionInteger;
|
||||
extern const Metric MemoryTracking;
|
||||
extern const Metric MaxDDLEntryID;
|
||||
}
|
||||
|
||||
|
||||
@ -1012,7 +1013,8 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
int pool_size = config().getInt("distributed_ddl.pool_size", 1);
|
||||
if (pool_size < 1)
|
||||
throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(), "distributed_ddl"));
|
||||
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, *global_context, &config(),
|
||||
"distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID));
|
||||
}
|
||||
|
||||
std::unique_ptr<DNSCacheUpdater> dns_cache_updater;
|
||||
|
@ -284,6 +284,10 @@
|
||||
In bytes. Cache is single for server. Memory is allocated only on demand.
|
||||
Cache is used when 'use_uncompressed_cache' user setting turned on (off by default).
|
||||
Uncompressed cache is advantageous only for very short queries and in rare cases.
|
||||
|
||||
Note: uncompressed cache can be pointless for lz4, because memory bandwidth
|
||||
is slower than multi-core decompression on some server configurations.
|
||||
Enabling it can sometimes paradoxically make queries slower.
|
||||
-->
|
||||
<uncompressed_cache_size>8589934592</uncompressed_cache_size>
|
||||
|
||||
|
@ -7,9 +7,6 @@
|
||||
<!-- Maximum memory usage for processing single query, in bytes. -->
|
||||
<max_memory_usage>10000000000</max_memory_usage>
|
||||
|
||||
<!-- Use cache of uncompressed blocks of data. Meaningful only for processing many of very short queries. -->
|
||||
<use_uncompressed_cache>0</use_uncompressed_cache>
|
||||
|
||||
<!-- How to choose between replicas during distributed query processing.
|
||||
random - choose random replica from set of replicas with minimum number of errors
|
||||
nearest_hostname - from set of replicas with minimum number of errors, choose replica
|
||||
|
@ -30,6 +30,10 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// Free-function shortcut: forwards `name` to getCanonicalNameIfAny() of the
/// AggregateFunctionFactory singleton and returns its result unchanged.
const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
{
    auto & factory = AggregateFunctionFactory::instance();
    return factory.getCanonicalNameIfAny(name);
}
|
||||
|
||||
void AggregateFunctionFactory::registerFunction(const String & name, Value creator_with_properties, CaseSensitiveness case_sensitiveness)
|
||||
{
|
||||
@ -41,10 +45,14 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
|
||||
throw Exception("AggregateFunctionFactory: the aggregate function name '" + name + "' is not unique",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (case_sensitiveness == CaseInsensitive
|
||||
&& !case_insensitive_aggregate_functions.emplace(Poco::toLower(name), creator_with_properties).second)
|
||||
throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
if (case_sensitiveness == CaseInsensitive)
|
||||
{
|
||||
auto key = Poco::toLower(name);
|
||||
if (!case_insensitive_aggregate_functions.emplace(key, creator_with_properties).second)
|
||||
throw Exception("AggregateFunctionFactory: the case insensitive aggregate function name '" + name + "' is not unique",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
case_insensitive_name_mapping[key] = name;
|
||||
}
|
||||
}
|
||||
|
||||
static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
|
||||
@ -98,6 +106,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
|
||||
bool has_null_arguments) const
|
||||
{
|
||||
String name = getAliasToOrName(name_param);
|
||||
bool is_case_insensitive = false;
|
||||
Value found;
|
||||
|
||||
/// Find by exact match.
|
||||
@ -107,7 +116,10 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
|
||||
}
|
||||
|
||||
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
|
||||
{
|
||||
found = jt->second;
|
||||
is_case_insensitive = true;
|
||||
}
|
||||
|
||||
const Context * query_context = nullptr;
|
||||
if (CurrentThread::isInitialized())
|
||||
@ -118,7 +130,8 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
|
||||
out_properties = found.properties;
|
||||
|
||||
if (query_context && query_context->getSettingsRef().log_queries)
|
||||
query_context->addQueryFactoriesInfo(Context::QueryLogFactories::AggregateFunction, name);
|
||||
query_context->addQueryFactoriesInfo(
|
||||
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
|
||||
|
||||
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
|
||||
if (!out_properties.returns_default_when_only_null && has_null_arguments)
|
||||
|
@ -52,7 +52,7 @@ struct MovingSumData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingSum";
|
||||
|
||||
T get(size_t idx, UInt64 window_size) const
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx];
|
||||
@ -66,7 +66,7 @@ struct MovingAvgData : public MovingData<T>
|
||||
{
|
||||
static constexpr auto name = "groupArrayMovingAvg";
|
||||
|
||||
T get(size_t idx, UInt64 window_size) const
|
||||
T NO_SANITIZE_UNDEFINED get(size_t idx, UInt64 window_size) const
|
||||
{
|
||||
if (idx < window_size)
|
||||
return this->value[idx] / window_size;
|
||||
@ -114,7 +114,7 @@ public:
|
||||
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeResult>());
|
||||
}
|
||||
|
||||
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
void NO_SANITIZE_UNDEFINED add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
|
||||
{
|
||||
auto value = static_cast<const ColumnSource &>(*columns[0]).getData()[row_num];
|
||||
this->data(place).add(static_cast<ResultT>(value), arena);
|
||||
|
@ -115,7 +115,14 @@ public:
|
||||
"Values for {} are expected to be Numeric, Float or Decimal, passed type {}",
|
||||
getName(), value_type->getName()};
|
||||
|
||||
result_type = value_type_without_nullable->promoteNumericType();
|
||||
WhichDataType value_type_to_check(value_type);
|
||||
|
||||
/// Do not promote decimal because of implementation issues of this function design
|
||||
/// If we decide to make this function more efficient, we should promote the decimal type during summation.
|
||||
if (value_type_to_check.isDecimal())
|
||||
result_type = value_type_without_nullable;
|
||||
else
|
||||
result_type = value_type_without_nullable->promoteNumericType();
|
||||
}
|
||||
|
||||
types.emplace_back(std::make_shared<DataTypeArray>(result_type));
|
||||
|
@ -56,7 +56,7 @@ class ReservoirSamplerDeterministic
|
||||
{
|
||||
bool good(const UInt32 hash)
|
||||
{
|
||||
return hash == ((hash >> skip_degree) << skip_degree);
|
||||
return !(hash & skip_mask);
|
||||
}
|
||||
|
||||
public:
|
||||
@ -135,11 +135,8 @@ public:
|
||||
throw Poco::Exception("Cannot merge ReservoirSamplerDeterministic's with different max sample size");
|
||||
sorted = false;
|
||||
|
||||
if (b.skip_degree > skip_degree)
|
||||
{
|
||||
skip_degree = b.skip_degree;
|
||||
thinOut();
|
||||
}
|
||||
if (skip_degree < b.skip_degree)
|
||||
setSkipDegree(b.skip_degree);
|
||||
|
||||
for (const auto & sample : b.samples)
|
||||
if (good(sample.second))
|
||||
@ -184,22 +181,39 @@ private:
|
||||
size_t total_values = 0; /// How many values were inserted (regardless if they remain in sample or not).
|
||||
bool sorted = false;
|
||||
Array samples;
|
||||
UInt8 skip_degree = 0; /// The number N determining that we save only one per 2^N elements in average.
|
||||
|
||||
/// The number N determining that we store only one per 2^N elements in average.
|
||||
UInt8 skip_degree = 0;
|
||||
|
||||
/// skip_mask is calculated as (2 ^ skip_degree - 1). We store an element only if (hash & skip_mask) == 0.
|
||||
/// For example, if skip_degree==0 then skip_mask==0 means we store each element;
|
||||
/// if skip_degree==1 then skip_mask==0b0001 means we store one per 2 elements in average;
|
||||
/// if skip_degree==4 then skip_mask==0b1111 means we store one per 16 elements in average.
|
||||
UInt32 skip_mask = 0;
|
||||
|
||||
void insertImpl(const T & v, const UInt32 hash)
|
||||
{
|
||||
/// Make a room for plus one element.
|
||||
while (samples.size() >= max_sample_size)
|
||||
{
|
||||
++skip_degree;
|
||||
if (skip_degree > detail::MAX_SKIP_DEGREE)
|
||||
throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};
|
||||
thinOut();
|
||||
}
|
||||
setSkipDegree(skip_degree + 1);
|
||||
|
||||
samples.emplace_back(v, hash);
|
||||
}
|
||||
|
||||
/// Switch to a new skip degree: update skip_degree, recompute skip_mask as
/// (2 ^ skip_degree - 1) and thin out the stored samples.
/// Does nothing when the degree is unchanged.
/// Throws MEMORY_LIMIT_EXCEEDED when the requested degree is above detail::MAX_SKIP_DEGREE.
void setSkipDegree(UInt8 skip_degree_)
{
    if (skip_degree_ == skip_degree)
        return;

    if (skip_degree_ > detail::MAX_SKIP_DEGREE)
        throw DB::Exception{"skip_degree exceeds maximum value", DB::ErrorCodes::MEMORY_LIMIT_EXCEEDED};

    skip_degree = skip_degree_;

    if (skip_degree == detail::MAX_SKIP_DEGREE)
    {
        /// All bits set; handled separately, presumably because shifting by the full
        /// width of UInt32 would be undefined (assumes MAX_SKIP_DEGREE == 32 - confirm).
        skip_mask = static_cast<UInt32>(-1);
    }
    else
    {
        /// Shift an unsigned one: with a plain `int` literal, degree 31 would
        /// overflow the signed type when computing (1 << 31) - 1.
        skip_mask = (static_cast<UInt32>(1) << skip_degree) - 1;
    }

    thinOut();
}
|
||||
|
||||
void thinOut()
|
||||
{
|
||||
samples.resize(std::distance(samples.begin(),
|
||||
|
@ -75,28 +75,8 @@ void ColumnAggregateFunction::set(const AggregateFunctionPtr & func_)
|
||||
ColumnAggregateFunction::~ColumnAggregateFunction()
|
||||
{
|
||||
if (!func->hasTrivialDestructor() && !src)
|
||||
{
|
||||
if (copiedDataInfo.empty())
|
||||
{
|
||||
for (auto * val : data)
|
||||
{
|
||||
func->destroy(val);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t pos;
|
||||
for (Map::iterator it = copiedDataInfo.begin(), it_end = copiedDataInfo.end(); it != it_end; ++it)
|
||||
{
|
||||
pos = it->getValue().second;
|
||||
if (data[pos] != nullptr)
|
||||
{
|
||||
func->destroy(data[pos]);
|
||||
data[pos] = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto * val : data)
|
||||
func->destroy(val);
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::addArena(ConstArenaPtr arena_)
|
||||
@ -475,37 +455,14 @@ void ColumnAggregateFunction::insertFrom(const IColumn & from, size_t n)
|
||||
/// (only as a whole, see comment above).
|
||||
ensureOwnership();
|
||||
insertDefault();
|
||||
insertCopyFrom(assert_cast<const ColumnAggregateFunction &>(from).data[n]);
|
||||
insertMergeFrom(from, n);
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::insertFrom(ConstAggregateDataPtr place)
|
||||
{
|
||||
ensureOwnership();
|
||||
insertDefault();
|
||||
insertCopyFrom(place);
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::insertCopyFrom(ConstAggregateDataPtr place)
|
||||
{
|
||||
Map::LookupResult result;
|
||||
result = copiedDataInfo.find(place);
|
||||
if (result == nullptr)
|
||||
{
|
||||
copiedDataInfo[place] = data.size()-1;
|
||||
func->merge(data.back(), place, &createOrGetArena());
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t pos = result->getValue().second;
|
||||
if (pos != data.size() - 1)
|
||||
{
|
||||
data[data.size() - 1] = data[pos];
|
||||
}
|
||||
else /// insert same data to same pos, merge them.
|
||||
{
|
||||
func->merge(data.back(), place, &createOrGetArena());
|
||||
}
|
||||
}
|
||||
insertMergeFrom(place);
|
||||
}
|
||||
|
||||
void ColumnAggregateFunction::insertMergeFrom(ConstAggregateDataPtr place)
|
||||
@ -740,4 +697,5 @@ MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
|
||||
return cloned_col;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -13,8 +13,6 @@
|
||||
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <Common/HashTable/HashMap.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -84,17 +82,6 @@ private:
|
||||
/// Name of the type to distinguish different aggregation states.
|
||||
String type_string;
|
||||
|
||||
/// MergedData records, used to avoid duplicated data copy.
|
||||
///key: src pointer, val: pos in current column.
|
||||
using Map = HashMap<
|
||||
ConstAggregateDataPtr,
|
||||
size_t,
|
||||
DefaultHash<ConstAggregateDataPtr>,
|
||||
HashTableGrower<3>,
|
||||
HashTableAllocatorWithStackMemory<sizeof(std::pair<ConstAggregateDataPtr, size_t>) * (1 << 3)>>;
|
||||
|
||||
Map copiedDataInfo;
|
||||
|
||||
ColumnAggregateFunction() {}
|
||||
|
||||
/// Create a new column that has another column as a source.
|
||||
@ -153,8 +140,6 @@ public:
|
||||
|
||||
void insertFrom(ConstAggregateDataPtr place);
|
||||
|
||||
void insertCopyFrom(ConstAggregateDataPtr place);
|
||||
|
||||
/// Merge state at last row with specified state in another column.
|
||||
void insertMergeFrom(ConstAggregateDataPtr place);
|
||||
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
|
||||
#include <common/unaligned.h>
|
||||
#include <common/sort.h>
|
||||
@ -369,8 +370,12 @@ void ColumnArray::compareColumn(const IColumn & rhs, size_t rhs_row_num,
|
||||
compare_results, direction, nan_direction_hint);
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <bool positive>
|
||||
struct ColumnArray::Cmp
|
||||
struct Cmp
|
||||
{
|
||||
const ColumnArray & parent;
|
||||
int nan_direction_hint;
|
||||
@ -390,6 +395,9 @@ struct ColumnArray::Cmp
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
void ColumnArray::reserve(size_t n)
|
||||
{
|
||||
getOffsets().reserve(n);
|
||||
@ -912,6 +920,21 @@ void ColumnArray::updatePermutationWithCollation(const Collator & collator, bool
|
||||
updatePermutationImpl(limit, res, equal_range, Cmp<true>(*this, nan_direction_hint, &collator));
|
||||
}
|
||||
|
||||
/// Compresses the nested data and the offsets independently and returns a
/// ColumnCompressed whose byte size is the sum of both compressed parts.
/// Decompression rebuilds a ColumnArray from the two decompressed parts.
ColumnPtr ColumnArray::compress() const
{
    ColumnPtr compressed_data = data->compress();
    ColumnPtr compressed_offsets = offsets->compress();

    const size_t total_bytes = compressed_data->byteSize() + compressed_offsets->byteSize();

    auto restore = [data_compressed = std::move(compressed_data), offsets_compressed = std::move(compressed_offsets)]
    {
        return ColumnArray::create(data_compressed->decompress(), offsets_compressed->decompress());
    };

    return ColumnCompressed::create(size(), total_bytes, std::move(restore));
}
|
||||
|
||||
|
||||
ColumnPtr ColumnArray::replicate(const Offsets & replicate_offsets) const
|
||||
{
|
||||
if (replicate_offsets.empty())
|
||||
|
@ -123,6 +123,8 @@ public:
|
||||
|
||||
void gather(ColumnGathererStream & gatherer_stream) override;
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) override
|
||||
{
|
||||
callback(offsets);
|
||||
@ -183,9 +185,6 @@ private:
|
||||
|
||||
template <typename Comparator>
|
||||
void updatePermutationImpl(size_t limit, Permutation & res, EqualRanges & equal_range, Comparator cmp) const;
|
||||
|
||||
template <bool positive>
|
||||
struct Cmp;
|
||||
};
|
||||
|
||||
|
||||
|
61
src/Columns/ColumnCompressed.cpp
Normal file
61
src/Columns/ColumnCompressed.cpp
Normal file
@ -0,0 +1,61 @@
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wold-style-cast"
|
||||
|
||||
#include <lz4.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int CANNOT_COMPRESS;
|
||||
extern const int CANNOT_DECOMPRESS;
|
||||
}
|
||||
|
||||
|
||||
/// Compresses `data_size` bytes at `data` with LZ4 and returns an exactly-sized buffer.
/// Returns nullptr when the compressed size is more than half of the source size,
/// unless `always_compress` is set.
/// Throws CANNOT_COMPRESS when the worst-case output size does not fit into int
/// or when LZ4 reports a failure.
std::shared_ptr<Memory<>> ColumnCompressed::compressBuffer(const void * data, size_t data_size, bool always_compress)
{
    const size_t dest_capacity = LZ4_COMPRESSBOUND(data_size);
    if (dest_capacity > std::numeric_limits<int>::max())
        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column of size {}", formatReadableSizeWithBinarySuffix(data_size));

    /// Scratch buffer large enough for the worst case.
    Memory<> scratch(dest_capacity);

    const auto written = LZ4_compress_default(
        reinterpret_cast<const char *>(data), scratch.data(), data_size, dest_capacity);
    if (written <= 0)
        throw Exception(ErrorCodes::CANNOT_COMPRESS, "Cannot compress column");

    /// If compression is inefficient.
    const bool inefficient = static_cast<size_t>(written) * 2 > data_size;
    if (inefficient && !always_compress)
        return {};

    /// Shrink to fit: copy the result into a buffer of exactly the compressed size.
    auto result = std::make_shared<Memory<>>(written);
    memcpy(result->data(), scratch.data(), written);
    return result;
}
|
||||
|
||||
|
||||
void ColumnCompressed::decompressBuffer(
|
||||
const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size)
|
||||
{
|
||||
auto processed_size = LZ4_decompress_safe(
|
||||
reinterpret_cast<const char *>(compressed_data),
|
||||
reinterpret_cast<char *>(decompressed_data),
|
||||
compressed_size,
|
||||
decompressed_size);
|
||||
|
||||
if (processed_size <= 0)
|
||||
throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot decompress column");
|
||||
}
|
||||
|
||||
}
|
121
src/Columns/ColumnCompressed.h
Normal file
121
src/Columns/ColumnCompressed.h
Normal file
@ -0,0 +1,121 @@
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <Core/Field.h>
|
||||
#include <Columns/IColumn.h>
|
||||
#include <IO/BufferWithOwnMemory.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/** Wrapper for compressed column data.
|
||||
* The only supported operations are:
|
||||
* - decompress (reconstruct the source column)
|
||||
* - get size in rows or bytes.
|
||||
*
|
||||
* It is needed to implement in-memory compression
|
||||
* - to keep compressed data in Block or pass around.
|
||||
*
|
||||
* It's often beneficial to store compressed data in-memory and decompress on the fly
|
||||
* because it allows to lower memory throughput. More specifically, if:
|
||||
*
|
||||
* decompression speed * num CPU cores >= memory read throughput
|
||||
*
|
||||
* Also in-memory compression allows to keep more data in RAM.
|
||||
*/
|
||||
class ColumnCompressed : public COWHelper<IColumn, ColumnCompressed>
{
public:
    /// Callback that reconstructs the source column on demand.
    using Lazy = std::function<ColumnPtr()>;

    /// rows_ and bytes_ are the values reported by size() and byteSize();
    /// lazy_ is invoked by decompress(). The callback is moved, not copied, into the column.
    ColumnCompressed(size_t rows_, size_t bytes_, Lazy lazy_)
        : rows(rows_), bytes(bytes_), lazy(std::move(lazy_))
    {
    }

    const char * getFamilyName() const override { return "Compressed"; }

    size_t size() const override { return rows; }
    size_t byteSize() const override { return bytes; }
    size_t allocatedBytes() const override { return bytes; }

    /// Reconstruct the source column by calling the stored callback.
    ColumnPtr decompress() const override
    {
        return lazy();
    }

    /** Wrap uncompressed column without compression.
      * Method can be used when compression is not worth doing.
      * But returning CompressedColumn is still needed to keep uniform block structure.
      */
    static ColumnPtr wrap(ColumnPtr column)
    {
        /// Read the sizes before `column` is moved into the closure. The evaluation order
        /// of function arguments is unspecified, so doing both inside one call expression
        /// could dereference an already moved-from pointer.
        const size_t column_rows = column->size();
        const size_t column_bytes = column->allocatedBytes();

        return ColumnCompressed::create(
            column_rows,
            column_bytes,
            [column = std::move(column)]{ return column; });
    }

    /// Helper methods for compression.

    /// If data is not worth to be compressed and not 'always_compress' - returns nullptr.
    /// Note: shared_ptr is to allow to be captured by std::function.
    static std::shared_ptr<Memory<>> compressBuffer(const void * data, size_t data_size, bool always_compress);

    static void decompressBuffer(
        const void * compressed_data, void * decompressed_data, size_t compressed_size, size_t decompressed_size);

    /// All other methods throw exception.

    TypeIndex getDataType() const override { throwMustBeDecompressed(); }
    Field operator[](size_t) const override { throwMustBeDecompressed(); }
    void get(size_t, Field &) const override { throwMustBeDecompressed(); }
    StringRef getDataAt(size_t) const override { throwMustBeDecompressed(); }
    void insert(const Field &) override { throwMustBeDecompressed(); }
    void insertRangeFrom(const IColumn &, size_t, size_t) override { throwMustBeDecompressed(); }
    void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
    void insertDefault() override { throwMustBeDecompressed(); }
    void popBack(size_t) override { throwMustBeDecompressed(); }
    StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
    const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
    void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
    void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
    void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
    ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
    ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
    ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
    int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }
    void compareColumn(const IColumn &, size_t, PaddedPODArray<UInt64> *, PaddedPODArray<Int8> &, int, int) const override
    {
        throwMustBeDecompressed();
    }
    void getPermutation(bool, size_t, int, Permutation &) const override { throwMustBeDecompressed(); }
    void updatePermutation(bool, size_t, int, Permutation &, EqualRanges &) const override { throwMustBeDecompressed(); }
    ColumnPtr replicate(const Offsets &) const override { throwMustBeDecompressed(); }
    MutableColumns scatter(ColumnIndex, const Selector &) const override { throwMustBeDecompressed(); }
    void gather(ColumnGathererStream &) override { throwMustBeDecompressed(); }
    void getExtremes(Field &, Field &) const override { throwMustBeDecompressed(); }
    size_t byteSizeAt(size_t) const override { throwMustBeDecompressed(); }

protected:
    /// Value returned by size().
    size_t rows;
    /// Value returned by byteSize() and allocatedBytes().
    size_t bytes;

    /// Callback used by decompress().
    Lazy lazy;

private:
    /// Common body of every unsupported operation: always throws LOGICAL_ERROR.
    [[noreturn]] void throwMustBeDecompressed() const
    {
        throw Exception("ColumnCompressed must be decompressed before use", ErrorCodes::LOGICAL_ERROR);
    }
};
|
||||
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnDecimal.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
|
||||
|
||||
@ -346,6 +347,30 @@ void ColumnDecimal<T>::gather(ColumnGathererStream & gatherer)
|
||||
gatherer.gather(*this);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
ColumnPtr ColumnDecimal<T>::compress() const
|
||||
{
|
||||
size_t source_size = data.size() * sizeof(T);
|
||||
|
||||
/// Don't compress small blocks.
|
||||
if (source_size < 4096) /// A wild guess.
|
||||
return ColumnCompressed::wrap(this->getPtr());
|
||||
|
||||
auto compressed = ColumnCompressed::compressBuffer(data.data(), source_size, false);
|
||||
|
||||
if (!compressed)
|
||||
return ColumnCompressed::wrap(this->getPtr());
|
||||
|
||||
return ColumnCompressed::create(data.size(), compressed->size(),
|
||||
[compressed = std::move(compressed), column_size = data.size(), scale = this->scale]
|
||||
{
|
||||
auto res = ColumnDecimal<T>::create(column_size, scale);
|
||||
ColumnCompressed::decompressBuffer(
|
||||
compressed->data(), res->getData().data(), compressed->size(), column_size * sizeof(T));
|
||||
return res;
|
||||
});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ColumnDecimal<T>::getExtremes(Field & min, Field & max) const
|
||||
{
|
||||
|
@ -172,6 +172,8 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
|
||||
void insertValue(const T value) { data.push_back(value); }
|
||||
Container & getData() { return data; }
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Columns/ColumnFixedString.h>
|
||||
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Common/Arena.h>
|
||||
@ -446,18 +447,31 @@ void ColumnFixedString::getExtremes(Field & min, Field & max) const
|
||||
get(max_idx, max);
|
||||
}
|
||||
|
||||
void ColumnFixedString::alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size)
|
||||
ColumnPtr ColumnFixedString::compress() const
|
||||
{
|
||||
size_t length = data.size() - old_size;
|
||||
if (length < n)
|
||||
{
|
||||
data.resize_fill(old_size + n);
|
||||
}
|
||||
else if (length > n)
|
||||
{
|
||||
data.resize_assume_reserved(old_size);
|
||||
throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
|
||||
}
|
||||
size_t source_size = chars.size();
|
||||
|
||||
/// Don't compress small blocks.
|
||||
if (source_size < 4096) /// A wild guess.
|
||||
return ColumnCompressed::wrap(this->getPtr());
|
||||
|
||||
auto compressed = ColumnCompressed::compressBuffer(chars.data(), source_size, false);
|
||||
|
||||
if (!compressed)
|
||||
return ColumnCompressed::wrap(this->getPtr());
|
||||
|
||||
size_t column_size = size();
|
||||
|
||||
return ColumnCompressed::create(column_size, compressed->size(),
|
||||
[compressed = std::move(compressed), column_size, n = n]
|
||||
{
|
||||
size_t chars_size = n * column_size;
|
||||
auto res = ColumnFixedString::create(n);
|
||||
res->getChars().resize(chars_size);
|
||||
ColumnCompressed::decompressBuffer(
|
||||
compressed->data(), res->getChars().data(), compressed->size(), chars_size);
|
||||
return res;
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -156,6 +156,8 @@ public:
|
||||
|
||||
void gather(ColumnGathererStream & gatherer_stream) override;
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void reserve(size_t size) override
|
||||
{
|
||||
chars.reserve(n * size);
|
||||
@ -182,8 +184,6 @@ public:
|
||||
const Chars & getChars() const { return chars; }
|
||||
|
||||
size_t getN() const { return n; }
|
||||
|
||||
static void alignStringLength(ColumnFixedString::Chars & data, size_t n, size_t old_size);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include <Columns/ColumnMap.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Columns/IColumnImpl.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
@ -228,7 +229,21 @@ void ColumnMap::protect()
|
||||
|
||||
void ColumnMap::getExtremes(Field & min, Field & max) const
|
||||
{
|
||||
nested->getExtremes(min, max);
|
||||
Field nested_min;
|
||||
Field nested_max;
|
||||
|
||||
nested->getExtremes(nested_min, nested_max);
|
||||
|
||||
/// Convert the resulting Array fields to Map fields because the client expects the min and max fields to have type Map.
|
||||
|
||||
Array nested_min_value = nested_min.get<Array>();
|
||||
Array nested_max_value = nested_max.get<Array>();
|
||||
|
||||
Map map_min_value(nested_min_value.begin(), nested_min_value.end());
|
||||
Map map_max_value(nested_max_value.begin(), nested_max_value.end());
|
||||
|
||||
min = std::move(map_min_value);
|
||||
max = std::move(map_max_value);
|
||||
}
|
||||
|
||||
void ColumnMap::forEachSubcolumn(ColumnCallback callback)
|
||||
@ -243,4 +258,13 @@ bool ColumnMap::structureEquals(const IColumn & rhs) const
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Compresses the nested column and returns a ColumnCompressed that rebuilds
/// a ColumnMap from the decompressed nested column.
ColumnPtr ColumnMap::compress() const
{
    auto compressed = nested->compress();

    /// Read the size before `compressed` is moved into the closure: the evaluation order
    /// of function arguments is unspecified, so calling byteSize() inside the call below
    /// could dereference an already moved-from pointer.
    const size_t byte_size = compressed->byteSize();

    return ColumnCompressed::create(size(), byte_size, [compressed = std::move(compressed)]
    {
        return ColumnMap::create(compressed->decompress());
    });
}
|
||||
|
||||
}
|
||||
|
@ -91,6 +91,8 @@ public:
|
||||
|
||||
const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); }
|
||||
ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); }
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <Columns/ColumnString.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
|
||||
|
||||
@ -511,6 +512,20 @@ void ColumnNullable::protect()
|
||||
getNullMapColumn().protect();
|
||||
}
|
||||
|
||||
/// Compresses the nested column and the null map independently and returns a
/// ColumnCompressed that rebuilds a ColumnNullable from the two decompressed parts.
ColumnPtr ColumnNullable::compress() const
{
    ColumnPtr nested_compressed = nested_column->compress();
    ColumnPtr null_map_compressed = null_map->compress();

    /// Report the size of the compressed parts, not of the original columns.
    const size_t byte_size = nested_compressed->byteSize() + null_map_compressed->byteSize();

    /// Capture the compressed parts. Capturing the members themselves would only copy
    /// the uncompressed columns (this method is const, so they cannot be moved from)
    /// and the compression results would be thrown away.
    return ColumnCompressed::create(size(), byte_size,
        [nested_compressed = std::move(nested_compressed), null_map_compressed = std::move(null_map_compressed)]
        {
            return ColumnNullable::create(nested_compressed->decompress(), null_map_compressed->decompress());
        });
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
@ -117,6 +117,8 @@ public:
|
||||
|
||||
void gather(ColumnGathererStream & gatherer_stream) override;
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void forEachSubcolumn(ColumnCallback callback) override
|
||||
{
|
||||
callback(nested_column);
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <Columns/Collator.h>
|
||||
#include <Columns/ColumnsCommon.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/HashTable/Hash.h>
|
||||
@ -525,6 +526,47 @@ void ColumnString::getExtremes(Field & min, Field & max) const
|
||||
}
|
||||
|
||||
|
||||
/// Returns a ColumnCompressed holding LZ4-compressed chars and offsets of this column.
/// Columns with fewer than 4096 bytes of chars, and columns whose chars are reported
/// as not worth compressing, are wrapped uncompressed. Offsets are always compressed
/// once the chars are.
ColumnPtr ColumnString::compress() const
{
    const size_t source_chars_size = chars.size();
    const size_t source_offsets_elements = offsets.size();
    const size_t source_offsets_size = source_offsets_elements * sizeof(Offset);

    /// Don't compress small blocks.
    if (source_chars_size < 4096) /// A wild guess.
        return ColumnCompressed::wrap(this->getPtr());

    auto chars_compressed = ColumnCompressed::compressBuffer(chars.data(), source_chars_size, false);

    /// Return original column if not compressible.
    if (!chars_compressed)
        return ColumnCompressed::wrap(this->getPtr());

    auto offsets_compressed = ColumnCompressed::compressBuffer(offsets.data(), source_offsets_size, true);

    /// Read the sizes before the buffers are moved into the closure: the evaluation order
    /// of function arguments is unspecified, so calling size() inside the call below
    /// could dereference already moved-from (null) pointers.
    const size_t compressed_bytes = chars_compressed->size() + offsets_compressed->size();

    return ColumnCompressed::create(source_offsets_elements, compressed_bytes,
        [
            chars_compressed = std::move(chars_compressed),
            offsets_compressed = std::move(offsets_compressed),
            source_chars_size,
            source_offsets_elements
        ]
        {
            auto res = ColumnString::create();

            res->getChars().resize(source_chars_size);
            res->getOffsets().resize(source_offsets_elements);

            ColumnCompressed::decompressBuffer(
                chars_compressed->data(), res->getChars().data(), chars_compressed->size(), source_chars_size);

            ColumnCompressed::decompressBuffer(
                offsets_compressed->data(), res->getOffsets().data(), offsets_compressed->size(), source_offsets_elements * sizeof(Offset));

            return res;
        });
}
|
||||
|
||||
|
||||
int ColumnString::compareAtWithCollation(size_t n, size_t m, const IColumn & rhs_, int, const Collator & collator) const
|
||||
{
|
||||
const ColumnString & rhs = assert_cast<const ColumnString &>(rhs_);
|
||||
|
@ -261,6 +261,8 @@ public:
|
||||
|
||||
void gather(ColumnGathererStream & gatherer_stream) override;
|
||||
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
void reserve(size_t n) override;
|
||||
|
||||
void getExtremes(Field & min, Field & max) const override;
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Columns/ColumnTuple.h>
|
||||
|
||||
#include <Columns/IColumnImpl.h>
|
||||
#include <Columns/ColumnCompressed.h>
|
||||
#include <Core/Field.h>
|
||||
#include <DataStreams/ColumnGathererStream.h>
|
||||
#include <IO/Operators.h>
|
||||
@ -486,7 +487,7 @@ bool ColumnTuple::structureEquals(const IColumn & rhs) const
|
||||
|
||||
bool ColumnTuple::isCollationSupported() const
|
||||
{
|
||||
for (const auto& column : columns)
|
||||
for (const auto & column : columns)
|
||||
{
|
||||
if (column->isCollationSupported())
|
||||
return true;
|
||||
@ -495,4 +496,25 @@ bool ColumnTuple::isCollationSupported() const
|
||||
}
|
||||
|
||||
|
||||
/// Compresses every element column and returns a ColumnCompressed whose byte size
/// is the sum of the compressed elements. Decompression rebuilds a ColumnTuple
/// from the decompressed element columns.
ColumnPtr ColumnTuple::compress() const
{
    size_t byte_size = 0;
    Columns compressed;
    compressed.reserve(columns.size());
    for (const auto & column : columns)
    {
        auto compressed_column = column->compress();
        byte_size += compressed_column->byteSize();
        compressed.emplace_back(std::move(compressed_column));
    }

    /// The closure leaves its captured columns untouched and decompresses into a fresh
    /// vector. Overwriting the captures in place would make the wrapper hold uncompressed
    /// data after the first call and would mutate state from the const decompress() path.
    return ColumnCompressed::create(size(), byte_size,
        [compressed = std::move(compressed)]
        {
            Columns decompressed;
            decompressed.reserve(compressed.size());
            for (const auto & column : compressed)
                decompressed.emplace_back(column->decompress());
            return ColumnTuple::create(decompressed);
        });
}
|
||||
|
||||
}
|
||||
|
@ -89,6 +89,7 @@ public:
|
||||
void forEachSubcolumn(ColumnCallback callback) override;
|
||||
bool structureEquals(const IColumn & rhs) const override;
|
||||
bool isCollationSupported() const override;
|
||||
ColumnPtr compress() const override;
|
||||
|
||||
size_t tupleSize() const { return columns.size(); }
|
||||
|
||||
|
@ -28,13 +28,18 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_COLUMN;
|
||||
}
|
||||
|
||||
/** Stores another column with unique values
|
||||
* and also an index that allows to find position by value.
|
||||
*
|
||||
* This column is not used on its own but only as an implementation detail of ColumnLowCardinality.
|
||||
*/
|
||||
template <typename ColumnType>
|
||||
class ColumnUnique final : public COWHelper<IColumnUnique, ColumnUnique<ColumnType>>
|
||||
{
|
||||
friend class COWHelper<IColumnUnique, ColumnUnique<ColumnType>>;
|
||||
|
||||
private:
|
||||
explicit ColumnUnique(MutableColumnPtr && holder, bool is_nullable);
|
||||
ColumnUnique(MutableColumnPtr && holder, bool is_nullable);
|
||||
explicit ColumnUnique(const IDataType & type);
|
||||
ColumnUnique(const ColumnUnique & other);
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user