Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 23:21:59 +00:00)

Commit a8f0fd1b26: Merge remote-tracking branch 'origin/master' into HEAD
.github/codecov.yml (vendored, 4 changed lines)
@@ -1,5 +1,5 @@
codecov:
max_report_age: off
max_report_age: "off"
strict_yaml_branch: "master"

ignore:
@@ -14,4 +14,4 @@ ignore:
comment: false

github_checks:
annotations: false
.github/workflows/anchore-analysis.yml (vendored, 38 changed lines)
@@ -8,9 +8,9 @@
name: Docker Container Scan (clickhouse-server)

on:
"on":
pull_request:
paths:
- docker/server/Dockerfile
- .github/workflows/anchore-analysis.yml
schedule:
@@ -20,20 +20,20 @@ jobs:
Anchore-Build-Scan:
runs-on: ubuntu-latest
steps:
- name: Checkout the code
uses: actions/checkout@v2
- name: Build the Docker image
run: |
cd docker/server
perl -pi -e 's|=\$version||g' Dockerfile
docker build . --file Dockerfile --tag localbuild/testimage:latest
- name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
uses: anchore/scan-action@v2
id: scan
with:
image: "localbuild/testimage:latest"
acs-report-enable: true
- name: Upload Anchore Scan Report
uses: github/codeql-action/upload-sarif@v1
with:
sarif_file: ${{ steps.scan.outputs.sarif }}
.gitignore (vendored, 6 changed lines)
@@ -137,3 +137,9 @@ website/package-lock.json
/prof

*.iml

# data store
/programs/server/data
/programs/server/metadata
/programs/server/store
.gitmodules (vendored, 2 changed lines)
@@ -220,4 +220,4 @@
url = https://github.com/ClickHouse-Extras/boringssl.git
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/eBay/NuRaft.git
url = https://github.com/ClickHouse-Extras/NuRaft.git
.potato.yml (16 changed lines)
@@ -14,14 +14,14 @@ handlers:
# The trigger for creating the Yandex.Tracker issue. When the specified event occurs, it transfers PR data to Yandex.Tracker.
github:pullRequest:labeled:
data:
# The Yandex.Tracker queue to create the issue in. Each issue in Tracker belongs to one of the project queues.
queue: CLICKHOUSEDOCS
# The issue title.
summary: '[Potato] Pull Request #{{pullRequest.number}}'
# The issue description.
description: >
{{pullRequest.description}}

Ссылка на Pull Request: {{pullRequest.webUrl}}
# The condition for creating the Yandex.Tracker issue.
condition: eventPayload.labels.filter(label => ['pr-feature'].includes(label.name)).length
.yamllint (new file, 15 lines)
@@ -0,0 +1,15 @@
# vi: ft=yaml
extends: default

rules:
indentation:
level: warning
indent-sequences: consistent
line-length:
# there are some bash -c "", so this is OK
max: 300
level: warning
comments:
min-spaces-from-content: 1
document-start:
present: false
@@ -13,6 +13,3 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation.
* [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any.
* You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.

## Upcoming Events
* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021.
@@ -7,6 +7,7 @@
#include <ctime>
#include <string>

#define DATE_LUT_MAX (0xFFFFFFFFU - 86400)
#define DATE_LUT_MAX_DAY_NUM (0xFFFFFFFFU / 86400)
/// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows to remove extra check.
@@ -249,7 +250,7 @@ public:
{
DayNum index = findIndex(t);

if (unlikely(index == 0))
if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
return t + offset_at_start_of_epoch;

time_t res = t - lut[index].date;
@@ -264,18 +265,18 @@ public:
{
DayNum index = findIndex(t);

/// If it is not 1970 year (findIndex found nothing appropriate),
/// than limit number of hours to avoid insane results like 1970-01-01 89:28:15
if (unlikely(index == 0))
/// If it is overflow case,
/// then limit number of hours to avoid insane results like 1970-01-01 89:28:15
if (unlikely(index == 0 || index > DATE_LUT_MAX_DAY_NUM))
return static_cast<unsigned>((t + offset_at_start_of_epoch) / 3600) % 24;

time_t res = t - lut[index].date;
time_t time = t - lut[index].date;

/// Data is cleaned to avoid possibility of underflow.
if (res >= lut[index].time_at_offset_change)
res += lut[index].amount_of_offset_change;
if (time >= lut[index].time_at_offset_change)
time += lut[index].amount_of_offset_change;

return res / 3600;
unsigned res = time / 3600;
return res <= 23 ? res : 0;
}

/** Calculating offset from UTC in seconds.
@@ -314,15 +315,21 @@ public:
* each minute, with added or subtracted leap second, spans exactly 60 unix timestamps.
*/

inline unsigned toSecond(time_t t) const { return t % 60; }
inline unsigned toSecond(time_t t) const { return UInt32(t) % 60; }

inline unsigned toMinute(time_t t) const
{
if (offset_is_whole_number_of_hours_everytime)
return (t / 60) % 60;
return (UInt32(t) / 60) % 60;

UInt32 date = find(t).date;
return (UInt32(t) - date) / 60 % 60;
/// To consider the DST changing situation within this day.
/// also make the special timezones with no whole hour offset such as 'Australia/Lord_Howe' been taken into account
DayNum index = findIndex(t);
UInt32 res = t - lut[index].date;
if (lut[index].amount_of_offset_change != 0 && t >= lut[index].date + lut[index].time_at_offset_change)
res += lut[index].amount_of_offset_change;

return res / 60 % 60;
}

inline time_t toStartOfMinute(time_t t) const { return t / 60 * 60; }
@@ -555,9 +562,7 @@ public:
}
}

/*
* check and change mode to effective
*/
/// Check and change mode to effective.
inline UInt8 check_week_mode(UInt8 mode) const
{
UInt8 week_format = (mode & 7);
@@ -566,10 +571,9 @@ public:
return week_format;
}

/*
* Calc weekday from d
* Returns 0 for monday, 1 for tuesday ...
*/
/** Calculate weekday from d.
* Returns 0 for monday, 1 for tuesday...
*/
inline unsigned calc_weekday(DayNum d, bool sunday_first_day_of_week) const
{
if (!sunday_first_day_of_week)
@@ -578,7 +582,7 @@ public:
return toDayOfWeek(DayNum(d + 1)) - 1;
}

/* Calc days in one year. */
/// Calculate days in one year.
inline unsigned calc_days_in_year(UInt16 year) const
{
return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 366 : 365);
@@ -1,9 +1,30 @@
#pragma once

#include <common/extended_types.h>
#include <common/defines.h>

namespace common
{
/// Multiply and ignore overflow.
template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED mulIgnoreOverflow(T1 x, T2 y)
{
return x * y;
}

template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED addIgnoreOverflow(T1 x, T2 y)
{
return x + y;
}

template <typename T1, typename T2>
inline auto NO_SANITIZE_UNDEFINED subIgnoreOverflow(T1 x, T2 y)
{
return x - y;
}

template <typename T>
inline bool addOverflow(T x, T y, T & res)
{
@@ -33,14 +54,14 @@ namespace common
{
static constexpr __int128 min_int128 = minInt128();
static constexpr __int128 max_int128 = maxInt128();
res = x + y;
res = addIgnoreOverflow(x, y);
return (y > 0 && x > max_int128 - y) || (y < 0 && x < min_int128 - y);
}

template <>
inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x + y;
res = addIgnoreOverflow(x, y);
return (y > 0 && x > std::numeric_limits<wInt256>::max() - y) ||
(y < 0 && x < std::numeric_limits<wInt256>::min() - y);
}
@@ -48,7 +69,7 @@ namespace common
template <>
inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x + y;
res = addIgnoreOverflow(x, y);
return x > std::numeric_limits<wUInt256>::max() - y;
}

@@ -81,14 +102,14 @@ namespace common
{
static constexpr __int128 min_int128 = minInt128();
static constexpr __int128 max_int128 = maxInt128();
res = x - y;
res = subIgnoreOverflow(x, y);
return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y);
}

template <>
inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x - y;
res = subIgnoreOverflow(x, y);
return (y < 0 && x > std::numeric_limits<wInt256>::max() + y) ||
(y > 0 && x < std::numeric_limits<wInt256>::min() + y);
}
@@ -96,7 +117,7 @@ namespace common
template <>
inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x - y;
res = subIgnoreOverflow(x, y);
return x < y;
}

@@ -127,33 +148,33 @@ namespace common
template <>
inline bool mulOverflow(__int128 x, __int128 y, __int128 & res)
{
res = static_cast<unsigned __int128>(x) * static_cast<unsigned __int128>(y); /// Avoid signed integer overflow.
res = mulIgnoreOverflow(x, y);
if (!x || !y)
return false;

unsigned __int128 a = (x > 0) ? x : -x;
unsigned __int128 b = (y > 0) ? y : -y;
return (a * b) / b != a;
return mulIgnoreOverflow(a, b) / b != a;
}

template <>
inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x * y;
res = mulIgnoreOverflow(x, y);
if (!x || !y)
return false;

wInt256 a = (x > 0) ? x : -x;
wInt256 b = (y > 0) ? y : -y;
return (a * b) / b != a;
return mulIgnoreOverflow(a, b) / b != a;
}

template <>
inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x * y;
res = mulIgnoreOverflow(x, y);
if (!x || !y)
return false;
return (x * y) / y != x;
return res / y != x;
}
}
@@ -1,5 +1,20 @@
#pragma once

/// __has_feature supported only by clang.
///
/// But libcxx/libcxxabi overrides it to 0,
/// thus the checks for __has_feature will be wrong.
///
/// NOTE:
/// - __has_feature cannot be simply undefined,
///   since this will be broken if some C++ header will be included after
///   including <common/defines.h>
/// - it should not have fallback to 0,
///   since this may create false-positive detection (common problem)
#if defined(__clang__) && defined(__has_feature)
#    define ch_has_feature __has_feature
#endif

#if defined(_MSC_VER)
#    if !defined(likely)
#        define likely(x) (x)
@@ -32,8 +47,8 @@

/// Check for presence of address sanitizer
#if !defined(ADDRESS_SANITIZER)
#    if defined(__has_feature)
#        if __has_feature(address_sanitizer)
#    if defined(ch_has_feature)
#        if ch_has_feature(address_sanitizer)
#            define ADDRESS_SANITIZER 1
#        endif
#    elif defined(__SANITIZE_ADDRESS__)
@@ -42,8 +57,8 @@
#endif

#if !defined(THREAD_SANITIZER)
#    if defined(__has_feature)
#        if __has_feature(thread_sanitizer)
#    if defined(ch_has_feature)
#        if ch_has_feature(thread_sanitizer)
#            define THREAD_SANITIZER 1
#        endif
#    elif defined(__SANITIZE_THREAD__)
@@ -52,8 +67,8 @@
#endif

#if !defined(MEMORY_SANITIZER)
#    if defined(__has_feature)
#        if __has_feature(memory_sanitizer)
#    if defined(ch_has_feature)
#        if ch_has_feature(memory_sanitizer)
#            define MEMORY_SANITIZER 1
#        endif
#    elif defined(__MEMORY_SANITIZER__)
@@ -15,11 +15,11 @@
#endif

#define __msan_unpoison(X, Y) // NOLINT
#if defined(__has_feature)
#    if __has_feature(memory_sanitizer)
#        undef __msan_unpoison
#        include <sanitizer/msan_interface.h>
#    endif
#if defined(ch_has_feature)
#    if ch_has_feature(memory_sanitizer)
#        undef __msan_unpoison
#        include <sanitizer/msan_interface.h>
#    endif
#endif

#include <link.h>
@@ -152,7 +152,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
if (sig != SIGTSTP) /// This signal is used for debugging.
{
/// The time that is usually enough for separate thread to print info into log.
sleepForSeconds(10);
sleepForSeconds(20); /// FIXME: use some feedback from threads that process stacktrace
call_default_signal_handler(sig);
}

@@ -311,7 +311,8 @@ private:
if (stack_trace.getSize())
{
/// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
/// NOTE This still require memory allocations and mutex lock inside logger. BTW we can also print it to stderr using write syscalls.
/// NOTE: This still require memory allocations and mutex lock inside logger.
/// BTW we can also print it to stderr using write syscalls.

std::stringstream bare_stacktrace;
bare_stacktrace << "Stack trace:";
@@ -324,7 +325,7 @@ private:
/// Write symbolized stack trace line by line for better grep-ability.
stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); });

#if defined(__linux__)
#if defined(OS_LINUX)
/// Write information about binary checksum. It can be difficult to calculate, so do it only after printing stack trace.
String calculated_binary_hash = getHashOfLoadedBinaryHex();
if (daemon.stored_binary_hash.empty())
@@ -415,7 +416,9 @@ static void sanitizerDeathCallback()
else
log_message = "Terminate called without an active exception";

static const size_t buf_size = 1024;
/// POSIX.1 says that write(2)s of less than PIPE_BUF bytes must be atomic - man 7 pipe
/// And the buffer should not be too small because our exception messages can be large.
static constexpr size_t buf_size = PIPE_BUF;

if (log_message.size() > buf_size - 16)
log_message.resize(buf_size - 16);
@@ -561,6 +564,7 @@ void debugIncreaseOOMScore()
{
DB::WriteBufferFromFile buf("/proc/self/oom_score_adj");
buf.write(new_score.c_str(), new_score.size());
buf.close();
}
catch (const Poco::Exception & e)
{
@@ -783,7 +787,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
/// Setup signal handlers.
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.

addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals);
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, &handled_signals);
addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals);
addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals);
@@ -83,7 +83,7 @@ public:
template <class T>
void writeToGraphite(const std::string & key, const T & value, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
{
auto writer = getGraphiteWriter(config_name);
auto *writer = getGraphiteWriter(config_name);
if (writer)
writer->write(key, value, timestamp, custom_root_path);
}
@@ -91,7 +91,7 @@ public:
template <class T>
void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::string & config_name = DEFAULT_GRAPHITE_CONFIG_NAME, time_t timestamp = 0, const std::string & custom_root_path = "")
{
auto writer = getGraphiteWriter(config_name);
auto *writer = getGraphiteWriter(config_name);
if (writer)
writer->write(key_vals, timestamp, custom_root_path);
}
@@ -99,7 +99,7 @@ public:
template <class T>
void writeToGraphite(const GraphiteWriter::KeyValueVector<T> & key_vals, const std::chrono::system_clock::time_point & current_time, const std::string & custom_root_path)
{
auto writer = getGraphiteWriter();
auto *writer = getGraphiteWriter();
if (writer)
writer->write(key_vals, std::chrono::system_clock::to_time_t(current_time), custom_root_path);
}
@@ -51,10 +51,11 @@ Connection::Connection(
const char* ssl_key,
unsigned timeout,
unsigned rw_timeout,
bool enable_local_infile)
bool enable_local_infile,
bool opt_reconnect)
: Connection()
{
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile);
connect(db, server, user, password, port, socket, ssl_ca, ssl_cert, ssl_key, timeout, rw_timeout, enable_local_infile, opt_reconnect);
}

Connection::Connection(const std::string & config_name)
@@ -80,7 +81,8 @@ void Connection::connect(const char* db,
const char * ssl_key,
unsigned timeout,
unsigned rw_timeout,
bool enable_local_infile)
bool enable_local_infile,
bool opt_reconnect)
{
if (is_connected)
disconnect();
@@ -104,9 +106,8 @@ void Connection::connect(const char* db,
if (mysql_options(driver.get(), MYSQL_OPT_LOCAL_INFILE, &enable_local_infile_arg))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));

/// Enables auto-reconnect.
bool reconnect = true;
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&reconnect)))
/// See C API Developer Guide: Automatic Reconnection Control
if (mysql_options(driver.get(), MYSQL_OPT_RECONNECT, reinterpret_cast<const char *>(&opt_reconnect)))
throw ConnectionFailed(errorMessage(driver.get()), mysql_errno(driver.get()));

/// Specifies particular ssl key and certificate if it needs
@@ -14,6 +14,8 @@

/// Disable LOAD DATA LOCAL INFILE because it is insecure
#define MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE false
/// See https://dev.mysql.com/doc/c-api/5.7/en/c-api-auto-reconnect.html
#define MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT true

namespace mysqlxx
@@ -76,7 +78,8 @@ public:
const char * ssl_key = "",
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);

/// Creates connection. Can be used if Poco::Util::Application is using.
/// All settings will be got from config_name section of configuration.
@@ -96,7 +99,8 @@ public:
const char* ssl_key,
unsigned timeout = MYSQLXX_DEFAULT_TIMEOUT,
unsigned rw_timeout = MYSQLXX_DEFAULT_RW_TIMEOUT,
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool enable_local_infile = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);

void connect(const std::string & config_name)
{
@@ -112,6 +116,7 @@ public:
std::string ssl_cert = cfg.getString(config_name + ".ssl_cert", "");
std::string ssl_key = cfg.getString(config_name + ".ssl_key", "");
bool enable_local_infile = cfg.getBool(config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);
bool opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);

unsigned timeout =
cfg.getInt(config_name + ".connect_timeout",
@@ -135,7 +140,8 @@ public:
ssl_key.c_str(),
timeout,
rw_timeout,
enable_local_infile);
enable_local_infile,
opt_reconnect);
}

/// If MySQL connection was established.
@@ -26,6 +26,15 @@ struct ConnectionFailed : public Exception
};

/// Connection to MySQL server was lost
struct ConnectionLost : public Exception
{
ConnectionLost(const std::string & msg, int code = 0) : Exception(msg, code) {}
const char * name() const throw() override { return "mysqlxx::ConnectionLost"; }
const char * className() const throw() override { return "mysqlxx::ConnectionLost"; }
};

/// Erroneous query.
struct BadQuery : public Exception
{
@@ -10,7 +10,6 @@

#include <common/sleep.h>

#include <Poco/Util/Application.h>
#include <Poco/Util/LayeredConfiguration.h>

@@ -41,7 +40,9 @@ void Pool::Entry::decrementRefCount()
Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & config_name,
unsigned default_connections_, unsigned max_connections_,
const char * parent_config_name_)
: default_connections(default_connections_), max_connections(max_connections_)
: logger(Poco::Logger::get("mysqlxx::Pool"))
, default_connections(default_connections_)
, max_connections(max_connections_)
{
server = cfg.getString(config_name + ".host");

@@ -78,6 +79,9 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co

enable_local_infile = cfg.getBool(config_name + ".enable_local_infile",
cfg.getBool(parent_config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE));

opt_reconnect = cfg.getBool(config_name + ".opt_reconnect",
cfg.getBool(parent_config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT));
}
else
{
@@ -96,6 +100,8 @@ Pool::Pool(const Poco::Util::AbstractConfiguration & cfg, const std::string & co

enable_local_infile = cfg.getBool(
config_name + ".enable_local_infile", MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE);

opt_reconnect = cfg.getBool(config_name + ".opt_reconnect", MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT);
}

connect_timeout = cfg.getInt(config_name + ".connect_timeout",
@@ -125,20 +131,30 @@ Pool::Entry Pool::get()
initialize();
for (;;)
{
logger.trace("(%s): Iterating through existing MySQL connections", getDescription());

for (auto & connection : connections)
{
if (connection->ref_count == 0)
return Entry(connection, this);
}

logger.trace("(%s): Trying to allocate a new connection.", getDescription());
if (connections.size() < static_cast<size_t>(max_connections))
{
Connection * conn = allocConnection();
if (conn)
return Entry(conn, this);

logger.trace("(%s): Unable to create a new connection: Allocation failed.", getDescription());
}
else
{
logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription());
}

lock.unlock();
logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
lock.lock();
}
@@ -162,8 +178,7 @@ Pool::Entry Pool::tryGet()
if (res.tryForceConnected()) /// Tries to reestablish connection as well
return res;

auto & logger = Poco::Util::Application::instance().logger();
logger.information("Idle connection to mysql server cannot be recovered, dropping it.");
logger.debug("(%s): Idle connection to MySQL server cannot be recovered, dropping it.", getDescription());

/// This one is disconnected, cannot be reestablished and so needs to be disposed of.
connection_it = connections.erase(connection_it);
@@ -186,6 +201,8 @@ Pool::Entry Pool::tryGet()

void Pool::removeConnection(Connection* connection)
{
logger.trace("(%s): Removing connection.", getDescription());

std::lock_guard<std::mutex> lock(mutex);
if (connection)
{
@@ -210,8 +227,6 @@ void Pool::Entry::forceConnected() const
if (data == nullptr)
throw Poco::RuntimeException("Tried to access NULL database connection.");

Poco::Util::Application & app = Poco::Util::Application::instance();

bool first = true;
while (!tryForceConnected())
{
@@ -220,7 +235,7 @@ void Pool::Entry::forceConnected() const
else
sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);

app.logger().information("MYSQL: Reconnecting to " + pool->description);
pool->logger.debug("Entry: Reconnecting to MySQL server %s", pool->description);
data->conn.connect(
pool->db.c_str(),
pool->server.c_str(),
@@ -233,7 +248,8 @@ void Pool::Entry::forceConnected() const
pool->ssl_key.c_str(),
pool->connect_timeout,
pool->rw_timeout,
pool->enable_local_infile);
pool->enable_local_infile,
pool->opt_reconnect);
}
}

@@ -242,18 +258,22 @@ bool Pool::Entry::tryForceConnected() const
{
auto * const mysql_driver = data->conn.getDriver();
const auto prev_connection_id = mysql_thread_id(mysql_driver);

pool->logger.trace("Entry(connection %lu): sending PING to check if it is alive.", prev_connection_id);
if (data->conn.ping()) /// Attempts to reestablish lost connection
{
const auto current_connection_id = mysql_thread_id(mysql_driver);
if (prev_connection_id != current_connection_id)
{
auto & logger = Poco::Util::Application::instance().logger();
logger.information("Connection to mysql server has been reestablished. Connection id changed: %lu -> %lu",
prev_connection_id, current_connection_id);
pool->logger.debug("Entry(connection %lu): Reconnected to MySQL server. Connection id changed: %lu -> %lu",
current_connection_id, prev_connection_id, current_connection_id);
}

pool->logger.trace("Entry(connection %lu): PING ok.", current_connection_id);
return true;
}

pool->logger.trace("Entry(connection %lu): PING failed.", prev_connection_id);
return false;
}

@@ -274,15 +294,13 @@ void Pool::initialize()

Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
{
Poco::Util::Application & app = Poco::Util::Application::instance();

std::unique_ptr<Connection> conn(new Connection);
std::unique_ptr<Connection> conn_ptr{new Connection};

try
{
app.logger().information("MYSQL: Connecting to " + description);
logger.debug("Connecting to %s", description);

conn->conn.connect(
conn_ptr->conn.connect(
db.c_str(),
server.c_str(),
user.c_str(),
@@ -294,29 +312,29 @@ Pool::Connection * Pool::allocConnection(bool dont_throw_if_failed_first_time)
ssl_key.c_str(),
connect_timeout,
rw_timeout,
enable_local_infile);
enable_local_infile,
opt_reconnect);
}
catch (mysqlxx::ConnectionFailed & e)
{
logger.error(e.what());

if ((!was_successful && !dont_throw_if_failed_first_time)
|| e.errnum() == ER_ACCESS_DENIED_ERROR
|| e.errnum() == ER_DBACCESS_DENIED_ERROR
|| e.errnum() == ER_BAD_DB_ERROR)
{
app.logger().error(e.what());
throw;
}
else
{
app.logger().error(e.what());
return nullptr;
}
}

connections.push_back(conn_ptr.get());
was_successful = true;
auto * connection = conn.release();
connections.push_back(connection);
return connection;
return conn_ptr.release();
}

}
@@ -6,6 +6,8 @@
#include <atomic>

#include <Poco/Exception.h>
#include <Poco/Logger.h>

#include <mysqlxx/Connection.h>

@@ -165,19 +167,21 @@ public:
unsigned rw_timeout_ = MYSQLXX_DEFAULT_RW_TIMEOUT,
unsigned default_connections_ = MYSQLXX_POOL_DEFAULT_START_CONNECTIONS,
unsigned max_connections_ = MYSQLXX_POOL_DEFAULT_MAX_CONNECTIONS,
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE)
: default_connections(default_connections_), max_connections(max_connections_),
db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_),
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_) {}
unsigned enable_local_infile_ = MYSQLXX_DEFAULT_ENABLE_LOCAL_INFILE,
bool opt_reconnect_ = MYSQLXX_DEFAULT_MYSQL_OPT_RECONNECT)
: logger(Poco::Logger::get("mysqlxx::Pool")), default_connections(default_connections_),
max_connections(max_connections_), db(db_), server(server_), user(user_), password(password_), port(port_), socket(socket_),
connect_timeout(connect_timeout_), rw_timeout(rw_timeout_), enable_local_infile(enable_local_infile_),
opt_reconnect(opt_reconnect_) {}

Pool(const Pool & other)
: default_connections{other.default_connections},
: logger(other.logger), default_connections{other.default_connections},
max_connections{other.max_connections},
db{other.db}, server{other.server},
user{other.user}, password{other.password},
port{other.port}, socket{other.socket},
connect_timeout{other.connect_timeout}, rw_timeout{other.rw_timeout},
enable_local_infile{other.enable_local_infile}
enable_local_infile{other.enable_local_infile}, opt_reconnect(other.opt_reconnect)
{}

Pool & operator=(const Pool &) = delete;
@@ -201,6 +205,8 @@ public:
void removeConnection(Connection * connection);

protected:
Poco::Logger & logger;

/// Number of MySQL connections which are created at launch.
unsigned default_connections;
/// Maximum possible number of connections
@@ -231,6 +237,7 @@ private:
std::string ssl_cert;
std::string ssl_key;
bool enable_local_infile;
bool opt_reconnect;

/// True if connection was established at least once.
bool was_successful{false};
@@ -1,3 +1,8 @@
#include <algorithm>
#include <ctime>
#include <random>
#include <thread>

#include <mysqlxx/PoolWithFailover.h>

@@ -33,6 +38,19 @@ PoolWithFailover::PoolWithFailover(const Poco::Util::AbstractConfiguration & con
std::make_shared<Pool>(config_, replica_name, default_connections_, max_connections_, config_name_.c_str()));
}
}

/// PoolWithFailover objects are stored in a cache inside PoolFactory.
/// This cache is reset by ExternalDictionariesLoader after every SYSTEM RELOAD DICTIONAR{Y|IES}
/// which triggers massive re-constructing of connection pools.
/// The state of PRNGs like std::mt19937 is considered to be quite heavy
/// thus here we attempt to optimize its construction.
static thread_local std::mt19937 rnd_generator(
std::hash<std::thread::id>{}(std::this_thread::get_id()) + std::clock());
for (auto & [_, replicas] : replicas_by_priority)
{
if (replicas.size() > 1)
std::shuffle(replicas.begin(), replicas.end(), rnd_generator);
}
}
else
{
@@ -1,11 +1,16 @@
#if __has_include(<mysql.h>)
#include <errmsg.h>
#include <mysql.h>
#else
#include <mysql/errmsg.h>
#include <mysql/mysql.h>
#endif

#include <Poco/Logger.h>

#include <mysqlxx/Connection.h>
#include <mysqlxx/Query.h>
#include <mysqlxx/Types.h>

namespace mysqlxx
@@ -57,8 +62,24 @@ void Query::reset()
void Query::executeImpl()
{
std::string query_string = query_buf.str();
if (mysql_real_query(conn->getDriver(), query_string.data(), query_string.size()))
throw BadQuery(errorMessage(conn->getDriver()), mysql_errno(conn->getDriver()));

MYSQL* mysql_driver = conn->getDriver();

auto & logger = Poco::Logger::get("mysqlxx::Query");
logger.trace("Running MySQL query using connection %lu", mysql_thread_id(mysql_driver));
if (mysql_real_query(mysql_driver, query_string.data(), query_string.size()))
{
const auto err_no = mysql_errno(mysql_driver);
switch (err_no)
{
case CR_SERVER_GONE_ERROR:
[[fallthrough]];
case CR_SERVER_LOST:
throw ConnectionLost(errorMessage(mysql_driver), err_no);
default:
throw BadQuery(errorMessage(mysql_driver), err_no);
}
}
}

UseQueryResult Query::use()
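For context on the Query::executeImpl() change above: the commit makes a dropped server connection (CR_SERVER_GONE_ERROR / CR_SERVER_LOST) surface as the new mysqlxx::ConnectionLost exception instead of a generic BadQuery. The following is a minimal, hypothetical caller-side sketch of how that distinction could be used, not code from this commit; the include paths and the executeWithOneRetry helper are assumptions for illustration, while mysqlxx::Pool, Query and the exception types come from the code shown in this diff.

```cpp
#include <string>
#include <mysqlxx/Pool.h>        /// Pool, Pool::Entry (assumed include path)
#include <mysqlxx/Exception.h>   /// ConnectionLost, BadQuery (assumed include path)

/// Hypothetical helper, not part of the commit: run a statement and retry once
/// when the server dropped the connection, but fail fast on a genuinely bad query.
void executeWithOneRetry(mysqlxx::Pool & pool, const std::string & sql)
{
    for (int attempt = 0; attempt < 2; ++attempt)
    {
        mysqlxx::Pool::Entry entry = pool.get();   /// May reconnect or allocate a new connection.
        try
        {
            entry->query(sql).execute();           /// Throws on failure.
            return;
        }
        catch (const mysqlxx::ConnectionLost &)
        {
            if (attempt == 1)
                throw;                             /// Second failure: give up.
            /// First failure: loop and take a (re)connected entry from the pool.
        }
        /// mysqlxx::BadQuery and other exceptions propagate immediately.
    }
}
```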
@@ -32,27 +32,25 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")

# debian (debhlpers) set SOURCE_DATE_EPOCH environment variable, that is
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})

# debian (debhelpers) set SOURCE_DATE_EPOCH environment variable, that is
# filled from the debian/changelog or current time.
#
# - 4.0+ ccache always includes this environment variable into the hash
#   of the manifest, which do not allow to use previous cache,
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH for every file w/o __DATE__/__TIME__
#
# So for:
# - 4.2+ time_macros sloppiness is used,
# - 4.2+ does not require any sloppiness
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "Use time_macros sloppiness for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
message(STATUS "ccache is 4.2+ no quirks for SOURCE_DATE_EPOCH required")
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
else()
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE_FOUND})
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE_FOUND})
endif()
else ()
message(${RECONFIGURE_MESSAGE_LEVEL} "Not using ${CCACHE_FOUND} ${CCACHE_VERSION} bug: https://bugzilla.samba.org/show_bug.cgi?id=8118")
@@ -11,7 +11,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/NuRaft/CMakeLists.txt")
return()
endif ()

if (NOT OS_FREEBSD)
if (NOT OS_FREEBSD AND NOT OS_DARWIN)
set (USE_NURAFT 1)
set (NURAFT_LIBRARY nuraft)

@@ -20,5 +20,5 @@ if (NOT OS_FREEBSD)
message (STATUS "Using NuRaft=${USE_NURAFT}: ${NURAFT_INCLUDE_DIR} : ${NURAFT_LIBRARY}")
else()
set (USE_NURAFT 0)
message (STATUS "Using internal NuRaft library on FreeBSD is not supported")
message (STATUS "Using internal NuRaft library on FreeBSD and Darwin is not supported")
endif()
contrib/NuRaft (vendored, 2 changed lines)
@@ -1 +1 @@
Subproject commit 410bd149da84cdde60b4436b02b738749f4e87e1
Subproject commit 9a0d78de4b90546368d954b6434f0e9a823e8d80

contrib/boost (vendored, 2 changed lines)
@@ -1 +1 @@
Subproject commit 8e259cd2a6b60d75dd17e73432f11bb7b9351bb1
Subproject commit 48f40ebb539220d328958f8823b094c0b07a4e79

contrib/brotli (vendored, 2 changed lines)
@@ -1 +1 @@
Subproject commit 5805f99a533a8f8118699c0100d8c102f3605f65
Subproject commit 63be8a99401992075c23e99f7c84de1c653e39e2
@@ -2,6 +2,8 @@ set(BROTLI_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/brotli/c)
set(BROTLI_BINARY_DIR ${ClickHouse_BINARY_DIR}/contrib/brotli/c)

set(SRCS
${BROTLI_SOURCE_DIR}/enc/command.c
${BROTLI_SOURCE_DIR}/enc/fast_log.c
${BROTLI_SOURCE_DIR}/dec/bit_reader.c
${BROTLI_SOURCE_DIR}/dec/state.c
${BROTLI_SOURCE_DIR}/dec/huffman.c
@@ -26,6 +28,9 @@ set(SRCS
${BROTLI_SOURCE_DIR}/enc/memory.c
${BROTLI_SOURCE_DIR}/common/dictionary.c
${BROTLI_SOURCE_DIR}/common/transform.c
${BROTLI_SOURCE_DIR}/common/platform.c
${BROTLI_SOURCE_DIR}/common/context.c
${BROTLI_SOURCE_DIR}/common/constants.c
)

add_library(brotli ${SRCS})
@@ -30,7 +30,12 @@ set(SRCS

add_library(nuraft ${SRCS})

target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1)

if (NOT OPENSSL_SSL_LIBRARY OR NOT OPENSSL_CRYPTO_LIBRARY)
target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1 SSL_LIBRARY_NOT_FOUND=1)
else()
target_compile_definitions(nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1)
endif()

target_include_directories (nuraft SYSTEM PRIVATE ${LIBRARY_DIR}/include/libnuraft)
# for some reason include "asio.h" directly without "boost/" prefix.
@@ -56,7 +56,7 @@ $ echo 'SELECT version()' | curl 'http://localhost:8123/' --data-binary @-
20.12.3.3
```

### Volumes
### Volumes

Typically you may want to mount the following folders inside your container to archieve persistency:

@@ -76,7 +76,7 @@ You may also want to mount:
* `/etc/clickhouse-server/usert.d/*.xml` - files with use settings adjustmenets
* `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below).

### Linux capabilities
### Linux capabilities

ClickHouse has some advanced functionality which requite enabling several [linux capabilities](https://man7.org/linux/man-pages/man7/capabilities.7.html).

@@ -113,10 +113,10 @@ $ docker run --rm -e CLICKHOUSE_UID=0 -e CLICKHOUSE_GID=0 --name clickhouse-serv

### How to create default database and user on starting

Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD`:
Sometimes you may want to create user (user named `default` is used by default) and database on image starting. You can do it using environment variables `CLICKHOUSE_DB`, `CLICKHOUSE_USER`, `CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT` and `CLICKHOUSE_PASSWORD`:

```
$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server
$ docker run --rm -e CLICKHOUSE_DB=my_database -e CLICKHOUSE_USER=username -e CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT=1 -e CLICKHOUSE_PASSWORD=password -p 9000:9000/tcp yandex/clickhouse-server
```

## How to extend this image
@@ -54,8 +54,10 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAIN
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_files.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker cp -L "${ubuntu20image}":/etc/nsswitch.conf "${CONTAINER_ROOT_FOLDER}/etc"

docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
rm -rf "$CONTAINER_ROOT_FOLDER"
@@ -54,6 +54,7 @@ FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
CLICKHOUSE_ACCESS_MANAGEMENT="${CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT:-0}"

for dir in "$DATA_DIR" \
"$ERROR_LOG_DIR" \
@@ -97,6 +98,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
</networks>
<password>${CLICKHOUSE_PASSWORD}</password>
<quota>default</quota>
<access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
</${CLICKHOUSE_USER}>
</users>
</yandex>
@@ -47,6 +47,7 @@ RUN apt-get update \
expect \
fakeroot \
git \
gdb \
gperf \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
@@ -70,6 +70,7 @@ function start_server
--path "$FASTTEST_DATA"
--user_files_path "$FASTTEST_DATA/user_files"
--top_level_domains_path "$FASTTEST_DATA/top_level_domains"
--test_keeper_server.log_storage_path "$FASTTEST_DATA/coordination"
)
clickhouse-server "${opts[@]}" &>> "$FASTTEST_OUTPUT/server.log" &
server_pid=$!
@@ -107,6 +108,18 @@ function start_server
fi

echo "ClickHouse server pid '$server_pid' started and responded"

echo "
handle all noprint
handle SIGSEGV stop print
handle SIGBUS stop print
handle SIGABRT stop print
continue
thread apply all backtrace
continue
" > script.gdb

gdb -batch -command script.gdb -p "$server_pid" &
}

function clone_root
@@ -163,6 +176,7 @@ function clone_submodules
contrib/xz
contrib/dragonbox
contrib/fast_float
contrib/NuRaft
)

git submodule sync
@@ -182,6 +196,7 @@ function run_cmake
"-DENABLE_EMBEDDED_COMPILER=0"
"-DENABLE_THINLTO=0"
"-DUSE_UNWIND=1"
"-DENABLE_NURAFT=1"
)

# TODO remove this? we don't use ccache anyway. An option would be to download it
@@ -257,6 +272,7 @@ function run_tests
00929_multi_match_edit_distance
01681_hyperscan_debug_assertion

01176_mysql_client_interactive # requires mysql client
01031_mutations_interpreter_and_context
01053_ssd_dictionary # this test mistakenly requires acces to /var/lib/clickhouse -- can't run this locally, disabled
01083_expressions_in_engine_arguments
@@ -324,7 +340,7 @@ function run_tests
# Look at DistributedFilesToInsert, so cannot run in parallel.
01460_DistributedFilesToInsert

01541_max_memory_usage_for_user
01541_max_memory_usage_for_user_long

# Require python libraries like scipy, pandas and numpy
01322_ttest_scipy
@@ -342,7 +358,7 @@ function run_tests
01666_blns
)

time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
(time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

# substr is to remove semicolon after test name
readarray -t FAILED_TESTS < <(awk '/\[ FAIL|TIMEOUT|ERROR \]/ { print substr($3, 1, length($3)-1) }' "$FASTTEST_OUTPUT/test_log.txt" | tee "$FASTTEST_OUTPUT/failed-parallel-tests.txt")
@@ -359,7 +375,7 @@ function run_tests
stop_server ||:

# Clean the data so that there is no interference from the previous test run.
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files} ||:
rm -rf "$FASTTEST_DATA"/{{meta,}data,user_files,coordination} ||:

start_server
@@ -18,7 +18,8 @@ RUN apt-get update \
curl \
tar \
krb5-user \
iproute2
iproute2 \
lsof
RUN rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
@@ -58,7 +58,7 @@ RUN dockerd --version; docker --version

RUN python3 -m pip install \
PyMySQL \
aerospike \
aerospike==4.0.0 \
avro \
cassandra-driver \
confluent-kafka==1.5.0 \
@@ -4,4 +4,4 @@ services:
image: cassandra
restart: always
ports:
- 9043:9042
@@ -5,6 +5,6 @@ services:
hostname: hdfs1
restart: always
ports:
- 50075:50075
- 50070:50070
entrypoint: /etc/bootstrap.sh -d
@@ -5,42 +5,42 @@ services:
image: zookeeper:3.4.9
hostname: kafka_zookeeper
environment:
ZOO_MY_ID: 1
ZOO_PORT: 2181
ZOO_SERVERS: server.1=kafka_zookeeper:2888:3888
security_opt:
- label:disable

kafka1:
image: confluentinc/cp-kafka:5.2.0
hostname: kafka1
ports:
- "9092:9092"
environment:
KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kafka1:19092
KAFKA_LISTENERS: INSIDE://:9092,OUTSIDE://:19092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181"
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
depends_on:
- kafka_zookeeper
security_opt:
- label:disable

schema-registry:
image: confluentinc/cp-schema-registry:5.2.0
hostname: schema-registry
ports:
- "8081:8081"
environment:
SCHEMA_REGISTRY_HOST_NAME: schema-registry
SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT
SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092
depends_on:
- kafka_zookeeper
- kafka1
security_opt:
- label:disable
@@ -8,22 +8,22 @@ services:
hostname: kerberizedhdfs1
restart: always
volumes:
- ${KERBERIZED_HDFS_DIR}/../../hdfs_configs/bootstrap.sh:/etc/bootstrap.sh:ro
- ${KERBERIZED_HDFS_DIR}/secrets:/usr/local/hadoop/etc/hadoop/conf
- ${KERBERIZED_HDFS_DIR}/secrets/krb_long.conf:/etc/krb5.conf:ro
ports:
- 1006:1006
- 50070:50070
- 9010:9010
depends_on:
- hdfskerberos
entrypoint: /etc/bootstrap.sh -d

hdfskerberos:
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG}
hostname: hdfskerberos
volumes:
- ${KERBERIZED_HDFS_DIR}/secrets:/tmp/keytab
- ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
ports: [88, 749]
@@ -6,54 +6,54 @@ services:
# restart: always
hostname: kafka_kerberized_zookeeper
environment:
ZOOKEEPER_SERVER_ID: 1
ZOOKEEPER_CLIENT_PORT: 2181
ZOOKEEPER_SERVERS: "kafka_kerberized_zookeeper:2888:3888"
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/zookeeper_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dzookeeper.authProvider.1=org.apache.zookeeper.server.auth.SASLAuthenticationProvider -Dsun.security.krb5.debug=true"
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
- /dev/urandom:/dev/random
depends_on:
- kafka_kerberos
security_opt:
- label:disable

kerberized_kafka1:
image: confluentinc/cp-kafka:5.2.0
# restart: always
hostname: kerberized_kafka1
ports:
- "9092:9092"
- "9093:9093"
environment:
KAFKA_LISTENERS: OUTSIDE://:19092,UNSECURED_OUTSIDE://:19093,UNSECURED_INSIDE://:9093
KAFKA_ADVERTISED_LISTENERS: OUTSIDE://kerberized_kafka1:19092,UNSECURED_OUTSIDE://kerberized_kafka1:19093,UNSECURED_INSIDE://localhost:9093
# KAFKA_LISTENERS: INSIDE://kerberized_kafka1:9092,OUTSIDE://kerberized_kafka1:19092
# KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:9092,OUTSIDE://kerberized_kafka1:19092
KAFKA_SASL_MECHANISM_INTER_BROKER_PROTOCOL: GSSAPI
KAFKA_SASL_ENABLED_MECHANISMS: GSSAPI
KAFKA_SASL_KERBEROS_SERVICE_NAME: kafka
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: OUTSIDE:SASL_PLAINTEXT,UNSECURED_OUTSIDE:PLAINTEXT,UNSECURED_INSIDE:PLAINTEXT,
KAFKA_INTER_BROKER_LISTENER_NAME: OUTSIDE
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: "kafka_kerberized_zookeeper:2181"
KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_OPTS: "-Djava.security.auth.login.config=/etc/kafka/secrets/broker_jaas.conf -Djava.security.krb5.conf=/etc/kafka/secrets/krb.conf -Dsun.security.krb5.debug=true"
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/etc/kafka/secrets
- /dev/urandom:/dev/random
depends_on:
- kafka_kerberized_zookeeper
- kafka_kerberos
security_opt:
- label:disable

kafka_kerberos:
image: yandex/clickhouse-kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest}
hostname: kafka_kerberos
volumes:
- ${KERBERIZED_KAFKA_DIR}/secrets:/tmp/keytab
- ${KERBERIZED_KAFKA_DIR}/../../kerberos_image_config.sh:/config.sh
- /dev/urandom:/dev/random
ports: [88, 749]
@ -7,5 +7,5 @@ services:
|
||||
MONGO_INITDB_ROOT_USERNAME: root
|
||||
MONGO_INITDB_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 27018:27017
|
||||
- 27018:27017
|
||||
command: --profile=2 --verbose
|
||||
|
@ -6,5 +6,5 @@ services:
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 3308:3306
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
|
@ -6,5 +6,9 @@ services:
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
- 3308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log'
|
||||
--default-time-zone='+3:00'
|
||||
--gtid-mode="ON"
|
||||
--enforce-gtid-consistency
|
||||
--log-error-verbosity=3
|
||||
|
@ -6,5 +6,10 @@ services:
|
||||
environment:
|
||||
MYSQL_ROOT_PASSWORD: clickhouse
|
||||
ports:
|
||||
- 33308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log' --default_authentication_plugin='mysql_native_password' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency
|
||||
- 33308:3306
|
||||
command: --server_id=100 --log-bin='mysql-bin-1.log'
|
||||
--default_authentication_plugin='mysql_native_password'
|
||||
--default-time-zone='+3:00'
|
||||
--gtid-mode="ON"
|
||||
--enforce-gtid-consistency
|
||||
--log-error-verbosity=3
|
||||
|
@ -7,7 +7,7 @@ services:
|
||||
MYSQL_ALLOW_EMPTY_PASSWORD: 1
|
||||
command: --federated --socket /var/run/mysqld/mysqld.sock
|
||||
healthcheck:
|
||||
test: ["CMD", "mysqladmin" ,"ping", "-h", "localhost"]
|
||||
test: ["CMD", "mysqladmin", "ping", "-h", "localhost"]
|
||||
interval: 1s
|
||||
timeout: 2s
|
||||
retries: 100
|
||||
|
@ -11,4 +11,4 @@ services:
|
||||
ports:
|
||||
- "5433:5433"
|
||||
environment:
|
||||
POSTGRES_HOST_AUTH_METHOD: "trust"
|
||||
POSTGRES_HOST_AUTH_METHOD: "trust"
|
||||
|
@ -6,8 +6,8 @@ services:
|
||||
environment:
|
||||
POSTGRES_PASSWORD: mysecretpassword
|
||||
ports:
|
||||
- 5432:5432
|
||||
- 5432:5432
|
||||
networks:
|
||||
default:
|
||||
aliases:
|
||||
- postgre-sql.local
|
||||
default:
|
||||
aliases:
|
||||
- postgre-sql.local
|
||||
|
@ -4,5 +4,5 @@ services:
|
||||
image: redis
|
||||
restart: always
|
||||
ports:
|
||||
- 6380:6379
|
||||
- 6380:6379
|
||||
command: redis-server --requirepass "clickhouse" --databases 32
|
||||
|
@ -1,11 +1,11 @@
|
||||
version: '2.3'
|
||||
services:
|
||||
zoo1:
|
||||
image: zookeeper:3.4.12
|
||||
image: zookeeper:3.6.2
|
||||
restart: always
|
||||
environment:
|
||||
ZOO_TICK_TIME: 500
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
|
||||
ZOO_MY_ID: 1
|
||||
JVMFLAGS: -Dzookeeper.forceSync=no
|
||||
volumes:
|
||||
@ -16,11 +16,11 @@ services:
|
||||
source: ${ZK_DATA_LOG1:-}
|
||||
target: /datalog
|
||||
zoo2:
|
||||
image: zookeeper:3.4.12
|
||||
image: zookeeper:3.6.2
|
||||
restart: always
|
||||
environment:
|
||||
ZOO_TICK_TIME: 500
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888
|
||||
ZOO_MY_ID: 2
|
||||
JVMFLAGS: -Dzookeeper.forceSync=no
|
||||
volumes:
|
||||
@ -31,11 +31,11 @@ services:
|
||||
source: ${ZK_DATA_LOG2:-}
|
||||
target: /datalog
|
||||
zoo3:
|
||||
image: zookeeper:3.4.12
|
||||
image: zookeeper:3.6.2
|
||||
restart: always
|
||||
environment:
|
||||
ZOO_TICK_TIME: 500
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
|
||||
ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
|
||||
ZOO_MY_ID: 3
|
||||
JVMFLAGS: -Dzookeeper.forceSync=no
|
||||
volumes:
|
||||
|
@ -97,6 +97,7 @@ function configure
|
||||
rm -r right/db ||:
|
||||
rm -r db0/preprocessed_configs ||:
|
||||
rm -r db0/{data,metadata}/system ||:
|
||||
rm db0/status ||:
|
||||
cp -al db0/ left/db/
|
||||
cp -al db0/ right/db/
|
||||
}
|
||||
|
@ -60,4 +60,8 @@ fi
|
||||
# more ideologically correct.
|
||||
read -ra ADDITIONAL_OPTIONS <<< "${ADDITIONAL_OPTIONS:-}"
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
ADDITIONAL_OPTIONS+=('--replicated-database')
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" "$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
|
||||
|
@ -3,6 +3,9 @@ FROM yandex/clickhouse-test-base
|
||||
|
||||
ARG odbc_driver_url="https://github.com/ClickHouse/clickhouse-odbc/releases/download/v1.1.4.20200302/clickhouse-odbc-1.1.4-Linux.tar.gz"
|
||||
|
||||
RUN echo "deb [trusted=yes] http://repo.mysql.com/apt/ubuntu/ bionic mysql-5.7" >> /etc/apt/sources.list \
|
||||
&& apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 8C718D3B5072E1F5
|
||||
|
||||
RUN apt-get update -y \
|
||||
&& env DEBIAN_FRONTEND=noninteractive \
|
||||
apt-get install --yes --no-install-recommends \
|
||||
@ -13,6 +16,7 @@ RUN apt-get update -y \
|
||||
ncdu \
|
||||
netcat-openbsd \
|
||||
openssl \
|
||||
protobuf-compiler \
|
||||
python3 \
|
||||
python3-lxml \
|
||||
python3-requests \
|
||||
@ -23,7 +27,8 @@ RUN apt-get update -y \
|
||||
telnet \
|
||||
tree \
|
||||
unixodbc \
|
||||
wget
|
||||
wget \
|
||||
mysql-client=5.7*
|
||||
|
||||
RUN pip3 install numpy scipy pandas
|
||||
|
||||
|
@ -57,6 +57,10 @@ function run_tests()
|
||||
ADDITIONAL_OPTIONS+=('4')
|
||||
fi
|
||||
|
||||
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
|
||||
ADDITIONAL_OPTIONS+=('--replicated-database')
|
||||
fi
|
||||
|
||||
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
|
||||
--test-runs "$NUM_TRIES" \
|
||||
"$SKIP_LIST_OPT" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
|
||||
|
@ -8,16 +8,23 @@ dpkg -i package_folder/clickhouse-server_*.deb
|
||||
dpkg -i package_folder/clickhouse-client_*.deb
|
||||
dpkg -i package_folder/clickhouse-test_*.deb
|
||||
|
||||
function configure()
|
||||
{
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
# for clickhouse-server (via service)
|
||||
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
|
||||
# for clickhouse-client
|
||||
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
|
||||
|
||||
# since we run clickhouse from root
|
||||
sudo chown root: /var/lib/clickhouse
|
||||
}
|
||||
|
||||
function stop()
|
||||
{
|
||||
timeout 120 service clickhouse-server stop
|
||||
|
||||
# Wait for process to disappear from processlist and also try to kill zombies.
|
||||
while kill -9 "$(pidof clickhouse-server)"
|
||||
do
|
||||
echo "Killed clickhouse-server"
|
||||
sleep 0.5
|
||||
done
|
||||
clickhouse stop
|
||||
}
|
||||
|
||||
function start()
|
||||
@ -33,19 +40,26 @@ function start()
|
||||
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
|
||||
break
|
||||
fi
|
||||
timeout 120 service clickhouse-server start
|
||||
# use root to match with current uid
|
||||
clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>/var/log/clickhouse-server/stderr.log
|
||||
sleep 0.5
|
||||
counter=$((counter + 1))
|
||||
done
|
||||
|
||||
echo "
|
||||
handle all noprint
|
||||
handle SIGSEGV stop print
|
||||
handle SIGBUS stop print
|
||||
handle SIGABRT stop print
|
||||
continue
|
||||
thread apply all backtrace
|
||||
continue
|
||||
" > script.gdb
|
||||
|
||||
gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" &
|
||||
}
|
||||
|
||||
# install test configs
|
||||
/usr/share/clickhouse-test/config/install.sh
|
||||
|
||||
# for clickhouse-server (via service)
|
||||
echo "ASAN_OPTIONS='malloc_context_size=10 verbosity=1 allocator_release_to_os_interval_ms=10000'" >> /etc/environment
|
||||
# for clickhouse-client
|
||||
export ASAN_OPTIONS='malloc_context_size=10 allocator_release_to_os_interval_ms=10000'
|
||||
configure
|
||||
|
||||
start
|
||||
|
||||
@ -64,9 +78,11 @@ clickhouse-client --query "RENAME TABLE datasets.hits_v1 TO test.hits"
|
||||
clickhouse-client --query "RENAME TABLE datasets.visits_v1 TO test.visits"
|
||||
clickhouse-client --query "SHOW TABLES FROM test"
|
||||
|
||||
./stress --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION"
|
||||
./stress --hung-check --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" && echo "OK" > /test_output/script_exit_code.txt || echo "FAIL" > /test_output/script_exit_code.txt
|
||||
|
||||
stop
|
||||
# TODO remove me when persistent snapshots will be ready
|
||||
rm -fr /var/lib/clickhouse/coordination ||:
|
||||
start
|
||||
|
||||
clickhouse-client --query "SELECT 'Server successfuly started'" > /test_output/alive_check.txt || echo 'Server failed to start' > /test_output/alive_check.txt
|
||||
|
@ -1,8 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from multiprocessing import cpu_count
|
||||
from subprocess import Popen, check_call
|
||||
from subprocess import Popen, call, STDOUT
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import argparse
|
||||
import logging
|
||||
@ -22,12 +23,15 @@ def get_options(i):
|
||||
if 0 < i:
|
||||
options += " --order=random"
|
||||
|
||||
if i % 2 == 1:
|
||||
if i % 3 == 1:
|
||||
options += " --db-engine=Ordinary"
|
||||
|
||||
if i % 3 == 2:
|
||||
options += ''' --db-engine="Replicated('/test/db/test_{}', 's1', 'r1')"'''.format(i)
|
||||
|
||||
# If database name is not specified, new database is created for each functional test.
|
||||
# Run some threads with one database for all tests.
|
||||
if i % 3 == 1:
|
||||
if i % 2 == 1:
|
||||
options += " --database=test_{}".format(i)
|
||||
|
||||
if i == 13:
|
||||
@ -64,7 +68,8 @@ if __name__ == "__main__":
|
||||
parser.add_argument("--server-log-folder", default='/var/log/clickhouse-server')
|
||||
parser.add_argument("--output-folder")
|
||||
parser.add_argument("--global-time-limit", type=int, default=3600)
|
||||
parser.add_argument("--num-parallel", default=cpu_count());
|
||||
parser.add_argument("--num-parallel", default=cpu_count())
|
||||
parser.add_argument('--hung-check', action='store_true', default=False)
|
||||
|
||||
args = parser.parse_args()
|
||||
func_pipes = []
|
||||
@ -81,4 +86,13 @@ if __name__ == "__main__":
|
||||
logging.info("Finished %s from %s processes", len(retcodes), len(func_pipes))
|
||||
time.sleep(5)
|
||||
|
||||
logging.info("All processes finished")
|
||||
if args.hung_check:
|
||||
logging.info("Checking if some queries hung")
|
||||
cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1")
|
||||
res = call(cmd, shell=True, stderr=STDOUT)
|
||||
if res != 0:
|
||||
logging.info("Hung check failed with exit code {}".format(res))
|
||||
sys.exit(1)
|
||||
|
||||
logging.info("Stress test finished")
|
||||
|
@ -1,7 +1,14 @@
|
||||
# docker build -t yandex/clickhouse-style-test .
|
||||
FROM ubuntu:20.04
|
||||
|
||||
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes shellcheck libxml2-utils git python3-pip pylint && pip3 install codespell
|
||||
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
|
||||
shellcheck \
|
||||
libxml2-utils \
|
||||
git \
|
||||
python3-pip \
|
||||
pylint \
|
||||
yamllint \
|
||||
&& pip3 install codespell
|
||||
|
||||
|
||||
# For |& syntax
|
||||
|
29
docs/_description_templates/template-data-type.md
Normal file
@ -0,0 +1,29 @@
|
||||
---
|
||||
toc_priority:
|
||||
toc_title:
|
||||
---
|
||||
|
||||
# data_type_name {#data_type-name}
|
||||
|
||||
Description.
|
||||
|
||||
**Parameters** (Optional)
|
||||
|
||||
- `x` — Description. [Type name](relative/path/to/type/dscr.md#type).
|
||||
- `y` — Description. [Type name](relative/path/to/type/dscr.md#type).
|
||||
|
||||
**Examples**
|
||||
|
||||
```sql
|
||||
|
||||
```
|
||||
|
||||
## Additional Info {#additional-info} (Optional)
|
||||
|
||||
The name of an additional section can be any, for example, **Usage**.
|
||||
|
||||
**See Also** (Optional)
|
||||
|
||||
- [link](#)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/data_types/<data-type-name>/) <!--hide-->
|
@ -12,16 +12,20 @@ Alias: `<alias name>`. (Optional)
|
||||
|
||||
More text (Optional).
|
||||
|
||||
**Parameters** (Optional)
|
||||
**Arguments** (Optional)
|
||||
|
||||
- `x` — Description. [Type name](relative/path/to/type/dscr.md#type).
|
||||
- `y` — Description. [Type name](relative/path/to/type/dscr.md#type).
|
||||
|
||||
**Parameters** (Optional, only for parametric aggregate functions)
|
||||
|
||||
- `z` — Description. [Type name](relative/path/to/type/dscr.md#type).
|
||||
|
||||
**Returned value(s)**
|
||||
|
||||
- Returned values list.
|
||||
- Returned values list.
|
||||
|
||||
Type: [Type](relative/path/to/type/dscr.md#type).
|
||||
Type: [Type name](relative/path/to/type/dscr.md#type).
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -8,10 +8,14 @@ Columns:
|
||||
|
||||
**Example**
|
||||
|
||||
Query:
|
||||
|
||||
``` sql
|
||||
SELECT * FROM system.table_name
|
||||
```
|
||||
|
||||
Result:
|
||||
|
||||
``` text
|
||||
Some output. It shouldn't be too long.
|
||||
```
|
||||
|
@ -38,20 +38,20 @@ SETTINGS
|
||||
|
||||
Required parameters:
|
||||
|
||||
- `kafka_broker_list` – A comma-separated list of brokers (for example, `localhost:9092`).
|
||||
- `kafka_topic_list` – A list of Kafka topics.
|
||||
- `kafka_group_name` – A group of Kafka consumers. Reading margins are tracked for each group separately. If you don’t want messages to be duplicated in the cluster, use the same group name everywhere.
|
||||
- `kafka_format` – Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
|
||||
- `kafka_broker_list` — A comma-separated list of brokers (for example, `localhost:9092`).
|
||||
- `kafka_topic_list` — A list of Kafka topics.
|
||||
- `kafka_group_name` — A group of Kafka consumers. Reading margins are tracked for each group separately. If you don’t want messages to be duplicated in the cluster, use the same group name everywhere.
|
||||
- `kafka_format` — Message format. Uses the same notation as the SQL `FORMAT` function, such as `JSONEachRow`. For more information, see the [Formats](../../../interfaces/formats.md) section.
|
||||
|
||||
Optional parameters:
|
||||
|
||||
- `kafka_row_delimiter` – Delimiter character, which ends the message.
|
||||
- `kafka_schema` – Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
|
||||
- `kafka_num_consumers` – The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
|
||||
- `kafka_max_block_size` - The maximum batch size (in messages) for poll (default: `max_block_size`).
|
||||
- `kafka_skip_broken_messages` – Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
|
||||
- `kafka_commit_every_batch` - Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`).
|
||||
- `kafka_thread_per_consumer` - Provide independent thread for each consumer (default: `0`). When enabled, every consumer flush the data independently, in parallel (otherwise - rows from several consumers squashed to form one block).
|
||||
- `kafka_row_delimiter` — Delimiter character, which ends the message.
|
||||
- `kafka_schema` — Parameter that must be used if the format requires a schema definition. For example, [Cap’n Proto](https://capnproto.org/) requires the path to the schema file and the name of the root `schema.capnp:Message` object.
|
||||
- `kafka_num_consumers` — The number of consumers per table. Default: `1`. Specify more consumers if the throughput of one consumer is insufficient. The total number of consumers should not exceed the number of partitions in the topic, since only one consumer can be assigned per partition.
|
||||
- `kafka_max_block_size` — The maximum batch size (in messages) for poll (default: `max_block_size`).
|
||||
- `kafka_skip_broken_messages` — Kafka message parser tolerance to schema-incompatible messages per block. Default: `0`. If `kafka_skip_broken_messages = N` then the engine skips *N* Kafka messages that cannot be parsed (a message equals a row of data).
|
||||
- `kafka_commit_every_batch` — Commit every consumed and handled batch instead of a single commit after writing a whole block (default: `0`).
|
||||
- `kafka_thread_per_consumer` — Provide independent thread for each consumer (default: `0`). When enabled, every consumer flushes the data independently, in parallel (otherwise, rows from several consumers are squashed to form one block).
|
||||
|
||||
Examples:
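A minimal sketch of a Kafka-backed table that uses only the required settings listed above; the column set, broker address, topic, and consumer group names are placeholders rather than values taken from this commit:

``` sql
CREATE TABLE queue
(
    timestamp UInt64,
    level String,
    message String
) ENGINE = Kafka
SETTINGS kafka_broker_list = 'localhost:9092',
         kafka_topic_list = 'topic1',
         kafka_group_name = 'group1',
         kafka_format = 'JSONEachRow';
```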
|
||||
|
||||
|
@ -59,10 +59,26 @@ Optional parameters:
|
||||
- `rabbitmq_max_block_size`
|
||||
- `rabbitmq_flush_interval_ms`
|
||||
|
||||
Required configuration:
|
||||
Format settings can also be added along with the RabbitMQ-related settings.
|
||||
|
||||
Example:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE queue (
|
||||
key UInt64,
|
||||
value UInt64,
|
||||
date DateTime
|
||||
) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
|
||||
rabbitmq_exchange_name = 'exchange1',
|
||||
rabbitmq_format = 'JSONEachRow',
|
||||
rabbitmq_num_consumers = 5,
|
||||
date_time_input_format = 'best_effort';
|
||||
```
|
||||
|
||||
The RabbitMQ server configuration should be added using the ClickHouse config file.
|
||||
|
||||
Required configuration:
|
||||
|
||||
``` xml
|
||||
<rabbitmq>
|
||||
<username>root</username>
|
||||
@ -70,16 +86,12 @@ The RabbitMQ server configuration should be added using the ClickHouse config fi
|
||||
</rabbitmq>
|
||||
```
|
||||
|
||||
Example:
|
||||
Additional configuration:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE queue (
|
||||
key UInt64,
|
||||
value UInt64
|
||||
) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672',
|
||||
rabbitmq_exchange_name = 'exchange1',
|
||||
rabbitmq_format = 'JSONEachRow',
|
||||
rabbitmq_num_consumers = 5;
|
||||
``` xml
|
||||
<rabbitmq>
|
||||
<vhost>clickhouse</vhost>
|
||||
</rabbitmq>
|
||||
```
|
||||
|
||||
## Description {#description}
|
||||
@ -105,6 +117,7 @@ Exchange type options:
|
||||
- `consistent_hash` - Data is evenly distributed between all bound tables (where the exchange name is the same). Note that this exchange type must be enabled with RabbitMQ plugin: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`.
|
||||
|
||||
Setting `rabbitmq_queue_base` may be used for the following cases:
|
||||
|
||||
- to let different tables share queues, so that multiple consumers can be registered for the same queues, which improves performance. If the `rabbitmq_num_consumers` and/or `rabbitmq_num_queues` settings are used, the exact match of queues is achieved when these parameters are the same.
|
||||
- to be able to restore reading from certain durable queues when not all messages were successfully consumed. To resume consumption from one specific queue - set its name in `rabbitmq_queue_base` setting and do not specify `rabbitmq_num_consumers` and `rabbitmq_num_queues` (defaults to 1). To resume consumption from all queues, which were declared for a specific table - just specify the same settings: `rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`. By default, queue names will be unique to tables.
|
||||
- to reuse queues as they are declared durable and not auto-deleted. (Can be deleted via any of RabbitMQ CLI tools.)
|
||||
|
@ -136,8 +136,7 @@ The following settings can be specified in configuration file for given endpoint
|
||||
- `access_key_id` and `secret_access_key` — Optional. Specifies credentials to use with given endpoint.
|
||||
- `use_environment_credentials` — Optional, default value is `false`. If set to `true`, S3 client will try to obtain credentials from environment variables and Amazon EC2 metadata for given endpoint.
|
||||
- `header` — Optional, can be specified multiple times. Adds the specified HTTP header to a request to the given endpoint.
|
||||
|
||||
This configuration also applies to S3 disks in `MergeTree` table engine family.
|
||||
- `server_side_encryption_customer_key_base64` — Optional. If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
|
||||
Example:
|
||||
|
||||
@ -149,6 +148,7 @@ Example:
|
||||
<!-- <secret_access_key>SECRET_ACCESS_KEY</secret_access_key> -->
|
||||
<!-- <use_environment_credentials>false</use_environment_credentials> -->
|
||||
<!-- <header>Authorization: Bearer SOME-TOKEN</header> -->
|
||||
<!-- <server_side_encryption_customer_key_base64>BASE64-ENCODED-KEY</server_side_encryption_customer_key_base64> -->
|
||||
</endpoint-name>
|
||||
</s3>
|
||||
```
|
||||
|
@ -715,6 +715,7 @@ Configuration markup:
|
||||
<endpoint>https://storage.yandexcloud.net/my-bucket/root-path/</endpoint>
|
||||
<access_key_id>your_access_key_id</access_key_id>
|
||||
<secret_access_key>your_secret_access_key</secret_access_key>
|
||||
<server_side_encryption_customer_key_base64>your_base64_encoded_customer_key</server_side_encryption_customer_key_base64>
|
||||
<proxy>
|
||||
<uri>http://proxy1</uri>
|
||||
<uri>http://proxy2</uri>
|
||||
@ -750,7 +751,8 @@ Optional parameters:
|
||||
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
|
||||
- `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`.
|
||||
- `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
|
||||
- `skip_access_check` — If true disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
|
||||
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
|
||||
|
||||
|
||||
S3 disk can be configured as `main` or `cold` storage:
|
||||
|
@ -66,7 +66,8 @@ SELECT * FROM file_engine_table
|
||||
|
||||
## Usage in ClickHouse-local {#usage-in-clickhouse-local}
|
||||
|
||||
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) File engine accepts file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`.
|
||||
In [clickhouse-local](../../../operations/utilities/clickhouse-local.md) the File engine accepts a file path in addition to `Format`. Default input/output streams can be specified using numeric or human-readable names like `0` or `stdin`, `1` or `stdout`. It is possible to read and write compressed files based on an additional engine parameter or file extension (`gz`, `br` or `xz`).
|
||||
|
||||
**Example:**
|
||||
|
||||
``` bash
|
||||
|
@ -39,4 +39,4 @@ More details on [manipulating partitions](../../sql-reference/statements/alter/p
|
||||
|
||||
It’s rather radical to drop all data from a table, but in some cases it might be exactly what you need.
|
||||
|
||||
More details on [table truncation](../../sql-reference/statements/alter/partition.md#alter_drop-partition).
|
||||
More details on [table truncation](../../sql-reference/statements/truncate.md).
|
||||
|
@ -5,7 +5,7 @@ toc_title: Brown University Benchmark
|
||||
|
||||
# Brown University Benchmark
|
||||
|
||||
MgBench - A new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
|
||||
`MgBench` is a new analytical benchmark for machine-generated log data, [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
|
||||
|
||||
Download the data:
|
||||
```
|
||||
@ -153,7 +153,7 @@ ORDER BY dt,
|
||||
hr;
|
||||
|
||||
|
||||
-- Q1.4: Over a 1-month period, how often was each server blocked on disk I/O?
|
||||
-- Q1.4: Over 1 month, how often was each server blocked on disk I/O?
|
||||
|
||||
SELECT machine_name,
|
||||
COUNT(*) AS spikes
|
||||
@ -301,7 +301,7 @@ WHERE event_type = 'temperature'
|
||||
AND log_time >= '2019-11-29 17:00:00.000';
|
||||
|
||||
|
||||
-- Q3.4: Over the past 6 months, how frequently was each door opened?
|
||||
-- Q3.4: Over the past 6 months, how frequently were each door opened?
|
||||
|
||||
SELECT device_name,
|
||||
device_floor,
|
||||
@ -412,3 +412,5 @@ ORDER BY yr,
|
||||
```
|
||||
|
||||
The data is also available for interactive queries in the [Playground](https://gh-api.clickhouse.tech/play?user=play), [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets/brown-benchmark/) <!--hide-->
|
||||
|
133
docs/en/getting-started/example-datasets/cell-towers.md
Normal file
File diff suppressed because one or more lines are too long
@ -20,5 +20,6 @@ The list of documented datasets:
|
||||
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)
|
||||
- [AMPLab Big Data Benchmark](../../getting-started/example-datasets/amplab-benchmark.md)
|
||||
- [Brown University Benchmark](../../getting-started/example-datasets/brown-benchmark.md)
|
||||
- [Cell Towers](../../getting-started/example-datasets/cell-towers.md)
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/getting_started/example_datasets) <!--hide-->
|
||||
|
@ -15,17 +15,9 @@ This dataset can be obtained in two ways:
|
||||
Downloading data:
|
||||
|
||||
``` bash
|
||||
for s in `seq 1987 2018`
|
||||
do
|
||||
for m in `seq 1 12`
|
||||
do
|
||||
wget https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_${s}_${m}.zip
|
||||
done
|
||||
done
|
||||
echo https://transtats.bts.gov/PREZIP/On_Time_Reporting_Carrier_On_Time_Performance_1987_present_{1987..2021}_{1..12}.zip | xargs -P10 wget --no-check-certificate --continue
|
||||
```
|
||||
|
||||
(from https://github.com/Percona-Lab/ontime-airline-performance/blob/master/download.sh )
|
||||
|
||||
Creating a table:
|
||||
|
||||
``` sql
|
||||
@ -145,12 +137,14 @@ ORDER BY (Carrier, FlightDate)
|
||||
SETTINGS index_granularity = 8192;
|
||||
```
|
||||
|
||||
Loading data:
|
||||
Loading data with multiple threads:
|
||||
|
||||
``` bash
|
||||
$ for i in *.zip; do echo $i; unzip -cq $i '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --host=example-perftest01j --query="INSERT INTO ontime FORMAT CSVWithNames"; done
|
||||
ls -1 *.zip | xargs -I{} -P $(nproc) bash -c "echo {}; unzip -cq {} '*.csv' | sed 's/\.00//g' | clickhouse-client --input_format_with_names_use_header=0 --query='INSERT INTO ontime FORMAT CSVWithNames'"
|
||||
```
|
||||
|
||||
(if you run into a memory shortage or other issues on your server, remove the `-P $(nproc)` part)
|
||||
|
||||
## Download of Prepared Partitions {#download-of-prepared-partitions}
|
||||
|
||||
``` bash
|
||||
|
@ -644,7 +644,7 @@ If there are no replicas at the moment on replicated table creation, a new first
|
||||
|
||||
``` sql
|
||||
CREATE TABLE tutorial.hits_replica (...)
|
||||
ENGINE = ReplcatedMergeTree(
|
||||
ENGINE = ReplicatedMergeTree(
|
||||
'/clickhouse_perftest/tables/{shard}/hits',
|
||||
'{replica}'
|
||||
)
|
||||
|
@ -31,8 +31,8 @@ The supported formats are:
|
||||
| [JSONCompactString](#jsoncompactstring) | ✗ | ✔ |
|
||||
| [JSONEachRow](#jsoneachrow) | ✔ | ✔ |
|
||||
| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONStringEachRow](#jsonstringeachrow) | ✔ | ✔ |
|
||||
| [JSONStringEachRowWithProgress](#jsonstringeachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
|
||||
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
|
||||
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
|
||||
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
|
||||
| [JSONCompactStringEachRow](#jsoncompactstringeachrow) | ✔ | ✔ |
|
||||
@ -612,7 +612,7 @@ Example:
|
||||
```
|
||||
|
||||
## JSONEachRow {#jsoneachrow}
|
||||
## JSONStringEachRow {#jsonstringeachrow}
|
||||
## JSONStringsEachRow {#jsonstringseachrow}
|
||||
## JSONCompactEachRow {#jsoncompacteachrow}
|
||||
## JSONCompactStringEachRow {#jsoncompactstringeachrow}
|
||||
|
||||
@ -627,9 +627,9 @@ When using these formats, ClickHouse outputs rows as separated, newline-delimite
|
||||
When inserting the data, you should provide a separate JSON value for each row.
|
||||
|
||||
## JSONEachRowWithProgress {#jsoneachrowwithprogress}
|
||||
## JSONStringEachRowWithProgress {#jsonstringeachrowwithprogress}
|
||||
## JSONStringsEachRowWithProgress {#jsonstringseachrowwithprogress}
|
||||
|
||||
Differs from `JSONEachRow`/`JSONStringEachRow` in that ClickHouse will also yield progress information as JSON values.
|
||||
Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yield progress information as JSON values.
|
||||
|
||||
```json
|
||||
{"row":{"'hello'":"hello","multiply(42, number)":"0","range(5)":[0,1,2,3,4]}}
|
||||
|
@ -148,28 +148,48 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
|
||||
|
||||
For successful requests that don’t return a data table, an empty response body is returned.
|
||||
|
||||
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
|
||||
|
||||
If you specified `compress=1` in the URL, the server compresses the data it sends you.
|
||||
If you specified `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
|
||||
## Compression {#compression}
|
||||
|
||||
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`. In order for ClickHouse to compress the response, you must append `Accept-Encoding: compression_method`. ClickHouse supports `gzip`, `br`, and `deflate` [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens). To enable HTTP compression, you must use the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the data compression level in the [http_zlib_compression_level](#settings-http_zlib_compression_level) setting for all the compression methods.
|
||||
You can use compression to reduce network traffic when transmitting a large amount of data or for creating dumps that are immediately compressed.
|
||||
|
||||
You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
|
||||
You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you need the `clickhouse-compressor` program to work with it. It is installed with the `clickhouse-client` package. To increase the efficiency of data insertion, you can disable server-side checksum verification by using the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
|
||||
|
||||
Examples of sending data with compression:
|
||||
If you specify `compress=1` in the URL, the server will compress the data it sends to you. If you specify `decompress=1` in the URL, the server will decompress the data which you pass in the `POST` method.
|
||||
|
||||
``` bash
|
||||
#Sending data to the server:
|
||||
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
|
||||
You can also choose to use [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). ClickHouse supports the following [compression methods](https://en.wikipedia.org/wiki/HTTP_compression#Content-Encoding_tokens):
|
||||
|
||||
#Sending data to the client:
|
||||
$ echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
|
||||
```
|
||||
- `gzip`
|
||||
- `br`
|
||||
- `deflate`
|
||||
- `xz`
|
||||
|
||||
To send a compressed `POST` request, append the request header `Content-Encoding: compression_method`.
|
||||
In order for ClickHouse to compress the response, enable compression with [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting and append `Accept-Encoding: compression_method` header to the request. You can configure the data compression level in the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting for all compression methods.
|
||||
|
||||
!!! note "Note"
|
||||
Some HTTP clients might decompress data from the server by default (with `gzip` and `deflate`) and you might get decompressed data even if you use the compression settings correctly.
|
||||
|
||||
**Examples**
|
||||
|
||||
``` bash
|
||||
# Sending compressed data to the server
|
||||
$ echo "SELECT 1" | gzip -c | \
|
||||
curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
|
||||
```
|
||||
|
||||
``` bash
|
||||
# Receiving compressed data from the server
|
||||
$ curl -vsS "http://localhost:8123/?enable_http_compression=1" \
|
||||
-H 'Accept-Encoding: gzip' --output result.gz -d 'SELECT number FROM system.numbers LIMIT 3'
|
||||
$ zcat result.gz
|
||||
0
|
||||
1
|
||||
2
|
||||
```
|
||||
|
||||
## Default Database {#default-database}
|
||||
|
||||
You can use the ‘database’ URL parameter or the ‘X-ClickHouse-Database’ header to specify the default database.
|
||||
|
||||
``` bash
|
||||
|
@ -5,7 +5,7 @@ toc_title: Data Backup
|
||||
|
||||
# Data Backup {#data-backup}
|
||||
|
||||
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](https://github.com/ClickHouse/ClickHouse/blob/v18.14.18-stable/programs/server/config.xml#L322-L330). However, these safeguards don’t cover all possible cases and can be circumvented.
|
||||
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards don’t cover all possible cases and can be circumvented.
|
||||
|
||||
In order to effectively mitigate possible human errors, you should carefully prepare a strategy for backing up and restoring your data **in advance**.
|
||||
|
||||
|
@ -8,18 +8,21 @@ toc_title: Caches
|
||||
When performing queries, ClickHouse uses different caches.
|
||||
|
||||
Main cache types:
|
||||
|
||||
- `mark_cache` — Cache of marks used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
|
||||
- `uncompressed_cache` — Cache of uncompressed data used by table engines of the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) family.
|
||||
|
||||
Additional cache types:
|
||||
- DNS cache
|
||||
- [regexp](../interfaces/formats.md#data-format-regexp) cache
|
||||
- compiled expressions cache
|
||||
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache
|
||||
- [dictionaries data cache](../sql-reference/dictionaries/index.md)
|
||||
|
||||
- DNS cache.
|
||||
- [Regexp](../interfaces/formats.md#data-format-regexp) cache.
|
||||
- Compiled expressions cache.
|
||||
- [Avro format](../interfaces/formats.md#data-format-avro) schemas cache.
|
||||
- [Dictionaries](../sql-reference/dictionaries/index.md) data cache.
|
||||
|
||||
Indirectly used:
|
||||
- OS page cache
|
||||
|
||||
- OS page cache.
|
||||
|
||||
To drop cache, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md) statements.
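For example, the caches listed above can be reset explicitly with the corresponding statements (a minimal sketch; run these only when you deliberately want to drop cached state):

``` sql
SYSTEM DROP DNS CACHE;
SYSTEM DROP MARK CACHE;
SYSTEM DROP UNCOMPRESSED CACHE;
```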
|
||||
|
||||
|
13
docs/en/operations/external-authenticators/index.md
Normal file
@ -0,0 +1,13 @@
|
||||
---
|
||||
toc_folder_title: External User Authenticators and Directories
|
||||
toc_priority: 48
|
||||
toc_title: Introduction
|
||||
---
|
||||
|
||||
# External User Authenticators and Directories {#external-authenticators}
|
||||
|
||||
ClickHouse supports authenticating and managing users using external services.
|
||||
|
||||
The following external authenticators and directories are supported:
|
||||
|
||||
- [LDAP](./ldap.md#external-authenticators-ldap) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory)
|
156
docs/en/operations/external-authenticators/ldap.md
Normal file
@ -0,0 +1,156 @@
|
||||
# LDAP {#external-authenticators-ldap}
|
||||
|
||||
LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:
|
||||
|
||||
- use LDAP as an external authenticator for existing users, which are defined in `users.xml` or in local access control paths
|
||||
- use LDAP as an external user directory and allow locally undefined users to be authenticated if they exist on the LDAP server
|
||||
|
||||
For both of these approaches, an internally named LDAP server must be defined in the ClickHouse config so that other parts of the config can refer to it.
|
||||
|
||||
## LDAP Server Definition {#ldap-server-definition}
|
||||
|
||||
To define an LDAP server, you must add an `ldap_servers` section to `config.xml`. For example,
|
||||
|
||||
```xml
|
||||
<yandex>
|
||||
<!-- ... -->
|
||||
<ldap_servers>
|
||||
<my_ldap_server>
|
||||
<host>localhost</host>
|
||||
<port>636</port>
|
||||
<bind_dn>uid={user_name},ou=users,dc=example,dc=com</bind_dn>
|
||||
<verification_cooldown>300</verification_cooldown>
|
||||
<enable_tls>yes</enable_tls>
|
||||
<tls_minimum_protocol_version>tls1.2</tls_minimum_protocol_version>
|
||||
<tls_require_cert>demand</tls_require_cert>
|
||||
<tls_cert_file>/path/to/tls_cert_file</tls_cert_file>
|
||||
<tls_key_file>/path/to/tls_key_file</tls_key_file>
|
||||
<tls_ca_cert_file>/path/to/tls_ca_cert_file</tls_ca_cert_file>
|
||||
<tls_ca_cert_dir>/path/to/tls_ca_cert_dir</tls_ca_cert_dir>
|
||||
<tls_cipher_suite>ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:AES256-GCM-SHA384</tls_cipher_suite>
|
||||
</my_ldap_server>
|
||||
</ldap_servers>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
Note that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
|
||||
|
||||
Parameters:
|
||||
|
||||
- `host` - LDAP server hostname or IP, this parameter is mandatory and cannot be empty.
|
||||
- `port` - LDAP server port, default is `636` if `enable_tls` is set to `true`, `389` otherwise.
|
||||
- `bind_dn` - template used to construct the DN to bind to.
|
||||
- The resulting DN will be constructed by replacing all `{user_name}` substrings of the
|
||||
template with the actual user name during each authentication attempt.
|
||||
- `verification_cooldown` - a period of time, in seconds, after a successful bind attempt,
|
||||
during which the user will be assumed to be successfully authenticated for all consecutive
|
||||
requests without contacting the LDAP server.
|
||||
- Specify `0` (the default) to disable caching and force contacting the LDAP server for each authentication request.
|
||||
- `enable_tls` - flag to trigger use of secure connection to the LDAP server.
|
||||
- Specify `no` for plain text `ldap://` protocol (not recommended).
|
||||
- Specify `yes` for LDAP over SSL/TLS `ldaps://` protocol (recommended, the default).
|
||||
- Specify `starttls` for legacy StartTLS protocol (plain text `ldap://` protocol, upgraded to TLS).
|
||||
- `tls_minimum_protocol_version` - the minimum protocol version of SSL/TLS.
|
||||
- Accepted values are: `ssl2`, `ssl3`, `tls1.0`, `tls1.1`, `tls1.2` (the default).
|
||||
- `tls_require_cert` - SSL/TLS peer certificate verification behavior.
|
||||
- Accepted values are: `never`, `allow`, `try`, `demand` (the default).
|
||||
- `tls_cert_file` - path to certificate file.
|
||||
- `tls_key_file` - path to certificate key file.
|
||||
- `tls_ca_cert_file` - path to CA certificate file.
|
||||
- `tls_ca_cert_dir` - path to the directory containing CA certificates.
|
||||
- `tls_cipher_suite` - allowed cipher suite (in OpenSSL notation).
|
||||
|
||||
## LDAP External Authenticator {#ldap-external-authenticator}
|
||||
|
||||
A remote LDAP server can be used as a method for verifying passwords for locally defined users (users defined in `users.xml` or in local access control paths). In order to achieve this, specify previously defined LDAP server name instead of `password` or similar sections in the user definition.
|
||||
|
||||
At each login attempt, ClickHouse will try to "bind" to the specified DN defined by the `bind_dn` parameter in the [LDAP server definition](#ldap-server-definition) using the provided credentials, and if successful, the user will be considered authenticated. This is often called a "simple bind" method.
|
||||
|
||||
For example,
|
||||
|
||||
```xml
|
||||
<yandex>
|
||||
<!-- ... -->
|
||||
<users>
|
||||
<!-- ... -->
|
||||
<my_user>
|
||||
<!-- ... -->
|
||||
<ldap>
|
||||
<server>my_ldap_server</server>
|
||||
</ldap>
|
||||
</my_user>
|
||||
</users>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
Note that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
|
||||
|
||||
When SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled in ClickHouse, users that are authenticated by LDAP servers can also be created using the [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement) statement.
|
||||
|
||||
|
||||
```sql
|
||||
CREATE USER my_user IDENTIFIED WITH ldap_server BY 'my_ldap_server'
|
||||
```
|
||||
|
||||
## LDAP External User Directory {#ldap-external-user-directory}
|
||||
|
||||
In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. In order to achieve this, specify previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.
|
||||
|
||||
At each login attempt, ClickHouse will try to find the user definition locally and authenticate it as usual, but if the user is not defined, ClickHouse will assume it exists in the external LDAP directory, and will try to "bind" to the specified DN at the LDAP server using the provided credentials. If successful, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. Additionally, LDAP "search" can be performed and results can be transformed and treated as role names and then be assigned to the user if the `role_mapping` section is also configured. All this implies that the SQL-driven [Access Control and Account Management](../access-rights.md#access-control) is enabled and roles are created using the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
|
||||
|
||||
Example (goes into `config.xml`):
|
||||
|
||||
```xml
|
||||
<yandex>
|
||||
<!-- ... -->
|
||||
<user_directories>
|
||||
<!-- ... -->
|
||||
<ldap>
|
||||
<server>my_ldap_server</server>
|
||||
<roles>
|
||||
<my_local_role1 />
|
||||
<my_local_role2 />
|
||||
</roles>
|
||||
<role_mapping>
|
||||
<base_dn>ou=groups,dc=example,dc=com</base_dn>
|
||||
<scope>subtree</scope>
|
||||
<search_filter>(&(objectClass=groupOfNames)(member={bind_dn}))</search_filter>
|
||||
<attribute>cn</attribute>
|
||||
<prefix>clickhouse_</prefix>
|
||||
</role_mapping>
|
||||
</ldap>
|
||||
</user_directories>
|
||||
</yandex>
|
||||
```
|
||||
|
||||
Note that `my_ldap_server` referred to in the `ldap` section inside the `user_directories` section must be a previously
|
||||
defined LDAP server that is configured in the `config.xml` (see [LDAP Server Definition](#ldap-server-definition)).
|
||||
|
||||
Parameters:
|
||||
|
||||
- `server` - one of LDAP server names defined in the `ldap_servers` config section above.
|
||||
This parameter is mandatory and cannot be empty.
|
||||
- `roles` - section with a list of locally defined roles that will be assigned to each user retrieved from the LDAP server.
|
||||
- If no roles are specified here or assigned during role mapping (below), user will not be able
|
||||
to perform any actions after authentication.
|
||||
- `role_mapping` - section with LDAP search parameters and mapping rules.
|
||||
- When a user authenticates, while still bound to LDAP, an LDAP search is performed using `search_filter`
|
||||
and the name of the logged in user. For each entry found during that search, the value of the specified
|
||||
attribute is extracted. For each attribute value that has the specified prefix, the prefix is removed,
|
||||
and the rest of the value becomes the name of a local role defined in ClickHouse,
|
||||
which is expected to be created beforehand by the [CREATE ROLE](../../sql-reference/statements/create/role.md#create-role-statement) statement.
|
||||
- There can be multiple `role_mapping` sections defined inside the same `ldap` section. All of them will be applied.
|
||||
- `base_dn` - template used to construct the base DN for the LDAP search.
|
||||
- The resulting DN will be constructed by replacing all `{user_name}` and `{bind_dn}`
|
||||
substrings of the template with the actual user name and bind DN during each LDAP search.
|
||||
- `scope` - scope of the LDAP search.
|
||||
- Accepted values are: `base`, `one_level`, `children`, `subtree` (the default).
|
||||
- `search_filter` - template used to construct the search filter for the LDAP search.
|
||||
- The resulting filter will be constructed by replacing all `{user_name}`, `{bind_dn}`, and `{base_dn}`
|
||||
substrings of the template with the actual user name, bind DN, and base DN during each LDAP search.
|
||||
- Note that the special characters must be escaped properly in XML.
|
||||
- `attribute` - attribute name whose values will be returned by the LDAP search.
|
||||
- `prefix` - prefix, that will be expected to be in front of each string in the original
|
||||
list of strings returned by the LDAP search. Prefix will be removed from the original
|
||||
strings and resulting strings will be treated as local role names. Empty, by default.
|
||||
|
@ -139,7 +139,7 @@ You can assign a quotas set for the user. For a detailed description of quotas c
|
||||
|
||||
### user_name/databases {#user-namedatabases}
|
||||
|
||||
In this section, you can you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
|
||||
In this section, you can limit rows that are returned by ClickHouse for `SELECT` queries made by the current user, thus implementing basic row-level security.
|
||||
|
||||
**Example**
|
||||
|
||||
|
@ -312,7 +312,7 @@ Enables or disables parsing enum values as enum ids for TSV input format.
|
||||
Possible values:
|
||||
|
||||
- 0 — Enum values are parsed as values.
|
||||
- 1 — Enum values are parsed as enum IDs
|
||||
- 1 — Enum values are parsed as enum IDs.
|
||||
|
||||
Default value: 0.
|
||||
|
||||
@ -1104,7 +1104,7 @@ The maximum number of replicas for each shard when executing a query. In limited
|
||||
- the sampling key is an expression that is expensive to calculate
|
||||
- the cluster's latency distribution has a long tail, so that querying more servers increases the query's overall latency
|
||||
|
||||
In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
|
||||
In addition, this setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain conditions. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
|
||||
|
||||
## compile {#compile}
|
||||
|
||||
@ -1956,8 +1956,8 @@ Default value: 16.
|
||||
|
||||
**See Also**
|
||||
|
||||
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine
|
||||
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine
|
||||
- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine.
|
||||
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine.
|
||||
|
||||
## validate_polygons {#validate_polygons}
|
||||
|
||||
@ -2592,6 +2592,18 @@ Possible values:
|
||||
|
||||
Default value: `16`.
|
||||
|
||||
## opentelemetry_start_trace_probability {#opentelemetry-start-trace-probability}
|
||||
|
||||
Sets the probability that ClickHouse can start a trace for executed queries (if no parent [trace context](https://www.w3.org/TR/trace-context/) is supplied).
|
||||
|
||||
Possible values:
|
||||
|
||||
- 0 — The trace for all executed queries is disabled (if no parent trace context is supplied).
|
||||
- Positive floating-point number in the range [0..1]. For example, if the setting value is `0.5`, ClickHouse can start a trace on average for half of the queries.
|
||||
- 1 — The trace for all executed queries is enabled.
|
||||
|
||||
Default value: `0`.
|
||||
|
||||
## optimize_on_insert {#optimize-on-insert}
|
||||
|
||||
Enables or disables data transformation before the insertion, as if merge was done on this block (according to table engine).
|
||||
@ -2647,3 +2659,23 @@ Result:
|
||||
Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) behaviour.
|
||||
|
||||
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
|
||||
|
||||
## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
|
||||
|
||||
Allows selecting data from a file engine table when the underlying file does not exist.
|
||||
|
||||
Possible values:
|
||||
- 0 — `SELECT` throws exception.
|
||||
- 1 — `SELECT` returns empty result.
|
||||
|
||||
Default value: `0`.
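A short sketch, assuming a hypothetical `file_table` backed by the File engine whose underlying data file has not been created yet:

``` sql
SET engine_file_empty_if_not_exists = 1;
SELECT * FROM file_table; -- returns an empty result instead of throwing an exception
```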
|
||||
|
||||
## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}
|
||||
|
||||
Enables or disables truncate before insert in file engine tables.
|
||||
|
||||
Possible values:
|
||||
- 0 — Disabled.
|
||||
- 1 — Enabled.
|
||||
|
||||
Default value: `0`.
|
||||
|
@ -14,7 +14,7 @@ Columns:
|
||||
- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
|
||||
- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
|
||||
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
|
||||
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/datetime64.md)) — Duration of query execution (in milliseconds).
|
||||
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds).
|
||||
- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ZooKeeper](../../operations/tips.md#zookeeper).
|
||||
|
||||
**Example**
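A minimal query sketch (assuming these columns describe the `system.distributed_ddl_queue` table; output omitted):

``` sql
SELECT * FROM system.distributed_ddl_queue LIMIT 1 FORMAT Vertical;
```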
|
||||
|
@ -20,7 +20,7 @@ System tables:

Most system tables store their data in RAM. A ClickHouse server creates such system tables at startup.

Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), crash_log and [text_log](../../operations/system-tables/text_log.md) are served by the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a storage filesystem by default. If you remove a table from the filesystem, the ClickHouse server creates an empty one again at the time of the next data writing. If a system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) and [text_log](../../operations/system-tables/text_log.md) are served by the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from the filesystem, the ClickHouse server creates an empty one again at the time of the next data writing. If a system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.

System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or by setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements that can be customized are:

@ -33,7 +33,7 @@ System log tables can be customized by creating a config file with the same name

An example:

```
```xml
<yandex>
    <query_log>
        <database>system</database>

53
docs/en/operations/system-tables/opentelemetry_span_log.md
Normal file
@ -0,0 +1,53 @@
# system.opentelemetry_span_log {#system_tables-opentelemetry_span_log}

Contains information about [trace spans](https://opentracing.io/docs/overview/spans/) for executed queries.

Columns:

- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for the executed query.

- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.

- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`.

- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.

- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds).

- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds).

- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`.

- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard.

- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.

**Example**

Query:

``` sql
SELECT * FROM system.opentelemetry_span_log LIMIT 1 FORMAT Vertical;
```

Result:

``` text
Row 1:
──────
trace_id:         cdab0847-0d62-61d5-4d38-dd65b19a1914
span_id:          701487461015578150
parent_span_id:   2991972114672045096
operation_name:   DB::Block DB::InterpreterSelectQuery::getSampleBlockImpl()
start_time_us:    1612374594529090
finish_time_us:   1612374594529108
finish_date:      2021-02-03
attribute.names:  []
attribute.values: []
```

**See Also**

- [OpenTelemetry](../../operations/opentelemetry.md)

[Original article](https://clickhouse.tech/docs/en/operations/system_tables/opentelemetry_span_log) <!--hide-->

@ -1,9 +1,9 @@

---
toc_priority: 47
toc_title: ClickHouse Update
toc_title: ClickHouse Upgrade
---

# ClickHouse Update {#clickhouse-update}
# ClickHouse Upgrade {#clickhouse-upgrade}

If ClickHouse was installed from `deb` packages, execute the following commands on the server:

@ -16,3 +16,19 @@ $ sudo service clickhouse-server restart

If you installed ClickHouse using something other than the recommended `deb` packages, use the appropriate update method.

ClickHouse does not support a distributed update. The operation should be performed consecutively on each separate server. Do not update all the servers on a cluster simultaneously, or the cluster will be unavailable for some time.

To upgrade an older version of ClickHouse to a specific version:

As an example:

`xx.yy.a.b` is a current stable version. The latest stable version can be found [here](https://github.com/ClickHouse/ClickHouse/releases).

```bash
$ sudo apt-get update
$ sudo apt-get install clickhouse-server=xx.yy.a.b clickhouse-client=xx.yy.a.b clickhouse-common-static=xx.yy.a.b
$ sudo service clickhouse-server restart
```

@ -91,6 +91,8 @@ $ clickhouse-local --query "

Now let’s output memory usage for each Unix user:

Query:

``` bash
$ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
    | clickhouse-local --structure "user String, mem Float64" \
@ -98,6 +100,8 @@ $ ps aux | tail -n +2 | awk '{ printf("%s\t%s\n", $1, $4) }' \
    FROM table GROUP BY user ORDER BY memTotal DESC FORMAT Pretty"
```

Result:

``` text
Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
┏━━━━━━━━━━┳━━━━━━━━━━┓

@ -72,7 +72,7 @@ If an aggregate function doesn’t have input values, with this combinator it re

<aggFunction>OrDefault(x)
```

**Parameters**
**Arguments**

- `x` — Aggregate function parameters.

@ -132,7 +132,7 @@ This combinator converts a result of an aggregate function to the [Nullable](../

<aggFunction>OrNull(x)
```

**Parameters**
**Arguments**

- `x` — Aggregate function parameters.

@ -189,7 +189,7 @@ Lets you divide data into groups, and then separately aggregates the data in tho

<aggFunction>Resample(start, end, step)(<aggFunction_params>, resampling_key)
```

**Parameters**
**Arguments**

- `start` — Starting value of the whole required interval for `resampling_key` values.
- `stop` — Ending value of the whole required interval for `resampling_key` values. The whole interval doesn’t include the `stop` value `[start, stop)`.

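A brief illustration of these combinators; the input rows in the second query are invented for the sketch:

``` sql
-- Over an empty set, plain avg returns nan, -OrDefault returns the type's default value,
-- and -OrNull wraps the result in Nullable and returns NULL.
SELECT avg(number), avgOrDefault(number), avgOrNull(number) FROM numbers(0);

-- -Resample(30, 90, 30) splits the rows into the [30, 60) and [60, 90) age groups
-- and counts each group separately; the value 25 falls outside the range and is not counted.
SELECT countResample(30, 90, 30)(age, age) FROM (SELECT arrayJoin([25, 35, 45, 65]) AS age);
```
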
@ -17,10 +17,13 @@ histogram(number_of_bins)(values)

The function uses [A Streaming Parallel Decision Tree Algorithm](http://jmlr.org/papers/volume11/ben-haim10a/ben-haim10a.pdf). The borders of histogram bins are adjusted as new data enters the function. In the common case, the widths of the bins are not equal.

**Arguments**

`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values.

**Parameters**

`number_of_bins` — Upper limit for the number of bins in the histogram. The function automatically calculates the number of bins. It tries to reach the specified number of bins, but if it fails, it uses fewer bins.
`values` — [Expression](../../sql-reference/syntax.md#syntax-expressions) resulting in input values.

**Returned values**

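A small usage sketch, close to the reference example for this function:

``` sql
-- Builds an adaptive histogram with at most 5 bins over the numbers 1..20;
-- each element of the result is a (lower_bound, upper_bound, height) tuple.
SELECT histogram(5)(number + 1) FROM numbers(20);
```
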
@ -89,14 +92,16 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)

!!! warning "Warning"
    Events that occur at the same second may lie in the sequence in an undefined order, affecting the result.

**Parameters**

- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
**Arguments**

- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types.

- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them.

**Parameters**

- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).

**Returned values**

- 1, if the pattern is matched.

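A minimal sketch of a call; the input rows are generated inline:

``` sql
-- Returns 1: the event matching condition 1 (number = 1) is followed by
-- the event matching condition 2 (number = 2) later in time.
SELECT sequenceMatch('(?1)(?2)')(ts, number = 1, number = 2)
FROM (SELECT number, toDateTime('2021-01-01 00:00:00') + number AS ts FROM numbers(5));
```
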
@ -176,14 +181,16 @@ Counts the number of event chains that matched the pattern. The function searche

sequenceCount(pattern)(timestamp, cond1, cond2, ...)
```

**Parameters**

- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).
**Arguments**

- `timestamp` — Column considered to contain time data. Typical data types are `Date` and `DateTime`. You can also use any of the supported [UInt](../../sql-reference/data-types/int-uint.md) data types.

- `cond1`, `cond2` — Conditions that describe the chain of events. Data type: `UInt8`. You can pass up to 32 condition arguments. The function takes only the events described in these conditions into account. If the sequence contains data that isn’t described in a condition, the function skips them.

**Parameters**

- `pattern` — Pattern string. See [Pattern syntax](#sequence-function-pattern-syntax).

**Returned values**

- Number of non-overlapping event chains that are matched.

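A small sketch with inline test data:

``` sql
-- The generated events are 1, 2, 1, 2 when ordered by ts, so the chain "1 then 2"
-- occurs twice without overlapping and the function returns 2.
SELECT sequenceCount('(?1)(?2)')(ts, ev = 1, ev = 2)
FROM (SELECT number % 2 + 1 AS ev, toDateTime('2021-01-01 00:00:00') + number AS ts FROM numbers(4));
```
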
@ -239,14 +246,17 @@ The function works according to the algorithm:

windowFunnel(window, [mode])(timestamp, cond1, cond2, ..., condN)
```

**Parameters**
**Arguments**

- `window` — Length of the sliding window. The unit of `window` depends on the timestamp itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` - It is an optional argument.
    - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.
- `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types (note that even though timestamp supports the `UInt64` type, its value can’t exceed the Int64 maximum, which is 2^63 - 1).
- `cond` — Conditions or data describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md).

**Parameters**

- `window` — Length of the sliding window. The unit of `window` depends on the `timestamp` itself and varies. Determined using the expression `timestamp of cond2 <= timestamp of cond1 + window`.
- `mode` - It is an optional argument.
    - `'strict'` - When the `'strict'` is set, the windowFunnel() applies conditions only for the unique values.

**Returned value**

The maximum number of consecutive triggered conditions from the chain within the sliding time window.

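A hedged sketch over a hypothetical `events` table with `user_id`, `event` and `event_time` columns:

``` sql
-- For each user, how many consecutive steps of the funnel view -> cart -> purchase
-- were completed within a 3600-second window.
SELECT
    user_id,
    windowFunnel(3600)(event_time, event = 'view', event = 'cart', event = 'purchase') AS level
FROM events
GROUP BY user_id;
```
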
@ -324,7 +334,7 @@ The conditions, except the first, apply in pairs: the result of the second will

retention(cond1, cond2, ..., cond32);
```

**Parameters**
**Arguments**

- `cond` — An expression that returns a `UInt8` result (1 or 0).

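A hedged sketch over a hypothetical `visits` table with `uid` and `date` columns:

``` sql
-- Per user: did they visit on Jan 1, and did they come back on Jan 2 and Jan 3,
-- given that they visited on Jan 1 (every condition after the first is paired with the first).
SELECT
    uid,
    retention(date = '2021-01-01', date = '2021-01-02', date = '2021-01-03') AS r
FROM visits
GROUP BY uid;
```
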
@ -20,7 +20,7 @@ or

argMax(tuple(arg, val))
```

**Parameters**
**Arguments**

- `arg` — Argument.
- `val` — Value.

@ -52,15 +52,15 @@ Input table:

Query:

``` sql
SELECT argMax(user, salary), argMax(tuple(user, salary)) FROM salary;
SELECT argMax(user, salary), argMax(tuple(user, salary), salary), argMax(tuple(user, salary)) FROM salary;
```

Result:

``` text
┌─argMax(user, salary)─┬─argMax(tuple(user, salary))─┐
│ director             │ ('director',5000)           │
└──────────────────────┴─────────────────────────────┘
┌─argMax(user, salary)─┬─argMax(tuple(user, salary), salary)─┬─argMax(tuple(user, salary))─┐
│ director             │ ('director',5000)                   │ ('director',5000)           │
└──────────────────────┴─────────────────────────────────────┴─────────────────────────────┘
```

[Original article](https://clickhouse.tech/docs/en/sql-reference/aggregate-functions/reference/argmax/) <!--hide-->

@ -20,7 +20,7 @@ or

argMin(tuple(arg, val))
```

**Parameters**
**Arguments**

- `arg` — Argument.
- `val` — Value.

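For symmetry with the `argMax` example above, a short sketch against the same hypothetical `salary` table:

``` sql
-- Returns the user with the minimum salary, and the same pair as a tuple.
SELECT argMin(user, salary), argMin(tuple(user, salary)) FROM salary;
```
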
@ -9,10 +9,10 @@ Calculates the arithmetic mean.

**Syntax**

``` sql
avgWeighted(x)
avg(x)
```

**Parameter**
**Arguments**

- `x` — Values.

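A one-line usage sketch:

``` sql
-- The arithmetic mean of 0..9 is 4.5.
SELECT avg(number) FROM numbers(10);
```
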
@ -12,7 +12,7 @@ Calculates the [weighted arithmetic mean](https://en.wikipedia.org/wiki/Weighted

avgWeighted(x, weight)
```

**Parameters**
**Arguments**

- `x` — Values.
- `weight` — Weights of the values.

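A short sketch with inline values:

``` sql
-- (4*1 + 1*0 + 10*2) / (1 + 0 + 2) = 8
SELECT avgWeighted(x, w) FROM values('x Int8, w Int8', (4, 1), (1, 0), (10, 2));
```
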
@ -10,7 +10,7 @@ ClickHouse supports the following syntaxes for `count`:

- `count(expr)` or `COUNT(DISTINCT expr)`.
- `count()` or `COUNT(*)`. The `count()` syntax is ClickHouse-specific.

**Parameters**
**Arguments**

The function can take:

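A quick sketch of both syntaxes:

``` sql
-- Counts all rows, and the number of distinct remainders (0, 1, 2).
SELECT count(), count(DISTINCT number % 3) FROM numbers(10);
```
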
@ -17,7 +17,7 @@ If in one query several values are inserted into the same position, the function

- If a query is executed in a single thread, the first one of the inserted values is used.
- If a query is executed in multiple threads, the resulting value is an undetermined one of the inserted values.

**Parameters**
**Arguments**

- `x` — Value to be inserted. [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in one of the [supported data types](../../../sql-reference/data-types/index.md).
- `pos` — Position at which the specified element `x` is to be inserted. Index numbering in the array starts from zero. [UInt32](../../../sql-reference/data-types/int-uint.md#uint-ranges).

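A small sketch, close to the reference example for this function:

``` sql
-- Values land at positions 0, 2, 4, 6, 8; the gaps are filled with the default value for String ('').
SELECT groupArrayInsertAt(toString(number), number * 2) FROM numbers(5);
```
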
@ -13,7 +13,7 @@ groupArrayMovingAvg(window_size)(numbers_for_summing)

The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column.

**Parameters**
**Arguments**

- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value.
- `window_size` — Size of the calculation window.

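A brief usage sketch:

``` sql
-- Moving average over a sliding window of two elements of the sequence 1..5.
SELECT groupArrayMovingAvg(2)(number + 1) FROM numbers(5);
```
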
@ -13,7 +13,7 @@ groupArrayMovingSum(window_size)(numbers_for_summing)

The function can take the window size as a parameter. If left unspecified, the function takes the window size equal to the number of rows in the column.

**Parameters**
**Arguments**

- `numbers_for_summing` — [Expression](../../../sql-reference/syntax.md#syntax-expressions) resulting in a numeric data type value.
- `window_size` — Size of the calculation window.

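And the analogous moving-sum call:

``` sql
-- Moving sum over a sliding window of two elements: [1, 3, 5, 7, 9].
SELECT groupArrayMovingSum(2)(number + 1) FROM numbers(5);
```
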
@ -12,7 +12,7 @@ Creates an array of sample argument values. The size of the resulting array is l

groupArraySample(max_size[, seed])(x)
```

**Parameters**
**Arguments**

- `max_size` — Maximum size of the resulting array. [UInt64](../../data-types/int-uint.md).
- `seed` — Seed for the random number generator. Optional. [UInt64](../../data-types/int-uint.md). Default value: `123456`.

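A short sketch:

``` sql
-- A random sample of at most 3 of the numbers 0..9; the explicit seed makes the sample reproducible.
SELECT groupArraySample(3, 42)(number) FROM numbers(10);
```
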
@ -10,7 +10,7 @@ Applies bitwise `AND` for series of numbers.

groupBitAnd(expr)
```

**Parameters**
**Arguments**

`expr` – An expression that results in `UInt*` type.

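A minimal sketch with inline data:

``` sql
-- 0b1100 AND 0b1101 AND 0b1111 = 0b1100, so the result is 12.
SELECT groupBitAnd(n) FROM (SELECT arrayJoin([12, 13, 15]) AS n);
```
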
@ -10,7 +10,7 @@ Bitmap or Aggregate calculations from a unsigned integer column, return cardinal

groupBitmap(expr)
```

**Parameters**
**Arguments**

`expr` – An expression that results in `UInt*` type.

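A one-liner with inline data:

``` sql
-- The distinct values are {1, 2, 3}, so the returned cardinality is 3.
SELECT groupBitmap(n) FROM (SELECT arrayJoin([1, 1, 2, 3, 3]) AS n);
```
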
@ -10,7 +10,7 @@ Calculations the AND of a bitmap column, return cardinality of type UInt64, if a

groupBitmapAnd(expr)
```

**Parameters**
**Arguments**

`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.

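A hedged sketch that builds two bitmap states inline instead of reading a stored `AggregateFunction(groupBitmap, UInt*)` column; `groupBitmapOr` and `groupBitmapXor` are used the same way:

``` sql
-- Intersects the bitmaps {0..4} and {3..7}; only {3, 4} remain, so the result is 2.
SELECT groupBitmapAnd(bm)
FROM
(
    SELECT groupBitmapState(toUInt32(number)) AS bm FROM numbers(5)
    UNION ALL
    SELECT groupBitmapState(toUInt32(number + 3)) FROM numbers(5)
);
```
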
@ -10,7 +10,7 @@ Calculations the OR of a bitmap column, return cardinality of type UInt64, if ad

groupBitmapOr(expr)
```

**Parameters**
**Arguments**

`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.

@ -10,7 +10,7 @@ Calculations the XOR of a bitmap column, return cardinality of type UInt64, if a

groupBitmapXor(expr)
```

**Parameters**
**Arguments**

`expr` – An expression that results in `AggregateFunction(groupBitmap, UInt*)` type.

@ -10,7 +10,7 @@ Applies bitwise `OR` for series of numbers.

groupBitOr(expr)
```

**Parameters**
**Arguments**

`expr` – An expression that results in `UInt*` type.

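A minimal sketch:

``` sql
-- 0b001 OR 0b010 OR 0b100 = 0b111, so the result is 7.
SELECT groupBitOr(n) FROM (SELECT arrayJoin([1, 2, 4]) AS n);
```
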
@ -10,7 +10,7 @@ Applies bitwise `XOR` for series of numbers.

groupBitXor(expr)
```

**Parameters**
**Arguments**

`expr` – An expression that results in `UInt*` type.

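Analogously for XOR:

``` sql
-- 0b011 XOR 0b101 XOR 0b110 = 0b000, so the result is 0.
SELECT groupBitXor(n) FROM (SELECT arrayJoin([3, 5, 6]) AS n);
```
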