Merge remote-tracking branch 'origin' into mute-integration-debug-fails

This commit is contained in:
Yatsishin Ilya 2021-08-20 18:59:16 +03:00
commit d9e782d379
676 changed files with 13767 additions and 6207 deletions

View File

@ -1,5 +1,8 @@
### ClickHouse release v21.8, 2021-08-12 ### ClickHouse release v21.8, 2021-08-12
#### Upgrade Notes
* New version is using `Map` data type for system logs tables (`system.query_log`, `system.query_thread_log`, `system.processes`, `system.opentelemetry_span_log`). These tables will be auto-created with new data types. Virtual columns are created to support old queries. Closes [#18698](https://github.com/ClickHouse/ClickHouse/issues/18698). [#23934](https://github.com/ClickHouse/ClickHouse/pull/23934), [#25773](https://github.com/ClickHouse/ClickHouse/pull/25773) ([hexiaoting](https://github.com/hexiaoting), [sundy-li](https://github.com/sundy-li), [Maksim Kita](https://github.com/kitaisreal)). If you want to *downgrade* from version 21.8 to older versions, you will need to cleanup system tables with logs manually. Look at `/var/lib/clickhouse/data/system/*_log`.
#### New Features #### New Features
* Add support for a part of SQL/JSON standard. [#24148](https://github.com/ClickHouse/ClickHouse/pull/24148) ([l1tsolaiki](https://github.com/l1tsolaiki), [Kseniia Sumarokova](https://github.com/kssenii)). * Add support for a part of SQL/JSON standard. [#24148](https://github.com/ClickHouse/ClickHouse/pull/24148) ([l1tsolaiki](https://github.com/l1tsolaiki), [Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -593,7 +593,23 @@ macro (add_executable target)
# disabled for TSAN and gcc since libtsan.a provides overrides too # disabled for TSAN and gcc since libtsan.a provides overrides too
if (TARGET clickhouse_new_delete) if (TARGET clickhouse_new_delete)
# operator::new/delete for executables (MemoryTracker stuff) # operator::new/delete for executables (MemoryTracker stuff)
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES}) target_link_libraries (${target} PRIVATE clickhouse_new_delete)
endif()
# In case of static jemalloc, because zone_register() is located in zone.c and
# is never used outside (it is declared as constructor) it is omitted
# by the linker, and so jemalloc will not be registered as system
# allocator under osx [1], and clickhouse will SIGSEGV.
#
# [1]: https://github.com/jemalloc/jemalloc/issues/708
#
# About symbol name:
# - _zone_register not zone_register due to Mach-O binary format,
# - _je_zone_register due to JEMALLOC_PRIVATE_NAMESPACE=je_ under OS X.
# - but jemalloc-cmake does not run private_namespace.sh
# so symbol name should be _zone_register
if (ENABLE_JEMALLOC AND MAKE_STATIC_LIBRARIES AND OS_DARWIN)
set_property(TARGET ${target} APPEND PROPERTY LINK_OPTIONS -u_zone_register)
endif() endif()
endif() endif()
endmacro() endmacro()

View File

@ -152,7 +152,7 @@ namespace wide
template <size_t Bits, typename Signed> template <size_t Bits, typename Signed>
struct integer<Bits, Signed>::_impl struct integer<Bits, Signed>::_impl
{ {
static constexpr size_t _Bits = Bits; static constexpr size_t _bits = Bits;
static constexpr const unsigned byte_count = Bits / 8; static constexpr const unsigned byte_count = Bits / 8;
static constexpr const unsigned item_count = byte_count / sizeof(base_type); static constexpr const unsigned item_count = byte_count / sizeof(base_type);
static constexpr const unsigned base_bits = sizeof(base_type) * 8; static constexpr const unsigned base_bits = sizeof(base_type) * 8;
@ -614,8 +614,8 @@ public:
else else
{ {
static_assert(IsWideInteger<T>::value); static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::_impl::operator_plus( return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::_impl::operator_plus(
integer<T::_impl::_Bits, Signed>(lhs), rhs); integer<T::_impl::_bits, Signed>(lhs), rhs);
} }
} }
@ -632,8 +632,8 @@ public:
else else
{ {
static_assert(IsWideInteger<T>::value); static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::_impl::operator_minus( return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::_impl::operator_minus(
integer<T::_impl::_Bits, Signed>(lhs), rhs); integer<T::_impl::_bits, Signed>(lhs), rhs);
} }
} }
@ -857,7 +857,7 @@ public:
else else
{ {
static_assert(IsWideInteger<T>::value); static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::operator_slash(T(lhs), rhs); return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::operator_slash(T(lhs), rhs);
} }
} }
@ -877,7 +877,7 @@ public:
else else
{ {
static_assert(IsWideInteger<T>::value); static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::operator_percent(T(lhs), rhs); return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::operator_percent(T(lhs), rhs);
} }
} }

View File

@ -12,6 +12,7 @@
#include <Common/SymbolIndex.h> #include <Common/SymbolIndex.h>
#include <Common/StackTrace.h> #include <Common/StackTrace.h>
#include <Common/getNumberOfPhysicalCPUCores.h> #include <Common/getNumberOfPhysicalCPUCores.h>
#include <Core/ServerUUID.h>
#if !defined(ARCADIA_BUILD) #if !defined(ARCADIA_BUILD)
# include "Common/config_version.h" # include "Common/config_version.h"
@ -38,6 +39,13 @@ void setExtras()
if (!anonymize) if (!anonymize)
sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str())); sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str()));
DB::UUID server_uuid = DB::ServerUUID::get();
if (server_uuid != DB::UUIDHelpers::Nil)
{
std::string server_uuid_str = DB::toString(server_uuid);
sentry_set_extra("server_uuid", sentry_value_new_string(server_uuid_str.c_str()));
}
sentry_set_tag("version", VERSION_STRING); sentry_set_tag("version", VERSION_STRING);
sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH));
sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE));

View File

@ -1,5 +1,4 @@
#include <sys/auxv.h> #include <sys/auxv.h>
#include "atomic.h"
#include <unistd.h> // __environ #include <unistd.h> // __environ
#include <errno.h> #include <errno.h>
@ -18,7 +17,18 @@ static size_t __find_auxv(unsigned long type)
return (size_t) -1; return (size_t) -1;
} }
unsigned long __getauxval(unsigned long type) __attribute__((constructor)) static void __auxv_init()
{
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
}
unsigned long getauxval(unsigned long type)
{ {
if (type == AT_SECURE) if (type == AT_SECURE)
return __auxv_secure; return __auxv_secure;
@ -33,38 +43,3 @@ unsigned long __getauxval(unsigned long type)
errno = ENOENT; errno = ENOENT;
return 0; return 0;
} }
static void * volatile getauxval_func;
static unsigned long __auxv_init(unsigned long type)
{
if (!__environ)
{
// __environ is not initialized yet so we can't initialize __auxv right now.
// That's normally occurred only when getauxval() is called from some sanitizer's internal code.
errno = ENOENT;
return 0;
}
// Initialize __auxv and __auxv_secure.
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
// Now we've initialized __auxv, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval);
return __getauxval(type);
}
// First time getauxval() will call __auxv_init().
static void * volatile getauxval_func = (void *)__auxv_init;
unsigned long getauxval(unsigned long type)
{
return ((unsigned long (*)(unsigned long))getauxval_func)(type);
}

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54454) SET(VERSION_REVISION 54455)
SET(VERSION_MAJOR 21) SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 9) SET(VERSION_MINOR 10)
SET(VERSION_PATCH 1) SET(VERSION_PATCH 1)
SET(VERSION_GITHASH f063e44131a048ba2d9af8075f03700fd5ec3e69) SET(VERSION_GITHASH 09df5018f95edcd0f759d4689ac5d029dd400c2a)
SET(VERSION_DESCRIBE v21.9.1.7770-prestable) SET(VERSION_DESCRIBE v21.10.1.1-testing)
SET(VERSION_STRING 21.9.1.7770) SET(VERSION_STRING 21.10.1.1)
# end of autochange # end of autochange

View File

@ -1,9 +1,10 @@
# Disabled under OSX until https://github.com/ClickHouse/ClickHouse/issues/27568 is fixed
if (SANITIZE OR NOT ( if (SANITIZE OR NOT (
((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE)))) ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE)) OR
(OS_DARWIN AND (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" OR CMAKE_BUILD_TYPE STREQUAL "Debug"))
))
if (ENABLE_JEMALLOC) if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL} message (${RECONFIGURE_MESSAGE_LEVEL}
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds") "jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
endif () endif ()
set (ENABLE_JEMALLOC OFF) set (ENABLE_JEMALLOC OFF)
else () else ()
@ -138,9 +139,5 @@ target_compile_options(jemalloc PRIVATE -Wno-redundant-decls)
target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE) target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE)
set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1) set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1)
if (MAKE_STATIC_LIBRARIES)
# To detect whether we need to register jemalloc for osx as default zone.
set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS BUNDLED_STATIC_JEMALLOC=1)
endif()
message (STATUS "Using jemalloc") message (STATUS "Using jemalloc")

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 43491d33ca2826531d1e3cae70d4bf1e5249e3c9 Subproject commit b8554f1682062c85ba519eb54ef2f90e02b812cb

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (21.9.1.1) unstable; urgency=low clickhouse (21.10.1.1) unstable; urgency=low
* Modified source code * Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Sat, 10 Jul 2021 08:22:49 +0300 -- clickhouse-release <clickhouse-release@yandex-team.ru> Sat, 17 Jul 2021 08:45:03 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.9.1.* ARG version=21.10.1.*
RUN apt-get update \ RUN apt-get update \
&& apt-get install --yes --no-install-recommends \ && apt-get install --yes --no-install-recommends \

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04 FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.9.1.* ARG version=21.10.1.*
ARG gosu_ver=1.10 ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image # set non-empty deb_location_url url to create a docker image

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04 FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/" ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.9.1.* ARG version=21.10.1.*
RUN apt-get update && \ RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \ apt-get install -y apt-transport-https dirmngr && \

View File

@ -641,6 +641,7 @@ create view partial_query_times as select * from
-- Report for partial queries that we could only run on the new server (e.g. -- Report for partial queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR). -- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
settings output_format_decimal_trailing_zeros = 1
as select toDecimal64(time_median, 3) time, as select toDecimal64(time_median, 3) time,
toDecimal64(time_stddev / time_median, 3) relative_time_stddev, toDecimal64(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name test, query_index, query_display_name
@ -713,8 +714,9 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
order by test, query_index, metric_name order by test, query_index, metric_name
; ;
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') as create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv')
with settings output_format_decimal_trailing_zeros = 1
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms), -- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion -- so we have to work around this to not get an error about conversion
-- of NaN to decimal. -- of NaN to decimal.
@ -730,8 +732,9 @@ create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') as
changed_fail, test, query_index, query_display_name changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc; from queries where changed_show order by abs(diff) desc;
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') as create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv')
select settings output_format_decimal_trailing_zeros = 1
as select
toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3), toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3),
toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc; from queries where unstable_show order by stat_threshold desc;
@ -761,8 +764,9 @@ create view total_speedup as
from test_speedup from test_speedup
; ;
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') as create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv')
with settings output_format_decimal_trailing_zeros = 1
as with
(times_speedup >= 1 (times_speedup >= 1
? '-' || toString(toDecimal64(times_speedup, 3)) || 'x' ? '-' || toString(toDecimal64(times_speedup, 3)) || 'x'
: '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x') : '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x')
@ -788,8 +792,9 @@ create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV, from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float'); 'test text, query_index int, client float, server float');
create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') as create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv')
select client, server, toDecimal64(client/server, 3) p, settings output_format_decimal_trailing_zeros = 1
as select client, server, toDecimal64(client/server, 3) p,
test, query_display_name test, query_display_name
from total_client_time_per_query left join query_display_names using (test, query_index) from total_client_time_per_query left join query_display_names using (test, query_index)
where p > toDecimal64(1.02, 3) order by p desc; where p > toDecimal64(1.02, 3) order by p desc;
@ -874,8 +879,9 @@ create view test_times_view_total as
from test_times_view from test_times_view
; ;
create table test_times_report engine File(TSV, 'report/test-times.tsv') as create table test_times_report engine File(TSV, 'report/test-times.tsv')
select settings output_format_decimal_trailing_zeros = 1
as select
test, test,
toDecimal64(real, 3), toDecimal64(real, 3),
toDecimal64(total_client_time, 3), toDecimal64(total_client_time, 3),
@ -893,8 +899,9 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') as
; ;
-- report for all queries page, only main metric -- report for all queries page, only main metric
create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as create table all_tests_report engine File(TSV, 'report/all-queries.tsv')
with settings output_format_decimal_trailing_zeros = 1
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms), -- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion -- so we have to work around this to not get an error about conversion
-- of NaN to decimal. -- of NaN to decimal.
@ -1057,9 +1064,10 @@ create table unstable_run_traces engine File(TSVWithNamesAndTypes,
; ;
create table metric_devation engine File(TSVWithNamesAndTypes, create table metric_devation engine File(TSVWithNamesAndTypes,
'report/metric-deviation.$version.tsv') as 'report/metric-deviation.$version.tsv')
settings output_format_decimal_trailing_zeros = 1
-- first goes the key used to split the file with grep -- first goes the key used to split the file with grep
select test, query_index, query_display_name, as select test, query_index, query_display_name,
toDecimal64(d, 3) d, q, metric toDecimal64(d, 3) d, q, metric
from ( from (
select select
@ -1187,8 +1195,9 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as
; ;
-- Show metrics that have changed -- Show metrics that have changed
create table changes engine File(TSV, 'metrics/changes.tsv') as create table changes engine File(TSV, 'metrics/changes.tsv')
select metric, left, right, settings output_format_decimal_trailing_zeros = 1
as select metric, left, right,
toDecimal64(diff, 3), toDecimal64(times_diff, 3) toDecimal64(diff, 3), toDecimal64(times_diff, 3)
from ( from (
select metric, median(left) as left, median(right) as right, select metric, median(left) as left, median(right) as right,

View File

@ -13,7 +13,7 @@ left_sha=$2
# right_pr=$3 not used for now # right_pr=$3 not used for now
right_sha=$4 right_sha=$4
datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"} datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}
declare -A dataset_paths declare -A dataset_paths
dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"

View File

@ -127,6 +127,15 @@ export PATH
export REF_PR export REF_PR
export REF_SHA export REF_SHA
# Try to collect some core dumps. I've seen two patterns in Sandbox:
# 1) |/home/zomb-sandbox/venv/bin/python /home/zomb-sandbox/client/sandbox/bin/coredumper.py %e %p %g %u %s %P %c
# Not sure what this script does (puts them to sandbox resources, logs some messages?),
# and it's not accessible from inside docker anyway.
# 2) something like %e.%p.core.dmp. The dump should end up in the workspace directory.
# At least we remove the ulimit and then try to pack some common file names into output.
ulimit -c unlimited
cat /proc/sys/kernel/core_pattern
# Start the main comparison script. # Start the main comparison script.
{ \ { \
time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \ time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \
@ -144,8 +153,11 @@ done
dmesg -T > dmesg.log dmesg -T > dmesg.log
ls -lath
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \ 7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \ {right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
report analyze benchmark metrics report analyze benchmark metrics \
./*.core.dmp ./*.core
cp compare.log /output cp compare.log /output

View File

@ -105,10 +105,6 @@ def process_result(result_path):
description += ", skipped: {}".format(skipped) description += ", skipped: {}".format(skipped)
if unknown != 0: if unknown != 0:
description += ", unknown: {}".format(unknown) description += ", unknown: {}".format(unknown)
# Temporary green for tests with DatabaseReplicated:
if 1 == int(os.environ.get('USE_DATABASE_REPLICATED', 0)):
state = "success"
else: else:
state = "failure" state = "failure"
description = "Output log doesn't exist" description = "Output log doesn't exist"

View File

@ -1,3 +1,8 @@
---
toc_priority: 36
toc_title: Replicated
---
# [experimental] Replicated {#replicated} # [experimental] Replicated {#replicated}
The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via DDL log being written to ZooKeeper and executed on all of the replicas for a given database. The engine is based on the [Atomic](../../engines/database-engines/atomic.md) engine. It supports replication of metadata via DDL log being written to ZooKeeper and executed on all of the replicas for a given database.

View File

@ -38,9 +38,7 @@ A table for the Graphite data should have the following columns for the followin
- Value of the metric. Data type: any numeric. - Value of the metric. Data type: any numeric.
- Version of the metric. Data type: any numeric. - Version of the metric. Data type: any numeric (ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts).
ClickHouse saves the rows with the highest version or the last written if versions are the same. Other rows are deleted during the merge of data parts.
The names of these columns should be set in the rollup configuration. The names of these columns should be set in the rollup configuration.
@ -132,7 +130,7 @@ Fields for `pattern` and `default` sections:
- `regexp` A pattern for the metric name. - `regexp` A pattern for the metric name.
- `age` The minimum age of the data in seconds. - `age` The minimum age of the data in seconds.
- `precision` How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day). - `precision` How precisely to define the age of the data in seconds. Should be a divisor for 86400 (seconds in a day).
- `function` The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. - `function` The name of the aggregating function to apply to data whose age falls within the range `[age, age + precision]`. Accepted functions: min / max / any / avg. The average is calculated imprecisely, like the average of the averages.
### Configuration Example {#configuration-example} ### Configuration Example {#configuration-example}
@ -169,4 +167,7 @@ Fields for `pattern` and `default` sections:
</graphite_rollup> </graphite_rollup>
``` ```
!!! warning "Warning"
Data rollup is performed during merges. Usually, for old partitions, merges are not started, so for rollup it is necessary to trigger an unscheduled merge using [optimize](../../../sql-reference/statements/optimize.md). Or use additional tools, for example [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
[Original article](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) <!--hide--> [Original article](https://clickhouse.tech/docs/en/operations/table_engines/graphitemergetree/) <!--hide-->

View File

@ -844,44 +844,3 @@ S3 disk can be configured as `main` or `cold` storage:
``` ```
In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule. In case of `cold` option a data can be moved to S3 if local disk free size will be smaller than `move_factor * disk_size` or by TTL move rule.
## Using HDFS for Data Storage {#table_engine-mergetree-hdfs}
[HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html) is a distributed file system for remote data storage.
`MergeTree` family table engines can store data to HDFS using a disk with type `HDFS`.
Configuration markup:
``` xml
<yandex>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
</hdfs>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
```
Required parameters:
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
Optional parameters:
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.

View File

@ -137,7 +137,7 @@ CREATE TABLE table_name
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver) ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate) PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID)) ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID) SAMPLE BY intHash32(UserID);
``` ```
<details markdown="1"> <details markdown="1">
@ -150,12 +150,12 @@ CREATE TABLE table_name
EventDate DateTime, EventDate DateTime,
CounterID UInt32, CounterID UInt32,
UserID UInt32 UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192);
``` ```
</details> </details>
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the «[macros](../../../operations/server-configuration-parameters/settings/#macros) section of the configuration file. As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the [macros](../../../operations/server-configuration-parameters/settings.md#macros) section of the configuration file.
Example: Example:

View File

@ -56,6 +56,9 @@ The same thing happens if the subordinate table does not exist when the buffer i
If you need to run ALTER for a subordinate table, and the Buffer table, we recommend first deleting the Buffer table, running ALTER for the subordinate table, then creating the Buffer table again. If you need to run ALTER for a subordinate table, and the Buffer table, we recommend first deleting the Buffer table, running ALTER for the subordinate table, then creating the Buffer table again.
!!! attention "Attention"
Running ALTER on the Buffer table in releases made before 28 Sep 2020 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
If the server is restarted abnormally, the data in the buffer is lost. If the server is restarted abnormally, the data in the buffer is lost.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table. `FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.

View File

@ -105,7 +105,7 @@ We use `Decimal` data type to store prices. Everything else is quite straightfor
## Import Data ## Import Data
Upload data into ClickHouse in parallel: Upload data into ClickHouse:
``` ```
clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO dish FORMAT CSVWithNames" < Dish.csv clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO dish FORMAT CSVWithNames" < Dish.csv

View File

@ -114,5 +114,5 @@ Seamlessly migration from ZooKeeper to `clickhouse-keeper` is impossible you hav
clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots
``` ```
4. Copy snapshot to `clickhouse-server` nodes with configured `keeper` or start `clickhouse-keeper` instead of ZooKeeper. Snapshot must persist only on leader node, leader will sync it automatically to other nodes. 4. Copy snapshot to `clickhouse-server` nodes with configured `keeper` or start `clickhouse-keeper` instead of ZooKeeper. Snapshot must persist on all nodes, otherwise empty nodes can be faster and one of them can becamse leader.

View File

@ -18,6 +18,18 @@ Some settings specified in the main configuration file can be overridden in othe
- If `replace` is specified, it replaces the entire element with the specified one. - If `replace` is specified, it replaces the entire element with the specified one.
- If `remove` is specified, it deletes the element. - If `remove` is specified, it deletes the element.
You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
```
## Substitution {#substitution} ## Substitution {#substitution}
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)). The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).

View File

@ -486,7 +486,7 @@ Parameter substitutions for replicated tables.
Can be omitted if replicated tables are not used. Can be omitted if replicated tables are not used.
For more information, see the section [Creating replicated tables](../../engines/table-engines/mergetree-family/replication.md). For more information, see the section [Creating replicated tables](../../engines/table-engines/mergetree-family/replication.md#creating-replicated-tables).
**Example** **Example**
@ -1247,6 +1247,7 @@ Default value: `/var/lib/clickhouse/access/`.
Section of the configuration file that contains settings: Section of the configuration file that contains settings:
- Path to configuration file with predefined users. - Path to configuration file with predefined users.
- Path to folder where users created by SQL commands are stored. - Path to folder where users created by SQL commands are stored.
- ZooKeeper node path where users created by SQL commands are stored and replicated (experimental).
If this section is specified, the path from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) won't be used. If this section is specified, the path from [users_config](../../operations/server-configuration-parameters/settings.md#users-config) and [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) won't be used.
@ -1262,6 +1263,9 @@ The `user_directories` section can contain any number of items, the order of the
<local_directory> <local_directory>
<path>/var/lib/clickhouse/access/</path> <path>/var/lib/clickhouse/access/</path>
</local_directory> </local_directory>
<replicated>
<zookeeper_path>/clickhouse/access/</zookeeper_path>
</replicated>
</user_directories> </user_directories>
``` ```

View File

@ -5,10 +5,111 @@ toc_title: External Disks for Storing Data
# External Disks for Storing Data {#external-disks} # External Disks for Storing Data {#external-disks}
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon s3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)). Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
To work with data stored on `Amazon s3` disks use [s3](../engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](../engines/table-engines/integrations/hdfs.md) table engine. To work with data stored on `Amazon S3` disks use [S3](../engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](../engines/table-engines/integrations/hdfs.md) table engine.
## Zero-copy Replication {#zero-copy} ## Zero-copy Replication {#zero-copy}
ClickHouse supports zero-copy replication for `s3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself. ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.
## Configuring HDFS {#configuring-hdfs}
[MergeTree](../engines/table-engines/mergetree-family/mergetree.md) and [Log](../engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
Configuration markup:
``` xml
<yandex>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
</hdfs>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
```
Required parameters:
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
Optional parameters:
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
You can encrypt the data stored on [S3](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
Example of disk configuration:
``` xml
<disks>
<disk1>
<type>local</type>
<path>/path1/</path>
</disk1>
<disk2>
<type>encrypted</type>
<disk>disk1</disk>
<path>path2/</path>
<key>_16_ascii_chars_</key>
</disk2>
</disks>
```
For example, when ClickHouse writes data from some table to a file `store/all_1_1_0/data.bin` to `disk1`, then in fact this file will be written to the physical disk along the path `/path1/store/all_1_1_0/data.bin`.
When writing the same file to `disk2`, it will actually be written to the physical disk at the path `/path1/path2/store/all_1_1_0/data.bin` in encrypted mode.
Required parameters:
- `type``encrypted`. Otherwise the encrypted disk is not created.
- `disk` — Type of disk for data storage.
- `key` — The key for encryption and decryption. Type: [Uint64](../sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encrypt in hexadecimal form.
You can specify multiple keys using the `id` attribute (see example above).
Optional parameters:
- `path` — Path to the location on the disk where the data will be saved. If not specified, the data will be saved in the root directory.
- `current_key_id` — The key used for encryption. All the specified keys can be used for decryption, and you can always switch to another key while maintaining access to previously encrypted data.
- `algorithm` — [Algorithm](../sql-reference/statements/create/table.md#create-query-encryption-codecs) for encryption. Possible values: `AES_128_CTR`, `AES_192_CTR` or `AES_256_CTR`. Default value: `AES_128_CTR`. The key length depends on the algorithm: `AES_128_CTR` — 16 bytes, `AES_192_CTR` — 24 bytes, `AES_256_CTR` — 32 bytes.
Example of disk configuration:
``` xml
<yandex>
<storage_configuration>
<disks>
<disk_s3>
<type>s3</type>
<endpoint>...
</disk_s3>
<disk_s3_encrypted>
<type>encrypted</type>
<disk>disk_s3</disk>
<algorithm>AES_128_CTR</algorithm>
<key_hex id="0">00112233445566778899aabbccddeeff</key_hex>
<key_hex id="1">ffeeddccbbaa99887766554433221100</key_hex>
<current_key_id>1</current_key_id>
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
```

View File

@ -4,7 +4,7 @@ Contains information about [trace spans](https://opentracing.io/docs/overview/sp
Columns: Columns:
- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — ID of the trace for executed query. - `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query.
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`. - `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.

View File

@ -0,0 +1,129 @@
# system.zookeeper_log {#system-zookeeper_log}
This table contains information about the parameters of the request to the ZooKeeper server and the response from it.
For requests, only columns with request parameters are filled in, and the remaining columns are filled with default values (`0` or `NULL`). When the response arrives, the data from the response is added to the other columns.
Columns with request parameters:
- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values:
- `Request` — The request has been sent.
- `Response` — The response was received.
- `Finalize` — The connection is lost, no response was received.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of ZooKeeper server that was used to make the request.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port of ZooKeeper server that was used to make the request.
- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — The session ID that the ZooKeeper server sets for each connection.
- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired `response`/`finalize` row.
- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The request whether the [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches) has been set.
- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — The type of request or response.
- `path` ([String](../../sql-reference/data-types/string.md)) — The path to the ZooKeeper node specified in the request, or an empty string if the request not requires specifying a path.
- `data` ([String](../../sql-reference/data-types/string.md)) — The data written to the ZooKeeper node (for the `SET` and `CREATE` requests — what the request wanted to write, for the response to the `GET` request — what was read) or an empty string.
- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes).
- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming).
- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — The version of the ZooKeeper node that the request expects when executing. This is supported for `CHECK`, `SET`, `REMOVE` requests (is relevant `-1` if the request does not check the version or `NULL` for other requests that do not support version checking).
- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in multi request will have the same `xid`.
- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of the request included in multi request (for multi request — `0`, then in order from `1`).
Columns with request response parameters:
- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed).
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them:
- `ZOK` — The request was executed seccessfully.
- `ZCONNECTIONLOSS` — The connection was lost.
- `ZOPERATIONTIMEOUT` — The request execution timeout has expired.
- `ZSESSIONEXPIRED` — The session has expired.
- `NULL` — The request is completed.
- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The type of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`.
- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The status of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`.
- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`.
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created.
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node.
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node.
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node.
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node.
- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of children of this ZooKeeper node.
- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — The list of child ZooKeeper nodes (for responses to `LIST` request).
**Example**
Query:
``` sql
SELECT * FROM system.zookeeper_log WHERE (session_id = '106662742089334927') AND (xid = '10858') FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
type: Request
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.291792
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 0
error: ᴺᵁᴸᴸ
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 0
stat_mzxid: 0
stat_pzxid: 0
stat_version: 0
stat_cversion: 0
stat_dataLength: 0
stat_numChildren: 0
children: []
Row 2:
──────
type: Response
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.292086
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 16926267
error: ZOK
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 16925469
stat_mzxid: 16925469
stat_pzxid: 16926179
stat_version: 0
stat_cversion: 7
stat_dataLength: 0
stat_numChildren: 7
children: ['query-0000000006','query-0000000005','query-0000000004','query-0000000003','query-0000000002','query-0000000001','query-0000000000']
```
**See Also**
- [ZooKeeper](../../operations/tips.md#zookeeper)
- [ZooKeeper guide](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html)

View File

@ -197,7 +197,7 @@ Result:
## h3ToGeo {#h3togeo} ## h3ToGeo {#h3togeo}
Returns `(lon, lat)` that corresponds to the provided H3 index. Returns the geographical coordinates of longitude and latitude corresponding to the provided [H3](#h3index) index.
**Syntax** **Syntax**
@ -207,20 +207,18 @@ h3ToGeo(h3Index)
**Arguments** **Arguments**
- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md). - `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values** **Returned values**
- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md). - A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md).
- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example** **Example**
Query: Query:
``` sql ``` sql
SELECT h3ToGeo(644325524701193974) coordinates; SELECT h3ToGeo(644325524701193974) AS coordinates;
``` ```
Result: Result:
@ -230,6 +228,7 @@ Result:
│ (37.79506616830252,55.71290243145668) │ │ (37.79506616830252,55.71290243145668) │
└───────────────────────────────────────┘ └───────────────────────────────────────┘
``` ```
## h3kRing {#h3kring} ## h3kRing {#h3kring}
Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order. Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order.

View File

@ -1339,3 +1339,149 @@ Result:
│ 2,"good" │ │ 2,"good" │
└───────────────────────────────────────────┘ └───────────────────────────────────────────┘
``` ```
## snowflakeToDateTime {#snowflakeToDateTime}
Extract time from snowflake id as DateTime format.
**Syntax**
``` sql
snowflakeToDateTime(value [, time_zone])
```
**Parameters**
- `value``snowflake id`, Int64 value.
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- value converted to the `DateTime` data type.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
```
Result:
``` text
┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:57:56 │
└──────────────────────────────────────────────────────────────────┘
```
## snowflakeToDateTime64 {#snowflakeToDateTime64}
Extract time from snowflake id as DateTime64 format.
**Syntax**
``` sql
snowflakeToDateTime64(value [, time_zone])
```
**Parameters**
- `value``snowflake id`, Int64 value.
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- value converted to the `DateTime64` data type.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
```
Result:
``` text
┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:58:19.841 │
└────────────────────────────────────────────────────────────────────┘
```
## dateTimeToSnowflake {#dateTimeToSnowflake}
Convert DateTime to the first snowflake id at the giving time.
**Syntax**
``` sql
dateTimeToSnowflake(value)
```
**Parameters**
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- `value` converted to the `Int64` data type as the first snowflake id at that time.
**Example**
Query:
``` sql
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt
SELECT dateTimeToSnowflake(dt);
```
Result:
``` text
┌─dateTimeToSnowflake(dt)─┐
│ 1426860702823350272 │
└─────────────────────────┘
```
## dateTime64ToSnowflake {#dateTime64ToSnowflake}
Convert DateTime64 to the first snowflake id at the giving time.
**Syntax**
``` sql
dateTime64ToSnowflake(value)
```
**Parameters**
- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type as the first snowflake id at that time.
**Example**
Query:
``` sql
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64
SELECT dateTime64ToSnowflake(dt64);
```
Result:
``` text
┌─dateTime64ToSnowflake(dt64)─┐
│ 1426860704886947840 │
└─────────────────────────────┘
```

View File

@ -384,5 +384,32 @@ ExpressionTransform
(ReadFromStorage) (ReadFromStorage)
NumbersMt × 2 0 → 1 NumbersMt × 2 0 → 1
``` ```
### EXPLAIN ESTIMATE {#explain-estimate}
Shows the estimated number of rows, marks and parts to be read from the tables while processing the query. Works with tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) family.
**Example**
Creating a table:
```sql
CREATE TABLE ttt (i Int64) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity = 16, write_final_mark = 0;
INSERT INTO ttt SELECT number FROM numbers(128);
OPTIMIZE TABLE ttt;
```
Query:
```sql
EXPLAIN ESTIMATE SELECT * FROM ttt;
```
Result:
```text
┌─database─┬─table─┬─parts─┬─rows─┬─marks─┐
│ default │ ttt │ 1 │ 128 │ 8 │
└──────────┴───────┴───────┴──────┴───────┘
```
[Оriginal article](https://clickhouse.tech/docs/en/sql-reference/statements/explain/) <!--hide--> [Оriginal article](https://clickhouse.tech/docs/en/sql-reference/statements/explain/) <!--hide-->

View File

@ -311,12 +311,12 @@ One may execute query after:
- Individual replica path `/replicas/replica_name/` loss. - Individual replica path `/replicas/replica_name/` loss.
Replica attaches locally found parts and sends info about them to Zookeeper. Replica attaches locally found parts and sends info about them to Zookeeper.
Parts present on replica before metadata loss are not re-fetched from other replicas if not being outdated Parts present on a replica before metadata loss are not re-fetched from other ones if not being outdated (so replica restoration does not mean re-downloading all data over the network).
(so replica restoration does not mean re-downloading all data over the network).
Caveat: parts in all states are moved to `detached/` folder. Parts active before data loss (Committed) are attached. !!! warning "Warning"
Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached.
#### Syntax **Syntax**
```sql ```sql
SYSTEM RESTORE REPLICA [db.]replicated_merge_tree_family_table_name [ON CLUSTER cluster_name] SYSTEM RESTORE REPLICA [db.]replicated_merge_tree_family_table_name [ON CLUSTER cluster_name]
@ -328,11 +328,11 @@ Alternative syntax:
SYSTEM RESTORE REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name SYSTEM RESTORE REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
``` ```
#### Example **Example**
Creating a table on multiple servers. After the replica's metadata in ZooKeeper is lost, the table will attach as read-only as metadata is missing. The last query needs to execute on every replica.
```sql ```sql
-- Creating table on multiple servers
CREATE TABLE test(n UInt32) CREATE TABLE test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/', '{replica}') ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/', '{replica}')
ORDER BY n PARTITION BY n % 10; ORDER BY n PARTITION BY n % 10;
@ -341,8 +341,14 @@ INSERT INTO test SELECT * FROM numbers(1000);
-- zookeeper_delete_path("/clickhouse/tables/test", recursive=True) <- root loss. -- zookeeper_delete_path("/clickhouse/tables/test", recursive=True) <- root loss.
SYSTEM RESTART REPLICA test; -- Table will attach as readonly as metadata is missing. SYSTEM RESTART REPLICA test;
SYSTEM RESTORE REPLICA test; -- Need to execute on every replica, another way: RESTORE REPLICA test ON CLUSTER cluster SYSTEM RESTORE REPLICA test;
```
Another way:
```sql
SYSTEM RESTORE REPLICA test ON CLUSTER cluster;
``` ```
### RESTART REPLICAS {#query_language-system-restart-replicas} ### RESTART REPLICAS {#query_language-system-restart-replicas}

View File

@ -6,12 +6,13 @@ toc_title: cluster
# cluster, clusterAllReplicas {#cluster-clusterallreplicas} # cluster, clusterAllReplicas {#cluster-clusterallreplicas}
Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried. Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried.
`clusterAllReplicas` - same as `cluster` but all replicas are queried. Each replica in a cluster is used as separate shard/connection.
`clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection.
!!! note "Note" !!! note "Note"
All available clusters are listed in the `system.clusters` table. All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table.
Signatures: **Syntax**
``` sql ``` sql
cluster('cluster_name', db.table[, sharding_key]) cluster('cluster_name', db.table[, sharding_key])
@ -19,10 +20,27 @@ cluster('cluster_name', db, table[, sharding_key])
clusterAllReplicas('cluster_name', db.table[, sharding_key]) clusterAllReplicas('cluster_name', db.table[, sharding_key])
clusterAllReplicas('cluster_name', db, table[, sharding_key]) clusterAllReplicas('cluster_name', db, table[, sharding_key])
``` ```
**Arguments**
`cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. - `cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `db.table` or `db`, `table` - Name of a database and a table.
- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard.
`sharding_key` - When insert into cluster function with more than one shard, sharding_key need to be provided. **Returned value**
The dataset from clusters.
**Using Macros**
`cluster_name` can contain macros — substitution in curly brackets. The substituted value is taken from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration file.
Example:
```sql
SELECT * FROM cluster('{cluster}', default.example_table);
```
**Usage and Recommendations**
Using the `cluster` and `clusterAllReplicas` table functions are less efficient than creating a `Distributed` table because in this case, the server connection is re-established for every request. When processing a large number of queries, please always create the `Distributed` table ahead of time, and do not use the `cluster` and `clusterAllReplicas` table functions. Using the `cluster` and `clusterAllReplicas` table functions are less efficient than creating a `Distributed` table because in this case, the server connection is re-established for every request. When processing a large number of queries, please always create the `Distributed` table ahead of time, and do not use the `cluster` and `clusterAllReplicas` table functions.

View File

@ -1,3 +1,7 @@
---
toc_priority: 36
toc_title: Replicated
---
# [экспериментальный] Replicated {#replicated} # [экспериментальный] Replicated {#replicated}

View File

@ -38,9 +38,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
- Значение метрики. Тип данных: любой числовой. - Значение метрики. Тип данных: любой числовой.
- Версия метрики. Тип данных: любой числовой. - Версия метрики. Тип данных: любой числовой (ClickHouse сохраняет строки с последней версией или последнюю записанную строку, если версии совпадают. Другие строки удаляются при слиянии кусков данных).
ClickHouse сохраняет строки с последней версией или последнюю записанную строку, если версии совпадают. Другие строки удаляются при слиянии кусков данных.
Имена этих столбцов должны быть заданы в конфигурации rollup. Имена этих столбцов должны быть заданы в конфигурации rollup.
@ -173,4 +171,4 @@ default
!!! warning "Внимание" !!! warning "Внимание"
Прореживание данных производится во время слияний. Обычно для старых партций слияния не запускаются, поэтому для прореживания надо иницировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize/). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer). Прореживание данных производится во время слияний. Обычно для старых партций слияния не запускаются, поэтому для прореживания надо иницировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).

View File

@ -827,44 +827,3 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
``` ```
Если диск сконфигурирован как `cold`, данные будут переноситься в S3 при срабатывании правил TTL или когда свободное место на локальном диске станет меньше порогового значения, которое определяется как `move_factor * disk_size`. Если диск сконфигурирован как `cold`, данные будут переноситься в S3 при срабатывании правил TTL или когда свободное место на локальном диске станет меньше порогового значения, которое определяется как `move_factor * disk_size`.
## Использование сервиса HDFS для хранения данных {#table_engine-mergetree-hdfs}
[HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_design.html) — это распределенная файловая система для удаленного хранения данных.
Таблицы семейства `MergeTree` могут хранить данные в сервисе HDFS при использовании диска типа `HDFS`.
Пример конфигурации:
``` xml
<yandex>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
</hdfs>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
```
Обязательные параметры:
- `endpoint` — URL точки приема запроса на стороне HDFS в формате `path`. URL точки должен содержать путь к корневой директории на сервере, где хранятся данные.
Необязательные параметры:
- `min_bytes_for_seek` — минимальное количество байтов, которые используются для операций поиска вместо последовательного чтения. Значение по умолчанию: 1 МБайт.

View File

@ -102,7 +102,7 @@ CREATE TABLE table_name
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver) ) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate) PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID)) ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID) SAMPLE BY intHash32(UserID);
``` ```
<details markdown="1"> <details markdown="1">
@ -115,12 +115,12 @@ CREATE TABLE table_name
EventDate DateTime, EventDate DateTime,
CounterID UInt32, CounterID UInt32,
UserID UInt32 UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192) ) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192);
``` ```
</details> </details>
Как видно в примере, эти параметры могут содержать подстановки в фигурных скобках. Подставляемые значения достаются из конфигурационного файла, из секции «[macros](../../../operations/server-configuration-parameters/settings/#macros)». Как видно в примере, эти параметры могут содержать подстановки в фигурных скобках. Эти подстановки заменяются на соответствующие значения из конфигурационного файла, из секции [macros](../../../operations/server-configuration-parameters/settings.md#macros).
Пример: Пример:

View File

@ -48,7 +48,10 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен. Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен.
То же самое происходит, если подчинённая таблица не существует в момент сброса буфера. То же самое происходит, если подчинённая таблица не существует в момент сброса буфера.
Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а затем создать таблицу Buffer заново. Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а после создать таблицу Buffer заново.
!!! attention "Внимание"
В релизах до 28 сентября 2020 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны. При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны.

View File

@ -465,9 +465,9 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Подстановки параметров реплицируемых таблиц. Подстановки параметров реплицируемых таблиц.
Можно не указывать, если реплицируемых таблицы не используются. Можно не указывать, если реплицируемые таблицы не используются.
Подробнее смотрите в разделе «[Создание реплицируемых таблиц](../../engines/table-engines/mergetree-family/replication.md)». Подробнее смотрите в разделе [Создание реплицируемых таблиц](../../engines/table-engines/mergetree-family/replication.md#creating-replicated-tables).
**Пример** **Пример**

View File

@ -5,10 +5,110 @@ toc_title: "Хранение данных на внешних дисках"
# Хранение данных на внешних дисках {#external-disks} # Хранение данных на внешних дисках {#external-disks}
Данные, которые обрабатываются в ClickHouse, обычно хранятся в файловой системе локально, где развернут сервер ClickHouse. При этом для хранения данных требуются диски большого объема, которые могут быть довольно дорогостоящими. Решением проблемы может стать хранение данных отдельно от сервера — в распределенных файловых системах — [Amazon s3](https://aws.amazon.com/s3/) или Hadoop ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)). Данные, которые обрабатываются в ClickHouse, обычно хранятся в файловой системе локально, где развернут сервер ClickHouse. При этом для хранения данных требуются диски большого объема, которые могут быть довольно дорогостоящими. Решением проблемы может стать хранение данных отдельно от сервера — в распределенных файловых системах — [Amazon S3](https://aws.amazon.com/s3/) или Hadoop ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
Для работы с данными, хранящимися в файловой системе `Amazon s3`, используйте движок [s3](../engines/table-engines/integrations/s3.md), а для работы с данными в файловой системе Hadoop — движок [HDFS](../engines/table-engines/integrations/hdfs.md). Для работы с данными, хранящимися в файловой системе `Amazon S3`, используйте движок [S3](../engines/table-engines/integrations/s3.md), а для работы с данными в файловой системе Hadoop — движок [HDFS](../engines/table-engines/integrations/hdfs.md).
## Репликация без копирования данных {#zero-copy} ## Репликация без копирования данных {#zero-copy}
Для дисков `s3` и `HDFS` в ClickHouse поддерживается репликация без копирования данных (zero-copy): если данные хранятся на нескольких репликах, то при синхронизации пересылаются только метаданные (пути к кускам данных), а сами данные не копируются. Для дисков `S3` и `HDFS` в ClickHouse поддерживается репликация без копирования данных (zero-copy): если данные хранятся на нескольких репликах, то при синхронизации пересылаются только метаданные (пути к кускам данных), а сами данные не копируются.
## Использование сервиса HDFS для хранения данных {#table_engine-mergetree-hdfs}
Таблицы семейств [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) и [Log](../engines/table-engines/log-family/log.md) могут хранить данные в сервисе HDFS при использовании диска типа `HDFS`.
Пример конфигурации:
``` xml
<yandex>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
</hdfs>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
```
Обязательные параметры:
- `endpoint` — URL точки приема запроса на стороне HDFS в формате `path`. URL точки должен содержать путь к корневой директории на сервере, где хранятся данные.
Необязательные параметры:
- `min_bytes_for_seek` — минимальное количество байтов, которые используются для операций поиска вместо последовательного чтения. Значение по умолчанию: `1 МБайт`.
## Использование виртуальной файловой системы для шифрования данных {#encrypted-virtual-file-system}
Вы можете зашифровать данные, сохраненные на внешних дисках [S3](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-s3) или [HDFS](#table_engine-mergetree-hdfs) или на локальном диске. Чтобы включить режим шифрования, в конфигурационном файле вы должны указать диск с типом `encrypted` и тип диска, на котором будут сохранены данные. Диск типа `encrypted` шифрует данные "на лету", то есть при чтении файлов с этого диска расшифровка происходит автоматически. Таким образом, вы можете работать с диском типа `encrypted` как с обычным.
Пример конфигурации:
``` xml
<disks>
<disk1>
<type>local</type>
<path>/path1/</path>
</disk1>
<disk2>
<type>encrypted</type>
<disk>disk1</disk>
<path>path2/</path>
<key>_16_ascii_chars_</key>
</disk2>
</disks>
```
Например, когда ClickHouse записывает данные из какой-либо таблицы в файл `store/all_1_1_0/data.bin` на `disk1`, то на самом деле этот файл будет записан на физический диск по пути `/path1/store/all_1_1_0/data.bin`.
При записи того же файла на диск `disk2` он будет записан на физический диск в зашифрованном виде по пути `/path1/path2/store/all_1_1_0/data.bin`.
Обязательные параметры:
- `type``encrypted`. Иначе зашифрованный диск создан не будет.
- `disk` — тип диска для хранения данных.
- `key` — ключ для шифрования и расшифровки. Тип: [Uint64](../sql-reference/data-types/int-uint.md). Вы можете использовать параметр `key_hex` для шифрования в шестнадцатеричной форме.
Вы можете указать несколько ключей, используя атрибут `id` (смотрите пример выше).
Необязательные параметры:
- `path` — путь к месту на диске, где будут сохранены данные. Если не указан, данные будут сохранены в корневом каталоге.
- `current_key_id` — ключ, используемый для шифрования. Все указанные ключи могут быть использованы для расшифровки, и вы всегда можете переключиться на другой ключ, сохраняя доступ к ранее зашифрованным данным.
- `algorithm` — [алгоритм](../sql-reference/statements/create/table.md#create-query-encryption-codecs) шифрования данных. Возможные значения: `AES_128_CTR`, `AES_192_CTR` или `AES_256_CTR`. Значение по умолчанию: `AES_128_CTR`. Длина ключа зависит от алгоритма: `AES_128_CTR` — 16 байт, `AES_192_CTR` — 24 байта, `AES_256_CTR` — 32 байта.
Пример конфигурации:
``` xml
<yandex>
<storage_configuration>
<disks>
<disk_s3>
<type>s3</type>
<endpoint>...
</disk_s3>
<disk_s3_encrypted>
<type>encrypted</type>
<disk>disk_s3</disk>
<algorithm>AES_128_CTR</algorithm>
<key_hex id="0">00112233445566778899aabbccddeeff</key_hex>
<key_hex id="1">ffeeddccbbaa99887766554433221100</key_hex>
<current_key_id>1</current_key_id>
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
```

View File

@ -4,7 +4,7 @@
Столбцы: Столбцы:
- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — идентификатор трассировки для выполненного запроса. - `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — идентификатор трассировки для выполненного запроса.
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`. - `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`.

View File

@ -0,0 +1,129 @@
# system.zookeeper_log {#system-zookeeper_log}
Эта таблица содержит информацию о параметрах запроса к серверу ZooKeeper и ответа от него.
Для запросов заполняются только столбцы с параметрами запроса, а остальные столбцы заполняются значениями по умолчанию (`0` или `NULL`). Когда поступает ответ, данные добавляются в столбцы с параметрами ответа на запрос.
Столбцы с параметрами запроса:
- `type` ([Enum](../../sql-reference/data-types/enum.md)) — тип события в клиенте ZooKeeper. Может иметь одно из следующих значений:
- `Request` — запрос отправлен.
- `Response` — ответ получен.
- `Finalize` — соединение разорвано, ответ не получен.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло событие.
- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — дата и время, когда произошло событие.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес сервера ZooKeeper, с которого был сделан запрос.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт сервера ZooKeeper, с которого был сделан запрос.
- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор сессии, который сервер ZooKeeper создает для каждого соединения.
- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — идентификатор запроса внутри сессии. Обычно это последовательный номер запроса, одинаковый у строки запроса и у парной строки `response`/`finalize`.
- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — установлен ли запрос [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches).
- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — тип запроса или ответа на запрос.
- `path` ([String](../../sql-reference/data-types/string.md)) — путь к узлу ZooKeeper, указанный в запросе. Пустая строка, если запрос не требует указания пути.
- `data` ([String](../../sql-reference/data-types/string.md)) — данные, записанные на узле ZooKeeper (для запросов `SET` и `CREATE` — что запрос хотел записать, для ответа на запрос `GET` — что было прочитано), или пустая строка.
- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — создается ли узел ZooKeeper как [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes).
- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — создается ли узел ZooKeeper как [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming).
- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — версия узла ZooKeeper, которую запрос ожидает увидеть при выполнении. Поддерживается для запросов `CHECK`, `SET`, `REMOVE` (`-1` — запрос не проверяет версию, `NULL` — для других запросов, которые не поддерживают проверку версии).
- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество запросов, включенных в мультизапрос (это специальный запрос, который состоит из нескольких последовательных обычных запросов, выполняющихся атомарно). Все запросы, включенные в мультизапрос, имеют одинаковый `xid`.
- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — номер запроса, включенного в мультизапрос (`0` — для мультизапроса, далее по порядку с `1`).
Столбцы с параметрами ответа на запрос:
- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции в ZooKeeper. Последовательный номер, выданный сервером ZooKeeper в ответе на успешно выполненный запрос (`0` — запрос не был выполнен, возвращена ошибка или клиент ZooKeeper не знает, был ли выполнен запрос).
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — код ошибки. Может иметь много значений, здесь приведены только некоторые из них:
- `ZOK` — запрос успешно выполнен.
- `ZCONNECTIONLOSS` — соединение разорвано.
- `ZOPERATIONTIMEOUT` — истекло время ожидания выполнения запроса.
- `ZSESSIONEXPIRED` — истекло время сессии.
- `NULL` — выполнен запрос.
- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — тип события `watch` (для ответов на запрос при `op_num` = `Watch`), для остальных ответов: `NULL`.
- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — статус события `watch` (для ответов на запрос при `op_num` = `Watch`), для остальных ответов: `NULL`.
- `path_created` ([String](../../sql-reference/data-types/string.md)) — путь к созданному узлу ZooKeeper (для ответов на запрос `CREATE`). Может отличаться от `path`, если узел создается как `sequential`.
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции, в результате которой был создан узел ZooKeeper.
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции, которая последней модифицировала узел ZooKeeper.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции, которая последней модифицировала дочерние узлы ZooKeeper.
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — количество изменений в данных узла ZooKeeper.
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — количество изменений в дочерних узлах ZooKeeper.
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — длина поля данных узла ZooKeeper.
- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — количество дочерних узлов ZooKeeper.
- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — список дочерних узлов ZooKeeper (для ответов на запрос `LIST`).
**Пример**
Запрос:
``` sql
SELECT * FROM system.zookeeper_log WHERE (session_id = '106662742089334927') AND (xid = '10858') FORMAT Vertical;
```
Результат:
``` text
Row 1:
──────
type: Request
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.291792
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 0
error: ᴺᵁᴸᴸ
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 0
stat_mzxid: 0
stat_pzxid: 0
stat_version: 0
stat_cversion: 0
stat_dataLength: 0
stat_numChildren: 0
children: []
Row 2:
──────
type: Response
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.292086
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 16926267
error: ZOK
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 16925469
stat_mzxid: 16925469
stat_pzxid: 16926179
stat_version: 0
stat_cversion: 7
stat_dataLength: 0
stat_numChildren: 7
children: ['query-0000000006','query-0000000005','query-0000000004','query-0000000003','query-0000000002','query-0000000001','query-0000000000']
```
**См. также**
- [ZooKeeper](../../operations/tips.md#zookeeper)
- [Руководство по ZooKeeper](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html)

View File

@ -193,6 +193,40 @@ SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index;
└────────────────────┘ └────────────────────┘
``` ```
## h3ToGeo {#h3togeo}
Возвращает географические координаты долготы и широты, соответствующие указанному [H3](#h3index)-индексу.
**Синтаксис**
``` sql
h3ToGeo(h3Index)
```
**Аргументы**
- `h3Index` — [H3](#h3index)-индекс. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- кортеж из двух значений: `tuple(lon,lat)`, где `lon` — долгота [Float64](../../../sql-reference/data-types/float.md), `lat` — широта [Float64](../../../sql-reference/data-types/float.md).
**Пример**
Запрос:
``` sql
SELECT h3ToGeo(644325524701193974) coordinates;
```
Результат:
``` text
┌─coordinates───────────────────────────┐
│ (37.79506616830252,55.71290243145668) │
└───────────────────────────────────────┘
```
## h3kRing {#h3kring} ## h3kRing {#h3kring}
Возвращает [H3](#h3index)-индексы шестигранников в радиусе `k` от данного в произвольном порядке. Возвращает [H3](#h3index)-индексы шестигранников в радиусе `k` от данного в произвольном порядке.

View File

@ -385,4 +385,32 @@ ExpressionTransform
NumbersMt × 2 0 → 1 NumbersMt × 2 0 → 1
``` ```
### EXPLAIN ESTIMATE {#explain-estimate}
Отображает оценки числа строк, засечек и кусков, которые будут прочитаны при выполнении запроса. Применяется для таблиц семейства [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree).
**Пример**
Создадим таблицу:
```sql
CREATE TABLE ttt (i Int64) ENGINE = MergeTree() ORDER BY i SETTINGS index_granularity = 16, write_final_mark = 0;
INSERT INTO ttt SELECT number FROM numbers(128);
OPTIMIZE TABLE ttt;
```
Запрос:
```sql
EXPLAIN ESTIMATE SELECT * FROM ttt;
```
Результат:
```text
┌─database─┬─table─┬─parts─┬─rows─┬─marks─┐
│ default │ ttt │ 1 │ 128 │ 8 │
└──────────┴───────┴───────┴──────┴───────┘
```
[Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/explain/) <!--hide--> [Оригинальная статья](https://clickhouse.tech/docs/ru/sql-reference/statements/explain/) <!--hide-->

View File

@ -36,6 +36,7 @@ toc_title: SYSTEM
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues) - [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica) - [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica) - [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTORE REPLICA](#query_language-system-restore-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas) - [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries} ## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries}
@ -287,13 +288,66 @@ SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
### RESTART REPLICA {#query_language-system-restart-replica} ### RESTART REPLICA {#query_language-system-restart-replica}
Реинициализация состояния Zookeeper-сессий для таблицы семейства `ReplicatedMergeTree`. Сравнивает текущее состояние с тем, что хранится в Zookeeper, как источник правды, и добавляет задачи в очередь репликации в Zookeeper, если необходимо. Реинициализирует состояние сессий Zookeeper для таблицы семейства `ReplicatedMergeTree`. Сравнивает текущее состояние с состоянием в Zookeeper (как с эталоном) и при необходимости добавляет задачи в очередь репликации в Zookeeper.
Инициализация очереди репликации на основе данных ZooKeeper происходит так же, как при attach table. На короткое время таблица станет недоступной для любых операций. Инициализация очереди репликации на основе данных ZooKeeper происходит так же, как при `ATTACH TABLE`. Некоторое время таблица будет недоступна для любых операций.
``` sql ``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
``` ```
### RESTORE REPLICA {#query_language-system-restore-replica}
Восстанавливает реплику, если метаданные в Zookeeper потеряны, но сами данные возможно существуют.
Работает только с таблицами семейства `ReplicatedMergeTree` и только если таблица находится в readonly-режиме.
Запрос можно выполнить если:
- потерян корневой путь ZooKeeper `/`;
- потерян путь реплик `/replicas`;
- потерян путь конкретной реплики `/replicas/replica_name/`.
К реплике прикрепляются локально найденные куски, информация о них отправляется в Zookeeper.
Если присутствующие в реплике до потери метаданных данные не устарели, они не скачиваются повторно с других реплик. Поэтому восстановление реплики не означает повторную загрузку всех данных по сети.
!!! warning "Предупреждение"
Потерянные данные в любых состояниях перемещаются в папку `detached/`. Куски, активные до потери данных (находившиеся в состоянии Committed), прикрепляются.
**Синтаксис**
```sql
SYSTEM RESTORE REPLICA [db.]replicated_merge_tree_family_table_name [ON CLUSTER cluster_name]
```
Альтернативный синтаксис:
```sql
SYSTEM RESTORE REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
**Пример**
Создание таблицы на нескольких серверах. После потери корневого пути реплики таблица будет прикреплена только для чтения, так как метаданные отсутствуют. Последний запрос необходимо выполнить на каждой реплике.
```sql
CREATE TABLE test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/', '{replica}')
ORDER BY n PARTITION BY n % 10;
INSERT INTO test SELECT * FROM numbers(1000);
-- zookeeper_delete_path("/clickhouse/tables/test", recursive=True) <- root loss.
SYSTEM RESTART REPLICA test;
SYSTEM RESTORE REPLICA test;
```
Альтернативный способ:
```sql
SYSTEM RESTORE REPLICA test ON CLUSTER cluster;
```
### RESTART REPLICAS {#query_language-system-restart-replicas} ### RESTART REPLICAS {#query_language-system-restart-replicas}
Реинициализация состояния ZooKeeper-сессий для всех `ReplicatedMergeTree` таблиц. Сравнивает текущее состояние реплики с тем, что хранится в ZooKeeper, как c источником правды, и добавляет задачи в очередь репликации в ZooKeeper, если необходимо. Реинициализация состояния ZooKeeper-сессий для всех `ReplicatedMergeTree` таблиц. Сравнивает текущее состояние реплики с тем, что хранится в ZooKeeper, как c источником правды, и добавляет задачи в очередь репликации в ZooKeeper, если необходимо.

View File

@ -5,22 +5,44 @@ toc_title: cluster
# cluster, clusterAllReplicas {#cluster-clusterallreplicas} # cluster, clusterAllReplicas {#cluster-clusterallreplicas}
Позволяет обратиться ко всем серверам существующего кластера, который присутствует в таблице `system.clusters` и сконфигурирован в секцци `remote_servers` без создания таблицы типа `Distributed`. Позволяет обратиться ко всем шардам существующего кластера, который сконфигурирован в секции `remote_servers` без создания таблицы типа [Distributed](../../engines/table-engines/special/distributed.md). В запросе используется одна реплика каждого шарда.
`clusterAllReplicas` - работает также как `cluster` но каждая реплика в кластере будет использована как отдельный шард/отдельное соединение.
Функция `clusterAllReplicas` работает также как `cluster`, но каждая реплика в кластере используется как отдельный шард/отдельное соединение.
Сигнатуры: !!! note "Примечание"
Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md).
**Синтаксис**
``` sql ``` sql
cluster('cluster_name', db.table) cluster('cluster_name', db.table[, sharding_key])
cluster('cluster_name', db, table) cluster('cluster_name', db, table[, sharding_key])
clusterAllReplicas('cluster_name', db.table) clusterAllReplicas('cluster_name', db.table[, sharding_key])
clusterAllReplicas('cluster_name', db, table) clusterAllReplicas('cluster_name', db, table[, sharding_key])
```
**Аргументы**
- `cluster_name` имя кластера, который обозначает подмножество адресов и параметров подключения к удаленным и локальным серверам, входящим в кластер.
- `db.table` или `db`, `table` - имя базы данных и таблицы.
- `sharding_key` - ключ шардирования. Необязательный аргумент. Указывается, если данные добавляются более чем в один шард кластера.
**Возвращаемое значение**
Набор данных из кластеров.
**Использование макросов**
`cluster_name` может содержать макрос — подстановку в фигурных скобках. Эта подстановка заменяется на соответствующее значение из секции [macros](../../operations/server-configuration-parameters/settings.md#macros) конфигурационного файла .
Пример:
```sql
SELECT * FROM cluster('{cluster}', default.example_table);
``` ```
`cluster_name` имя кластера, который обязан присутствовать в таблице `system.clusters` и обозначает подмножество адресов и параметров подключения к удаленным и локальным серверам, входящим в кластер. **Использование и рекомендации**
Использование табличных функций `cluster` и `clusterAllReplicas` менее оптимальное чем создание таблицы типа `Distributed`, поскольку в этом случае соединение с сервером переустанавливается на каждый запрос. При обработке большого количества запросов, всегда создавайте `Distributed` таблицу заранее и не используйте табличные функции `cluster` и `clusterAllReplicas`. Использование табличных функций `cluster` и `clusterAllReplicas` менее оптимально, чем создание таблицы типа `Distributed`, поскольку в этом случае при каждом новом запросе устанавливается новое соединение с сервером. При обработке большого количества запросов всегда создавайте `Distributed` таблицу заранее и не используйте табличные функции `cluster` и `clusterAllReplicas`.
Табличные функции `cluster` and `clusterAllReplicas` могут быть полезны в следующих случаях: Табличные функции `cluster` and `clusterAllReplicas` могут быть полезны в следующих случаях:
@ -30,7 +52,7 @@ clusterAllReplicas('cluster_name', db, table)
Настройки соединения `user`, `password`, `host`, `post`, `compression`, `secure` берутся из секции `<remote_servers>` файлов конфигурации. См. подробности в разделе [Distributed](../../engines/table-engines/special/distributed.md) Настройки соединения `user`, `password`, `host`, `post`, `compression`, `secure` берутся из секции `<remote_servers>` файлов конфигурации. См. подробности в разделе [Distributed](../../engines/table-engines/special/distributed.md)
**See Also** **См. также**
- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) - [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
- [load_balancing](../../operations/settings/settings.md#settings-load_balancing) - [load_balancing](../../operations/settings/settings.md#settings-load_balancing)

View File

@ -1,6 +1,4 @@
--- ---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 49 toc_priority: 49
toc_title: "\u6570\u636E\u5907\u4EFD" toc_title: "\u6570\u636E\u5907\u4EFD"
--- ---
@ -36,7 +34,7 @@ ClickHouse允许使用 `ALTER TABLE ... FREEZE PARTITION ...` 查询以创建表
数据可以使用 `ALTER TABLE ... ATTACH PARTITION ...` 从备份中恢复。 数据可以使用 `ALTER TABLE ... ATTACH PARTITION ...` 从备份中恢复。
有关与分区操作相关的查询的详细信息,请参阅 [更改文档] (../sql-reference/statements/alter.md#alter_manipulations-with-partitions). 有关与分区操作相关的查询的详细信息,请参阅 [更改文档](../sql-reference/statements/alter.md#alter_manipulations-with-partitions).
第三方工具可用于自动化此方法: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup). 第三方工具可用于自动化此方法: [clickhouse-backup](https://github.com/AlexAkulov/clickhouse-backup).

View File

@ -1,9 +1,5 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
---
# 系统。data_type_families {#system_tables-data_type_families} # system.data_type_families {#system_tables-data_type_families}
包含有关受支持的[数据类型](../../sql-reference/data-types/)的信息. 包含有关受支持的[数据类型](../../sql-reference/data-types/)的信息.

View File

@ -26,7 +26,7 @@ toc_title: "常见问题"
### 服务器未运行 {#server-is-not-running} ### 服务器未运行 {#server-is-not-running}
**检查服务器是否运行nnig** **检查服务器是否正在运行**
命令: 命令:

View File

@ -2,6 +2,7 @@
#include "Common/MemoryTracker.h" #include "Common/MemoryTracker.h"
#include "Columns/ColumnsNumber.h" #include "Columns/ColumnsNumber.h"
#include "ConnectionParameters.h" #include "ConnectionParameters.h"
#include "IO/CompressionMethod.h"
#include "QueryFuzzer.h" #include "QueryFuzzer.h"
#include "Suggest.h" #include "Suggest.h"
#include "TestHint.h" #include "TestHint.h"
@ -1823,7 +1824,7 @@ private:
void processInsertQuery() void processInsertQuery()
{ {
const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>(); const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if (!parsed_insert_query.data && (is_interactive || (!stdin_is_a_tty && std_in.eof()))) if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
connection->sendQuery( connection->sendQuery(
@ -1894,7 +1895,24 @@ private:
if (!parsed_insert_query) if (!parsed_insert_query)
return; return;
if (parsed_insert_query->data) if (parsed_insert_query->infile)
{
const auto & in_file_node = parsed_insert_query->infile->as<ASTLiteral &>();
const auto in_file = in_file_node.value.safeGet<std::string>();
auto in_buffer = wrapReadBufferWithCompressionMethod(std::make_unique<ReadBufferFromFile>(in_file), chooseCompressionMethod(in_file, ""));
try
{
sendDataFrom(*in_buffer, sample, columns_description);
}
catch (Exception & e)
{
e.addMessage("data for INSERT was parsed from file");
throw;
}
}
else if (parsed_insert_query->data)
{ {
/// Send data contained in the query. /// Send data contained in the query.
ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data); ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data);

View File

@ -17,6 +17,7 @@
#include <Poco/Version.h> #include <Poco/Version.h>
#include <Poco/Environment.h> #include <Poco/Environment.h>
#include <Common/getMultipleKeysFromConfig.h> #include <Common/getMultipleKeysFromConfig.h>
#include <Core/ServerUUID.h>
#include <filesystem> #include <filesystem>
#include <IO/UseSSL.h> #include <IO/UseSSL.h>
@ -326,6 +327,8 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
} }
} }
DB::ServerUUID::load(path + "/uuid", log);
const Settings & settings = global_context->getSettingsRef(); const Settings & settings = global_context->getSettingsRef();
GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 100)); GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 100));

View File

@ -12,6 +12,7 @@
#include <Interpreters/executeQuery.h> #include <Interpreters/executeQuery.h>
#include <Interpreters/loadMetadata.h> #include <Interpreters/loadMetadata.h>
#include <Interpreters/DatabaseCatalog.h> #include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Session.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/Macros.h> #include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h> #include <Common/Config/ConfigProcessor.h>
@ -374,14 +375,13 @@ void LocalServer::processQueries()
if (!parse_res.second) if (!parse_res.second)
throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR);
/// we can't mutate global global_context (can lead to races, as it was already passed to some background threads) /// Authenticate and create a context to execute queries.
/// so we can't reuse it safely as a query context and need a copy here Session session{global_context, ClientInfo::Interface::TCP};
auto context = Context::createCopy(global_context); session.authenticate("default", "", Poco::Net::SocketAddress{});
context->makeSessionContext(); /// Use the same context for all queries.
context->makeQueryContext(); auto context = session.makeQueryContext();
context->makeSessionContext(); /// initial_create_query requires a session context to be set.
context->setUser("default", "", Poco::Net::SocketAddress{});
context->setCurrentQueryId(""); context->setCurrentQueryId("");
applyCmdSettings(context); applyCmdSettings(context);

View File

@ -39,6 +39,7 @@
#include <Common/getMappedArea.h> #include <Common/getMappedArea.h>
#include <Common/remapExecutable.h> #include <Common/remapExecutable.h>
#include <Common/TLDListsHolder.h> #include <Common/TLDListsHolder.h>
#include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h> #include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h> #include <IO/ReadHelpers.h>
#include <IO/UseSSL.h> #include <IO/UseSSL.h>
@ -79,7 +80,6 @@
#include <Server/HTTP/HTTPServer.h> #include <Server/HTTP/HTTPServer.h>
#include <filesystem> #include <filesystem>
#if !defined(ARCADIA_BUILD) #if !defined(ARCADIA_BUILD)
# include "config_core.h" # include "config_core.h"
# include "Common/config_version.h" # include "Common/config_version.h"
@ -146,7 +146,6 @@ static bool jemallocOptionEnabled(const char *name)
static bool jemallocOptionEnabled(const char *) { return 0; } static bool jemallocOptionEnabled(const char *) { return 0; }
#endif #endif
int mainEntryClickHouseServer(int argc, char ** argv) int mainEntryClickHouseServer(int argc, char ** argv)
{ {
DB::Server app; DB::Server app;
@ -667,13 +666,14 @@ if (ThreadFuzzer::instance().isEffective())
global_context->setRemoteHostFilter(config()); global_context->setRemoteHostFilter(config());
std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH)); std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
std::string default_database = config().getString("default_database", "default"); std::string default_database = config().getString("default_database", "default");
/// Check that the process user id matches the owner of the data. /// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid(); const auto effective_user_id = geteuid();
struct stat statbuf; struct stat statbuf;
if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid) if (stat(path_str.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
{ {
const auto effective_user = getUserName(effective_user_id); const auto effective_user = getUserName(effective_user_id);
const auto data_owner = getUserName(statbuf.st_uid); const auto data_owner = getUserName(statbuf.st_uid);
@ -690,9 +690,11 @@ if (ThreadFuzzer::instance().isEffective())
} }
} }
global_context->setPath(path); global_context->setPath(path_str);
StatusFile status{path + "status", StatusFile::write_full_info}; StatusFile status{path / "status", StatusFile::write_full_info};
DB::ServerUUID::load(path / "uuid", log);
/// Try to increase limit on number of open files. /// Try to increase limit on number of open files.
{ {
@ -726,7 +728,7 @@ if (ThreadFuzzer::instance().isEffective())
/// Storage with temporary data for processing of heavy queries. /// Storage with temporary data for processing of heavy queries.
{ {
std::string tmp_path = config().getString("tmp_path", path + "tmp/"); std::string tmp_path = config().getString("tmp_path", path / "tmp/");
std::string tmp_policy = config().getString("tmp_policy", ""); std::string tmp_policy = config().getString("tmp_policy", "");
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy); const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy);
for (const DiskPtr & disk : volume->getDisks()) for (const DiskPtr & disk : volume->getDisks())
@ -738,7 +740,7 @@ if (ThreadFuzzer::instance().isEffective())
* Examples: do repair of local data; clone all replicated tables from replica. * Examples: do repair of local data; clone all replicated tables from replica.
*/ */
{ {
auto flags_path = fs::path(path) / "flags/"; auto flags_path = path / "flags/";
fs::create_directories(flags_path); fs::create_directories(flags_path);
global_context->setFlagsPath(flags_path); global_context->setFlagsPath(flags_path);
} }
@ -747,29 +749,29 @@ if (ThreadFuzzer::instance().isEffective())
*/ */
{ {
std::string user_files_path = config().getString("user_files_path", fs::path(path) / "user_files/"); std::string user_files_path = config().getString("user_files_path", path / "user_files/");
global_context->setUserFilesPath(user_files_path); global_context->setUserFilesPath(user_files_path);
fs::create_directories(user_files_path); fs::create_directories(user_files_path);
} }
{ {
std::string dictionaries_lib_path = config().getString("dictionaries_lib_path", fs::path(path) / "dictionaries_lib/"); std::string dictionaries_lib_path = config().getString("dictionaries_lib_path", path / "dictionaries_lib/");
global_context->setDictionariesLibPath(dictionaries_lib_path); global_context->setDictionariesLibPath(dictionaries_lib_path);
fs::create_directories(dictionaries_lib_path); fs::create_directories(dictionaries_lib_path);
} }
/// top_level_domains_lists /// top_level_domains_lists
{ {
const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/"); const std::string & top_level_domains_path = config().getString("top_level_domains_path", path / "top_level_domains/");
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config()); TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
} }
{ {
fs::create_directories(fs::path(path) / "data/"); fs::create_directories(path / "data/");
fs::create_directories(fs::path(path) / "metadata/"); fs::create_directories(path / "metadata/");
/// Directory with metadata of tables, which was marked as dropped by Atomic database /// Directory with metadata of tables, which was marked as dropped by Atomic database
fs::create_directories(fs::path(path) / "metadata_dropped/"); fs::create_directories(path / "metadata_dropped/");
} }
if (config().has("interserver_http_port") && config().has("interserver_https_port")) if (config().has("interserver_http_port") && config().has("interserver_https_port"))
@ -952,7 +954,7 @@ if (ThreadFuzzer::instance().isEffective())
#endif #endif
/// Set path for format schema files /// Set path for format schema files
fs::path format_schema_path(config().getString("format_schema_path", fs::path(path) / "format_schemas/")); fs::path format_schema_path(config().getString("format_schema_path", path / "format_schemas/"));
global_context->setFormatSchemaPath(format_schema_path); global_context->setFormatSchemaPath(format_schema_path);
fs::create_directories(format_schema_path); fs::create_directories(format_schema_path);
@ -1088,7 +1090,7 @@ if (ThreadFuzzer::instance().isEffective())
/// system logs may copy global context. /// system logs may copy global context.
global_context->setCurrentDatabaseNameInGlobalContext(default_database); global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading metadata from {}", path); LOG_INFO(log, "Loading metadata from {}", path_str);
try try
{ {
@ -1428,7 +1430,6 @@ if (ThreadFuzzer::instance().isEffective())
/// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread. /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
async_metrics.start(); async_metrics.start();
global_context->enableNamedSessions();
{ {
String level_str = config().getString("text_log.level", ""); String level_str = config().getString("text_log.level", "");

View File

@ -1,6 +1,7 @@
#include <Access/AccessControlManager.h> #include <Access/AccessControlManager.h>
#include <Access/MultipleAccessStorage.h> #include <Access/MultipleAccessStorage.h>
#include <Access/MemoryAccessStorage.h> #include <Access/MemoryAccessStorage.h>
#include <Access/ReplicatedAccessStorage.h>
#include <Access/UsersConfigAccessStorage.h> #include <Access/UsersConfigAccessStorage.h>
#include <Access/DiskAccessStorage.h> #include <Access/DiskAccessStorage.h>
#include <Access/LDAPAccessStorage.h> #include <Access/LDAPAccessStorage.h>
@ -225,6 +226,22 @@ void AccessControlManager::startPeriodicReloadingUsersConfigs()
} }
} }
void AccessControlManager::addReplicatedStorage(
const String & storage_name_,
const String & zookeeper_path_,
const zkutil::GetZooKeeper & get_zookeeper_function_)
{
auto storages = getStoragesPtr();
for (const auto & storage : *storages)
{
if (auto replicated_storage = typeid_cast<std::shared_ptr<ReplicatedAccessStorage>>(storage))
return;
}
auto new_storage = std::make_shared<ReplicatedAccessStorage>(storage_name_, zookeeper_path_, get_zookeeper_function_);
addStorage(new_storage);
LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName());
new_storage->startup();
}
void AccessControlManager::addDiskStorage(const String & directory_, bool readonly_) void AccessControlManager::addDiskStorage(const String & directory_, bool readonly_)
{ {
@ -322,6 +339,11 @@ void AccessControlManager::addStoragesFromUserDirectoriesConfig(
{ {
addLDAPStorage(name, config, prefix); addLDAPStorage(name, config, prefix);
} }
else if (type == ReplicatedAccessStorage::STORAGE_TYPE)
{
String zookeeper_path = config.getString(prefix + ".zookeeper_path");
addReplicatedStorage(name, zookeeper_path, get_zookeeper_function);
}
else else
throw Exception("Unknown storage type '" + type + "' at " + prefix + " in config", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); throw Exception("Unknown storage type '" + type + "' at " + prefix + " in config", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
} }

View File

@ -84,6 +84,10 @@ public:
/// Adds LDAPAccessStorage which allows querying remote LDAP server for user info. /// Adds LDAPAccessStorage which allows querying remote LDAP server for user info.
void addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_); void addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_);
void addReplicatedStorage(const String & storage_name,
const String & zookeeper_path,
const zkutil::GetZooKeeper & get_zookeeper_function);
/// Adds storages from <users_directories> config. /// Adds storages from <users_directories> config.
void addStoragesFromUserDirectoriesConfig(const Poco::Util::AbstractConfiguration & config, void addStoragesFromUserDirectoriesConfig(const Poco::Util::AbstractConfiguration & config,
const String & key, const String & key,

View File

@ -0,0 +1,175 @@
#include <Access/AccessEntityIO.h>
#include <Access/IAccessEntity.h>
#include <Access/IAccessStorage.h>
#include <Access/Quota.h>
#include <Access/Role.h>
#include <Access/RowPolicy.h>
#include <Access/SettingsProfile.h>
#include <Access/User.h>
#include <Core/Defines.h>
#include <Interpreters/InterpreterCreateQuotaQuery.h>
#include <Interpreters/InterpreterCreateRoleQuery.h>
#include <Interpreters/InterpreterCreateRowPolicyQuery.h>
#include <Interpreters/InterpreterCreateSettingsProfileQuery.h>
#include <Interpreters/InterpreterCreateUserQuery.h>
#include <Interpreters/InterpreterGrantQuery.h>
#include <Interpreters/InterpreterShowCreateAccessEntityQuery.h>
#include <Interpreters/InterpreterShowGrantsQuery.h>
#include <Parsers/ASTCreateQuotaQuery.h>
#include <Parsers/ASTCreateRoleQuery.h>
#include <Parsers/ASTCreateRowPolicyQuery.h>
#include <Parsers/ASTCreateSettingsProfileQuery.h>
#include <Parsers/ASTCreateUserQuery.h>
#include <Parsers/ASTGrantQuery.h>
#include <Parsers/ParserCreateQuotaQuery.h>
#include <Parsers/ParserCreateRoleQuery.h>
#include <Parsers/ParserCreateRowPolicyQuery.h>
#include <Parsers/ParserCreateSettingsProfileQuery.h>
#include <Parsers/ParserCreateUserQuery.h>
#include <Parsers/ParserGrantQuery.h>
#include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_ACCESS_ENTITY_DEFINITION;
}
using EntityType = IAccessStorage::EntityType;
using EntityTypeInfo = IAccessStorage::EntityTypeInfo;
namespace
{
/// Special parser for the 'ATTACH access entity' queries.
class ParserAttachAccessEntity : public IParserBase
{
protected:
const char * getName() const override { return "ATTACH access entity query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
{
ParserCreateUserQuery create_user_p;
ParserCreateRoleQuery create_role_p;
ParserCreateRowPolicyQuery create_policy_p;
ParserCreateQuotaQuery create_quota_p;
ParserCreateSettingsProfileQuery create_profile_p;
ParserGrantQuery grant_p;
create_user_p.useAttachMode();
create_role_p.useAttachMode();
create_policy_p.useAttachMode();
create_quota_p.useAttachMode();
create_profile_p.useAttachMode();
grant_p.useAttachMode();
return create_user_p.parse(pos, node, expected) || create_role_p.parse(pos, node, expected)
|| create_policy_p.parse(pos, node, expected) || create_quota_p.parse(pos, node, expected)
|| create_profile_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected);
}
};
}
String serializeAccessEntity(const IAccessEntity & entity)
{
/// Build list of ATTACH queries.
ASTs queries;
queries.push_back(InterpreterShowCreateAccessEntityQuery::getAttachQuery(entity));
if ((entity.getType() == EntityType::USER) || (entity.getType() == EntityType::ROLE))
boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity));
/// Serialize the list of ATTACH queries to a string.
WriteBufferFromOwnString buf;
for (const ASTPtr & query : queries)
{
formatAST(*query, buf, false, true);
buf.write(";\n", 2);
}
return buf.str();
}
AccessEntityPtr deserializeAccessEntity(const String & definition, const String & path)
{
ASTs queries;
ParserAttachAccessEntity parser;
const char * begin = definition.data(); /// begin of current query
const char * pos = begin; /// parser moves pos from begin to the end of current query
const char * end = begin + definition.size();
while (pos < end)
{
queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH));
while (isWhitespaceASCII(*pos) || *pos == ';')
++pos;
}
/// Interpret the AST to build an access entity.
std::shared_ptr<User> user;
std::shared_ptr<Role> role;
std::shared_ptr<RowPolicy> policy;
std::shared_ptr<Quota> quota;
std::shared_ptr<SettingsProfile> profile;
AccessEntityPtr res;
for (const auto & query : queries)
{
if (auto * create_user_query = query->as<ASTCreateUserQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = user = std::make_unique<User>();
InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query);
}
else if (auto * create_role_query = query->as<ASTCreateRoleQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = role = std::make_unique<Role>();
InterpreterCreateRoleQuery::updateRoleFromQuery(*role, *create_role_query);
}
else if (auto * create_policy_query = query->as<ASTCreateRowPolicyQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = policy = std::make_unique<RowPolicy>();
InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(*policy, *create_policy_query);
}
else if (auto * create_quota_query = query->as<ASTCreateQuotaQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = quota = std::make_unique<Quota>();
InterpreterCreateQuotaQuery::updateQuotaFromQuery(*quota, *create_quota_query);
}
else if (auto * create_profile_query = query->as<ASTCreateSettingsProfileQuery>())
{
if (res)
throw Exception("Two access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = profile = std::make_unique<SettingsProfile>();
InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(*profile, *create_profile_query);
}
else if (auto * grant_query = query->as<ASTGrantQuery>())
{
if (!user && !role)
throw Exception(
"A user or role should be attached before grant in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
if (user)
InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query);
else
InterpreterGrantQuery::updateRoleFromQuery(*role, *grant_query);
}
else
throw Exception("No interpreter found for query " + query->getID(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
}
if (!res)
throw Exception("No access entities attached in " + path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
return res;
}
}

View File

@ -0,0 +1,12 @@
#pragma once
#include <Access/IAccessEntity.h>
namespace DB
{
String serializeAccessEntity(const IAccessEntity & entity);
AccessEntityPtr deserializeAccessEntity(const String & definition, const String & path);
}

View File

@ -70,6 +70,7 @@ public:
/// Returns the current user. The function can return nullptr. /// Returns the current user. The function can return nullptr.
UserPtr getUser() const; UserPtr getUser() const;
String getUserName() const; String getUserName() const;
std::optional<UUID> getUserID() const { return getParams().user_id; }
/// Returns information about current and enabled roles. /// Returns information about current and enabled roles.
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const; std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;

View File

@ -26,6 +26,8 @@ protected:
String user_name; String user_name;
}; };
/// Does not check the password/credentials and that the specified host is allowed.
/// (Used only internally in cluster, if the secret matches)
class AlwaysAllowCredentials class AlwaysAllowCredentials
: public Credentials : public Credentials
{ {

View File

@ -4,41 +4,20 @@
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h> #include <IO/WriteBufferFromFile.h>
#include <IO/ReadBufferFromString.h> #include <IO/ReadBufferFromString.h>
#include <Access/AccessEntityIO.h>
#include <Access/User.h> #include <Access/User.h>
#include <Access/Role.h> #include <Access/Role.h>
#include <Access/RowPolicy.h> #include <Access/RowPolicy.h>
#include <Access/Quota.h> #include <Access/Quota.h>
#include <Access/SettingsProfile.h>
#include <Parsers/ASTCreateUserQuery.h> #include <Parsers/ASTCreateUserQuery.h>
#include <Parsers/ASTCreateRoleQuery.h>
#include <Parsers/ASTCreateRowPolicyQuery.h>
#include <Parsers/ASTCreateQuotaQuery.h>
#include <Parsers/ASTCreateSettingsProfileQuery.h>
#include <Parsers/ASTGrantQuery.h>
#include <Parsers/ParserCreateUserQuery.h>
#include <Parsers/ParserCreateRoleQuery.h>
#include <Parsers/ParserCreateRowPolicyQuery.h>
#include <Parsers/ParserCreateQuotaQuery.h>
#include <Parsers/ParserCreateSettingsProfileQuery.h>
#include <Parsers/ParserGrantQuery.h>
#include <Parsers/formatAST.h> #include <Parsers/formatAST.h>
#include <Parsers/parseQuery.h>
#include <Interpreters/InterpreterCreateUserQuery.h> #include <Interpreters/InterpreterCreateUserQuery.h>
#include <Interpreters/InterpreterCreateRoleQuery.h>
#include <Interpreters/InterpreterCreateRowPolicyQuery.h>
#include <Interpreters/InterpreterCreateQuotaQuery.h>
#include <Interpreters/InterpreterCreateSettingsProfileQuery.h>
#include <Interpreters/InterpreterGrantQuery.h>
#include <Interpreters/InterpreterShowCreateAccessEntityQuery.h>
#include <Interpreters/InterpreterShowGrantsQuery.h> #include <Interpreters/InterpreterShowGrantsQuery.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Core/Defines.h>
#include <Poco/JSON/JSON.h> #include <Poco/JSON/JSON.h>
#include <Poco/JSON/Object.h> #include <Poco/JSON/Object.h>
#include <Poco/JSON/Stringifier.h> #include <Poco/JSON/Stringifier.h>
#include <boost/range/adaptor/map.hpp> #include <boost/range/adaptor/map.hpp>
#include <boost/range/algorithm/copy.hpp>
#include <boost/range/algorithm_ext/push_back.hpp>
#include <filesystem> #include <filesystem>
#include <fstream> #include <fstream>
@ -49,7 +28,6 @@ namespace ErrorCodes
{ {
extern const int DIRECTORY_DOESNT_EXIST; extern const int DIRECTORY_DOESNT_EXIST;
extern const int FILE_DOESNT_EXIST; extern const int FILE_DOESNT_EXIST;
extern const int INCORRECT_ACCESS_ENTITY_DEFINITION;
} }
@ -58,34 +36,6 @@ namespace
using EntityType = IAccessStorage::EntityType; using EntityType = IAccessStorage::EntityType;
using EntityTypeInfo = IAccessStorage::EntityTypeInfo; using EntityTypeInfo = IAccessStorage::EntityTypeInfo;
/// Special parser for the 'ATTACH access entity' queries.
class ParserAttachAccessEntity : public IParserBase
{
protected:
const char * getName() const override { return "ATTACH access entity query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override
{
ParserCreateUserQuery create_user_p;
ParserCreateRoleQuery create_role_p;
ParserCreateRowPolicyQuery create_policy_p;
ParserCreateQuotaQuery create_quota_p;
ParserCreateSettingsProfileQuery create_profile_p;
ParserGrantQuery grant_p;
create_user_p.useAttachMode();
create_role_p.useAttachMode();
create_policy_p.useAttachMode();
create_quota_p.useAttachMode();
create_profile_p.useAttachMode();
grant_p.useAttachMode();
return create_user_p.parse(pos, node, expected) || create_role_p.parse(pos, node, expected)
|| create_policy_p.parse(pos, node, expected) || create_quota_p.parse(pos, node, expected)
|| create_profile_p.parse(pos, node, expected) || grant_p.parse(pos, node, expected);
}
};
/// Reads a file containing ATTACH queries and then parses it to build an access entity. /// Reads a file containing ATTACH queries and then parses it to build an access entity.
AccessEntityPtr readEntityFile(const String & file_path) AccessEntityPtr readEntityFile(const String & file_path)
@ -96,80 +46,7 @@ namespace
readStringUntilEOF(file_contents, in); readStringUntilEOF(file_contents, in);
/// Parse the file contents. /// Parse the file contents.
ASTs queries; return deserializeAccessEntity(file_contents, file_path);
ParserAttachAccessEntity parser;
const char * begin = file_contents.data(); /// begin of current query
const char * pos = begin; /// parser moves pos from begin to the end of current query
const char * end = begin + file_contents.size();
while (pos < end)
{
queries.emplace_back(parseQueryAndMovePosition(parser, pos, end, "", true, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH));
while (isWhitespaceASCII(*pos) || *pos == ';')
++pos;
}
/// Interpret the AST to build an access entity.
std::shared_ptr<User> user;
std::shared_ptr<Role> role;
std::shared_ptr<RowPolicy> policy;
std::shared_ptr<Quota> quota;
std::shared_ptr<SettingsProfile> profile;
AccessEntityPtr res;
for (const auto & query : queries)
{
if (auto * create_user_query = query->as<ASTCreateUserQuery>())
{
if (res)
throw Exception("Two access entities in one file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = user = std::make_unique<User>();
InterpreterCreateUserQuery::updateUserFromQuery(*user, *create_user_query);
}
else if (auto * create_role_query = query->as<ASTCreateRoleQuery>())
{
if (res)
throw Exception("Two access entities in one file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = role = std::make_unique<Role>();
InterpreterCreateRoleQuery::updateRoleFromQuery(*role, *create_role_query);
}
else if (auto * create_policy_query = query->as<ASTCreateRowPolicyQuery>())
{
if (res)
throw Exception("Two access entities in one file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = policy = std::make_unique<RowPolicy>();
InterpreterCreateRowPolicyQuery::updateRowPolicyFromQuery(*policy, *create_policy_query);
}
else if (auto * create_quota_query = query->as<ASTCreateQuotaQuery>())
{
if (res)
throw Exception("Two access entities are attached in the same file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = quota = std::make_unique<Quota>();
InterpreterCreateQuotaQuery::updateQuotaFromQuery(*quota, *create_quota_query);
}
else if (auto * create_profile_query = query->as<ASTCreateSettingsProfileQuery>())
{
if (res)
throw Exception("Two access entities are attached in the same file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
res = profile = std::make_unique<SettingsProfile>();
InterpreterCreateSettingsProfileQuery::updateSettingsProfileFromQuery(*profile, *create_profile_query);
}
else if (auto * grant_query = query->as<ASTGrantQuery>())
{
if (!user && !role)
throw Exception("A user or role should be attached before grant in file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
if (user)
InterpreterGrantQuery::updateUserFromQuery(*user, *grant_query);
else
InterpreterGrantQuery::updateRoleFromQuery(*role, *grant_query);
}
else
throw Exception("No interpreter found for query " + query->getID(), ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
}
if (!res)
throw Exception("No access entities attached in file " + file_path, ErrorCodes::INCORRECT_ACCESS_ENTITY_DEFINITION);
return res;
} }
@ -186,24 +63,10 @@ namespace
} }
} }
/// Writes ATTACH queries for building a specified access entity to a file. /// Writes ATTACH queries for building a specified access entity to a file.
void writeEntityFile(const String & file_path, const IAccessEntity & entity) void writeEntityFile(const String & file_path, const IAccessEntity & entity)
{ {
/// Build list of ATTACH queries. String file_contents = serializeAccessEntity(entity);
ASTs queries;
queries.push_back(InterpreterShowCreateAccessEntityQuery::getAttachQuery(entity));
if ((entity.getType() == EntityType::USER) || (entity.getType() == EntityType::ROLE))
boost::range::push_back(queries, InterpreterShowGrantsQuery::getAttachGrantQueries(entity));
/// Serialize the list of ATTACH queries to a string.
WriteBufferFromOwnString buf;
for (const ASTPtr & query : queries)
{
formatAST(*query, buf, false, true);
buf.write(";\n", 2);
}
String file_contents = buf.str();
/// First we save *.tmp file and then we rename if everything's ok. /// First we save *.tmp file and then we rename if everything's ok.
auto tmp_file_path = std::filesystem::path{file_path}.replace_extension(".tmp"); auto tmp_file_path = std::filesystem::path{file_path}.replace_extension(".tmp");

View File

@ -0,0 +1,618 @@
#include <Access/AccessEntityIO.h>
#include <Access/MemoryAccessStorage.h>
#include <Access/ReplicatedAccessStorage.h>
#include <IO/ReadHelpers.h>
#include <boost/container/flat_set.hpp>
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/escapeForFileName.h>
#include <common/range.h>
#include <common/sleep.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NO_ZOOKEEPER;
}
static UUID parseUUID(const String & text)
{
UUID uuid = UUIDHelpers::Nil;
auto buffer = ReadBufferFromMemory(text.data(), text.length());
readUUIDText(uuid, buffer);
return uuid;
}
ReplicatedAccessStorage::ReplicatedAccessStorage(
const String & storage_name_,
const String & zookeeper_path_,
zkutil::GetZooKeeper get_zookeeper_)
: IAccessStorage(storage_name_)
, zookeeper_path(zookeeper_path_)
, get_zookeeper(get_zookeeper_)
{
if (zookeeper_path.empty())
throw Exception("ZooKeeper path must be non-empty", ErrorCodes::BAD_ARGUMENTS);
if (zookeeper_path.back() == '/')
zookeeper_path.resize(zookeeper_path.size() - 1);
/// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
if (zookeeper_path.front() != '/')
zookeeper_path = "/" + zookeeper_path;
}
ReplicatedAccessStorage::~ReplicatedAccessStorage()
{
ReplicatedAccessStorage::shutdown();
}
void ReplicatedAccessStorage::startup()
{
initializeZookeeper();
worker_thread = ThreadFromGlobalPool(&ReplicatedAccessStorage::runWorkerThread, this);
}
void ReplicatedAccessStorage::shutdown()
{
bool prev_stop_flag = stop_flag.exchange(true);
if (!prev_stop_flag)
{
/// Notify the worker thread to stop waiting for new queue items
refresh_queue.push(UUIDHelpers::Nil);
worker_thread.join();
}
}
template <typename Func>
static void retryOnZooKeeperUserError(size_t attempts, Func && function)
{
while (attempts > 0)
{
try
{
function();
return;
}
catch (zkutil::KeeperException & keeper_exception)
{
if (Coordination::isUserError(keeper_exception.code) && attempts > 1)
attempts -= 1;
else
throw;
}
}
}
UUID ReplicatedAccessStorage::insertImpl(const AccessEntityPtr & new_entity, bool replace_if_exists)
{
const UUID id = generateRandomID();
const EntityTypeInfo type_info = EntityTypeInfo::get(new_entity->getType());
const String & name = new_entity->getName();
LOG_DEBUG(getLogger(), "Inserting entity of type {} named {} with id {}", type_info.name, name, toString(id));
auto zookeeper = get_zookeeper();
retryOnZooKeeperUserError(10, [&]{ insertZooKeeper(zookeeper, id, new_entity, replace_if_exists); });
Notifications notifications;
SCOPE_EXIT({ notify(notifications); });
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id, notifications);
return id;
}
void ReplicatedAccessStorage::insertZooKeeper(
const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & new_entity, bool replace_if_exists)
{
const String & name = new_entity->getName();
const EntityType type = new_entity->getType();
const EntityTypeInfo type_info = EntityTypeInfo::get(type);
const String entity_uuid = toString(id);
/// The entity data will be stored here, this ensures all entities have unique ids
const String entity_path = zookeeper_path + "/uuid/" + entity_uuid;
/// Then we create a znode with the entity name, inside the znode of each entity type
/// This ensure all entities of the same type have a unique name
const String name_path = zookeeper_path + "/" + type_info.unique_char + "/" + escapeForFileName(name);
Coordination::Requests ops;
const String new_entity_definition = serializeAccessEntity(*new_entity);
ops.emplace_back(zkutil::makeCreateRequest(entity_path, new_entity_definition, zkutil::CreateMode::Persistent));
/// The content of the "name" znode is the uuid of the entity owning that name
ops.emplace_back(zkutil::makeCreateRequest(name_path, entity_uuid, zkutil::CreateMode::Persistent));
Coordination::Responses responses;
const Coordination::Error res = zookeeper->tryMulti(ops, responses);
if (res == Coordination::Error::ZNODEEXISTS)
{
if (responses[0]->error == Coordination::Error::ZNODEEXISTS)
{
/// The UUID already exists, simply fail.
/// To fail with a nice error message, we need info about what already exists.
/// This itself could fail if the conflicting uuid disappears in the meantime.
/// If that happens, then we'll just retry from the start.
String existing_entity_definition = zookeeper->get(entity_path);
AccessEntityPtr existing_entity = deserializeAccessEntity(existing_entity_definition, entity_path);
EntityType existing_type = existing_entity->getType();
String existing_name = existing_entity->getName();
throwIDCollisionCannotInsert(id, type, name, existing_type, existing_name);
}
else if (replace_if_exists)
{
/// The name already exists for this type.
/// If asked to, we need to replace the existing entity.
/// First get the uuid of the existing entity
/// This itself could fail if the conflicting name disappears in the meantime.
/// If that happens, then we'll just retry from the start.
Coordination::Stat name_stat;
String existing_entity_uuid = zookeeper->get(name_path, &name_stat);
const String existing_entity_path = zookeeper_path + "/uuid/" + existing_entity_uuid;
Coordination::Requests replace_ops;
replace_ops.emplace_back(zkutil::makeRemoveRequest(existing_entity_path, -1));
replace_ops.emplace_back(zkutil::makeCreateRequest(entity_path, new_entity_definition, zkutil::CreateMode::Persistent));
replace_ops.emplace_back(zkutil::makeSetRequest(name_path, entity_uuid, name_stat.version));
/// If this fails, then we'll just retry from the start.
zookeeper->multi(replace_ops);
}
else
{
throwNameCollisionCannotInsert(type, name);
}
}
else
{
zkutil::KeeperMultiException::check(res, ops, responses);
}
}
void ReplicatedAccessStorage::removeImpl(const UUID & id)
{
LOG_DEBUG(getLogger(), "Removing entity {}", toString(id));
auto zookeeper = get_zookeeper();
retryOnZooKeeperUserError(10, [&] { removeZooKeeper(zookeeper, id); });
Notifications notifications;
SCOPE_EXIT({ notify(notifications); });
std::lock_guard lock{mutex};
removeEntityNoLock(id, notifications);
}
void ReplicatedAccessStorage::removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id)
{
const String entity_uuid = toString(id);
const String entity_path = zookeeper_path + "/uuid/" + entity_uuid;
String entity_definition;
Coordination::Stat entity_stat;
const bool uuid_exists = zookeeper->tryGet(entity_path, entity_definition, &entity_stat);
if (!uuid_exists)
throwNotFound(id);
const AccessEntityPtr entity = deserializeAccessEntity(entity_definition, entity_path);
const EntityTypeInfo type_info = EntityTypeInfo::get(entity->getType());
const String & name = entity->getName();
const String entity_name_path = zookeeper_path + "/" + type_info.unique_char + "/" + escapeForFileName(name);
Coordination::Requests ops;
ops.emplace_back(zkutil::makeRemoveRequest(entity_path, entity_stat.version));
ops.emplace_back(zkutil::makeRemoveRequest(entity_name_path, -1));
/// If this fails, then we'll just retry from the start.
zookeeper->multi(ops);
}
void ReplicatedAccessStorage::updateImpl(const UUID & id, const UpdateFunc & update_func)
{
LOG_DEBUG(getLogger(), "Updating entity {}", toString(id));
auto zookeeper = get_zookeeper();
retryOnZooKeeperUserError(10, [&] { updateZooKeeper(zookeeper, id, update_func); });
Notifications notifications;
SCOPE_EXIT({ notify(notifications); });
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id, notifications);
}
void ReplicatedAccessStorage::updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func)
{
const String entity_uuid = toString(id);
const String entity_path = zookeeper_path + "/uuid/" + entity_uuid;
String old_entity_definition;
Coordination::Stat stat;
const bool uuid_exists = zookeeper->tryGet(entity_path, old_entity_definition, &stat);
if (!uuid_exists)
throwNotFound(id);
const AccessEntityPtr old_entity = deserializeAccessEntity(old_entity_definition, entity_path);
const AccessEntityPtr new_entity = update_func(old_entity);
if (!new_entity->isTypeOf(old_entity->getType()))
throwBadCast(id, new_entity->getType(), new_entity->getName(), old_entity->getType());
const EntityTypeInfo type_info = EntityTypeInfo::get(new_entity->getType());
Coordination::Requests ops;
const String new_entity_definition = serializeAccessEntity(*new_entity);
ops.emplace_back(zkutil::makeSetRequest(entity_path, new_entity_definition, stat.version));
const String & old_name = old_entity->getName();
const String & new_name = new_entity->getName();
if (new_name != old_name)
{
auto old_name_path = zookeeper_path + "/" + type_info.unique_char + "/" + escapeForFileName(old_name);
auto new_name_path = zookeeper_path + "/" + type_info.unique_char + "/" + escapeForFileName(new_name);
ops.emplace_back(zkutil::makeRemoveRequest(old_name_path, -1));
ops.emplace_back(zkutil::makeCreateRequest(new_name_path, entity_uuid, zkutil::CreateMode::Persistent));
}
Coordination::Responses responses;
const Coordination::Error res = zookeeper->tryMulti(ops, responses);
if (res == Coordination::Error::ZNODEEXISTS)
{
throwNameCollisionCannotRename(new_entity->getType(), old_name, new_name);
}
else if (res == Coordination::Error::ZNONODE)
{
throwNotFound(id);
}
else
{
zkutil::KeeperMultiException::check(res, ops, responses);
}
}
void ReplicatedAccessStorage::runWorkerThread()
{
LOG_DEBUG(getLogger(), "Started worker thread");
while (!stop_flag)
{
try
{
if (!initialized)
initializeZookeeper();
refresh();
}
catch (...)
{
tryLogCurrentException(getLogger(), "Unexpected error, will try to restart worker thread:");
resetAfterError();
sleepForSeconds(5);
}
}
}
void ReplicatedAccessStorage::resetAfterError()
{
initialized = false;
UUID id;
while (refresh_queue.tryPop(id)) {}
std::lock_guard lock{mutex};
for (const auto type : collections::range(EntityType::MAX))
entries_by_name_and_type[static_cast<size_t>(type)].clear();
entries_by_id.clear();
}
void ReplicatedAccessStorage::initializeZookeeper()
{
assert(!initialized);
auto zookeeper = get_zookeeper();
if (!zookeeper)
throw Exception("Can't have Replicated access without ZooKeeper", ErrorCodes::NO_ZOOKEEPER);
createRootNodes(zookeeper);
refreshEntities(zookeeper);
initialized = true;
}
void ReplicatedAccessStorage::createRootNodes(const zkutil::ZooKeeperPtr & zookeeper)
{
zookeeper->createAncestors(zookeeper_path);
zookeeper->createIfNotExists(zookeeper_path, "");
zookeeper->createIfNotExists(zookeeper_path + "/uuid", "");
for (const auto type : collections::range(EntityType::MAX))
{
/// Create a znode for each type of AccessEntity
const auto type_info = EntityTypeInfo::get(type);
zookeeper->createIfNotExists(zookeeper_path + "/" + type_info.unique_char, "");
}
}
void ReplicatedAccessStorage::refresh()
{
UUID id;
if (refresh_queue.tryPop(id, /* timeout_ms: */ 10000))
{
if (stop_flag)
return;
auto zookeeper = get_zookeeper();
if (id == UUIDHelpers::Nil)
refreshEntities(zookeeper);
else
refreshEntity(zookeeper, id);
}
}
void ReplicatedAccessStorage::refreshEntities(const zkutil::ZooKeeperPtr & zookeeper)
{
LOG_DEBUG(getLogger(), "Refreshing entities list");
const String zookeeper_uuids_path = zookeeper_path + "/uuid";
auto watch_entities_list = [this](const Coordination::WatchResponse &)
{
refresh_queue.push(UUIDHelpers::Nil);
};
Coordination::Stat stat;
const auto entity_uuid_strs = zookeeper->getChildrenWatch(zookeeper_uuids_path, &stat, watch_entities_list);
std::unordered_set<UUID> entity_uuids;
entity_uuids.reserve(entity_uuid_strs.size());
for (const String & entity_uuid_str : entity_uuid_strs)
entity_uuids.insert(parseUUID(entity_uuid_str));
Notifications notifications;
SCOPE_EXIT({ notify(notifications); });
std::lock_guard lock{mutex};
std::vector<UUID> entities_to_remove;
/// Locally remove entities that were removed from ZooKeeper
for (const auto & pair : entries_by_id)
{
const UUID & entity_uuid = pair.first;
if (!entity_uuids.contains(entity_uuid))
entities_to_remove.push_back(entity_uuid);
}
for (const auto & entity_uuid : entities_to_remove)
removeEntityNoLock(entity_uuid, notifications);
/// Locally add entities that were added to ZooKeeper
for (const auto & entity_uuid : entity_uuids)
{
const auto it = entries_by_id.find(entity_uuid);
if (it == entries_by_id.end())
refreshEntityNoLock(zookeeper, entity_uuid, notifications);
}
LOG_DEBUG(getLogger(), "Refreshing entities list finished");
}
void ReplicatedAccessStorage::refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id)
{
Notifications notifications;
SCOPE_EXIT({ notify(notifications); });
std::lock_guard lock{mutex};
refreshEntityNoLock(zookeeper, id, notifications);
}
void ReplicatedAccessStorage::refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, Notifications & notifications)
{
LOG_DEBUG(getLogger(), "Refreshing entity {}", toString(id));
const auto watch_entity = [this, id](const Coordination::WatchResponse & response)
{
if (response.type == Coordination::Event::CHANGED)
refresh_queue.push(id);
};
Coordination::Stat entity_stat;
const String entity_path = zookeeper_path + "/uuid/" + toString(id);
String entity_definition;
const bool exists = zookeeper->tryGetWatch(entity_path, entity_definition, &entity_stat, watch_entity);
if (exists)
{
const AccessEntityPtr entity = deserializeAccessEntity(entity_definition, entity_path);
setEntityNoLock(id, entity, notifications);
}
else
{
removeEntityNoLock(id, notifications);
}
}
void ReplicatedAccessStorage::setEntityNoLock(const UUID & id, const AccessEntityPtr & entity, Notifications & notifications)
{
LOG_DEBUG(getLogger(), "Setting id {} to entity named {}", toString(id), entity->getName());
const EntityType type = entity->getType();
const String & name = entity->getName();
/// If the type+name already exists and is a different entity, remove old entity
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
if (auto it = entries_by_name.find(name); it != entries_by_name.end() && it->second->id != id)
{
removeEntityNoLock(it->second->id, notifications);
}
/// If the entity already exists under a different type+name, remove old type+name
if (auto it = entries_by_id.find(id); it != entries_by_id.end())
{
const AccessEntityPtr & existing_entity = it->second.entity;
const EntityType existing_type = existing_entity->getType();
const String & existing_name = existing_entity->getName();
if (existing_type != type || existing_name != name)
{
auto & existing_entries_by_name = entries_by_name_and_type[static_cast<size_t>(existing_type)];
existing_entries_by_name.erase(existing_name);
}
}
auto & entry = entries_by_id[id];
entry.id = id;
entry.entity = entity;
entries_by_name[name] = &entry;
prepareNotifications(entry, false, notifications);
}
void ReplicatedAccessStorage::removeEntityNoLock(const UUID & id, Notifications & notifications)
{
LOG_DEBUG(getLogger(), "Removing entity with id {}", toString(id));
const auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
{
LOG_DEBUG(getLogger(), "Id {} not found, ignoring removal", toString(id));
return;
}
const Entry & entry = it->second;
const EntityType type = entry.entity->getType();
const String & name = entry.entity->getName();
prepareNotifications(entry, true, notifications);
auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
const auto name_it = entries_by_name.find(name);
if (name_it == entries_by_name.end())
LOG_WARNING(getLogger(), "Entity {} not found in names, ignoring removal of name", toString(id));
else if (name_it->second != &(it->second))
LOG_WARNING(getLogger(), "Name {} not pointing to entity {}, ignoring removal of name", name, toString(id));
else
entries_by_name.erase(name);
entries_by_id.erase(id);
LOG_DEBUG(getLogger(), "Removed entity with id {}", toString(id));
}
std::optional<UUID> ReplicatedAccessStorage::findImpl(EntityType type, const String & name) const
{
std::lock_guard lock{mutex};
const auto & entries_by_name = entries_by_name_and_type[static_cast<size_t>(type)];
const auto it = entries_by_name.find(name);
if (it == entries_by_name.end())
return {};
const Entry * entry = it->second;
return entry->id;
}
std::vector<UUID> ReplicatedAccessStorage::findAllImpl(EntityType type) const
{
std::lock_guard lock{mutex};
std::vector<UUID> result;
result.reserve(entries_by_id.size());
for (const auto & [id, entry] : entries_by_id)
if (entry.entity->isTypeOf(type))
result.emplace_back(id);
return result;
}
bool ReplicatedAccessStorage::existsImpl(const UUID & id) const
{
std::lock_guard lock{mutex};
return entries_by_id.count(id);
}
AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id) const
{
std::lock_guard lock{mutex};
const auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
throwNotFound(id);
const Entry & entry = it->second;
return entry.entity;
}
String ReplicatedAccessStorage::readNameImpl(const UUID & id) const
{
return readImpl(id)->getName();
}
void ReplicatedAccessStorage::prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const
{
const AccessEntityPtr entity = remove ? nullptr : entry.entity;
for (const auto & handler : entry.handlers_by_id)
notifications.push_back({handler, entry.id, entity});
for (const auto & handler : handlers_by_type[static_cast<size_t>(entry.entity->getType())])
notifications.push_back({handler, entry.id, entity});
}
scope_guard ReplicatedAccessStorage::subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const
{
std::lock_guard lock{mutex};
auto & handlers = handlers_by_type[static_cast<size_t>(type)];
handlers.push_back(handler);
auto handler_it = std::prev(handlers.end());
return [this, type, handler_it]
{
std::lock_guard lock2{mutex};
auto & handlers2 = handlers_by_type[static_cast<size_t>(type)];
handlers2.erase(handler_it);
};
}
scope_guard ReplicatedAccessStorage::subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const
{
std::lock_guard lock{mutex};
const auto it = entries_by_id.find(id);
if (it == entries_by_id.end())
return {};
const Entry & entry = it->second;
auto handler_it = entry.handlers_by_id.insert(entry.handlers_by_id.end(), handler);
return [this, id, handler_it]
{
std::lock_guard lock2{mutex};
auto it2 = entries_by_id.find(id);
if (it2 != entries_by_id.end())
{
const Entry & entry2 = it2->second;
entry2.handlers_by_id.erase(handler_it);
}
};
}
bool ReplicatedAccessStorage::hasSubscriptionImpl(const UUID & id) const
{
std::lock_guard lock{mutex};
const auto & it = entries_by_id.find(id);
if (it != entries_by_id.end())
{
const Entry & entry = it->second;
return !entry.handlers_by_id.empty();
}
return false;
}
bool ReplicatedAccessStorage::hasSubscriptionImpl(EntityType type) const
{
std::lock_guard lock{mutex};
const auto & handlers = handlers_by_type[static_cast<size_t>(type)];
return !handlers.empty();
}
}

View File

@ -0,0 +1,87 @@
#pragma once
#include <Access/IAccessStorage.h>
#include <Common/ThreadPool.h>
#include <Common/ZooKeeper/Common.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <common/scope_guard.h>
#include <Coordination/ThreadSafeQueue.h>
#include <atomic>
#include <list>
#include <memory>
#include <mutex>
#include <unordered_map>
namespace DB
{
/// Implementation of IAccessStorage which keeps all data in zookeeper.
class ReplicatedAccessStorage : public IAccessStorage
{
public:
static constexpr char STORAGE_TYPE[] = "replicated";
ReplicatedAccessStorage(const String & storage_name, const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper);
virtual ~ReplicatedAccessStorage() override;
const char * getStorageType() const override { return STORAGE_TYPE; }
virtual void startup();
virtual void shutdown();
private:
String zookeeper_path;
zkutil::GetZooKeeper get_zookeeper;
std::atomic<bool> initialized = false;
std::atomic<bool> stop_flag = false;
ThreadFromGlobalPool worker_thread;
ThreadSafeQueue<UUID> refresh_queue;
UUID insertImpl(const AccessEntityPtr & entity, bool replace_if_exists) override;
void removeImpl(const UUID & id) override;
void updateImpl(const UUID & id, const UpdateFunc & update_func) override;
void insertZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const AccessEntityPtr & entity, bool replace_if_exists);
void removeZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void updateZooKeeper(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, const UpdateFunc & update_func);
void runWorkerThread();
void resetAfterError();
void initializeZookeeper();
void createRootNodes(const zkutil::ZooKeeperPtr & zookeeper);
void refresh();
void refreshEntities(const zkutil::ZooKeeperPtr & zookeeper);
void refreshEntity(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id);
void refreshEntityNoLock(const zkutil::ZooKeeperPtr & zookeeper, const UUID & id, Notifications & notifications);
void setEntityNoLock(const UUID & id, const AccessEntityPtr & entity, Notifications & notifications);
void removeEntityNoLock(const UUID & id, Notifications & notifications);
struct Entry
{
UUID id;
AccessEntityPtr entity;
mutable std::list<OnChangedHandler> handlers_by_id;
};
std::optional<UUID> findImpl(EntityType type, const String & name) const override;
std::vector<UUID> findAllImpl(EntityType type) const override;
bool existsImpl(const UUID & id) const override;
AccessEntityPtr readImpl(const UUID & id) const override;
String readNameImpl(const UUID & id) const override;
bool canInsertImpl(const AccessEntityPtr &) const override { return true; }
void prepareNotifications(const Entry & entry, bool remove, Notifications & notifications) const;
scope_guard subscribeForChangesImpl(const UUID & id, const OnChangedHandler & handler) const override;
scope_guard subscribeForChangesImpl(EntityType type, const OnChangedHandler & handler) const override;
bool hasSubscriptionImpl(const UUID & id) const override;
bool hasSubscriptionImpl(EntityType type) const override;
mutable std::mutex mutex;
std::unordered_map<UUID, Entry> entries_by_id;
std::unordered_map<String, Entry *> entries_by_name_and_type[static_cast<size_t>(EntityType::MAX)];
mutable std::list<OnChangedHandler> handlers_by_type[static_cast<size_t>(EntityType::MAX)];
};
}

View File

@ -10,6 +10,7 @@ PEERDIR(
SRCS( SRCS(
AccessControlManager.cpp AccessControlManager.cpp
AccessEntityIO.cpp
AccessRights.cpp AccessRights.cpp
AccessRightsElement.cpp AccessRightsElement.cpp
AllowedClientHosts.cpp AllowedClientHosts.cpp
@ -34,6 +35,7 @@ SRCS(
Quota.cpp Quota.cpp
QuotaCache.cpp QuotaCache.cpp
QuotaUsage.cpp QuotaUsage.cpp
ReplicatedAccessStorage.cpp
Role.cpp Role.cpp
RoleCache.cpp RoleCache.cpp
RolesOrUsersSet.cpp RolesOrUsersSet.cpp

View File

@ -5,6 +5,7 @@
#include <Common/SipHash.h> #include <Common/SipHash.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Columns/ColumnTuple.h> #include <Columns/ColumnTuple.h>
#include <DataTypes/IDataType.h>
namespace DB namespace DB

View File

@ -5,6 +5,7 @@
#include <Poco/Net/HTTPRequest.h> #include <Poco/Net/HTTPRequest.h>
#include <Poco/URI.h> #include <Poco/URI.h>
#include <filesystem> #include <filesystem>
#include <thread>
namespace fs = std::filesystem; namespace fs = std::filesystem;

View File

@ -373,7 +373,9 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
except_list, except_list,
is_draining ? drain_timeout : receive_timeout); is_draining ? drain_timeout : receive_timeout);
if (n == 0) /// We treat any error as timeout for simplicity.
/// And we also check if read_list is still empty just in case.
if (n <= 0 || read_list.empty())
{ {
auto err_msg = fmt::format("Timeout exceeded while reading from {}", dumpAddressesUnlocked()); auto err_msg = fmt::format("Timeout exceeded while reading from {}", dumpAddressesUnlocked());
for (ReplicaState & state : replica_states) for (ReplicaState & state : replica_states)
@ -389,9 +391,7 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
} }
} }
/// TODO Absolutely wrong code: read_list could be empty; motivation of rand is unclear. /// TODO Motivation of rand is unclear.
/// This code path is disabled by default.
auto & socket = read_list[thread_local_rng() % read_list.size()]; auto & socket = read_list[thread_local_rng() % read_list.size()];
if (fd_to_replica_state_idx.empty()) if (fd_to_replica_state_idx.empty())
{ {

View File

@ -1,5 +1,6 @@
#include <Columns/ColumnAggregateFunction.h> #include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/MaskOperations.h>
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <IO/WriteBufferFromArena.h> #include <IO/WriteBufferFromArena.h>
@ -308,6 +309,10 @@ ColumnPtr ColumnAggregateFunction::filter(const Filter & filter, ssize_t result_
return res; return res;
} }
void ColumnAggregateFunction::expand(const Filter & mask, bool inverted)
{
expandDataByMask<char *>(data, mask, inverted);
}
ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnAggregateFunction::permute(const Permutation & perm, size_t limit) const
{ {

View File

@ -177,6 +177,8 @@ public:
ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filter, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -8,6 +8,7 @@
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h> #include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <common/unaligned.h> #include <common/unaligned.h>
#include <common/sort.h> #include <common/sort.h>
@ -551,6 +552,34 @@ ColumnPtr ColumnArray::filter(const Filter & filt, ssize_t result_size_hint) con
return filterGeneric(filt, result_size_hint); return filterGeneric(filt, result_size_hint);
} }
void ColumnArray::expand(const IColumn::Filter & mask, bool inverted)
{
auto & offsets_data = getOffsets();
if (mask.size() < offsets_data.size())
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
int index = mask.size() - 1;
int from = offsets_data.size() - 1;
offsets_data.resize(mask.size());
UInt64 last_offset = offsets_data[from];
while (index >= 0)
{
offsets_data[index] = last_offset;
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
--from;
last_offset = offsets_data[from];
}
--index;
}
if (from != -1)
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);}
template <typename T> template <typename T>
ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const ColumnPtr ColumnArray::filterNumber(const Filter & filt, ssize_t result_size_hint) const
{ {

View File

@ -71,6 +71,7 @@ public:
void insertDefault() override; void insertDefault() override;
void popBack(size_t n) override; void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const; template <typename Type> ColumnPtr indexImpl(const PaddedPODArray<Type> & indexes, size_t limit) const;

View File

@ -90,6 +90,7 @@ public:
void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); } void updateWeakHash32(WeakHash32 &) const override { throwMustBeDecompressed(); }
void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); } void updateHashFast(SipHash &) const override { throwMustBeDecompressed(); }
ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); } ColumnPtr filter(const Filter &, ssize_t) const override { throwMustBeDecompressed(); }
void expand(const Filter &, bool) override { throwMustBeDecompressed(); }
ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr permute(const Permutation &, size_t) const override { throwMustBeDecompressed(); }
ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); } ColumnPtr index(const IColumn &, size_t) const override { throwMustBeDecompressed(); }
int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); } int compareAt(size_t, size_t, const IColumn &, int) const override { throwMustBeDecompressed(); }

View File

@ -59,9 +59,28 @@ ColumnPtr ColumnConst::filter(const Filter & filt, ssize_t /*result_size_hint*/)
throw Exception("Size of filter (" + toString(filt.size()) + ") doesn't match size of column (" + toString(s) + ")", throw Exception("Size of filter (" + toString(filt.size()) + ") doesn't match size of column (" + toString(s) + ")",
ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
return ColumnConst::create(data, countBytesInFilter(filt)); size_t new_size = countBytesInFilter(filt);
return ColumnConst::create(data, new_size);
} }
void ColumnConst::expand(const Filter & mask, bool inverted)
{
if (mask.size() < s)
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
size_t bytes_count = countBytesInFilter(mask);
if (inverted)
bytes_count = mask.size() - bytes_count;
if (bytes_count < s)
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
else if (bytes_count > s)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
s = mask.size();
}
ColumnPtr ColumnConst::replicate(const Offsets & offsets) const ColumnPtr ColumnConst::replicate(const Offsets & offsets) const
{ {
if (s != offsets.size()) if (s != offsets.size())

View File

@ -181,6 +181,8 @@ public:
} }
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -15,6 +15,7 @@
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnDecimal.h> #include <Columns/ColumnDecimal.h>
#include <Columns/ColumnCompressed.h> #include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
@ -320,6 +321,12 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter & filt, ssize_t result_
return res; return res;
} }
template <typename T>
void ColumnDecimal<T>::expand(const IColumn::Filter & mask, bool inverted)
{
expandDataByMask<T>(data, mask, inverted);
}
template <typename T> template <typename T>
ColumnPtr ColumnDecimal<T>::index(const IColumn & indexes, size_t limit) const ColumnPtr ColumnDecimal<T>::index(const IColumn & indexes, size_t limit) const
{ {

View File

@ -151,6 +151,8 @@ public:
bool isDefaultAt(size_t n) const override { return data[n].value == 0; } bool isDefaultAt(size_t n) const override { return data[n].value == 0; }
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
void expand(const IColumn::Filter & mask, bool inverted) override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override; ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -344,6 +344,32 @@ ColumnPtr ColumnFixedString::filter(const IColumn::Filter & filt, ssize_t result
return res; return res;
} }
void ColumnFixedString::expand(const IColumn::Filter & mask, bool inverted)
{
if (mask.size() < size())
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
int index = mask.size() - 1;
int from = size() - 1;
chars.resize_fill(mask.size() * n, 0);
while (index >= 0)
{
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
memcpy(&chars[index * n], &chars[from * n], n);
--from;
}
--index;
}
if (from != -1)
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnFixedString::permute(const Permutation & perm, size_t limit) const
{ {
size_t col_size = size(); size_t col_size = size();

View File

@ -147,6 +147,8 @@ public:
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
void expand(const IColumn::Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -2,9 +2,15 @@
#include <Columns/ColumnFunction.h> #include <Columns/ColumnFunction.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Common/PODArray.h> #include <Common/PODArray.h>
#include <Common/ProfileEvents.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
namespace ProfileEvents
{
extern const Event FunctionExecute;
extern const Event CompiledFunctionExecute;
}
namespace DB namespace DB
{ {
@ -15,8 +21,8 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
} }
ColumnFunction::ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture) ColumnFunction::ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_, bool is_function_compiled_)
: size_(size), function(function_) : size_(size), function(function_), is_short_circuit_argument(is_short_circuit_argument_), is_function_compiled(is_function_compiled_)
{ {
appendArguments(columns_to_capture); appendArguments(columns_to_capture);
} }
@ -27,7 +33,7 @@ MutableColumnPtr ColumnFunction::cloneResized(size_t size) const
for (auto & column : capture) for (auto & column : capture)
column.column = column.column->cloneResized(size); column.column = column.column->cloneResized(size);
return ColumnFunction::create(size, function, capture); return ColumnFunction::create(size, function, capture, is_short_circuit_argument, is_function_compiled);
} }
ColumnPtr ColumnFunction::replicate(const Offsets & offsets) const ColumnPtr ColumnFunction::replicate(const Offsets & offsets) const
@ -41,7 +47,7 @@ ColumnPtr ColumnFunction::replicate(const Offsets & offsets) const
column.column = column.column->replicate(offsets); column.column = column.column->replicate(offsets);
size_t replicated_size = 0 == size_ ? 0 : offsets.back(); size_t replicated_size = 0 == size_ ? 0 : offsets.back();
return ColumnFunction::create(replicated_size, function, capture); return ColumnFunction::create(replicated_size, function, capture, is_short_circuit_argument, is_function_compiled);
} }
ColumnPtr ColumnFunction::cut(size_t start, size_t length) const ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
@ -50,7 +56,7 @@ ColumnPtr ColumnFunction::cut(size_t start, size_t length) const
for (auto & column : capture) for (auto & column : capture)
column.column = column.column->cut(start, length); column.column = column.column->cut(start, length);
return ColumnFunction::create(length, function, capture); return ColumnFunction::create(length, function, capture, is_short_circuit_argument, is_function_compiled);
} }
ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint) const ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint) const
@ -65,11 +71,24 @@ ColumnPtr ColumnFunction::filter(const Filter & filt, ssize_t result_size_hint)
size_t filtered_size = 0; size_t filtered_size = 0;
if (capture.empty()) if (capture.empty())
{
filtered_size = countBytesInFilter(filt); filtered_size = countBytesInFilter(filt);
}
else else
filtered_size = capture.front().column->size(); filtered_size = capture.front().column->size();
return ColumnFunction::create(filtered_size, function, capture); return ColumnFunction::create(filtered_size, function, capture, is_short_circuit_argument, is_function_compiled);
}
void ColumnFunction::expand(const Filter & mask, bool inverted)
{
for (auto & column : captured_columns)
{
column.column = column.column->cloneResized(column.column->size());
column.column->assumeMutable()->expand(mask, inverted);
}
size_ = mask.size();
} }
ColumnPtr ColumnFunction::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnFunction::permute(const Permutation & perm, size_t limit) const
@ -87,7 +106,7 @@ ColumnPtr ColumnFunction::permute(const Permutation & perm, size_t limit) const
for (auto & column : capture) for (auto & column : capture)
column.column = column.column->permute(perm, limit); column.column = column.column->permute(perm, limit);
return ColumnFunction::create(limit, function, capture); return ColumnFunction::create(limit, function, capture, is_short_circuit_argument, is_function_compiled);
} }
ColumnPtr ColumnFunction::index(const IColumn & indexes, size_t limit) const ColumnPtr ColumnFunction::index(const IColumn & indexes, size_t limit) const
@ -96,7 +115,7 @@ ColumnPtr ColumnFunction::index(const IColumn & indexes, size_t limit) const
for (auto & column : capture) for (auto & column : capture)
column.column = column.column->index(indexes, limit); column.column = column.column->index(indexes, limit);
return ColumnFunction::create(limit, function, capture); return ColumnFunction::create(limit, function, capture, is_short_circuit_argument, is_function_compiled);
} }
std::vector<MutableColumnPtr> ColumnFunction::scatter(IColumn::ColumnIndex num_columns, std::vector<MutableColumnPtr> ColumnFunction::scatter(IColumn::ColumnIndex num_columns,
@ -125,7 +144,7 @@ std::vector<MutableColumnPtr> ColumnFunction::scatter(IColumn::ColumnIndex num_c
{ {
auto & capture = captures[part]; auto & capture = captures[part];
size_t capture_size = capture.empty() ? counts[part] : capture.front().column->size(); size_t capture_size = capture.empty() ? counts[part] : capture.front().column->size();
columns.emplace_back(ColumnFunction::create(capture_size, function, std::move(capture))); columns.emplace_back(ColumnFunction::create(capture_size, function, std::move(capture), is_short_circuit_argument));
} }
return columns; return columns;
@ -179,7 +198,7 @@ void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column)
const auto & argumnet_types = function->getArgumentTypes(); const auto & argumnet_types = function->getArgumentTypes();
auto index = captured_columns.size(); auto index = captured_columns.size();
if (!column.type->equals(*argumnet_types[index])) if (!is_short_circuit_argument && !column.type->equals(*argumnet_types[index]))
throw Exception("Cannot capture column " + std::to_string(argumnet_types.size()) + throw Exception("Cannot capture column " + std::to_string(argumnet_types.size()) +
" because it has incompatible type: got " + column.type->getName() + " because it has incompatible type: got " + column.type->getName() +
", but " + argumnet_types[index]->getName() + " is expected.", ErrorCodes::LOGICAL_ERROR); ", but " + argumnet_types[index]->getName() + " is expected.", ErrorCodes::LOGICAL_ERROR);
@ -187,6 +206,11 @@ void ColumnFunction::appendArgument(const ColumnWithTypeAndName & column)
captured_columns.push_back(column); captured_columns.push_back(column);
} }
DataTypePtr ColumnFunction::getResultType() const
{
return function->getResultType();
}
ColumnWithTypeAndName ColumnFunction::reduce() const ColumnWithTypeAndName ColumnFunction::reduce() const
{ {
auto args = function->getArgumentTypes().size(); auto args = function->getArgumentTypes().size();
@ -196,11 +220,33 @@ ColumnWithTypeAndName ColumnFunction::reduce() const
throw Exception("Cannot call function " + function->getName() + " because is has " + toString(args) + throw Exception("Cannot call function " + function->getName() + " because is has " + toString(args) +
"arguments but " + toString(captured) + " columns were captured.", ErrorCodes::LOGICAL_ERROR); "arguments but " + toString(captured) + " columns were captured.", ErrorCodes::LOGICAL_ERROR);
auto columns = captured_columns; ColumnsWithTypeAndName columns = captured_columns;
if (is_short_circuit_argument)
{
/// Arguments of lazy executed function can also be lazy executed.
for (auto & col : columns)
{
if (const ColumnFunction * arg = checkAndGetShortCircuitArgument(col.column))
col = arg->reduce();
}
}
ColumnWithTypeAndName res{nullptr, function->getResultType(), ""}; ColumnWithTypeAndName res{nullptr, function->getResultType(), ""};
ProfileEvents::increment(ProfileEvents::FunctionExecute);
if (is_function_compiled)
ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute);
res.column = function->execute(columns, res.type, size_); res.column = function->execute(columns, res.type, size_);
return res; return res;
} }
const ColumnFunction * checkAndGetShortCircuitArgument(const ColumnPtr & column)
{
const ColumnFunction * column_function;
if ((column_function = typeid_cast<const ColumnFunction *>(column.get())) && column_function->isShortCircuitArgument())
return column_function;
return nullptr;
}
} }

View File

@ -5,9 +5,6 @@
#include <Core/ColumnsWithTypeAndName.h> #include <Core/ColumnsWithTypeAndName.h>
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
class IFunctionBase;
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
namespace DB namespace DB
{ {
@ -16,6 +13,8 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED; extern const int NOT_IMPLEMENTED;
} }
class IFunctionBase;
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
/** A column containing a lambda expression. /** A column containing a lambda expression.
* Behaves like a constant-column. Contains an expression, but not input or output data. * Behaves like a constant-column. Contains an expression, but not input or output data.
@ -25,7 +24,7 @@ class ColumnFunction final : public COWHelper<IColumn, ColumnFunction>
private: private:
friend class COWHelper<IColumn, ColumnFunction>; friend class COWHelper<IColumn, ColumnFunction>;
ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture); ColumnFunction(size_t size, FunctionBasePtr function_, const ColumnsWithTypeAndName & columns_to_capture, bool is_short_circuit_argument_ = false, bool is_function_compiled_ = false);
public: public:
const char * getFamilyName() const override { return "Function"; } const char * getFamilyName() const override { return "Function"; }
@ -38,6 +37,7 @@ public:
ColumnPtr cut(size_t start, size_t length) const override; ColumnPtr cut(size_t start, size_t length) const override;
ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr replicate(const Offsets & offsets) const override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
@ -153,12 +153,29 @@ public:
throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method gather is not supported for " + getName(), ErrorCodes::NOT_IMPLEMENTED);
} }
bool isShortCircuitArgument() const { return is_short_circuit_argument; }
DataTypePtr getResultType() const;
private: private:
size_t size_; size_t size_;
FunctionBasePtr function; FunctionBasePtr function;
ColumnsWithTypeAndName captured_columns; ColumnsWithTypeAndName captured_columns;
/// Determine if it's used as a lazy executed argument for short-circuit function.
/// It's needed to distinguish between lazy executed argument and
/// argument with ColumnFunction column (some functions can return it)
/// See ExpressionActions.cpp for details.
bool is_short_circuit_argument;
/// Determine if passed function is compiled. Used for profiling.
bool is_function_compiled;
void appendArgument(const ColumnWithTypeAndName & column); void appendArgument(const ColumnWithTypeAndName & column);
void addOffsetsForReplication(const IColumn::Offsets & offsets);
}; };
const ColumnFunction * checkAndGetShortCircuitArgument(const ColumnPtr & column);
} }

View File

@ -110,6 +110,11 @@ public:
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint)); return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().filter(filt, result_size_hint));
} }
void expand(const Filter & mask, bool inverted) override
{
idx.getPositionsPtr()->expand(mask, inverted);
}
ColumnPtr permute(const Permutation & perm, size_t limit) const override ColumnPtr permute(const Permutation & perm, size_t limit) const override
{ {
return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit)); return ColumnLowCardinality::create(dictionary.getColumnUniquePtr(), getIndexes().permute(perm, limit));

View File

@ -149,6 +149,11 @@ ColumnPtr ColumnMap::filter(const Filter & filt, ssize_t result_size_hint) const
return ColumnMap::create(filtered); return ColumnMap::create(filtered);
} }
void ColumnMap::expand(const IColumn::Filter & mask, bool inverted)
{
nested->expand(mask, inverted);
}
ColumnPtr ColumnMap::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnMap::permute(const Permutation & perm, size_t limit) const
{ {
auto permuted = nested->permute(perm, limit); auto permuted = nested->permute(perm, limit);

View File

@ -64,6 +64,7 @@ public:
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr replicate(const Offsets & offsets) const override;

View File

@ -221,6 +221,12 @@ ColumnPtr ColumnNullable::filter(const Filter & filt, ssize_t result_size_hint)
return ColumnNullable::create(filtered_data, filtered_null_map); return ColumnNullable::create(filtered_data, filtered_null_map);
} }
void ColumnNullable::expand(const IColumn::Filter & mask, bool inverted)
{
nested_column->expand(mask, inverted);
null_map->expand(mask, inverted);
}
ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnNullable::permute(const Permutation & perm, size_t limit) const
{ {
ColumnPtr permuted_data = getNestedColumn().permute(perm, limit); ColumnPtr permuted_data = getNestedColumn().permute(perm, limit);

View File

@ -88,6 +88,7 @@ public:
void popBack(size_t n) override; void popBack(size_t n) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override; int compareAt(size_t n, size_t m, const IColumn & rhs_, int null_direction_hint) const override;

View File

@ -3,6 +3,7 @@
#include <Columns/Collator.h> #include <Columns/Collator.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h> #include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <Common/Arena.h> #include <Common/Arena.h>
#include <Common/HashTable/Hash.h> #include <Common/HashTable/Hash.h>
@ -157,6 +158,53 @@ ColumnPtr ColumnString::filter(const Filter & filt, ssize_t result_size_hint) co
return res; return res;
} }
void ColumnString::expand(const IColumn::Filter & mask, bool inverted)
{
auto & offsets_data = getOffsets();
auto & chars_data = getChars();
if (mask.size() < offsets_data.size())
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
/// We cannot change only offsets, because each string should end with terminating zero byte.
/// So, we will insert one zero byte when mask value is zero.
int index = mask.size() - 1;
int from = offsets_data.size() - 1;
/// mask.size() - offsets_data.size() should be equal to the number of zeros in mask
/// (if not, one of exceptions below will throw) and we can calculate the resulting chars size.
UInt64 last_offset = offsets_data[from] + (mask.size() - offsets_data.size());
offsets_data.resize(mask.size());
chars_data.resize_fill(last_offset, 0);
while (index >= 0)
{
offsets_data[index] = last_offset;
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
size_t len = offsets_data[from] - offsets_data[from - 1];
/// Copy only if it makes sense. It's important to copy backward, because
/// ranges can overlap, but destination is always is more to the right then source
if (last_offset - len != offsets_data[from - 1])
std::copy_backward(&chars_data[offsets_data[from - 1]], &chars_data[offsets_data[from]], &chars_data[last_offset]);
last_offset -= len;
--from;
}
else
{
chars_data[last_offset - 1] = 0;
--last_offset;
}
--index;
}
if (from != -1)
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
{ {

View File

@ -212,6 +212,8 @@ public:
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -232,6 +232,12 @@ ColumnPtr ColumnTuple::filter(const Filter & filt, ssize_t result_size_hint) con
return ColumnTuple::create(new_columns); return ColumnTuple::create(new_columns);
} }
void ColumnTuple::expand(const Filter & mask, bool inverted)
{
for (auto & column : columns)
column->expand(mask, inverted);
}
ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const ColumnPtr ColumnTuple::permute(const Permutation & perm, size_t limit) const
{ {
const size_t tuple_size = columns.size(); const size_t tuple_size = columns.size();

View File

@ -1,6 +1,6 @@
#pragma once #pragma once
#include <Core/Block.h> #include <Columns/IColumn.h>
namespace DB namespace DB
@ -67,6 +67,7 @@ public:
void updateHashFast(SipHash & hash) const override; void updateHashFast(SipHash & hash) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const Filter & filt, ssize_t result_size_hint) const override;
void expand(const Filter & mask, bool inverted) override;
ColumnPtr permute(const Permutation & perm, size_t limit) const override; ColumnPtr permute(const Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;
ColumnPtr replicate(const Offsets & offsets) const override; ColumnPtr replicate(const Offsets & offsets) const override;

View File

@ -3,6 +3,7 @@
#include <pdqsort.h> #include <pdqsort.h>
#include <Columns/ColumnsCommon.h> #include <Columns/ColumnsCommon.h>
#include <Columns/ColumnCompressed.h> #include <Columns/ColumnCompressed.h>
#include <Columns/MaskOperations.h>
#include <DataStreams/ColumnGathererStream.h> #include <DataStreams/ColumnGathererStream.h>
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <Common/Arena.h> #include <Common/Arena.h>
@ -408,6 +409,12 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
return res; return res;
} }
template <typename T>
void ColumnVector<T>::expand(const IColumn::Filter & mask, bool inverted)
{
expandDataByMask<T>(data, mask, inverted);
}
template <typename T> template <typename T>
void ColumnVector<T>::applyZeroMap(const IColumn::Filter & filt, bool inverted) void ColumnVector<T>::applyZeroMap(const IColumn::Filter & filt, bool inverted)
{ {

View File

@ -239,6 +239,7 @@ public:
return data[n]; return data[n];
} }
void get(size_t n, Field & res) const override void get(size_t n, Field & res) const override
{ {
res = (*this)[n]; res = (*this)[n];
@ -284,6 +285,8 @@ public:
ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override; ColumnPtr filter(const IColumn::Filter & filt, ssize_t result_size_hint) const override;
void expand(const IColumn::Filter & mask, bool inverted) override;
ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override; ColumnPtr permute(const IColumn::Permutation & perm, size_t limit) const override;
ColumnPtr index(const IColumn & indexes, size_t limit) const override; ColumnPtr index(const IColumn & indexes, size_t limit) const override;

View File

@ -3,7 +3,6 @@
#include <Columns/IColumn.h> #include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h> #include <Columns/ColumnNullable.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnArray.h>
#include <Core/Field.h> #include <Core/Field.h>

View File

@ -236,6 +236,14 @@ public:
using Filter = PaddedPODArray<UInt8>; using Filter = PaddedPODArray<UInt8>;
virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0; virtual Ptr filter(const Filter & filt, ssize_t result_size_hint) const = 0;
/** Expand column by mask inplace. After expanding column will
* satisfy the following: if we filter it by given mask, we will
* get initial column. Values with indexes i: mask[i] = 0
* shouldn't be used after expanding.
* If inverted is true, inverted mask will be used.
*/
virtual void expand(const Filter & /*mask*/, bool /*inverted*/) = 0;
/// Permutes elements using specified permutation. Is used in sorting. /// Permutes elements using specified permutation. Is used in sorting.
/// limit - if it isn't 0, puts only first limit elements in the result. /// limit - if it isn't 0, puts only first limit elements in the result.
using Permutation = PaddedPODArray<size_t>; using Permutation = PaddedPODArray<size_t>;

View File

@ -100,7 +100,16 @@ public:
ColumnPtr filter(const Filter & filt, ssize_t /*result_size_hint*/) const override ColumnPtr filter(const Filter & filt, ssize_t /*result_size_hint*/) const override
{ {
return cloneDummy(countBytesInFilter(filt)); size_t bytes = countBytesInFilter(filt);
return cloneDummy(bytes);
}
void expand(const IColumn::Filter & mask, bool inverted) override
{
size_t bytes = countBytesInFilter(mask);
if (inverted)
bytes = mask.size() - bytes;
s = bytes;
} }
ColumnPtr permute(const Permutation & perm, size_t limit) const override ColumnPtr permute(const Permutation & perm, size_t limit) const override

View File

@ -139,6 +139,11 @@ public:
throw Exception("Method filter is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method filter is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
} }
void expand(const IColumn::Filter &, bool) override
{
throw Exception("Method expand is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);
}
ColumnPtr permute(const IColumn::Permutation &, size_t) const override ColumnPtr permute(const IColumn::Permutation &, size_t) const override
{ {
throw Exception("Method permute is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED); throw Exception("Method permute is not supported for ColumnUnique.", ErrorCodes::NOT_IMPLEMENTED);

View File

@ -0,0 +1,316 @@
#include <Columns/MaskOperations.h>
#include <Columns/ColumnFunction.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnNothing.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnConst.h>
#include <algorithm>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_COLUMN;
}
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted)
{
if (mask.size() < data.size())
throw Exception("Mask size should be no less than data size.", ErrorCodes::LOGICAL_ERROR);
int from = data.size() - 1;
int index = mask.size() - 1;
data.resize(mask.size());
while (index >= 0)
{
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);
/// Copy only if it makes sense.
if (index != from)
data[index] = data[from];
--from;
}
else
data[index] = T();
--index;
}
if (from != -1)
throw Exception("Not enough bytes in mask", ErrorCodes::LOGICAL_ERROR);
}
/// Explicit instantiations - not to place the implementation of the function above in the header file.
#define INSTANTIATE(TYPE) \
template void expandDataByMask<TYPE>(PaddedPODArray<TYPE> &, const PaddedPODArray<UInt8> &, bool);
INSTANTIATE(UInt8)
INSTANTIATE(UInt16)
INSTANTIATE(UInt32)
INSTANTIATE(UInt64)
INSTANTIATE(UInt128)
INSTANTIATE(UInt256)
INSTANTIATE(Int8)
INSTANTIATE(Int16)
INSTANTIATE(Int32)
INSTANTIATE(Int64)
INSTANTIATE(Int128)
INSTANTIATE(Int256)
INSTANTIATE(Float32)
INSTANTIATE(Float64)
INSTANTIATE(Decimal32)
INSTANTIATE(Decimal64)
INSTANTIATE(Decimal128)
INSTANTIATE(Decimal256)
INSTANTIATE(DateTime64)
INSTANTIATE(char *)
INSTANTIATE(UUID)
#undef INSTANTIATE
template <bool inverted, bool column_is_short, typename Container>
size_t extractMaskNumericImpl(
PaddedPODArray<UInt8> & mask,
const Container & data,
UInt8 null_value,
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls)
{
size_t ones_count = 0;
size_t data_index = 0;
for (size_t i = 0; i != mask.size(); ++i)
{
// Change mask only where value is 1.
if (!mask[i])
continue;
UInt8 value;
size_t index;
if constexpr (column_is_short)
{
index = data_index;
++data_index;
}
else
index = i;
if (null_bytemap && (*null_bytemap)[index])
{
value = null_value;
if (nulls)
(*nulls)[i] = 1;
}
else
value = !!data[index];
if constexpr (inverted)
value = !value;
if (value)
++ones_count;
mask[i] = value;
}
return ones_count;
}
template <bool inverted, typename NumericType>
bool extractMaskNumeric(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
UInt8 null_value,
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls,
MaskInfo & mask_info)
{
const auto * numeric_column = checkAndGetColumn<ColumnVector<NumericType>>(column.get());
if (!numeric_column)
return false;
const auto & data = numeric_column->getData();
size_t ones_count;
if (column->size() < mask.size())
ones_count = extractMaskNumericImpl<inverted, true>(mask, data, null_value, null_bytemap, nulls);
else
ones_count = extractMaskNumericImpl<inverted, false>(mask, data, null_value, null_bytemap, nulls);
mask_info.has_ones = ones_count > 0;
mask_info.has_zeros = ones_count != mask.size();
return true;
}
template <bool inverted>
MaskInfo extractMaskFromConstOrNull(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
UInt8 null_value,
PaddedPODArray<UInt8> * nulls = nullptr)
{
UInt8 value;
if (column->onlyNull())
{
value = null_value;
if (nulls)
std::fill(nulls->begin(), nulls->end(), 1);
}
else
value = column->getBool(0);
if constexpr (inverted)
value = !value;
size_t ones_count = 0;
if (value)
ones_count = countBytesInFilter(mask);
else
std::fill(mask.begin(), mask.end(), 0);
return {.has_ones = ones_count > 0, .has_zeros = ones_count != mask.size()};
}
template <bool inverted>
MaskInfo extractMaskImpl(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
UInt8 null_value,
const PaddedPODArray<UInt8> * null_bytemap,
PaddedPODArray<UInt8> * nulls = nullptr)
{
/// Special implementation for Null and Const columns.
if (column->onlyNull() || checkAndGetColumn<ColumnConst>(*column))
return extractMaskFromConstOrNull<inverted>(mask, column, null_value, nulls);
if (const auto * col = checkAndGetColumn<ColumnNullable>(*column))
{
const PaddedPODArray<UInt8> & null_map = col->getNullMapData();
return extractMaskImpl<inverted>(mask, col->getNestedColumnPtr(), null_value, &null_map, nulls);
}
MaskInfo mask_info;
if (!(extractMaskNumeric<inverted, UInt8>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, UInt16>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, UInt32>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, UInt64>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, Int8>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, Int16>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, Int32>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, Int64>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, Float32>(mask, column, null_value, null_bytemap, nulls, mask_info)
|| extractMaskNumeric<inverted, Float64>(mask, column, null_value, null_bytemap, nulls, mask_info)))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot convert column {} to mask.", column->getName());
return mask_info;
}
MaskInfo extractMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
UInt8 null_value)
{
return extractMaskImpl<false>(mask, column, null_value, nullptr);
}
MaskInfo extractInvertedMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
UInt8 null_value)
{
return extractMaskImpl<true>(mask, column, null_value, nullptr);
}
MaskInfo extractMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
PaddedPODArray<UInt8> * nulls,
UInt8 null_value)
{
return extractMaskImpl<false>(mask, column, null_value, nullptr, nulls);
}
MaskInfo extractInvertedMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
PaddedPODArray<UInt8> * nulls,
UInt8 null_value)
{
return extractMaskImpl<true>(mask, column, null_value, nullptr, nulls);
}
void inverseMask(PaddedPODArray<UInt8> & mask, MaskInfo & mask_info)
{
for (size_t i = 0; i != mask.size(); ++i)
mask[i] = !mask[i];
std::swap(mask_info.has_ones, mask_info.has_zeros);
}
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, const MaskInfo & mask_info)
{
const auto * column_function = checkAndGetShortCircuitArgument(column.column);
if (!column_function)
return;
ColumnWithTypeAndName result;
/// If mask contains only zeros, we can just create
/// an empty column with the execution result type.
if (!mask_info.has_ones)
{
auto result_type = column_function->getResultType();
auto empty_column = result_type->createColumn();
result = {std::move(empty_column), result_type, ""};
}
/// Filter column only if mask contains zeros.
else if (mask_info.has_zeros)
{
auto filtered = column_function->filter(mask, -1);
result = typeid_cast<const ColumnFunction *>(filtered.get())->reduce();
}
else
result = column_function->reduce();
column = std::move(result);
}
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty)
{
const auto * column_function = checkAndGetShortCircuitArgument(column.column);
if (!column_function)
return;
if (!empty)
column = column_function->reduce();
else
column.column = column_function->getResultType()->createColumn();
}
int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments)
{
int last_short_circuit_argument_index = -1;
for (size_t i = 0; i != arguments.size(); ++i)
{
if (checkAndGetShortCircuitArgument(arguments[i].column))
last_short_circuit_argument_index = i;
}
return last_short_circuit_argument_index;
}
void copyMask(const PaddedPODArray<UInt8> & from, PaddedPODArray<UInt8> & to)
{
if (from.size() != to.size())
throw Exception("Cannot copy mask, because source and destination have different size", ErrorCodes::LOGICAL_ERROR);
if (from.empty())
return;
memcpy(to.data(), from.data(), from.size() * sizeof(*from.data()));
}
}

View File

@ -0,0 +1,73 @@
#pragma once
#include <Core/ColumnWithTypeAndName.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/Field.h>
#include <Common/PODArray.h>
namespace DB
{
/// Expand data by mask. After expanding data will satisfy the following: if we filter data
/// by given mask, we get initial data. In places where mask[i] = 0 we insert default value.
/// If inverted is true, we will work with inverted mask. This function is used in implementations of
/// expand() method in IColumn interface.
template <typename T>
void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & mask, bool inverted);
struct MaskInfo
{
bool has_ones;
bool has_zeros;
};
/// The next functions are used to extract UInt8 mask from a column,
/// filtered by some condition (mask). We will use value from a column
/// only when value in condition is 1. Column should satisfy the
/// condition: sum(mask) = column.size() or mask.size() = column.size().
/// You can set flag 'inverted' to use inverted values
/// from a column. You can also determine value that will be used when
/// column value is Null (argument null_value).
MaskInfo extractMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
UInt8 null_value = 0);
MaskInfo extractInvertedMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
UInt8 null_value = 0);
/// The same as extractMask, but fills
/// nulls so that nulls[i] = 1 when column[i] = Null.
MaskInfo extractMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
PaddedPODArray<UInt8> * nulls,
UInt8 null_value = 0);
MaskInfo extractInvertedMask(
PaddedPODArray<UInt8> & mask,
const ColumnPtr & column,
PaddedPODArray<UInt8> * nulls,
UInt8 null_value = 0);
/// Inplace inversion.
void inverseMask(PaddedPODArray<UInt8> & mask, MaskInfo & mask_info);
/// If given column is lazy executed argument (ColumnFunction with isShortCircuitArgument() = true),
/// filter it by mask and then reduce. If inverted is true, we will work with inverted mask.
void maskedExecute(ColumnWithTypeAndName & column, const PaddedPODArray<UInt8> & mask, const MaskInfo & mask_info);
/// If given column is lazy executed argument, reduce it. If empty is true,
/// create an empty column with the execution result type.
void executeColumnIfNeeded(ColumnWithTypeAndName & column, bool empty = false);
/// Check if arguments contain lazy executed argument. If contain, return index of the last one,
/// otherwise return -1.
int checkShirtCircuitArguments(const ColumnsWithTypeAndName & arguments);
void copyMask(const PaddedPODArray<UInt8> & from, PaddedPODArray<UInt8> & to);
}

View File

@ -35,6 +35,7 @@ SRCS(
ColumnsCommon.cpp ColumnsCommon.cpp
FilterDescription.cpp FilterDescription.cpp
IColumn.cpp IColumn.cpp
MaskOperations.cpp
getLeastSuperColumn.cpp getLeastSuperColumn.cpp
) )

View File

@ -20,7 +20,7 @@ template <typename T>
static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf) static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf)
{ {
writeChar('\'', buf); writeChar('\'', buf);
writeText(x.getValue(), x.getScale(), buf); writeText(x.getValue(), x.getScale(), buf, {});
writeChar('\'', buf); writeChar('\'', buf);
} }

View File

@ -26,7 +26,7 @@ template <typename T>
static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf) static inline void writeQuoted(const DecimalField<T> & x, WriteBuffer & buf)
{ {
writeChar('\'', buf); writeChar('\'', buf);
writeText(x.getValue(), x.getScale(), buf); writeText(x.getValue(), x.getScale(), buf, {});
writeChar('\'', buf); writeChar('\'', buf);
} }

View File

@ -191,10 +191,11 @@ void ThreadPoolImpl<Thread>::wait()
template <typename Thread> template <typename Thread>
ThreadPoolImpl<Thread>::~ThreadPoolImpl() ThreadPoolImpl<Thread>::~ThreadPoolImpl()
{ {
/// Note: should not use logger from here,
/// because it can be an instance of GlobalThreadPool that is a global variable
/// and the destruction order of global variables is unspecified.
finalize(); finalize();
/// wait() hadn't been called, log exception at least.
if (first_exception)
DB::tryLogException(first_exception, __PRETTY_FUNCTION__);
} }
template <typename Thread> template <typename Thread>
@ -273,21 +274,11 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
} }
catch (...) catch (...)
{ {
ALLOW_ALLOCATIONS_IN_SCOPE;
/// job should be reset before decrementing scheduled_jobs to /// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job destroyed before wait() returns. /// ensure that the Job destroyed before wait() returns.
job = {}; job = {};
{ {
/// In case thread pool will not be terminated on exception
/// (this is the case for GlobalThreadPool),
/// than first_exception may be overwritten and got lost,
/// and this usually is an error, since this will finish the thread,
/// and for this the caller may not be ready.
if (!shutdown_on_exception)
DB::tryLogException(std::current_exception(), __PRETTY_FUNCTION__);
std::unique_lock lock(mutex); std::unique_lock lock(mutex);
if (!first_exception) if (!first_exception)
first_exception = std::current_exception(); // NOLINT first_exception = std::current_exception(); // NOLINT

Some files were not shown because too many files have changed in this diff Show More