Merge branch 'master' of github.com:ClickHouse/ClickHouse

This commit is contained in:
listar 2021-08-21 15:58:07 +00:00
commit 66d60cbda3
295 changed files with 5561 additions and 2605 deletions

View File

@ -3,7 +3,7 @@ I hereby agree to the terms of the CLA available at: https://yandex.ru/legal/cla
Changelog category (leave one):
- New Feature
- Improvement
- Bug Fix
- Bug Fix (user-visible misbehaviour in official stable or prestable release)
- Performance Improvement
- Backward Incompatible Change
- Build/Testing/Packaging Improvement

View File

@ -1,5 +1,8 @@
### ClickHouse release v21.8, 2021-08-12
#### Upgrade Notes
* New version is using `Map` data type for system logs tables (`system.query_log`, `system.query_thread_log`, `system.processes`, `system.opentelemetry_span_log`). These tables will be auto-created with new data types. Virtual columns are created to support old queries. Closes [#18698](https://github.com/ClickHouse/ClickHouse/issues/18698). [#23934](https://github.com/ClickHouse/ClickHouse/pull/23934), [#25773](https://github.com/ClickHouse/ClickHouse/pull/25773) ([hexiaoting](https://github.com/hexiaoting), [sundy-li](https://github.com/sundy-li), [Maksim Kita](https://github.com/kitaisreal)). If you want to *downgrade* from version 21.8 to older versions, you will need to cleanup system tables with logs manually. Look at `/var/lib/clickhouse/data/system/*_log`.
#### New Features
* Add support for a part of SQL/JSON standard. [#24148](https://github.com/ClickHouse/ClickHouse/pull/24148) ([l1tsolaiki](https://github.com/l1tsolaiki), [Kseniia Sumarokova](https://github.com/kssenii)).

View File

@ -593,7 +593,23 @@ macro (add_executable target)
# disabled for TSAN and gcc since libtsan.a provides overrides too
if (TARGET clickhouse_new_delete)
# operator::new/delete for executables (MemoryTracker stuff)
target_link_libraries (${target} PRIVATE clickhouse_new_delete ${MALLOC_LIBRARIES})
target_link_libraries (${target} PRIVATE clickhouse_new_delete)
endif()
# In case of static jemalloc, because zone_register() is located in zone.c and
# is never used outside (it is declared as constructor) it is omitted
# by the linker, and so jemalloc will not be registered as system
# allocator under osx [1], and clickhouse will SIGSEGV.
#
# [1]: https://github.com/jemalloc/jemalloc/issues/708
#
# About symbol name:
# - _zone_register not zone_register due to Mach-O binary format,
# - _je_zone_register due to JEMALLOC_PRIVATE_NAMESPACE=je_ under OS X.
# - but jemalloc-cmake does not run private_namespace.sh
# so symbol name should be _zone_register
if (ENABLE_JEMALLOC AND MAKE_STATIC_LIBRARIES AND OS_DARWIN)
set_property(TARGET ${target} APPEND PROPERTY LINK_OPTIONS -u_zone_register)
endif()
endif()
endmacro()

View File

@ -152,7 +152,7 @@ namespace wide
template <size_t Bits, typename Signed>
struct integer<Bits, Signed>::_impl
{
static constexpr size_t _Bits = Bits;
static constexpr size_t _bits = Bits;
static constexpr const unsigned byte_count = Bits / 8;
static constexpr const unsigned item_count = byte_count / sizeof(base_type);
static constexpr const unsigned base_bits = sizeof(base_type) * 8;
@ -614,8 +614,8 @@ public:
else
{
static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::_impl::operator_plus(
integer<T::_impl::_Bits, Signed>(lhs), rhs);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::_impl::operator_plus(
integer<T::_impl::_bits, Signed>(lhs), rhs);
}
}
@ -632,8 +632,8 @@ public:
else
{
static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::_impl::operator_minus(
integer<T::_impl::_Bits, Signed>(lhs), rhs);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::_impl::operator_minus(
integer<T::_impl::_bits, Signed>(lhs), rhs);
}
}
@ -857,7 +857,7 @@ public:
else
{
static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::operator_slash(T(lhs), rhs);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::operator_slash(T(lhs), rhs);
}
}
@ -877,7 +877,7 @@ public:
else
{
static_assert(IsWideInteger<T>::value);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_Bits, Signed>>::operator_percent(T(lhs), rhs);
return std::common_type_t<integer<Bits, Signed>, integer<T::_impl::_bits, Signed>>::operator_percent(T(lhs), rhs);
}
}

View File

@ -12,6 +12,7 @@
#include <Common/SymbolIndex.h>
#include <Common/StackTrace.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Core/ServerUUID.h>
#if !defined(ARCADIA_BUILD)
# include "Common/config_version.h"
@ -38,6 +39,13 @@ void setExtras()
if (!anonymize)
sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str()));
DB::UUID server_uuid = DB::ServerUUID::get();
if (server_uuid != DB::UUIDHelpers::Nil)
{
std::string server_uuid_str = DB::toString(server_uuid);
sentry_set_extra("server_uuid", sentry_value_new_string(server_uuid_str.c_str()));
}
sentry_set_tag("version", VERSION_STRING);
sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH));
sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE));

View File

@ -1,5 +1,4 @@
#include <sys/auxv.h>
#include "atomic.h"
#include <unistd.h> // __environ
#include <errno.h>
@ -18,7 +17,18 @@ static size_t __find_auxv(unsigned long type)
return (size_t) -1;
}
unsigned long __getauxval(unsigned long type)
__attribute__((constructor)) static void __auxv_init()
{
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
}
unsigned long getauxval(unsigned long type)
{
if (type == AT_SECURE)
return __auxv_secure;
@ -33,38 +43,3 @@ unsigned long __getauxval(unsigned long type)
errno = ENOENT;
return 0;
}
static void * volatile getauxval_func;
static unsigned long __auxv_init(unsigned long type)
{
if (!__environ)
{
// __environ is not initialized yet so we can't initialize __auxv right now.
// That's normally occurred only when getauxval() is called from some sanitizer's internal code.
errno = ENOENT;
return 0;
}
// Initialize __auxv and __auxv_secure.
size_t i;
for (i = 0; __environ[i]; i++);
__auxv = (unsigned long *) (__environ + i + 1);
size_t secure_idx = __find_auxv(AT_SECURE);
if (secure_idx != ((size_t) -1))
__auxv_secure = __auxv[secure_idx];
// Now we've initialized __auxv, next time getauxval() will only call __get_auxval().
a_cas_p(&getauxval_func, (void *)__auxv_init, (void *)__getauxval);
return __getauxval(type);
}
// First time getauxval() will call __auxv_init().
static void * volatile getauxval_func = (void *)__auxv_init;
unsigned long getauxval(unsigned long type)
{
return ((unsigned long (*)(unsigned long))getauxval_func)(type);
}

View File

@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54454)
SET(VERSION_REVISION 54455)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 9)
SET(VERSION_MINOR 10)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH f063e44131a048ba2d9af8075f03700fd5ec3e69)
SET(VERSION_DESCRIBE v21.9.1.7770-prestable)
SET(VERSION_STRING 21.9.1.7770)
SET(VERSION_GITHASH 09df5018f95edcd0f759d4689ac5d029dd400c2a)
SET(VERSION_DESCRIBE v21.10.1.1-testing)
SET(VERSION_STRING 21.10.1.1)
# end of autochange

View File

@ -1,9 +1,10 @@
# Disabled under OSX until https://github.com/ClickHouse/ClickHouse/issues/27568 is fixed
if (SANITIZE OR NOT (
((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE))))
((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_ARM OR ARCH_PPC64LE)) OR
(OS_DARWIN AND (CMAKE_BUILD_TYPE STREQUAL "RelWithDebInfo" OR CMAKE_BUILD_TYPE STREQUAL "Debug"))
))
if (ENABLE_JEMALLOC)
message (${RECONFIGURE_MESSAGE_LEVEL}
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds")
"jemalloc is disabled implicitly: it doesn't work with sanitizers and can only be used with x86_64, aarch64, or ppc64le Linux or FreeBSD builds and RelWithDebInfo macOS builds.")
endif ()
set (ENABLE_JEMALLOC OFF)
else ()
@ -138,9 +139,5 @@ target_compile_options(jemalloc PRIVATE -Wno-redundant-decls)
target_compile_options(jemalloc PRIVATE -D_GNU_SOURCE)
set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS USE_JEMALLOC=1)
if (MAKE_STATIC_LIBRARIES)
# To detect whether we need to register jemalloc for osx as default zone.
set_property(TARGET jemalloc APPEND PROPERTY INTERFACE_COMPILE_DEFINITIONS BUNDLED_STATIC_JEMALLOC=1)
endif()
message (STATUS "Using jemalloc")

2
contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 43491d33ca2826531d1e3cae70d4bf1e5249e3c9
Subproject commit b8554f1682062c85ba519eb54ef2f90e02b812cb

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (21.9.1.1) unstable; urgency=low
clickhouse (21.10.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Sat, 10 Jul 2021 08:22:49 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Sat, 17 Jul 2021 08:45:03 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.9.1.*
ARG version=21.10.1.*
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -173,6 +173,9 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
cmake_flags.append('-DUSE_GTEST=1')
cmake_flags.append('-DENABLE_TESTS=1')
cmake_flags.append('-DENABLE_EXAMPLES=1')
cmake_flags.append('-DENABLE_FUZZING=1')
# For fuzzing needs
cmake_flags.append('-DUSE_YAML_CPP=1')
# Don't stop on first error to find more clang-tidy errors in one run.
result.append('NINJA_FLAGS=-k0')

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.9.1.*
ARG version=21.10.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.9.1.*
ARG version=21.10.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -628,9 +628,6 @@ cat analyze/errors.log >> report/errors.log ||:
cat profile-errors.log >> report/errors.log ||:
clickhouse-local --query "
-- We use decimals specifically to get fixed-point, fixed-width formatting.
set output_format_decimal_trailing_zeros = 1;
create view query_display_names as select * from
file('analyze/query-display-names.tsv', TSV,
'test text, query_index int, query_display_name text')
@ -644,6 +641,7 @@ create view partial_query_times as select * from
-- Report for partial queries that we could only run on the new server (e.g.
-- queries with new functions added in the tested PR).
create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv')
settings output_format_decimal_trailing_zeros = 1
as select toDecimal64(time_median, 3) time,
toDecimal64(time_stddev / time_median, 3) relative_time_stddev,
test, query_index, query_display_name
@ -716,8 +714,9 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv')
order by test, query_index, metric_name
;
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') as
with
create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv')
settings output_format_decimal_trailing_zeros = 1
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
@ -733,8 +732,9 @@ create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') as
changed_fail, test, query_index, query_display_name
from queries where changed_show order by abs(diff) desc;
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') as
select
create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv')
settings output_format_decimal_trailing_zeros = 1
as select
toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3),
toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
@ -764,8 +764,9 @@ create view total_speedup as
from test_speedup
;
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') as
with
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv')
settings output_format_decimal_trailing_zeros = 1
as with
(times_speedup >= 1
? '-' || toString(toDecimal64(times_speedup, 3)) || 'x'
: '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x')
@ -791,8 +792,9 @@ create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');
create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') as
select client, server, toDecimal64(client/server, 3) p,
create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv')
settings output_format_decimal_trailing_zeros = 1
as select client, server, toDecimal64(client/server, 3) p,
test, query_display_name
from total_client_time_per_query left join query_display_names using (test, query_index)
where p > toDecimal64(1.02, 3) order by p desc;
@ -877,8 +879,9 @@ create view test_times_view_total as
from test_times_view
;
create table test_times_report engine File(TSV, 'report/test-times.tsv') as
select
create table test_times_report engine File(TSV, 'report/test-times.tsv')
settings output_format_decimal_trailing_zeros = 1
as select
test,
toDecimal64(real, 3),
toDecimal64(total_client_time, 3),
@ -896,8 +899,9 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') as
;
-- report for all queries page, only main metric
create table all_tests_report engine File(TSV, 'report/all-queries.tsv') as
with
create table all_tests_report engine File(TSV, 'report/all-queries.tsv')
settings output_format_decimal_trailing_zeros = 1
as with
-- server_time is sometimes reported as zero (if it's less than 1 ms),
-- so we have to work around this to not get an error about conversion
-- of NaN to decimal.
@ -978,9 +982,6 @@ for version in {right,left}
do
rm -rf data
clickhouse-local --query "
-- We use decimals specifically to get fixed-point, fixed-width formatting.
set output_format_decimal_trailing_zeros = 1;
create view query_profiles as
with 0 as left, 1 as right
select * from file('analyze/query-profiles.tsv', TSV,
@ -1063,9 +1064,10 @@ create table unstable_run_traces engine File(TSVWithNamesAndTypes,
;
create table metric_devation engine File(TSVWithNamesAndTypes,
'report/metric-deviation.$version.tsv') as
'report/metric-deviation.$version.tsv')
settings output_format_decimal_trailing_zeros = 1
-- first goes the key used to split the file with grep
select test, query_index, query_display_name,
as select test, query_index, query_display_name,
toDecimal64(d, 3) d, q, metric
from (
select
@ -1176,9 +1178,6 @@ rm -rf metrics ||:
mkdir metrics
clickhouse-local --query "
-- We use decimals specifically to get fixed-point, fixed-width formatting.
set output_format_decimal_trailing_zeros = 1;
create view right_async_metric_log as
select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes,
'$(cat right-async-metric-log.tsv.columns)')
@ -1196,8 +1195,9 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as
;
-- Show metrics that have changed
create table changes engine File(TSV, 'metrics/changes.tsv') as
select metric, left, right,
create table changes engine File(TSV, 'metrics/changes.tsv')
settings output_format_decimal_trailing_zeros = 1
as select metric, left, right,
toDecimal64(diff, 3), toDecimal64(times_diff, 3)
from (
select metric, median(left) as left, median(right) as right,

View File

@ -13,7 +13,7 @@ left_sha=$2
# right_pr=$3 not used for now
right_sha=$4
datasets=${CHPC_DATASETS:-"hits1 hits10 hits100 values"}
datasets=${CHPC_DATASETS-"hits1 hits10 hits100 values"}
declare -A dataset_paths
dataset_paths["hits10"]="https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar"

View File

@ -127,6 +127,15 @@ export PATH
export REF_PR
export REF_SHA
# Try to collect some core dumps. I've seen two patterns in Sandbox:
# 1) |/home/zomb-sandbox/venv/bin/python /home/zomb-sandbox/client/sandbox/bin/coredumper.py %e %p %g %u %s %P %c
# Not sure what this script does (puts them to sandbox resources, logs some messages?),
# and it's not accessible from inside docker anyway.
# 2) something like %e.%p.core.dmp. The dump should end up in the workspace directory.
# At least we remove the ulimit and then try to pack some common file names into output.
ulimit -c unlimited
cat /proc/sys/kernel/core_pattern
# Start the main comparison script.
{ \
time ../download.sh "$REF_PR" "$REF_SHA" "$PR_TO_TEST" "$SHA_TO_TEST" && \
@ -144,8 +153,11 @@ done
dmesg -T > dmesg.log
ls -lath
7z a '-x!*/tmp' /output/output.7z ./*.{log,tsv,html,txt,rep,svg,columns} \
{right,left}/{performance,scripts} {{right,left}/db,db0}/preprocessed_configs \
report analyze benchmark metrics
report analyze benchmark metrics \
./*.core.dmp ./*.core
cp compare.log /output

View File

@ -137,7 +137,7 @@ CREATE TABLE table_name
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
SAMPLE BY intHash32(UserID);
```
<details markdown="1">
@ -150,12 +150,12 @@ CREATE TABLE table_name
EventDate DateTime,
CounterID UInt32,
UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192);
```
</details>
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the «[macros](../../../operations/server-configuration-parameters/settings/#macros) section of the configuration file.
As the example shows, these parameters can contain substitutions in curly brackets. The substituted values are taken from the [macros](../../../operations/server-configuration-parameters/settings.md#macros) section of the configuration file.
Example:

View File

@ -56,6 +56,9 @@ The same thing happens if the subordinate table does not exist when the buffer i
If you need to run ALTER for a subordinate table, and the Buffer table, we recommend first deleting the Buffer table, running ALTER for the subordinate table, then creating the Buffer table again.
!!! attention "Attention"
Running ALTER on the Buffer table in releases made before 28 Sep 2020 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table.
If the server is restarted abnormally, the data in the buffer is lost.
`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table.

View File

@ -105,7 +105,7 @@ We use `Decimal` data type to store prices. Everything else is quite straightfor
## Import Data
Upload data into ClickHouse in parallel:
Upload data into ClickHouse:
```
clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO dish FORMAT CSVWithNames" < Dish.csv

View File

@ -114,5 +114,5 @@ Seamlessly migration from ZooKeeper to `clickhouse-keeper` is impossible you hav
clickhouse-keeper-converter --zookeeper-logs-dir /var/lib/zookeeper/version-2 --zookeeper-snapshots-dir /var/lib/zookeeper/version-2 --output-dir /path/to/clickhouse/keeper/snapshots
```
4. Copy snapshot to `clickhouse-server` nodes with configured `keeper` or start `clickhouse-keeper` instead of ZooKeeper. Snapshot must persist only on leader node, leader will sync it automatically to other nodes.
4. Copy snapshot to `clickhouse-server` nodes with configured `keeper` or start `clickhouse-keeper` instead of ZooKeeper. Snapshot must persist on all nodes, otherwise empty nodes can be faster and one of them can becamse leader.

View File

@ -18,6 +18,18 @@ Some settings specified in the main configuration file can be overridden in othe
- If `replace` is specified, it replaces the entire element with the specified one.
- If `remove` is specified, it deletes the element.
You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
```
## Substitution {#substitution}
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/yandex/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md)).

View File

@ -486,7 +486,7 @@ Parameter substitutions for replicated tables.
Can be omitted if replicated tables are not used.
For more information, see the section [Creating replicated tables](../../engines/table-engines/mergetree-family/replication.md).
For more information, see the section [Creating replicated tables](../../engines/table-engines/mergetree-family/replication.md#creating-replicated-tables).
**Example**

View File

@ -4,7 +4,7 @@ Contains information about [trace spans](https://opentracing.io/docs/overview/sp
Columns:
- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — ID of the trace for executed query.
- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query.
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.

View File

@ -0,0 +1,129 @@
# system.zookeeper_log {#system-zookeeper_log}
This table contains information about the parameters of the request to the ZooKeeper server and the response from it.
For requests, only columns with request parameters are filled in, and the remaining columns are filled with default values (`0` or `NULL`). When the response arrives, the data from the response is added to the other columns.
Columns with request parameters:
- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values:
- `Request` — The request has been sent.
- `Response` — The response was received.
- `Finalize` — The connection is lost, no response was received.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the event happened.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP address of ZooKeeper server that was used to make the request.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — The port of ZooKeeper server that was used to make the request.
- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — The session ID that the ZooKeeper server sets for each connection.
- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — The ID of the request within the session. This is usually a sequential request number. It is the same for the request row and the paired `response`/`finalize` row.
- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The request whether the [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches) has been set.
- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — The type of request or response.
- `path` ([String](../../sql-reference/data-types/string.md)) — The path to the ZooKeeper node specified in the request, or an empty string if the request not requires specifying a path.
- `data` ([String](../../sql-reference/data-types/string.md)) — The data written to the ZooKeeper node (for the `SET` and `CREATE` requests — what the request wanted to write, for the response to the `GET` request — what was read) or an empty string.
- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes).
- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Is the ZooKeeper node being created as an [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming).
- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — The version of the ZooKeeper node that the request expects when executing. This is supported for `CHECK`, `SET`, `REMOVE` requests (is relevant `-1` if the request does not check the version or `NULL` for other requests that do not support version checking).
- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of requests included in the multi request (this is a special request that consists of several consecutive ordinary requests and executes them atomically). All requests included in multi request will have the same `xid`.
- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of the request included in multi request (for multi request — `0`, then in order from `1`).
Columns with request response parameters:
- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — ZooKeeper transaction ID. The serial number issued by the ZooKeeper server in response to a successfully executed request (`0` if the request was not executed/returned an error/the client does not know whether the request was executed).
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — Error code. Can have many values, here are just some of them:
- `ZOK` — The request was executed seccessfully.
- `ZCONNECTIONLOSS` — The connection was lost.
- `ZOPERATIONTIMEOUT` — The request execution timeout has expired.
- `ZSESSIONEXPIRED` — The session has expired.
- `NULL` — The request is completed.
- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The type of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`.
- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — The status of the `watch` event (for responses with `op_num` = `Watch`), for the remaining responses: `NULL`.
- `path_created` ([String](../../sql-reference/data-types/string.md)) — The path to the created ZooKeeper node (for responses to the `CREATE` request), may differ from the `path` if the node is created as a `sequential`.
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that caused this ZooKeeper node to be created.
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The `zxid` of the change that last modified this ZooKeeper node.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — The transaction ID of the change that last modified childern of this ZooKeeper node.
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the data of this ZooKeeper node.
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of changes to the children of this ZooKeeper node.
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — The length of the data field of this ZooKeeper node.
- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — The number of children of this ZooKeeper node.
- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — The list of child ZooKeeper nodes (for responses to `LIST` request).
**Example**
Query:
``` sql
SELECT * FROM system.zookeeper_log WHERE (session_id = '106662742089334927') AND (xid = '10858') FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
type: Request
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.291792
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 0
error: ᴺᵁᴸᴸ
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 0
stat_mzxid: 0
stat_pzxid: 0
stat_version: 0
stat_cversion: 0
stat_dataLength: 0
stat_numChildren: 0
children: []
Row 2:
──────
type: Response
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.292086
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 16926267
error: ZOK
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 16925469
stat_mzxid: 16925469
stat_pzxid: 16926179
stat_version: 0
stat_cversion: 7
stat_dataLength: 0
stat_numChildren: 7
children: ['query-0000000006','query-0000000005','query-0000000004','query-0000000003','query-0000000002','query-0000000001','query-0000000000']
```
**See Also**
- [ZooKeeper](../../operations/tips.md#zookeeper)
- [ZooKeeper guide](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html)

View File

@ -197,7 +197,7 @@ Result:
## h3ToGeo {#h3togeo}
Returns `(lon, lat)` that corresponds to the provided H3 index.
Returns the geographical coordinates of longitude and latitude corresponding to the provided [H3](#h3index) index.
**Syntax**
@ -207,20 +207,18 @@ h3ToGeo(h3Index)
**Arguments**
- `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `h3Index` — H3 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md).
- `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md).
- A tuple consisting of two values: `tuple(lon,lat)`. `lon` — Longitude. [Float64](../../../sql-reference/data-types/float.md). `lat` — Latitude. [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT h3ToGeo(644325524701193974) coordinates;
SELECT h3ToGeo(644325524701193974) AS coordinates;
```
Result:
@ -230,6 +228,7 @@ Result:
│ (37.79506616830252,55.71290243145668) │
└───────────────────────────────────────┘
```
## h3kRing {#h3kring}
Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order.

View File

@ -1339,3 +1339,149 @@ Result:
│ 2,"good" │
└───────────────────────────────────────────┘
```
## snowflakeToDateTime {#snowflakeToDateTime}
Extract time from snowflake id as DateTime format.
**Syntax**
``` sql
snowflakeToDateTime(value [, time_zone])
```
**Parameters**
- `value``snowflake id`, Int64 value.
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- value converted to the `DateTime` data type.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
```
Result:
``` text
┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:57:56 │
└──────────────────────────────────────────────────────────────────┘
```
## snowflakeToDateTime64 {#snowflakeToDateTime64}
Extract time from snowflake id as DateTime64 format.
**Syntax**
``` sql
snowflakeToDateTime64(value [, time_zone])
```
**Parameters**
- `value``snowflake id`, Int64 value.
- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
- value converted to the `DateTime64` data type.
**Example**
Query:
``` sql
SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
```
Result:
``` text
┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
│ 2021-08-15 10:58:19.841 │
└────────────────────────────────────────────────────────────────────┘
```
## dateTimeToSnowflake {#dateTimeToSnowflake}
Convert DateTime to the first snowflake id at the giving time.
**Syntax**
``` sql
dateTimeToSnowflake(value)
```
**Parameters**
- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md).
**Returned value**
- `value` converted to the `Int64` data type as the first snowflake id at that time.
**Example**
Query:
``` sql
WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt
SELECT dateTimeToSnowflake(dt);
```
Result:
``` text
┌─dateTimeToSnowflake(dt)─┐
│ 1426860702823350272 │
└─────────────────────────┘
```
## dateTime64ToSnowflake {#dateTime64ToSnowflake}
Convert DateTime64 to the first snowflake id at the giving time.
**Syntax**
``` sql
dateTime64ToSnowflake(value)
```
**Parameters**
- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).
**Returned value**
- `value` converted to the `Int64` data type as the first snowflake id at that time.
**Example**
Query:
``` sql
WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64
SELECT dateTime64ToSnowflake(dt64);
```
Result:
``` text
┌─dateTime64ToSnowflake(dt64)─┐
│ 1426860704886947840 │
└─────────────────────────────┘
```

View File

@ -311,12 +311,12 @@ One may execute query after:
- Individual replica path `/replicas/replica_name/` loss.
Replica attaches locally found parts and sends info about them to Zookeeper.
Parts present on replica before metadata loss are not re-fetched from other replicas if not being outdated
(so replica restoration does not mean re-downloading all data over the network).
Parts present on a replica before metadata loss are not re-fetched from other ones if not being outdated (so replica restoration does not mean re-downloading all data over the network).
Caveat: parts in all states are moved to `detached/` folder. Parts active before data loss (Committed) are attached.
!!! warning "Warning"
Parts in all states are moved to `detached/` folder. Parts active before data loss (committed) are attached.
#### Syntax
**Syntax**
```sql
SYSTEM RESTORE REPLICA [db.]replicated_merge_tree_family_table_name [ON CLUSTER cluster_name]
@ -328,11 +328,11 @@ Alternative syntax:
SYSTEM RESTORE REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
#### Example
**Example**
Creating a table on multiple servers. After the replica's metadata in ZooKeeper is lost, the table will attach as read-only as metadata is missing. The last query needs to execute on every replica.
```sql
-- Creating table on multiple servers
CREATE TABLE test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/', '{replica}')
ORDER BY n PARTITION BY n % 10;
@ -341,8 +341,14 @@ INSERT INTO test SELECT * FROM numbers(1000);
-- zookeeper_delete_path("/clickhouse/tables/test", recursive=True) <- root loss.
SYSTEM RESTART REPLICA test; -- Table will attach as readonly as metadata is missing.
SYSTEM RESTORE REPLICA test; -- Need to execute on every replica, another way: RESTORE REPLICA test ON CLUSTER cluster
SYSTEM RESTART REPLICA test;
SYSTEM RESTORE REPLICA test;
```
Another way:
```sql
SYSTEM RESTORE REPLICA test ON CLUSTER cluster;
```
### RESTART REPLICAS {#query_language-system-restart-replicas}

View File

@ -6,12 +6,13 @@ toc_title: cluster
# cluster, clusterAllReplicas {#cluster-clusterallreplicas}
Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried.
`clusterAllReplicas` - same as `cluster` but all replicas are queried. Each replica in a cluster is used as separate shard/connection.
`clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection.
!!! note "Note"
All available clusters are listed in the `system.clusters` table.
All available clusters are listed in the [system.clusters](../../operations/system-tables/clusters.md) table.
Signatures:
**Syntax**
``` sql
cluster('cluster_name', db.table[, sharding_key])
@ -19,10 +20,27 @@ cluster('cluster_name', db, table[, sharding_key])
clusterAllReplicas('cluster_name', db.table[, sharding_key])
clusterAllReplicas('cluster_name', db, table[, sharding_key])
```
**Arguments**
`cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `cluster_name` Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `db.table` or `db`, `table` - Name of a database and a table.
- `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard.
`sharding_key` - When insert into cluster function with more than one shard, sharding_key need to be provided.
**Returned value**
The dataset from clusters.
**Using Macros**
`cluster_name` can contain macros — substitution in curly brackets. The substituted value is taken from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration file.
Example:
```sql
SELECT * FROM cluster('{cluster}', default.example_table);
```
**Usage and Recommendations**
Using the `cluster` and `clusterAllReplicas` table functions are less efficient than creating a `Distributed` table because in this case, the server connection is re-established for every request. When processing a large number of queries, please always create the `Distributed` table ahead of time, and do not use the `cluster` and `clusterAllReplicas` table functions.

View File

@ -102,7 +102,7 @@ CREATE TABLE table_name
) ENGINE = ReplicatedReplacingMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', ver)
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
SAMPLE BY intHash32(UserID);
```
<details markdown="1">
@ -115,12 +115,12 @@ CREATE TABLE table_name
EventDate DateTime,
CounterID UInt32,
UserID UInt32
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192)
) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{layer}-{shard}/table_name', '{replica}', EventDate, intHash32(UserID), (CounterID, EventDate, intHash32(UserID), EventTime), 8192);
```
</details>
Как видно в примере, эти параметры могут содержать подстановки в фигурных скобках. Подставляемые значения достаются из конфигурационного файла, из секции «[macros](../../../operations/server-configuration-parameters/settings/#macros)».
Как видно в примере, эти параметры могут содержать подстановки в фигурных скобках. Эти подстановки заменяются на соответствующие значения из конфигурационного файла, из секции [macros](../../../operations/server-configuration-parameters/settings.md#macros).
Пример:

View File

@ -48,7 +48,10 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10
Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен.
То же самое происходит, если подчинённая таблица не существует в момент сброса буфера.
Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а затем создать таблицу Buffer заново.
Если есть необходимость выполнить ALTER для подчинённой таблицы и для таблицы Buffer, то рекомендуется удалить таблицу Buffer, затем выполнить ALTER подчинённой таблицы, а после создать таблицу Buffer заново.
!!! attention "Внимание"
В релизах до 28 сентября 2020 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена.
При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны.

View File

@ -465,9 +465,9 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
Подстановки параметров реплицируемых таблиц.
Можно не указывать, если реплицируемых таблицы не используются.
Можно не указывать, если реплицируемые таблицы не используются.
Подробнее смотрите в разделе «[Создание реплицируемых таблиц](../../engines/table-engines/mergetree-family/replication.md)».
Подробнее смотрите в разделе [Создание реплицируемых таблиц](../../engines/table-engines/mergetree-family/replication.md#creating-replicated-tables).
**Пример**

View File

@ -4,7 +4,7 @@
Столбцы:
- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md) — идентификатор трассировки для выполненного запроса.
- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — идентификатор трассировки для выполненного запроса.
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — идентификатор `trace span`.

View File

@ -0,0 +1,129 @@
# system.zookeeper_log {#system-zookeeper_log}
Эта таблица содержит информацию о параметрах запроса к серверу ZooKeeper и ответа от него.
Для запросов заполняются только столбцы с параметрами запроса, а остальные столбцы заполняются значениями по умолчанию (`0` или `NULL`). Когда поступает ответ, данные добавляются в столбцы с параметрами ответа на запрос.
Столбцы с параметрами запроса:
- `type` ([Enum](../../sql-reference/data-types/enum.md)) — тип события в клиенте ZooKeeper. Может иметь одно из следующих значений:
- `Request` — запрос отправлен.
- `Response` — ответ получен.
- `Finalize` — соединение разорвано, ответ не получен.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата, когда произошло событие.
- `event_time` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — дата и время, когда произошло событие.
- `address` ([IPv6](../../sql-reference/data-types/domains/ipv6.md)) — IP адрес сервера ZooKeeper, с которого был сделан запрос.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — порт сервера ZooKeeper, с которого был сделан запрос.
- `session_id` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор сессии, который сервер ZooKeeper создает для каждого соединения.
- `xid` ([Int32](../../sql-reference/data-types/int-uint.md)) — идентификатор запроса внутри сессии. Обычно это последовательный номер запроса, одинаковый у строки запроса и у парной строки `response`/`finalize`.
- `has_watch` ([UInt8](../../sql-reference/data-types/int-uint.md)) — установлен ли запрос [watch](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#ch_zkWatches).
- `op_num` ([Enum](../../sql-reference/data-types/enum.md)) — тип запроса или ответа на запрос.
- `path` ([String](../../sql-reference/data-types/string.md)) — путь к узлу ZooKeeper, указанный в запросе. Пустая строка, если запрос не требует указания пути.
- `data` ([String](../../sql-reference/data-types/string.md)) — данные, записанные на узле ZooKeeper (для запросов `SET` и `CREATE` — что запрос хотел записать, для ответа на запрос `GET` — что было прочитано), или пустая строка.
- `is_ephemeral` ([UInt8](../../sql-reference/data-types/int-uint.md)) — создается ли узел ZooKeeper как [ephemeral](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Ephemeral+Nodes).
- `is_sequential` ([UInt8](../../sql-reference/data-types/int-uint.md)) — создается ли узел ZooKeeper как [sequential](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html#Sequence+Nodes+--+Unique+Naming).
- `version` ([Nullable(Int32)](../../sql-reference/data-types/nullable.md)) — версия узла ZooKeeper, которую запрос ожидает увидеть при выполнении. Поддерживается для запросов `CHECK`, `SET`, `REMOVE` (`-1` — запрос не проверяет версию, `NULL` — для других запросов, которые не поддерживают проверку версии).
- `requests_size` ([UInt32](../../sql-reference/data-types/int-uint.md)) — количество запросов, включенных в мультизапрос (это специальный запрос, который состоит из нескольких последовательных обычных запросов, выполняющихся атомарно). Все запросы, включенные в мультизапрос, имеют одинаковый `xid`.
- `request_idx` ([UInt32](../../sql-reference/data-types/int-uint.md)) — номер запроса, включенного в мультизапрос (`0` — для мультизапроса, далее по порядку с `1`).
Столбцы с параметрами ответа на запрос:
- `zxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции в ZooKeeper. Последовательный номер, выданный сервером ZooKeeper в ответе на успешно выполненный запрос (`0` — запрос не был выполнен, возвращена ошибка или клиент ZooKeeper не знает, был ли выполнен запрос).
- `error` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — код ошибки. Может иметь много значений, здесь приведены только некоторые из них:
- `ZOK` — запрос успешно выполнен.
- `ZCONNECTIONLOSS` — соединение разорвано.
- `ZOPERATIONTIMEOUT` — истекло время ожидания выполнения запроса.
- `ZSESSIONEXPIRED` — истекло время сессии.
- `NULL` — выполнен запрос.
- `watch_type` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — тип события `watch` (для ответов на запрос при `op_num` = `Watch`), для остальных ответов: `NULL`.
- `watch_state` ([Nullable(Enum)](../../sql-reference/data-types/nullable.md)) — статус события `watch` (для ответов на запрос при `op_num` = `Watch`), для остальных ответов: `NULL`.
- `path_created` ([String](../../sql-reference/data-types/string.md)) — путь к созданному узлу ZooKeeper (для ответов на запрос `CREATE`). Может отличаться от `path`, если узел создается как `sequential`.
- `stat_czxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции, в результате которой был создан узел ZooKeeper.
- `stat_mzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции, которая последней модифицировала узел ZooKeeper.
- `stat_pzxid` ([Int64](../../sql-reference/data-types/int-uint.md)) — идентификатор транзакции, которая последней модифицировала дочерние узлы ZooKeeper.
- `stat_version` ([Int32](../../sql-reference/data-types/int-uint.md)) — количество изменений в данных узла ZooKeeper.
- `stat_cversion` ([Int32](../../sql-reference/data-types/int-uint.md)) — количество изменений в дочерних узлах ZooKeeper.
- `stat_dataLength` ([Int32](../../sql-reference/data-types/int-uint.md)) — длина поля данных узла ZooKeeper.
- `stat_numChildren` ([Int32](../../sql-reference/data-types/int-uint.md)) — количество дочерних узлов ZooKeeper.
- `children` ([Array(String)](../../sql-reference/data-types/array.md)) — список дочерних узлов ZooKeeper (для ответов на запрос `LIST`).
**Пример**
Запрос:
``` sql
SELECT * FROM system.zookeeper_log WHERE (session_id = '106662742089334927') AND (xid = '10858') FORMAT Vertical;
```
Результат:
``` text
Row 1:
──────
type: Request
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.291792
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 0
error: ᴺᵁᴸᴸ
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 0
stat_mzxid: 0
stat_pzxid: 0
stat_version: 0
stat_cversion: 0
stat_dataLength: 0
stat_numChildren: 0
children: []
Row 2:
──────
type: Response
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.292086
address: ::
port: 2181
session_id: 106662742089334927
xid: 10858
has_watch: 1
op_num: List
path: /clickhouse/task_queue/ddl
data:
is_ephemeral: 0
is_sequential: 0
version: ᴺᵁᴸᴸ
requests_size: 0
request_idx: 0
zxid: 16926267
error: ZOK
watch_type: ᴺᵁᴸᴸ
watch_state: ᴺᵁᴸᴸ
path_created:
stat_czxid: 16925469
stat_mzxid: 16925469
stat_pzxid: 16926179
stat_version: 0
stat_cversion: 7
stat_dataLength: 0
stat_numChildren: 7
children: ['query-0000000006','query-0000000005','query-0000000004','query-0000000003','query-0000000002','query-0000000001','query-0000000000']
```
**См. также**
- [ZooKeeper](../../operations/tips.md#zookeeper)
- [Руководство по ZooKeeper](https://zookeeper.apache.org/doc/r3.3.3/zookeeperProgrammers.html)

View File

@ -193,6 +193,40 @@ SELECT geoToH3(37.79506683, 55.71290588, 15) as h3Index;
└────────────────────┘
```
## h3ToGeo {#h3togeo}
Возвращает географические координаты долготы и широты, соответствующие указанному [H3](#h3index)-индексу.
**Синтаксис**
``` sql
h3ToGeo(h3Index)
```
**Аргументы**
- `h3Index` — [H3](#h3index)-индекс. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Возвращаемые значения**
- кортеж из двух значений: `tuple(lon,lat)`, где `lon` — долгота [Float64](../../../sql-reference/data-types/float.md), `lat` — широта [Float64](../../../sql-reference/data-types/float.md).
**Пример**
Запрос:
``` sql
SELECT h3ToGeo(644325524701193974) coordinates;
```
Результат:
``` text
┌─coordinates───────────────────────────┐
│ (37.79506616830252,55.71290243145668) │
└───────────────────────────────────────┘
```
## h3kRing {#h3kring}
Возвращает [H3](#h3index)-индексы шестигранников в радиусе `k` от данного в произвольном порядке.

View File

@ -36,6 +36,7 @@ toc_title: SYSTEM
- [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
- [SYNC REPLICA](#query_language-system-sync-replica)
- [RESTART REPLICA](#query_language-system-restart-replica)
- [RESTORE REPLICA](#query_language-system-restore-replica)
- [RESTART REPLICAS](#query_language-system-restart-replicas)
## RELOAD EMBEDDED DICTIONARIES] {#query_language-system-reload-emdedded-dictionaries}
@ -287,13 +288,66 @@ SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name
### RESTART REPLICA {#query_language-system-restart-replica}
Реинициализация состояния Zookeeper-сессий для таблицы семейства `ReplicatedMergeTree`. Сравнивает текущее состояние с тем, что хранится в Zookeeper, как источник правды, и добавляет задачи в очередь репликации в Zookeeper, если необходимо.
Инициализация очереди репликации на основе данных ZooKeeper происходит так же, как при attach table. На короткое время таблица станет недоступной для любых операций.
Реинициализирует состояние сессий Zookeeper для таблицы семейства `ReplicatedMergeTree`. Сравнивает текущее состояние с состоянием в Zookeeper (как с эталоном) и при необходимости добавляет задачи в очередь репликации в Zookeeper.
Инициализация очереди репликации на основе данных ZooKeeper происходит так же, как при `ATTACH TABLE`. Некоторое время таблица будет недоступна для любых операций.
``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
```
### RESTORE REPLICA {#query_language-system-restore-replica}
Восстанавливает реплику, если метаданные в Zookeeper потеряны, но сами данные возможно существуют.
Работает только с таблицами семейства `ReplicatedMergeTree` и только если таблица находится в readonly-режиме.
Запрос можно выполнить если:
- потерян корневой путь ZooKeeper `/`;
- потерян путь реплик `/replicas`;
- потерян путь конкретной реплики `/replicas/replica_name/`.
К реплике прикрепляются локально найденные куски, информация о них отправляется в Zookeeper.
Если присутствующие в реплике до потери метаданных данные не устарели, они не скачиваются повторно с других реплик. Поэтому восстановление реплики не означает повторную загрузку всех данных по сети.
!!! warning "Предупреждение"
Потерянные данные в любых состояниях перемещаются в папку `detached/`. Куски, активные до потери данных (находившиеся в состоянии Committed), прикрепляются.
**Синтаксис**
```sql
SYSTEM RESTORE REPLICA [db.]replicated_merge_tree_family_table_name [ON CLUSTER cluster_name]
```
Альтернативный синтаксис:
```sql
SYSTEM RESTORE REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
```
**Пример**
Создание таблицы на нескольких серверах. После потери корневого пути реплики таблица будет прикреплена только для чтения, так как метаданные отсутствуют. Последний запрос необходимо выполнить на каждой реплике.
```sql
CREATE TABLE test(n UInt32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/', '{replica}')
ORDER BY n PARTITION BY n % 10;
INSERT INTO test SELECT * FROM numbers(1000);
-- zookeeper_delete_path("/clickhouse/tables/test", recursive=True) <- root loss.
SYSTEM RESTART REPLICA test;
SYSTEM RESTORE REPLICA test;
```
Альтернативный способ:
```sql
SYSTEM RESTORE REPLICA test ON CLUSTER cluster;
```
### RESTART REPLICAS {#query_language-system-restart-replicas}
Реинициализация состояния ZooKeeper-сессий для всех `ReplicatedMergeTree` таблиц. Сравнивает текущее состояние реплики с тем, что хранится в ZooKeeper, как c источником правды, и добавляет задачи в очередь репликации в ZooKeeper, если необходимо.

View File

@ -5,22 +5,44 @@ toc_title: cluster
# cluster, clusterAllReplicas {#cluster-clusterallreplicas}
Позволяет обратиться ко всем серверам существующего кластера, который присутствует в таблице `system.clusters` и сконфигурирован в секцци `remote_servers` без создания таблицы типа `Distributed`.
`clusterAllReplicas` - работает также как `cluster` но каждая реплика в кластере будет использована как отдельный шард/отдельное соединение.
Позволяет обратиться ко всем шардам существующего кластера, который сконфигурирован в секции `remote_servers` без создания таблицы типа [Distributed](../../engines/table-engines/special/distributed.md). В запросе используется одна реплика каждого шарда.
Функция `clusterAllReplicas` работает также как `cluster`, но каждая реплика в кластере используется как отдельный шард/отдельное соединение.
Сигнатуры:
!!! note "Примечание"
Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md).
**Синтаксис**
``` sql
cluster('cluster_name', db.table)
cluster('cluster_name', db, table)
clusterAllReplicas('cluster_name', db.table)
clusterAllReplicas('cluster_name', db, table)
cluster('cluster_name', db.table[, sharding_key])
cluster('cluster_name', db, table[, sharding_key])
clusterAllReplicas('cluster_name', db.table[, sharding_key])
clusterAllReplicas('cluster_name', db, table[, sharding_key])
```
**Аргументы**
- `cluster_name` имя кластера, который обозначает подмножество адресов и параметров подключения к удаленным и локальным серверам, входящим в кластер.
- `db.table` или `db`, `table` - имя базы данных и таблицы.
- `sharding_key` - ключ шардирования. Необязательный аргумент. Указывается, если данные добавляются более чем в один шард кластера.
**Возвращаемое значение**
Набор данных из кластеров.
**Использование макросов**
`cluster_name` может содержать макрос — подстановку в фигурных скобках. Эта подстановка заменяется на соответствующее значение из секции [macros](../../operations/server-configuration-parameters/settings.md#macros) конфигурационного файла .
Пример:
```sql
SELECT * FROM cluster('{cluster}', default.example_table);
```
`cluster_name` имя кластера, который обязан присутствовать в таблице `system.clusters` и обозначает подмножество адресов и параметров подключения к удаленным и локальным серверам, входящим в кластер.
**Использование и рекомендации**
Использование табличных функций `cluster` и `clusterAllReplicas` менее оптимальное чем создание таблицы типа `Distributed`, поскольку в этом случае соединение с сервером переустанавливается на каждый запрос. При обработке большого количества запросов, всегда создавайте `Distributed` таблицу заранее и не используйте табличные функции `cluster` и `clusterAllReplicas`.
Использование табличных функций `cluster` и `clusterAllReplicas` менее оптимально, чем создание таблицы типа `Distributed`, поскольку в этом случае при каждом новом запросе устанавливается новое соединение с сервером. При обработке большого количества запросов всегда создавайте `Distributed` таблицу заранее и не используйте табличные функции `cluster` и `clusterAllReplicas`.
Табличные функции `cluster` and `clusterAllReplicas` могут быть полезны в следующих случаях:
@ -30,7 +52,7 @@ clusterAllReplicas('cluster_name', db, table)
Настройки соединения `user`, `password`, `host`, `post`, `compression`, `secure` берутся из секции `<remote_servers>` файлов конфигурации. См. подробности в разделе [Distributed](../../engines/table-engines/special/distributed.md)
**See Also**
**См. также**
- [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards)
- [load_balancing](../../operations/settings/settings.md#settings-load_balancing)

View File

@ -26,7 +26,7 @@ toc_title: "常见问题"
### 服务器未运行 {#server-is-not-running}
**检查服务器是否运行nnig**
**检查服务器是否正在运行**
命令:

View File

@ -1,8 +1,8 @@
# 功能与Yandex的工作。梅特里卡词典 {#functions-for-working-with-yandex-metrica-dictionaries}
# 使用 Yandex.Metrica 字典函数 {#functions-for-working-with-yandex-metrica-dictionaries}
为了使下面的功能正常工作,服务器配置必须指定获取所有Yandex的路径和地址。梅特里卡字典. 字典在任何这些函数的第一次调用时加载。 如果无法加载引用列表,则会引发异常。
为了使下面的功能正常工作,服务器配置必须指定获取所有 Yandex.Metrica 字典的路径和地址。Yandex.Metrica 字典在任何这些函数的第一次调用时加载。 如果无法加载引用列表,则会引发异常。
For information about creating reference lists, see the section «Dictionaries».
有关创建引用列表的信息,请参阅 «字典» 部分.
## 多个地理基 {#multiple-geobases}
@ -17,18 +17,18 @@ ClickHouse支持同时使用多个备选地理基区域层次结构
所有字典都在运行时重新加载每隔一定数量的秒重新加载一次如builtin_dictionaries_reload_interval config参数中定义或默认情况下每小时一次。 但是,可用字典列表在服务器启动时定义一次。
All functions for working with regions have an optional argument at the end the dictionary key. It is referred to as the geobase.
所有处理区域的函数都在末尾有一个可选参数—字典键。它被称为地基。
示例:
regionToCountry(RegionID) Uses the default dictionary: /opt/geo/regions_hierarchy.txt
regionToCountry(RegionID, '') Uses the default dictionary: /opt/geo/regions_hierarchy.txt
regionToCountry(RegionID, 'ua') Uses the dictionary for the 'ua' key: /opt/geo/regions_hierarchy_ua.txt
regionToCountry(RegionID) 使用默认路径: /opt/geo/regions_hierarchy.txt
regionToCountry(RegionID, '') 使用默认路径: /opt/geo/regions_hierarchy.txt
regionToCountry(RegionID, 'ua') 使用字典中的'ua' 键: /opt/geo/regions_hierarchy_ua.txt
### ツ环板(ョツ嘉ッツ偲青regionシツ氾カツ鉄ツ工ツ渉\]) {#regiontocityid-geobase}
### regionToCity(id[, geobase]) {#regiontocityid-geobase}
Accepts a UInt32 number the region ID from the Yandex geobase. If this region is a city or part of a city, it returns the region ID for the appropriate city. Otherwise, returns 0.
从 Yandex geobase 接收一个 UInt32 数字类型的区域ID 。如果该区域是一个城市或城市的一部分它将返回相应城市的区域ID。否则,返回0。
### 虏茅驴麓卤戮碌禄路戮鲁拢\]) {#regiontoareaid-geobase}
### regionToArea(id[, geobase]) {#regiontoareaid-geobase}
将区域转换为区域地理数据库中的类型5。 在所有其他方式,这个功能是一样的 regionToCity.
@ -84,36 +84,58 @@ LIMIT 15
│ Federation of Bosnia and Herzegovina │
└──────────────────────────────────────────────────────────┘
### 虏茅驴麓卤戮碌禄路戮鲁拢(陆毛隆隆(803)888-8325\]) {#regiontocountryid-geobase}
### regionToCountry(id[, geobase]) {#regiontocountryid-geobase}
将区域转换为国家。 在所有其他方式,这个功能是一样的 regionToCity.
示例: `regionToCountry(toUInt32(213)) = 225` 转换莫斯科213到俄罗斯225
### 掳胫((禄脢鹿脷露胫鲁隆鹿((酶-11-16""\[脪陆,ase\]) {#regiontocontinentid-geobase}
### regionToContinent(id[, geobase]) {#regiontocontinentid-geobase}
将区域转换为大陆。 在所有其他方式,这个功能是一样的 regionToCity.
示例: `regionToContinent(toUInt32(213)) = 10001` 将莫斯科213转换为欧亚大陆10001
### ツ环板(ョツ嘉ッツ偲青regionャツ静ャツ青サツ催ャツ渉\]) {#regiontopopulationid-geobase}
### regionToTopContinent (#regiontotopcontinent) {#regiontotopcontinent-regiontotopcontinent}
查找该区域层次结构中最高的大陆。
**语法**
``` sql
regionToTopContinent(id[, geobase])
```
**参数**
- `id` — Yandex geobase 的区域 ID. [UInt32](../../sql-reference/data-types/int-uint.md).
- `geobase` — 字典的建. 参阅 [Multiple Geobases](#multiple-geobases). [String](../../sql-reference/data-types/string.md). 可选.
**返回值**
- 顶级大陆的标识符(当您在区域层次结构中攀爬时,是后者)。
- 0如果没有。
类型: `UInt32`.
### regionToPopulation(id\[, geobase\]) {#regiontopopulationid-geobase}
获取区域的人口。
The population can be recorded in files with the geobase. See the section «External dictionaries».
人口可以记录在文件与地球基。请参阅«外部词典»部分。
如果没有为该区域记录人口则返回0。
在Yandex地理数据库中可能会为子区域记录人口但不会为父区域记录人口。
### regionIn(lhs,rhs\[,地理数据库\]) {#regioninlhs-rhs-geobase}
检查是否 lhs 属于一个区域 rhs 区域。 如果属于UInt8则返回等于1的数字如果不属于则返回0。
The relationship is reflexive any region also belongs to itself.
这种关系是反射的——任何地区也属于自己。
### ツ暗ェツ氾环催ツ団ツ法ツ人\]) {#regionhierarchyid-geobase}
### regionHierarchy(id\[, geobase\]) {#regionhierarchyid-geobase}
Accepts a UInt32 number the region ID from the Yandex geobase. Returns an array of region IDs consisting of the passed region and all parents along the chain.
从 Yandex geobase 接收一个 UInt32 数字类型的区域ID。返回一个区域ID数组由传递的区域和链上的所有父节点组成。
示例: `regionHierarchy(toUInt32(213)) = [213,1,3,225,10001,10000]`.
### 地区名称(id\[,郎\]) {#regiontonameid-lang}
### regionToName(id\[, lang\]) {#regiontonameid-lang}
Accepts a UInt32 number the region ID from the Yandex geobase. A string with the name of the language can be passed as a second argument. Supported languages are: ru, en, ua, uk, by, kz, tr. If the second argument is omitted, the language ru is used. If the language is not supported, an exception is thrown. Returns a string the name of the region in the corresponding language. If the region with the specified ID doesnt exist, an empty string is returned.
从 Yandex geobase 接收一个 UInt32 数字类型的区域ID。带有语言名称的字符串可以作为第二个参数传递。支持的语言有:ru, en, ua, uk, by, kz, tr。如果省略第二个参数则使用' ru '语言。如果不支持该语言,则抛出异常。返回一个字符串-对应语言的区域名称。如果指定ID的区域不存在则返回一个空字符串。
`ua``uk` 都意味着乌克兰。

View File

@ -2,6 +2,7 @@
#include "Common/MemoryTracker.h"
#include "Columns/ColumnsNumber.h"
#include "ConnectionParameters.h"
#include "IO/CompressionMethod.h"
#include "QueryFuzzer.h"
#include "Suggest.h"
#include "TestHint.h"
@ -128,6 +129,7 @@ namespace ErrorCodes
extern const int UNRECOGNIZED_ARGUMENTS;
extern const int SYNTAX_ERROR;
extern const int TOO_DEEP_RECURSION;
extern const int AUTHENTICATION_FAILED;
}
@ -772,31 +774,50 @@ private:
<< connection_parameters.host << ":" << connection_parameters.port
<< (!connection_parameters.user.empty() ? " as user " + connection_parameters.user : "") << "." << std::endl;
connection = std::make_unique<Connection>(
connection_parameters.host,
connection_parameters.port,
connection_parameters.default_database,
connection_parameters.user,
connection_parameters.password,
"", /* cluster */
"", /* cluster_secret */
"client",
connection_parameters.compression,
connection_parameters.security);
String server_name;
UInt64 server_version_major = 0;
UInt64 server_version_minor = 0;
UInt64 server_version_patch = 0;
if (max_client_network_bandwidth)
try
{
ThrottlerPtr throttler = std::make_shared<Throttler>(max_client_network_bandwidth, 0, "");
connection->setThrottler(throttler);
}
connection = std::make_unique<Connection>(
connection_parameters.host,
connection_parameters.port,
connection_parameters.default_database,
connection_parameters.user,
connection_parameters.password,
"", /* cluster */
"", /* cluster_secret */
"client",
connection_parameters.compression,
connection_parameters.security);
connection->getServerVersion(
connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision);
if (max_client_network_bandwidth)
{
ThrottlerPtr throttler = std::make_shared<Throttler>(max_client_network_bandwidth, 0, "");
connection->setThrottler(throttler);
}
connection->getServerVersion(
connection_parameters.timeouts, server_name, server_version_major, server_version_minor, server_version_patch, server_revision);
}
catch (const Exception & e)
{
/// It is typical when users install ClickHouse, type some password and instantly forget it.
if ((connection_parameters.user.empty() || connection_parameters.user == "default")
&& e.code() == DB::ErrorCodes::AUTHENTICATION_FAILED)
{
std::cerr << std::endl
<< "If you have installed ClickHouse and forgot password you can reset it in the configuration file." << std::endl
<< "The password for default user is typically located at /etc/clickhouse-server/users.d/default-password.xml" << std::endl
<< "and deleting this file will reset the password." << std::endl
<< "See also /etc/clickhouse-server/users.xml on the server where ClickHouse is installed." << std::endl
<< std::endl;
}
throw;
}
server_version = toString(server_version_major) + "." + toString(server_version_minor) + "." + toString(server_version_patch);
@ -1823,7 +1844,7 @@ private:
void processInsertQuery()
{
const auto parsed_insert_query = parsed_query->as<ASTInsertQuery &>();
if (!parsed_insert_query.data && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
if ((!parsed_insert_query.data && !parsed_insert_query.infile) && (is_interactive || (!stdin_is_a_tty && std_in.eof())))
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
connection->sendQuery(
@ -1894,7 +1915,24 @@ private:
if (!parsed_insert_query)
return;
if (parsed_insert_query->data)
if (parsed_insert_query->infile)
{
const auto & in_file_node = parsed_insert_query->infile->as<ASTLiteral &>();
const auto in_file = in_file_node.value.safeGet<std::string>();
auto in_buffer = wrapReadBufferWithCompressionMethod(std::make_unique<ReadBufferFromFile>(in_file), chooseCompressionMethod(in_file, ""));
try
{
sendDataFrom(*in_buffer, sample, columns_description);
}
catch (Exception & e)
{
e.addMessage("data for INSERT was parsed from file");
throw;
}
}
else if (parsed_insert_query->data)
{
/// Send data contained in the query.
ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data);

View File

@ -17,6 +17,7 @@
#include <Poco/Version.h>
#include <Poco/Environment.h>
#include <Common/getMultipleKeysFromConfig.h>
#include <Core/ServerUUID.h>
#include <filesystem>
#include <IO/UseSSL.h>
@ -326,6 +327,8 @@ int Keeper::main(const std::vector<std::string> & /*args*/)
}
}
DB::ServerUUID::load(path + "/uuid", log);
const Settings & settings = global_context->getSettingsRef();
GlobalThreadPool::initialize(config().getUInt("max_thread_pool_size", 100));

View File

@ -12,6 +12,7 @@
#include <Interpreters/executeQuery.h>
#include <Interpreters/loadMetadata.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Interpreters/Session.h>
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h>
@ -374,14 +375,13 @@ void LocalServer::processQueries()
if (!parse_res.second)
throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR);
/// we can't mutate global global_context (can lead to races, as it was already passed to some background threads)
/// so we can't reuse it safely as a query context and need a copy here
auto context = Context::createCopy(global_context);
/// Authenticate and create a context to execute queries.
Session session{global_context, ClientInfo::Interface::TCP};
session.authenticate("default", "", Poco::Net::SocketAddress{});
context->makeSessionContext();
context->makeQueryContext();
context->setUser("default", "", Poco::Net::SocketAddress{});
/// Use the same context for all queries.
auto context = session.makeQueryContext();
context->makeSessionContext(); /// initial_create_query requires a session context to be set.
context->setCurrentQueryId("");
applyCmdSettings(context);

View File

@ -39,6 +39,7 @@
#include <Common/getMappedArea.h>
#include <Common/remapExecutable.h>
#include <Common/TLDListsHolder.h>
#include <Core/ServerUUID.h>
#include <IO/HTTPCommon.h>
#include <IO/ReadHelpers.h>
#include <IO/UseSSL.h>
@ -79,7 +80,6 @@
#include <Server/HTTP/HTTPServer.h>
#include <filesystem>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
# include "Common/config_version.h"
@ -146,7 +146,6 @@ static bool jemallocOptionEnabled(const char *name)
static bool jemallocOptionEnabled(const char *) { return 0; }
#endif
int mainEntryClickHouseServer(int argc, char ** argv)
{
DB::Server app;
@ -667,13 +666,14 @@ if (ThreadFuzzer::instance().isEffective())
global_context->setRemoteHostFilter(config());
std::string path = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
std::string path_str = getCanonicalPath(config().getString("path", DBMS_DEFAULT_PATH));
fs::path path = path_str;
std::string default_database = config().getString("default_database", "default");
/// Check that the process user id matches the owner of the data.
const auto effective_user_id = geteuid();
struct stat statbuf;
if (stat(path.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
if (stat(path_str.c_str(), &statbuf) == 0 && effective_user_id != statbuf.st_uid)
{
const auto effective_user = getUserName(effective_user_id);
const auto data_owner = getUserName(statbuf.st_uid);
@ -690,9 +690,11 @@ if (ThreadFuzzer::instance().isEffective())
}
}
global_context->setPath(path);
global_context->setPath(path_str);
StatusFile status{path + "status", StatusFile::write_full_info};
StatusFile status{path / "status", StatusFile::write_full_info};
DB::ServerUUID::load(path / "uuid", log);
/// Try to increase limit on number of open files.
{
@ -726,7 +728,7 @@ if (ThreadFuzzer::instance().isEffective())
/// Storage with temporary data for processing of heavy queries.
{
std::string tmp_path = config().getString("tmp_path", path + "tmp/");
std::string tmp_path = config().getString("tmp_path", path / "tmp/");
std::string tmp_policy = config().getString("tmp_policy", "");
const VolumePtr & volume = global_context->setTemporaryStorage(tmp_path, tmp_policy);
for (const DiskPtr & disk : volume->getDisks())
@ -738,7 +740,7 @@ if (ThreadFuzzer::instance().isEffective())
* Examples: do repair of local data; clone all replicated tables from replica.
*/
{
auto flags_path = fs::path(path) / "flags/";
auto flags_path = path / "flags/";
fs::create_directories(flags_path);
global_context->setFlagsPath(flags_path);
}
@ -747,29 +749,29 @@ if (ThreadFuzzer::instance().isEffective())
*/
{
std::string user_files_path = config().getString("user_files_path", fs::path(path) / "user_files/");
std::string user_files_path = config().getString("user_files_path", path / "user_files/");
global_context->setUserFilesPath(user_files_path);
fs::create_directories(user_files_path);
}
{
std::string dictionaries_lib_path = config().getString("dictionaries_lib_path", fs::path(path) / "dictionaries_lib/");
std::string dictionaries_lib_path = config().getString("dictionaries_lib_path", path / "dictionaries_lib/");
global_context->setDictionariesLibPath(dictionaries_lib_path);
fs::create_directories(dictionaries_lib_path);
}
/// top_level_domains_lists
{
const std::string & top_level_domains_path = config().getString("top_level_domains_path", fs::path(path) / "top_level_domains/");
const std::string & top_level_domains_path = config().getString("top_level_domains_path", path / "top_level_domains/");
TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
}
{
fs::create_directories(fs::path(path) / "data/");
fs::create_directories(fs::path(path) / "metadata/");
fs::create_directories(path / "data/");
fs::create_directories(path / "metadata/");
/// Directory with metadata of tables, which was marked as dropped by Atomic database
fs::create_directories(fs::path(path) / "metadata_dropped/");
fs::create_directories(path / "metadata_dropped/");
}
if (config().has("interserver_http_port") && config().has("interserver_https_port"))
@ -952,7 +954,7 @@ if (ThreadFuzzer::instance().isEffective())
#endif
/// Set path for format schema files
fs::path format_schema_path(config().getString("format_schema_path", fs::path(path) / "format_schemas/"));
fs::path format_schema_path(config().getString("format_schema_path", path / "format_schemas/"));
global_context->setFormatSchemaPath(format_schema_path);
fs::create_directories(format_schema_path);
@ -1088,7 +1090,7 @@ if (ThreadFuzzer::instance().isEffective())
/// system logs may copy global context.
global_context->setCurrentDatabaseNameInGlobalContext(default_database);
LOG_INFO(log, "Loading metadata from {}", path);
LOG_INFO(log, "Loading metadata from {}", path_str);
try
{
@ -1428,7 +1430,6 @@ if (ThreadFuzzer::instance().isEffective())
/// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
async_metrics.start();
global_context->enableNamedSessions();
{
String level_str = config().getString("text_log.level", "");

View File

@ -70,6 +70,7 @@ public:
/// Returns the current user. The function can return nullptr.
UserPtr getUser() const;
String getUserName() const;
std::optional<UUID> getUserID() const { return getParams().user_id; }
/// Returns information about current and enabled roles.
std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const;

View File

@ -26,6 +26,8 @@ protected:
String user_name;
};
/// Does not check the password/credentials and that the specified host is allowed.
/// (Used only internally in cluster, if the secret matches)
class AlwaysAllowCredentials
: public Credentials
{

View File

@ -5,6 +5,7 @@
#include <Common/SipHash.h>
#include <Common/assert_cast.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/IDataType.h>
namespace DB

View File

@ -5,6 +5,7 @@
#include <Poco/Net/HTTPRequest.h>
#include <Poco/URI.h>
#include <filesystem>
#include <thread>
namespace fs = std::filesystem;

View File

@ -373,7 +373,9 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
except_list,
is_draining ? drain_timeout : receive_timeout);
if (n == 0)
/// We treat any error as timeout for simplicity.
/// And we also check if read_list is still empty just in case.
if (n <= 0 || read_list.empty())
{
auto err_msg = fmt::format("Timeout exceeded while reading from {}", dumpAddressesUnlocked());
for (ReplicaState & state : replica_states)
@ -389,9 +391,7 @@ MultiplexedConnections::ReplicaState & MultiplexedConnections::getReplicaForRead
}
}
/// TODO Absolutely wrong code: read_list could be empty; motivation of rand is unclear.
/// This code path is disabled by default.
/// TODO Motivation of rand is unclear.
auto & socket = read_list[thread_local_rng() % read_list.size()];
if (fd_to_replica_state_idx.empty())
{

View File

@ -565,7 +565,7 @@ void ColumnArray::expand(const IColumn::Filter & mask, bool inverted)
while (index >= 0)
{
offsets_data[index] = last_offset;
if (mask[index] ^ inverted)
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

View File

@ -354,7 +354,7 @@ void ColumnFixedString::expand(const IColumn::Filter & mask, bool inverted)
chars.resize_fill(mask.size() * n, 0);
while (index >= 0)
{
if (mask[index] ^ inverted)
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

View File

@ -178,7 +178,7 @@ void ColumnString::expand(const IColumn::Filter & mask, bool inverted)
while (index >= 0)
{
offsets_data[index] = last_offset;
if (mask[index] ^ inverted)
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

View File

@ -1,6 +1,6 @@
#pragma once
#include <Core/Block.h>
#include <Columns/IColumn.h>
namespace DB

View File

@ -304,7 +304,7 @@ size_t ColumnUnique<ColumnType>::uniqueInsert(const Field & x)
if (x.getType() == Field::Types::Null)
return getNullValueIndex();
if (isNumeric())
if (valuesHaveFixedSize())
return uniqueInsertData(&x.reinterpret<char>(), size_of_value_if_fixed);
auto & val = x.get<String>();

View File

@ -26,7 +26,7 @@ void expandDataByMask(PaddedPODArray<T> & data, const PaddedPODArray<UInt8> & ma
data.resize(mask.size());
while (index >= 0)
{
if (mask[index] ^ inverted)
if (!!mask[index] ^ inverted)
{
if (from < 0)
throw Exception("Too many bytes in mask", ErrorCodes::LOGICAL_ERROR);

View File

@ -191,10 +191,11 @@ void ThreadPoolImpl<Thread>::wait()
template <typename Thread>
ThreadPoolImpl<Thread>::~ThreadPoolImpl()
{
/// Note: should not use logger from here,
/// because it can be an instance of GlobalThreadPool that is a global variable
/// and the destruction order of global variables is unspecified.
finalize();
/// wait() hadn't been called, log exception at least.
if (first_exception)
DB::tryLogException(first_exception, __PRETTY_FUNCTION__);
}
template <typename Thread>
@ -273,21 +274,11 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
}
catch (...)
{
ALLOW_ALLOCATIONS_IN_SCOPE;
/// job should be reset before decrementing scheduled_jobs to
/// ensure that the Job destroyed before wait() returns.
job = {};
{
/// In case thread pool will not be terminated on exception
/// (this is the case for GlobalThreadPool),
/// than first_exception may be overwritten and got lost,
/// and this usually is an error, since this will finish the thread,
/// and for this the caller may not be ready.
if (!shutdown_on_exception)
DB::tryLogException(std::current_exception(), __PRETTY_FUNCTION__);
std::unique_lock lock(mutex);
if (!first_exception)
first_exception = std::current_exception(); // NOLINT

View File

@ -80,8 +80,3 @@ target_link_libraries (average PRIVATE clickhouse_common_io)
add_executable (shell_command_inout shell_command_inout.cpp)
target_link_libraries (shell_command_inout PRIVATE clickhouse_common_io)
if (ENABLE_FUZZING)
add_executable(YAML_fuzzer YAML_fuzzer.cpp ${SRCS})
target_link_libraries(YAML_fuzzer PRIVATE clickhouse_parsers ${LIB_FUZZING_ENGINE})
endif ()

View File

@ -1,39 +0,0 @@
#include <iostream>
#include <fstream>
#include <string>
#include <cstdio>
#include <time.h>
#include <filesystem>
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
/// How to test:
/// build ClickHouse with YAML_fuzzer.cpp
/// ./YAML_fuzzer YAML_CORPUS
/// where YAML_CORPUS is a directory with different YAML configs for libfuzzer
char file_name[L_tmpnam];
if (!std::tmpnam(file_name))
{
std::cerr << "Cannot create temp file!\n";
return 1;
}
std::string input = std::string(reinterpret_cast<const char*>(data), size);
DB::YAMLParser parser;
{
std::ofstream temp_file(file_name);
temp_file << input;
}
try
{
DB::YAMLParser::parse(std::string(file_name));
}
catch (...)
{
std::cerr << "YAML_fuzzer failed: " << getCurrentExceptionMessage() << std::endl;
return 1;
}
return 0;
}

View File

@ -122,6 +122,24 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p
return path_starts_with_prefix_path;
}
bool symlinkStartsWith(const std::filesystem::path & path, const std::filesystem::path & prefix_path)
{
/// Differs from pathStartsWith in how `path` is normalized before comparison.
/// Make `path` absolute if it was relative and put it into normalized form: remove
/// `.` and `..` and extra `/`. Path is not canonized because otherwise path will
/// not be a path of a symlink itself.
auto absolute_path = std::filesystem::absolute(path);
absolute_path = absolute_path.lexically_normal(); /// Normalize path.
auto absolute_prefix_path = std::filesystem::absolute(prefix_path);
absolute_prefix_path = absolute_prefix_path.lexically_normal(); /// Normalize path.
auto [_, prefix_path_mismatch_it] = std::mismatch(absolute_path.begin(), absolute_path.end(), absolute_prefix_path.begin(), absolute_prefix_path.end());
bool path_starts_with_prefix_path = (prefix_path_mismatch_it == absolute_prefix_path.end());
return path_starts_with_prefix_path;
}
bool pathStartsWith(const String & path, const String & prefix_path)
{
auto filesystem_path = std::filesystem::path(path);
@ -130,6 +148,13 @@ bool pathStartsWith(const String & path, const String & prefix_path)
return pathStartsWith(filesystem_path, filesystem_prefix_path);
}
bool symlinkStartsWith(const String & path, const String & prefix_path)
{
auto filesystem_path = std::filesystem::path(path);
auto filesystem_prefix_path = std::filesystem::path(prefix_path);
return symlinkStartsWith(filesystem_path, filesystem_prefix_path);
}
}

View File

@ -35,6 +35,8 @@ bool pathStartsWith(const std::filesystem::path & path, const std::filesystem::p
/// Returns true if path starts with prefix path
bool pathStartsWith(const String & path, const String & prefix_path);
bool symlinkStartsWith(const String & path, const String & prefix_path);
}
namespace FS

View File

@ -1,25 +0,0 @@
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
extern "C"
{
extern void zone_register();
}
struct InitializeJemallocZoneAllocatorForOSX
{
InitializeJemallocZoneAllocatorForOSX()
{
/// In case of OSX jemalloc register itself as a default zone allocator.
///
/// But when you link statically then zone_register() will not be called,
/// and even will be optimized out:
///
/// It is ok to call it twice (i.e. in case of shared libraries)
/// Since zone_register() is a no-op if the default zone is already replaced with something.
///
/// https://github.com/jemalloc/jemalloc/issues/708
zone_register();
}
} initializeJemallocZoneAllocatorForOSX;
#endif

View File

@ -1,3 +1,18 @@
if(ENABLE_EXAMPLES)
if (ENABLE_FUZZING)
include("${ClickHouse_SOURCE_DIR}/cmake/dbms_glob_sources.cmake")
add_headers_and_sources(fuzz_compression .)
# Remove this file, because it has dependencies on DataTypes
list(REMOVE_ITEM ${fuzz_compression_sources} CompressionFactoryAdditions.cpp)
add_library(fuzz_compression ${fuzz_compression_headers} ${fuzz_compression_sources})
target_link_libraries(fuzz_compression PUBLIC clickhouse_parsers clickhouse_common_io common lz4)
endif()
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
endif()
if (ENABLE_FUZZING)
add_subdirectory(fuzzers)
endif()

View File

@ -22,13 +22,10 @@ namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_CODEC;
extern const int BAD_ARGUMENTS;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int DATA_TYPE_CANNOT_HAVE_ARGUMENTS;
}
static constexpr auto DEFAULT_CODEC_NAME = "Default";
CompressionCodecPtr CompressionCodecFactory::getDefaultCodec() const
{
return default_codec;
@ -49,184 +46,6 @@ CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std
}
}
void CompressionCodecFactory::validateCodec(
const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs) const
{
if (family_name.empty())
throw Exception("Compression codec name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
if (level)
{
auto literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)),
{}, sanity_check, allow_experimental_codecs);
}
else
{
auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier),
{}, sanity_check, allow_experimental_codecs);
}
}
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const
{
if (const auto * func = ast->as<ASTFunction>())
{
ASTPtr codecs_descriptions = std::make_shared<ASTExpressionList>();
bool is_compression = false;
bool has_none = false;
std::optional<size_t> generic_compression_codec_pos;
std::set<size_t> post_processing_codecs;
bool can_substitute_codec_arguments = true;
for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i)
{
const auto & inner_codec_ast = func->arguments->children[i];
String codec_family_name;
ASTPtr codec_arguments;
if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
{
codec_family_name = family_name->name();
codec_arguments = {};
}
else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
{
codec_family_name = ast_func->name;
codec_arguments = ast_func->arguments;
}
else
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
/// Default codec replaced with current default codec which may depend on different
/// settings (and properties of data) in runtime.
CompressionCodecPtr result_codec;
if (codec_family_name == DEFAULT_CODEC_NAME)
{
if (codec_arguments != nullptr)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"{} codec cannot have any arguments, it's just an alias for codec specified in config.xml", DEFAULT_CODEC_NAME);
result_codec = default_codec;
codecs_descriptions->children.emplace_back(std::make_shared<ASTIdentifier>(DEFAULT_CODEC_NAME));
}
else
{
if (column_type)
{
CompressionCodecPtr prev_codec;
IDataType::StreamCallbackWithType callback = [&](
const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type)
{
if (ISerialization::isSpecialCompressionAllowed(substream_path))
{
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
/// Case for column Tuple, which compressed with codec which depends on data type, like Delta.
/// We cannot substitute parameters for such codecs.
if (prev_codec && prev_codec->getHash() != result_codec->getHash())
can_substitute_codec_arguments = false;
prev_codec = result_codec;
}
};
ISerialization::SubstreamPath stream_path;
column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path);
if (!result_codec)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());
}
else
{
result_codec = getImpl(codec_family_name, codec_arguments, nullptr);
}
if (!allow_experimental_codecs && result_codec->isExperimental())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Codec {} is experimental and not meant to be used in production."
" You can enable it with the 'allow_experimental_codecs' setting.",
codec_family_name);
codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
}
is_compression |= result_codec->isCompression();
has_none |= result_codec->isNone();
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
generic_compression_codec_pos = i;
if (result_codec->isPostProcessing())
post_processing_codecs.insert(i);
}
String codec_description = queryToString(codecs_descriptions);
if (sanity_check)
{
if (codecs_descriptions->children.size() > 1 && has_none)
throw Exception(
"It does not make sense to have codec NONE along with other compression codecs: " + codec_description
+ ". (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
ErrorCodes::BAD_ARGUMENTS);
/// Allow to explicitly specify single NONE codec if user don't want any compression.
/// But applying other transformations solely without compression (e.g. Delta) does not make sense.
/// It's okay to apply post-processing codecs solely without anything else.
if (!is_compression && !has_none && post_processing_codecs.size() != codecs_descriptions->children.size())
throw Exception(
"Compression codec " + codec_description
+ " does not compress anything."
" You may want to add generic compression algorithm after other transformations, like: "
+ codec_description
+ ", LZ4."
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
ErrorCodes::BAD_ARGUMENTS);
/// It does not make sense to apply any non-post-processing codecs
/// after post-processing one.
if (!post_processing_codecs.empty() &&
*post_processing_codecs.begin() != codecs_descriptions->children.size() - post_processing_codecs.size())
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
" because it does not make sense to apply any non-post-processing codecs after"
" post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs'"
" to skip this check).", ErrorCodes::BAD_ARGUMENTS);
/// It does not make sense to apply any transformations after generic compression algorithm
/// So, generic compression can be only one and only at the end.
if (generic_compression_codec_pos &&
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - post_processing_codecs.size())
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
" because it does not make sense to apply any transformations after generic compression algorithm."
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);
}
/// For columns with nested types like Tuple(UInt32, UInt64) we
/// obviously cannot substitute parameters for codecs which depend on
/// data type, because for the first column Delta(4) is suitable and
/// Delta(8) for the second. So we should leave codec description as is
/// and deduce them in get method for each subtype separately. For all
/// other types it's better to substitute parameters, for better
/// readability and backward compatibility.
if (can_substitute_codec_arguments)
{
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
result->name = "CODEC";
result->arguments = codecs_descriptions;
return result;
}
else
{
return ast;
}
}
throw Exception("Unknown codec family: " + queryToString(ast), ErrorCodes::UNKNOWN_CODEC);
}
CompressionCodecPtr CompressionCodecFactory::get(
const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const

View File

@ -14,6 +14,8 @@
namespace DB
{
static constexpr auto DEFAULT_CODEC_NAME = "Default";
class ICompressionCodec;
using CompressionCodecPtr = std::shared_ptr<ICompressionCodec>;

View File

@ -0,0 +1,214 @@
/**
* This file contains a part of CompressionCodecFactory methods definitions and
* is needed only because they have dependencies on DataTypes.
* They are not useful for fuzzers, so we leave them in other translation unit.
*/
#include <Compression/CompressionFactory.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeNested.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int UNKNOWN_CODEC;
extern const int BAD_ARGUMENTS;
extern const int LOGICAL_ERROR;
}
void CompressionCodecFactory::validateCodec(
const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs) const
{
if (family_name.empty())
throw Exception("Compression codec name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
if (level)
{
auto literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)),
{}, sanity_check, allow_experimental_codecs);
}
else
{
auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier),
{}, sanity_check, allow_experimental_codecs);
}
}
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
const ASTPtr & ast, const IDataType * column_type, bool sanity_check, bool allow_experimental_codecs) const
{
if (const auto * func = ast->as<ASTFunction>())
{
ASTPtr codecs_descriptions = std::make_shared<ASTExpressionList>();
bool is_compression = false;
bool has_none = false;
std::optional<size_t> generic_compression_codec_pos;
std::set<size_t> post_processing_codecs;
bool can_substitute_codec_arguments = true;
for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i)
{
const auto & inner_codec_ast = func->arguments->children[i];
String codec_family_name;
ASTPtr codec_arguments;
if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
{
codec_family_name = family_name->name();
codec_arguments = {};
}
else if (const auto * ast_func = inner_codec_ast->as<ASTFunction>())
{
codec_family_name = ast_func->name;
codec_arguments = ast_func->arguments;
}
else
throw Exception("Unexpected AST element for compression codec", ErrorCodes::UNEXPECTED_AST_STRUCTURE);
/// Default codec replaced with current default codec which may depend on different
/// settings (and properties of data) in runtime.
CompressionCodecPtr result_codec;
if (codec_family_name == DEFAULT_CODEC_NAME)
{
if (codec_arguments != nullptr)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"{} codec cannot have any arguments, it's just an alias for codec specified in config.xml", DEFAULT_CODEC_NAME);
result_codec = default_codec;
codecs_descriptions->children.emplace_back(std::make_shared<ASTIdentifier>(DEFAULT_CODEC_NAME));
}
else
{
if (column_type)
{
CompressionCodecPtr prev_codec;
IDataType::StreamCallbackWithType callback = [&](
const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type)
{
if (ISerialization::isSpecialCompressionAllowed(substream_path))
{
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
/// Case for column Tuple, which compressed with codec which depends on data type, like Delta.
/// We cannot substitute parameters for such codecs.
if (prev_codec && prev_codec->getHash() != result_codec->getHash())
can_substitute_codec_arguments = false;
prev_codec = result_codec;
}
};
ISerialization::SubstreamPath stream_path;
column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path);
if (!result_codec)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());
}
else
{
result_codec = getImpl(codec_family_name, codec_arguments, nullptr);
}
if (!allow_experimental_codecs && result_codec->isExperimental())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Codec {} is experimental and not meant to be used in production."
" You can enable it with the 'allow_experimental_codecs' setting.",
codec_family_name);
codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
}
is_compression |= result_codec->isCompression();
has_none |= result_codec->isNone();
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
generic_compression_codec_pos = i;
if (result_codec->isPostProcessing())
post_processing_codecs.insert(i);
}
String codec_description = queryToString(codecs_descriptions);
if (sanity_check)
{
if (codecs_descriptions->children.size() > 1 && has_none)
throw Exception(
"It does not make sense to have codec NONE along with other compression codecs: " + codec_description
+ ". (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
ErrorCodes::BAD_ARGUMENTS);
/// Allow to explicitly specify single NONE codec if user don't want any compression.
/// But applying other transformations solely without compression (e.g. Delta) does not make sense.
/// It's okay to apply post-processing codecs solely without anything else.
if (!is_compression && !has_none && post_processing_codecs.size() != codecs_descriptions->children.size())
throw Exception(
"Compression codec " + codec_description
+ " does not compress anything."
" You may want to add generic compression algorithm after other transformations, like: "
+ codec_description
+ ", LZ4."
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
ErrorCodes::BAD_ARGUMENTS);
/// It does not make sense to apply any non-post-processing codecs
/// after post-processing one.
if (!post_processing_codecs.empty() &&
*post_processing_codecs.begin() != codecs_descriptions->children.size() - post_processing_codecs.size())
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
" because it does not make sense to apply any non-post-processing codecs after"
" post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs'"
" to skip this check).", ErrorCodes::BAD_ARGUMENTS);
/// It does not make sense to apply any transformations after generic compression algorithm
/// So, generic compression can be only one and only at the end.
if (generic_compression_codec_pos &&
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - post_processing_codecs.size())
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
" because it does not make sense to apply any transformations after generic compression algorithm."
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);
}
/// For columns with nested types like Tuple(UInt32, UInt64) we
/// obviously cannot substitute parameters for codecs which depend on
/// data type, because for the first column Delta(4) is suitable and
/// Delta(8) for the second. So we should leave codec description as is
/// and deduce them in get method for each subtype separately. For all
/// other types it's better to substitute parameters, for better
/// readability and backward compatibility.
if (can_substitute_codec_arguments)
{
std::shared_ptr<ASTFunction> result = std::make_shared<ASTFunction>();
result->name = "CODEC";
result->arguments = codecs_descriptions;
return result;
}
else
{
return ast;
}
}
throw Exception("Unknown codec family: " + queryToString(ast), ErrorCodes::UNKNOWN_CODEC);
}
}

View File

@ -439,11 +439,14 @@ bool NO_INLINE decompressImpl(
{
s = *ip++;
length += s;
} while (unlikely(s == 255));
} while (unlikely(s == 255 && ip < input_end));
};
/// Get literal length.
if (unlikely(ip >= input_end))
return false;
const unsigned token = *ip++;
length = token >> 4;
if (length == 0x0F)
@ -464,18 +467,18 @@ bool NO_INLINE decompressImpl(
/// output: xyzHello, w
/// ^-op (we will overwrite excessive bytes on next iteration)
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op, ip, target); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (unlikely(copy_end > output_end))
return false;
if (target == output_end)
return true;
}
wildCopy<copy_amount>(op, ip, copy_end); /// Here we can write up to copy_amount - 1 bytes after buffer.
if (copy_end == output_end)
return true;
ip += length;
op = copy_end;
if (unlikely(ip > input_end))
if (unlikely(ip + 1 >= input_end))
return false;
/// Get match offset.
@ -528,8 +531,9 @@ bool NO_INLINE decompressImpl(
copy<copy_amount>(op, match); /// copy_amount + copy_amount - 1 - 4 * 2 bytes after buffer.
if (length > copy_amount * 2)
{
auto * target = std::min(copy_end, output_end);
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, target);
if (unlikely(copy_end > output_end))
return false;
wildCopy<copy_amount>(op + copy_amount, match + copy_amount, copy_end);
}
op = copy_end;

View File

@ -3,8 +3,3 @@ target_link_libraries (compressed_buffer PRIVATE dbms)
add_executable (cached_compressed_read_buffer cached_compressed_read_buffer.cpp)
target_link_libraries (cached_compressed_read_buffer PRIVATE dbms)
if (ENABLE_FUZZING)
add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp)
target_link_libraries (compressed_buffer_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
endif ()

View File

@ -0,0 +1,2 @@
add_executable (compressed_buffer_fuzzer compressed_buffer_fuzzer.cpp)
target_link_libraries (compressed_buffer_fuzzer PRIVATE fuzz_compression clickhouse_common_io ${LIB_FUZZING_ENGINE})

View File

@ -248,117 +248,117 @@ struct KeeperStorageCreateRequest final : public KeeperStorageRequest
Coordination::ZooKeeperCreateResponse & response = dynamic_cast<Coordination::ZooKeeperCreateResponse &>(*response_ptr);
Coordination::ZooKeeperCreateRequest & request = dynamic_cast<Coordination::ZooKeeperCreateRequest &>(*zk_request);
if (container.contains(request.path))
auto parent_path = parentPath(request.path);
auto it = container.find(parent_path);
if (it == container.end())
{
response.error = Coordination::Error::ZNONODE;
return { response_ptr, undo };
}
else if (it->value.stat.ephemeralOwner != 0)
{
response.error = Coordination::Error::ZNOCHILDRENFOREPHEMERALS;
return { response_ptr, undo };
}
std::string path_created = request.path;
if (request.is_sequential)
{
auto seq_num = it->value.seq_num;
std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
seq_num_str.exceptions(std::ios::failbit);
seq_num_str << std::setw(10) << std::setfill('0') << seq_num;
path_created += seq_num_str.str();
}
if (container.contains(path_created))
{
response.error = Coordination::Error::ZNODEEXISTS;
return { response_ptr, undo };
}
else
auto child_path = getBaseName(path_created);
if (child_path.empty())
{
auto parent_path = parentPath(request.path);
auto it = container.find(parent_path);
if (it == container.end())
{
response.error = Coordination::Error::ZNONODE;
}
else if (it->value.stat.ephemeralOwner != 0)
{
response.error = Coordination::Error::ZNOCHILDRENFOREPHEMERALS;
}
else
{
auto & session_auth_ids = storage.session_and_auth[session_id];
KeeperStorage::Node created_node;
Coordination::ACLs node_acls;
if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log))
{
response.error = Coordination::Error::ZINVALIDACL;
return {response_ptr, {}};
}
uint64_t acl_id = storage.acl_map.convertACLs(node_acls);
storage.acl_map.addUsage(acl_id);
created_node.acl_id = acl_id;
created_node.stat.czxid = zxid;
created_node.stat.mzxid = zxid;
created_node.stat.pzxid = zxid;
created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
created_node.stat.mtime = created_node.stat.ctime;
created_node.stat.numChildren = 0;
created_node.stat.dataLength = request.data.length();
created_node.stat.ephemeralOwner = request.is_ephemeral ? session_id : 0;
created_node.data = request.data;
created_node.is_sequental = request.is_sequential;
std::string path_created = request.path;
if (request.is_sequential)
{
auto seq_num = it->value.seq_num;
std::stringstream seq_num_str; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
seq_num_str.exceptions(std::ios::failbit);
seq_num_str << std::setw(10) << std::setfill('0') << seq_num;
path_created += seq_num_str.str();
}
int32_t parent_cversion = request.parent_cversion;
auto child_path = getBaseName(path_created);
int64_t prev_parent_zxid;
int32_t prev_parent_cversion;
container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid,
parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent)
{
parent.children.insert(child_path);
prev_parent_cversion = parent.stat.cversion;
prev_parent_zxid = parent.stat.pzxid;
/// Increment sequential number even if node is not sequential
++parent.seq_num;
if (parent_cversion == -1)
++parent.stat.cversion;
else if (parent_cversion > parent.stat.cversion)
parent.stat.cversion = parent_cversion;
if (zxid > parent.stat.pzxid)
parent.stat.pzxid = zxid;
++parent.stat.numChildren;
});
response.path_created = path_created;
container.insert(path_created, std::move(created_node));
if (request.is_ephemeral)
ephemerals[session_id].emplace(path_created);
undo = [&storage, prev_parent_zxid, prev_parent_cversion, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id]
{
storage.container.erase(path_created);
storage.acl_map.removeUsage(acl_id);
if (is_ephemeral)
storage.ephemerals[session_id].erase(path_created);
storage.container.updateValue(parent_path, [child_path, prev_parent_zxid, prev_parent_cversion] (KeeperStorage::Node & undo_parent)
{
--undo_parent.stat.numChildren;
--undo_parent.seq_num;
undo_parent.stat.cversion = prev_parent_cversion;
undo_parent.stat.pzxid = prev_parent_zxid;
undo_parent.children.erase(child_path);
});
};
response.error = Coordination::Error::ZOK;
}
response.error = Coordination::Error::ZBADARGUMENTS;
return { response_ptr, undo };
}
auto & session_auth_ids = storage.session_and_auth[session_id];
KeeperStorage::Node created_node;
Coordination::ACLs node_acls;
if (!fixupACL(request.acls, session_auth_ids, node_acls, !request.restored_from_zookeeper_log))
{
response.error = Coordination::Error::ZINVALIDACL;
return {response_ptr, {}};
}
uint64_t acl_id = storage.acl_map.convertACLs(node_acls);
storage.acl_map.addUsage(acl_id);
created_node.acl_id = acl_id;
created_node.stat.czxid = zxid;
created_node.stat.mzxid = zxid;
created_node.stat.pzxid = zxid;
created_node.stat.ctime = std::chrono::system_clock::now().time_since_epoch() / std::chrono::milliseconds(1);
created_node.stat.mtime = created_node.stat.ctime;
created_node.stat.numChildren = 0;
created_node.stat.dataLength = request.data.length();
created_node.stat.ephemeralOwner = request.is_ephemeral ? session_id : 0;
created_node.data = request.data;
created_node.is_sequental = request.is_sequential;
int32_t parent_cversion = request.parent_cversion;
int64_t prev_parent_zxid;
int32_t prev_parent_cversion;
container.updateValue(parent_path, [child_path, zxid, &prev_parent_zxid,
parent_cversion, &prev_parent_cversion] (KeeperStorage::Node & parent)
{
parent.children.insert(child_path);
prev_parent_cversion = parent.stat.cversion;
prev_parent_zxid = parent.stat.pzxid;
/// Increment sequential number even if node is not sequential
++parent.seq_num;
if (parent_cversion == -1)
++parent.stat.cversion;
else if (parent_cversion > parent.stat.cversion)
parent.stat.cversion = parent_cversion;
if (zxid > parent.stat.pzxid)
parent.stat.pzxid = zxid;
++parent.stat.numChildren;
});
response.path_created = path_created;
container.insert(path_created, std::move(created_node));
if (request.is_ephemeral)
ephemerals[session_id].emplace(path_created);
undo = [&storage, prev_parent_zxid, prev_parent_cversion, session_id, path_created, is_ephemeral = request.is_ephemeral, parent_path, child_path, acl_id]
{
storage.container.erase(path_created);
storage.acl_map.removeUsage(acl_id);
if (is_ephemeral)
storage.ephemerals[session_id].erase(path_created);
storage.container.updateValue(parent_path, [child_path, prev_parent_zxid, prev_parent_cversion] (KeeperStorage::Node & undo_parent)
{
--undo_parent.stat.numChildren;
--undo_parent.seq_num;
undo_parent.stat.cversion = prev_parent_cversion;
undo_parent.stat.pzxid = prev_parent_zxid;
undo_parent.children.erase(child_path);
});
};
response.error = Coordination::Error::ZOK;
return { response_ptr, undo };
}
};

View File

@ -22,6 +22,85 @@ namespace ErrorCodes
extern const int POSITION_OUT_OF_BOUND;
extern const int NOT_FOUND_COLUMN_IN_BLOCK;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int AMBIGUOUS_COLUMN_NAME;
}
template <typename ReturnType>
static ReturnType onError(const std::string & message [[maybe_unused]], int code [[maybe_unused]])
{
if constexpr (std::is_same_v<ReturnType, void>)
throw Exception(message, code);
else
return false;
};
template <typename ReturnType>
static ReturnType checkColumnStructure(const ColumnWithTypeAndName & actual, const ColumnWithTypeAndName & expected,
const std::string & context_description, bool allow_remove_constants, int code)
{
if (actual.name != expected.name)
return onError<ReturnType>("Block structure mismatch in " + context_description + " stream: different names of columns:\n"
+ actual.dumpStructure() + "\n" + expected.dumpStructure(), code);
if (!actual.type->equals(*expected.type))
return onError<ReturnType>("Block structure mismatch in " + context_description + " stream: different types:\n"
+ actual.dumpStructure() + "\n" + expected.dumpStructure(), code);
if (!actual.column || !expected.column)
return ReturnType(true);
const IColumn * actual_column = actual.column.get();
/// If we allow to remove constants, and expected column is not const, then unwrap actual constant column.
if (allow_remove_constants && !isColumnConst(*expected.column))
{
if (const auto * column_const = typeid_cast<const ColumnConst *>(actual_column))
actual_column = &column_const->getDataColumn();
}
if (actual_column->getName() != expected.column->getName())
return onError<ReturnType>("Block structure mismatch in " + context_description + " stream: different columns:\n"
+ actual.dumpStructure() + "\n" + expected.dumpStructure(), code);
if (isColumnConst(*actual.column) && isColumnConst(*expected.column))
{
Field actual_value = assert_cast<const ColumnConst &>(*actual.column).getField();
Field expected_value = assert_cast<const ColumnConst &>(*expected.column).getField();
if (actual_value != expected_value)
return onError<ReturnType>("Block structure mismatch in " + context_description + " stream: different values of constants, actual: "
+ applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value),
code);
}
return ReturnType(true);
}
template <typename ReturnType>
static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description, bool allow_remove_constants)
{
size_t columns = rhs.columns();
if (lhs.columns() != columns)
return onError<ReturnType>("Block structure mismatch in " + context_description + " stream: different number of columns:\n"
+ lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < columns; ++i)
{
const auto & actual = lhs.getByPosition(i);
const auto & expected = rhs.getByPosition(i);
if constexpr (std::is_same_v<ReturnType, bool>)
{
if (!checkColumnStructure<ReturnType>(actual, expected, context_description, allow_remove_constants, ErrorCodes::LOGICAL_ERROR))
return false;
}
else
checkColumnStructure<ReturnType>(actual, expected, context_description, allow_remove_constants, ErrorCodes::LOGICAL_ERROR);
}
return ReturnType(true);
}
@ -57,24 +136,41 @@ void Block::insert(size_t position, ColumnWithTypeAndName elem)
throw Exception("Position out of bound in Block::insert(), max position = "
+ toString(data.size()), ErrorCodes::POSITION_OUT_OF_BOUND);
if (elem.name.empty())
throw Exception("Column name in Block cannot be empty", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
for (auto & name_pos : index_by_name)
if (name_pos.second >= position)
++name_pos.second;
index_by_name.emplace(elem.name, position);
auto [it, inserted] = index_by_name.emplace(elem.name, position);
if (!inserted)
checkColumnStructure<void>(data[it->second], elem,
"(columns with identical name must have identical structure)", true, ErrorCodes::AMBIGUOUS_COLUMN_NAME);
data.emplace(data.begin() + position, std::move(elem));
}
void Block::insert(ColumnWithTypeAndName elem)
{
index_by_name.emplace(elem.name, data.size());
if (elem.name.empty())
throw Exception("Column name in Block cannot be empty", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
auto [it, inserted] = index_by_name.emplace(elem.name, data.size());
if (!inserted)
checkColumnStructure<void>(data[it->second], elem,
"(columns with identical name must have identical structure)", true, ErrorCodes::AMBIGUOUS_COLUMN_NAME);
data.emplace_back(std::move(elem));
}
void Block::insertUnique(ColumnWithTypeAndName elem)
{
if (elem.name.empty())
throw Exception("Column name in Block cannot be empty", ErrorCodes::AMBIGUOUS_COLUMN_NAME);
if (index_by_name.end() == index_by_name.find(elem.name))
insert(std::move(elem));
}
@ -487,67 +583,6 @@ DataTypes Block::getDataTypes() const
}
template <typename ReturnType>
static ReturnType checkBlockStructure(const Block & lhs, const Block & rhs, const std::string & context_description, bool allow_remove_constants)
{
auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]])
{
if constexpr (std::is_same_v<ReturnType, void>)
throw Exception(message, code);
else
return false;
};
size_t columns = rhs.columns();
if (lhs.columns() != columns)
return on_error("Block structure mismatch in " + context_description + " stream: different number of columns:\n"
+ lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
for (size_t i = 0; i < columns; ++i)
{
const auto & expected = rhs.getByPosition(i);
const auto & actual = lhs.getByPosition(i);
if (actual.name != expected.name)
return on_error("Block structure mismatch in " + context_description + " stream: different names of columns:\n"
+ lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
if (!actual.type->equals(*expected.type))
return on_error("Block structure mismatch in " + context_description + " stream: different types:\n"
+ lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
if (!actual.column || !expected.column)
continue;
const IColumn * actual_column = actual.column.get();
/// If we allow to remove constants, and expected column is not const, then unwrap actual constant column.
if (allow_remove_constants && !isColumnConst(*expected.column))
{
if (const auto * column_const = typeid_cast<const ColumnConst *>(actual_column))
actual_column = &column_const->getDataColumn();
}
if (actual_column->getName() != expected.column->getName())
return on_error("Block structure mismatch in " + context_description + " stream: different columns:\n"
+ lhs.dumpStructure() + "\n" + rhs.dumpStructure(), ErrorCodes::LOGICAL_ERROR);
if (isColumnConst(*actual.column) && isColumnConst(*expected.column))
{
Field actual_value = assert_cast<const ColumnConst &>(*actual.column).getField();
Field expected_value = assert_cast<const ColumnConst &>(*expected.column).getField();
if (actual_value != expected_value)
return on_error("Block structure mismatch in " + context_description + " stream: different values of constants, actual: "
+ applyVisitor(FieldVisitorToString(), actual_value) + ", expected: " + applyVisitor(FieldVisitorToString(), expected_value),
ErrorCodes::LOGICAL_ERROR);
}
}
return ReturnType(true);
}
bool blocksHaveEqualStructure(const Block & lhs, const Block & rhs)
{
return checkBlockStructure<bool>(lhs, rhs, {}, false);

View File

@ -1,3 +1,7 @@
if (ENABLE_EXAMPLES)
add_subdirectory(examples)
endif ()
if (ENABLE_FUZZING)
add_subdirectory(fuzzers)
endif()

View File

@ -2,8 +2,7 @@
#include <Core/MySQL/PacketsConnection.h>
#include <Poco/RandomStream.h>
#include <Poco/SHA1Engine.h>
#include <Access/User.h>
#include <Access/AccessControlManager.h>
#include <Interpreters/Session.h>
#include <common/logger_useful.h>
#include <Common/OpenSSLHelpers.h>
@ -73,7 +72,7 @@ Native41::Native41(const String & password, const String & auth_plugin_data)
}
void Native41::authenticate(
const String & user_name, std::optional<String> auth_response, ContextMutablePtr context,
const String & user_name, Session & session, std::optional<String> auth_response,
std::shared_ptr<PacketEndpoint> packet_endpoint, bool, const Poco::Net::SocketAddress & address)
{
if (!auth_response)
@ -86,7 +85,7 @@ void Native41::authenticate(
if (auth_response->empty())
{
context->setUser(user_name, "", address);
session.authenticate(user_name, "", address);
return;
}
@ -96,9 +95,7 @@ void Native41::authenticate(
+ " bytes, received: " + std::to_string(auth_response->size()) + " bytes.",
ErrorCodes::UNKNOWN_EXCEPTION);
auto user = context->getAccessControlManager().read<User>(user_name);
Poco::SHA1Engine::Digest double_sha1_value = user->authentication.getPasswordDoubleSHA1();
Poco::SHA1Engine::Digest double_sha1_value = session.getPasswordDoubleSHA1(user_name);
assert(double_sha1_value.size() == Poco::SHA1Engine::DIGEST_SIZE);
Poco::SHA1Engine engine;
@ -111,7 +108,7 @@ void Native41::authenticate(
{
password_sha1[i] = digest[i] ^ static_cast<unsigned char>((*auth_response)[i]);
}
context->setUser(user_name, password_sha1, address);
session.authenticate(user_name, password_sha1, address);
}
#if USE_SSL
@ -136,7 +133,7 @@ Sha256Password::Sha256Password(RSA & public_key_, RSA & private_key_, Poco::Logg
}
void Sha256Password::authenticate(
const String & user_name, std::optional<String> auth_response, ContextMutablePtr context,
const String & user_name, Session & session, std::optional<String> auth_response,
std::shared_ptr<PacketEndpoint> packet_endpoint, bool is_secure_connection, const Poco::Net::SocketAddress & address)
{
if (!auth_response)
@ -231,7 +228,7 @@ void Sha256Password::authenticate(
password.pop_back();
}
context->setUser(user_name, password, address);
session.authenticate(user_name, password, address);
}
#endif

View File

@ -15,6 +15,7 @@
namespace DB
{
class Session;
namespace MySQLProtocol
{
@ -32,7 +33,7 @@ public:
virtual String getAuthPluginData() = 0;
virtual void authenticate(
const String & user_name, std::optional<String> auth_response, ContextMutablePtr context,
const String & user_name, Session & session, std::optional<String> auth_response,
std::shared_ptr<PacketEndpoint> packet_endpoint, bool is_secure_connection, const Poco::Net::SocketAddress & address) = 0;
};
@ -49,7 +50,7 @@ public:
String getAuthPluginData() override { return scramble; }
void authenticate(
const String & user_name, std::optional<String> auth_response, ContextMutablePtr context,
const String & user_name, Session & session, std::optional<String> auth_response,
std::shared_ptr<PacketEndpoint> packet_endpoint, bool /* is_secure_connection */, const Poco::Net::SocketAddress & address) override;
private:
@ -69,7 +70,7 @@ public:
String getAuthPluginData() override { return scramble; }
void authenticate(
const String & user_name, std::optional<String> auth_response, ContextMutablePtr context,
const String & user_name, Session & session, std::optional<String> auth_response,
std::shared_ptr<PacketEndpoint> packet_endpoint, bool is_secure_connection, const Poco::Net::SocketAddress & address) override;
private:

View File

@ -110,7 +110,7 @@ void insertPostgreSQLValue(
readDateTime64Text(time, 6, in, assert_cast<const DataTypeDateTime64 *>(data_type.get())->getTimeZone());
if (time < 0)
time = 0;
assert_cast<ColumnDecimal<Decimal64> &>(column).insertValue(time);
assert_cast<DataTypeDateTime64::ColumnType &>(column).insertValue(time);
break;
}
case ExternalResultDescription::ValueType::vtDecimal32: [[fallthrough]];

View File

@ -1,13 +1,11 @@
#pragma once
#include <Access/AccessControlManager.h>
#include <Access/User.h>
#include <functional>
#include <Interpreters/Context.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBuffer.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Session.h>
#include <common/logger_useful.h>
#include <Poco/Format.h>
#include <Poco/RegularExpression.h>
@ -803,12 +801,13 @@ protected:
static void setPassword(
const String & user_name,
const String & password,
ContextMutablePtr context,
Session & session,
Messaging::MessageTransport & mt,
const Poco::Net::SocketAddress & address)
{
try {
context->setUser(user_name, password, address);
try
{
session.authenticate(user_name, password, address);
}
catch (const Exception &)
{
@ -822,7 +821,7 @@ protected:
public:
virtual void authenticate(
const String & user_name,
ContextMutablePtr context,
Session & session,
Messaging::MessageTransport & mt,
const Poco::Net::SocketAddress & address) = 0;
@ -836,11 +835,11 @@ class NoPasswordAuth : public AuthenticationMethod
public:
void authenticate(
const String & user_name,
ContextMutablePtr context,
Session & session,
Messaging::MessageTransport & mt,
const Poco::Net::SocketAddress & address) override
{
setPassword(user_name, "", context, mt, address);
return setPassword(user_name, "", session, mt, address);
}
Authentication::Type getType() const override
@ -854,7 +853,7 @@ class CleartextPasswordAuth : public AuthenticationMethod
public:
void authenticate(
const String & user_name,
ContextMutablePtr context,
Session & session,
Messaging::MessageTransport & mt,
const Poco::Net::SocketAddress & address) override
{
@ -864,7 +863,7 @@ public:
if (type == Messaging::FrontMessageType::PASSWORD_MESSAGE)
{
std::unique_ptr<Messaging::PasswordMessage> password = mt.receive<Messaging::PasswordMessage>();
setPassword(user_name, password->password, context, mt, address);
return setPassword(user_name, password->password, session, mt, address);
}
else
throw Exception(
@ -897,16 +896,15 @@ public:
void authenticate(
const String & user_name,
ContextMutablePtr context,
Session & session,
Messaging::MessageTransport & mt,
const Poco::Net::SocketAddress & address)
{
auto user = context->getAccessControlManager().read<User>(user_name);
Authentication::Type user_auth_type = user->authentication.getType();
Authentication::Type user_auth_type = session.getAuthenticationType(user_name);
if (type_to_method.find(user_auth_type) != type_to_method.end())
{
type_to_method[user_auth_type]->authenticate(user_name, context, mt, address);
type_to_method[user_auth_type]->authenticate(user_name, session, mt, address);
mt.send(Messaging::AuthenticationOk(), true);
LOG_DEBUG(log, "Authentication for user {} was successful.", user_name);
return;

56
src/Core/ServerUUID.cpp Normal file
View File

@ -0,0 +1,56 @@
#include <Core/ServerUUID.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_CREATE_FILE;
}
void ServerUUID::load(const fs::path & server_uuid_file, Poco::Logger * log)
{
/// Write a uuid file containing a unique uuid if the file doesn't already exist during server start.
if (fs::exists(server_uuid_file))
{
try
{
UUID uuid;
ReadBufferFromFile in(server_uuid_file);
readUUIDText(uuid, in);
assertEOF(in);
server_uuid = uuid;
return;
}
catch (...)
{
/// As for now it's ok to just overwrite it, because persistency in not essential.
LOG_ERROR(log, "Cannot read server UUID from file {}: {}. Will overwrite it",
server_uuid_file.string(), getCurrentExceptionMessage(true));
}
}
try
{
UUID new_uuid = UUIDHelpers::generateV4();
auto uuid_str = toString(new_uuid);
WriteBufferFromFile out(server_uuid_file);
out.write(uuid_str.data(), uuid_str.size());
out.sync();
out.finalize();
server_uuid = new_uuid;
}
catch (...)
{
throw Exception(ErrorCodes::CANNOT_CREATE_FILE, "Caught Exception {} while writing the Server UUID file {}",
getCurrentExceptionMessage(false), server_uuid_file.string());
}
}
}

26
src/Core/ServerUUID.h Normal file
View File

@ -0,0 +1,26 @@
#pragma once
#include <Core/UUID.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace Poco
{
class Logger;
}
namespace DB
{
class ServerUUID
{
inline static UUID server_uuid = UUIDHelpers::Nil;
public:
/// Returns persistent UUID of current clickhouse-server or clickhouse-keeper instance.
static UUID get() { return server_uuid; }
/// Loads server UUID from file or creates new one. Should be called on daemon startup.
static void load(const fs::path & server_uuid_file, Poco::Logger * log);
};
}

View File

@ -114,6 +114,7 @@ class IColumn;
M(UInt64, group_by_two_level_threshold_bytes, 50000000, "From what size of the aggregation state in bytes, a two-level aggregation begins to be used. 0 - the threshold is not set. Two-level aggregation is used when at least one of the thresholds is triggered.", 0) \
M(Bool, distributed_aggregation_memory_efficient, true, "Is the memory-saving mode of distributed aggregation enabled.", 0) \
M(UInt64, aggregation_memory_efficient_merge_threads, 0, "Number of threads to use for merge intermediate aggregation results in memory efficient mode. When bigger, then more memory is consumed. 0 means - same as 'max_threads'.", 0) \
M(Bool, enable_positional_arguments, false, "Enable positional arguments in ORDER BY, GROUP BY and LIMIT BY", 0) \
\
M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \
M(UInt64, parallel_replicas_count, 0, "", 0) \
@ -252,6 +253,7 @@ class IColumn;
M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \
M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \
M(Bool, empty_result_for_aggregation_by_empty_set, false, "Return empty result when aggregating without keys on empty set.", 0) \
M(Bool, empty_result_for_aggregation_by_constant_keys_on_empty_set, true, "Return empty result when aggregating by constant keys on empty set.", 0) \
M(Bool, allow_distributed_ddl, true, "If it is set to true, then a user is allowed to executed distributed DDL queries.", 0) \
M(Bool, allow_suspicious_codecs, false, "If it is set to true, allow to specify meaningless compression codecs.", 0) \
M(Bool, allow_experimental_codecs, false, "If it is set to true, allow to specify experimental compression codecs (but we don't have those yet and this option does nothing).", 0) \

View File

@ -8,11 +8,6 @@ target_link_libraries (field PRIVATE dbms)
add_executable (string_ref_hash string_ref_hash.cpp)
target_link_libraries (string_ref_hash PRIVATE clickhouse_common_io)
if (ENABLE_FUZZING)
add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp)
target_link_libraries (names_and_types_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})
endif ()
add_executable (mysql_protocol mysql_protocol.cpp)
target_link_libraries (mysql_protocol PRIVATE dbms)
if(USE_SSL)

View File

@ -0,0 +1,2 @@
add_executable (names_and_types_fuzzer names_and_types_fuzzer.cpp)
target_link_libraries (names_and_types_fuzzer PRIVATE dbms ${LIB_FUZZING_ENGINE})

View File

@ -26,23 +26,6 @@ namespace ErrorCodes
IDataType::~IDataType() = default;
String IDataType::getName() const
{
if (custom_name)
{
return custom_name->getName();
}
else
{
return doGetName();
}
}
String IDataType::doGetName() const
{
return getFamilyName();
}
void IDataType::updateAvgValueSizeHint(const IColumn & column, double & avg_value_size_hint)
{
/// Update the average value size hint if amount of read rows isn't too small

View File

@ -62,7 +62,13 @@ public:
/// static constexpr bool is_parametric = false;
/// Name of data type (examples: UInt64, Array(String)).
String getName() const;
String getName() const
{
if (custom_name)
return custom_name->getName();
else
return doGetName();
}
/// Name of data type family (example: FixedString, Array).
virtual const char * getFamilyName() const = 0;
@ -105,7 +111,7 @@ public:
void enumerateStreams(const SerializationPtr & serialization, const StreamCallbackWithType & callback) const { enumerateStreams(serialization, callback, {}); }
protected:
virtual String doGetName() const;
virtual String doGetName() const { return getFamilyName(); }
virtual SerializationPtr doGetDefaultSerialization() const = 0;
DataTypePtr getTypeForSubstream(const ISerialization::SubstreamPath & substream_path) const;

View File

@ -17,7 +17,7 @@ void registerDictionarySourceCassandra(DictionarySourceFactory & factory)
[[maybe_unused]] const Poco::Util::AbstractConfiguration & config,
[[maybe_unused]] const std::string & config_prefix,
[[maybe_unused]] Block & sample_block,
ContextPtr /* context */,
ContextPtr /* global_context */,
const std::string & /* default_database */,
bool /*created_from_ddl*/) -> DictionarySourcePtr
{

View File

@ -7,6 +7,7 @@
#include <Interpreters/ExpressionActions.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/Session.h>
#include <Interpreters/executeQuery.h>
#include <Common/isLocalAddress.h>
#include <common/logger_useful.h>
@ -63,19 +64,18 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(
const DictionaryStructure & dict_struct_,
const Configuration & configuration_,
const Block & sample_block_,
ContextPtr context_)
ContextMutablePtr context_,
std::shared_ptr<Session> local_session_)
: update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, configuration{configuration_}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, sample_block{sample_block_}
, context(Context::createCopy(context_))
, local_session(local_session_)
, context(context_)
, pool{createPool(configuration)}
, load_all_query{query_builder.composeLoadAllQuery()}
{
/// Query context is needed because some code in executeQuery function may assume it exists.
/// Current example is Context::getSampleBlockCache from InterpreterSelectWithUnionQuery::getSampleBlock.
context->makeQueryContext();
}
ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other)
@ -85,11 +85,11 @@ ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionar
, invalidate_query_response{other.invalidate_query_response}
, query_builder{dict_struct, configuration.db, "", configuration.table, configuration.query, configuration.where, IdentifierQuotingStyle::Backticks}
, sample_block{other.sample_block}
, local_session(other.local_session)
, context(Context::createCopy(other.context))
, pool{createPool(configuration)}
, load_all_query{other.load_all_query}
{
context->makeQueryContext();
}
std::string ClickHouseDictionarySource::getUpdateFieldAndDate()
@ -222,14 +222,13 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
ContextPtr context,
ContextPtr global_context,
const std::string & default_database [[maybe_unused]],
bool /* created_from_ddl */) -> DictionarySourcePtr
{
bool secure = config.getBool(config_prefix + ".secure", false);
auto context_copy = Context::createCopy(context);
UInt16 default_port = getPortFromContext(context_copy, secure);
UInt16 default_port = getPortFromContext(global_context, secure);
std::string settings_config_prefix = config_prefix + ".clickhouse";
std::string host = config.getString(settings_config_prefix + ".host", "localhost");
@ -252,12 +251,18 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
.secure = config.getBool(settings_config_prefix + ".secure", false)
};
/// We should set user info even for the case when the dictionary is loaded in-process (without TCP communication).
ContextMutablePtr context;
std::shared_ptr<Session> local_session;
if (configuration.is_local)
{
context_copy->setUser(configuration.user, configuration.password, Poco::Net::SocketAddress("127.0.0.1", 0));
context_copy = copyContextAndApplySettings(config_prefix, context_copy, config);
/// Start local session in case when the dictionary is loaded in-process (without TCP communication).
local_session = std::make_shared<Session>(global_context, ClientInfo::Interface::TCP);
local_session->authenticate(configuration.user, configuration.password, Poco::Net::SocketAddress{"127.0.0.1", 0});
context = local_session->makeQueryContext();
context->applySettingsChanges(readSettingsFromDictionaryConfig(config, config_prefix));
}
else
context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
String dictionary_name = config.getString(".dictionary.name", "");
String dictionary_database = config.getString(".dictionary.database", "");
@ -265,7 +270,7 @@ void registerDictionarySourceClickHouse(DictionarySourceFactory & factory)
if (dictionary_name == configuration.table && dictionary_database == configuration.db)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "ClickHouseDictionarySource table cannot be dictionary table");
return std::make_unique<ClickHouseDictionarySource>(dict_struct, configuration, sample_block, context_copy);
return std::make_unique<ClickHouseDictionarySource>(dict_struct, configuration, sample_block, context, local_session);
};
factory.registerSource("clickhouse", create_table_source);

View File

@ -39,7 +39,8 @@ public:
const DictionaryStructure & dict_struct_,
const Configuration & configuration_,
const Block & sample_block_,
ContextPtr context);
ContextMutablePtr context_,
std::shared_ptr<Session> local_session_);
/// copy-constructor is provided in order to support cloneability
ClickHouseDictionarySource(const ClickHouseDictionarySource & other);
@ -81,6 +82,7 @@ private:
mutable std::string invalidate_query_response;
ExternalQueryBuilder query_builder;
Block sample_block;
std::shared_ptr<Session> local_session;
ContextMutablePtr context;
ConnectionPoolWithFailoverPtr pool;
const std::string load_all_query;

View File

@ -31,7 +31,7 @@ DictionaryPtr DictionaryFactory::create(
const std::string & name,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context,
ContextPtr global_context,
bool created_from_ddl) const
{
Poco::Util::AbstractConfiguration::Keys keys;
@ -45,12 +45,9 @@ DictionaryPtr DictionaryFactory::create(
const DictionaryStructure dict_struct{config, config_prefix};
DictionarySourcePtr source_ptr = DictionarySourceFactory::instance().create(
name, config, config_prefix + ".source", dict_struct, context, config.getString(config_prefix + ".database", ""), created_from_ddl);
name, config, config_prefix + ".source", dict_struct, global_context, config.getString(config_prefix + ".database", ""), created_from_ddl);
LOG_TRACE(&Poco::Logger::get("DictionaryFactory"), "Created dictionary source '{}' for dictionary '{}'", source_ptr->toString(), name);
if (context->hasQueryContext() && context->getSettingsRef().log_queries)
context->getQueryContext()->addQueryFactoriesInfo(Context::QueryLogFactories::Dictionary, name);
const auto & layout_type = keys.front();
{
@ -58,7 +55,7 @@ DictionaryPtr DictionaryFactory::create(
if (found != registered_layouts.end())
{
const auto & layout_creator = found->second.layout_create_function;
return layout_creator(name, dict_struct, config, config_prefix, std::move(source_ptr), context, created_from_ddl);
return layout_creator(name, dict_struct, config, config_prefix, std::move(source_ptr), global_context, created_from_ddl);
}
}
@ -68,10 +65,10 @@ DictionaryPtr DictionaryFactory::create(
layout_type);
}
DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, ContextPtr context) const
DictionaryPtr DictionaryFactory::create(const std::string & name, const ASTCreateQuery & ast, ContextPtr global_context) const
{
auto configuration = getDictionaryConfigurationFromAST(ast, context);
return DictionaryFactory::create(name, *configuration, "dictionary", context, true);
auto configuration = getDictionaryConfigurationFromAST(ast, global_context);
return DictionaryFactory::create(name, *configuration, "dictionary", global_context, true);
}
bool DictionaryFactory::isComplex(const std::string & layout_type) const

View File

@ -36,13 +36,13 @@ public:
const std::string & name,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
ContextPtr context,
ContextPtr global_context,
bool created_from_ddl) const;
/// Create dictionary from DDL-query
DictionaryPtr create(const std::string & name,
const ASTCreateQuery & ast,
ContextPtr context) const;
ContextPtr global_context) const;
using LayoutCreateFunction = std::function<DictionaryPtr(
const std::string & name,
@ -50,7 +50,7 @@ public:
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
DictionarySourcePtr source_ptr,
ContextPtr context,
ContextPtr global_context,
bool created_from_ddl)>;
bool isComplex(const std::string & layout_type) const;

View File

@ -80,7 +80,7 @@ DictionarySourcePtr DictionarySourceFactory::create(
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const DictionaryStructure & dict_struct,
ContextPtr context,
ContextPtr global_context,
const std::string & default_database,
bool check_config) const
{
@ -99,7 +99,7 @@ DictionarySourcePtr DictionarySourceFactory::create(
{
const auto & create_source = found->second;
auto sample_block = createSampleBlock(dict_struct);
return create_source(dict_struct, config, config_prefix, sample_block, context, default_database, check_config);
return create_source(dict_struct, config, config_prefix, sample_block, global_context, default_database, check_config);
}
throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG,

View File

@ -35,7 +35,7 @@ public:
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
ContextPtr context,
ContextPtr global_context,
const std::string & default_database,
bool check_config)>;
@ -48,7 +48,7 @@ public:
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const DictionaryStructure & dict_struct,
ContextPtr context,
ContextPtr global_context,
const std::string & default_database,
bool check_config) const;

View File

@ -59,30 +59,36 @@ Block blockForKeys(
return block;
}
ContextMutablePtr copyContextAndApplySettings(
const std::string & config_prefix,
ContextPtr context,
const Poco::Util::AbstractConfiguration & config)
SettingsChanges readSettingsFromDictionaryConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
{
auto local_context = Context::createCopy(context);
if (config.has(config_prefix + ".settings"))
if (!config.has(config_prefix + ".settings"))
return {};
const auto prefix = config_prefix + ".settings";
Poco::Util::AbstractConfiguration::Keys config_keys;
config.keys(prefix, config_keys);
SettingsChanges changes;
for (const std::string & key : config_keys)
{
const auto prefix = config_prefix + ".settings";
Poco::Util::AbstractConfiguration::Keys config_keys;
config.keys(prefix, config_keys);
SettingsChanges changes;
for (const std::string & key : config_keys)
{
const auto value = config.getString(prefix + "." + key);
changes.emplace_back(key, value);
}
local_context->applySettingsChanges(changes);
const auto value = config.getString(prefix + "." + key);
changes.emplace_back(key, value);
}
return local_context;
return changes;
}
ContextMutablePtr copyContextAndApplySettingsFromDictionaryConfig(
const ContextPtr & context, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
{
auto context_copy = Context::createCopy(context);
auto changes = readSettingsFromDictionaryConfig(config, config_prefix);
context_copy->applySettingsChanges(changes);
return context_copy;
}
static Block transformHeader(Block header, Block block_to_add)

View File

@ -14,6 +14,7 @@ namespace DB
{
struct DictionaryStructure;
class SettingsChanges;
/// For simple key
@ -29,10 +30,8 @@ Block blockForKeys(
const std::vector<size_t> & requested_rows);
/// Used for applying settings to copied context in some register[...]Source functions
ContextMutablePtr copyContextAndApplySettings(
const std::string & config_prefix,
ContextPtr context,
const Poco::Util::AbstractConfiguration & config);
SettingsChanges readSettingsFromDictionaryConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
ContextMutablePtr copyContextAndApplySettingsFromDictionaryConfig(const ContextPtr & context, const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix);
/** A stream, adds additional columns to each block that it will read from inner stream.
*

View File

@ -161,9 +161,6 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
}
}
if (attributes.empty())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Dictionary has no attributes defined");
if (config.getBool(config_prefix + ".layout.ip_trie.access_to_key_from_attributes", false))
access_to_key_from_attributes = true;
}

View File

@ -307,7 +307,7 @@ namespace
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
DictionarySourcePtr source_ptr,
ContextPtr /* context */,
ContextPtr /* global_context */,
bool /* created_from_ddl */)
{
const auto * layout_name = dictionary_key_type == DictionaryKeyType::Simple ? "direct" : "complex_key_direct";

View File

@ -275,7 +275,7 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
ContextPtr context,
ContextPtr global_context,
const std::string & /* default_database */,
bool created_from_ddl) -> DictionarySourcePtr
{
@ -285,10 +285,10 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory)
/// Executable dictionaries may execute arbitrary commands.
/// It's OK for dictionaries created by administrator from xml-file, but
/// maybe dangerous for dictionaries created from DDL-queries.
if (created_from_ddl && context->getApplicationType() != Context::ApplicationType::LOCAL)
if (created_from_ddl && global_context->getApplicationType() != Context::ApplicationType::LOCAL)
throw Exception(ErrorCodes::DICTIONARY_ACCESS_DENIED, "Dictionaries with executable dictionary source are not allowed to be created from DDL query");
auto context_local_copy = copyContextAndApplySettings(config_prefix, context, config);
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
std::string settings_config_prefix = config_prefix + ".executable";
@ -301,7 +301,7 @@ void registerDictionarySourceExecutable(DictionarySourceFactory & factory)
.implicit_key = config.getBool(settings_config_prefix + ".implicit_key", false)
};
return std::make_unique<ExecutableDictionarySource>(dict_struct, configuration, sample_block, context_local_copy);
return std::make_unique<ExecutableDictionarySource>(dict_struct, configuration, sample_block, context);
};
factory.registerSource("executable", create_table_source);

View File

@ -279,7 +279,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
ContextPtr context,
ContextPtr global_context,
const std::string & /* default_database */,
bool created_from_ddl) -> DictionarySourcePtr
{
@ -289,17 +289,15 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
/// Executable dictionaries may execute arbitrary commands.
/// It's OK for dictionaries created by administrator from xml-file, but
/// maybe dangerous for dictionaries created from DDL-queries.
if (created_from_ddl && context->getApplicationType() != Context::ApplicationType::LOCAL)
if (created_from_ddl && global_context->getApplicationType() != Context::ApplicationType::LOCAL)
throw Exception(ErrorCodes::DICTIONARY_ACCESS_DENIED, "Dictionaries with executable pool dictionary source are not allowed to be created from DDL query");
auto context_local_copy = copyContextAndApplySettings(config_prefix, context, config);
ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
/** Currently parallel parsing input format cannot read exactly max_block_size rows from input,
* so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represent pipe that does not have eof.
*/
auto settings_no_parallel_parsing = context_local_copy->getSettings();
settings_no_parallel_parsing.input_format_parallel_parsing = false;
context_local_copy->setSettings(settings_no_parallel_parsing);
context->setSetting("input_format_parallel_parsing", Field{false});
String settings_config_prefix = config_prefix + ".executable_pool";
@ -319,7 +317,7 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)
.implicit_key = config.getBool(settings_config_prefix + ".implicit_key", false),
};
return std::make_unique<ExecutablePoolDictionarySource>(dict_struct, configuration, sample_block, context_local_copy);
return std::make_unique<ExecutablePoolDictionarySource>(dict_struct, configuration, sample_block, context);
};
factory.registerSource("executable_pool", create_table_source);

View File

@ -77,7 +77,7 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
ContextPtr context,
ContextPtr global_context,
const std::string & /* default_database */,
bool created_from_ddl) -> DictionarySourcePtr
{
@ -87,9 +87,9 @@ void registerDictionarySourceFile(DictionarySourceFactory & factory)
const auto filepath = config.getString(config_prefix + ".file.path");
const auto format = config.getString(config_prefix + ".file.format");
auto context_local_copy = copyContextAndApplySettings(config_prefix, context, config);
const auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
return std::make_unique<FileDictionarySource>(filepath, format, sample_block, context_local_copy, created_from_ddl);
return std::make_unique<FileDictionarySource>(filepath, format, sample_block, context, created_from_ddl);
};
factory.registerSource("file", create_table_source);

View File

@ -557,7 +557,7 @@ void registerDictionaryFlat(DictionaryFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
DictionarySourcePtr source_ptr,
ContextPtr /* context */,
ContextPtr /* global_context */,
bool /* created_from_ddl */) -> DictionaryPtr
{
if (dict_struct.key)

View File

@ -213,13 +213,13 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
Block & sample_block,
ContextPtr context,
ContextPtr global_context,
const std::string & /* default_database */,
bool created_from_ddl) -> DictionarySourcePtr {
if (dict_struct.has_expressions)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Dictionary source of type `http` does not support attribute expressions");
auto context_local_copy = copyContextAndApplySettings(config_prefix, context, config);
auto context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);
const auto & settings_config_prefix = config_prefix + ".http";
const auto & credentials_prefix = settings_config_prefix + ".credentials";
@ -258,7 +258,7 @@ void registerDictionarySourceHTTP(DictionarySourceFactory & factory)
.header_entries = std::move(header_entries)
};
return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context_local_copy, created_from_ddl);
return std::make_unique<HTTPDictionarySource>(dict_struct, configuration, credentials, sample_block, context, created_from_ddl);
};
factory.registerSource("http", create_table_source);
}

View File

@ -756,13 +756,13 @@ void registerDictionaryHashed(DictionaryFactory & factory)
using namespace std::placeholders;
factory.registerLayout("hashed",
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple, /* sparse = */ false); }, false);
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple, /* sparse = */ false); }, false);
factory.registerLayout("sparse_hashed",
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple, /* sparse = */ true); }, false);
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Simple, /* sparse = */ true); }, false);
factory.registerLayout("complex_key_hashed",
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex, /* sparse = */ false); }, true);
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex, /* sparse = */ false); }, true);
factory.registerLayout("complex_key_sparse_hashed",
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex, /* sparse = */ true); }, true);
[=](auto && a, auto && b, auto && c, auto && d, DictionarySourcePtr e, ContextPtr /* global_context */, bool /*created_from_ddl*/){ return create_layout(a, b, c, d, std::move(e), DictionaryKeyType::Complex, /* sparse = */ true); }, true);
}

Some files were not shown because too many files have changed in this diff Show More