diff --git a/README.md b/README.md index 21eda470f49..a915570122d 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ ClickHouse® is an open-source column-oriented database management system that a * [Tutorial](https://clickhouse.tech/docs/en/getting_started/tutorial/) shows how to set up and query small ClickHouse cluster. * [Documentation](https://clickhouse.tech/docs/en/) provides more in-depth information. * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. -* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-qfort0u8-TWqK4wIP0YSdoDE0btKa1w) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. +* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow to chat with ClickHouse users in real-time. * [Blog](https://clickhouse.yandex/blog/en/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser](https://clickhouse.tech/codebrowser/html_report/ClickHouse/index.html) with syntax highlight and navigation. * [Contacts](https://clickhouse.tech/#contacts) can help to get your questions answered if there are any. diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index 7893e56d751..9c65b1dfe4c 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -1,8 +1,9 @@ #include #include -#include -#include +#include +#include +#include #include #include #include @@ -24,6 +25,94 @@ void trim(String & s) s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !std::isspace(ch); }).base(), s.end()); } +/// Copied from replxx::src/util.cxx::now_ms_str() under the terms of 3-clause BSD license of Replxx. +/// Copyright (c) 2017-2018, Marcin Konarski (amok at codestation.org) +/// Copyright (c) 2010, Salvatore Sanfilippo (antirez at gmail dot com) +/// Copyright (c) 2010, Pieter Noordhuis (pcnoordhuis at gmail dot com) +std::string replxx_now_ms_str() +{ + std::chrono::milliseconds ms(std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch())); + time_t t = ms.count() / 1000; + tm broken; + if (!localtime_r(&t, &broken)) + { + return std::string(); + } + + static int const BUFF_SIZE(32); + char str[BUFF_SIZE]; + strftime(str, BUFF_SIZE, "%Y-%m-%d %H:%M:%S.", &broken); + snprintf(str + sizeof("YYYY-mm-dd HH:MM:SS"), 5, "%03d", static_cast(ms.count() % 1000)); + return str; +} + +/// Convert from readline to replxx format. +/// +/// replxx requires each history line to prepended with time line: +/// +/// ### YYYY-MM-DD HH:MM:SS.SSS +/// select 1 +/// +/// And w/o those service lines it will load all lines from history file as +/// one history line for suggestion. And if there are lots of lines in file it +/// will take lots of time (getline() + tons of reallocations). +/// +/// NOTE: this code uses std::ifstream/std::ofstream like original replxx code. +void convertHistoryFile(const std::string & path, replxx::Replxx & rx) +{ + std::ifstream in(path); + if (!in) + { + rx.print("Cannot open %s reading (for conversion): %s\n", + path.c_str(), errnoToString(errno).c_str()); + return; + } + + std::string line; + if (!getline(in, line).good()) + { + rx.print("Cannot read from %s (for conversion): %s\n", + path.c_str(), errnoToString(errno).c_str()); + return; + } + + /// This is the marker of the date, no need to convert. + static char const REPLXX_TIMESTAMP_PATTERN[] = "### dddd-dd-dd dd:dd:dd.ddd"; + if (line.starts_with("### ") && line.size() == strlen(REPLXX_TIMESTAMP_PATTERN)) + { + return; + } + + std::vector lines; + in.seekg(0); + while (getline(in, line).good()) + { + lines.push_back(line); + } + in.close(); + + size_t lines_size = lines.size(); + std::sort(lines.begin(), lines.end()); + lines.erase(std::unique(lines.begin(), lines.end()), lines.end()); + rx.print("The history file (%s) is in old format. %zu lines, %zu unique lines.\n", + path.c_str(), lines_size, lines.size()); + + std::ofstream out(path); + if (!out) + { + rx.print("Cannot open %s for writing (for conversion): %s\n", + path.c_str(), errnoToString(errno).c_str()); + return; + } + + const std::string & timestamp = replxx_now_ms_str(); + for (const auto & out_line : lines) + { + out << "### " << timestamp << "\n" << out_line << std::endl; + } + out.close(); +} + } ReplxxLineReader::ReplxxLineReader( @@ -47,6 +136,8 @@ ReplxxLineReader::ReplxxLineReader( } else { + convertHistoryFile(history_file_path, rx); + if (flock(history_file_fd, LOCK_SH)) { rx.print("Shared lock of history file failed: %s\n", errnoToString(errno).c_str()); diff --git a/contrib/libpq b/contrib/libpq index c7624588ddd..e071ea570f8 160000 --- a/contrib/libpq +++ b/contrib/libpq @@ -1 +1 @@ -Subproject commit c7624588ddd84f153dd5990e81b886e4568bddde +Subproject commit e071ea570f8985aa00e34f5b9d50a3cfe666327e diff --git a/contrib/libpq-cmake/CMakeLists.txt b/contrib/libpq-cmake/CMakeLists.txt index 028fabe52b8..4f6a1554d10 100644 --- a/contrib/libpq-cmake/CMakeLists.txt +++ b/contrib/libpq-cmake/CMakeLists.txt @@ -8,7 +8,7 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/fe-lobj.c" "${LIBPQ_SOURCE_DIR}/fe-misc.c" "${LIBPQ_SOURCE_DIR}/fe-print.c" - "${LIBPQ_SOURCE_DIR}/fe-protocol2.c" + "${LIBPQ_SOURCE_DIR}/fe-trace.c" "${LIBPQ_SOURCE_DIR}/fe-protocol3.c" "${LIBPQ_SOURCE_DIR}/fe-secure.c" "${LIBPQ_SOURCE_DIR}/fe-secure-common.c" @@ -18,8 +18,12 @@ set(SRCS "${LIBPQ_SOURCE_DIR}/pqexpbuffer.c" "${LIBPQ_SOURCE_DIR}/common/scram-common.c" - "${LIBPQ_SOURCE_DIR}/common/sha2_openssl.c" + "${LIBPQ_SOURCE_DIR}/common/sha2.c" + "${LIBPQ_SOURCE_DIR}/common/sha1.c" "${LIBPQ_SOURCE_DIR}/common/md5.c" + "${LIBPQ_SOURCE_DIR}/common/md5_common.c" + "${LIBPQ_SOURCE_DIR}/common/hmac_openssl.c" + "${LIBPQ_SOURCE_DIR}/common/cryptohash.c" "${LIBPQ_SOURCE_DIR}/common/saslprep.c" "${LIBPQ_SOURCE_DIR}/common/unicode_norm.c" "${LIBPQ_SOURCE_DIR}/common/ip.c" diff --git a/debian/clickhouse-server.init b/debian/clickhouse-server.init index d7d87c6d53c..4e70d382b36 100755 --- a/debian/clickhouse-server.init +++ b/debian/clickhouse-server.init @@ -43,29 +43,6 @@ command -v flock >/dev/null && FLOCK=flock # Override defaults from optional config file test -f /etc/default/clickhouse && . /etc/default/clickhouse -# On x86_64, check for required instruction set. -if uname -mpi | grep -q 'x86_64'; then - if ! grep -q 'sse4_2' /proc/cpuinfo; then - # On KVM, cpuinfo could falsely not report SSE 4.2 support, so skip the check. - if ! grep -q 'Common KVM processor' /proc/cpuinfo; then - - # Some other VMs also report wrong flags in cpuinfo. - # Tricky way to test for instruction set: - # create temporary binary and run it; - # if it get caught illegal instruction signal, - # then required instruction set is not supported really. - # - # Generated this way: - # gcc -xc -Os -static -nostdlib - <<< 'void _start() { __asm__("pcmpgtq %%xmm0, %%xmm1; mov $0x3c, %%rax; xor %%rdi, %%rdi; syscall":::"memory"); }' && strip -R .note.gnu.build-id -R .comment -R .eh_frame -s ./a.out && gzip -c -9 ./a.out | base64 -w0; echo - - if ! (echo -n 'H4sICAwAW1cCA2Eub3V0AKt39XFjYmRkgAEmBjsGEI+H0QHMd4CKGyCUAMUsGJiBJDNQNUiYlQEZOKDQclB9cnD9CmCSBYqJBRxQOvBpSQobGfqIAWn8FuYnPI4fsAGyPQz/87MeZtArziguKSpJTGLQK0mtKGGgGHADMSgoYH6AhTMPNHyE0NQzYuEzYzEXFr6CBPQDANAsXKTwAQAA' | base64 -d | gzip -d > /tmp/clickhouse_test_sse42 && chmod a+x /tmp/clickhouse_test_sse42 && /tmp/clickhouse_test_sse42); then - echo 'Warning! SSE 4.2 instruction set is not supported' - #exit 3 - fi - fi - fi -fi - die() { @@ -116,7 +93,7 @@ forcestop() service_or_func() { if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then - service $PROGRAM $1 + systemctl $1 $PROGRAM else $1 fi diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index 44b9d42d6a1..a722132c3a5 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -46,6 +46,7 @@ RUN apt-get update \ pigz \ pkg-config \ tzdata \ + pv \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 9fd2212e2dc..6efdbf6178c 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -382,6 +382,9 @@ function run_tests # needs psql 01889_postgresql_protocol_null_fields + + # needs pv + 01923_network_receive_time_metric_insert ) time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \ diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index dad4362b3d1..c3447c17d35 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -319,14 +319,14 @@ function get_profiles wait - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > left-query-log.tsv ||: & + clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > left-query-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type = 2 format TSVWithNamesAndTypes" > right-query-log.tsv ||: & + clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type = 'QueryFinish' format TSVWithNamesAndTypes" > right-query-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & @@ -409,10 +409,10 @@ create view right_query_log as select * '$(cat "right-query-log.tsv.columns")'); create view query_logs as - select 0 version, query_id, ProfileEvents.keys, ProfileEvents.values, + select 0 version, query_id, ProfileEvents, query_duration_ms, memory_usage from left_query_log union all - select 1 version, query_id, ProfileEvents.keys, ProfileEvents.values, + select 1 version, query_id, ProfileEvents, query_duration_ms, memory_usage from right_query_log ; @@ -424,7 +424,7 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric- with ( -- sumMapState with the list of all keys with '-0.' values. Negative zero is because -- sumMap removes keys with positive zeros. - with (select groupUniqArrayArray(ProfileEvents.keys) from query_logs) as all_names + with (select groupUniqArrayArray(mapKeys(ProfileEvents)) from query_logs) as all_names select arrayReduce('sumMapState', [(all_names, arrayMap(x->-0., all_names))]) ) as all_metrics select test, query_index, version, query_id, @@ -433,8 +433,8 @@ create table query_run_metric_arrays engine File(TSV, 'analyze/query-run-metric- [ all_metrics, arrayReduce('sumMapState', - [(ProfileEvents.keys, - arrayMap(x->toFloat64(x), ProfileEvents.values))] + [(mapKeys(ProfileEvents), + arrayMap(x->toFloat64(x), mapValues(ProfileEvents)))] ), arrayReduce('sumMapState', [( ['client_time', 'server_time', 'memory_usage'], @@ -1003,10 +1003,11 @@ create view query_log as select * create table unstable_run_metrics engine File(TSVWithNamesAndTypes, 'unstable-run-metrics.$version.rep') as - select - test, query_index, query_id, - ProfileEvents.values value, ProfileEvents.keys metric - from query_log array join ProfileEvents + select test, query_index, query_id, value, metric + from query_log + array join + mapValues(ProfileEvents) as value, + mapKeys(ProfileEvents) as metric join unstable_query_runs using (query_id) ; diff --git a/docs/en/introduction/adopters.md b/docs/en/introduction/adopters.md index fdced7f354c..47927cd306a 100644 --- a/docs/en/introduction/adopters.md +++ b/docs/en/introduction/adopters.md @@ -155,5 +155,6 @@ toc_title: Adopters | Argedor | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) | | SigNoz | Observability Platform | Main Product | — | — | [Source code](https://github.com/SigNoz/signoz) | | ChelPipe Group | Analytics | — | — | — | [Blog post, June 2021](https://vc.ru/trade/253172-tyazhelomu-proizvodstvu-user-friendly-sayt-internet-magazin-trub-dlya-chtpz) | +| Zagrava Trading | — | — | — | — | [Job offer, May 2021](https://twitter.com/datastackjobs/status/1394707267082063874) | [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 5dcfca5fbda..b0480dc256a 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -36,4 +36,4 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 - [system.asynchronous_metrics](../system-tables/asynchronous_metrics.md) — Contains metrics, calculated periodically in the background. - [system.metric_log](../system-tables/metric_log.md) — Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metric_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metric_log) diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index b27434793c7..fc801aa1c80 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -33,6 +33,6 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. - [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. - [system.events](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` and `system.events`. - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/asynchronous_metrics) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/asynchronous_metrics) diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md index 096eca12e7d..16cf183de53 100644 --- a/docs/en/operations/system-tables/clusters.md +++ b/docs/en/operations/system-tables/clusters.md @@ -68,4 +68,4 @@ estimated_recovery_time: 0 - [distributed_replica_error_cap setting](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed_replica_error_half_life setting](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/clusters) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/clusters) diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md index 9160dca9a1a..2a8009dddee 100644 --- a/docs/en/operations/system-tables/columns.md +++ b/docs/en/operations/system-tables/columns.md @@ -69,4 +69,21 @@ is_in_sampling_key: 0 compression_codec: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/columns) +The `system.columns` table contains the following columns (the column type is shown in brackets): + +- `database` (String) — Database name. +- `table` (String) — Table name. +- `name` (String) — Column name. +- `type` (String) — Column type. +- `default_kind` (String) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined. +- `default_expression` (String) — Expression for the default value, or an empty string if it is not defined. +- `data_compressed_bytes` (UInt64) — The size of compressed data, in bytes. +- `data_uncompressed_bytes` (UInt64) — The size of decompressed data, in bytes. +- `marks_bytes` (UInt64) — The size of marks, in bytes. +- `comment` (String) — Comment on the column, or an empty string if it is not defined. +- `is_in_partition_key` (UInt8) — Flag that indicates whether the column is in the partition expression. +- `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. +- `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. +- `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/columns) diff --git a/docs/en/operations/system-tables/contributors.md b/docs/en/operations/system-tables/contributors.md index 37d01ef6204..a718c403c11 100644 --- a/docs/en/operations/system-tables/contributors.md +++ b/docs/en/operations/system-tables/contributors.md @@ -38,4 +38,4 @@ SELECT * FROM system.contributors WHERE name = 'Olga Khvostikova' │ Olga Khvostikova │ └──────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/contributors) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/contributors) diff --git a/docs/en/operations/system-tables/current-roles.md b/docs/en/operations/system-tables/current-roles.md index f10dbe69918..56dbb602637 100644 --- a/docs/en/operations/system-tables/current-roles.md +++ b/docs/en/operations/system-tables/current-roles.md @@ -8,4 +8,4 @@ Columns: - `with_admin_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a role with `ADMIN OPTION` privilege. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `current_role` is a default role. - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/current-roles) + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/current-roles) diff --git a/docs/en/operations/system-tables/data_type_families.md b/docs/en/operations/system-tables/data_type_families.md index 4e439f13aa5..fdce9c33b37 100644 --- a/docs/en/operations/system-tables/data_type_families.md +++ b/docs/en/operations/system-tables/data_type_families.md @@ -33,4 +33,4 @@ SELECT * FROM system.data_type_families WHERE alias_to = 'String' - [Syntax](../../sql-reference/syntax.md) — Information about supported syntax. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/data_type_families) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/data_type_families) diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index 8ef5551d9b0..2c78fd25c2b 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -35,4 +35,4 @@ SELECT * FROM system.databases └────────────────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/databases) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/databases) diff --git a/docs/en/operations/system-tables/detached_parts.md b/docs/en/operations/system-tables/detached_parts.md index ade89bd40c4..a5748128426 100644 --- a/docs/en/operations/system-tables/detached_parts.md +++ b/docs/en/operations/system-tables/detached_parts.md @@ -8,4 +8,4 @@ For the description of other columns, see [system.parts](../../operations/system If part name is invalid, values of some columns may be `NULL`. Such parts can be deleted with [ALTER TABLE DROP DETACHED PART](../../sql-reference/statements/alter/partition.md#alter_drop-detached). -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/detached_parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/detached_parts) diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md index 2bc1be51f19..a34e893599c 100644 --- a/docs/en/operations/system-tables/dictionaries.md +++ b/docs/en/operations/system-tables/dictionaries.md @@ -61,4 +61,4 @@ SELECT * FROM system.dictionaries └──────────┴──────┴────────┴─────────────┴──────┴────────┴──────────────────────────────────────┴─────────────────────┴─────────────────┴─────────────┴──────────┴───────────────┴───────────────────────┴────────────────────────────┴──────────────┴──────────────┴─────────────────────┴──────────────────────────────┘───────────────────────┴────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/dictionaries) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/dictionaries) diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index e9d324580d8..833a0b3b16b 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -10,9 +10,6 @@ Columns: - `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes. - `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/disks) - - **Example** ```sql @@ -27,5 +24,4 @@ Columns: 1 rows in set. Elapsed: 0.001 sec. ``` - - +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/disks) diff --git a/docs/en/operations/system-tables/enabled-roles.md b/docs/en/operations/system-tables/enabled-roles.md index 27875fcf984..c03129b32dd 100644 --- a/docs/en/operations/system-tables/enabled-roles.md +++ b/docs/en/operations/system-tables/enabled-roles.md @@ -9,4 +9,4 @@ Columns: - `is_current` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a current role of a current user. - `is_default` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Flag that shows whether `enabled_role` is a default role. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/enabled-roles) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/enabled-roles) diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md index b4ced6e6bf6..2fcb5d8edec 100644 --- a/docs/en/operations/system-tables/events.md +++ b/docs/en/operations/system-tables/events.md @@ -31,4 +31,4 @@ SELECT * FROM system.events LIMIT 5 - [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/events) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/events) diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md index fbcd4b7b723..888e768fc93 100644 --- a/docs/en/operations/system-tables/functions.md +++ b/docs/en/operations/system-tables/functions.md @@ -7,8 +7,6 @@ Columns: - `name`(`String`) – The name of the function. - `is_aggregate`(`UInt8`) — Whether the function is aggregate. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/functions) - **Example** ```sql @@ -30,4 +28,6 @@ Columns: └──────────────────────────┴──────────────┴──────────────────┴──────────┘ 10 rows in set. Elapsed: 0.002 sec. -``` \ No newline at end of file +``` + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/functions) diff --git a/docs/en/operations/system-tables/grants.md b/docs/en/operations/system-tables/grants.md index fb2a91ab30a..927fa4f3227 100644 --- a/docs/en/operations/system-tables/grants.md +++ b/docs/en/operations/system-tables/grants.md @@ -21,4 +21,4 @@ Columns: - `grant_option` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Permission is granted `WITH GRANT OPTION`, see [GRANT](../../sql-reference/statements/grant.md#grant-privigele-syntax). -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/grants) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/grants) diff --git a/docs/en/operations/system-tables/graphite_retentions.md b/docs/en/operations/system-tables/graphite_retentions.md index 7ae5e0e36a8..0d56242dc95 100644 --- a/docs/en/operations/system-tables/graphite_retentions.md +++ b/docs/en/operations/system-tables/graphite_retentions.md @@ -14,4 +14,4 @@ Columns: - `Tables.database` (Array(String)) - Array of names of database tables that use the `config_name` parameter. - `Tables.table` (Array(String)) - Array of table names that use the `config_name` parameter. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/graphite_retentions) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/graphite_retentions) diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md index c95e4e8b9b4..a9cada507c6 100644 --- a/docs/en/operations/system-tables/licenses.md +++ b/docs/en/operations/system-tables/licenses.md @@ -36,4 +36,4 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/licenses) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/licenses) diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index c2c5703f869..309c1cbc9d1 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -51,4 +51,4 @@ type: SettingUInt64 4 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merge_tree_settings) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merge_tree_settings) diff --git a/docs/en/operations/system-tables/merges.md b/docs/en/operations/system-tables/merges.md index 3e712e2962c..c7bdaee42e1 100644 --- a/docs/en/operations/system-tables/merges.md +++ b/docs/en/operations/system-tables/merges.md @@ -22,4 +22,4 @@ Columns: - `merge_type` — The type of current merge. Empty if it's an mutation. - `merge_algorithm` — The algorithm used in current merge. Empty if it's an mutation. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/merges) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/merges) diff --git a/docs/en/operations/system-tables/metric_log.md b/docs/en/operations/system-tables/metric_log.md index 1f72c9a7358..ab149703309 100644 --- a/docs/en/operations/system-tables/metric_log.md +++ b/docs/en/operations/system-tables/metric_log.md @@ -48,4 +48,4 @@ CurrentMetric_DistributedFilesToInsert: 0 - [system.metrics](../../operations/system-tables/metrics.md) — Contains instantly calculated metrics. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metric_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metric_log) diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md index decae8ea7fb..4afab40764b 100644 --- a/docs/en/operations/system-tables/metrics.md +++ b/docs/en/operations/system-tables/metrics.md @@ -38,4 +38,4 @@ SELECT * FROM system.metrics LIMIT 10 - [system.metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. - [Monitoring](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/metrics) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/metrics) diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md index e5ea7eab457..24fa559197c 100644 --- a/docs/en/operations/system-tables/mutations.md +++ b/docs/en/operations/system-tables/mutations.md @@ -45,4 +45,4 @@ If there were problems with mutating some data parts, the following columns cont - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine - [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/mutations) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/mutations) diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index d1737c9abbb..bf948d9dd5b 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -29,4 +29,4 @@ Reads from this table are not parallelized. 10 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers) diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index b40dc9a2d6f..d7df1bc1e0e 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -27,4 +27,4 @@ Used for tests. 10 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/numbers_mt) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/numbers_mt) diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index 51316dfbc44..10b2a1757d0 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -20,4 +20,4 @@ This is similar to the `DUAL` table found in other DBMSs. 1 rows in set. Elapsed: 0.001 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/one) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/one) diff --git a/docs/en/operations/system-tables/part_log.md b/docs/en/operations/system-tables/part_log.md index 3f9110349dd..b815d2366bb 100644 --- a/docs/en/operations/system-tables/part_log.md +++ b/docs/en/operations/system-tables/part_log.md @@ -66,4 +66,4 @@ error: 0 exception: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/part_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/part_log) diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md index 5a4715a4513..b9b5aa09b64 100644 --- a/docs/en/operations/system-tables/parts.md +++ b/docs/en/operations/system-tables/parts.md @@ -155,4 +155,4 @@ move_ttl_info.max: [] - [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) - [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/parts) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/parts) diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 0a469e144c5..6090c5e4555 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -14,7 +14,6 @@ Columns: - `query` (String) – The query text. For `INSERT`, it does not include the data to insert. - `query_id` (String) – Query ID, if defined. - ```sql :) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; ``` @@ -59,4 +58,4 @@ Settings: {'background_pool_size':'32','load_balancing':'random','al 1 rows in set. Elapsed: 0.002 sec. ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/processes) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/processes) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 57512a78ad0..d58e549616f 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -156,4 +156,4 @@ Settings: {'background_pool_size':'32','load_balancing':'random','al - [system.query_thread_log](../../operations/system-tables/query_thread_log.md#system_tables-query_thread_log) — This table contains information about each query execution thread. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_log) diff --git a/docs/en/operations/system-tables/query_thread_log.md b/docs/en/operations/system-tables/query_thread_log.md index 58af246056e..7ecea2971b4 100644 --- a/docs/en/operations/system-tables/query_thread_log.md +++ b/docs/en/operations/system-tables/query_thread_log.md @@ -113,4 +113,4 @@ ProfileEvents: {'Query':1,'SelectQuery':1,'ReadCompressedBytes':36,'Compr - [system.query_log](../../operations/system-tables/query_log.md#system_tables-query_log) — Description of the `query_log` system table which contains common information about queries execution. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/query_thread_log) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/query_thread_log) diff --git a/docs/en/operations/system-tables/quota_limits.md b/docs/en/operations/system-tables/quota_limits.md index 11616990206..0088b086e8c 100644 --- a/docs/en/operations/system-tables/quota_limits.md +++ b/docs/en/operations/system-tables/quota_limits.md @@ -17,3 +17,5 @@ Columns: - `max_read_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of rows read from all tables and table functions participated in queries. - `max_read_bytes` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum number of bytes read from all tables and table functions participated in queries. - `max_execution_time` ([Nullable](../../sql-reference/data-types/nullable.md)([Float64](../../sql-reference/data-types/float.md))) — Maximum of the query execution time, in seconds. + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_limits) diff --git a/docs/en/operations/system-tables/quota_usage.md b/docs/en/operations/system-tables/quota_usage.md index 89fdfe70069..2f35b6b7dae 100644 --- a/docs/en/operations/system-tables/quota_usage.md +++ b/docs/en/operations/system-tables/quota_usage.md @@ -28,3 +28,5 @@ Columns: ## See Also {#see-also} - [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quota_usage) diff --git a/docs/en/operations/system-tables/quotas.md b/docs/en/operations/system-tables/quotas.md index 3e797c9bdc6..6acc349a54f 100644 --- a/docs/en/operations/system-tables/quotas.md +++ b/docs/en/operations/system-tables/quotas.md @@ -24,5 +24,5 @@ Columns: - [SHOW QUOTAS](../../sql-reference/statements/show.md#show-quotas-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/quotas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas) diff --git a/docs/en/operations/system-tables/quotas_usage.md b/docs/en/operations/system-tables/quotas_usage.md index 04cf91cb990..6ba88cb935a 100644 --- a/docs/en/operations/system-tables/quotas_usage.md +++ b/docs/en/operations/system-tables/quotas_usage.md @@ -30,4 +30,6 @@ Columns: ## See Also {#see-also} -- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) \ No newline at end of file +- [SHOW QUOTA](../../sql-reference/statements/show.md#show-quota-statement) + +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/quotas_usage) diff --git a/docs/en/operations/system-tables/replicas.md b/docs/en/operations/system-tables/replicas.md index 63a2141e399..5a6ec54723b 100644 --- a/docs/en/operations/system-tables/replicas.md +++ b/docs/en/operations/system-tables/replicas.md @@ -120,5 +120,5 @@ WHERE If this query does not return anything, it means that everything is fine. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/replicas) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/replicas) diff --git a/docs/en/operations/system-tables/role-grants.md b/docs/en/operations/system-tables/role-grants.md index 5eb18b0dca7..d90bc1f77be 100644 --- a/docs/en/operations/system-tables/role-grants.md +++ b/docs/en/operations/system-tables/role-grants.md @@ -18,4 +18,4 @@ Columns: - 1 — The role has `ADMIN OPTION` privilege. - 0 — The role without `ADMIN OPTION` privilege. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/role-grants) \ No newline at end of file +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/role-grants) diff --git a/docs/en/operations/system-tables/roles.md b/docs/en/operations/system-tables/roles.md index 4ab5102dfc8..e68d5ed290a 100644 --- a/docs/en/operations/system-tables/roles.md +++ b/docs/en/operations/system-tables/roles.md @@ -12,4 +12,4 @@ Columns: - [SHOW ROLES](../../sql-reference/statements/show.md#show-roles-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/roles) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/roles) diff --git a/docs/en/operations/system-tables/row_policies.md b/docs/en/operations/system-tables/row_policies.md index 97474d1b3ee..767270d64ae 100644 --- a/docs/en/operations/system-tables/row_policies.md +++ b/docs/en/operations/system-tables/row_policies.md @@ -31,4 +31,4 @@ Columns: - [SHOW POLICIES](../../sql-reference/statements/show.md#show-policies-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/row_policies) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/row_policies) diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md index 7034fe1204f..cfd9f43655a 100644 --- a/docs/en/operations/system-tables/settings.md +++ b/docs/en/operations/system-tables/settings.md @@ -50,4 +50,4 @@ SELECT * FROM system.settings WHERE changed AND name='load_balancing' - [Constraints on Settings](../../operations/settings/constraints-on-settings.md) - [SHOW SETTINGS](../../sql-reference/statements/show.md#show-settings) statement -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings) diff --git a/docs/en/operations/system-tables/settings_profile_elements.md b/docs/en/operations/system-tables/settings_profile_elements.md index d0f2c3c4527..3c8c728e645 100644 --- a/docs/en/operations/system-tables/settings_profile_elements.md +++ b/docs/en/operations/system-tables/settings_profile_elements.md @@ -27,4 +27,4 @@ Columns: - `inherit_profile` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — A parent profile for this setting profile. `NULL` if not set. Setting profile will inherit all the settings' values and constraints (`min`, `max`, `readonly`) from its parent profiles. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profile_elements) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profile_elements) diff --git a/docs/en/operations/system-tables/settings_profiles.md b/docs/en/operations/system-tables/settings_profiles.md index a06b26b9cb6..80dc5172f4e 100644 --- a/docs/en/operations/system-tables/settings_profiles.md +++ b/docs/en/operations/system-tables/settings_profiles.md @@ -21,4 +21,4 @@ Columns: - [SHOW PROFILES](../../sql-reference/statements/show.md#show-profiles-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/settings_profiles) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/settings_profiles) diff --git a/docs/en/operations/system-tables/storage_policies.md b/docs/en/operations/system-tables/storage_policies.md index 5adab1cb2aa..4b07b593926 100644 --- a/docs/en/operations/system-tables/storage_policies.md +++ b/docs/en/operations/system-tables/storage_policies.md @@ -14,4 +14,4 @@ Columns: If the storage policy contains more then one volume, then information for each volume is stored in the individual row of the table. -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/storage_policies) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/storage_policies) diff --git a/docs/en/operations/system-tables/table_engines.md b/docs/en/operations/system-tables/table_engines.md index 30122cb133e..45ff6f1ac19 100644 --- a/docs/en/operations/system-tables/table_engines.md +++ b/docs/en/operations/system-tables/table_engines.md @@ -35,4 +35,4 @@ WHERE name in ('Kafka', 'MergeTree', 'ReplicatedCollapsingMergeTree') - Kafka [settings](../../engines/table-engines/integrations/kafka.md#table_engine-kafka-creating-a-table) - Join [settings](../../engines/table-engines/special/join.md#join-limitations-and-settings) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/table_engines) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/table_engines) diff --git a/docs/en/operations/system-tables/tables.md b/docs/en/operations/system-tables/tables.md index 480db3087f6..4d7b20be311 100644 --- a/docs/en/operations/system-tables/tables.md +++ b/docs/en/operations/system-tables/tables.md @@ -117,4 +117,4 @@ lifetime_bytes: ᴺᵁᴸᴸ comment: ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/tables) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/tables) diff --git a/docs/en/operations/system-tables/text_log.md b/docs/en/operations/system-tables/text_log.md index f5f53c95653..ad95e91f0d2 100644 --- a/docs/en/operations/system-tables/text_log.md +++ b/docs/en/operations/system-tables/text_log.md @@ -50,4 +50,4 @@ source_file: /ClickHouse/src/Interpreters/DNSCacheUpdater.cpp; void source_line: 45 ``` - [Original article](https://clickhouse.tech/docs/en/operations/system_tables/text_log) \ No newline at end of file + [Original article](https://clickhouse.tech/docs/en/operations/system-tables/text_log) diff --git a/docs/en/operations/system-tables/time_zones.md b/docs/en/operations/system-tables/time_zones.md index 1b84ae7fe37..fa467124884 100644 --- a/docs/en/operations/system-tables/time_zones.md +++ b/docs/en/operations/system-tables/time_zones.md @@ -27,4 +27,4 @@ SELECT * FROM system.time_zones LIMIT 10 └────────────────────┘ ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/time_zones) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/time_zones) diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md index e4c01a65d9d..5de597a0a51 100644 --- a/docs/en/operations/system-tables/trace_log.md +++ b/docs/en/operations/system-tables/trace_log.md @@ -55,4 +55,3 @@ size: 5244400 ``` [Original article](https://clickhouse.tech/docs/en/operations/system-tables/trace_log) - diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md index 2227816aff3..11fdeb1e9ae 100644 --- a/docs/en/operations/system-tables/users.md +++ b/docs/en/operations/system-tables/users.md @@ -31,4 +31,4 @@ Columns: - [SHOW USERS](../../sql-reference/statements/show.md#show-users-statement) -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/users) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/users) diff --git a/docs/en/operations/system-tables/zookeeper.md b/docs/en/operations/system-tables/zookeeper.md index 3b8db14934e..52d1c686e52 100644 --- a/docs/en/operations/system-tables/zookeeper.md +++ b/docs/en/operations/system-tables/zookeeper.md @@ -72,4 +72,4 @@ numChildren: 7 pzxid: 987021252247 path: /clickhouse/tables/01-08/visits/replicas ``` -[Original article](https://clickhouse.tech/docs/en/operations/system_tables/zookeeper) +[Original article](https://clickhouse.tech/docs/en/operations/system-tables/zookeeper) diff --git a/docs/en/sql-reference/functions/tuple-map-functions.md b/docs/en/sql-reference/functions/tuple-map-functions.md index 633b878ed47..dcfa18e04bf 100644 --- a/docs/en/sql-reference/functions/tuple-map-functions.md +++ b/docs/en/sql-reference/functions/tuple-map-functions.md @@ -70,23 +70,23 @@ Result: Collect all the keys and sum corresponding values. -**Syntax** +**Syntax** ``` sql -mapAdd(Tuple(Array, Array), Tuple(Array, Array) [, ...]) +mapAdd(arg1, arg2 [, ...]) ``` -**Arguments** +**Arguments** -Arguments are [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. +Arguments are [maps](../../sql-reference/data-types/map.md) or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where items in the first array represent keys, and the second array contains values for the each key. All key arrays should have same type, and all value arrays should contain items which are promote to the one type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)). The common promoted type is used as a type for the result array. **Returned value** -- Returns one [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. +- Depending on the arguments returns one [map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains values. **Example** -Query: +Query with a tuple map: ``` sql SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTypeName(res) as type; @@ -100,6 +100,11 @@ Result: └───────────────┴────────────────────────────────────┘ ``` +Query with `Map` type: + +``` sql +``` + ## mapSubtract {#function-mapsubtract} Collect all the keys and subtract corresponding values. diff --git a/docs/en/sql-reference/statements/alter/index/index.md b/docs/en/sql-reference/statements/alter/index/index.md index 56d81aaf52f..fd5657c3666 100644 --- a/docs/en/sql-reference/statements/alter/index/index.md +++ b/docs/en/sql-reference/statements/alter/index/index.md @@ -8,7 +8,7 @@ toc_title: INDEX The following operations are available: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - Adds index description to tables metadata. +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata. - `ALTER TABLE [db].name DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. diff --git a/docs/ja/sql-reference/statements/alter.md b/docs/ja/sql-reference/statements/alter.md index 226565dd226..0967f60e06a 100644 --- a/docs/ja/sql-reference/statements/alter.md +++ b/docs/ja/sql-reference/statements/alter.md @@ -175,7 +175,7 @@ MODIFY ORDER BY new_expression [複製](../../engines/table-engines/mergetree-family/replication.md) テーブル)。 次の操作 利用できます: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` -付加価指数の説明をテーブルメタデータを指すものとします。 +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` -付加価指数の説明をテーブルメタデータを指すものとします。 - `ALTER TABLE [db].name DROP INDEX name` -除去す指標の説明からテーブルメタデータを削除を行指数のファイルからディスク。 diff --git a/docs/ru/sql-reference/statements/alter/index/index.md b/docs/ru/sql-reference/statements/alter/index/index.md index 632f11ed906..1f6bbea5c4b 100644 --- a/docs/ru/sql-reference/statements/alter/index/index.md +++ b/docs/ru/sql-reference/statements/alter/index/index.md @@ -9,7 +9,7 @@ toc_title: "Манипуляции с индексами" Добавить или удалить индекс можно с помощью операций ``` sql -ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [AFTER name] +ALTER TABLE [db.]name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name] ALTER TABLE [db.]name DROP INDEX name ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name ``` diff --git a/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md new file mode 100644 index 00000000000..7c04600894e --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/embedded-rocksdb.md @@ -0,0 +1,42 @@ +--- +toc_priority: 9 +toc_title: EmbeddedRocksDB +--- + +# EmbeddedRocksDB 引擎 {#EmbeddedRocksDB-engine} + +这个引擎允许 ClickHouse 与 [rocksdb](http://rocksdb.org/) 进行集成。 + +## 创建一张表 {#table_engine-EmbeddedRocksDB-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = EmbeddedRocksDB PRIMARY KEY(primary_key_name) +``` + +必要参数: + +- `primary_key_name` – any column name in the column list. +- 必须指定 `primary key`, 仅支持主键中的一个列. 主键将被序列化为二进制的`rocksdb key`. +- 主键以外的列将以相应的顺序在二进制中序列化为`rocksdb`值. +- 带有键 `equals` 或 `in` 过滤的查询将被优化为从 `rocksdb` 进行多键查询. + +示例: + +``` sql +CREATE TABLE test +( + `key` String, + `v1` UInt32, + `v2` String, + `v3` Float32, +) +ENGINE = EmbeddedRocksDB +PRIMARY KEY key +``` + +[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/embedded-rocksdb/) diff --git a/docs/zh/engines/table-engines/integrations/index.md b/docs/zh/engines/table-engines/integrations/index.md index 17e9d204aa6..0c34ae078a0 100644 --- a/docs/zh/engines/table-engines/integrations/index.md +++ b/docs/zh/engines/table-engines/integrations/index.md @@ -1,8 +1,21 @@ --- -machine_translated: true -machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd toc_folder_title: "\u96C6\u6210" toc_priority: 30 --- +# 集成的表引擎 {#table-engines-for-integrations} +ClickHouse 提供了多种方式来与外部系统集成,包括表引擎。像所有其他的表引擎一样,使用`CREATE TABLE`或`ALTER TABLE`查询语句来完成配置。然后从用户的角度来看,配置的集成看起来像查询一个正常的表,但对它的查询是代理给外部系统的。这种透明的查询是这种方法相对于其他集成方法的主要优势之一,比如外部字典或表函数,它们需要在每次使用时使用自定义查询方法。 + +以下是支持的集成方式: + +- [ODBC](../../../engines/table-engines/integrations/odbc.md) +- [JDBC](../../../engines/table-engines/integrations/jdbc.md) +- [MySQL](../../../engines/table-engines/integrations/mysql.md) +- [MongoDB](../../../engines/table-engines/integrations/mongodb.md) +- [HDFS](../../../engines/table-engines/integrations/hdfs.md) +- [S3](../../../engines/table-engines/integrations/s3.md) +- [Kafka](../../../engines/table-engines/integrations/kafka.md) +- [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md) +- [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) +- [PostgreSQL](../../../engines/table-engines/integrations/postgresql.md) diff --git a/docs/zh/engines/table-engines/integrations/rabbitmq.md b/docs/zh/engines/table-engines/integrations/rabbitmq.md new file mode 100644 index 00000000000..a4a5be5f685 --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/rabbitmq.md @@ -0,0 +1,167 @@ +--- +toc_priority: 10 +toc_title: RabbitMQ +--- + +# RabbitMQ 引擎 {#rabbitmq-engine} + +该引擎允许 ClickHouse 与 [RabbitMQ](https://www.rabbitmq.com) 进行集成. + +`RabbitMQ` 可以让你: + +- 发布或订阅数据流。 +- 在数据流可用时进行处理。 + +## 创建一张表 {#table_engine-rabbitmq-creating-a-table} + +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE = RabbitMQ SETTINGS + rabbitmq_host_port = 'host:port', + rabbitmq_exchange_name = 'exchange_name', + rabbitmq_format = 'data_format'[,] + [rabbitmq_exchange_type = 'exchange_type',] + [rabbitmq_routing_key_list = 'key1,key2,...',] + [rabbitmq_row_delimiter = 'delimiter_symbol',] + [rabbitmq_schema = '',] + [rabbitmq_num_consumers = N,] + [rabbitmq_num_queues = N,] + [rabbitmq_queue_base = 'queue',] + [rabbitmq_deadletter_exchange = 'dl-exchange',] + [rabbitmq_persistent = 0,] + [rabbitmq_skip_broken_messages = N,] + [rabbitmq_max_block_size = N,] + [rabbitmq_flush_interval_ms = N] +``` + +必要参数: + +- `rabbitmq_host_port` – 主机名:端口号 (比如, `localhost:5672`). +- `rabbitmq_exchange_name` – RabbitMQ exchange 名称. +- `rabbitmq_format` – 消息格式. 使用与SQL`FORMAT`函数相同的标记,如`JSONEachRow`。 更多信息,请参阅 [Formats](../../../interfaces/formats.md) 部分. + +可选参数: + +- `rabbitmq_exchange_type` – RabbitMQ exchange 的类型: `direct`, `fanout`, `topic`, `headers`, `consistent_hash`. 默认是: `fanout`. +- `rabbitmq_routing_key_list` – 一个以逗号分隔的路由键列表. +- `rabbitmq_row_delimiter` – 用于消息结束的分隔符. +- `rabbitmq_schema` – 如果格式需要模式定义,必须使用该参数。比如, [Cap’n Proto](https://capnproto.org/) 需要模式文件的路径以及根 `schema.capnp:Message` 对象的名称. +- `rabbitmq_num_consumers` – 每个表的消费者数量。默认:`1`。如果一个消费者的吞吐量不够,可以指定更多的消费者. +- `rabbitmq_num_queues` – 队列的总数。默认值: `1`. 增加这个数字可以显著提高性能. +- `rabbitmq_queue_base` - 指定一个队列名称的提示。这个设置的使用情况如下. +- `rabbitmq_deadletter_exchange` - 为[dead letter exchange](https://www.rabbitmq.com/dlx.html)指定名称。你可以用这个 exchange 的名称创建另一个表,并在消息被重新发布到 dead letter exchange 的情况下收集它们。默认情况下,没有指定 dead letter exchange。Specify name for a [dead letter exchange](https://www.rabbitmq.com/dlx.html). +- `rabbitmq_persistent` - 如果设置为 1 (true), 在插入查询中交付模式将被设置为 2 (将消息标记为 'persistent'). 默认是: `0`. +- `rabbitmq_skip_broken_messages` – RabbitMQ 消息解析器对每块模式不兼容消息的容忍度。默认值:`0`. 如果 `rabbitmq_skip_broken_messages = N`,那么引擎将跳过 *N* 个无法解析的 RabbitMQ 消息(一条消息等于一行数据)。 +- `rabbitmq_max_block_size` +- `rabbitmq_flush_interval_ms` + +同时,格式的设置也可以与 rabbitmq 相关的设置一起添加。 + +示例: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64, + date DateTime + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5, + date_time_input_format = 'best_effort'; +``` + +RabbitMQ 服务器配置应使用 ClickHouse 配置文件添加。 + +必要配置: + +``` xml + + root + clickhouse + +``` + +可选配置: + +``` xml + + clickhouse + +``` + +## 描述 {#description} + +`SELECT`对于读取消息不是特别有用(除了调试),因为每个消息只能读取一次。使用[物化视图](../../../sql-reference/statements/create.md#create-view)创建实时线程更为实用。要做到这一点: + +1. 使用引擎创建一个 RabbitMQ 消费者,并将其视为一个数据流。 +2. 创建一个具有所需结构的表。 +3. 创建一个物化视图,转换来自引擎的数据并将其放入先前创建的表中。 + +当`物化视图`加入引擎时,它开始在后台收集数据。这允许您持续接收来自 RabbitMQ 的消息,并使用 `SELECT` 将它们转换为所需格式。 +一个 RabbitMQ 表可以有多个你需要的物化视图。 + +数据可以根据`rabbitmq_exchange_type`和指定的`rabbitmq_routing_key_list`进行通道。 +每个表不能有多于一个 exchange。一个 exchange 可以在多个表之间共享 - 因为可以使用路由让数据同时进入多个表。 + +Exchange 类型的选项: + +- `direct` - 路由是基于精确匹配的键。例如表的键列表: `key1,key2,key3,key4,key5`, 消息键可以是等同他们中的任意一个. +- `fanout` - 路由到所有的表 (exchange 名称相同的情况) 无论是什么键都是这样. +- `topic` - 路由是基于带有点分隔键的模式. 比如: `*.logs`, `records.*.*.2020`, `*.2018,*.2019,*.2020`. +- `headers` - 路由是基于`key=value`的匹配,设置为`x-match=all`或`x-match=any`. 例如表的键列表: `x-match=all,format=logs,type=report,year=2020`. +- `consistent_hash` - 数据在所有绑定的表之间均匀分布 (exchange 名称相同的情况). 请注意,这种 exchange 类型必须启用 RabbitMQ 插件: `rabbitmq-plugins enable rabbitmq_consistent_hash_exchange`. + +设置`rabbitmq_queue_base`可用于以下情况: + +- 来让不同的表共享队列, 这样就可以为同一个队列注册多个消费者,这使得性能更好。如果使用`rabbitmq_num_consumers`和/或`rabbitmq_num_queues`设置,在这些参数相同的情况下,实现队列的精确匹配。 +- 以便在不是所有消息都被成功消费时,能够恢复从某些持久队列的阅读。要从一个特定的队列恢复消耗 - 在`rabbitmq_queue_base`设置中设置其名称,不要指定`rabbitmq_num_consumers`和`rabbitmq_num_queues`(默认为1)。要恢复所有队列的消费,这些队列是为一个特定的表所声明的 - 只要指定相同的设置。`rabbitmq_queue_base`, `rabbitmq_num_consumers`, `rabbitmq_num_queues`。默认情况下,队列名称对表来说是唯一的。 +- 以重复使用队列,因为它们被声明为持久的,并且不会自动删除。可以通过任何 RabbitMQ CLI 工具删除) + +为了提高性能,收到的消息被分组为大小为 [max_insert_block_size](../../../operations/server-configuration-parameters/settings.md#settings-max_insert_block_size) 的块。如果在[stream_flush_interval_ms](../../../operations/server-configuration-parameters/settings.md)毫秒内没有形成数据块,无论数据块是否完整,数据都会被刷到表中。 + +如果`rabbitmq_num_consumers`和/或`rabbitmq_num_queues`设置与`rabbitmq_exchange_type`一起被指定,那么: + +- 必须启用`rabbitmq-consistent-hash-exchange` 插件. +- 必须指定已发布信息的 `message_id`属性(对于每个信息/批次都是唯一的)。 + +对于插入查询时有消息元数据,消息元数据被添加到每个发布的消息中:`messageID`和`republished`标志(如果值为true,则表示消息发布不止一次) - 可以通过消息头访问。 + +不要在插入和物化视图中使用同一个表。 + +示例: + +``` sql + CREATE TABLE queue ( + key UInt64, + value UInt64 + ) ENGINE = RabbitMQ SETTINGS rabbitmq_host_port = 'localhost:5672', + rabbitmq_exchange_name = 'exchange1', + rabbitmq_exchange_type = 'headers', + rabbitmq_routing_key_list = 'format=logs,type=report,year=2020', + rabbitmq_format = 'JSONEachRow', + rabbitmq_num_consumers = 5; + + CREATE TABLE daily (key UInt64, value UInt64) + ENGINE = MergeTree() ORDER BY key; + + CREATE MATERIALIZED VIEW consumer TO daily + AS SELECT key, value FROM queue; + + SELECT key, value FROM daily ORDER BY key; +``` + +## 虚拟列 {#virtual-columns} + +- `_exchange_name` - RabbitMQ exchange 名称. +- `_channel_id` - 接收消息的消费者所声明的频道ID. +- `_delivery_tag` - 收到消息的DeliveryTag. 以每个频道为范围. +- `_redelivered` - 消息的`redelivered`标志. +- `_message_id` - 收到的消息的ID;如果在消息发布时被设置,则为非空. +- `_timestamp` - 收到的消息的时间戳;如果在消息发布时被设置,则为非空. + +[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/rabbitmq/) diff --git a/docs/zh/engines/table-engines/integrations/s3.md b/docs/zh/engines/table-engines/integrations/s3.md new file mode 100644 index 00000000000..5b934dae2c4 --- /dev/null +++ b/docs/zh/engines/table-engines/integrations/s3.md @@ -0,0 +1,213 @@ +--- +toc_priority: 7 +toc_title: S3 +--- + +# S3 表引擎 {#table-engine-s3} + +这个引擎提供与[Amazon S3](https://aws.amazon.com/s3/)生态系统的集成。这个引擎类似于[HDFS](../../../engines/table-engines/integrations/hdfs.md)引擎,但提供了 S3 特有的功能。 + +## 创建表 {#creating-a-table} + +``` sql +CREATE TABLE s3_engine_table (name String, value UInt32) +ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, [compression]) +``` + +**引擎参数** + +- `path` — 带有文件路径的 Bucket url。在只读模式下支持以下通配符: `*`, `?`, `{abc,def}` 和 `{N..M}` 其中 `N`, `M` 是数字, `'abc'`, `'def'` 是字符串. 更多信息见[下文](#wildcards-in-path). +- `format` — 文件的[格式](../../../interfaces/formats.md#formats). +- `aws_access_key_id`, `aws_secret_access_key` - [AWS](https://aws.amazon.com/) 账号的长期凭证. 你可以使用凭证来对你的请求进行认证.参数是可选的. 如果没有指定凭据, 将从配置文件中读取凭据. 更多信息参见 [使用 S3 来存储数据](../mergetree-family/mergetree.md#table_engine-mergetree-s3). +- `compression` — 压缩类型. 支持的值: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. 参数是可选的. 默认情况下,通过文件扩展名自动检测压缩类型. + +**示例** + +1. 创建 `s3_engine_table` 表: + +``` sql +CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE=S3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'gzip'); +``` + +2. 填充文件: + +``` sql +INSERT INTO s3_engine_table VALUES ('one', 1), ('two', 2), ('three', 3); +``` + +3. 查询数据: + +``` sql +SELECT * FROM s3_engine_table LIMIT 2; +``` + +```text +┌─name─┬─value─┐ +│ one │ 1 │ +│ two │ 2 │ +└──────┴───────┘ +``` +## 虚拟列 {#virtual-columns} + +- `_path` — 文件路径. +- `_file` — 文件名. + +有关虚拟列的更多信息,见 [这里](../../../engines/table-engines/index.md#table_engines-virtual_columns). + +## 实施细节 {#implementation-details} + +- 读取和写入可以是并行的 +- 以下是不支持的: + - `ALTER` 和 `SELECT...SAMPLE` 操作. + - 索引. + - 复制. + +## 路径中的通配符 {#wildcards-in-path} + +`path` 参数可以使用类 bash 的通配符来指定多个文件。对于正在处理的文件应该存在并匹配到整个路径模式。 文件列表的确定是在 `SELECT` 的时候进行(而不是在 `CREATE` 的时候)。 + +- `*` — 替代任何数量的任何字符,除了 `/` 以及空字符串。 +- `?` — 代替任何单个字符. +- `{some_string,another_string,yet_another_one}` — 替代 `'some_string', 'another_string', 'yet_another_one'`字符串. +- `{N..M}` — 替换 N 到 M 范围内的任何数字,包括两个边界的值. N 和 M 可以以 0 开头,比如 `000..078` + +带 `{}` 的结构类似于 [远程](../../../sql-reference/table-functions/remote.md) 表函数。 + +**示例** + +1. 假设我们在 S3 上有几个 CSV 格式的文件,URI如下: + +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv’ +- ‘https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv’ + +有几种方法来创建由所有六个文件组成的数据表: + +第一种方式: + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +``` + +另一种方式: + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +``` + +表由两个目录中的所有文件组成(所有文件应满足查询中描述的格式和模式)。 + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +``` + +如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 + +**示例** + +使用文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`来创建表: + +``` sql +CREATE TABLE big_table (name String, value UInt32) ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +``` + +## 虚拟列 {#virtual-columns} + +- `_path` — 文件路径. +- `_file` — 文件名. + +**另请参阅** + +- [虚拟列](../../../engines/table-engines/index.md#table_engines-virtual_columns) + +## S3 相关的设置 {#settings} + +以下设置可以在查询执行前设置,也可以放在配置文件中。 + +- `s3_max_single_part_upload_size` - 使用单文件上传至 S3 的对象的最大文件大小。默认值是`64Mb`。 +- `s3_min_upload_part_size` - 使用[S3多文件块上传](https://docs.aws.amazon.com/AmazonS3/latest/dev/uploadobjusingmpu.html)时,文件块的最小文件大小。默认值是`512Mb`。 +- `s3_max_redirects` - 允许的最大S3重定向跳数。默认值是`10`。 +- `s3_single_read_retries` - 单次读取时的最大尝试次数。默认值是`4`。 + +安全考虑:如果恶意用户可以指定任意的 S3 网址,`s3_max_redirects`参数必须设置为零,以避免[SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery)攻击;或者,必须在服务器配置中指定`remote_host_filter`。 + +## 基于 Endpoint 的设置 {#endpoint-settings} + +在配置文件中可以为给定的端点指定以下设置(将通过URL的准确前缀来匹配)。 + +- `endpoint` - 指定一个端点的前缀。必要参数。 +- `access_key_id`和`secret_access_key` - 用于指定端点的登陆凭据。可选参数。 +- `use_environment_credentials` - 如果设置为`true`,S3客户端将尝试从环境变量和[Amazon EC2](https://en.wikipedia.org/wiki/Amazon_Elastic_Compute_Cloud)元数据中为指定的端点获取证书。可选参数,默认值是`false`。 +- `region` - 指定S3的区域名称。可选参数。 +- `use_insecure_imds_request` - 如果设置为`true`,S3客户端将使用不安全的 IMDS 请求,同时从Amazon EC2 元数据获取证书。可选参数,默认值是`false`。 +- `header` - 添加指定的HTTP头到给定端点的请求中。可选参数,可以使用多次此参数来添加多个值。 +- `server_side_encryption_customer_key_base64` - 如果指定,需要指定访问 SSE-C 加密的 S3 对象所需的头信息。可选参数。 +- `max_single_read_retries` - 单次读取时的最大尝试次数。默认值是`4`。可选参数。 + +**示例:** + +``` xml + + + https://storage.yandexcloud.net/my-test-bucket-768/ + + + + + + + + + + +``` + +## 用法 {#usage-examples} + +假设我们在 S3 上有几个 CSV 格式的文件,URI 如下: + +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' + + +1. 有几种方式来制作由所有六个文件组成的表格,其中一种方式如下: + +``` sql +CREATE TABLE table_with_range (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}', 'CSV'); +``` + +2. 另一种方式: + +``` sql +CREATE TABLE table_with_question_mark (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_?', 'CSV'); +``` + +3. 表由两个目录中的所有文件组成(所有文件应满足查询中描述的格式和模式): + +``` sql +CREATE TABLE table_with_asterisk (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV'); +``` + +!!! warning "Warning" + 如果文件列表中包含有从0开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`. + +4. 从文件`file-000.csv`, `file-001.csv`, … , `file-999.csv`创建表: + +``` sql +CREATE TABLE big_table (name String, value UInt32) +ENGINE = S3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV'); +``` + +## 另请参阅 + +- [S3 表函数](../../../sql-reference/table-functions/s3.md) diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 353dd5f5bc8..45e080fd640 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -6,21 +6,21 @@ Clickhouse 中最强大的表引擎当属 `MergeTree` (合并树)引擎及 主要特点: -- 存储的数据按主键排序。 +- 存储的数据按主键排序。 - 这使得你能够创建一个小型的稀疏索引来加快数据检索。 + 这使得您能够创建一个小型的稀疏索引来加快数据检索。 -- 支持数据分区,如果指定了 [分区键](custom-partitioning-key.md) 的话。 +- 如果指定了 [分区键](custom-partitioning-key.md) 的话,可以使用分区。 在相同数据集和相同结果集的情况下 ClickHouse 中某些带分区的操作会比普通操作更快。查询中指定了分区键时 ClickHouse 会自动截取分区数据。这也有效增加了查询性能。 -- 支持数据副本。 +- 支持数据副本。 `ReplicatedMergeTree` 系列的表提供了数据副本功能。更多信息,请参阅 [数据副本](replication.md) 一节。 -- 支持数据采样。 +- 支持数据采样。 - 需要的话,你可以给表设置一个采样方法。 + 需要的话,您可以给表设置一个采样方法。 !!! note "注意" [合并](../special/merge.md#merge) 引擎并不属于 `*MergeTree` 系列。 @@ -50,54 +50,58 @@ ORDER BY expr **子句** -- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 +- `ENGINE` - 引擎名和参数。 `ENGINE = MergeTree()`. `MergeTree` 引擎没有参数。 - -- `ORDER BY` — 排序键。 +- `ORDER BY` — 排序键。 可以是一组列的元组或任意的表达式。 例如: `ORDER BY (CounterID, EventDate)` 。 - - 如果没有使用 `PRIMARY KEY` 显式的指定主键,ClickHouse 会使用排序键作为主键。 - + + 如果没有使用 `PRIMARY KEY` 显式指定的主键,ClickHouse 会使用排序键作为主键。 + 如果不需要排序,可以使用 `ORDER BY tuple()`. 参考 [选择主键](https://clickhouse.tech/docs/en/engines/table-engines/mergetree-family/mergetree/#selecting-the-primary-key) -- `PARTITION BY` — [分区键](custom-partitioning-key.md) 。 +- `PARTITION BY` — [分区键](custom-partitioning-key.md) ,可选项。 要按月分区,可以使用表达式 `toYYYYMM(date_column)` ,这里的 `date_column` 是一个 [Date](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的列。分区名的格式会是 `"YYYYMM"` 。 -- `PRIMARY KEY` - 主键,如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),可选。 +- `PRIMARY KEY` - 如果要 [选择与排序键不同的主键](#choosing-a-primary-key-that-differs-from-the-sorting-key),在这里指定,可选项。 默认情况下主键跟排序键(由 `ORDER BY` 子句指定)相同。 因此,大部分情况下不需要再专门指定一个 `PRIMARY KEY` 子句。 -- `SAMPLE BY` — 用于抽样的表达式。 +- `SAMPLE BY` - 用于抽样的表达式,可选项。 如果要用抽样表达式,主键中必须包含这个表达式。例如: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))` 。 - -- TTL 指定行存储的持续时间并定义数据片段在硬盘和卷上的移动逻辑的规则列表,可选。 + +- `TTL` - 指定行存储的持续时间并定义数据片段在硬盘和卷上的移动逻辑的规则列表,可选项。 表达式中必须存在至少一个 `Date` 或 `DateTime` 类型的列,比如: - + `TTL date + INTERVAl 1 DAY` - + 规则的类型 `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'`指定了当满足条件(到达指定时间)时所要执行的动作:移除过期的行,还是将数据片段(如果数据片段中的所有行都满足表达式的话)移动到指定的磁盘(`TO DISK 'xxx'`) 或 卷(`TO VOLUME 'xxx'`)。默认的规则是移除(`DELETE`)。可以在列表中指定多个规则,但最多只能有一个`DELETE`的规则。 - + 更多细节,请查看 [表和列的 TTL](#table_engine-mergetree-ttl) -- `SETTINGS` — 控制 `MergeTree` 行为的额外参数: +- `SETTINGS` — 控制 `MergeTree` 行为的额外参数,可选项: - - `index_granularity` — 索引粒度。索引中相邻的『标记』间的数据行数。默认值,8192 。参考[数据存储](#mergetree-data-storage)。 - - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果想要仅按数据行数限制索引粒度, 请设置为0(不建议)。 - - `enable_mixed_granularity_parts` — 是否启用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从具有很大的行(几十上百兆字节)的表中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果你的表里有很大的行,可以开启这项配置来提升`SELECT` 查询的性能。 - - `use_minimalistic_part_header_in_zookeeper` — 是否在 ZooKeeper 中启用最小的数据片段头 。如果设置了 `use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考『服务配置参数』这章中的 [设置描述](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。 - - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。 + - `index_granularity` — 索引粒度。索引中相邻的『标记』间的数据行数。默认值8192 。参考[数据存储](#mergetree-data-storage)。 + - `index_granularity_bytes` — 索引粒度,以字节为单位,默认值: 10Mb。如果想要仅按数据行数限制索引粒度, 请设置为0(不建议)。 + - `min_index_granularity_bytes` - 允许的最小数据粒度,默认值:1024b。该选项用于防止误操作,添加了一个非常低索引粒度的表。参考[数据存储](#mergetree-data-storage) + - `enable_mixed_granularity_parts` — 是否启用通过 `index_granularity_bytes` 控制索引粒度的大小。在19.11版本之前, 只有 `index_granularity` 配置能够用于限制索引粒度的大小。当从具有很大的行(几十上百兆字节)的表中查询数据时候,`index_granularity_bytes` 配置能够提升ClickHouse的性能。如果您的表里有很大的行,可以开启这项配置来提升`SELECT` 查询的性能。 + - `use_minimalistic_part_header_in_zookeeper` — ZooKeeper中数据片段存储方式 。如果`use_minimalistic_part_header_in_zookeeper=1` ,ZooKeeper 会存储更少的数据。更多信息参考[服务配置参数]([Server Settings | ClickHouse Documentation](https://clickhouse.tech/docs/zh/operations/server-configuration-parameters/settings/))这章中的 [设置描述](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) 。 + - `min_merge_bytes_to_use_direct_io` — 使用直接 I/O 来操作磁盘的合并操作时要求的最小数据量。合并数据片段时,ClickHouse 会计算要被合并的所有数据的总存储空间。如果大小超过了 `min_merge_bytes_to_use_direct_io` 设置的字节数,则 ClickHouse 将使用直接 I/O 接口(`O_DIRECT` 选项)对磁盘读写。如果设置 `min_merge_bytes_to_use_direct_io = 0` ,则会禁用直接 I/O。默认值:`10 * 1024 * 1024 * 1024` 字节。 - - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间,单位:秒。默认值: 86400 (1 天)。 - - `write_final_mark` — 是否启用在数据片段尾部写入最终索引标记。默认值: 1(不建议更改)。 - - `merge_max_block_size` — 在块中进行合并操作时的最大行数限制。默认值:8192 - - `storage_policy` — 存储策略。 参见 [使用具有多个块的设备进行数据存储](#table_engine-mergetree-multiple-volumes). - - `min_bytes_for_wide_part`,`min_rows_for_wide_part` 在数据片段中可以使用`Wide`格式进行存储的最小字节数/行数。你可以不设置、只设置一个,或全都设置。参考:[数据存储](#mergetree-data-storage) + - `merge_with_ttl_timeout` — TTL合并频率的最小间隔时间,单位:秒。默认值: 86400 (1 天)。 + - `write_final_mark` — 是否启用在数据片段尾部写入最终索引标记。默认值: 1(不要关闭)。 + - `merge_max_block_size` — 在块中进行合并操作时的最大行数限制。默认值:8192 + - `storage_policy` — 存储策略。 参见 [使用具有多个块的设备进行数据存储](#table_engine-mergetree-multiple-volumes). + - `min_bytes_for_wide_part`,`min_rows_for_wide_part` 在数据片段中可以使用`Wide`格式进行存储的最小字节数/行数。您可以不设置、只设置一个,或全都设置。参考:[数据存储](#mergetree-data-storage) + - `max_parts_in_total` - 所有分区中最大块的数量(意义不明) + - `max_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最大大小。您可以在全局设置中设置该值(参见[max_compress_block_size](https://clickhouse.tech/docs/zh/operations/settings/settings/#max-compress-block-size))。建表时指定该值会覆盖全局设置。 + - `min_compress_block_size` - 在数据压缩写入表前,未压缩数据块的最小大小。您可以在全局设置中设置该值(参见[min_compress_block_size](https://clickhouse.tech/docs/zh/operations/settings/settings/#min-compress-block-size))。建表时指定该值会覆盖全局设置。 + - `max_partitions_to_read` - 一次查询中可访问的分区最大数。您可以在全局设置中设置该值(参见[max_partitions_to_read](https://clickhouse.tech/docs/zh/operations/settings/settings/#max_partitions_to_read))。 **示例配置** @@ -107,12 +111,11 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa 在这个例子中,我们设置了按月进行分区。 -同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得你可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果你在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。 +同时我们设置了一个按用户 ID 哈希的抽样表达式。这使得您可以对该表中每个 `CounterID` 和 `EventDate` 的数据伪随机分布。如果您在查询时指定了 [SAMPLE](../../../engines/table-engines/mergetree-family/mergetree.md#select-sample-clause) 子句。 ClickHouse会返回对于用户子集的一个均匀的伪随机数据采样。 `index_granularity` 可省略因为 8192 是默认设置 。
- 已弃用的建表方法 !!! attention "注意" @@ -127,10 +130,10 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa **MergeTree() 参数** -- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 -- `sampling_expression` — 采样表达式。 -- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) -- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 +- `date-column` — 类型为 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 的列名。ClickHouse 会自动依据这个列按月创建分区。分区名格式为 `"YYYYMM"` 。 +- `sampling_expression` — 采样表达式。 +- `(primary, key)` — 主键。类型 — [元组()](../../../engines/table-engines/mergetree-family/mergetree.md) +- `index_granularity` — 索引粒度。即索引中相邻『标记』间的数据行数。设为 8192 可以适用大部分场景。 **示例** @@ -152,51 +155,55 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa 数据存储格式由 `min_bytes_for_wide_part` 和 `min_rows_for_wide_part` 表引擎参数控制。如果数据片段中的字节数或行数少于相应的设置值,数据片段会以 `Compact` 格式存储,否则会以 `Wide` 格式存储。 每个数据片段被逻辑的分割成颗粒(granules)。颗粒是 ClickHouse 中进行数据查询时的最小不可分割数据集。ClickHouse 不会对行或值进行拆分,所以每个颗粒总是包含整数个行。每个颗粒的第一行通过该行的主键值进行标记, -ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让你可以在列文件中直接找到数据。 +ClickHouse 会为每个数据片段创建一个索引文件来存储这些标记。对于每列,无论它是否包含在主键当中,ClickHouse 都会存储类似标记。这些标记让您可以在列文件中直接找到数据。 -颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。取决于行的大小,颗粒的行数的在 `[1, index_granularity]` 范围中。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 +颗粒的大小通过表引擎参数 `index_granularity` 和 `index_granularity_bytes` 控制。颗粒的行数的在 `[1, index_granularity]` 范围中,这取决于行的大小。如果单行的大小超过了 `index_granularity_bytes` 设置的值,那么一个颗粒的大小会超过 `index_granularity_bytes`。在这种情况下,颗粒的大小等于该行的大小。 ## 主键和索引在查询中的表现 {#primary-keys-and-indexes-in-queries} 我们以 `(CounterID, Date)` 以主键。排序好的索引的图示会是下面这样: +``` text 全部数据 : [-------------------------------------------------------------------------] CounterID: [aaaaaaaaaaaaaaaaaabbbbcdeeeeeeeeeeeeefgggggggghhhhhhhhhiiiiiiiiikllllllll] Date: [1111111222222233331233211111222222333211111112122222223111112223311122333] 标记: | | | | | | | | | | | a,1 a,2 a,3 b,3 e,2 e,3 g,1 h,2 i,1 i,3 l,3 标记号: 0 1 2 3 4 5 6 7 8 9 10 +``` 如果指定查询如下: -- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 -- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 -- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 +- `CounterID in ('a', 'h')`,服务器会读取标记号在 `[0, 3)` 和 `[6, 8)` 区间中的数据。 +- `CounterID IN ('a', 'h') AND Date = 3`,服务器会读取标记号在 `[1, 3)` 和 `[7, 8)` 区间中的数据。 +- `Date = 3`,服务器会读取标记号在 `[1, 10]` 区间中的数据。 上面例子可以看出使用索引通常会比全表描述要高效。 稀疏索引会引起额外的数据读取。当读取主键单个区间范围的数据时,每个数据块中最多会多读 `index_granularity * 2` 行额外的数据。 -稀疏索引使得你可以处理极大量的行,因为大多数情况下,这些索引常驻与内存(RAM)中。 +稀疏索引使得您可以处理极大量的行,因为大多数情况下,这些索引常驻于内存。 -ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键的行。 +ClickHouse 不要求主键唯一,所以您可以插入多条具有相同主键的行。 + +您可以在`PRIMARY KEY`与`ORDER BY`条件中使用`可为空的`类型的表达式,但强烈建议不要这么做。为了启用这项功能,请打开[allow_nullable_key](https://clickhouse.tech/docs/zh/operations/settings/settings/#allow-nullable-key),[NULLS_LAST](https://clickhouse.tech/docs/zh/sql-reference/statements/select/order-by/#sorting-of-special-values)规则也适用于`ORDER BY`条件中有NULL值的情况下。 ### 主键的选择 {#zhu-jian-de-xuan-ze} -主键中列的数量并没有明确的限制。依据数据结构,你可以在主键包含多些或少些列。这样可以: +主键中列的数量并没有明确的限制。依据数据结构,您可以在主键包含多些或少些列。这样可以: -- 改善索引的性能。 +- 改善索引的性能。 如果当前主键是 `(a, b)` ,在下列情况下添加另一个 `c` 列会提升性能: - - - 查询会使用 `c` 列作为条件 - - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让你的查询略过很长的数据范围。 -- 改善数据压缩。 + - 查询会使用 `c` 列作为条件 + - 很长的数据范围( `index_granularity` 的数倍)里 `(a, b)` 都是相同的值,并且这样的情况很普遍。换言之,就是加入另一列后,可以让您的查询略过很长的数据范围。 + +- 改善数据压缩。 ClickHouse 以主键排序片段数据,所以,数据的一致性越高,压缩越好。 -- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 +- 在[CollapsingMergeTree](collapsingmergetree.md#table_engine-collapsingmergetree) 和 [SummingMergeTree](summingmergetree.md) 引擎里进行数据合并时会提供额外的处理逻辑。 在这种情况下,指定与主键不同的 *排序键* 也是有意义的。 @@ -206,9 +213,9 @@ ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键 想要根据初始顺序进行数据查询,使用 [单线程查询](../../../operations/settings/settings.md#settings-max_threads) -### 选择与排序键不同主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} +### 选择与排序键不同的主键 {#choosing-a-primary-key-that-differs-from-the-sorting-key} -指定一个跟排序键不一样的主键是可以的,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀。 +Clickhouse可以做到指定一个跟排序键不一样的主键,此时排序键用于在数据片段中进行排序,主键用于在索引文件中进行标记的写入。这种情况下,主键表达式元组必须是排序键表达式元组的前缀(即主键为(a,b),排序列必须为(a,b,******))。 当使用 [SummingMergeTree](summingmergetree.md) 和 [AggregatingMergeTree](aggregatingmergetree.md) 引擎时,这个特性非常有用。通常在使用这类引擎时,表里的列分两种:*维度* 和 *度量* 。典型的查询会通过任意的 `GROUP BY` 对度量列进行聚合并通过维度列进行过滤。由于 SummingMergeTree 和 AggregatingMergeTree 会对排序键相同的行进行聚合,所以把所有的维度放进排序键是很自然的做法。但这将导致排序键中包含大量的列,并且排序键会伴随着新添加的维度不断的更新。 @@ -218,14 +225,20 @@ ClickHouse 不要求主键惟一,所以你可以插入多条具有相同主键 ### 索引和分区在查询中的应用 {#use-of-indexes-and-partitions-in-queries} -对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为谓词链接一子项或整个)则可以使用索引:包含一个表示与主键/分区键中的部分字段或全部字段相等/不等的比较表达式;基于主键/分区键的字段上的 `IN` 或 固定前缀的`LIKE` 表达式;基于主键/分区键的字段上的某些函数;基于主键/分区键的表达式的逻辑表达式。 +对于 `SELECT` 查询,ClickHouse 分析是否可以使用索引。如果 `WHERE/PREWHERE` 子句具有下面这些表达式(作为完整WHERE条件的一部分或全部)则可以使用索引:进行相等/不相等的比较;对主键列或分区列进行`IN`运算、有固定前缀的`LIKE`运算(如name like 'test%')、函数运算(部分函数适用),还有对上述表达式进行逻辑运算。 + + -因此,在索引键的一个或多个区间上快速地执行查询都是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 + + +因此,在索引键的一个或多个区间上快速地执行查询是可能的。下面例子中,指定标签;指定标签和日期范围;指定标签和日期;指定多个标签和日期范围等执行查询,都会非常快。 当引擎配置如下时: +``` sql ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 +``` 这种情况下,这些查询: @@ -237,7 +250,7 @@ SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDat ClickHouse 会依据主键索引剪掉不符合的数据,依据按月分区的分区键剪掉那些不包含符合数据的分区。 -上文的查询显示,即使索引用于复杂表达式。因为读表操作是组织好的,所以,使用索引不会比完整扫描慢。 +上文的查询显示,即使索引用于复杂表达式,因为读表操作经过优化,所以使用索引不会比完整扫描慢。 下面这个例子中,不会使用索引。 @@ -247,17 +260,16 @@ SELECT count() FROM table WHERE CounterID = 34 OR URL LIKE '%upyachka%' 要检查 ClickHouse 执行一个查询时能否使用索引,可设置 [force_index_by_date](../../../operations/settings/settings.md#settings-force_index_by_date) 和 [force_primary_key](../../../operations/settings/settings.md) 。 -按月分区的分区键是只能读取包含适当范围日期的数据块。这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有带主键前几个字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 +使用按月分区的分区列允许只读取包含适当日期区间的数据块,这种情况下,数据块会包含很多天(最多整月)的数据。在块中,数据按主键排序,主键第一列可能不包含日期。因此,仅使用日期而没有用主键字段作为条件的查询将会导致需要读取超过这个指定日期以外的数据。 ### 部分单调主键的使用 -考虑这样的场景,比如一个月中的几天。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 +考虑这样的场景,比如一个月中的天数。它们在一个月的范围内形成一个[单调序列](https://zh.wikipedia.org/wiki/单调函数) ,但如果扩展到更大的时间范围它们就不再单调了。这就是一个部分单调序列。如果用户使用部分单调的主键创建表,ClickHouse同样会创建一个稀疏索引。当用户从这类表中查询数据时,ClickHouse 会对查询条件进行分析。如果用户希望获取两个索引标记之间的数据并且这两个标记在一个月以内,ClickHouse 可以在这种特殊情况下使用到索引,因为它可以计算出查询参数与索引标记之间的距离。 如果查询参数范围内的主键不是单调序列,那么 ClickHouse 无法使用索引。在这种情况下,ClickHouse 会进行全表扫描。 ClickHouse 在任何主键代表一个部分单调序列的情况下都会使用这个逻辑。 - ### 跳数索引 {#tiao-shu-suo-yin-fen-duan-hui-zong-suo-yin-shi-yan-xing-de} 此索引在 `CREATE` 语句的列部分里定义。 @@ -267,11 +279,7 @@ INDEX index_name expr TYPE type(...) GRANULARITY granularity_value ``` `*MergeTree` 系列的表可以指定跳数索引。 - -这些索引是由数据块按粒度分割后的每部分在指定表达式上汇总信息 `granularity_value` 组成(粒度大小用表引擎里 `index_granularity` 的指定)。 -这些汇总信息有助于用 `where` 语句跳过大片不满足的数据,从而减少 `SELECT` 查询从磁盘读取的数据量, - -这些索引会在数据块上聚合指定表达式的信息,这些信息以 granularity_value 指定的粒度组成 (粒度的大小通过在表引擎中定义 index_granularity 定义)。这些汇总信息有助于跳过大片不满足 `where` 条件的数据,从而减少 `SELECT` 查询从磁盘读取的数据量。 +跳数索引是指数据片段按照粒度(建表时指定的`index_granularity`)分割成小块后,将上述SQL的granularity_value数量的小块组合成一个大的块,对这些大块写入索引信息,这样有助于使用`where`筛选时跳过大量不必要的数据,减少`SELECT`需要读取的数据量。 **示例** @@ -295,34 +303,32 @@ SELECT count() FROM table WHERE s < 'z' SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 ``` -#### 索引的可用类型 {#table_engine-mergetree-data_skipping-indexes} +#### 可用的索引类型 {#table_engine-mergetree-data_skipping-indexes} -- `minmax` +- `minmax` 存储指定表达式的极值(如果表达式是 `tuple` ,则存储 `tuple` 中每个元素的极值),这些信息用于跳过数据块,类似主键。 -- `set(max_rows)` - 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查 数据块是否满足 `WHERE` 条件。 +- `set(max_rows)` + 存储指定表达式的不重复值(不超过 `max_rows` 个,`max_rows=0` 则表示『无限制』)。这些信息可用于检查数据块是否满足 `WHERE` 条件。 -- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` +- `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` 存储一个包含数据块中所有 n元短语(ngram) 的 [布隆过滤器](https://en.wikipedia.org/wiki/Bloom_filter) 。只可用在字符串上。 可用于优化 `equals` , `like` 和 `in` 表达式的性能。 - `n` – 短语长度。 - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,单位字节。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 - `random_seed` – 哈希函数的随机种子。 + - `n` – 短语长度。 + - `size_of_bloom_filter_in_bytes` – 布隆过滤器大小,字节为单位。(因为压缩得好,可以指定比较大的值,如 256 或 512)。 + - `number_of_hash_functions` – 布隆过滤器中使用的哈希函数的个数。 + - `random_seed` – 哈希函数的随机种子。 -- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - 跟 `ngrambf_v1` 类似,不同于 ngrams 存储字符串指定长度的所有片段。它只存储被非字母数字字符分割的片段。 +- `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` + 跟 `ngrambf_v1` 类似,但是存储的是token而不是ngrams。Token是由非字母数字的符号分割的序列。 -- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 +- `bloom_filter(bloom_filter([false_positive])` – 为指定的列存储布隆过滤器 + + 可选参数`false_positive`用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 - 可选的参数 false_positive 用来指定从布隆过滤器收到错误响应的几率。取值范围是 (0,1),默认值:0.025 - 支持的数据类型:`Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`。 - + 以下函数会用到这个索引: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions.md), [notIn](../../../sql-reference/functions/in-functions.md), [has](../../../sql-reference/functions/array-functions.md) - - ``` sql INDEX sample_index (u64 * length(s)) TYPE minmax GRANULARITY 4 @@ -332,56 +338,56 @@ INDEX sample_index3 (lower(str), str) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY #### 函数支持 {#functions-support} -WHERE 子句中的条件包含对列的函数调用,如果列是索引的一部分,ClickHouse 会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 +WHERE 子句中的条件可以包含对某列数据进行运算的函数表达式,如果列是索引的一部分,ClickHouse会在执行函数时尝试使用索引。不同的函数对索引的支持是不同的。 `set` 索引会对所有函数生效,其他索引对函数的生效情况见下表 -| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | -|------------------------------------------------------------------------------------------------------------|-------------|--------|-------------|-------------|---------------| -| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | -| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | -| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | -| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | -| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | -| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | +| 函数 (操作符) / 索引 | primary key | minmax | ngrambf_v1 | tokenbf_v1 | bloom_filter | +| ------------------------------------------------------------ | ----------- | ------ | ---------- | ---------- | ------------ | +| [equals (=, ==)](../../../sql-reference/functions/comparison-functions.md#function-equals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notEquals(!=, \<\>)](../../../sql-reference/functions/comparison-functions.md#function-notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [like](../../../sql-reference/functions/string-search-functions.md#function-like) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notLike](../../../sql-reference/functions/string-search-functions.md#function-notlike) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [startsWith](../../../sql-reference/functions/string-functions.md#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | +| [endsWith](../../../sql-reference/functions/string-functions.md#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | +| [multiSearchAny](../../../sql-reference/functions/string-search-functions.md#function-multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | +| [in](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [notIn](../../../sql-reference/functions/in-functions.md#in-functions) | ✔ | ✔ | ✔ | ✔ | ✔ | +| [less (\<)](../../../sql-reference/functions/comparison-functions.md#function-less) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greater (\>)](../../../sql-reference/functions/comparison-functions.md#function-greater) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [lessOrEquals (\<=)](../../../sql-reference/functions/comparison-functions.md#function-lessorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#function-greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | +| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | 常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。 !!! note "注意" -布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于负向的函数,例如: +布隆过滤器可能会包含不符合条件的匹配,所以 `ngrambf_v1`, `tokenbf_v1` 和 `bloom_filter` 索引不能用于结果返回为假的函数,例如: -- 可以用来优化的场景 - - `s LIKE '%test%'` - - `NOT s NOT LIKE '%test%'` - - `s = 1` - - `NOT s != 1` - - `startsWith(s, 'test')` -- 不能用来优化的场景 - - `NOT s LIKE '%test%'` - - `s NOT LIKE '%test%'` - - `NOT s = 1` - - `s != 1` - - `NOT startsWith(s, 'test')` +- 可以用来优化的场景 + - `s LIKE '%test%'` + - `NOT s NOT LIKE '%test%'` + - `s = 1` + - `NOT s != 1` + - `startsWith(s, 'test')` +- 不能用来优化的场景 + - `NOT s LIKE '%test%'` + - `s NOT LIKE '%test%'` + - `NOT s = 1` + - `s != 1` + - `NOT startsWith(s, 'test')` ## 并发数据访问 {#concurrent-data-access} -应对表的并发访问,我们使用多版本机制。换言之,当同时读和更新表时,数据从当前查询到的一组片段中读取。没有冗长的的锁。插入不会阻碍读取。 +对于表的并发访问,我们使用多版本机制。换言之,当一张表同时被读和更新时,数据从当前查询到的一组片段中读取。没有冗长的的锁。插入不会阻碍读取。 对表的读操作是自动并行的。 ## 列和表的 TTL {#table_engine-mergetree-ttl} -TTL 可以设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 +TTL用于设置值的生命周期,它既可以为整张表设置,也可以为每个列字段单独设置。表级别的 TTL 还会指定数据在磁盘和卷上自动转移的逻辑。 TTL 表达式的计算结果必须是 [日期](../../../engines/table-engines/mergetree-family/mergetree.md) 或 [日期时间](../../../engines/table-engines/mergetree-family/mergetree.md) 类型的字段。 @@ -405,7 +411,7 @@ TTL date_time + INTERVAL 15 HOUR `TTL`子句不能被用于主键字段。 -示例: +**示例:** 创建表时指定 `TTL` @@ -443,16 +449,23 @@ ALTER TABLE example_table 表可以设置一个用于移除过期行的表达式,以及多个用于在磁盘或卷上自动转移数据片段的表达式。当表中的行过期时,ClickHouse 会删除所有对应的行。对于数据片段的转移特性,必须所有的行都满足转移条件。 ``` sql -TTL expr [DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'], ... +TTL expr + [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'][, DELETE|TO DISK 'aaa'|TO VOLUME 'bbb'] ... + [WHERE conditions] + [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] + ``` TTL 规则的类型紧跟在每个 TTL 表达式后面,它会影响满足表达式时(到达指定时间时)应当执行的操作: -- `DELETE` - 删除过期的行(默认操作); -- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; -- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. +- `DELETE` - 删除过期的行(默认操作); +- `TO DISK 'aaa'` - 将数据片段移动到磁盘 `aaa`; +- `TO VOLUME 'bbb'` - 将数据片段移动到卷 `bbb`. +- `GROUP BY` - 聚合过期的行 -示例: +使用`WHERE`从句,您可以指定哪些过期的行会被删除或聚合(不适用于移动)。`GROUP BY`表达式必须是表主键的前缀。如果某列不是`GROUP BY`表达式的一部分,也没有在SET从句显示引用,结果行中相应列的值是随机的(就好像使用了`any`函数)。 + +**示例**: 创建时指定 TTL @@ -477,19 +490,49 @@ ALTER TABLE example_table MODIFY TTL d + INTERVAL 1 DAY; ``` +创建一张表,设置一个月后数据过期,这些过期的行中日期为星期一的删除: + +``` sql +CREATE TABLE table_with_where +( + d DateTime, + a Int +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(d) +ORDER BY d +TTL d + INTERVAL 1 MONTH DELETE WHERE toDayOfWeek(d) = 1; +``` + +创建一张表,设置过期的列会被聚合。列`x`包含每组行中的最大值,`y`为最小值,`d`为可能任意值。 + +``` sql +CREATE TABLE table_for_aggregation +( + d DateTime, + k1 Int, + k2 Int, + x Int, + y Int +) +ENGINE = MergeTree +ORDER BY (k1, k2) +TTL d + INTERVAL 1 MONTH GROUP BY k1, k2 SET x = max(x), y = min(y); +``` + **删除数据** ClickHouse 在数据片段合并时会删除掉过期的数据。 -当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 你可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 +当ClickHouse发现数据过期时, 它将会执行一个计划外的合并。要控制这类合并的频率, 您可以设置 `merge_with_ttl_timeout`。如果该值被设置的太低, 它将引发大量计划外的合并,这可能会消耗大量资源。 -如果在合并的过程中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 查询。 +如果在合并的过程中执行 `SELECT` 查询, 则可能会得到过期的数据。为了避免这种情况,可以在 `SELECT` 之前使用 [OPTIMIZE](../../../engines/table-engines/mergetree-family/mergetree.md#misc_operations-optimize) 。 -## 使用具有多个块的设备进行数据存储 {#table_engine-mergetree-multiple-volumes} +## 使用多个块设备进行数据存储 {#table_engine-mergetree-multiple-volumes} ### 介绍 {#introduction} -MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。近期数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 +MergeTree 系列表引擎可以将数据存储在多个块设备上。这对某些可以潜在被划分为“冷”“热”的表来说是很有用的。最新数据被定期的查询但只需要很小的空间。相反,详尽的历史数据很少被用到。如果有多块磁盘可用,那么“热”的数据可以放置在快速的磁盘上(比如 NVMe 固态硬盘或内存),“冷”的数据可以放在相对较慢的磁盘上(比如机械硬盘)。 数据片段是 `MergeTree` 引擎表的最小可移动单元。属于同一个数据片段的数据被存储在同一块磁盘上。数据片段会在后台自动的在磁盘间移动,也可以通过 [ALTER](../../../sql-reference/statements/alter.md#alter_move-partition) 查询来移动。 @@ -497,12 +540,14 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 - 磁盘 — 挂载到文件系统的块设备 - 默认磁盘 — 在服务器设置中通过 [path](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-path) 参数指定的数据存储 -- 卷 — 磁盘的等效有序集合 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) +- 卷 — 相同磁盘的顺序列表 (类似于 [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures)) - 存储策略 — 卷的集合及他们之间的数据移动规则 + 以上名称的信息在Clickhouse中系统表[system.storage_policies](https://clickhouse.tech/docs/zh/operations/system-tables/storage_policies/#system_tables-storage_policies)和[system.disks](https://clickhouse.tech/docs/zh/operations/system-tables/disks/#system_tables-disks)体现。为了应用存储策略,可以在建表时使用`storage_policy`设置。 + ### 配置 {#table_engine-mergetree-multiple-volumes_configure} -磁盘、卷和存储策略应当在主文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `` 标签内定义。 +磁盘、卷和存储策略应当在主配置文件 `config.xml` 或 `config.d` 目录中的独立文件中的 `` 标签内定义。 配置结构: @@ -530,9 +575,9 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 标签: -- `` — 磁盘名,名称必须与其他磁盘不同. -- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. -- `keep_free_space_bytes` — 需要保留的剩余磁盘空间. +- `` — 磁盘名,名称必须与其他磁盘不同. +- `path` — 服务器将用来存储数据 (`data` 和 `shadow` 目录) 的路径, 应当以 ‘/’ 结尾. +- `keep_free_space_bytes` — 需要保留的剩余磁盘空间. 磁盘定义的顺序无关紧要。 @@ -567,11 +612,12 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 标签: -- `policy_name_N` — 策略名称,不能重复。 -- `volume_name_N` — 卷名称,不能重复。 -- `disk` — 卷中的磁盘。 -- `max_data_part_size_bytes` — 任意卷上的磁盘可以存储的数据片段的最大大小。 -- `move_factor` — 当可用空间少于这个因子时,数据将自动的向下一个卷(如果有的话)移动 (默认值为 0.1)。 +- `policy_name_N` — 策略名称,不能重复。 +- `volume_name_N` — 卷名称,不能重复。 +- `disk` — 卷中的磁盘。 +- `max_data_part_size_bytes` — 卷中的磁盘可以存储的数据片段的最大大小。 +- `move_factor` — 当可用空间少于这个因子时,数据将自动的向下一个卷(如果有的话)移动 (默认值为 0.1)。 +- `prefer_not_to_merge` - 禁止在这个卷中进行数据合并。该选项启用时,对该卷的数据不能进行合并。这个选项主要用于慢速磁盘。 配置示例: @@ -600,19 +646,31 @@ MergeTree 系列表引擎可以将数据存储在多块设备上。这对某些 0.2 + + + +
+ jbod1 +
+ + external + true + +
+
...
``` -在给出的例子中, `hdd_in_order` 策略实现了 [循环制](https://zh.wikipedia.org/wiki/循环制) 方法。因此这个策略只定义了一个卷(`single`),数据片段会以循环的顺序全部存储到它的磁盘上。当有多个类似的磁盘挂载到系统上,但没有配置 RAID 时,这种策略非常有用。请注意一个每个独立的磁盘驱动都并不可靠,你可能需要用 3 或更大的复制因此来补偿它。 +在给出的例子中, `hdd_in_order` 策略实现了 [循环制](https://zh.wikipedia.org/wiki/循环制) 方法。因此这个策略只定义了一个卷(`single`),数据片段会以循环的顺序全部存储到它的磁盘上。当有多个类似的磁盘挂载到系统上,但没有配置 RAID 时,这种策略非常有用。请注意一个每个独立的磁盘驱动都并不可靠,您可能需要用3份或更多的复制份数来补偿它。 如果在系统中有不同类型的磁盘可用,可以使用 `moving_from_ssd_to_hdd`。`hot` 卷由 SSD 磁盘(`fast_ssd`)组成,这个卷上可以存储的数据片段的最大大小为 1GB。所有大于 1GB 的数据片段都会被直接存储到 `cold` 卷上,`cold` 卷包含一个名为 `disk1` 的 HDD 磁盘。 同样,一旦 `fast_ssd` 被填充超过 80%,数据会通过后台进程向 `disk1` 进行转移。 存储策略中卷的枚举顺序是很重要的。因为当一个卷被充满时,数据会向下一个卷转移。磁盘的枚举顺序同样重要,因为数据是依次存储在磁盘上的。 -在创建表时,可以将一个配置好的策略应用到表: +在创建表时,可以应用存储策略: ``` sql CREATE TABLE table_with_non_default_policy ( @@ -626,7 +684,7 @@ PARTITION BY toYYYYMM(EventDate) SETTINGS storage_policy = 'moving_from_ssd_to_hdd' ``` -`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `` 中定义的磁盘。表创建后,它的存储策略就不能改变了。 +`default` 存储策略意味着只使用一个卷,这个卷只包含一个在 `` 中定义的磁盘。您可以使用[ALTER TABLE ... MODIFY SETTING]来修改存储策略,新的存储策略应该包含所有以前的磁盘和卷,并使用相同的名称。 可以通过 [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) 设置调整执行后台任务的线程数。 @@ -634,24 +692,121 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' 对于 `MergeTree` 表,数据通过以下不同的方式写入到磁盘当中: -- 作为插入(`INSERT`查询)的结果 -- 在后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations)期间 -- 当从另一个副本下载时 -- 作为 [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区的结果 +- 插入(`INSERT`查询) +- 后台合并和[数据变异](../../../sql-reference/statements/alter.md#alter-mutations) +- 从另一个副本下载 +- [ALTER TABLE … FREEZE PARTITION](../../../sql-reference/statements/alter.md#alter_freeze-partition) 冻结分区 除了数据变异和冻结分区以外的情况下,数据按照以下逻辑存储到卷或磁盘上: -1. 首个卷(按定义顺序)拥有足够的磁盘空间存储数据片段(`unreserved_space > current_part_size`)并且允许存储给定数据片段的大小(`max_data_part_size_bytes > current_part_size`) -2. 在这个数据卷内,紧挨着先前存储数据的那块磁盘之后的磁盘,拥有比数据片段大的剩余空间。(`unreserved_space - keep_free_space_bytes > current_part_size`) +1. 首个卷(按定义顺序)拥有足够的磁盘空间存储数据片段(`unreserved_space > current_part_size`)并且允许存储给定数据片段的大小(`max_data_part_size_bytes > current_part_size`) +2. 在这个数据卷内,紧挨着先前存储数据的那块磁盘之后的磁盘,拥有比数据片段大的剩余空间。(`unreserved_space - keep_free_space_bytes > current_part_size`) -更进一步,数据变异和分区冻结使用的是 [硬链接](https://en.wikipedia.org/wiki/Hard_link)。不同磁盘之间的硬链接是不支持的,所以在这种情况下数据片段都会被存储到初始化的那一块磁盘上。 +更进一步,数据变异和分区冻结使用的是 [硬链接](https://en.wikipedia.org/wiki/Hard_link)。不同磁盘之间的硬链接是不支持的,所以在这种情况下数据片段都会被存储到原来的那一块磁盘上。 -在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。同时,具体细节可以通过服务器日志查看。 +在后台,数据片段基于剩余空间(`move_factor`参数)根据卷在配置文件中定义的顺序进行转移。数据永远不会从最后一个移出也不会从第一个移入。可以通过系统表 [system.part_log](../../../operations/system-tables/part_log.md#system_tables-part-log) (字段 `type = MOVE_PART`) 和 [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) (字段 `path` 和 `disk`) 来监控后台的移动情况。具体细节可以通过服务器日志查看。 用户可以通过 [ALTER TABLE … MOVE PART\|PARTITION … TO VOLUME\|DISK …](../../../sql-reference/statements/alter.md#alter_move-partition) 强制移动一个数据片段或分区到另外一个卷,所有后台移动的限制都会被考虑在内。这个查询会自行启动,无需等待后台操作完成。如果没有足够的可用空间或任何必须条件没有被满足,用户会收到报错信息。 数据移动不会妨碍到数据复制。也就是说,同一张表的不同副本可以指定不同的存储策略。 -在后台合并和数据变异之后,就的数据片段会在一定时间后被移除 (`old_parts_lifetime`)。在这期间,他们不能被移动到其他的卷或磁盘。也就是说,直到数据片段被完全移除,它们仍然会被磁盘占用空间计算在内。 +在后台合并和数据变异之后,旧的数据片段会在一定时间后被移除 (`old_parts_lifetime`)。在这期间,他们不能被移动到其他的卷或磁盘。也就是说,直到数据片段被完全移除,它们仍然会被磁盘占用空间计算在内。 + +## 使用S3进行数据存储 {#using-s3-data-storage} + +`MergeTree`系列表引擎允许使用[S3](https://aws.amazon.com/s3/)存储数据,需要修改磁盘类型为`S3`。 + +示例配置: + +``` xml + + ... + + + s3 + https://storage.yandexcloud.net/my-bucket/root-path/ + your_access_key_id + your_secret_access_key + + your_base64_encoded_customer_key + + http://proxy1 + http://proxy2 + + 10000 + 5000 + 10 + 4 + 1000 + /var/lib/clickhouse/disks/s3/ + true + /var/lib/clickhouse/disks/s3/cache/ + false + + + ... + +``` + +必须的参数: + +- `endpoint` - S3的结点URL,以`path`或`virtual hosted`[格式](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html)书写。 +- `access_key_id` - S3的Access Key ID。 +- `secret_access_key` - S3的Secret Access Key。 + +可选参数: + +- `region` - S3的区域名称 +- `use_environment_credentials` - 从环境变量AWS_ACCESS_KEY_ID、AWS_SECRET_ACCESS_KEY和AWS_SESSION_TOKEN中读取认证参数。默认值为`false`。 +- `use_insecure_imds_request` - 如果设置为`true`,S3客户端在认证时会使用不安全的IMDS请求。默认值为`false`。 +- `proxy` - 访问S3结点URL时代理设置。每一个`uri`项的值都应该是合法的代理URL。 +- `connect_timeout_ms` - Socket连接超时时间,默认值为`10000`,即10秒。 +- `request_timeout_ms` - 请求超时时间,默认值为`5000`,即5秒。 +- `retry_attempts` - 请求失败后的重试次数,默认值为10。 +- `single_read_retries` - 读过程中连接丢失后重试次数,默认值为4。 +- `min_bytes_for_seek` - 使用查找操作,而不是顺序读操作的最小字节数,默认值为1000。 +- `metadata_path` - 本地存放S3元数据文件的路径,默认值为`/var/lib/clickhouse/disks//` +- `cache_enabled` - 是否允许缓存标记和索引文件。默认值为`true`。 +- `cache_path` - 本地缓存标记和索引文件的路径。默认值为`/var/lib/clickhouse/disks//cache/`。 +- `skip_access_check` - 如果为`true`,Clickhouse启动时不检查磁盘是否可用。默认为`false`。 +- `server_side_encryption_customer_key_base64` - 如果指定该项的值,请求时会加上为了访问SSE-C加密数据而必须的头信息。 + +S3磁盘也可以设置冷热存储: +```xml + + ... + + + s3 + https://storage.yandexcloud.net/my-bucket/root-path/ + your_access_key_id + your_secret_access_key + + + + + +
+ s3 +
+
+
+ + +
+ default +
+ + s3 + +
+ 0.2 +
+
+ ... +
+``` + +指定了`cold`选项后,本地磁盘剩余空间如果小于`move_factor * disk_size`,或有TTL设置时,数据就会定时迁移至S3了。 [原始文章](https://clickhouse.tech/docs/en/operations/table_engines/mergetree/) diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md index 9fbe15b8507..ff7593768d3 100644 --- a/docs/zh/operations/system-tables/asynchronous_metric_log.md +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -3,6 +3,6 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -## 系统。asynchronous_metric_log {#system-tables-async-log} +## system.asynchronous_metric_log {#system-tables-async-log} 包含以下内容的历史值 `system.asynchronous_log` (见 [系统。asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics)) diff --git a/docs/zh/operations/system-tables/asynchronous_metrics.md b/docs/zh/operations/system-tables/asynchronous_metrics.md index 805477c9f47..5a302f6da7b 100644 --- a/docs/zh/operations/system-tables/asynchronous_metrics.md +++ b/docs/zh/operations/system-tables/asynchronous_metrics.md @@ -3,14 +3,14 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。asynchronous_metrics {#system_tables-asynchronous_metrics} +# system.asynchronous_metrics {#system_tables-asynchronous_metrics} 包含在后台定期计算的指标。 例如,在使用的RAM量。 列: -- `metric` ([字符串](../../sql-reference/data-types/string.md)) — Metric name. -- `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. +- `metric` ([字符串](../../sql-reference/data-types/string.md)) — 指标名。 +- `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。 **示例** @@ -34,8 +34,7 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 ``` **另请参阅** - -- [监测](../../operations/monitoring.md) — Base concepts of ClickHouse monitoring. -- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — Contains instantly calculated metrics. -- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — Contains a number of events that have occurred. -- [系统。metric_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — Contains a history of metrics values from tables `system.metrics` и `system.events`. +- [监测](../../operations/monitoring.md) — ClickHouse监控的基本概念。 +- [系统。指标](../../operations/system-tables/metrics.md#system_tables-metrics) — 包含即时计算的指标。 +- [系统。活动](../../operations/system-tables/events.md#system_tables-events) — 包含出现的事件的次数。 +- [系统。metric\_log](../../operations/system-tables/metric_log.md#system_tables-metric_log) — 包含`system.metrics` 和 `system.events`表中的指标的历史值。 diff --git a/docs/zh/operations/system-tables/clusters.md b/docs/zh/operations/system-tables/clusters.md index 1e5935c276e..bcafff4970a 100644 --- a/docs/zh/operations/system-tables/clusters.md +++ b/docs/zh/operations/system-tables/clusters.md @@ -1,24 +1,20 @@ ---- -machine_translated: true -machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 ---- - -# 系统。集群 {#system-clusters} +# system.clusters{#system-clusters} 包含有关配置文件中可用的集群及其中的服务器的信息。 列: -- `cluster` (String) — The cluster name. -- `shard_num` (UInt32) — The shard number in the cluster, starting from 1. -- `shard_weight` (UInt32) — The relative weight of the shard when writing data. -- `replica_num` (UInt32) — The replica number in the shard, starting from 1. -- `host_name` (String) — The host name, as specified in the config. -- `host_address` (String) — The host IP address obtained from DNS. -- `port` (UInt16) — The port to use for connecting to the server. -- `user` (String) — The name of the user for connecting to the server. -- `errors_count` (UInt32)-此主机无法到达副本的次数。 -- `estimated_recovery_time` (UInt32)-剩下的秒数,直到副本错误计数归零,它被认为是恢复正常。 +- `cluster` (String) — 集群名。 +- `shard_num` (UInt32) — 集群中的分片数,从1开始。 +- `shard_weight` (UInt32) — 写数据时该分片的相对权重。 +- `replica_num` (UInt32) — 分片的副本数量,从1开始。 +- `host_name` (String) — 配置中指定的主机名。 +- `host_address` (String) — 从DNS获取的主机IP地址。 +- `port` (UInt16) — 连接到服务器的端口。 +- `user` (String) — 连接到服务器的用户名。 +- `errors_count` (UInt32) - 此主机无法访问副本的次数。 +- `slowdowns_count` (UInt32) - 与对冲请求建立连接时导致更改副本的减速次数。 +- `estimated_recovery_time` (UInt32) - 剩下的秒数,直到副本错误计数归零并被视为恢复正常。 请注意 `errors_count` 每个查询集群更新一次,但 `estimated_recovery_time` 按需重新计算。 所以有可能是非零的情况 `errors_count` 和零 `estimated_recovery_time`,下一个查询将为零 `errors_count` 并尝试使用副本,就好像它没有错误。 @@ -27,3 +23,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - [表引擎分布式](../../engines/table-engines/special/distributed.md) - [distributed_replica_error_cap设置](../../operations/settings/settings.md#settings-distributed_replica_error_cap) - [distributed_replica_error_half_life设置](../../operations/settings/settings.md#settings-distributed_replica_error_half_life) + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/clusters) diff --git a/docs/zh/operations/system-tables/columns.md b/docs/zh/operations/system-tables/columns.md index 24296dc715c..b21be98c0dc 100644 --- a/docs/zh/operations/system-tables/columns.md +++ b/docs/zh/operations/system-tables/columns.md @@ -25,3 +25,5 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `is_in_sorting_key` (UInt8) — Flag that indicates whether the column is in the sorting key expression. - `is_in_primary_key` (UInt8) — Flag that indicates whether the column is in the primary key expression. - `is_in_sampling_key` (UInt8) — Flag that indicates whether the column is in the sampling key expression. + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/columns) diff --git a/docs/zh/operations/system-tables/tables.md b/docs/zh/operations/system-tables/tables.md index a690e938a3a..0c3e913b9bb 100644 --- a/docs/zh/operations/system-tables/tables.md +++ b/docs/zh/operations/system-tables/tables.md @@ -5,15 +5,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 # 系统。表 {#system-tables} -包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`. +包含服务器知道的每个表的元数据。 分离的表不显示在 `system.tables`。 此表包含以下列(列类型显示在括号中): -- `database` (String) — The name of the database the table is in. +- `database` (String) — 表所在的数据库表名。 -- `name` (String) — Table name. +- `name` (String) — 表名。 -- `engine` (String) — Table engine name (without parameters). +- `engine` (String) — 表引擎名 (不包含参数)。 - `is_temporary` (UInt8)-指示表是否是临时的标志。 @@ -23,11 +23,11 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - `metadata_modification_time` (DateTime)-表元数据的最新修改时间。 -- `dependencies_database` (数组(字符串))-数据库依赖关系. +- `dependencies_database` (数组(字符串))-数据库依赖关系。 - `dependencies_table` (数组(字符串))-表依赖关系 ([MaterializedView](../../engines/table-engines/special/materializedview.md) 基于当前表的表)。 -- `create_table_query` (String)-用于创建表的查询。 +- `create_table_query` (String)-用于创建表的SQL语句。 - `engine_full` (String)-表引擎的参数。 @@ -44,11 +44,15 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 - [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) - [分布](../../engines/table-engines/special/distributed.md#distributed) -- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则 `Null` (包括内衣 `Buffer` 表)。 +- `total_rows` (Nullable(UInt64))-总行数,如果可以快速确定表中的确切行数,否则行数为`Null`(包括底层 `Buffer` 表)。 -- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则 `Null` (**不** 包括任何底层存储)。 +- `total_bytes` (Nullable(UInt64))-总字节数,如果可以快速确定存储表的确切字节数,否则字节数为`Null` (即**不** 包括任何底层存储)。 - - If the table stores data on disk, returns used space on disk (i.e. compressed). - - 如果表在内存中存储数据,返回在内存中使用的近似字节数. + - 如果表将数据存在磁盘上,返回实际使用的磁盘空间(压缩后)。 + - 如果表在内存中存储数据,返回在内存中使用的近似字节数。 -该 `system.tables` 表中使用 `SHOW TABLES` 查询实现。 +- `lifetime_rows` (Nullbale(UInt64))-服务启动后插入的总行数(只针对`Buffer`表)。 + +`system.tables` 表被用于 `SHOW TABLES` 的查询实现中。 + +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/tables) diff --git a/docs/zh/operations/system-tables/zookeeper.md b/docs/zh/operations/system-tables/zookeeper.md index f7e816ccee6..ca767fba7aa 100644 --- a/docs/zh/operations/system-tables/zookeeper.md +++ b/docs/zh/operations/system-tables/zookeeper.md @@ -3,13 +3,13 @@ machine_translated: true machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 --- -# 系统。动物园管理员 {#system-zookeeper} +# system.zookeeper {#system-zookeeper} -如果未配置ZooKeeper,则表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 -查询必须具有 ‘path’ WHERE子句中的相等条件或者在某个集合中的条件。 这是ZooKeeper中您想要获取数据的孩子的路径。 +如果未配置ZooKeeper,则该表不存在。 允许从配置中定义的ZooKeeper集群读取数据。 +查询必须具有 ‘path’ WHERE子句中的相等条件。 这是ZooKeeper中您想要获取数据的子路径。 -查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出对所有孩子的数据 `/clickhouse` 节点。 -要输出所有根节点的数据,write path= ‘/’. +查询 `SELECT * FROM system.zookeeper WHERE path = '/clickhouse'` 输出`/clickhouse`节点的对所有子路径的数据。 +要输出所有根节点的数据,使用path= ‘/’. 如果在指定的路径 ‘path’ 不存在,将引发异常。 查询`SELECT * FROM system.zookeeper WHERE path IN ('/', '/clickhouse')` 输出`/` 和 `/clickhouse`节点上所有子节点的数据。 @@ -18,20 +18,20 @@ machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3 列: -- `name` (String) — The name of the node. -- `path` (String) — The path to the node. -- `value` (String) — Node value. -- `dataLength` (Int32) — Size of the value. -- `numChildren` (Int32) — Number of descendants. -- `czxid` (Int64) — ID of the transaction that created the node. -- `mzxid` (Int64) — ID of the transaction that last changed the node. -- `pzxid` (Int64) — ID of the transaction that last deleted or added descendants. -- `ctime` (DateTime) — Time of node creation. -- `mtime` (DateTime) — Time of the last modification of the node. -- `version` (Int32) — Node version: the number of times the node was changed. -- `cversion` (Int32) — Number of added or removed descendants. -- `aversion` (Int32) — Number of changes to the ACL. -- `ephemeralOwner` (Int64) — For ephemeral nodes, the ID of the session that owns this node. +- `name` (String) — 节点的名字。 +- `path` (String) — 节点的路径。 +- `value` (String) — 节点的值。 +- `dataLength` (Int32) — 节点的值长度。 +- `numChildren` (Int32) — 子节点的个数。 +- `czxid` (Int64) — 创建该节点的事务ID。 +- `mzxid` (Int64) — 最后修改该节点的事务ID。 +- `pzxid` (Int64) — 最后删除或者增加子节点的事务ID。 +- `ctime` (DateTime) — 节点的创建时间。 +- `mtime` (DateTime) — 节点的最后修改时间。 +- `version` (Int32) — 节点版本:节点被修改的次数。 +- `cversion` (Int32) — 增加或删除子节点的个数。 +- `aversion` (Int32) — ACL的修改次数。 +- `ephemeralOwner` (Int64) — 针对临时节点,拥有该节点的事务ID。 示例: @@ -77,3 +77,4 @@ numChildren: 7 pzxid: 987021252247 path: /clickhouse/tables/01-08/visits/replicas ``` +[原文](https://clickhouse.tech/docs/zh/operations/system-tables/zookeeper) diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md index 446feac96ce..4d1cdca71e5 100644 --- a/docs/zh/sql-reference/statements/alter.md +++ b/docs/zh/sql-reference/statements/alter.md @@ -174,7 +174,7 @@ MODIFY ORDER BY new_expression 该操作仅支持 [`MergeTree`](../../engines/table-engines/mergetree-family/mergetree.md) 系列表 (含 [replicated](../../engines/table-engines/mergetree-family/replication.md) 表)。 下列操作是允许的: -- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value AFTER name [AFTER name2]` - 在表的元数据中增加索引说明 +- `ALTER TABLE [db].name ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - 在表的元数据中增加索引说明 - `ALTER TABLE [db].name DROP INDEX name` - 从表的元数据中删除索引描述,并从磁盘上删除索引文件 diff --git a/docs/zh/sql-reference/syntax.md b/docs/zh/sql-reference/syntax.md index c05c5a1a7bf..644dc646726 100644 --- a/docs/zh/sql-reference/syntax.md +++ b/docs/zh/sql-reference/syntax.md @@ -1,39 +1,42 @@ --- toc_priority: 31 toc_title: SQL语法 + --- # SQL语法 {#syntax} -CH有2类解析器:完整SQL解析器(递归式解析器),以及数据格式解析器(快速流式解析器) +ClickHouse有2类解析器:完整SQL解析器(递归式解析器),以及数据格式解析器(快速流式解析器) 除了 `INSERT` 查询,其它情况下仅使用完整SQL解析器。 `INSERT`查询会同时使用2种解析器: + ``` sql INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -含`INSERT INTO t VALUES` 的部分由完整SQL解析器处理,包含数据的部分 `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` 交给快速流式解析器解析。通过设置参数 [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions),你也可以对数据部分开启完整SQL解析器。当 `input_format_values_interpret_expressions = 1` 时,CH优先采用快速流式解析器来解析数据。如果失败,CH再尝试用完整SQL解析器来处理,就像处理SQL [expression](#syntax-expressions) 一样。 +含`INSERT INTO t VALUES` 的部分由完整SQL解析器处理,包含数据的部分 `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` 交给快速流式解析器解析。通过设置参数 [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions),你也可以对数据部分开启完整SQL解析器。当 `input_format_values_interpret_expressions = 1` 时,ClickHouse优先采用快速流式解析器来解析数据。如果失败,ClickHouse再尝试用完整SQL解析器来处理,就像处理SQL [expression](#syntax-expressions) 一样。 数据可以采用任何格式。当CH接收到请求时,服务端先在内存中计算不超过 [max_query_size](../operations/settings/settings.md#settings-max_query_size) 字节的请求数据(默认1 mb),然后剩下部分交给快速流式解析器。 -这将避免在处理大型的 `INSERT`语句时出现问题。 +当 `INSERT` 语句中使用 `Values` 格式时,看起来数据部分的解析和解析`SELECT` 中的表达式相同,但并不是这样的。 `Values` 格式有非常多的限制。 -当 `INSERT` 语句中使用 `Values` 形式时,看起来 数据部分的解析和解析`SELECT` 中的表达式相同,但并不是这样的。 `Values` 形式非常有限。 -该篇的剩余部分涵盖了完整SQL解析器。关于格式解析的更多信息,参见 [Formats](../interfaces/formats.md) 章节。 +本文的剩余部分涵盖了完整SQL解析器。关于格式解析的更多信息,参见 [Formats](../interfaces/formats.md) 章节。 -## 空字符 {#spaces} +## 空白{#spaces} -sql语句中(包含sql的起始和结束)可以有任意的空字符,这些空字符类型包括:空格字符,tab制表符,换行符,CR符,换页符等。 +sql语句的语法结构部分之间(标识符之间、部分符号之间、包括sql的起始和结束)可以有任意的空白字符,这些空字符类型包括:空格字符,tab制表符,换行符,CR符,换页符等。 ## 注释 {#comments} -CH支持SQL风格或C语言风格的注释: +ClickHouse支持SQL风格或C语言风格的注释: + - SQL风格的注释以 `--` 开始,直到行末,`--` 后紧跟的空格可以忽略 -- C语言风格的注释以 `/*` 开始,以 `*/` 结束,支持多行形式,同样可以省略 `/*` 后的空格 +- C语言风格的注释以 `/*` 开始,以 `*/` 结束,可以跨行,同样可以省略 `/*` 后的空格 ## 关键字 {#syntax-keywords} 以下场景的关键字是大小写不敏感的: + - 标准SQL。例如,`SELECT`, `select` 和 `SeLeCt` 都是允许的 - 在某些流行的RDBMS中被实现的关键字,例如,`DateTime` 和 `datetime`是一样的 @@ -41,38 +44,36 @@ CH支持SQL风格或C语言风格的注释: 你可以在系统表 [system.data_type_families](../operations/system-tables/data_type_families.md#system_tables-data_type_families) 中检查某个数据类型的名称是否是大小写敏感型。 和标准SQL相反,所有其它的关键字都是 **大小写敏感的**,包括函数名称。 -In contrast to standard SQL, all other keywords (including functions names) are **case-sensitive**. -关键字不是保留的;它们仅在相应的上下文中才会被处理。如果你使用和关键字同名的 [变量名](#syntax-identifiers) ,需要使用双引号或转移符将它们包含起来。例如:如果表 `table_name` 包含列 `"FROM"`,那么 `SELECT "FROM" FROM table_name` 是合法的 +关键字不是保留的;它们仅在相应的上下文中才会被认为是关键字。如果你使用和关键字同名的 [标识符](#syntax-identifiers) ,需要使用双引号或反引号将它们包含起来。例如:如果表 `table_name` 包含列 `"FROM"`,那么 `SELECT "FROM" FROM table_name` 是合法的 -## 变量名 {#syntax-identifiers} +## 标识符 {#syntax-identifiers} -变量包括: -Identifiers are: +标识符包括: -- 集群,数据库,表,分区,列名称 +- 集群、数据库、表、分区、列的名称 - 函数 - 数据类型 -- 表达式别名 +- [表达式别名](https://clickhouse.tech/docs/zh/sql-reference/syntax/#syntax-expression_aliases) -变量名可以使用反引号包含起来 +变量名可以被括起或不括起,后者是推荐做法。 -没有使用反引号包含的变量名,必须匹配正则表达式 `^[a-zA-Z_][0-9a-zA-Z_]*$`,并且不能和 [关键字]相同 +没有括起的变量名,必须匹配正则表达式 `^[a-zA-Z_][0-9a-zA-Z_]*$`,并且不能和 [关键字](#syntax-keywords)相同,合法的标识符名称:`x`,`_1`,`X_y__Z123_`等。 -如果想使用和关键字同名的变量名称,或者在变量名称中包含其它符号,你需要通过双引号或转义符号,例如: `"id"`, `` `id` `` +如果想使用和关键字同名的变量名称,或者在变量名称中包含其它符号,你需要通过双引号或反引号,例如: `"id"`, `` `id` `` ## 字符 {#literals} -CH包含数字,字母,括号,NULL值等字符 +字符包含数字,字母,括号,NULL值等字符。 ### 数字 {#numeric} 数字类型字符会被做如下解析: -- 首先,当做64位的有符号整数,使用该函数 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) + +- 首先,当做64位的有符号整数,使用函数 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) - 如果失败,解析成64位无符号整数,同样使用函数 [strtoull](https://en.cppreference.com/w/cpp/string/byte/strtoul) - 如果还失败了,试图解析成浮点型数值,使用函数 [strtod](https://en.cppreference.com/w/cpp/string/byte/strtof) -Numeric literal tries to be parsed: - 最后,以上情形都不符合时,返回异常 @@ -82,13 +83,14 @@ Numeric literal tries to be parsed: 例如: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. -### 字母 {#syntax-string-literal} -CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转义。下列转义字符都有相应的实际值: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`。其它情况下,以 `\c`形式出现的转义字符,当`c`表示任意字符时,转义字符会转换成`c`。这意味着你可以使用 `\'`和`\\`。该值将拥有[String](../sql-reference/data-types/string.md)类型。 +### 字符串 {#syntax-string-literal} + +ClickHouse只支持用单引号包含的字符串。特殊字符可通过反斜杠进行转义。下列转义字符都有相应的实际值: `\b`, `\f`, `\r`, `\n`, `\t`, `\0`, `\a`, `\v`, `\xHH`。其它情况下,以 `\c`形式出现的转义字符,当`c`表示任意字符时,转义字符会转换成`c`。这意味着你可以使用 `\'`和`\\`。该值将拥有[String](../sql-reference/data-types/string.md)类型。 在字符串中,你至少需要对 `'` 和 `\` 进行转义。单引号可以使用单引号转义,例如 `'It\'s'` 和 `'It''s'` 是相同的。 -### 括号 {#compound} +### 复合字符串 {#compound} 数组都是使用方括号进行构造 `[1, 2, 3]`,元组则使用圆括号 `(1, 'Hello, world!', 2)` 从技术上来讲,这些都不是字符串,而是包含创建数组和元组运算符的表达式。 @@ -97,17 +99,18 @@ CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转 ### NULL值 {#null-literal} -代表不存在的值 +代表不存在的值。 -为了能在表字段中存储NULL值,该字段必须声明为 [空值](../sql-reference/data-types/nullable.md) 类型 +为了能在表字段中存储NULL值,该字段必须声明为 [空值](../sql-reference/data-types/nullable.md) 类型。 根据数据的格式(输入或输出),NULL值有不同的表现形式。更多信息参见文档 [数据格式](../interfaces/formats.md#formats) -在处理 `NULL`时存在很多细微差别。例如,比较运算的至少一个参数为 `NULL` ,该结果也是 `NULL` 。与之类似的还有乘法运算, 加法运算,以及其它运算。更多信息,请参阅每种运算的文档部分。 +在处理 `NULL`时存在很多细微差别。例如,比较运算的至少一个参数为 `NULL` ,则该结果也是 `NULL` 。与之类似的还有乘法运算, 加法运算,以及其它运算。更多信息,请参阅每种运算的文档部分。 -在语句中,可以通过 [是否为NULL](operators/index.md#operator-is-null) 以及 [是否不为NULL](operators/index.md) 运算符,以及 `isNull` 、 `isNotNull` 函数来检查 `NULL` 值 +在语句中,可以通过 [IS NULL](operators/index.md#operator-is-null) 以及 [IS NOT NULL](operators/index.md) 运算符,以及 `isNull` 、 `isNotNull` 函数来检查 `NULL` 值 ## 函数 {#functions} -函数调用的写法,类似于变量并带有被圆括号包含的参数列表(可能为空)。与标准SQL不同,圆括号是必须的,不管参数列表是否为空。例如: `now()`。 + +函数调用的写法,类似于一个标识符后接被圆括号包含的参数列表(可能为空)。与标准SQL不同,圆括号是必须的,不管参数列表是否为空。例如: `now()`。 函数分为常规函数和聚合函数(参见“Aggregate functions”一章)。有些聚合函数包含2个参数列表,第一个参数列表中的参数被称为“parameters”。不包含“parameters”的聚合函数语法和常规函数是一样的。 @@ -116,12 +119,12 @@ CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转 在查询解析阶段,运算符会被转换成对应的函数,使用时请注意它们的优先级。例如: 表达式 `1 + 2 * 3 + 4` 会被解析成 `plus(plus(1, multiply(2, 3)), 4)`. - + ## 数据类型及数据库/表引擎 {#data_types-and-database-table-engines} `CREATE` 语句中的数据类型和表引擎写法与变量或函数类似。 -换句话说,它们可以用括号包含参数列表。更多信息,参见“数据类型,” “数据表引擎” 和 “CREATE语句”等章节 +换句话说,它们可以包含或不包含用括号包含的参数列表。更多信息,参见“数据类型,” “数据表引擎” 和 “CREATE语句”等章节 ## 表达式别名 {#syntax-expression_aliases} @@ -131,29 +134,30 @@ CH只支持用单引号包含的字母。特殊字符可通过反斜杠进行转 expr AS alias ``` -- `AS` — 用于定义别名的关键字。可以对表或select语句中的列定义别名(`AS` 可以省略) - 例如, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. +- `AS` — 用于定义别名的关键字。可以对表或select语句中的列定义别名(`AS` 可以省略) + 例如, `SELECT table_name_alias.column_name FROM table_name table_name_alias`. - 在 [CAST函数](sql_reference/functions/type_conversion_functions.md#type_conversion_function-cast) 中,`AS`有其它含义。请参见该函数的说明部分。 + 在 [CAST函数](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) 中,`AS`有其它含义。请参见该函数的说明部分。 -- `expr` — 任意CH支持的表达式. +- `expr` — 任意CH支持的表达式. - 例如, `SELECT column_name * 2 AS double FROM some_table`. + 例如, `SELECT column_name * 2 AS double FROM some_table`. -- `alias` — `expr` 的名称。别名必须符合 [变量名]](#syntax-identifiers) 语法. +- `alias` — `expr` 的名称。别名必须符合 [标识符](#syntax-identifiers) 语法. - 例如, `SELECT "table t".column_name FROM table_name AS "table t"`. + 例如, `SELECT "table t".column_name FROM table_name AS "table t"`. ### 用法注意 {#notes-on-usage} 别名在当前查询或子查询中是全局可见的,你可以在查询语句的任何位置对表达式定义别名 -别名在当前查询的子查询及不同子查询中是不可见的。例如,执行如下查询SQL: `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ,CH会提示异常 `Unknown identifier: num`. +别名在当前查询的子查询及不同子查询中是不可见的。例如,执行如下查询SQL: `SELECT (SELECT sum(b.a) + num FROM b) - a.a AS num FROM a` ,ClickHouse会提示异常 `Unknown identifier: num`. 如果给select子查询语句的结果列定义其别名,那么在外层可以使用该别名。例如, `SELECT n + m FROM (SELECT 1 AS n, 2 AS m)`. 注意列的别名和表的别名相同时的情形,考虑如下示例: + ``` sql CREATE TABLE t ( @@ -175,7 +179,7 @@ Received exception from server (version 18.14.17): Code: 184. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: Aggregate function sum(b) is found inside another aggregate function in query. ``` -在这个示例中,先声明了表 `t` 以及列 `b`。然后,在查询数据时,又定义了别名 `sum(b) AS b`。由于别名是全局的,CH使用表达式 `sum(b)` 来替换表达式 `argMax(a, b)` 中的变量 `b`。这种替换导致出现异常。 +在这个示例中,先声明了表 `t` 以及列 `b`。然后,在查询数据时,又定义了别名 `sum(b) AS b`。由于别名是全局的,ClickHouse使用表达式 `sum(b)` 来替换表达式 `argMax(a, b)` 中的变量 `b`。这种替换导致出现异常。 ## 星号 {#asterisk} @@ -184,7 +188,7 @@ select查询中,星号可以代替表达式使用。详情请参见“select ## 表达式 {#syntax-expressions} -表达式是函数、标识符、字符、运算符的应用程序、括号中的表达式、子查询或星号。它也可以包含别名。 +表达式是函数、标识符、字符、使用运算符的语句、括号中的表达式、子查询或星号。它也可以包含别名。 表达式列表是用逗号分隔的一个或多个表达式。 反过来,函数和运算符可以将表达式作为参数。 diff --git a/docs/zh/sql-reference/table-functions/s3.md b/docs/zh/sql-reference/table-functions/s3.md new file mode 100644 index 00000000000..c55412f4ddd --- /dev/null +++ b/docs/zh/sql-reference/table-functions/s3.md @@ -0,0 +1,132 @@ +--- +toc_priority: 45 +toc_title: s3 +--- + +# S3 表函数 {#s3-table-function} + +提供类似于表的接口来 select/insert [Amazon S3](https://aws.amazon.com/s3/)中的文件。这个表函数类似于[hdfs](../../sql-reference/table-functions/hdfs.md),但提供了 S3 特有的功能。 + +**语法** + +``` sql +s3(path, [aws_access_key_id, aws_secret_access_key,] format, structure, [compression]) +``` + +**参数** + +- `path` — 带有文件路径的 Bucket url。在只读模式下支持以下通配符: `*`, `?`, `{abc,def}` 和 `{N..M}` 其中 `N`, `M` 是数字, `'abc'`, `'def'` 是字符串. 更多信息见[下文](#wildcards-in-path). +- `format` — 文件的[格式](../../interfaces/formats.md#formats). +- `structure` — 表的结构. 格式像这样 `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — 压缩类型. 支持的值: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. 参数是可选的. 默认情况下,通过文件扩展名自动检测压缩类型. + +**返回值** + +一个具有指定结构的表,用于读取或写入指定文件中的数据。 + +**示例** + +从 S3 文件`https://storage.yandexcloud.net/my-test-bucket-768/data.csv`中选择表格的前两行: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +类似的情况,但来源是`gzip`压缩的文件: + +``` sql +SELECT * +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## 用法 {#usage-examples} + +假设我们在S3上有几个文件,URI如下: + +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://storage.yandexcloud.net/my-test-bucket-768/another_prefix/some_file_4.csv' + +计算以数字1至3结尾的文件的总行数: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +计算这两个目录中所有文件的行的总量: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + +!!! warning "Warning" + 如果文件列表中包含有从零开头的数字范围,请对每个数字分别使用带括号的结构,或者使用`?`。 + +计算名为 `file-000.csv`, `file-001.csv`, … , `file-999.csv` 文件的总行数: + +``` sql +SELECT count(*) +FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +插入数据到 `test-data.csv.gz` 文件: + +``` sql +INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); +``` + +从已有的表插入数据到 `test-data.csv.gz` 文件: + +``` sql +INSERT INTO FUNCTION s3('https://storage.yandexcloud.net/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; +``` + +**另请参阅** + +- [S3 引擎](../../engines/table-engines/integrations/s3.md) + +[原始文章](https://clickhouse.tech/docs/en/sql-reference/table-functions/s3/) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index c8f1a4eef47..859222c236e 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -37,6 +37,7 @@ #include #include + namespace fs = std::filesystem; /** A tool for evaluating ClickHouse performance. diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.cpp b/src/AggregateFunctions/AggregateFunctionQuantile.cpp index cae0021082f..11b14585653 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.cpp +++ b/src/AggregateFunctions/AggregateFunctionQuantile.cpp @@ -125,44 +125,47 @@ AggregateFunctionPtr createAggregateFunctionQuantile( void registerAggregateFunctionsQuantile(AggregateFunctionFactory & factory) { + /// For aggregate functions returning array we cannot return NULL on empty set. + AggregateFunctionProperties properties = { .returns_default_when_only_null = true }; + factory.registerFunction(NameQuantile::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantiles::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantiles::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileDeterministic::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesDeterministic::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesDeterministic::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileExact::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesExact::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesExact::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileExactLow::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesExactLow::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesExactLow::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileExactHigh::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesExactHigh::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesExactHigh::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileExactExclusive::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesExactExclusive::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesExactExclusive::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileExactInclusive::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesExactInclusive::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesExactInclusive::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileExactWeighted::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesExactWeighted::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesExactWeighted::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileTiming::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesTiming::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesTiming::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileTimingWeighted::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesTimingWeighted::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesTimingWeighted::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileTDigest::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesTDigest::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesTDigest::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileTDigestWeighted::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesTDigestWeighted::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesTDigestWeighted::name, { createAggregateFunctionQuantile, properties }); factory.registerFunction(NameQuantileBFloat16::name, createAggregateFunctionQuantile); - factory.registerFunction(NameQuantilesBFloat16::name, createAggregateFunctionQuantile); + factory.registerFunction(NameQuantilesBFloat16::name, { createAggregateFunctionQuantile, properties }); /// 'median' is an alias for 'quantile' factory.registerAlias("median", NameQuantile::name); diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp index 3826d993c4a..64eb0932de9 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.cpp +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.cpp @@ -4,6 +4,8 @@ #include #include +#include + namespace DB { @@ -15,7 +17,7 @@ namespace ErrorCodes extern const int PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS; } -Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const std::string & error_context) +Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const std::string & error_context, ContextPtr context) { const ASTs & parameters = expression_list->children; if (parameters.empty()) @@ -25,25 +27,25 @@ Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const for (size_t i = 0; i < parameters.size(); ++i) { - const auto * literal = parameters[i]->as(); - - ASTPtr func_literal; - if (!literal) - if (const auto * func = parameters[i]->as()) - if ((func_literal = func->toLiteral())) - literal = func_literal->as(); - - if (!literal) + ASTPtr literal; + try { - throw Exception( - ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, - "Parameters to aggregate functions must be literals. " - "Got parameter '{}'{}", - parameters[i]->formatForErrorMessage(), - (error_context.empty() ? "" : " (in " + error_context +")")); + literal = evaluateConstantExpressionAsLiteral(parameters[i], context); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + throw Exception( + ErrorCodes::PARAMETERS_TO_AGGREGATE_FUNCTIONS_MUST_BE_LITERALS, + "Parameters to aggregate functions must be literals. " + "Got parameter '{}'{}", + parameters[i]->formatForErrorMessage(), + (error_context.empty() ? "" : " (in " + error_context +")")); + + throw; } - params_row[i] = literal->value; + params_row[i] = literal->as()->value; } return params_row; @@ -54,7 +56,8 @@ void getAggregateFunctionNameAndParametersArray( const std::string & aggregate_function_name_with_params, std::string & aggregate_function_name, Array & aggregate_function_parameters, - const std::string & error_context) + const std::string & error_context, + ContextPtr context) { if (aggregate_function_name_with_params.back() != ')') { @@ -84,7 +87,7 @@ void getAggregateFunctionNameAndParametersArray( throw Exception("Incorrect list of parameters to aggregate function " + aggregate_function_name, ErrorCodes::BAD_ARGUMENTS); - aggregate_function_parameters = getAggregateFunctionParametersArray(args_ast); + aggregate_function_parameters = getAggregateFunctionParametersArray(args_ast, error_context, context); } } diff --git a/src/AggregateFunctions/parseAggregateFunctionParameters.h b/src/AggregateFunctions/parseAggregateFunctionParameters.h index 37f1f1d5097..033e92714dd 100644 --- a/src/AggregateFunctions/parseAggregateFunctionParameters.h +++ b/src/AggregateFunctions/parseAggregateFunctionParameters.h @@ -1,19 +1,23 @@ #pragma once + #include #include +#include namespace DB { -struct Settings; - -Array getAggregateFunctionParametersArray(const ASTPtr & expression_list, const std::string & error_context = ""); +Array getAggregateFunctionParametersArray( + const ASTPtr & expression_list, + const std::string & error_context, + ContextPtr context); void getAggregateFunctionNameAndParametersArray( const std::string & aggregate_function_name_with_params, std::string & aggregate_function_name, Array & aggregate_function_parameters, - const std::string & error_context); + const std::string & error_context, + ContextPtr context); } diff --git a/src/Common/StringUtils/StringUtils.h b/src/Common/StringUtils/StringUtils.h index 20c0a5ca380..f6ad61f8fd9 100644 --- a/src/Common/StringUtils/StringUtils.h +++ b/src/Common/StringUtils/StringUtils.h @@ -149,7 +149,11 @@ inline bool isPunctuationASCII(char c) inline bool isValidIdentifier(const std::string_view & str) { - return !str.empty() && isValidIdentifierBegin(str[0]) && std::all_of(str.begin() + 1, str.end(), isWordCharASCII); + return !str.empty() + && isValidIdentifierBegin(str[0]) + && std::all_of(str.begin() + 1, str.end(), isWordCharASCII) + /// NULL is not a valid identifier in SQL, any case. + && !(str.size() == strlen("null") && 0 == strncasecmp(str.data(), "null", strlen("null"))); } /// Works assuming isAlphaASCII. diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 6c13eea0a1b..ed9ea3e1b5c 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -34,41 +34,15 @@ std::string concatenateName(const std::string & nested_table_name, const std::st } -/** Name can be treated as compound if and only if both parts are simple identifiers. +/** Name can be treated as compound if it contains dot (.) in the middle. */ std::pair splitName(const std::string & name) { - const char * begin = name.data(); - const char * pos = begin; - const char * end = begin + name.size(); - - if (pos >= end || !isValidIdentifierBegin(*pos)) + auto idx = name.find_first_of('.'); + if (idx == std::string::npos || idx == 0 || idx + 1 == name.size()) return {name, {}}; - ++pos; - - while (pos < end && isWordCharASCII(*pos)) - ++pos; - - if (pos >= end || *pos != '.') - return {name, {}}; - - const char * first_end = pos; - ++pos; - const char * second_begin = pos; - - if (pos >= end || !isValidIdentifierBegin(*pos)) - return {name, {}}; - - ++pos; - - while (pos < end && isWordCharASCII(*pos)) - ++pos; - - if (pos != end) - return {name, {}}; - - return {{ begin, first_end }, { second_begin, end }}; + return {name.substr(0, idx), name.substr(idx + 1)}; } diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 9a4b5aafdb9..e070596e5ee 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -33,11 +33,12 @@ namespace ErrorCodes * arrayReduce('agg', arr1, ...) - apply the aggregate function `agg` to arrays `arr1...` * If multiple arrays passed, then elements on corresponding positions are passed as multiple arguments to the aggregate function. */ -class FunctionArrayReduce : public IFunction +class FunctionArrayReduce : public IFunction, private WithContext { public: static constexpr auto name = "arrayReduce"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + explicit FunctionArrayReduce(ContextPtr context_) : WithContext(context_) {} String getName() const override { return name; } @@ -95,7 +96,7 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, - aggregate_function_name, params_row, "function " + getName()); + aggregate_function_name, params_row, "function " + getName(), getContext()); AggregateFunctionProperties properties; aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 9a2e8e1ca95..18140fe504d 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -35,12 +35,13 @@ namespace ErrorCodes * * arrayReduceInRanges('agg', indices, lengths, arr1, ...) */ -class FunctionArrayReduceInRanges : public IFunction +class FunctionArrayReduceInRanges : public IFunction, private WithContext { public: static const size_t minimum_step = 64; static constexpr auto name = "arrayReduceInRanges"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + explicit FunctionArrayReduceInRanges(ContextPtr context_) : WithContext(context_) {} String getName() const override { return name; } @@ -113,7 +114,7 @@ DataTypePtr FunctionArrayReduceInRanges::getReturnTypeImpl(const ColumnsWithType String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, - aggregate_function_name, params_row, "function " + getName()); + aggregate_function_name, params_row, "function " + getName(), getContext()); AggregateFunctionProperties properties; aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 1a19ee41d2f..72d6707c4b2 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -1,20 +1,25 @@ #include -#include +#include +#include #include +#include +#include +#include #include #include #include #include #include #include -#include +#include "Columns/ColumnMap.h" +#include "DataTypes/DataTypeMap.h" namespace DB { namespace ErrorCodes { - extern const int ILLEGAL_COLUMN; + extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -24,8 +29,8 @@ namespace struct TupArg { - const IColumn & key_column; - const IColumn & val_column; + const ColumnPtr & key_column; + const ColumnPtr & val_column; const IColumn::Offsets & key_offsets; const IColumn::Offsets & val_offsets; bool is_const; @@ -52,27 +57,49 @@ private: bool isVariadic() const override { return true; } bool useDefaultImplementationForConstants() const override { return true; } - DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + void checkTypes( + DataTypePtr & key_type, DataTypePtr & promoted_val_type, const DataTypePtr & check_key_type, DataTypePtr & check_val_type) const + { + if (!(check_key_type->equals(*key_type))) + throw Exception( + "Expected same " + key_type->getName() + " type for all keys in " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + WhichDataType which_val(promoted_val_type); + WhichDataType which_ch_val(check_val_type); + + if (which_ch_val.isFloat() != which_val.isFloat()) + throw Exception( + "All value types in " + getName() + " should be either or float or integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + if (!(check_val_type->equals(*promoted_val_type))) + { + throw Exception( + "All value types in " + getName() + " should be promotable to " + promoted_val_type->getName() + ", got " + + check_val_type->getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + + DataTypePtr getReturnTypeForTuples(const DataTypes & arguments) const { - bool is_float = false; DataTypePtr key_type, val_type, res; - if (arguments.size() < 2) - throw Exception{getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; - - for (const auto & tup_arg : arguments) + for (const auto & arg : arguments) { - const DataTypeTuple * tup = checkAndGetDataType(tup_arg.get()); + const DataTypeArray * k; + const DataTypeArray * v; + + const DataTypeTuple * tup = checkAndGetDataType(arg.get()); if (!tup) - throw Exception{getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); auto elems = tup->getElements(); if (elems.size() != 2) throw Exception( "Each tuple in " + getName() + " arguments should consist of two arrays", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - const DataTypeArray * k = checkAndGetDataType(elems[0].get()); - const DataTypeArray * v = checkAndGetDataType(elems[1].get()); + k = checkAndGetDataType(elems[0].get()); + v = checkAndGetDataType(elems[1].get()); if (!k || !v) throw Exception( @@ -80,62 +107,100 @@ private: auto result_type = v->getNestedType(); if (!result_type->canBePromoted()) - throw Exception{"Values to be summed are expected to be Numeric, Float or Decimal.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception( + "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - WhichDataType which_val(result_type); - - auto promoted_type = result_type->promoteNumericType(); + auto promoted_val_type = result_type->promoteNumericType(); if (!key_type) { key_type = k->getNestedType(); - val_type = promoted_type; - is_float = which_val.isFloat(); + val_type = promoted_val_type; + res = std::make_shared( + DataTypes{std::make_shared(k->getNestedType()), std::make_shared(promoted_val_type)}); } else - { - if (!(k->getNestedType()->equals(*key_type))) - throw Exception( - "All key types in " + getName() + " should be same: " + key_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (is_float != which_val.isFloat()) - throw Exception( - "All value types in " + getName() + " should be or float or integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - - if (!(promoted_type->equals(*val_type))) - { - throw Exception( - "All value types in " + getName() + " should be promotable to " + val_type->getName() + ", got " - + promoted_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } - } - - if (!res) - { - res = std::make_shared( - DataTypes{std::make_shared(k->getNestedType()), std::make_shared(promoted_type)}); - } + checkTypes(key_type, val_type, k->getNestedType(), promoted_val_type); } return res; } - template - ColumnPtr execute2(size_t row_count, TupleMaps & args, const DataTypeTuple & res_type) const + DataTypePtr getReturnTypeForMaps(const DataTypes & arguments) const { - MutableColumnPtr res_tuple = res_type.createColumn(); + DataTypePtr key_type, val_type, res; - auto * to_tuple = assert_cast(res_tuple.get()); - auto & to_keys_arr = assert_cast(to_tuple->getColumn(0)); - auto & to_keys_data = to_keys_arr.getData(); - auto & to_keys_offset = to_keys_arr.getOffsets(); + for (const auto & arg : arguments) + { + const auto * map = checkAndGetDataType(arg.get()); + if (!map) + throw Exception(getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - auto & to_vals_arr = assert_cast(to_tuple->getColumn(1)); - auto & to_vals_data = to_vals_arr.getData(); + const auto & v = map->getValueType(); + + if (!v->canBePromoted()) + throw Exception( + "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + auto promoted_val_type = v->promoteNumericType(); + if (!key_type) + { + key_type = map->getKeyType(); + val_type = promoted_val_type; + res = std::make_shared(DataTypes({key_type, promoted_val_type})); + } + else + checkTypes(key_type, val_type, map->getKeyType(), promoted_val_type); + } + + return res; + } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() < 2) + throw Exception(getName() + " accepts at least two maps or map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + if (arguments[0]->getTypeId() == TypeIndex::Tuple) + return getReturnTypeForTuples(arguments); + else if (arguments[0]->getTypeId() == TypeIndex::Map) + return getReturnTypeForMaps(arguments); + else + throw Exception(getName() + " only accepts maps", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + + template + ColumnPtr execute2(size_t row_count, TupleMaps & args, const DataTypePtr res_type) const + { + MutableColumnPtr res_column = res_type->createColumn(); + IColumn *to_keys_data, *to_vals_data; + ColumnArray::Offsets * to_keys_offset; + ColumnArray::Offsets * to_vals_offset = nullptr; + + // prepare output destinations + if (res_type->getTypeId() == TypeIndex::Tuple) + { + auto * to_tuple = assert_cast(res_column.get()); + auto & to_keys_arr = assert_cast(to_tuple->getColumn(0)); + to_keys_data = &to_keys_arr.getData(); + to_keys_offset = &to_keys_arr.getOffsets(); + + auto & to_vals_arr = assert_cast(to_tuple->getColumn(1)); + to_vals_data = &to_vals_arr.getData(); + to_vals_offset = &to_vals_arr.getOffsets(); + } + else + { + assert(res_type->getTypeId() == TypeIndex::Map); + + auto * to_map = assert_cast(res_column.get()); + auto & to_wrapper_arr = to_map->getNestedColumn(); + to_keys_offset = &to_wrapper_arr.getOffsets(); + + auto & to_map_tuple = to_map->getNestedData(); + to_keys_data = &to_map_tuple.getColumn(0); + to_vals_data = &to_map_tuple.getColumn(1); + } - size_t res_offset = 0; std::map summing_map; for (size_t i = 0; i < row_count; i++) @@ -147,7 +212,7 @@ private: if (!arg.is_const) { - offset = i > 0 ? arg.key_offsets[i - 1] : 0; + offset = arg.key_offsets[i - 1]; len = arg.key_offsets[i] - offset; if (arg.val_offsets[i] != arg.key_offsets[i]) @@ -155,20 +220,30 @@ private: "Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } + Field temp_val; for (size_t j = 0; j < len; j++) { KeyType key; - if constexpr (is_str_key) + if constexpr (std::is_same::value) { - // have to use Field to get strings - key = arg.key_column[offset + j].get(); + if (const auto * col_fixed = checkAndGetColumn(arg.key_column.get())) + key = col_fixed->getDataAt(offset + j).toString(); + else if (const auto * col_str = checkAndGetColumn(arg.key_column.get())) + key = col_str->getDataAt(offset + j).toString(); + else + // should not happen + throw Exception( + "Expected String or FixedString, got " + std::string(getTypeName(arg.key_column->getDataType())) + + " in " + getName(), + ErrorCodes::LOGICAL_ERROR); } else { - key = assert_cast &>(arg.key_column).getData()[offset + j]; + key = assert_cast *>(arg.key_column.get())->getData()[offset + j]; } - ValType value = arg.val_column[offset + j].get(); + arg.val_column->get(offset + j, temp_val); + ValType value = temp_val.get(); if constexpr (op_type == OpTypes::ADD) { @@ -190,132 +265,184 @@ private: for (const auto & elem : summing_map) { - res_offset++; - to_keys_data.insert(elem.first); - to_vals_data.insert(elem.second); + to_keys_data->insert(elem.first); + to_vals_data->insert(elem.second); } - to_keys_offset.push_back(res_offset); + to_keys_offset->push_back(to_keys_data->size()); summing_map.clear(); } - // same offsets as in keys - to_vals_arr.getOffsets().insert(to_keys_offset.begin(), to_keys_offset.end()); + if (to_vals_offset) + { + // same offsets as in keys + to_vals_offset->insert(to_keys_offset->begin(), to_keys_offset->end()); + } - return res_tuple; + return res_column; } - template - ColumnPtr execute1(size_t row_count, const DataTypeTuple & res_type, TupleMaps & args) const + template + ColumnPtr execute1(size_t row_count, const DataTypePtr res_type, const DataTypePtr res_value_type, TupleMaps & args) const { - const auto & promoted_type = (assert_cast(res_type.getElements()[1].get()))->getNestedType(); -#define MATCH_EXECUTE(is_str) \ - switch (promoted_type->getTypeId()) \ - { \ - case TypeIndex::Int64: return execute2(row_count, args, res_type); \ - case TypeIndex::UInt64: return execute2(row_count, args, res_type); \ - case TypeIndex::Float64: return execute2(row_count, args, res_type); \ - default: \ - throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; \ - } - - if constexpr (is_str_key) + switch (res_value_type->getTypeId()) { - MATCH_EXECUTE(true) + case TypeIndex::Int64: + return execute2(row_count, args, res_type); + case TypeIndex::Int128: + return execute2(row_count, args, res_type); + case TypeIndex::Int256: + return execute2(row_count, args, res_type); + case TypeIndex::UInt64: + return execute2(row_count, args, res_type); + case TypeIndex::UInt128: + return execute2(row_count, args, res_type); + case TypeIndex::UInt256: + return execute2(row_count, args, res_type); + case TypeIndex::Float64: + return execute2(row_count, args, res_type); + default: + throw Exception( + "Illegal column type " + res_value_type->getName() + " for values in arguments of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } - else - { - MATCH_EXECUTE(false) - } -#undef MATCH_EXECUTE } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override { + DataTypePtr key_type; + size_t row_count; const DataTypeTuple * tup_type = checkAndGetDataType((arguments[0]).type.get()); - const DataTypeArray * key_array_type = checkAndGetDataType(tup_type->getElements()[0].get()); - const DataTypeArray * val_array_type = checkAndGetDataType(tup_type->getElements()[1].get()); - - /* determine output type */ - const DataTypeTuple & res_type - = DataTypeTuple(DataTypes{std::make_shared(key_array_type->getNestedType()), - std::make_shared(val_array_type->getNestedType()->promoteNumericType())}); - + DataTypePtr res_type; + DataTypePtr res_value_type; TupleMaps args{}; args.reserve(arguments.size()); //prepare columns, extract data columns for direct access and put them to the vector - for (const auto & col : arguments) + if (tup_type) { - const ColumnTuple * tup; - bool is_const = isColumnConst(*col.column); - if (is_const) + const DataTypeArray * key_array_type = checkAndGetDataType(tup_type->getElements()[0].get()); + const DataTypeArray * val_array_type = checkAndGetDataType(tup_type->getElements()[1].get()); + + /* determine output type */ + res_value_type = val_array_type->getNestedType()->promoteNumericType(); + res_type = std::make_shared(DataTypes{ + std::make_shared(key_array_type->getNestedType()), std::make_shared(res_value_type)}); + + for (const auto & col : arguments) { - const auto * c = assert_cast(col.column.get()); - tup = assert_cast(c->getDataColumnPtr().get()); + const ColumnTuple * tup; + bool is_const = isColumnConst(*col.column); + if (is_const) + { + const auto * c = assert_cast(col.column.get()); + tup = assert_cast(c->getDataColumnPtr().get()); + } + else + tup = assert_cast(col.column.get()); + + const auto & arr1 = assert_cast(tup->getColumn(0)); + const auto & arr2 = assert_cast(tup->getColumn(1)); + + const auto & key_offsets = arr1.getOffsets(); + const auto & key_column = arr1.getDataPtr(); + + const auto & val_offsets = arr2.getOffsets(); + const auto & val_column = arr2.getDataPtr(); + + args.push_back({key_column, val_column, key_offsets, val_offsets, is_const}); + } + + key_type = key_array_type->getNestedType(); + } + else + { + const DataTypeMap * map_type = checkAndGetDataType((arguments[0]).type.get()); + if (map_type) + { + key_type = map_type->getKeyType(); + res_value_type = map_type->getValueType()->promoteNumericType(); + res_type = std::make_shared(DataTypes{map_type->getKeyType(), res_value_type}); + + for (const auto & col : arguments) + { + const ColumnMap * map; + bool is_const = isColumnConst(*col.column); + if (is_const) + { + const auto * c = assert_cast(col.column.get()); + map = assert_cast(c->getDataColumnPtr().get()); + } + else + map = assert_cast(col.column.get()); + + const auto & map_arr = map->getNestedColumn(); + const auto & key_offsets = map_arr.getOffsets(); + const auto & val_offsets = key_offsets; + + const auto & map_tup = map->getNestedData(); + const auto & key_column = map_tup.getColumnPtr(0); + const auto & val_column = map_tup.getColumnPtr(1); + + args.push_back({key_column, val_column, key_offsets, val_offsets, is_const}); + } } else - tup = assert_cast(col.column.get()); + throw Exception( + "Illegal column type " + arguments[0].type->getName() + " in arguments of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } - const auto & arr1 = assert_cast(tup->getColumn(0)); - const auto & arr2 = assert_cast(tup->getColumn(1)); - - const auto & key_offsets = arr1.getOffsets(); - const auto & key_column = arr1.getData(); - - const auto & val_offsets = arr2.getOffsets(); - const auto & val_column = arr2.getData(); - - // we can check const columns before any processing - if (is_const) + // we can check const columns before any processing + for (auto & arg : args) + { + if (arg.is_const) { - if (val_offsets[0] != key_offsets[0]) + if (arg.val_offsets[0] != arg.key_offsets[0]) throw Exception( "Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } - - args.push_back({key_column, val_column, key_offsets, val_offsets, is_const}); } - size_t row_count = arguments[0].column->size(); - auto key_type_id = key_array_type->getNestedType()->getTypeId(); - - switch (key_type_id) + row_count = arguments[0].column->size(); + switch (key_type->getTypeId()) { case TypeIndex::Enum8: case TypeIndex::Int8: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Enum16: case TypeIndex::Int16: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int32: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int64: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int128: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Int256: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt8: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::Date: case TypeIndex::UInt16: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::DateTime: case TypeIndex::UInt32: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt64: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt128: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UInt256: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::UUID: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); case TypeIndex::FixedString: case TypeIndex::String: - return execute1(row_count, res_type, args); + return execute1(row_count, res_type, res_value_type, args); default: - throw Exception{"Illegal columns in arguments of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + throw Exception( + "Illegal column type " + key_type->getName() + " for keys in arguments of function " + getName(), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } } }; diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index b9b3d219551..b097d81e385 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -25,11 +25,12 @@ namespace ErrorCodes namespace { -class FunctionInitializeAggregation : public IFunction +class FunctionInitializeAggregation : public IFunction, private WithContext { public: static constexpr auto name = "initializeAggregation"; - static FunctionPtr create(ContextPtr) { return std::make_shared(); } + static FunctionPtr create(ContextPtr context_) { return std::make_shared(context_); } + explicit FunctionInitializeAggregation(ContextPtr context_) : WithContext(context_) {} String getName() const override { return name; } @@ -78,7 +79,7 @@ DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTy String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray(aggregate_function_name_with_params, - aggregate_function_name, params_row, "function " + getName()); + aggregate_function_name, params_row, "function " + getName(), getContext()); AggregateFunctionProperties properties; aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, params_row, properties); diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index d1ceaaf6a35..e043764d280 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -76,7 +76,13 @@ ReadBufferFromPocoSocket::ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, bool ReadBufferFromPocoSocket::poll(size_t timeout_microseconds) const { - return available() || socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); + if (available()) + return true; + + Stopwatch watch; + bool res = socket.poll(timeout_microseconds, Poco::Net::Socket::SELECT_READ | Poco::Net::Socket::SELECT_ERROR); + ProfileEvents::increment(ProfileEvents::NetworkReceiveElapsedMicroseconds, watch.elapsedMicroseconds()); + return res; } } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index 8e9a14a20fb..2a5594a6866 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -765,7 +765,7 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) auto ignore_delimiter = [&] { - if (!buf.eof()) + if (!buf.eof() && !isNumericASCII(*buf.position())) { ++buf.position(); return true; diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index d4e2db0b553..4e101aaaf63 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -572,27 +572,43 @@ inline ReturnType readDateTextImpl(LocalDate & date, ReadBuffer & buf) /// Optimistic path, when whole value is in buffer. if (!buf.eof() && buf.position() + 10 <= buf.buffer().end()) { - UInt16 year = (buf.position()[0] - '0') * 1000 + (buf.position()[1] - '0') * 100 + (buf.position()[2] - '0') * 10 + (buf.position()[3] - '0'); - buf.position() += 5; + char * pos = buf.position(); - UInt8 month = buf.position()[0] - '0'; - if (isNumericASCII(buf.position()[1])) + /// YYYY-MM-DD + /// YYYY-MM-D + /// YYYY-M-DD + /// YYYY-M-D + + /// The delimiters can be arbitrary characters, like YYYY/MM!DD, but obviously not digits. + + UInt16 year = (pos[0] - '0') * 1000 + (pos[1] - '0') * 100 + (pos[2] - '0') * 10 + (pos[3] - '0'); + pos += 5; + + if (isNumericASCII(pos[-1])) + return ReturnType(false); + + UInt8 month = pos[0] - '0'; + if (isNumericASCII(pos[1])) { - month = month * 10 + buf.position()[1] - '0'; - buf.position() += 3; + month = month * 10 + pos[1] - '0'; + pos += 3; } else - buf.position() += 2; + pos += 2; - UInt8 day = buf.position()[0] - '0'; - if (isNumericASCII(buf.position()[1])) + if (isNumericASCII(pos[-1])) + return ReturnType(false); + + UInt8 day = pos[0] - '0'; + if (isNumericASCII(pos[1])) { - day = day * 10 + buf.position()[1] - '0'; - buf.position() += 2; + day = day * 10 + pos[1] - '0'; + pos += 2; } else - buf.position() += 1; + pos += 1; + buf.position() = pos; date = LocalDate(year, month, day); return ReturnType(true); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 5b2339975c1..e693d4ba988 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -468,7 +468,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) } AggregateFunctionProperties properties; - aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters) : Array(); + aggregate.parameters = (node->parameters) ? getAggregateFunctionParametersArray(node->parameters, "", getContext()) : Array(); aggregate.function = AggregateFunctionFactory::instance().get(node->name, types, aggregate.parameters, properties); aggregate_descriptions.push_back(aggregate); @@ -651,7 +651,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions) window_function.function_parameters = window_function.function_node->parameters ? getAggregateFunctionParametersArray( - window_function.function_node->parameters) + window_function.function_node->parameters, "", getContext()) : Array(); // Requiring a constant reference to a shared pointer to non-const AST diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 45466ae5ca1..421fe7fcddd 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -48,7 +48,7 @@ void replaceJoinedTable(const ASTSelectQuery & select_query) if (table_expr.database_and_table_name) { const auto & table_id = table_expr.database_and_table_name->as(); - String expr = "(select * from " + table_id.name() + ") as " + table_id.shortName(); + String expr = "(SELECT * FROM " + backQuote(table_id.name()) + ") AS " + backQuote(table_id.shortName()); // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b", // we can't replace aliased tables. diff --git a/src/Interpreters/MarkTableIdentifiersVisitor.cpp b/src/Interpreters/MarkTableIdentifiersVisitor.cpp index 52f180aa199..3010dbf57af 100644 --- a/src/Interpreters/MarkTableIdentifiersVisitor.cpp +++ b/src/Interpreters/MarkTableIdentifiersVisitor.cpp @@ -11,6 +11,26 @@ namespace DB { +namespace +{ + void replaceArgumentWithTableIdentifierIfNotAlias(ASTFunction & func, size_t argument_pos, const Aliases & aliases) + { + if (!func.arguments || (func.arguments->children.size() <= argument_pos)) + return; + auto arg = func.arguments->children[argument_pos]; + auto * identifier = arg->as(); + if (!identifier) + return; + if (aliases.contains(identifier->name())) + return; + auto table_identifier = identifier->createTable(); + if (!table_identifier) + return; + func.arguments->children[argument_pos] = table_identifier; + } +} + + bool MarkTableIdentifiersMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) { if (child->as()) @@ -23,37 +43,22 @@ bool MarkTableIdentifiersMatcher::needChildVisit(ASTPtr & node, const ASTPtr & c void MarkTableIdentifiersMatcher::visit(ASTPtr & ast, Data & data) { if (auto * node_func = ast->as()) - visit(*node_func, ast, data); + visit(*node_func, data); } -void MarkTableIdentifiersMatcher::visit(const ASTFunction & func, ASTPtr & ptr, Data & data) +void MarkTableIdentifiersMatcher::visit(ASTFunction & func, const Data & data) { /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. if (checkFunctionIsInOrGlobalInOperator(func)) { - auto ast = func.arguments->children.at(1); - auto opt_name = tryGetIdentifierName(ast); - if (opt_name && !data.aliases.count(*opt_name) && ast->as()) - { - ptr->as()->arguments->children[1] = ast->as()->createTable(); - assert(ptr->as()->arguments->children[1]); - } + replaceArgumentWithTableIdentifierIfNotAlias(func, 1, data.aliases); } // First argument of joinGet can be a table name, perhaps with a database. // First argument of dictGet can be a dictionary name, perhaps with a database. else if (functionIsJoinGet(func.name) || functionIsDictGet(func.name)) { - if (!func.arguments || func.arguments->children.empty()) - return; - - auto ast = func.arguments->children.at(0); - auto opt_name = tryGetIdentifierName(ast); - if (opt_name && !data.aliases.count(*opt_name) && ast->as()) - { - ptr->as()->arguments->children[0] = ast->as()->createTable(); - assert(ptr->as()->arguments->children[0]); - } + replaceArgumentWithTableIdentifierIfNotAlias(func, 0, data.aliases); } } diff --git a/src/Interpreters/MarkTableIdentifiersVisitor.h b/src/Interpreters/MarkTableIdentifiersVisitor.h index 0d80b865e53..d05c067397b 100644 --- a/src/Interpreters/MarkTableIdentifiersVisitor.h +++ b/src/Interpreters/MarkTableIdentifiersVisitor.h @@ -24,7 +24,7 @@ public: static void visit(ASTPtr & ast, Data & data); private: - static void visit(const ASTFunction & func, ASTPtr &, Data &); + static void visit(ASTFunction & func, const Data & data); }; using MarkTableIdentifiersVisitor = MarkTableIdentifiersMatcher::Visitor; diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index 90f6ac84afc..d91ea9208e4 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -49,17 +49,20 @@ std::pair> evaluateConstantExpression(co expr_for_constant_folding->execute(block_with_constants); if (!block_with_constants || block_with_constants.rows() == 0) - throw Exception("Logical error: empty block after evaluation of constant expression for IN, VALUES or LIMIT", ErrorCodes::LOGICAL_ERROR); + throw Exception("Logical error: empty block after evaluation of constant expression for IN, VALUES or LIMIT or aggregate function parameter", + ErrorCodes::LOGICAL_ERROR); if (!block_with_constants.has(name)) - throw Exception("Element of set in IN, VALUES or LIMIT is not a constant expression (result column not found): " + name, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column not found): {}", name); const ColumnWithTypeAndName & result = block_with_constants.getByName(name); const IColumn & result_column = *result.column; /// Expressions like rand() or now() are not constant if (!isColumnConst(result_column)) - throw Exception("Element of set in IN, VALUES or LIMIT is not a constant expression (result column is not const): " + name, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column is not const): {}", name); return std::make_pair(result_column[0], result.type); } diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 918abc39037..7e60d1175e2 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -137,8 +137,9 @@ void ASTAlterCommand::formatImpl( settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << "ADD INDEX " << (if_not_exists ? "IF NOT EXISTS " : "") << (settings.hilite ? hilite_none : ""); index_decl->formatImpl(settings, state, frame); - /// AFTER - if (index) + if (first) + settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " FIRST " << (settings.hilite ? hilite_none : ""); + else if (index) /// AFTER { settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str << " AFTER " << (settings.hilite ? hilite_none : ""); index->formatImpl(settings, state, frame); diff --git a/src/Parsers/ParserAlterQuery.cpp b/src/Parsers/ParserAlterQuery.cpp index d659db64b83..2908b171ca6 100644 --- a/src/Parsers/ParserAlterQuery.cpp +++ b/src/Parsers/ParserAlterQuery.cpp @@ -231,7 +231,9 @@ bool ParserAlterCommand::parseImpl(Pos & pos, ASTPtr & node, Expected & expected if (!parser_idx_decl.parse(pos, command->index_decl, expected)) return false; - if (s_after.ignore(pos, expected)) + if (s_first.ignore(pos, expected)) + command->first = true; + else if (s_after.ignore(pos, expected)) { if (!parser_name.parse(pos, command->index, expected)) return false; diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 1cb936cbb84..3ac457e52d6 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -211,6 +211,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ command.after_index_name = command_ast->index->as().name(); command.if_not_exists = command_ast->if_not_exists; + command.first = command_ast->first; return command; } @@ -454,6 +455,10 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) auto insert_it = metadata.secondary_indices.end(); + /// insert the index in the beginning of the indices list + if (first) + insert_it = metadata.secondary_indices.begin(); + if (!after_index_name.empty()) { insert_it = std::find_if( diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index 4e9c9764753..d523bb2783e 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -77,7 +77,7 @@ struct AlterCommand /// For ADD or MODIFY - after which column to add a new one. If an empty string, add to the end. String after_column; - /// For ADD_COLUMN, MODIFY_COLUMN - Add to the begin if it is true. + /// For ADD_COLUMN, MODIFY_COLUMN, ADD_INDEX - Add to the begin if it is true. bool first = false; /// For DROP_COLUMN, MODIFY_COLUMN, COMMENT_COLUMN diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index b3b9ce31ff5..539f7713320 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -116,8 +116,11 @@ static bool compareRetentions(const Graphite::Retention & a, const Graphite::Ret * * */ -static void -appendGraphitePattern(const Poco::Util::AbstractConfiguration & config, const String & config_element, Graphite::Patterns & patterns) +static void appendGraphitePattern( + const Poco::Util::AbstractConfiguration & config, + const String & config_element, + Graphite::Patterns & out_patterns, + ContextPtr context) { Graphite::Pattern pattern; @@ -137,7 +140,7 @@ appendGraphitePattern(const Poco::Util::AbstractConfiguration & config, const St String aggregate_function_name; Array params_row; getAggregateFunctionNameAndParametersArray( - aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization"); + aggregate_function_name_with_params, aggregate_function_name, params_row, "GraphiteMergeTree storage initialization", context); /// TODO Not only Float64 AggregateFunctionProperties properties; @@ -181,7 +184,7 @@ appendGraphitePattern(const Poco::Util::AbstractConfiguration & config, const St if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll std::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); - patterns.emplace_back(pattern); + out_patterns.emplace_back(pattern); } static void setGraphitePatternsFromConfig(ContextPtr context, const String & config_element, Graphite::Params & params) @@ -204,7 +207,7 @@ static void setGraphitePatternsFromConfig(ContextPtr context, const String & con { if (startsWith(key, "pattern")) { - appendGraphitePattern(config, config_element + "." + key, params.patterns); + appendGraphitePattern(config, config_element + "." + key, params.patterns, context); } else if (key == "default") { @@ -219,7 +222,7 @@ static void setGraphitePatternsFromConfig(ContextPtr context, const String & con } if (config.has(config_element + ".default")) - appendGraphitePattern(config, config_element + "." + ".default", params.patterns); + appendGraphitePattern(config, config_element + "." + ".default", params.patterns, context); } diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 8863492dd12..2617a7ade40 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1188,15 +1188,18 @@ class ClickHouseCluster: time.sleep(1) - def wait_hdfs_to_start(self, timeout=300): + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: try: self.hdfs_api.write_data("/somefilewithrandomname222", "1") logging.debug("Connected to HDFS and SafeMode disabled! ") + if check_marker: + self.hdfs_api.read_data("/preparations_done_marker") + return except Exception as ex: - logging.exception("Can't connect to HDFS " + str(ex)) + logging.exception("Can't connect to HDFS or preparations are not done yet " + str(ex)) time.sleep(1) raise Exception("Can't wait HDFS to start") @@ -1443,7 +1446,7 @@ class ClickHouseCluster: os.chmod(self.hdfs_kerberized_logs_dir, stat.S_IRWXO) run_and_check(self.base_kerberized_hdfs_cmd + common_opts) self.make_hdfs_api(kerberized=True) - self.wait_hdfs_to_start() + self.wait_hdfs_to_start(check_marker=True) if self.with_mongo and self.base_mongo_cmd: logging.debug('Setup Mongo') diff --git a/tests/integration/helpers/network.py b/tests/integration/helpers/network.py index 333e38f76bc..7d9906ae663 100644 --- a/tests/integration/helpers/network.py +++ b/tests/integration/helpers/network.py @@ -127,21 +127,21 @@ class _NetworkManager: return cls._instance def add_iptables_rule(self, **kwargs): - cmd = ['iptables', '-I', 'DOCKER-USER', '1'] + cmd = ['iptables', '--wait', '-I', 'DOCKER-USER', '1'] cmd.extend(self._iptables_cmd_suffix(**kwargs)) - self._exec_run_with_retry(cmd, retry_count=3, privileged=True) + self._exec_run(cmd, privileged=True) def delete_iptables_rule(self, **kwargs): - cmd = ['iptables', '-D', 'DOCKER-USER'] + cmd = ['iptables', '--wait', '-D', 'DOCKER-USER'] cmd.extend(self._iptables_cmd_suffix(**kwargs)) - self._exec_run_with_retry(cmd, retry_count=3, privileged=True) + self._exec_run(cmd, privileged=True) @staticmethod def clean_all_user_iptables_rules(): for i in range(1000): iptables_iter = i # when rules will be empty, it will return error - res = subprocess.run("iptables -D DOCKER-USER 1", shell=True) + res = subprocess.run("iptables --wait -D DOCKER-USER 1", shell=True) if res.returncode != 0: logging.info("All iptables rules cleared, " + str(iptables_iter) + " iterations, last error: " + str(res.stderr)) diff --git a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh index 971491d4053..769056d70b3 100755 --- a/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh +++ b/tests/integration/test_storage_kerberized_hdfs/hdfs_configs/bootstrap.sh @@ -242,6 +242,7 @@ sleep 3 /usr/local/hadoop/bin/hdfs dfs -mkdir /user/specuser /usr/local/hadoop/bin/hdfs dfs -chown specuser /user/specuser +echo "chown_completed" | /usr/local/hadoop/bin/hdfs dfs -appendToFile - /preparations_done_marker kdestroy diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract.sql b/tests/queries/0_stateless/01318_map_add_map_subtract.sql index 40c08e0a147..6ead7a2db46 100644 --- a/tests/queries/0_stateless/01318_map_add_map_subtract.sql +++ b/tests/queries/0_stateless/01318_map_add_map_subtract.sql @@ -2,7 +2,7 @@ drop table if exists map_test; create table map_test engine=TinyLog() as (select ([1, number], [toInt32(2),2]) as map from numbers(1, 10)); -- mapAdd -select mapAdd([1], [1]); -- { serverError 42 } +select mapAdd([1], [1]); -- { serverError 43 } select mapAdd(([1], [1])); -- { serverError 42 } select mapAdd(([1], [1]), map) from map_test; -- { serverError 43 } select mapAdd(([toUInt64(1)], [1]), map) from map_test; -- { serverError 43 } @@ -27,7 +27,7 @@ select mapAdd(([toInt64(1), 2], [toInt64(1), 1]), ([toInt64(1), 2], [toInt64(1), select mapAdd(([1, 2], [toFloat32(1.1), 1]), ([1, 2], [2.2, 1])) as res, toTypeName(res); select mapAdd(([1, 2], [toFloat64(1.1), 1]), ([1, 2], [2.2, 1])) as res, toTypeName(res); -select mapAdd(([toFloat32(1), 2], [toFloat64(1.1), 1]), ([toFloat32(1), 2], [2.2, 1])) as res, toTypeName(res); -- { serverError 44 } +select mapAdd(([toFloat32(1), 2], [toFloat64(1.1), 1]), ([toFloat32(1), 2], [2.2, 1])) as res, toTypeName(res); -- { serverError 43 } select mapAdd(([1, 2], [toFloat64(1.1), 1]), ([1, 2], [1, 1])) as res, toTypeName(res); -- { serverError 43 } select mapAdd((['a', 'b'], [1, 1]), ([key], [1])) from values('key String', ('b'), ('c'), ('d')); select mapAdd((cast(['a', 'b'], 'Array(FixedString(1))'), [1, 1]), ([key], [1])) as res, toTypeName(res) from values('key FixedString(1)', ('b'), ('c'), ('d')); diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference new file mode 100644 index 00000000000..96bafc2c79c --- /dev/null +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.reference @@ -0,0 +1,55 @@ +{1:5} +{1:3,2:2} +{1:3,3:2} +{1:3,4:2} +{1:3,5:2} +{1:3,6:2} +{1:3,7:2} +{1:3,8:2} +{1:3,9:2} +{1:3,10:2} +{1:5,2:2} +{1:3,2:4} +{1:3,2:2,3:2} +{1:3,2:2,4:2} +{1:3,2:2,5:2} +{1:3,2:2,6:2} +{1:3,2:2,7:2} +{1:3,2:2,8:2} +{1:3,2:2,9:2} +{1:3,2:2,10:2} +{1:2,2:2} Map(UInt8,UInt64) +{1:2,2:2} Map(UInt16,UInt64) +{1:2,2:2} Map(UInt32,UInt64) +{1:2,2:2} Map(UInt64,UInt64) +{1:2,2:2} Map(UInt128,UInt128) +{1:2,2:2} Map(UInt256,UInt256) +{1:2,2:2} Map(Int16,UInt64) +{1:2,2:2} Map(Int16,Int64) +{1:2,2:2} Map(Int32,Int64) +{1:2,2:2} Map(Int64,Int64) +{1:2,2:2} Map(Int128,Int128) +{1:2,2:2} Map(Int256,Int256) +{1:3.300000023841858,2:2} Map(UInt8,Float64) +{1:3.3000000000000003,2:2} Map(UInt8,Float64) +{'a':1,'b':2} +{'a':1,'b':1,'c':1} +{'a':1,'b':1,'d':1} +{'a':1,'b':2} Map(String,UInt64) +{'a':1,'b':1,'c':1} Map(String,UInt64) +{'a':1,'b':1,'d':1} Map(String,UInt64) +{'a':1,'b':2} +{'a':1,'b':1,'c':1} +{'a':1,'b':1,'d':1} +{'a':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) +{'b':2} Map(Enum16(\'a\' = 1, \'b\' = 2),Int64) +{'a':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) +{'b':2} Map(Enum8(\'a\' = 1, \'b\' = 2),Int64) +{'00000000-89ab-cdef-0123-456789abcdef':2} Map(UUID,Int64) +{'11111111-89ab-cdef-0123-456789abcdef':4} Map(UUID,Int64) +{1:0,2:0} Map(UInt8,UInt64) +{1:18446744073709551615,2:18446744073709551615} Map(UInt8,UInt64) +{1:-1,2:-1} Map(UInt8,Int64) +{1:-1.0999999761581423,2:0} Map(UInt8,Float64) +{1:-1,2:-1} Map(UInt8,Int64) +{1:-2,2:-2,3:1} Map(UInt8,Int64) diff --git a/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql new file mode 100644 index 00000000000..9f0f1cb0489 --- /dev/null +++ b/tests/queries/0_stateless/01318_map_add_map_subtract_on_map_type.sql @@ -0,0 +1,46 @@ +drop table if exists mapop_test; +set allow_experimental_map_type = 1; +create table mapop_test engine=TinyLog() as (select map(1, toInt32(2), number, 2) as m from numbers(1, 10)); + +-- mapAdd +select mapAdd(map(1, 1)); -- { serverError 42 } +select mapAdd(map(1, 1), m) from mapop_test; -- { serverError 43 } + +select mapAdd(map(toUInt64(1), toInt32(1)), m) from mapop_test; +select mapAdd(cast(m, 'Map(UInt8, UInt8)'), map(1, 1), map(2,2)) from mapop_test; + +-- cleanup +drop table mapop_test; + +-- check types +select mapAdd(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt16(1), toUInt16(1), 2, 1), map(toUInt16(1), toUInt16(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt32(1), toUInt32(1), 2, 1), map(toUInt32(1), toUInt32(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt64(1), toUInt64(1), 2, 1), map(toUInt64(1), toUInt64(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt128(1), toUInt128(1), 2, 1), map(toUInt128(1), toUInt128(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toUInt256(1), toUInt256(1), 2, 1), map(toUInt256(1), toUInt256(1), 2, 1)) as res, toTypeName(res); + +select mapAdd(map(toInt8(1), 1, 2, 1), map(toInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt16(1), toInt16(1), 2, 1), map(toInt16(1), toInt16(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt32(1), toInt32(1), 2, 1), map(toInt32(1), toInt32(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt64(1), toInt64(1), 2, 1), map(toInt64(1), toInt64(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt128(1), toInt128(1), 2, 1), map(toInt128(1), toInt128(1), 2, 1)) as res, toTypeName(res); +select mapAdd(map(toInt256(1), toInt256(1), 2, 1), map(toInt256(1), toInt256(1), 2, 1)) as res, toTypeName(res); + +select mapAdd(map(1, toFloat32(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapAdd(map(1, toFloat64(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapAdd(map(1, toFloat64(1.1), 2, 1), map(1, 1, 2, 1)) as res, toTypeName(res); -- { serverError 43 } +select mapAdd(map('a', 1, 'b', 1), map(key, 1)) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(cast('a', 'FixedString(1)'), 1, 'b', 1), map(key, 1)) as res, toTypeName(res) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(cast('a', 'LowCardinality(String)'), 1, 'b', 1), map(key, 1)) from values('key String', ('b'), ('c'), ('d')); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key Enum16(\'a\'=1, \'b\'=2), val Int16', ('a', 1), ('b', 1)); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key Enum8(\'a\'=1, \'b\'=2), val Int16', ('a', 1), ('b', 1)); +select mapAdd(map(key, val), map(key, val)) as res, toTypeName(res) from values ('key UUID, val Int32', ('00000000-89ab-cdef-0123-456789abcdef', 1), ('11111111-89ab-cdef-0123-456789abcdef', 2)); + +-- mapSubtract, same rules as mapAdd +select mapSubtract(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 1, 2, 1)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(1), 1, 2, 1), map(toUInt8(1), 2, 2, 2)) as res, toTypeName(res); -- overflow +select mapSubtract(map(toUInt8(1), toInt32(1), 2, 1), map(toUInt8(1), toInt16(2), 2, 2)) as res, toTypeName(res); +select mapSubtract(map(1, toFloat32(1.1), 2, 1), map(1, 2.2, 2, 1)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(1), toInt32(1), 2, 1), map(toUInt8(1), toInt16(2), 2, 2)) as res, toTypeName(res); +select mapSubtract(map(toUInt8(3), toInt32(1)), map(toUInt8(1), toInt32(2), 2, 2)) as res, toTypeName(res); diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh new file mode 100755 index 00000000000..bcb3775f86a --- /dev/null +++ b/tests/queries/0_stateless/01923_network_receive_time_metric_insert.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query "DROP TABLE IF EXISTS t; CREATE TABLE t (x UInt64) ENGINE = Memory;" + +# Rate limit is chosen for operation to spent more than one second. +seq 1 1000 | pv --quiet --rate-limit 1000 | ${CLICKHOUSE_CLIENT} --query "INSERT INTO t FORMAT TSV" + +# We check that the value of NetworkReceiveElapsedMicroseconds correctly includes the time spent waiting data from the client. +${CLICKHOUSE_CLIENT} --multiquery --query "SYSTEM FLUSH LOGS; + WITH ProfileEvents['NetworkReceiveElapsedMicroseconds'] AS time + SELECT time >= 1000000 ? 1 : time FROM system.query_log + WHERE current_database = currentDatabase() AND query_kind = 'Insert' AND event_date >= yesterday() AND type = 2 ORDER BY event_time DESC LIMIT 1;" + +${CLICKHOUSE_CLIENT} --query "DROP TABLE t" diff --git a/tests/queries/0_stateless/01932_alter_index_with_order.reference b/tests/queries/0_stateless/01932_alter_index_with_order.reference new file mode 100644 index 00000000000..07e1aab3df9 --- /dev/null +++ b/tests/queries/0_stateless/01932_alter_index_with_order.reference @@ -0,0 +1,9 @@ +default alter_index_test index_a set a 1 +default alter_index_test index_b minmax b 1 +default alter_index_test index_c set c 2 +default alter_index_test index_a set a 1 +default alter_index_test index_d set d 1 +default alter_index_test index_b minmax b 1 +default alter_index_test index_c set c 2 +default alter_index_test index_a set a 1 +default alter_index_test index_d set d 1 diff --git a/tests/queries/0_stateless/01932_alter_index_with_order.sql b/tests/queries/0_stateless/01932_alter_index_with_order.sql new file mode 100644 index 00000000000..0f2953b53f9 --- /dev/null +++ b/tests/queries/0_stateless/01932_alter_index_with_order.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS alter_index_test; + +CREATE TABLE alter_index_test ( + a UInt32, + b Date, + c UInt32, + d UInt32, + INDEX index_a a TYPE set(0) GRANULARITY 1 +) +ENGINE = MergeTree() +ORDER BY tuple(); + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +ALTER TABLE alter_index_test ADD INDEX index_b b type minmax granularity 1 FIRST; + +ALTER TABLE alter_index_test ADD INDEX index_c c type set(0) granularity 2 AFTER index_b; + +ALTER TABLE alter_index_test ADD INDEX index_d d type set(0) granularity 1; + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +DETACH TABLE alter_index_test; +ATTACH TABLE alter_index_test; + +SELECT * FROM system.data_skipping_indices WHERE table = 'alter_index_test' AND database = currentDatabase(); + +DROP TABLE IF EXISTS alter_index_test; diff --git a/tests/queries/0_stateless/01932_null_valid_identifier.reference b/tests/queries/0_stateless/01932_null_valid_identifier.reference new file mode 100644 index 00000000000..8600160f48c --- /dev/null +++ b/tests/queries/0_stateless/01932_null_valid_identifier.reference @@ -0,0 +1,3 @@ +1 +1 +1 \N diff --git a/tests/queries/0_stateless/01932_null_valid_identifier.sql b/tests/queries/0_stateless/01932_null_valid_identifier.sql new file mode 100644 index 00000000000..31f1a771675 --- /dev/null +++ b/tests/queries/0_stateless/01932_null_valid_identifier.sql @@ -0,0 +1,3 @@ +SELECT `null` FROM remote('127.0.0.2', view(SELECT 1 AS `null`)); +SELECT `NULL` FROM remote('127.0.0.2', view(SELECT 1 AS `NULL`)); +SELECT `nULl`, null FROM remote('127.0.0.2', view(SELECT 1 AS `nULl`)); diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.expect b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect new file mode 100755 index 00000000000..890d024847f --- /dev/null +++ b/tests/queries/0_stateless/01933_client_replxx_convert_history.expect @@ -0,0 +1,33 @@ +#!/usr/bin/expect -f + +log_user 0 +set timeout 60 +match_max 100000 +# A default timeout action is to do nothing, change it to fail +expect_after { + timeout { + exit 1 + } +} +set basedir [file dirname $argv0] + +exec bash -c "echo select 1 > $argv0.txt" +exec bash -c "echo select 1 >> $argv0.txt" +exec bash -c "echo select 1 >> $argv0.txt" + +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$argv0.txt" +expect "The history file ($argv0.txt) is in old format. 3 lines, 1 unique lines." +expect ":) " +send -- "\4" +expect eof + +spawn bash -c "wc -l $argv0.txt" +# The following lines are expected: +# +# ### YYYY-MM-DD HH:MM:SS.SSS +# select 1 +# +expect "2" +expect eof + +exec bash -c "rm $argv0.txt" diff --git a/tests/queries/0_stateless/01933_client_replxx_convert_history.reference b/tests/queries/0_stateless/01933_client_replxx_convert_history.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01933_invalid_date.reference b/tests/queries/0_stateless/01933_invalid_date.reference new file mode 100644 index 00000000000..829e7e8c420 --- /dev/null +++ b/tests/queries/0_stateless/01933_invalid_date.reference @@ -0,0 +1 @@ +2019-07-08 diff --git a/tests/queries/0_stateless/01933_invalid_date.sql b/tests/queries/0_stateless/01933_invalid_date.sql new file mode 100644 index 00000000000..aac09c99e60 --- /dev/null +++ b/tests/queries/0_stateless/01933_invalid_date.sql @@ -0,0 +1,10 @@ +SELECT toDate('07-08-2019'); -- { serverError 6 } +SELECT toDate('2019-0708'); -- { serverError 38 } +SELECT toDate('201907-08'); -- { serverError 38 } +SELECT toDate('2019^7^8'); + +CREATE TEMPORARY TABLE test (d Date); +INSERT INTO test VALUES ('2018-01-01'); + +SELECT * FROM test WHERE d >= '07-08-2019'; -- { serverError 53 } +SELECT * FROM test WHERE d >= '2019-07-08'; diff --git a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference new file mode 100644 index 00000000000..61be3e78ae7 --- /dev/null +++ b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.reference @@ -0,0 +1,2 @@ +[0,1,2,3,4] +[0,1,2,3,4] diff --git a/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql new file mode 100644 index 00000000000..3ab969ca256 --- /dev/null +++ b/tests/queries/0_stateless/01934_constexpr_aggregate_function_parameters.sql @@ -0,0 +1,11 @@ +SELECT groupArray(2 + 3)(number) FROM numbers(10); +SELECT groupArray('5'::UInt8)(number) FROM numbers(10); + +SELECT groupArray()(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(NULL)(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(NULL + NULL)(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray([])(number) FROM numbers(10); -- { serverError 36 } +SELECT groupArray(throwIf(1))(number) FROM numbers(10); -- { serverError 395 } + +-- Not the best error message, can be improved. +SELECT groupArray(number)(number) FROM numbers(10); -- { serverError 47 } diff --git a/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh new file mode 100755 index 00000000000..bbc24af1214 --- /dev/null +++ b/tests/queries/0_stateless/01935_parametrized_query_parametric_aggregate_function.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CURL} -sS -XPOST "${CLICKHOUSE_URL}¶m_lim=2" --data-binary 'select length(topKArray({lim:UInt32})([1,1,2,3,4,5,6,7,7,7]))' diff --git a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference new file mode 100644 index 00000000000..f9b4a3157f7 --- /dev/null +++ b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.reference @@ -0,0 +1,4 @@ +[nan] +[nan] +[nan] +[nan] diff --git a/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql new file mode 100644 index 00000000000..81ac6224268 --- /dev/null +++ b/tests/queries/0_stateless/01936_quantiles_cannot_return_null.sql @@ -0,0 +1,9 @@ +set aggregate_functions_null_for_empty=0; + +SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); +SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); + +set aggregate_functions_null_for_empty=1; + +SELECT quantiles(0.95)(x) FROM (SELECT 1 x WHERE 0); +SELECT quantiles(0.95)(number) FROM (SELECT number FROM numbers(10) WHERE number > 10); diff --git a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.reference b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.reference new file mode 100644 index 00000000000..bbf76e61257 --- /dev/null +++ b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.reference @@ -0,0 +1 @@ +still alive diff --git a/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql new file mode 100644 index 00000000000..d2ca771edc5 --- /dev/null +++ b/tests/queries/0_stateless/01936_three_parts_identifiers_in_wrong_places.sql @@ -0,0 +1,7 @@ +SELECT dictGet(t.nest.a, concat(currentDatabase(), '.dict.dict'), 's', number) FROM numbers(5); -- { serverError 47 } + +SELECT dictGetFloat64(t.b.s, 'database_for_dict.dict1', dictGetFloat64('Ta\0', toUInt64('databas\0_for_dict.dict1databas\0_for_dict.dict1', dictGetFloat64('', '', toUInt64(1048577), toDate(NULL)), NULL), toDate(dictGetFloat64(257, 'database_for_dict.dict1database_for_dict.dict1', '', toUInt64(NULL), 2, toDate(NULL)), '2019-05-2\0')), NULL, toUInt64(dictGetFloat64('', '', toUInt64(-9223372036854775808), toDate(NULL)), NULL)); -- { serverError 47 } + +SELECT NULL AND (2147483648 AND NULL) AND -2147483647, toUUID(((1048576 AND NULL) AND (2147483647 AND 257 AND NULL AND -2147483649) AND NULL) IN (test_01103.t1_distr.id), '00000000-e1fe-11e\0-bb8f\0853d60c00749'), stringToH3('89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff89184926cc3ffff'); -- { serverError 47 } + +SELECT 'still alive'; diff --git a/tests/queries/0_stateless/01937_nested_chinese.reference b/tests/queries/0_stateless/01937_nested_chinese.reference new file mode 100644 index 00000000000..54b6175d7fc --- /dev/null +++ b/tests/queries/0_stateless/01937_nested_chinese.reference @@ -0,0 +1,12 @@ +id String +products.产品 Array(Array(String)) +products.销量 Array(Array(Int32)) +id String +products.产品 Array(Array(String)) +products.销量 Array(Array(Int32)) +id String +products.产品 Array(String) +products.销量 Array(Int32) +p.产品 Array(String) +p.销量 Array(Int32) +0 diff --git a/tests/queries/0_stateless/01937_nested_chinese.sql b/tests/queries/0_stateless/01937_nested_chinese.sql new file mode 100644 index 00000000000..94c6598480e --- /dev/null +++ b/tests/queries/0_stateless/01937_nested_chinese.sql @@ -0,0 +1,8 @@ +CREATE TEMPORARY TABLE test (`id` String, `products` Nested (`产品` Array(String), `销量` Array(Int32))); + +DESCRIBE test; +DESCRIBE (SELECT * FROM test); +DESCRIBE (SELECT * FROM test ARRAY JOIN products); +DESCRIBE (SELECT p.`产品`, p.`销量` FROM test ARRAY JOIN products AS p); +SELECT * FROM test ARRAY JOIN products; +SELECT count() FROM (SELECT * FROM test ARRAY JOIN products); diff --git a/tests/queries/0_stateless/01938_joins_identifiers.reference b/tests/queries/0_stateless/01938_joins_identifiers.reference new file mode 100644 index 00000000000..4ce2f5c2505 --- /dev/null +++ b/tests/queries/0_stateless/01938_joins_identifiers.reference @@ -0,0 +1 @@ +0 0 1 diff --git a/tests/queries/0_stateless/01938_joins_identifiers.sql b/tests/queries/0_stateless/01938_joins_identifiers.sql new file mode 100644 index 00000000000..b518080b116 --- /dev/null +++ b/tests/queries/0_stateless/01938_joins_identifiers.sql @@ -0,0 +1,13 @@ +DROP TABLE IF EXISTS "/t0"; +DROP TABLE IF EXISTS "/t1"; + +create table "/t0" (a Int64, b Int64) engine = MergeTree() partition by a order by a; +create table "/t1" (a Int64, b Int64) engine = MergeTree() partition by a order by a; + +insert into "/t0" values (0, 0); +insert into "/t1" values (0, 1); + +select * from "/t0" join "/t1" using a; + +DROP TABLE "/t0"; +DROP TABLE "/t1"; diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index afd11cb5a7d..8453094cc65 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -251,3 +251,4 @@ 01924_argmax_bitmap_state 01914_exchange_dictionaries 01923_different_expression_name_alias +01932_null_valid_identifier diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index 7c1f998e91d..e0a96ef8ded 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -520,7 +520,10 @@ "01914_exchange_dictionaries", "01915_create_or_replace_dictionary", "01913_names_of_tuple_literal", - "01925_merge_prewhere_table" + "01925_merge_prewhere_table", + "01932_null_valid_identifier", + "01934_constexpr_aggregate_function_parameters", + "01932_alter_index_with_order" ], "parallel": [ @@ -851,6 +854,7 @@ "01913_replace_dictionary", "01914_exchange_dictionaries", "01915_create_or_replace_dictionary", - "01925_test_storage_merge_aliases" + "01925_test_storage_merge_aliases", + "01933_client_replxx_convert_history" /// Uses non unique history file ] } diff --git a/website/templates/index/community.html b/website/templates/index/community.html index a71e4097a68..28e9f12ce93 100644 --- a/website/templates/index/community.html +++ b/website/templates/index/community.html @@ -66,7 +66,7 @@
-