Merge branch 'master' into hive_style_partitioning

This commit is contained in:
Yarik Briukhovetskyi 2024-08-09 11:53:14 +02:00 committed by GitHub
commit aebb07884b
85 changed files with 1870 additions and 421 deletions

View File

@ -428,12 +428,17 @@ if (NOT SANITIZE)
set (CMAKE_POSITION_INDEPENDENT_CODE OFF)
endif()
if (OS_LINUX AND NOT (ARCH_AARCH64 OR ARCH_S390X) AND NOT SANITIZE)
# Slightly more efficient code can be generated
# It's disabled for ARM because otherwise ClickHouse cannot run on Android.
if (NOT OS_ANDROID AND OS_LINUX AND NOT ARCH_S390X AND NOT SANITIZE)
# Using '-no-pie' builds executables with fixed addresses, resulting in slightly more efficient code
# and keeping binary addresses constant even with ASLR enabled.
# Disabled on Android as it requires PIE: https://source.android.com/docs/security/enhancements#android-5
# Disabled on IBM S390X due to build issues with 'no-pie'
# Disabled with sanitizers to avoid issues with maximum relocation size: https://github.com/ClickHouse/ClickHouse/pull/49145
set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} -fno-pie")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -no-pie -Wl,-no-pie")
else ()
message (WARNING "ClickHouse is built as PIE, system.trace_log will contain invalid addresses after server restart.")
endif ()
if (ENABLE_TESTS)

View File

@ -71,7 +71,6 @@ add_contrib (zlib-ng-cmake zlib-ng)
add_contrib (bzip2-cmake bzip2)
add_contrib (minizip-ng-cmake minizip-ng)
add_contrib (snappy-cmake snappy)
add_contrib (rocksdb-cmake rocksdb)
add_contrib (thrift-cmake thrift)
# parquet/arrow/orc
add_contrib (arrow-cmake arrow) # requires: snappy, thrift, double-conversion
@ -148,6 +147,7 @@ add_contrib (hive-metastore-cmake hive-metastore) # requires: thrift, avro, arro
add_contrib (cppkafka-cmake cppkafka)
add_contrib (libpqxx-cmake libpqxx)
add_contrib (libpq-cmake libpq)
add_contrib (rocksdb-cmake rocksdb) # requires: jemalloc, snappy, zlib, lz4, zstd, liburing
add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float)
add_contrib (idna-cmake idna)

contrib/librdkafka vendored

@ -1 +1 @@
Subproject commit 2d2aab6f5b79db1cfca15d7bf0dee75d00d82082
Subproject commit 39d4ed49ccf3406e2bf825d5d7b0903b5a290782

contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 49ce8a1064dd1ad89117899839bf136365e49e79
Subproject commit 5f003e4a22d2e48e37c98d9620241237cd30dd24

View File

@ -5,36 +5,38 @@ if (NOT ENABLE_ROCKSDB OR NO_SSE3_OR_HIGHER) # assumes SSE4.2 and PCLMUL
return()
endif()
# not in original build system, otherwise xxHash.cc fails to compile with ClickHouse C++23 default
set (CMAKE_CXX_STANDARD 20)
# Always disable jemalloc for rocksdb by default because it introduces non-standard jemalloc APIs
option(WITH_JEMALLOC "build with JeMalloc" OFF)
option(WITH_LIBURING "build with liburing" OFF) # TODO could try to enable this conditionally, depending on ClickHouse's ENABLE_LIBURING
# ClickHouse cannot be compiled without snappy, lz4, zlib, zstd
option(WITH_SNAPPY "build with SNAPPY" ON)
option(WITH_LZ4 "build with lz4" ON)
option(WITH_ZLIB "build with zlib" ON)
option(WITH_ZSTD "build with zstd" ON)
if(WITH_SNAPPY)
if (ENABLE_JEMALLOC AND OS_LINUX) # gives compile errors with jemalloc enabled for rocksdb on non-Linux
add_definitions(-DROCKSDB_JEMALLOC -DJEMALLOC_NO_DEMANGLE)
list (APPEND THIRDPARTY_LIBS ch_contrib::jemalloc)
endif ()
if (ENABLE_LIBURING)
add_definitions(-DROCKSDB_IOURING_PRESENT)
list (APPEND THIRDPARTY_LIBS ch_contrib::liburing)
endif ()
if (WITH_SNAPPY)
add_definitions(-DSNAPPY)
list(APPEND THIRDPARTY_LIBS ch_contrib::snappy)
endif()
if(WITH_ZLIB)
if (WITH_ZLIB)
add_definitions(-DZLIB)
list(APPEND THIRDPARTY_LIBS ch_contrib::zlib)
endif()
if(WITH_LZ4)
if (WITH_LZ4)
add_definitions(-DLZ4)
list(APPEND THIRDPARTY_LIBS ch_contrib::lz4)
endif()
if(WITH_ZSTD)
if (WITH_ZSTD)
add_definitions(-DZSTD)
list(APPEND THIRDPARTY_LIBS ch_contrib::zstd)
endif()
@ -88,6 +90,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/cache/sharded_cache.cc
${ROCKSDB_SOURCE_DIR}/cache/tiered_secondary_cache.cc
${ROCKSDB_SOURCE_DIR}/db/arena_wrapped_db_iter.cc
${ROCKSDB_SOURCE_DIR}/db/attribute_group_iterator_impl.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_contents.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_fetcher.cc
${ROCKSDB_SOURCE_DIR}/db/blob/blob_file_addition.cc
@ -104,6 +107,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/blob/prefetch_buffer_collection.cc
${ROCKSDB_SOURCE_DIR}/db/builder.cc
${ROCKSDB_SOURCE_DIR}/db/c.cc
${ROCKSDB_SOURCE_DIR}/db/coalescing_iterator.cc
${ROCKSDB_SOURCE_DIR}/db/column_family.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction.cc
${ROCKSDB_SOURCE_DIR}/db/compaction/compaction_iterator.cc
@ -124,6 +128,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_write.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_compaction_flush.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_files.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_follower.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_open.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_debug.cc
${ROCKSDB_SOURCE_DIR}/db/db_impl/db_impl_experimental.cc
@ -181,6 +186,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/env/env_encryption.cc
${ROCKSDB_SOURCE_DIR}/env/file_system.cc
${ROCKSDB_SOURCE_DIR}/env/file_system_tracer.cc
${ROCKSDB_SOURCE_DIR}/env/fs_on_demand.cc
${ROCKSDB_SOURCE_DIR}/env/fs_remap.cc
${ROCKSDB_SOURCE_DIR}/env/mock_env.cc
${ROCKSDB_SOURCE_DIR}/env/unique_id_gen.cc
@ -368,6 +374,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/persistent_cache/volatile_tier_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/cache_simulator.cc
${ROCKSDB_SOURCE_DIR}/utilities/simulator_cache/sim_cache.cc
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_for_tiering_collector.cc
${ROCKSDB_SOURCE_DIR}/utilities/table_properties_collectors/compact_on_deletion_collector.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/file_trace_reader_writer.cc
${ROCKSDB_SOURCE_DIR}/utilities/trace/replayer_impl.cc
@ -388,6 +395,7 @@ set(SOURCES
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_prepared_txn_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn.cc
${ROCKSDB_SOURCE_DIR}/utilities/transactions/write_unprepared_txn_db.cc
${ROCKSDB_SOURCE_DIR}/utilities/types_util.cc
${ROCKSDB_SOURCE_DIR}/utilities/ttl/db_ttl_impl.cc
${ROCKSDB_SOURCE_DIR}/utilities/wal_filter.cc
${ROCKSDB_SOURCE_DIR}/utilities/write_batch_with_index/write_batch_with_index.cc
@ -418,14 +426,18 @@ if(HAS_ARMV8_CRC)
endif(HAS_ARMV8_CRC)
list(APPEND SOURCES
"${ROCKSDB_SOURCE_DIR}/port/port_posix.cc"
"${ROCKSDB_SOURCE_DIR}/env/env_posix.cc"
"${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc"
"${ROCKSDB_SOURCE_DIR}/env/io_posix.cc")
${ROCKSDB_SOURCE_DIR}/port/port_posix.cc
${ROCKSDB_SOURCE_DIR}/env/env_posix.cc
${ROCKSDB_SOURCE_DIR}/env/fs_posix.cc
${ROCKSDB_SOURCE_DIR}/env/io_posix.cc)
add_library(_rocksdb ${SOURCES})
add_library(ch_contrib::rocksdb ALIAS _rocksdb)
target_link_libraries(_rocksdb PRIVATE ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
# Not in the native build system but useful anyways:
# Make all functions in xxHash.h inline. Beneficial for performance: https://github.com/Cyan4973/xxHash/tree/v0.8.2#build-modifiers
target_compile_definitions (_rocksdb PRIVATE XXH_INLINE_ALL)
# SYSTEM is required to overcome some issues
target_include_directories(_rocksdb SYSTEM BEFORE INTERFACE "${ROCKSDB_SOURCE_DIR}/include")

View File

@ -232,15 +232,26 @@ function run_tests()
set +e
TEST_ARGS=(
-j 2
--testname
--shard
--zookeeper
--check-zookeeper-session
--no-stateless
--hung-check
--print-time
--capture-client-stacktrace
"${ADDITIONAL_OPTIONS[@]}"
"$SKIP_TESTS_OPTION"
)
if [[ -n "$USE_PARALLEL_REPLICAS" ]] && [[ "$USE_PARALLEL_REPLICAS" -eq 1 ]]; then
clickhouse-test --client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 \
--max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'" \
-j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --no-parallel-replicas --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
else
clickhouse-test -j 2 --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
TEST_ARGS+=(
--client="clickhouse-client --allow_experimental_parallel_reading_from_replicas=1 --parallel_replicas_for_non_replicated_merge_tree=1 --max_parallel_replicas=100 --cluster_for_parallel_replicas='parallel_replicas'"
--no-parallel-replicas
)
fi
clickhouse-test "${TEST_ARGS[@]}" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
set -e
}

View File

@ -264,11 +264,22 @@ function run_tests()
TIMEOUT=$((MAX_RUN_TIME - 800 > 8400 ? 8400 : MAX_RUN_TIME - 800))
START_TIME=${SECONDS}
set +e
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
TEST_ARGS=(
--testname
--shard
--zookeeper
--check-zookeeper-session
--hung-check
--print-time
--no-drop-if-fail
--capture-client-stacktrace
--test-runs "$NUM_TRIES"
"${ADDITIONAL_OPTIONS[@]}"
)
timeout --preserve-status --signal TERM --kill-after 60m ${TIMEOUT}s clickhouse-test "${TEST_ARGS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt
set -e
DURATION=$((SECONDS - START_TIME))

View File

@ -61,6 +61,7 @@ Engines in the family:
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
- [S3Queue](../../engines/table-engines/integrations/s3queue.md)
- [TimeSeries](../../engines/table-engines/integrations/time-series.md)
### Special Engines {#special-engines}

View File

@ -0,0 +1,295 @@
---
slug: /en/engines/table-engines/special/time_series
sidebar_position: 60
sidebar_label: TimeSeries
---
# TimeSeries Engine [Experimental]
A table engine storing time series, i.e. a set of values associated with timestamps and tags (or labels):
```
metric_name1[tag1=value1, tag2=value2, ...] = {timestamp1: value1, timestamp2: value2, ...}
metric_name2[...] = ...
```
:::info
This is an experimental feature that may change in backward-incompatible ways in future releases.
Enable usage of the TimeSeries table engine
with the [allow_experimental_time_series_table](../../../operations/settings/settings.md#allow-experimental-time-series-table) setting,
e.g. `SET allow_experimental_time_series_table = 1`.
:::
## Syntax {#syntax}
``` sql
CREATE TABLE name [(columns)] ENGINE=TimeSeries
[SETTINGS var1=value1, ...]
[DATA db.data_table_name | DATA ENGINE data_table_engine(arguments)]
[TAGS db.tags_table_name | TAGS ENGINE tags_table_engine(arguments)]
[METRICS db.metrics_table_name | METRICS ENGINE metrics_table_engine(arguments)]
```
## Usage {#usage}
It is easiest to start with everything set to defaults (a `TimeSeries` table can be created without specifying a list of columns):
``` sql
CREATE TABLE my_table ENGINE=TimeSeries
```
Then this table can be used with the following protocols (a port must be assigned in the server configuration):
- [prometheus remote-write](../../../interfaces/prometheus.md#remote-write)
- [prometheus remote-read](../../../interfaces/prometheus.md#remote-read)
## Target tables {#target-tables}
A `TimeSeries` table doesn't store any data of its own; everything is stored in its target tables.
This is similar to how a [materialized view](../../../sql-reference/statements/create/view#materialized-view) works,
with the difference that a materialized view has one target table
whereas a `TimeSeries` table has three target tables named [data](#data-table), [tags](#tags-table), and [metrics](#metrics-table).
The target tables can be either specified explicitly in the `CREATE TABLE` query
or the `TimeSeries` table engine can generate inner target tables automatically.
The target tables are the following:
1. The _data_ table {#data-table} contains time series associated with some identifier.
The _data_ table must have columns:
| Name | Mandatory? | Default type | Possible types | Description |
|---|---|---|---|---|
| `id` | [x] | `UUID` | any | Identifies a combination of a metric name and tags |
| `timestamp` | [x] | `DateTime64(3)` | `DateTime64(X)` | A time point |
| `value` | [x] | `Float64` | `Float32` or `Float64` | A value associated with the `timestamp` |
2. The _tags_ table {#tags-table} contains identifiers calculated for each combination of a metric name and tags.
The _tags_ table must have columns:
| Name | Mandatory? | Default type | Possible types | Description |
|---|---|---|---|---|
| `id` | [x] | `UUID` | any (must match the type of `id` in the [data](#data-table) table) | An `id` identifies a combination of a metric name and tags. The DEFAULT expression specifies how to calculate such an identifier |
| `metric_name` | [x] | `LowCardinality(String)` | `String` or `LowCardinality(String)` | The name of a metric |
| `<tag_value_column>` | [ ] | `String` | `String` or `LowCardinality(String)` or `LowCardinality(Nullable(String))` | The value of a specific tag, the tag's name and the name of a corresponding column are specified in the [tags_to_columns](#settings) setting |
| `tags` | [x] | `Map(LowCardinality(String), String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Map of tags excluding the tag `__name__` containing the name of a metric and excluding tags with names enumerated in the [tags_to_columns](#settings) setting |
| `all_tags` | [ ] | `Map(String, String)` | `Map(String, String)` or `Map(LowCardinality(String), String)` or `Map(LowCardinality(String), LowCardinality(String))` | Ephemeral column, each row is a map of all the tags excluding only the tag `__name__` containing the name of a metric. The only purpose of that column is to be used while calculating `id` |
| `min_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Minimum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` |
| `max_time` | [ ] | `Nullable(DateTime64(3))` | `DateTime64(X)` or `Nullable(DateTime64(X))` | Maximum timestamp of time series with that `id`. The column is created if [store_min_time_and_max_time](#settings) is `true` |
3. The _metrics_ table {#metrics-table} contains information about the metrics being collected: the types of those metrics and their descriptions.
The _metrics_ table must have columns:
| Name | Mandatory? | Default type | Possible types | Description |
|---|---|---|---|---|
| `metric_family_name` | [x] | `String` | `String` or `LowCardinality(String)` | The name of a metric family |
| `type` | [x] | `String` | `String` or `LowCardinality(String)` | The type of a metric family, one of "counter", "gauge", "summary", "stateset", "histogram", "gaugehistogram" |
| `unit` | [x] | `String` | `String` or `LowCardinality(String)` | The unit used in a metric |
| `help` | [x] | `String` | `String` or `LowCardinality(String)` | The description of a metric |
Any row inserted into a `TimeSeries` table is in fact stored in those three target tables.
A `TimeSeries` table contains all the columns of the [data](#data-table), [tags](#tags-table), and [metrics](#metrics-table) tables.
## Creation {#creation}
There are multiple ways to create a table with the `TimeSeries` table engine.
The simplest statement
``` sql
CREATE TABLE my_table ENGINE=TimeSeries
```
will actually create the following table (you can see that by executing `SHOW CREATE TABLE my_table`):
``` sql
CREATE TABLE my_table
(
`id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)),
`timestamp` DateTime64(3),
`value` Float64,
`metric_name` LowCardinality(String),
`tags` Map(LowCardinality(String), String),
`all_tags` Map(String, String),
`min_time` Nullable(DateTime64(3)),
`max_time` Nullable(DateTime64(3)),
`metric_family_name` String,
`type` String,
`unit` String,
`help` String
)
ENGINE = TimeSeries
DATA ENGINE = MergeTree ORDER BY (id, timestamp)
DATA INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
TAGS ENGINE = AggregatingMergeTree PRIMARY KEY metric_name ORDER BY (metric_name, id)
TAGS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
METRICS ENGINE = ReplacingMergeTree ORDER BY metric_family_name
METRICS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
```
So the columns were generated automatically, and the statement also contains three inner UUIDs,
one per inner target table that was created.
(Inner UUIDs are normally not shown until the setting
[show_table_uuid_in_table_create_query_if_not_nil](../../../operations/settings/settings#show_table_uuid_in_table_create_query_if_not_nil)
is enabled.)
Inner target tables have names like `.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`,
`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, `.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
and each target table has columns which are a subset of the columns of the main `TimeSeries` table:
``` sql
CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
(
`id` UUID,
`timestamp` DateTime64(3),
`value` Float64
)
ENGINE = MergeTree
ORDER BY (id, timestamp)
```
``` sql
CREATE TABLE default.`.inner_id.tags.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
(
`id` UUID DEFAULT reinterpretAsUUID(sipHash128(metric_name, all_tags)),
`metric_name` LowCardinality(String),
`tags` Map(LowCardinality(String), String),
`all_tags` Map(String, String) EPHEMERAL,
`min_time` SimpleAggregateFunction(min, Nullable(DateTime64(3))),
`max_time` SimpleAggregateFunction(max, Nullable(DateTime64(3)))
)
ENGINE = AggregatingMergeTree
PRIMARY KEY metric_name
ORDER BY (metric_name, id)
```
``` sql
CREATE TABLE default.`.inner_id.metrics.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
(
`metric_family_name` String,
`type` String,
`unit` String,
`help` String
)
ENGINE = ReplacingMergeTree
ORDER BY metric_family_name
```
## Adjusting types of columns {#adjusting-column-types}
You can adjust the types of almost any column of the inner target tables by specifying them explicitly
while defining the main table. For example,
``` sql
CREATE TABLE my_table
(
timestamp DateTime64(6)
) ENGINE=TimeSeries
```
will make the inner [data](#data-table) table store timestamps in microseconds instead of milliseconds:
``` sql
CREATE TABLE default.`.inner_id.data.xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`
(
`id` UUID,
`timestamp` DateTime64(6),
`value` Float64
)
ENGINE = MergeTree
ORDER BY (id, timestamp)
```
## The `id` column {#id-column}
The `id` column contains identifiers; every identifier is calculated for a combination of a metric name and tags.
The DEFAULT expression for the `id` column defines how such identifiers are calculated.
Both the type of the `id` column and that expression can be adjusted by specifying them explicitly:
``` sql
CREATE TABLE my_table
(
id UInt64 DEFAULT sipHash64(metric_name, all_tags)
) ENGINE=TimeSeries
```
## The `tags` and `all_tags` columns {#tags-and-all-tags}
There are two columns containing maps of tags: `tags` and `all_tags`. In this example they contain the same data, however they can differ
if the `tags_to_columns` setting is used. This setting allows specifying that a particular tag should be stored in a separate column instead of
in the map inside the `tags` column:
``` sql
CREATE TABLE my_table ENGINE=TimeSeries SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'}
```
This statement will add columns
```
`instance` String,
`job` String
```
to the definition of both `my_table` and its inner [tags](#tags-table) target table. In this case the `tags` column will not contain the tags `instance` and `job`,
but the `all_tags` column will contain them. The `all_tags` column is ephemeral and its only purpose is to be used in the DEFAULT expression
for the `id` column.
The types of columns can be adjusted by specifying them explicitly:
``` sql
CREATE TABLE my_table (instance LowCardinality(String), job LowCardinality(Nullable(String)))
ENGINE=TimeSeries SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'}
```
## Table engines of inner target tables {#inner-table-engines}
By default, inner target tables use the following table engines:
- the [data](#data-table) table uses [MergeTree](../mergetree-family/mergetree);
- the [tags](#tags-table) table uses [AggregatingMergeTree](../mergetree-family/aggregatingmergetree) because the same data is often inserted into this table multiple times, so duplicates need to be removed, and because aggregation is required for the `min_time` and `max_time` columns;
- the [metrics](#metrics-table) table uses [ReplacingMergeTree](../mergetree-family/replacingmergetree) because the same data is often inserted into this table multiple times, so duplicates need to be removed.
Other table engines can also be used for the inner target tables if specified explicitly:
``` sql
CREATE TABLE my_table ENGINE=TimeSeries
DATA ENGINE=ReplicatedMergeTree
TAGS ENGINE=ReplicatedAggregatingMergeTree
METRICS ENGINE=ReplicatedReplacingMergeTree
```
## External target tables {#external-target-tables}
It's possible to make a `TimeSeries` table use manually created target tables:
``` sql
CREATE TABLE data_for_my_table
(
`id` UUID,
`timestamp` DateTime64(3),
`value` Float64
)
ENGINE = MergeTree
ORDER BY (id, timestamp);
CREATE TABLE tags_for_my_table ...
CREATE TABLE metrics_for_my_table ...
CREATE TABLE my_table ENGINE=TimeSeries DATA data_for_my_table TAGS tags_for_my_table METRICS metrics_for_my_table;
```
## Settings {#settings}
Here is a list of settings which can be specified while defining a `TimeSeries` table:
| Name | Type | Default | Description |
|---|---|---|---|
| `tags_to_columns` | Map | {} | Map specifying which tags should be stored in separate columns in the [tags](#tags-table) table. Syntax: `{'tag1': 'column1', 'tag2': 'column2', ...}` |
| `use_all_tags_column_to_generate_id` | Bool | true | When generating an expression to calculate an identifier of a time series, this flag enables using the `all_tags` column in that calculation |
| `store_min_time_and_max_time` | Bool | true | If set to true then the table will store `min_time` and `max_time` for each time series |
| `aggregate_min_time_and_max_time` | Bool | true | When creating an inner target `tags` table, this flag enables using `SimpleAggregateFunction(min, Nullable(DateTime64(3)))` instead of just `Nullable(DateTime64(3))` as the type of the `min_time` column, and the same for the `max_time` column |
| `filter_by_min_time_and_max_time` | Bool | true | If set to true then the table will use the `min_time` and `max_time` columns for filtering time series |
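For illustration, here is a minimal sketch that combines several of these settings, based on the syntax above (the tag and column names are hypothetical):
``` sql
CREATE TABLE my_table ENGINE=TimeSeries
SETTINGS tags_to_columns = {'instance': 'instance', 'job': 'job'},
         store_min_time_and_max_time = true,
         filter_by_min_time_and_max_time = true
```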
## Functions {#functions}
Here is a list of functions supporting a `TimeSeries` table as an argument:
- [timeSeriesData](../../../sql-reference/table-functions/timeSeriesData.md)
- [timeSeriesTags](../../../sql-reference/table-functions/timeSeriesTags.md)
- [timeSeriesMetrics](../../../sql-reference/table-functions/timeSeriesMetrics.md)
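For example, assuming the `my_table` created above exists in the current database, its target tables can be queried through these functions (a sketch):
``` sql
SELECT count() FROM timeSeriesData(my_table);
SELECT metric_name, tags FROM timeSeriesTags(my_table) LIMIT 10;
SELECT metric_family_name, type, help FROM timeSeriesMetrics(my_table);
```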

View File

@ -1005,7 +1005,7 @@ They can be used for prewhere optimization only if we enable `set allow_statisti
## Column-level Settings {#column-level-settings}
Certain MergeTree settings can be override at column level:
Certain MergeTree settings can be overridden at column level:
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark.
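A minimal sketch of the syntax (the table and column names are hypothetical):
``` sql
CREATE TABLE example_table
(
    id Int64,
    document String SETTINGS (min_compress_block_size = 16777216, max_compress_block_size = 16777216)
)
ENGINE = MergeTree
ORDER BY id
```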

View File

@ -0,0 +1,160 @@
---
slug: /en/interfaces/prometheus
sidebar_position: 19
sidebar_label: Prometheus protocols
---
# Prometheus protocols
## Exposing metrics {#expose}
:::note
ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
:::
ClickHouse can expose its own metrics for scraping from Prometheus:
```xml
<prometheus>
<port>9363</port>
<endpoint>/metrics</endpoint>
<metrics>true</metrics>
<asynchronous_metrics>true</asynchronous_metrics>
<events>true</events>
<errors>true</errors>
</prometheus>
```
The `<prometheus.handlers>` section can be used to configure more extended handlers.
This section is similar to [<http_handlers>](/en/interfaces/http) but works for the Prometheus protocols:
```xml
<prometheus>
<port>9363</port>
<handlers>
<my_rule_1>
<url>/metrics</url>
<handler>
<type>expose_metrics</type>
<metrics>true</metrics>
<asynchronous_metrics>true</asynchronous_metrics>
<events>true</events>
<errors>true</errors>
</handler>
</my_rule_1>
</handlers>
</prometheus>
```
Settings:
| Name | Default | Description |
|---|---|---|
| `port` | none | Port for serving the metrics exposition endpoint. |
| `endpoint` | `/metrics` | HTTP endpoint for scraping metrics by prometheus server. Starts with `/`. Should not be used with the `<handlers>` section. |
| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
| `metrics` | true | Expose metrics from the [system.metrics](/en/operations/system-tables/metrics) table. |
| `asynchronous_metrics` | true | Expose current metrics values from the [system.asynchronous_metrics](/en/operations/system-tables/asynchronous_metrics) table. |
| `events` | true | Expose metrics from the [system.events](/en/operations/system-tables/events) table. |
| `errors` | true | Expose the number of errors by error code that have occurred since the last server restart. This information can also be obtained from the [system.errors](/en/operations/system-tables/errors) table. |
To check the endpoint, run (replace `127.0.0.1` with the IP address or hostname of your ClickHouse server):
```bash
curl 127.0.0.1:9363/metrics
```
## Remote-write protocol {#remote-write}
ClickHouse supports the [remote-write](https://prometheus.io/docs/specs/remote_write_spec/) protocol.
Data are received by this protocol and written to a [TimeSeries](/en/engines/table-engines/special/time_series) table
(which should be created beforehand).
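For example, the target table referenced in the handler configuration below could be created beforehand like this (a minimal sketch; `db_name` and `time_series_table` mirror the sample configuration):
```sql
SET allow_experimental_time_series_table = 1;
CREATE TABLE db_name.time_series_table ENGINE=TimeSeries;
```
The handler itself is configured in the server configuration: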
```xml
<prometheus>
<port>9363</port>
<handlers>
<my_rule_1>
<url>/write</url>
<handler>
<type>remote_write</type>
<database>db_name</database>
<table>time_series_table</table>
</handler>
</my_rule_1>
</handlers>
</prometheus>
```
Settings:
| Name | Default | Description |
|---|---|---|
| `port` | none | Port for serving the `remote-write` protocol. |
| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table to write data received by the `remote-write` protocol. This name can optionally contain the name of a database too. |
| `database` | none | The name of the database containing the table specified in the `table` setting, used when the database is not included in the `table` setting itself. |
## Remote-read protocol {#remote-read}
ClickHouse supports the [remote-read](https://prometheus.io/docs/prometheus/latest/querying/remote_read_api/) protocol.
Data are read from a [TimeSeries](/en/engines/table-engines/special/time_series) table and sent via this protocol.
```xml
<prometheus>
<port>9363</port>
<handlers>
<my_rule_1>
<url>/read</url>
<handler>
<type>remote_read</type>
<database>db_name</database>
<table>time_series_table</table>
</handler>
</my_rule_1>
</handlers>
</prometheus>
```
Settings:
| Name | Default | Description |
|---|---|---|
| `port` | none | Port for serving the `remote-read` protocol. |
| `url` / `headers` / `method` | none | Filters used to find a matching handler for a request. Similar to the fields with the same names in the [<http_handlers>](/en/interfaces/http) section. |
| `table` | none | The name of a [TimeSeries](/en/engines/table-engines/special/time_series) table to read data from for the `remote-read` protocol. This name can optionally contain the name of a database too. |
| `database` | none | The name of the database containing the table specified in the `table` setting, used when the database is not included in the `table` setting itself. |
## Configuration for multiple protocols {#multiple-protocols}
Multiple protocols can be specified together in one place:
```xml
<prometheus>
<port>9363</port>
<handlers>
<my_rule_1>
<url>/metrics</url>
<handler>
<type>expose_metrics</type>
<metrics>true</metrics>
<asynchronous_metrics>true</asynchronous_metrics>
<events>true</events>
<errors>true</errors>
</handler>
</my_rule_1>
<my_rule_2>
<url>/write</url>
<handler>
<type>remote_write</type>
<table>db_name.time_series_table</table>
</handler>
</my_rule_2>
<my_rule_3>
<url>/read</url>
<handler>
<type>remote_read</type>
<table>db_name.time_series_table</table>
</handler>
</my_rule_3>
</handlers>
</prometheus>
```

View File

@ -2112,48 +2112,6 @@ The trailing slash is mandatory.
<path>/var/lib/clickhouse/</path>
```
## Prometheus {#prometheus}
:::note
ClickHouse Cloud does not currently support connecting to Prometheus. To be notified when this feature is supported, please contact support@clickhouse.com.
:::
Exposing metrics data for scraping from [Prometheus](https://prometheus.io).
Settings:
- `endpoint` HTTP endpoint for scraping metrics by prometheus server. Start from /.
- `port` Port for `endpoint`.
- `metrics` Expose metrics from the [system.metrics](../../operations/system-tables/metrics.md#system_tables-metrics) table.
- `events` Expose metrics from the [system.events](../../operations/system-tables/events.md#system_tables-events) table.
- `asynchronous_metrics` Expose current metrics values from the [system.asynchronous_metrics](../../operations/system-tables/asynchronous_metrics.md#system_tables-asynchronous_metrics) table.
- `errors` - Expose the number of errors by error codes occurred since the last server restart. This information could be obtained from the [system.errors](../../operations/system-tables/asynchronous_metrics.md#system_tables-errors) as well.
**Example**
``` xml
<clickhouse>
<listen_host>0.0.0.0</listen_host>
<http_port>8123</http_port>
<tcp_port>9000</tcp_port>
<!-- highlight-start -->
<prometheus>
<endpoint>/metrics</endpoint>
<port>9363</port>
<metrics>true</metrics>
<events>true</events>
<asynchronous_metrics>true</asynchronous_metrics>
<errors>true</errors>
</prometheus>
<!-- highlight-end -->
</clickhouse>
```
Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse server):
```bash
curl 127.0.0.1:9363/metrics
```
## query_log {#query-log}
Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting.

View File

@ -5631,4 +5631,15 @@ Default value: `false`.
When enabled, ClickHouse detects Hive-style partitioning in the path (`/name=value/`) for file-like table engines [File](../../engines/table-engines/special/file.md#hive-style-partitioning)/[S3](../../engines/table-engines/integrations/s3.md#hive-style-partitioning)/[URL](../../engines/table-engines/special/url.md#hive-style-partitioning)/[HDFS](../../engines/table-engines/integrations/hdfs.md#hive-style-partitioning)/[AzureBlobStorage](../../engines/table-engines/integrations/azureBlobStorage.md#hive-style-partitioning) and allows using the partition columns as virtual columns in the query. These virtual columns have the same names as in the partitioned path, but start with `_`.
Default value: `false`.
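For example, with a hypothetical layout like `data/year=2024/country=US/part.parquet` and this setting enabled, the partition values are exposed as virtual columns prefixed with `_` (a sketch):
``` sql
SELECT _year, _country, count()
FROM file('data/year=*/country=*/*.parquet')
GROUP BY _year, _country;
```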
## allow_experimental_time_series_table {#allow-experimental-time-series-table}
Allows creation of tables with the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine.
Possible values:
- 0 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is disabled.
- 1 — the [TimeSeries](../../engines/table-engines/integrations/time-series.md) table engine is enabled.
Default value: `0`.
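For example (a minimal sketch):
``` sql
SET allow_experimental_time_series_table = 1;
CREATE TABLE ts ENGINE=TimeSeries;
```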

View File

@ -9,6 +9,7 @@ The following operations with [partitions](/docs/en/engines/table-engines/merget
- [DETACH PARTITION\|PART](#detach-partitionpart) — Moves a partition or part to the `detached` directory and forgets it.
- [DROP PARTITION\|PART](#drop-partitionpart) — Deletes a partition or part.
- [DROP DETACHED PARTITION\|PART](#drop-detached-partitionpart) - Deletes a part or all parts of a partition from `detached`.
- [FORGET PARTITION](#forget-partition) — Deletes the metadata of an empty partition from ZooKeeper.
- [ATTACH PARTITION\|PART](#attach-partitionpart) — Adds a partition or part from the `detached` directory to the table.
- [ATTACH PARTITION FROM](#attach-partition-from) — Copies the data partition from one table to another and adds it.
@ -68,7 +69,7 @@ ALTER TABLE mt DROP PART 'all_4_4_0';
## DROP DETACHED PARTITION\|PART
``` sql
ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART partition_expr
ALTER TABLE table_name [ON CLUSTER cluster] DROP DETACHED PARTITION|PART ALL|partition_expr
```
Removes the specified part or all parts of the specified partition from `detached`.
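For example, to drop all detached parts of every partition at once (a sketch reusing the `mt` table from the examples above; the `allow_drop_detached` setting must be enabled for this query):
``` sql
SET allow_drop_detached = 1;
ALTER TABLE mt DROP DETACHED PARTITION ALL;
```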

View File

@ -0,0 +1,36 @@
---
slug: /en/sql-reference/table-functions/fuzzQuery
sidebar_position: 75
sidebar_label: fuzzQuery
---
# fuzzQuery
Perturbs the given query string with random variations.
``` sql
fuzzQuery(query[, max_query_length[, random_seed]])
```
**Arguments**
- `query` (String) - The source query to perform the fuzzing on.
- `max_query_length` (UInt64) - The maximum length the query can reach during the fuzzing process.
- `random_seed` (UInt64) - A random seed for producing stable results.
**Returned Value**
A table object with a single column containing perturbed query strings.
## Usage Example
``` sql
SELECT * FROM fuzzQuery('SELECT materialize(\'a\' AS key) GROUP BY key') LIMIT 2;
```
```
┌─query──────────────────────────────────────────────────────────┐
1. │ SELECT 'a' AS key GROUP BY key │
2. │ EXPLAIN PIPELINE compact = true SELECT 'a' AS key GROUP BY key │
└────────────────────────────────────────────────────────────────┘
```

View File

@ -0,0 +1,28 @@
---
slug: /en/sql-reference/table-functions/timeSeriesData
sidebar_position: 145
sidebar_label: timeSeriesData
---
# timeSeriesData
`timeSeriesData(db_name.time_series_table)` - Returns the [data](../../engines/table-engines/integrations/time-series.md#data-table) table
used by the table `db_name.time_series_table` whose table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md):
``` sql
CREATE TABLE db_name.time_series_table ENGINE=TimeSeries DATA data_table
```
The function also works if the _data_ table is inner:
``` sql
CREATE TABLE db_name.time_series_table ENGINE=TimeSeries DATA INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
```
The following queries are equivalent:
``` sql
SELECT * FROM timeSeriesData(db_name.time_series_table);
SELECT * FROM timeSeriesData('db_name.time_series_table');
SELECT * FROM timeSeriesData('db_name', 'time_series_table');
```

View File

@ -0,0 +1,28 @@
---
slug: /en/sql-reference/table-functions/timeSeriesMetrics
sidebar_position: 145
sidebar_label: timeSeriesMetrics
---
# timeSeriesMetrics
`timeSeriesMetrics(db_name.time_series_table)` - Returns the [metrics](../../engines/table-engines/integrations/time-series.md#metrics-table) table
used by the table `db_name.time_series_table` whose table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md):
``` sql
CREATE TABLE db_name.time_series_table ENGINE=TimeSeries METRICS metrics_table
```
The function also works if the _metrics_ table is inner:
``` sql
CREATE TABLE db_name.time_series_table ENGINE=TimeSeries METRICS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
```
The following queries are equivalent:
``` sql
SELECT * FROM timeSeriesMetrics(db_name.time_series_table);
SELECT * FROM timeSeriesMetrics('db_name.time_series_table');
SELECT * FROM timeSeriesMetrics('db_name', 'time_series_table');
```

View File

@ -0,0 +1,28 @@
---
slug: /en/sql-reference/table-functions/timeSeriesTags
sidebar_position: 145
sidebar_label: timeSeriesTags
---
# timeSeriesTags
`timeSeriesTags(db_name.time_series_table)` - Returns the [tags](../../engines/table-engines/integrations/time-series.md#tags-table) table
used by the table `db_name.time_series_table` whose table engine is [TimeSeries](../../engines/table-engines/integrations/time-series.md):
``` sql
CREATE TABLE db_name.time_series_table ENGINE=TimeSeries TAGS tags_table
```
The function also works if the _tags_ table is inner:
``` sql
CREATE TABLE db_name.time_series_table ENGINE=TimeSeries TAGS INNER UUID '01234567-89ab-cdef-0123-456789abcdef'
```
The following queries are equivalent:
``` sql
SELECT * FROM timeSeriesTags(db_name.time_series_table);
SELECT * FROM timeSeriesTags('db_name.time_series_table');
SELECT * FROM timeSeriesTags('db_name', 'time_series_table');
```

View File

@ -11,7 +11,10 @@ class Client : public ClientApplicationBase
public:
using Arguments = ClientApplicationBase::Arguments;
Client() = default;
Client()
{
fuzzer = QueryFuzzer(randomSeed(), &std::cout, &std::cerr);
}
void initialize(Poco::Util::Application & self) override;

View File

@ -814,10 +814,11 @@ try
const size_t physical_server_memory = getMemoryAmount();
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",
LOG_INFO(log, "Available RAM: {}; logical cores: {}; used cores: {}.",
formatReadableSizeWithBinarySuffix(physical_server_memory),
getNumberOfPhysicalCPUCores(), // on ARM processors it can show only enabled at current moment cores
std::thread::hardware_concurrency());
std::thread::hardware_concurrency(),
getNumberOfPhysicalCPUCores() // on ARM processors it can show only enabled at current moment cores
);
#if defined(__x86_64__)
String cpu_info;
@ -1623,7 +1624,7 @@ try
concurrent_threads_soft_limit = new_server_settings.concurrent_threads_soft_limit_num;
if (new_server_settings.concurrent_threads_soft_limit_ratio_to_cores > 0)
{
auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * std::thread::hardware_concurrency();
auto value = new_server_settings.concurrent_threads_soft_limit_ratio_to_cores * getNumberOfPhysicalCPUCores();
if (value > 0 && value < concurrent_threads_soft_limit)
concurrent_threads_soft_limit = value;
}

View File

@ -2,7 +2,7 @@
#include <Client/Suggest.h>
#include <Client/QueryFuzzer.h>
#include <Common/QueryFuzzer.h>
#include <Common/DNSResolver.h>
#include <Common/InterruptListener.h>
#include <Common/ProgressIndication.h>

View File

@ -365,7 +365,7 @@ bool LocalConnection::poll(size_t)
{
while (pollImpl())
{
LOG_DEBUG(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry");
LOG_TEST(&Poco::Logger::get("LocalConnection"), "Executor timeout encountered, will retry");
if (needSendProgressOrMetrics())
return true;

View File

@ -68,22 +68,21 @@ Field QueryFuzzer::getRandomField(int type)
{
case 0:
{
return bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values)
/ sizeof(*bad_int64_values))];
return bad_int64_values[fuzz_rand() % std::size(bad_int64_values)];
}
case 1:
{
static constexpr double values[]
= {NAN, INFINITY, -INFINITY, 0., -0., 0.0001, 0.5, 0.9999,
1., 1.0001, 2., 10.0001, 100.0001, 1000.0001, 1e10, 1e20,
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % (sizeof(values) / sizeof(*values))];
FLT_MIN, FLT_MIN + FLT_EPSILON, FLT_MAX, FLT_MAX + FLT_EPSILON}; return values[fuzz_rand() % std::size(values)];
}
case 2:
{
static constexpr UInt64 scales[] = {0, 1, 2, 10};
return DecimalField<Decimal64>(
bad_int64_values[fuzz_rand() % (sizeof(bad_int64_values) / sizeof(*bad_int64_values))],
static_cast<UInt32>(scales[fuzz_rand() % (sizeof(scales) / sizeof(*scales))])
bad_int64_values[fuzz_rand() % std::size(bad_int64_values)],
static_cast<UInt32>(scales[fuzz_rand() % std::size(scales)])
);
}
default:
@ -165,7 +164,8 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.erase(arr.begin() + pos);
std::cerr << "erased\n";
if (debug_stream)
*debug_stream << "erased\n";
}
if (fuzz_rand() % 5 == 0)
@ -174,12 +174,14 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
std::cerr << fmt::format("inserted (pos {})\n", pos);
if (debug_stream)
*debug_stream << fmt::format("inserted (pos {})\n", pos);
}
else
{
arr.insert(arr.begin(), getRandomField(0));
std::cerr << "inserted (0)\n";
if (debug_stream)
*debug_stream << "inserted (0)\n";
}
}
@ -197,7 +199,9 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.erase(arr.begin() + pos);
std::cerr << "erased\n";
if (debug_stream)
*debug_stream << "erased\n";
}
if (fuzz_rand() % 5 == 0)
@ -206,12 +210,16 @@ Field QueryFuzzer::fuzzField(Field field)
{
size_t pos = fuzz_rand() % arr.size();
arr.insert(arr.begin() + pos, fuzzField(arr[pos]));
std::cerr << fmt::format("inserted (pos {})\n", pos);
if (debug_stream)
*debug_stream << fmt::format("inserted (pos {})\n", pos);
}
else
{
arr.insert(arr.begin(), getRandomField(0));
std::cerr << "inserted (0)\n";
if (debug_stream)
*debug_stream << "inserted (0)\n";
}
}
@ -344,7 +352,8 @@ void QueryFuzzer::fuzzOrderByList(IAST * ast)
}
else
{
std::cerr << "No random column.\n";
if (debug_stream)
*debug_stream << "No random column.\n";
}
}
@ -378,7 +387,8 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
if (col)
impl->children.insert(pos, col);
else
std::cerr << "No random column.\n";
if (debug_stream)
*debug_stream << "No random column.\n";
}
// We don't have to recurse here to fuzz the children, this is handled by
@ -1361,11 +1371,15 @@ void QueryFuzzer::fuzzMain(ASTPtr & ast)
collectFuzzInfoMain(ast);
fuzz(ast);
std::cout << std::endl;
WriteBufferFromOStream ast_buf(std::cout, 4096);
formatAST(*ast, ast_buf, false /*highlight*/);
ast_buf.finalize();
std::cout << std::endl << std::endl;
if (out_stream)
{
*out_stream << std::endl;
WriteBufferFromOStream ast_buf(*out_stream, 4096);
formatAST(*ast, ast_buf, false /*highlight*/);
ast_buf.finalize();
*out_stream << std::endl << std::endl;
}
}
}

View File

@ -35,9 +35,31 @@ struct ASTWindowDefinition;
* queries, so you want to feed it a lot of queries to get some interesting mix
* of them. Normally we feed SQL regression tests to it.
*/
struct QueryFuzzer
class QueryFuzzer
{
pcg64 fuzz_rand{randomSeed()};
public:
explicit QueryFuzzer(pcg64 fuzz_rand_ = randomSeed(), std::ostream * out_stream_ = nullptr, std::ostream * debug_stream_ = nullptr)
: fuzz_rand(fuzz_rand_)
, out_stream(out_stream_)
, debug_stream(debug_stream_)
{
}
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
void fuzzMain(ASTPtr & ast);
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
void notifyQueryFailed(ASTPtr ast);
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
private:
pcg64 fuzz_rand;
std::ostream * out_stream = nullptr;
std::ostream * debug_stream = nullptr;
// We add elements to expression lists with fixed probability. Some elements
// are so large, that the expected number of elements we add to them is
@ -66,10 +88,6 @@ struct QueryFuzzer
std::unordered_map<std::string, size_t> index_of_fuzzed_table;
std::set<IAST::Hash> created_tables_hashes;
// This is the only function you have to call -- it will modify the passed
// ASTPtr to point to new AST with some random changes.
void fuzzMain(ASTPtr & ast);
// Various helper functions follow, normally you shouldn't have to call them.
Field getRandomField(int type);
Field fuzzField(Field field);
@ -77,9 +95,6 @@ struct QueryFuzzer
ASTPtr getRandomExpressionList();
DataTypePtr fuzzDataType(DataTypePtr type);
DataTypePtr getRandomType();
ASTs getInsertQueriesForFuzzedTables(const String & full_query);
ASTs getDropQueriesForFuzzedTables(const ASTDropQuery & drop_query);
void notifyQueryFailed(ASTPtr ast);
void replaceWithColumnLike(ASTPtr & ast);
void replaceWithTableLike(ASTPtr & ast);
void fuzzOrderByElement(ASTOrderByElement * elem);
@ -102,8 +117,6 @@ struct QueryFuzzer
void addTableLike(ASTPtr ast);
void addColumnLike(ASTPtr ast);
void collectFuzzInfoRecurse(ASTPtr ast);
static bool isSuitableForFuzzing(const ASTCreateQuery & create);
};
}

View File

@ -629,6 +629,7 @@ void HandledSignals::setupTerminateHandler()
void HandledSignals::setupCommonDeadlySignalHandlers()
{
/// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime.
/// NOTE: it is also used by the clickhouse-test wrapper
addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP, SIGTRAP}, signalHandler, true);
#if defined(SANITIZER)

View File

@ -1,4 +1,5 @@
#pragma once
/// Get number of CPU cores without hyper-threading.
/// The calculation respects possible cgroups limits.
unsigned getNumberOfPhysicalCPUCores();

View File

@ -12,6 +12,7 @@
#include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/ZooKeeper/IKeeper.h>
#include <Common/PoolId.h>
#include <Core/ServerSettings.h>
#include <Core/Settings.h>
@ -338,9 +339,12 @@ ClusterPtr DatabaseReplicated::getClusterImpl(bool all_groups) const
return std::make_shared<Cluster>(getContext()->getSettingsRef(), shards, params);
}
std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr & cluster_) const
ReplicasInfo DatabaseReplicated::tryGetReplicasInfo(const ClusterPtr & cluster_) const
{
Strings paths;
paths.emplace_back(fs::path(zookeeper_path) / "max_log_ptr");
const auto & addresses_with_failover = cluster_->getShardsAddresses();
const auto & shards_info = cluster_->getShardsInfo();
for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
@ -349,22 +353,50 @@ std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr
{
String full_name = getFullReplicaName(replica.database_shard_name, replica.database_replica_name);
paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "active");
paths.emplace_back(fs::path(zookeeper_path) / "replicas" / full_name / "log_ptr");
}
}
try
{
auto current_zookeeper = getZooKeeper();
auto res = current_zookeeper->exists(paths);
auto zk_res = current_zookeeper->tryGet(paths);
std::vector<UInt8> statuses;
statuses.resize(paths.size());
auto max_log_ptr_zk = zk_res[0];
if (max_log_ptr_zk.error != Coordination::Error::ZOK)
throw Coordination::Exception(max_log_ptr_zk.error);
for (size_t i = 0; i < res.size(); ++i)
if (res[i].error == Coordination::Error::ZOK)
statuses[i] = 1;
UInt32 max_log_ptr = parse<UInt32>(max_log_ptr_zk.data);
return statuses;
ReplicasInfo replicas_info;
replicas_info.resize((zk_res.size() - 1) / 2);
size_t global_replica_index = 0;
for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
{
for (const auto & replica : addresses_with_failover[shard_index])
{
auto replica_active = zk_res[2 * global_replica_index + 1];
auto replica_log_ptr = zk_res[2 * global_replica_index + 2];
UInt64 recovery_time = 0;
{
std::lock_guard lock(ddl_worker_mutex);
if (replica.is_local && ddl_worker)
recovery_time = ddl_worker->getCurrentInitializationDurationMs();
}
replicas_info[global_replica_index] = ReplicaInfo{
.is_active = replica_active.error == Coordination::Error::ZOK,
.replication_lag = replica_log_ptr.error != Coordination::Error::ZNONODE ? std::optional(max_log_ptr - parse<UInt32>(replica_log_ptr.data)) : std::nullopt,
.recovery_time = recovery_time,
};
++global_replica_index;
}
}
return replicas_info;
}
catch (...)
{
@ -373,7 +405,6 @@ std::vector<UInt8> DatabaseReplicated::tryGetAreReplicasActive(const ClusterPtr
}
}
void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
{
const auto & config_prefix = fmt::format("named_collections.{}", collection_name);

View File

@ -1,5 +1,7 @@
#pragma once
#include <optional>
#include <Databases/DatabaseAtomic.h>
#include <Databases/DatabaseReplicatedSettings.h>
#include <Common/ZooKeeper/ZooKeeper.h>
@ -17,6 +19,14 @@ using ZooKeeperPtr = std::shared_ptr<zkutil::ZooKeeper>;
class Cluster;
using ClusterPtr = std::shared_ptr<Cluster>;
struct ReplicaInfo
{
bool is_active;
std::optional<UInt32> replication_lag;
UInt64 recovery_time;
};
using ReplicasInfo = std::vector<ReplicaInfo>;
class DatabaseReplicated : public DatabaseAtomic
{
public:
@ -84,7 +94,7 @@ public:
static void dropReplica(DatabaseReplicated * database, const String & database_zookeeper_path, const String & shard, const String & replica, bool throw_if_noop);
std::vector<UInt8> tryGetAreReplicasActive(const ClusterPtr & cluster_) const;
ReplicasInfo tryGetReplicasInfo(const ClusterPtr & cluster_) const;
void renameDatabase(ContextPtr query_context, const String & new_name) override;

View File

@ -32,6 +32,12 @@ DatabaseReplicatedDDLWorker::DatabaseReplicatedDDLWorker(DatabaseReplicated * db
bool DatabaseReplicatedDDLWorker::initializeMainThread()
{
{
std::lock_guard lock(initialization_duration_timer_mutex);
initialization_duration_timer.emplace();
initialization_duration_timer->start();
}
while (!stop_flag)
{
try
@ -69,6 +75,10 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()
initializeReplication();
initialized = true;
{
std::lock_guard lock(initialization_duration_timer_mutex);
initialization_duration_timer.reset();
}
return true;
}
catch (...)
@ -78,6 +88,11 @@ bool DatabaseReplicatedDDLWorker::initializeMainThread()
}
}
{
std::lock_guard lock(initialization_duration_timer_mutex);
initialization_duration_timer.reset();
}
return false;
}
@ -459,4 +474,10 @@ UInt32 DatabaseReplicatedDDLWorker::getLogPointer() const
return max_id.load();
}
UInt64 DatabaseReplicatedDDLWorker::getCurrentInitializationDurationMs() const
{
std::lock_guard lock(initialization_duration_timer_mutex);
return initialization_duration_timer ? initialization_duration_timer->elapsedMilliseconds() : 0;
}
}

View File

@ -36,6 +36,8 @@ public:
DatabaseReplicated * const database, bool committed = false); /// NOLINT
UInt32 getLogPointer() const;
UInt64 getCurrentInitializationDurationMs() const;
private:
bool initializeMainThread() override;
void initializeReplication();
@ -56,6 +58,9 @@ private:
ZooKeeperPtr active_node_holder_zookeeper;
/// It will remove "active" node when database is detached
zkutil::EphemeralNodeHolderPtr active_node_holder;
std::optional<Stopwatch> initialization_duration_timer;
mutable std::mutex initialization_duration_timer_mutex;
};
}

View File

@ -153,12 +153,8 @@ public:
return true;
}
std::vector<double> xfreq(spec_len);
double step = 0.5 / (spec_len - 1);
for (size_t i = 0; i < spec_len; ++i)
xfreq[i] = i * step;
auto freq = xfreq[idx];
auto freq = idx * step;
period = std::round(1 / freq);
return true;

View File

@ -214,6 +214,10 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
}
else if (type.isValueRepresentedByNumber() && src.getType() != Field::Types::String)
{
/// Bool is not represented in which_type, so we need to type it separately
if (isInt64OrUInt64orBoolFieldType(src.getType()) && type.getName() == "Bool")
return bool(src.safeGet<bool>());
if (which_type.isUInt8()) return convertNumericType<UInt8>(src, type);
if (which_type.isUInt16()) return convertNumericType<UInt16>(src, type);
if (which_type.isUInt32()) return convertNumericType<UInt32>(src, type);

View File

@ -2,6 +2,7 @@
#include <Server/TCPServer.h>
#include <Poco/Net/NetException.h>
#include <Common/logger_useful.h>
namespace DB
{
@ -97,6 +98,21 @@ void HTTPServerConnection::run()
{
sendErrorResponse(session, Poco::Net::HTTPResponse::HTTP_BAD_REQUEST);
}
catch (const Poco::Net::NetException & e)
{
/// Do not spam logs with messages related to connection reset by peer.
if (e.code() == POCO_ENOTCONN)
{
LOG_DEBUG(LogFrequencyLimiter(getLogger("HTTPServerConnection"), 10), "Connection reset by peer while processing HTTP request: {}", e.message());
break;
}
if (session.networkException())
session.networkException()->rethrow();
else
throw;
}
catch (const Poco::Exception &)
{
if (session.networkException())

View File

@ -3359,6 +3359,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"ALTER MODIFY REFRESH is not supported by MergeTree engines family");
if (command.type == AlterCommand::MODIFY_SQL_SECURITY)
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"ALTER MODIFY SQL SECURITY is not supported by MergeTree engines family");
if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
@ -6268,10 +6272,13 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr
}
else
{
String partition_id = getPartitionIDFromQuery(partition, local_context);
String partition_id;
bool all = partition->as<ASTPartition>()->all;
if (!all)
partition_id = getPartitionIDFromQuery(partition, local_context);
DetachedPartsInfo detached_parts = getDetachedParts();
for (const auto & part_info : detached_parts)
if (part_info.valid_name && part_info.partition_id == partition_id
if (part_info.valid_name && (all || part_info.partition_id == partition_id)
&& part_info.prefix != "attaching" && part_info.prefix != "deleting")
renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name, part_info.disk);
}

View File

@ -19,7 +19,7 @@ void ConditionSelectivityEstimator::ColumnSelectivityEstimator::merge(String par
Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateLess(const Field & val, Float64 rows) const
{
if (part_statistics.empty())
return default_normal_cond_factor * rows;
return default_cond_range_factor * rows;
Float64 result = 0;
Float64 part_rows = 0;
for (const auto & [key, estimator] : part_statistics)
@ -39,13 +39,7 @@ Float64 ConditionSelectivityEstimator::ColumnSelectivityEstimator::estimateEqual
{
if (part_statistics.empty())
{
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val)
return default_unknown_cond_factor * rows;
else if (float_val.value() < - threshold || float_val.value() > threshold)
return default_normal_cond_factor * rows;
else
return default_good_cond_factor * rows;
return default_cond_equal_factor * rows;
}
Float64 result = 0;
Float64 partial_cnt = 0;
@ -149,30 +143,22 @@ Float64 ConditionSelectivityEstimator::estimateRowCount(const RPNBuilderTreeNode
auto [op, val] = extractBinaryOp(node, col);
if (dummy)
{
if (op == "equals")
return default_cond_equal_factor * total_rows;
else if (op == "less" || op == "lessOrEquals" || op == "greater" || op == "greaterOrEquals")
return default_cond_range_factor * total_rows;
else
return default_unknown_cond_factor * total_rows;
}
if (op == "equals")
{
if (dummy)
{
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (!float_val || (float_val < - threshold || float_val > threshold))
return default_normal_cond_factor * total_rows;
else
return default_good_cond_factor * total_rows;
}
return estimator.estimateEqual(val, total_rows);
}
else if (op == "less" || op == "lessOrEquals")
{
if (dummy)
return default_normal_cond_factor * total_rows;
return estimator.estimateLess(val, total_rows);
}
else if (op == "greater" || op == "greaterOrEquals")
{
if (dummy)
return default_normal_cond_factor * total_rows;
return estimator.estimateGreater(val, total_rows);
}
else
return default_unknown_cond_factor * total_rows;
}

View File

@ -38,12 +38,10 @@ private:
std::pair<String, Field> extractBinaryOp(const RPNBuilderTreeNode & node, const String & column_name) const;
static constexpr auto default_good_cond_factor = 0.1;
static constexpr auto default_normal_cond_factor = 0.5;
static constexpr auto default_unknown_cond_factor = 1.0;
/// Conditions like "x = N" are considered good if abs(N) > threshold.
/// This is used to assume that condition is likely to have good selectivity.
static constexpr auto threshold = 2;
/// Used to estimate the selectivity of a condition when there is no statistics.
static constexpr auto default_cond_range_factor = 0.5;
static constexpr auto default_cond_equal_factor = 0.01;
static constexpr auto default_unknown_cond_factor = 1;
UInt64 total_rows = 0;
std::map<String, ColumnSelectivityEstimator> column_estimators;

View File

@ -1,14 +1,17 @@
#include <Storages/Statistics/Statistics.h>
#include <Common/Exception.h>
#include <Common/FieldVisitorConvertToNumber.h>
#include <Common/logger_useful.h>
#include <DataTypes/DataTypeFactory.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/convertFieldToType.h>
#include <Storages/ColumnsDescription.h>
#include <Storages/Statistics/ConditionSelectivityEstimator.h>
#include <Storages/Statistics/StatisticsCountMinSketch.h>
#include <Storages/Statistics/StatisticsTDigest.h>
#include <Storages/Statistics/StatisticsUniq.h>
#include <Storages/StatisticsDescription.h>
#include <Common/Exception.h>
#include <Common/logger_useful.h>
#include "config.h" /// USE_DATASKETCHES
@ -27,33 +30,26 @@ enum StatisticsFileVersion : UInt16
V0 = 0,
};
std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & field)
std::optional<Float64> StatisticsUtils::tryConvertToFloat64(const Field & value, const DataTypePtr & data_type)
{
switch (field.getType())
if (data_type->isValueRepresentedByNumber())
{
case Field::Types::Int64:
return field.get<Int64>();
case Field::Types::UInt64:
return field.get<UInt64>();
case Field::Types::Float64:
return field.get<Float64>();
case Field::Types::Int128:
return field.get<Int128>();
case Field::Types::UInt128:
return field.get<UInt128>();
case Field::Types::Int256:
return field.get<Int256>();
case Field::Types::UInt256:
return field.get<UInt256>();
default:
return {};
}
}
Field value_converted;
std::optional<String> StatisticsUtils::tryConvertToString(const DB::Field & field)
{
if (field.getType() == Field::Types::String)
return field.get<String>();
if (isInteger(data_type) && (value.getType() == Field::Types::Float64 || value.getType() == Field::Types::String))
/// For cases like val_int32 < 10.5 or val_int32 < '10.5', we should convert 10.5 to Float64.
value_converted = convertFieldToType(value, *DataTypeFactory::instance().get("Float64"));
else
/// We should convert the value to the real column data type and then translate it to Float64.
/// For example, for the expression col_date > '2024-08-07', converting '2024-08-07' directly to Float64 would yield null.
value_converted = convertFieldToType(value, *data_type);
if (value_converted.isNull())
return {};
Float64 value_as_float = applyVisitor(FieldVisitorConvertToNumber<Float64>(), value_converted);
return value_as_float;
}
return {};
}
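The point of the new signature is that the literal has to be converted through the column's own type before it can become a Float64. A self-contained illustration of the pitfall described in the comments, using a Date-style literal (simplified, hand-rolled helpers under a C++20 standard library with calendar support, not the convertFieldToType machinery):

#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <optional>
#include <string>

// Naive conversion: read the literal as a plain number. For '2024-08-07' this fails
// (strtod stops at the first '-'), which is exactly the "we will get null" case above.
std::optional<double> literalAsNumber(const std::string & s)
{
    char * end = nullptr;
    double v = std::strtod(s.c_str(), &end);
    if (end != s.c_str() + s.size())
        return std::nullopt;
    return v;
}

// Type-aware conversion: parse the literal as the column's type (Date) first and only
// then expose it as the number the statistics store (days since the epoch).
std::optional<double> literalAsDate(const std::string & s)
{
    int y = 0, m = 0, d = 0;
    if (std::sscanf(s.c_str(), "%d-%d-%d", &y, &m, &d) != 3)
        return std::nullopt;
    std::chrono::year_month_day ymd{std::chrono::year{y}, std::chrono::month{static_cast<unsigned>(m)}, std::chrono::day{static_cast<unsigned>(d)}};
    if (!ymd.ok())
        return std::nullopt;
    return static_cast<double>(std::chrono::sys_days{ymd}.time_since_epoch().count());
}

int main()
{
    std::cout << literalAsNumber("2024-08-07").has_value() << '\n';  // 0: direct numeric parse fails
    std::cout << *literalAsDate("2024-08-07") << '\n';               // 19942: comparable with Date column values
}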
@ -89,21 +85,23 @@ Float64 IStatistics::estimateLess(const Field & /*val*/) const
throw Exception(ErrorCodes::LOGICAL_ERROR, "Less-than estimation is not implemented for this type of statistics");
}
/// -------------------------------------
/// Implementation of the estimation:
/// Note: Each statistics object supports certain types predicates natively, e.g.
/// - TDigest: '< X' (less-than predicates)
/// - Count-min sketches: '= X' (equal predicates)
/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality)
/// If multiple statistics objects are available per column, it is sometimes also possible to combine them in a clever way.
/// For that reason, all estimation are performed in a central place (here), and we don't simply pass the predicate to the first statistics
/// object that supports it natively.
/// Notes:
/// - Statistics objects usually support estimation only for certain types of predicates, e.g.
/// - TDigest: '< X' (less-than predicates)
/// - Count-min sketches: '= X' (equal predicates)
/// - Uniq (HyperLogLog): 'count distinct(*)' (column cardinality)
///
/// If multiple statistics objects in a column support estimating a predicate, we want to try statistics in order of descending accuracy
/// (e.g. MinMax statistics are simpler than TDigest statistics and thus worse for estimating 'less' predicates).
///
/// Sometimes, it is possible to combine multiple statistics in a clever way. For that reason, all estimations are performed in a central
/// place (here), and we don't simply pass the predicate to the first statistics object that supports it natively.
Float64 ColumnStatistics::estimateLess(const Field & val) const
{
if (stats.contains(StatisticsType::TDigest))
return stats.at(StatisticsType::TDigest)->estimateLess(val);
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
return rows * ConditionSelectivityEstimator::default_cond_range_factor;
}
Float64 ColumnStatistics::estimateGreater(const Field & val) const
@ -113,8 +111,7 @@ Float64 ColumnStatistics::estimateGreater(const Field & val) const
Float64 ColumnStatistics::estimateEqual(const Field & val) const
{
auto float_val = StatisticsUtils::tryConvertToFloat64(val);
if (float_val.has_value() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
if (stats_desc.data_type->isValueRepresentedByNumber() && stats.contains(StatisticsType::Uniq) && stats.contains(StatisticsType::TDigest))
{
/// 2048 is the default number of buckets in TDigest. In this case, TDigest stores exactly one value (with many rows) for every bucket.
if (stats.at(StatisticsType::Uniq)->estimateCardinality() < 2048)
@ -124,10 +121,7 @@ Float64 ColumnStatistics::estimateEqual(const Field & val) const
if (stats.contains(StatisticsType::CountMinSketch))
return stats.at(StatisticsType::CountMinSketch)->estimateEqual(val);
#endif
if (!float_val.has_value() && (float_val < - ConditionSelectivityEstimator::threshold || float_val > ConditionSelectivityEstimator::threshold))
return rows * ConditionSelectivityEstimator::default_normal_cond_factor;
else
return rows * ConditionSelectivityEstimator::default_good_cond_factor;
return rows * ConditionSelectivityEstimator::default_cond_equal_factor;
}
/// -------------------------------------
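The combination step deserves a spelled-out model: when both 'uniq' and 'tdigest' statistics exist and the column has fewer distinct values than TDigest buckets, the TDigest count for the value is taken as nearly exact; otherwise a count-min sketch is consulted if present, and only then the default equality factor applies. A rough standalone sketch of that preference order (the struct and the numbers are hypothetical, not the real ColumnStatistics API):

#include <iostream>
#include <optional>

constexpr double default_cond_equal_factor = 0.01;
constexpr size_t tdigest_buckets = 2048;  // default TDigest bucket count mentioned above

struct ColumnStatsModel
{
    std::optional<size_t> cardinality;          // from 'uniq', if present
    std::optional<double> tdigest_count_equal;  // from 'tdigest', if present
    std::optional<double> count_min_estimate;   // from 'count_min', if present
};

// Preference order mirroring ColumnStatistics::estimateEqual:
// exact-ish TDigest count (low cardinality) -> count-min sketch -> default factor.
double estimateEqual(const ColumnStatsModel & stats, double rows)
{
    if (stats.cardinality && stats.tdigest_count_equal && *stats.cardinality < tdigest_buckets)
        return *stats.tdigest_count_equal;
    if (stats.count_min_estimate)
        return *stats.count_min_estimate;
    return rows * default_cond_equal_factor;
}

int main()
{
    ColumnStatsModel low_cardinality{100, 42.0, std::nullopt};
    ColumnStatsModel no_stats{};
    std::cout << estimateEqual(low_cardinality, 1000000) << '\n';  // 42
    std::cout << estimateEqual(no_stats, 1000000) << '\n';         // 10000
}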
@ -204,15 +198,15 @@ void MergeTreeStatisticsFactory::registerValidator(StatisticsType stats_type, Va
MergeTreeStatisticsFactory::MergeTreeStatisticsFactory()
{
registerValidator(StatisticsType::TDigest, tdigestValidator);
registerCreator(StatisticsType::TDigest, tdigestCreator);
registerValidator(StatisticsType::TDigest, tdigestStatisticsValidator);
registerCreator(StatisticsType::TDigest, tdigestStatisticsCreator);
registerValidator(StatisticsType::Uniq, uniqValidator);
registerCreator(StatisticsType::Uniq, uniqCreator);
registerValidator(StatisticsType::Uniq, uniqStatisticsValidator);
registerCreator(StatisticsType::Uniq, uniqStatisticsCreator);
#if USE_DATASKETCHES
registerValidator(StatisticsType::CountMinSketch, countMinSketchValidator);
registerCreator(StatisticsType::CountMinSketch, countMinSketchCreator);
registerValidator(StatisticsType::CountMinSketch, countMinSketchStatisticsValidator);
registerCreator(StatisticsType::CountMinSketch, countMinSketchStatisticsCreator);
#endif
}
@ -222,7 +216,7 @@ MergeTreeStatisticsFactory & MergeTreeStatisticsFactory::instance()
return instance;
}
void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const
void MergeTreeStatisticsFactory::validate(const ColumnStatisticsDescription & stats, const DataTypePtr & data_type) const
{
for (const auto & [type, desc] : stats.types_to_desc)
{

View File

@ -14,12 +14,11 @@ namespace DB
constexpr auto STATS_FILE_PREFIX = "statistics_";
constexpr auto STATS_FILE_SUFFIX = ".stats";
struct StatisticsUtils
{
/// Returns std::nullopt if input Field cannot be converted to a concrete value
static std::optional<Float64> tryConvertToFloat64(const Field & field);
static std::optional<String> tryConvertToString(const Field & field);
/// - `data_type` is the type of the column on which the statistics object was built
static std::optional<Float64> tryConvertToFloat64(const Field & value, const DataTypePtr & data_type);
};
/// Statistics describe properties of the values in the column,
@ -87,10 +86,10 @@ class MergeTreeStatisticsFactory : private boost::noncopyable
public:
static MergeTreeStatisticsFactory & instance();
void validate(const ColumnStatisticsDescription & stats, DataTypePtr data_type) const;
void validate(const ColumnStatisticsDescription & stats, const DataTypePtr & data_type) const;
using Validator = std::function<void(const SingleStatisticsDescription & stats, DataTypePtr data_type)>;
using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription & stats, DataTypePtr data_type)>;
using Validator = std::function<void(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
using Creator = std::function<StatisticsPtr(const SingleStatisticsDescription & stats, const DataTypePtr & data_type)>;
ColumnStatisticsPtr get(const ColumnStatisticsDescription & stats) const;
ColumnsStatistics getMany(const ColumnsDescription & columns) const;

View File

@ -25,8 +25,8 @@ extern const int ILLEGAL_STATISTICS;
static constexpr auto num_hashes = 7uz;
static constexpr auto num_buckets = 2718uz;
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_)
: IStatistics(stat_)
StatisticsCountMinSketch::StatisticsCountMinSketch(const SingleStatisticsDescription & description, const DataTypePtr & data_type_)
: IStatistics(description)
, sketch(num_hashes, num_buckets)
, data_type(data_type_)
{
@ -84,17 +84,17 @@ void StatisticsCountMinSketch::deserialize(ReadBuffer & buf)
}
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void countMinSketchStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber() && !isStringOrFixedString(data_type))
DataTypePtr inner_data_type = removeNullable(data_type);
inner_data_type = removeLowCardinalityAndNullable(inner_data_type);
if (!inner_data_type->isValueRepresentedByNumber() && !isStringOrFixedString(inner_data_type))
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'count_min' does not support type {}", data_type->getName());
}
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type)
{
return std::make_shared<StatisticsCountMinSketch>(stat, data_type);
return std::make_shared<StatisticsCountMinSketch>(description, data_type);
}
}

View File

@ -14,7 +14,7 @@ namespace DB
class StatisticsCountMinSketch : public IStatistics
{
public:
StatisticsCountMinSketch(const SingleStatisticsDescription & stat_, DataTypePtr data_type_);
StatisticsCountMinSketch(const SingleStatisticsDescription & description, const DataTypePtr & data_type_);
Float64 estimateEqual(const Field & val) const override;
@ -31,8 +31,8 @@ private:
};
void countMinSketchValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr countMinSketchCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void countMinSketchStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
StatisticsPtr countMinSketchStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
}

View File

@ -1,33 +1,29 @@
#include <Storages/Statistics/StatisticsTDigest.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_STATISTICS;
extern const int LOGICAL_ERROR;
}
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & stat_)
: IStatistics(stat_)
StatisticsTDigest::StatisticsTDigest(const SingleStatisticsDescription & description, const DataTypePtr & data_type_)
: IStatistics(description)
, data_type(data_type_)
{
}
void StatisticsTDigest::update(const ColumnPtr & column)
{
size_t rows = column->size();
for (size_t row = 0; row < rows; ++row)
for (size_t row = 0; row < column->size(); ++row)
{
Field field;
column->get(row, field);
if (field.isNull())
if (column->isNullAt(row))
continue;
if (auto field_as_float = StatisticsUtils::tryConvertToFloat64(field))
t_digest.add(*field_as_float, 1);
auto data = column->getFloat64(row);
t_digest.add(data, 1);
}
}
@ -43,31 +39,31 @@ void StatisticsTDigest::deserialize(ReadBuffer & buf)
Float64 StatisticsTDigest::estimateLess(const Field & val) const
{
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountLessThan(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type);
if (!val_as_float.has_value())
return 0;
return t_digest.getCountLessThan(*val_as_float);
}
Float64 StatisticsTDigest::estimateEqual(const Field & val) const
{
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val);
if (val_as_float)
return t_digest.getCountEqual(*val_as_float);
throw Exception(ErrorCodes::LOGICAL_ERROR, "Statistics 'tdigest' does not support estimating value of type {}", val.getTypeName());
auto val_as_float = StatisticsUtils::tryConvertToFloat64(val, data_type);
if (!val_as_float.has_value())
return 0;
return t_digest.getCountEqual(*val_as_float);
}
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void tdigestStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
DataTypePtr inner_data_type = removeNullable(data_type);
inner_data_type = removeLowCardinalityAndNullable(inner_data_type);
if (!inner_data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'tdigest' do not support type {}", data_type->getName());
}
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr)
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type)
{
return std::make_shared<StatisticsTDigest>(stat);
return std::make_shared<StatisticsTDigest>(description, data_type);
}
}
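Both estimators boil down to asking the t-digest for the approximate number of rows below, or equal to, the converted literal. As a rough analogue with exact data instead of a sketch, std::lower_bound and std::equal_range answer the same two questions (purely illustrative, unrelated to QuantileTDigest's internals):

#include <algorithm>
#include <iostream>
#include <vector>

int main()
{
    // With exact data, 'rows < x' is the lower_bound rank and 'rows = x' is the
    // equal_range width; QuantileTDigest answers the same questions approximately
    // from a fixed-size summary of the column.
    std::vector<double> column = {1, 2, 2, 3, 5, 5, 5, 8, 13, 21};
    std::sort(column.begin(), column.end());

    double x = 5;
    auto count_less = std::lower_bound(column.begin(), column.end(), x) - column.begin();
    auto equal = std::equal_range(column.begin(), column.end(), x);

    std::cout << "count < 5: " << count_less << '\n';                    // 4
    std::cout << "count = 5: " << (equal.second - equal.first) << '\n';  // 3
}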

View File

@ -9,7 +9,7 @@ namespace DB
class StatisticsTDigest : public IStatistics
{
public:
explicit StatisticsTDigest(const SingleStatisticsDescription & stat_);
explicit StatisticsTDigest(const SingleStatisticsDescription & description, const DataTypePtr & data_type_);
void update(const ColumnPtr & column) override;
@ -21,9 +21,10 @@ public:
private:
QuantileTDigest<Float64> t_digest;
DataTypePtr data_type;
};
void tdigestValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr tdigestCreator(const SingleStatisticsDescription & stat, DataTypePtr);
void tdigestStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
StatisticsPtr tdigestStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
}

View File

@ -11,8 +11,8 @@ namespace ErrorCodes
extern const int ILLEGAL_STATISTICS;
}
StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type)
: IStatistics(stat_)
StatisticsUniq::StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type)
: IStatistics(description)
{
arena = std::make_unique<Arena>();
AggregateFunctionProperties properties;
@ -52,17 +52,17 @@ UInt64 StatisticsUniq::estimateCardinality() const
return column->getUInt(0);
}
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type)
void uniqStatisticsValidator(const SingleStatisticsDescription & /*description*/, const DataTypePtr & data_type)
{
data_type = removeNullable(data_type);
data_type = removeLowCardinalityAndNullable(data_type);
if (!data_type->isValueRepresentedByNumber())
DataTypePtr inner_data_type = removeNullable(data_type);
inner_data_type = removeLowCardinalityAndNullable(inner_data_type);
if (!inner_data_type->isValueRepresentedByNumber())
throw Exception(ErrorCodes::ILLEGAL_STATISTICS, "Statistics of type 'uniq' do not support type {}", data_type->getName());
}
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type)
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type)
{
return std::make_shared<StatisticsUniq>(stat, data_type);
return std::make_shared<StatisticsUniq>(description, data_type);
}
}

View File

@ -10,7 +10,7 @@ namespace DB
class StatisticsUniq : public IStatistics
{
public:
StatisticsUniq(const SingleStatisticsDescription & stat_, const DataTypePtr & data_type);
StatisticsUniq(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
~StatisticsUniq() override;
void update(const ColumnPtr & column) override;
@ -27,7 +27,7 @@ private:
};
void uniqValidator(const SingleStatisticsDescription &, DataTypePtr data_type);
StatisticsPtr uniqCreator(const SingleStatisticsDescription & stat, DataTypePtr data_type);
void uniqStatisticsValidator(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
StatisticsPtr uniqStatisticsCreator(const SingleStatisticsDescription & description, const DataTypePtr & data_type);
}

View File

@ -0,0 +1,169 @@
#include <Storages/StorageFuzzQuery.h>
#include <optional>
#include <unordered_set>
#include <Columns/ColumnString.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Storages/NamedCollectionsHelpers.h>
#include <Storages/StorageFactory.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/parseQuery.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
ColumnPtr FuzzQuerySource::createColumn()
{
auto column = ColumnString::create();
ColumnString::Chars & data_to = column->getChars();
ColumnString::Offsets & offsets_to = column->getOffsets();
offsets_to.resize(block_size);
IColumn::Offset offset = 0;
auto fuzz_base = query;
size_t row_num = 0;
while (row_num < block_size)
{
ASTPtr new_query = fuzz_base->clone();
auto base_before_fuzz = fuzz_base->formatForErrorMessage();
fuzzer.fuzzMain(new_query);
auto fuzzed_text = new_query->formatForErrorMessage();
if (base_before_fuzz == fuzzed_text)
continue;
/// The fuzzed query is too long, start again from the original query.
if (fuzzed_text.size() > config.max_query_length)
{
fuzz_base = query;
continue;
}
IColumn::Offset next_offset = offset + fuzzed_text.size() + 1;
data_to.resize(next_offset);
std::copy(fuzzed_text.begin(), fuzzed_text.end(), &data_to[offset]);
data_to[offset + fuzzed_text.size()] = 0;
offsets_to[row_num] = next_offset;
offset = next_offset;
fuzz_base = new_query;
++row_num;
}
return column;
}
StorageFuzzQuery::StorageFuzzQuery(
const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_)
: IStorage(table_id_), config(config_)
{
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
storage_metadata.setComment(comment_);
setInMemoryMetadata(storage_metadata);
}
Pipe StorageFuzzQuery::read(
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & /*query_info*/,
ContextPtr /*context*/,
QueryProcessingStage::Enum /*processed_stage*/,
size_t max_block_size,
size_t num_streams)
{
storage_snapshot->check(column_names);
Pipes pipes;
pipes.reserve(num_streams);
const ColumnsDescription & our_columns = storage_snapshot->metadata->getColumns();
Block block_header;
for (const auto & name : column_names)
{
const auto & name_type = our_columns.get(name);
MutableColumnPtr column = name_type.type->createColumn();
block_header.insert({std::move(column), name_type.type, name_type.name});
}
const char * begin = config.query.data();
const char * end = begin + config.query.size();
ParserQuery parser(end, false);
auto query = parseQuery(parser, begin, end, "", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
for (UInt64 i = 0; i < num_streams; ++i)
pipes.emplace_back(std::make_shared<FuzzQuerySource>(max_block_size, block_header, config, query));
return Pipe::unitePipes(std::move(pipes));
}
StorageFuzzQuery::Configuration StorageFuzzQuery::getConfiguration(ASTs & engine_args, ContextPtr local_context)
{
StorageFuzzQuery::Configuration configuration{};
// Supported signatures:
//
// FuzzQuery(query)
// FuzzQuery(query, max_query_length)
// FuzzQuery(query, max_query_length, random_seed)
if (engine_args.empty() || engine_args.size() > 3)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "FuzzQuery requires 1 to 3 arguments: query, max_query_length, random_seed");
for (auto & engine_arg : engine_args)
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, local_context);
auto first_arg = checkAndGetLiteralArgument<String>(engine_args[0], "query");
configuration.query = std::move(first_arg);
if (engine_args.size() >= 2)
{
const auto & literal = engine_args[1]->as<const ASTLiteral &>();
if (!literal.value.isNull())
configuration.max_query_length = checkAndGetLiteralArgument<UInt64>(literal, "max_query_length");
}
if (engine_args.size() == 3)
{
const auto & literal = engine_args[2]->as<const ASTLiteral &>();
if (!literal.value.isNull())
configuration.random_seed = checkAndGetLiteralArgument<UInt64>(literal, "random_seed");
}
return configuration;
}
void registerStorageFuzzQuery(StorageFactory & factory)
{
factory.registerStorage(
"FuzzQuery",
[](const StorageFactory::Arguments & args) -> std::shared_ptr<StorageFuzzQuery>
{
ASTs & engine_args = args.engine_args;
if (engine_args.empty())
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage FuzzQuery must have arguments.");
StorageFuzzQuery::Configuration configuration = StorageFuzzQuery::getConfiguration(engine_args, args.getLocalContext());
for (const auto& col : args.columns)
if (col.type->getTypeId() != TypeIndex::String)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "'StorageFuzzQuery' supports only columns of String type, got {}.", col.type->getName());
return std::make_shared<StorageFuzzQuery>(args.table_id, args.columns, args.comment, configuration);
});
}
}
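The createColumn() loop above fills a ColumnString by hand: every generated query is appended to one contiguous character buffer followed by a zero byte, and offsets[i] records the position one past that terminator. A minimal standalone model of this layout (not the real IColumn interface):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

struct ColumnStringModel
{
    std::vector<char> chars;        // all values concatenated, each followed by '\0'
    std::vector<uint64_t> offsets;  // offsets[i] = end position (past the '\0') of row i

    void insert(const std::string & value)
    {
        uint64_t next_offset = chars.size() + value.size() + 1;
        chars.insert(chars.end(), value.begin(), value.end());
        chars.push_back('\0');
        offsets.push_back(next_offset);
    }

    std::string at(size_t row) const
    {
        size_t begin = row == 0 ? 0 : offsets[row - 1];
        return std::string(chars.data() + begin, offsets[row] - begin - 1);
    }
};

int main()
{
    ColumnStringModel col;
    col.insert("SELECT 1");
    col.insert("SELECT 1 + 1");
    std::cout << col.at(0) << '\n' << col.at(1) << '\n';  // both stored queries round-trip
}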

View File

@ -0,0 +1,88 @@
#pragma once
#include <Storages/IStorage.h>
#include <Storages/StorageConfiguration.h>
#include <Common/randomSeed.h>
#include <Common/QueryFuzzer.h>
#include "config.h"
namespace DB
{
class NamedCollection;
class StorageFuzzQuery final : public IStorage
{
public:
struct Configuration : public StatelessTableEngineConfiguration
{
String query;
UInt64 max_query_length = 500;
UInt64 random_seed = randomSeed();
};
StorageFuzzQuery(
const StorageID & table_id_, const ColumnsDescription & columns_, const String & comment_, const Configuration & config_);
std::string getName() const override { return "FuzzQuery"; }
Pipe read(
const Names & column_names,
const StorageSnapshotPtr & storage_snapshot,
SelectQueryInfo & query_info,
ContextPtr context,
QueryProcessingStage::Enum processed_stage,
size_t max_block_size,
size_t num_streams) override;
static StorageFuzzQuery::Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context);
private:
const Configuration config;
};
class FuzzQuerySource : public ISource
{
public:
FuzzQuerySource(
UInt64 block_size_, Block block_header_, const StorageFuzzQuery::Configuration & config_, ASTPtr query_)
: ISource(block_header_)
, block_size(block_size_)
, block_header(std::move(block_header_))
, config(config_)
, query(query_)
, fuzzer(config_.random_seed)
{
}
String getName() const override { return "FuzzQuery"; }
protected:
Chunk generate() override
{
Columns columns;
columns.reserve(block_header.columns());
for (const auto & col : block_header)
{
chassert(col.type->getTypeId() == TypeIndex::String);
columns.emplace_back(createColumn());
}
return {std::move(columns), block_size};
}
private:
ColumnPtr createColumn();
UInt64 block_size;
Block block_header;
StorageFuzzQuery::Configuration config;
ASTPtr query;
QueryFuzzer fuzzer;
};
}

View File

@ -1,3 +1,4 @@
#include <optional>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeNullable.h>
@ -31,6 +32,8 @@ ColumnsDescription StorageSystemClusters::getColumnsDescription()
{"database_shard_name", std::make_shared<DataTypeString>(), "The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database)."},
{"database_replica_name", std::make_shared<DataTypeString>(), "The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database)."},
{"is_active", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt8>()), "The status of the Replicated database replica (for clusters that belong to a Replicated database): 1 means 'replica is online', 0 means 'replica is offline', NULL means 'unknown'."},
{"replication_lag", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt32>()), "The replication lag of the `Replicated` database replica (for clusters that belong to a Replicated database)."},
{"recovery_time", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>()), "The recovery time of the `Replicated` database replica (for clusters that belong to a Replicated database), in milliseconds."},
};
description.setAliases({
@ -67,6 +70,11 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const std
const auto & shards_info = cluster->getShardsInfo();
const auto & addresses_with_failover = cluster->getShardsAddresses();
size_t recovery_time_column_idx = columns_mask.size() - 1, replication_lag_column_idx = columns_mask.size() - 2, is_active_column_idx = columns_mask.size() - 3;
ReplicasInfo replicas_info;
if (replicated && (columns_mask[recovery_time_column_idx] || columns_mask[replication_lag_column_idx] || columns_mask[is_active_column_idx]))
replicas_info = replicated->tryGetReplicasInfo(name_and_cluster.second);
size_t replica_idx = 0;
for (size_t shard_index = 0; shard_index < shards_info.size(); ++shard_index)
{
@ -114,17 +122,46 @@ void StorageSystemClusters::writeCluster(MutableColumns & res_columns, const std
res_columns[res_index++]->insert(address.database_shard_name);
if (columns_mask[src_index++])
res_columns[res_index++]->insert(address.database_replica_name);
/// make sure these three columns remain the last ones
if (columns_mask[src_index++])
{
std::vector<UInt8> is_active;
if (replicated)
is_active = replicated->tryGetAreReplicasActive(name_and_cluster.second);
if (is_active.empty())
if (replicas_info.empty())
res_columns[res_index++]->insertDefault();
else
res_columns[res_index++]->insert(is_active[replica_idx++]);
{
const auto & replica_info = replicas_info[replica_idx];
res_columns[res_index++]->insert(replica_info.is_active);
}
}
if (columns_mask[src_index++])
{
if (replicas_info.empty())
res_columns[res_index++]->insertDefault();
else
{
const auto & replica_info = replicas_info[replica_idx];
if (replica_info.replication_lag != std::nullopt)
res_columns[res_index++]->insert(*replica_info.replication_lag);
else
res_columns[res_index++]->insertDefault();
}
}
if (columns_mask[src_index++])
{
if (replicas_info.empty())
res_columns[res_index++]->insertDefault();
else
{
const auto & replica_info = replicas_info[replica_idx];
if (replica_info.recovery_time != 0)
res_columns[res_index++]->insert(replica_info.recovery_time);
else
res_columns[res_index++]->insertDefault();
}
}
++replica_idx;
}
}
}
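The function above relies on the usual columns_mask pattern: src_index advances over every column the table can produce, while res_index advances only over the columns the query actually selected, so the new replication_lag and recovery_time columns cost nothing when they are not requested. A toy version of the pattern with made-up values:

#include <iostream>
#include <optional>
#include <string>
#include <vector>

int main()
{
    // Suppose the query asked for is_active and recovery_time but not replication_lag.
    std::vector<bool> columns_mask = {true, false, true};

    std::optional<int> is_active = 1;
    std::optional<int> replication_lag;       // unknown -> NULL
    std::optional<int> recovery_time = 1500;  // milliseconds

    std::vector<std::string> row;
    size_t src_index = 0;

    // Only requested columns are materialized; src_index still advances for skipped ones.
    auto emit = [&](const std::optional<int> & value)
    {
        if (columns_mask[src_index++])
            row.push_back(value ? std::to_string(*value) : "NULL");
    };

    emit(is_active);
    emit(replication_lag);
    emit(recovery_time);

    for (const auto & cell : row)
        std::cout << cell << ' ';  // prints: 1 1500
    std::cout << '\n';
}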

View File

@ -1,10 +1,10 @@
#pragma once
#include <Databases/DatabaseReplicated.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Storages/System/IStorageSystemOneBlock.h>
namespace DB
{

View File

@ -26,6 +26,7 @@ void registerStorageGenerateRandom(StorageFactory & factory);
void registerStorageExecutable(StorageFactory & factory);
void registerStorageWindowView(StorageFactory & factory);
void registerStorageLoop(StorageFactory & factory);
void registerStorageFuzzQuery(StorageFactory & factory);
void registerStorageTimeSeries(StorageFactory & factory);
#if USE_RAPIDJSON || USE_SIMDJSON
@ -128,6 +129,7 @@ void registerStorages()
registerStorageExecutable(factory);
registerStorageWindowView(factory);
registerStorageLoop(factory);
registerStorageFuzzQuery(factory);
registerStorageTimeSeries(factory);
#if USE_RAPIDJSON || USE_SIMDJSON

View File

@ -7,6 +7,7 @@
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFactory.h>
#include <Interpreters/Context.h>
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/InterpreterSelectQueryAnalyzer.h>
@ -74,6 +75,7 @@ private:
{"a", std::make_shared<DataTypeUInt8>()},
{"b", std::make_shared<DataTypeDate>()},
{"foo", std::make_shared<DataTypeString>()},
{"is_value", DataTypeFactory::instance().get("Bool")},
}),
TableWithColumnNamesAndTypes(
createDBAndTable("table2"),
@ -411,6 +413,14 @@ TEST(TransformQueryForExternalDatabase, Analyzer)
R"(SELECT "column" FROM "test"."table")");
check(state, 1, {"column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo"},
"SELECT * FROM table WHERE (column) IN (1)",
"SELECT * EXCEPT (is_value) FROM table WHERE (column) IN (1)",
R"(SELECT "column", "apply_id", "apply_type", "apply_status", "create_time", "field", "value", "a", "b", "foo" FROM "test"."table" WHERE "column" IN (1))");
check(state, 1, {"is_value"},
"SELECT is_value FROM table WHERE is_value = true",
R"(SELECT "is_value" FROM "test"."table" WHERE "is_value" = true)");
check(state, 1, {"is_value"},
"SELECT is_value FROM table WHERE is_value = 1",
R"(SELECT "is_value" FROM "test"."table" WHERE "is_value" = 1)");
}

View File

@ -0,0 +1,54 @@
#include <TableFunctions/TableFunctionFuzzQuery.h>
#include <DataTypes/DataTypeString.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/registerTableFunctions.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
void TableFunctionFuzzQuery::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
ASTs & args_func = ast_function->children;
if (args_func.size() != 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments", getName());
auto args = args_func.at(0)->children;
configuration = StorageFuzzQuery::getConfiguration(args, context);
}
StoragePtr TableFunctionFuzzQuery::executeImpl(
const ASTPtr & /*ast_function*/,
ContextPtr context,
const std::string & table_name,
ColumnsDescription /*cached_columns*/,
bool is_insert_query) const
{
ColumnsDescription columns = getActualTableStructure(context, is_insert_query);
auto res = std::make_shared<StorageFuzzQuery>(
StorageID(getDatabaseName(), table_name),
columns,
/* comment */ String{},
configuration);
res->startup();
return res;
}
void registerTableFunctionFuzzQuery(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionFuzzQuery>(
{.documentation
= {.description = "Perturbs a query string with random variations.",
.returned_value = "A table object with a single column containing perturbed query strings."},
.allow_readonly = true});
}
}

View File

@ -0,0 +1,42 @@
#pragma once
#include <optional>
#include <TableFunctions/ITableFunction.h>
#include <DataTypes/DataTypeString.h>
#include <Storages/StorageFuzzQuery.h>
#include "config.h"
namespace DB
{
class TableFunctionFuzzQuery : public ITableFunction
{
public:
static constexpr auto name = "fuzzQuery";
std::string getName() const override { return name; }
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
ColumnsDescription getActualTableStructure(ContextPtr /* context */, bool /* is_insert_query */) const override
{
return ColumnsDescription{{"query", std::make_shared<DataTypeString>()}};
}
private:
StoragePtr executeImpl(
const ASTPtr & ast_function,
ContextPtr context,
const std::string & table_name,
ColumnsDescription cached_columns,
bool is_insert_query) const override;
const char * getStorageTypeName() const override { return "fuzzQuery"; }
String source;
std::optional<UInt64> random_seed;
StorageFuzzQuery::Configuration configuration;
};
}

View File

@ -26,6 +26,7 @@ void registerTableFunctions()
registerTableFunctionMongoDB(factory);
registerTableFunctionRedis(factory);
registerTableFunctionMergeTreeIndex(factory);
registerTableFunctionFuzzQuery(factory);
#if USE_RAPIDJSON || USE_SIMDJSON
registerTableFunctionFuzzJSON(factory);
#endif

View File

@ -23,6 +23,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory);
void registerTableFunctionMongoDB(TableFunctionFactory & factory);
void registerTableFunctionRedis(TableFunctionFactory & factory);
void registerTableFunctionMergeTreeIndex(TableFunctionFactory & factory);
void registerTableFunctionFuzzQuery(TableFunctionFactory & factory);
#if USE_RAPIDJSON || USE_SIMDJSON
void registerTableFunctionFuzzJSON(TableFunctionFactory & factory);
#endif

View File

@ -267,7 +267,7 @@ def clickhouse_execute_http(
max_http_retries=5,
retry_error_codes=False,
):
if args.secure:
if base_args.secure:
client = http.client.HTTPSConnection(
host=base_args.tcp_host, port=base_args.http_port, timeout=timeout
)
@ -358,14 +358,89 @@ def clickhouse_execute_json(
return rows
# Should we capture client's stacktraces via SIGTSTP
CAPTURE_CLIENT_STACKTRACE = False
def kill_process_group(pgid):
print(f"Killing process group {pgid}")
print(f"Processes in process group {pgid}:")
print(
subprocess.check_output(
f"pgrep --pgroup {pgid} -a", shell=True, stderr=subprocess.STDOUT
).decode("utf-8"),
end="",
)
try:
if CAPTURE_CLIENT_STACKTRACE:
# Let's try to dump stacktrace in client (useful to catch issues there)
os.killpg(pgid, signal.SIGTSTP)
# Wait some time for clickhouse utilities to gather stacktrace
if RELEASE_NON_SANITIZED:
sleep(0.5)
else:
sleep(10)
# NOTE: this may still leave some processes that had been
# created by timeout(1), since it also creates a new process
# group. But this should not be a problem with default
# options, since the default time for each test is 10min,
# which is far bigger than the timeout of each
# timeout(1) invocation.
#
# As a workaround we send SIGTERM first and only then SIGKILL,
# so that timeout(1) gets a chance to terminate its children
# (though not always, since signals are asynchronous).
os.killpg(pgid, signal.SIGTERM)
# We need a minimal delay to let processes handle SIGTERM - 0.1s (this may
# not be enough, but it is at least something)
sleep(0.1)
os.killpg(pgid, signal.SIGKILL)
except OSError as e:
if e.errno == ESRCH:
print(f"Got ESRCH while killing {pgid}. Ignoring.")
else:
raise
print(f"Process group {pgid} should be killed")
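The escalation itself is plain POSIX process-group signalling: SIGTERM to the whole group first so that wrappers like timeout(1) can clean up their children, a short grace period, then SIGKILL for anything left. A small standalone C++/POSIX sketch of the same sequence (Linux-only and purely illustrative; clickhouse-test itself stays in Python):

#include <csignal>
#include <cstdio>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main()
{
    pid_t child = fork();
    if (child == 0)
    {
        setpgid(0, 0);  // the child becomes the leader of its own process group
        pause();        // wait until a signal arrives
        _exit(0);
    }

    setpgid(child, child);  // also set it from the parent to avoid the classic race
    pid_t pgid = child;

    kill(-pgid, SIGTERM);   // polite request to the whole group first
    usleep(100 * 1000);     // ~0.1s grace period, as in kill_process_group()
    kill(-pgid, SIGKILL);   // force anything that is still alive

    int status = 0;
    waitpid(child, &status, 0);
    if (WIFSIGNALED(status))
        std::printf("child terminated by signal %d\n", WTERMSIG(status));
}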
def cleanup_child_processes(pid):
pgid = os.getpgid(os.getpid())
print(f"Child processes of {pid}:")
print(
subprocess.check_output(
f"pgrep --parent {pid} -a", shell=True, stderr=subprocess.STDOUT
).decode("utf-8"),
end="",
)
# Due to start_new_session=True, it is not enough to kill by PGID; we need
# to look at child processes as well.
# We are hoping that nobody creates a new session in the tests (it is
# possible via timeout(), but we assume such processes will be killed by
# timeout itself).
processes = subprocess.check_output(
f"pgrep --parent {pid}", shell=True, stderr=subprocess.STDOUT
)
processes = processes.decode("utf-8")
processes = processes.strip()
processes = processes.split("\n")
processes = map(lambda x: int(x.strip()), processes)
processes = list(processes)
for child in processes:
child_pgid = os.getpgid(child)
if child_pgid != pgid:
kill_process_group(child_pgid)
# SIGKILL should not be sent, since this will kill the script itself
os.killpg(pgid, signal.SIGTERM)
# send signal to all processes in group to avoid hung check triggering
# (to avoid terminating clickhouse-test itself, the signal should be ignored)
def stop_tests():
# send signal to all processes in group to avoid hung check triggering
# (to avoid terminating clickhouse-test itself, the signal should be ignored)
print("Sending signals")
signal.signal(signal.SIGTERM, signal.SIG_IGN)
os.killpg(os.getpgid(os.getpid()), signal.SIGTERM)
signal.signal(signal.SIGTERM, signal.SIG_DFL)
print("Sending signals DONE")
cleanup_child_processes(os.getpid())
signal.signal(signal.SIGTERM, signal_handler)
def get_db_engine(args, database_name):
@ -836,7 +911,6 @@ class SettingsRandomizer:
"cross_join_min_bytes_to_compress": lambda: random.choice([0, 1, 100000000]),
"min_external_table_block_size_bytes": lambda: random.choice([0, 1, 100000000]),
"max_parsing_threads": lambda: random.choice([0, 1, 10]),
"trace_profile_events": lambda: random.randint(0, 1),
"optimize_functions_to_subcolumns": lambda: random.randint(0, 1),
}
@ -1249,39 +1323,35 @@ class TestCase:
return None
def process_result_impl(
self, proc, stdout: str, stderr: str, debug_log: str, total_time: float
):
def process_result_impl(self, proc, total_time: float):
if proc:
if proc.returncode is None:
kill_process_group(os.getpgid(proc.pid))
description = ""
debug_log = ""
if os.path.exists(self.testcase_args.debug_log_file):
with open(self.testcase_args.debug_log_file, "rb") as stream:
debug_log += self.testcase_args.debug_log_file + ":\n"
debug_log += str(stream.read(), errors="replace", encoding="utf-8")
debug_log += "\n"
stdout = ""
if os.path.exists(self.stdout_file):
with open(self.stdout_file, "rb") as stdfd:
stdout = str(stdfd.read(), errors="replace", encoding="utf-8")
stderr = ""
if os.path.exists(self.stderr_file):
with open(self.stderr_file, "rb") as stdfd:
stderr += str(stdfd.read(), errors="replace", encoding="utf-8")
if debug_log:
debug_log = "\n".join(debug_log.splitlines()[:100])
if proc:
if proc.returncode is None:
try:
pgid = os.getpgid(proc.pid)
# NOTE: this still may leave some processes, that had been
# created by timeout(1), since it also creates new process
# group. But this should not be a problem with default
# options, since the default time for each test is 10min,
# and this is way more bigger then the timeout for each
# timeout(1) invocation.
#
# But as a workaround we are sending SIGTERM first, and
# only after SIGKILL, that way timeout(1) will have an
# ability to terminate childrens (though not always since
# signals are asynchronous).
os.killpg(pgid, signal.SIGTERM)
# This may not be enough, but this is at least something
# (and anyway it is OK to spend 0.1 second more in case of
# test timeout).
sleep(0.1)
os.killpg(pgid, signal.SIGKILL)
except OSError as e:
if e.errno != ESRCH:
raise
if stderr:
description += stderr
if debug_log:
@ -1533,7 +1603,7 @@ class TestCase:
def run_single_test(
self, server_logs_level, client_options
) -> Tuple[Optional[Popen], str, str, str, float]:
) -> Tuple[Optional[Popen], float]:
args = self.testcase_args
client = args.testcase_client
start_time = args.testcase_start_time
@ -1610,13 +1680,6 @@ class TestCase:
# Whether the test timed out will be decided later
pass
debug_log = ""
if os.path.exists(self.testcase_args.debug_log_file):
with open(self.testcase_args.debug_log_file, "rb") as stream:
debug_log += self.testcase_args.debug_log_file + ":\n"
debug_log += str(stream.read(), errors="replace", encoding="utf-8")
debug_log += "\n"
total_time = (datetime.now() - start_time).total_seconds()
# Normalize randomized database names in stdout, stderr files.
@ -1668,17 +1731,7 @@ class TestCase:
"https://localhost:8443/",
)
stdout = ""
if os.path.exists(self.stdout_file):
with open(self.stdout_file, "rb") as stdfd:
stdout = str(stdfd.read(), errors="replace", encoding="utf-8")
stderr = ""
if os.path.exists(self.stderr_file):
with open(self.stderr_file, "rb") as stdfd:
stderr += str(stdfd.read(), errors="replace", encoding="utf-8")
return proc, stdout, stderr, debug_log, total_time
return proc, total_time
def run(self, args, suite, client_options, server_logs_level):
start_time = datetime.now()
@ -1710,14 +1763,14 @@ class TestCase:
if not is_valid_utf_8(self.case_file) or (
self.reference_file and not is_valid_utf_8(self.reference_file)
):
proc, stdout, stderr, debug_log, total_time = self.run_single_test(
proc, total_time = self.run_single_test(
server_logs_level, client_options
)
result = self.process_result_impl(
proc, stdout, stderr, debug_log, total_time
result = self.process_result_impl(proc, total_time)
result.check_if_need_retry(
args, result.description, result.description, self.runs_count
)
result.check_if_need_retry(args, stdout, stderr, self.runs_count)
# to avoid breaking CSV parser
result.description = result.description.replace("\0", "")
else:
@ -1735,17 +1788,16 @@ class TestCase:
):
(
proc,
stdout,
stderr,
debug_log,
total_time,
) = self.run_single_test(server_logs_level, client_options)
result = self.process_result_impl(
proc, stdout, stderr, debug_log, total_time
)
result = self.process_result_impl(proc, total_time)
result.check_if_need_retry(
args, stdout, stderr, self.runs_count
args,
result.description,
result.description,
self.runs_count,
)
# to avoid breaking CSV parser
result.description = result.description.replace("\0", "")
@ -2354,7 +2406,13 @@ class BuildFlags:
POLYMORPHIC_PARTS = "polymorphic-parts"
# Release and non-sanitizer build
RELEASE_NON_SANITIZED = False
def collect_build_flags(args):
global RELEASE_NON_SANITIZED
result = []
value = clickhouse_execute(
@ -2379,6 +2437,8 @@ def collect_build_flags(args):
elif b"RelWithDebInfo" in value or b"Release" in value:
result.append(BuildFlags.RELEASE)
RELEASE_NON_SANITIZED = result == [BuildFlags.RELEASE]
value = clickhouse_execute(
args,
"SELECT value FROM system.settings WHERE name = 'allow_deprecated_database_ordinary'",
@ -3391,29 +3451,36 @@ def parse_args():
default="./client.fatal.log",
help="Path to file for fatal logs from client",
)
parser.add_argument(
"--capture-client-stacktrace",
action="store_true",
help="Capture stacktraces from clickhouse-client/local on errors",
)
return parser.parse_args()
class Terminated(KeyboardInterrupt):
pass
def __init__(self, signal):
self.signal = signal
def signal_handler(sig, frame):
raise Terminated(f"Terminated with {sig} signal")
def signal_handler(signal, frame):
raise Terminated(signal)
if __name__ == "__main__":
# Move to a new process group and kill it at exit so that we don't have any
# infinite test processes left
# (new process group is required to avoid killing some parent processes)
os.setpgid(0, 0)
stop_time = None
exit_code = multiprocessing.Value("i", 0)
server_died = multiprocessing.Event()
multiprocessing_manager = multiprocessing.Manager()
restarted_tests = multiprocessing_manager.list()
# Move to a new process group and kill it at exit so that we don't have any
# infinite test processes left
# (new process group is required to avoid killing some parent processes)
os.setpgid(0, 0)
signal.signal(signal.SIGTERM, signal_handler)
signal.signal(signal.SIGINT, signal_handler)
signal.signal(signal.SIGHUP, signal_handler)
@ -3431,6 +3498,8 @@ if __name__ == "__main__":
)
sys.exit(1)
CAPTURE_CLIENT_STACKTRACE = args.capture_client_stacktrace
# Autodetect the directory with queries if not specified
if args.queries is None:
args.queries = "queries"
@ -3552,4 +3621,14 @@ if __name__ == "__main__":
if args.replace_replicated_with_shared:
args.s3_storage = True
main(args)
try:
main(args)
except ServerDied as e:
print(f"{e}", file=sys.stderr)
sys.exit(1)
except Terminated as e:
print(f"Terminated with {e.signal} signal", file=sys.stderr)
sys.exit(128 + e.signal)
except KeyboardInterrupt:
print("Interrupted")
sys.exit(128 + signal.SIGINT)

View File

@ -0,0 +1,41 @@
<clickhouse>
<tcp_port>9000</tcp_port>
<profiles>
<default>
</default>
</profiles>
<users>
<default>
<profile>default</profile>
<no_password></no_password>
</default>
</users>
<keeper_server>
<tcp_port>2181</tcp_port>
<server_id>1</server_id>
<log_storage_path>/var/lib/clickhouse/coordination/log</log_storage_path>
<snapshot_storage_path>/var/lib/clickhouse/coordination/snapshots</snapshot_storage_path>
<coordination_settings>
<session_timeout_ms>20000</session_timeout_ms>
</coordination_settings>
<raft_configuration>
<server>
<id>1</id>
<hostname>localhost</hostname>
<port>9444</port>
</server>
</raft_configuration>
</keeper_server>
<zookeeper>
<node index="1">
<host>localhost</host>
<port>2181</port>
</node>
<session_timeout_ms>20000</session_timeout_ms>
</zookeeper>
</clickhouse>

View File

@ -0,0 +1,61 @@
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance(
"node",
main_configs=["configs/config.xml"],
stay_alive=True,
)
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_recovery_time_metric(start_cluster):
node.query(
"""
DROP DATABASE IF EXISTS rdb;
CREATE DATABASE rdb
ENGINE = Replicated('/test/test_recovery_time_metric', 'shard1', 'replica1')
"""
)
node.query(
"""
DROP TABLE IF EXISTS rdb.t;
CREATE TABLE rdb.t
(
`x` UInt32
)
ENGINE = MergeTree
ORDER BY x
"""
)
node.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/metadata/rdb/t.sql"])
node.restart_clickhouse()
ret = int(
node.query(
"""
SELECT recovery_time
FROM system.clusters
WHERE cluster = 'rdb'
"""
).strip()
)
assert ret > 0
node.query(
"""
DROP DATABASE rdb
"""
)

View File

@ -1,22 +1,2 @@
49999995000000 10000000
499999500000 1000000 15
100033 2
100034 2
100035 2
100036 2
100037 2
100038 2
100039 2
10004 2
100040 2
100041 2
100033 2
100034 2
100035 2
100036 2
100037 2
100038 2
100039 2
10004 2
100040 2
100041 2

View File

@ -1,5 +1,8 @@
-- Tags: long
-- This test was split in two due to long runtimes in sanitizers.
-- The other part is 00284_external_aggregation_2.
SET max_bytes_before_external_group_by = 100000000;
SET max_memory_usage = 410000000;
SET group_by_two_level_threshold = 100000;
@ -7,19 +10,3 @@ SET group_by_two_level_threshold_bytes = 50000000;
SELECT sum(k), sum(c) FROM (SELECT number AS k, count() AS c FROM (SELECT * FROM system.numbers LIMIT 10000000) GROUP BY k);
SELECT sum(k), sum(c), max(u) FROM (SELECT number AS k, count() AS c, uniqArray(range(number % 16)) AS u FROM (SELECT * FROM system.numbers LIMIT 1000000) GROUP BY k);
SET max_memory_usage = 0;
SET group_by_two_level_threshold = 100000;
SET max_bytes_before_external_group_by = '1Mi';
-- method: key_string & key_string_two_level
CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6);
INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6);
SELECT s, count() FROM t_00284_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42;
-- method: low_cardinality_key_string & low_cardinality_key_string_two_level
CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6);
INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6);
SELECT s, count() FROM t_00284_lc_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42;

View File

@ -0,0 +1,20 @@
100033 2
100034 2
100035 2
100036 2
100037 2
100038 2
100039 2
10004 2
100040 2
100041 2
100033 2
100034 2
100035 2
100036 2
100037 2
100038 2
100039 2
10004 2
100040 2
100041 2

View File

@ -0,0 +1,22 @@
-- Tags: long
-- This test was split in two due to long runtimes in sanitizers.
-- The other part is 00284_external_aggregation.
SET group_by_two_level_threshold_bytes = 50000000;
SET max_memory_usage = 0;
SET group_by_two_level_threshold = 100000;
SET max_bytes_before_external_group_by = '1Mi';
-- method: key_string & key_string_two_level
CREATE TABLE t_00284_str(s String) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6);
INSERT INTO t_00284_str SELECT toString(number) FROM numbers_mt(1e6);
SELECT s, count() FROM t_00284_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42;
-- method: low_cardinality_key_string & low_cardinality_key_string_two_level
CREATE TABLE t_00284_lc_str(s LowCardinality(String)) ENGINE = MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6);
INSERT INTO t_00284_lc_str SELECT toString(number) FROM numbers_mt(1e6);
SELECT s, count() FROM t_00284_lc_str GROUP BY s ORDER BY s LIMIT 10 OFFSET 42;

View File

@ -227,3 +227,5 @@
1 value1
1 value2
2 value3
1
1

View File

@ -374,3 +374,10 @@ SELECT id, ary[indexOf(ary, 'value2')] FROM test_bf_indexOf WHERE ary[indexOf(ar
SELECT id, ary[indexOf(ary, 'value3')] FROM test_bf_indexOf WHERE ary[indexOf(ary, 'value3')] = 'value3' ORDER BY id FORMAT TSV;
DROP TABLE IF EXISTS test_bf_indexOf;
-- Test for bug #65597
DROP TABLE IF EXISTS test_bf_cast;
CREATE TABLE test_bf_cast (c Int32, INDEX x1 (c) type bloom_filter) ENGINE = MergeTree ORDER BY c AS SELECT 1;
SELECT count() FROM test_bf_cast WHERE cast(c = 1 OR c = 9999 AS Bool) SETTINGS use_skip_indexes=0;
SELECT count() FROM test_bf_cast WHERE cast(c = 1 OR c = 9999 AS Bool) SETTINGS use_skip_indexes=1;
DROP TABLE test_bf_cast;

View File

@ -1,6 +1,9 @@
#!/usr/bin/env bash
# Tags: long
# This test was split in two due to long runtimes in sanitizers.
# The other part is 02099_tsv_raw_format_2.sh.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

View File

@ -1,6 +1,9 @@
#!/usr/bin/env bash
# Tags: long
# This test was split in two due to long runtimes in sanitizers.
# The other part is 02099_tsv_raw_format_1.sh.
#
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

View File

@ -52,6 +52,8 @@ CREATE TABLE system.clusters
`database_shard_name` String,
`database_replica_name` String,
`is_active` Nullable(UInt8),
`replication_lag` Nullable(UInt32),
`recovery_time` Nullable(UInt64),
`name` String ALIAS cluster
)
ENGINE = SystemClusters

View File

@ -1,29 +0,0 @@
#!/usr/bin/expect -f
set basedir [file dirname $argv0]
set basename [file tail $argv0]
if {[info exists env(CLICKHOUSE_TMP)]} {
set CLICKHOUSE_TMP $env(CLICKHOUSE_TMP)
} else {
set CLICKHOUSE_TMP "."
}
exp_internal -f $CLICKHOUSE_TMP/$basename.debuglog 0
log_user 0
set timeout 20
match_max 100000
expect_after {
-i $any_spawn_id eof { exp_continue }
-i $any_spawn_id timeout { exit 1 }
}
spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_LOCAL --disable_suggestion"
expect ":) "
send -- "insert into table function null() format TSV some trash here 123 \n 456\r"
expect "CANNOT_PARSE_INPUT_ASSERTION_FAILED"
expect ":) "
send -- ""
expect eof

View File

@ -0,0 +1,95 @@
import pty
import os
import shlex
import time
import multiprocessing
COMPLETION_TIMEOUT_SECONDS = 30
DEBUG_LOG = os.path.join(
os.environ["CLICKHOUSE_TMP"],
os.path.basename(os.path.abspath(__file__)).strip(".python") + ".debuglog",
)
STATE_MAP = {
-1: "process did not start",
0: "all good",
1: "process started and said ':)'",
2: "prompt search was started",
3: "prompt is missing",
}
def run_with_timeout(func, args, timeout):
for _ in range(5):
state = multiprocessing.Value("i", -1)
process = multiprocessing.Process(
target=func, args=args, kwargs={"state": state}
)
process.start()
process.join(timeout)
if state.value in (0, 3):
return
if process.is_alive():
process.terminate()
if state.value == -1:
continue
print(f"Timeout, state: {STATE_MAP[state.value]}")
return
def expect(text, master, debug_log_fd, output=""):
while text not in output:
output_b = os.read(master, 4096)
output += output_b.decode()
debug_log_fd.write(repr(output_b) + "\n")
debug_log_fd.flush()
return output
def test_completion(program, argv, prompt, state=None):
shell_pid, master = pty.fork()
if shell_pid == 0:
os.execv(program, argv)
else:
try:
debug_log_fd = open(DEBUG_LOG, "a")
expect(":)", master, debug_log_fd)
state.value = 1
os.write(master, bytes(prompt.encode()))
expect(prompt[:-10], master, debug_log_fd)
time.sleep(0.01)
os.write(master, b"\r")
state.value = 2
output = expect("CANNOT_PARSE_INPUT_ASSERTION_FAILED", master, debug_log_fd)
expect(":)", master, debug_log_fd, output)
print("OK")
state.value = 0
finally:
os.close(master)
debug_log_fd.close()
if __name__ == "__main__":
clickhouse_local = os.environ["CLICKHOUSE_LOCAL"]
args = shlex.split(clickhouse_local)
args.append("--disable_suggestion")
args.append("--highlight=0")
run_with_timeout(
test_completion,
[
args[0],
args,
"insert into table function null() format TSV some trash here 123 \n 456",
],
COMPLETION_TIMEOUT_SECONDS,
)

View File

@ -13,13 +13,16 @@ ${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "drop table if exists b
${CLICKHOUSE_CLIENT} --query="create table copy_02572 (key Int) engine=Memory();"
${CLICKHOUSE_CLIENT} --query="create table data_02572 (key Int) engine=Memory();"
${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 3, 3, 1, 1e9, 1, 1e9);"
${CLICKHOUSE_CLIENT} --query="create table buffer_02572 (key Int) engine=Buffer(currentDatabase(), data_02572, 1, 8, 8, 1, 1e9, 1, 1e9);"
${CLICKHOUSE_CLIENT} --query="create materialized view mv_02572 to copy_02572 as select * from data_02572;"
start=$(date +%s)
${CLICKHOUSE_CLIENT} --query="insert into buffer_02572 values (1);"
# ensure that the flush was not direct
${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;"
if [ $(( $(date +%s) - start )) -le 6 ]; then # if the clickhouse test cluster is overloaded, skip the check
# ensure that the flush was not direct
${CLICKHOUSE_CLIENT} --ignore-error --multiquery --query "select * from data_02572; select * from copy_02572;"
fi
# we cannot use OPTIMIZE, this will attach query context, so let's wait
for _ in {1..100}; do

View File

@ -0,0 +1,3 @@
10
11
0

View File

@ -0,0 +1,27 @@
SET allow_experimental_statistics = 1;
SET allow_statistics_optimize = 1;
SET mutations_sync = 1;
DROP TABLE IF EXISTS bug_67742;
CREATE TABLE bug_67742 (a Float64 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple();
INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000;
SELECT count(*) FROM bug_67742 WHERE a < '10';
DROP TABLE bug_67742;
DROP TABLE IF EXISTS bug_67742;
CREATE TABLE bug_67742 (a Int32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple();
INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000;
SELECT count(*) FROM bug_67742 WHERE a < '10.5'; -- { serverError TYPE_MISMATCH }
DROP TABLE bug_67742;
DROP TABLE IF EXISTS bug_67742;
CREATE TABLE bug_67742 (a Int32 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple();
INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000;
SELECT count(*) FROM bug_67742 WHERE a < 10.5;
DROP TABLE bug_67742;
DROP TABLE IF EXISTS bug_67742;
CREATE TABLE bug_67742 (a Int16 STATISTICS(tdigest)) Engine = MergeTree() ORDER BY tuple();
INSERT INTO bug_67742 SELECT number FROM system.numbers LIMIT 10000;
SELECT count(*) FROM bug_67742 WHERE a < '9999999999999999999999999';
DROP TABLE bug_67742;

View File

@ -27,6 +27,7 @@ OK
OK
100
100
OK
===== TestGrants =====
OK
OK

View File

@ -199,6 +199,8 @@ ${CLICKHOUSE_CLIENT} --user $user2 --query "INSERT INTO source SELECT * FROM gen
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination1"
${CLICKHOUSE_CLIENT} --query "SELECT count() FROM destination2"
(( $(${CLICKHOUSE_CLIENT} --query "ALTER TABLE test_table MODIFY SQL SECURITY INVOKER" 2>&1 | grep -c "is not supported") >= 1 )) && echo "OK" || echo "UNEXPECTED"
echo "===== TestGrants ====="
${CLICKHOUSE_CLIENT} --query "GRANT CREATE ON *.* TO $user1"
${CLICKHOUSE_CLIENT} --query "GRANT SELECT ON $db.test_table TO $user1, $user2"

View File

@ -32,7 +32,7 @@ QUERY id: 0
FUNCTION id: 5, function_name: and, function_type: ordinary, result_type: Bool
ARGUMENTS
LIST id: 6, nodes: 2
CONSTANT id: 7, constant_value: UInt64_1, constant_value_type: Bool
CONSTANT id: 7, constant_value: Bool_1, constant_value_type: Bool
FUNCTION id: 8, function_name: notIn, function_type: ordinary, result_type: UInt8
ARGUMENTS
LIST id: 9, nodes: 2

View File

@ -0,0 +1,2 @@
query
String

View File

@ -0,0 +1,18 @@
SELECT * FROM fuzzQuery('SELECT 1', 500, 8956) LIMIT 0 FORMAT TSVWithNamesAndTypes;
SELECT * FROM fuzzQuery('SELECT *
FROM (
SELECT
([toString(number % 2)] :: Array(LowCardinality(String))) AS item_id,
count()
FROM numbers(3)
GROUP BY item_id WITH TOTALS
) AS l FULL JOIN (
SELECT
([toString((number % 2) * 2)] :: Array(String)) AS item_id
FROM numbers(3)
) AS r
ON l.item_id = r.item_id
ORDER BY 1,2,3;
', 500, 8956) LIMIT 10 FORMAT NULL;

View File

@ -0,0 +1,5 @@
1 1
2 2
3 3
3
0

View File

@ -0,0 +1,8 @@
DROP TABLE IF EXISTS t_03203;
CREATE TABLE t_03203 (p UInt64, v UInt64) ENGINE = MergeTree PARTITION BY p ORDER BY v;
INSERT INTO t_03203 VALUES (1, 1), (2, 2), (3, 3);
SELECT * FROM t_03203 ORDER BY p, v;
ALTER TABLE t_03203 DETACH PARTITION ALL;
SELECT count() FROM system.detached_parts WHERE database = currentDatabase() AND table = 't_03203';
ALTER TABLE t_03203 DROP DETACHED PARTITION ALL SETTINGS allow_drop_detached = 1;
SELECT count() FROM system.detached_parts WHERE database = currentDatabase() AND table = 't_03203';

View File

@ -0,0 +1,4 @@
0
2
0
2

View File

@ -0,0 +1,11 @@
-- Tags: no-parallel
CREATE DATABASE rdb1 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica1');
CREATE DATABASE rdb2 ENGINE = Replicated('/test/test_replication_lag_metric', 'shard1', 'replica2');
SET distributed_ddl_task_timeout = 0;
CREATE TABLE rdb1.t (id UInt32) ENGINE = ReplicatedMergeTree ORDER BY id;
SELECT replication_lag FROM system.clusters WHERE cluster IN ('rdb1', 'rdb2') ORDER BY cluster ASC, replica_num ASC;
DROP DATABASE rdb1;
DROP DATABASE rdb2;

View File

@ -5,11 +5,9 @@
#
# Caveats for generic entry "fun":
# - does not work for __attribute__((__always_inline__))
# - and may not work for functions that had been inlined
# - requires asterisk at the beginning *and* end for static functions
#
[thread]
# https://github.com/ClickHouse/ClickHouse/issues/55629
fun:rd_kafka_broker_set_nodename
# https://github.com/ClickHouse/ClickHouse/issues/60443
fun:*rd_avg_calc*
fun:*rd_avg_rollover*

View File

@ -9,6 +9,7 @@
#
# Caveats for generic entry "fun":
# - does not work for __attribute__((__always_inline__))
# - and may not work for functions that had been inlined
# - requires asterisk at the beginning *and* end for static functions
#
[undefined]

View File

@ -1674,6 +1674,7 @@ fuzzQuery
fuzzer
fuzzers
gRPC
gaugehistogram
gccMurmurHash
gcem
generateRandom
@ -2558,6 +2559,7 @@ startsWithUTF
startswith
statbox
stateful
stateset
stddev
stddevPop
stddevPopStable
@ -2689,6 +2691,10 @@ themself
threadpool
throwIf
timeDiff
TimeSeries
timeSeriesData
timeSeriesMetrics
timeSeriesTags
timeSlot
timeSlots
timeZone