Merge remote-tracking branch 'origin/master' into pr-custom-key-failover

This commit is contained in:
Igor Nikonov 2023-12-05 12:00:45 +00:00
commit 71204c2a82
396 changed files with 7575 additions and 2875 deletions

View File

@ -532,6 +532,11 @@ jobs:
run_command: |
cd "$REPO_COPY/tests/ci"
mkdir -p "${REPORTS_PATH}/integration"
mkdir -p "${REPORTS_PATH}/stateless"
cp -r ${REPORTS_PATH}/changed_images* ${REPORTS_PATH}/integration
cp -r ${REPORTS_PATH}/changed_images* ${REPORTS_PATH}/stateless
TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration $CHECK_NAME" \

View File

@ -35,6 +35,7 @@ curl https://clickhouse.com/ | sh
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
* [**ClickHouse Meetup in Sydney**](https://www.meetup.com/clickhouse-sydney-user-group/events/297638812/) - Dec 12
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12
Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.

View File

@ -33,7 +33,7 @@ target_include_directories(cxxabi SYSTEM BEFORE
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
)
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DHAS_THREAD_LOCAL)
target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
target_link_libraries(cxxabi PUBLIC unwind)

2
contrib/qpl vendored

@ -1 +1 @@
Subproject commit faaf19350459c076e66bb5df11743c3fade59b73
Subproject commit a61bdd845fd7ca363b2bcc55454aa520dfcd8298

View File

@ -20,7 +20,8 @@ RUN apt-get update --yes \
RUN pip3 install \
numpy \
pyodbc \
deepdiff
deepdiff \
sqlglot
ARG odbc_repo="https://github.com/ClickHouse/clickhouse-odbc.git"
@ -35,7 +36,7 @@ RUN git clone --recursive ${odbc_repo} \
&& odbcinst -i -s -l -f /clickhouse-odbc/packaging/odbc.ini.sample
ENV TZ=Europe/Amsterdam
ENV MAX_RUN_TIME=900
ENV MAX_RUN_TIME=9000
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ARG sqllogic_test_repo="https://github.com/gregrahn/sqllogictest.git"

View File

@ -75,6 +75,20 @@ function run_tests()
cat /test_output/statements-test/check_status.tsv >> /test_output/check_status.tsv
cat /test_output/statements-test/test_results.tsv >> /test_output/test_results.tsv
tar -zcvf statements-check.tar.gz statements-test 1>/dev/null
mkdir -p /test_output/complete-test
/clickhouse-tests/sqllogic/runner.py \
--log-file /test_output/runner-complete-test.log \
--log-level info \
complete-test \
--input-dir /sqllogictest \
--out-dir /test_output/complete-test \
2>&1 \
| ts '%Y-%m-%d %H:%M:%S'
cat /test_output/complete-test/check_status.tsv >> /test_output/check_status.tsv
cat /test_output/complete-test/test_results.tsv >> /test_output/test_results.tsv
tar -zcvf complete-check.tar.gz complete-test 1>/dev/null
fi
}

View File

@ -19,10 +19,14 @@ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
echo "$BUGFIX_VALIDATE_CHECK"
# Check that the tools are available under short names
ch --query "SELECT 1" || exit 1
chl --query "SELECT 1" || exit 1
chc --version || exit 1
if [[ -z "$BUGFIX_VALIDATE_CHECK" ]]; then
ch --query "SELECT 1" || exit 1
chl --query "SELECT 1" || exit 1
chc --version || exit 1
fi
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
@ -46,6 +50,16 @@ fi
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; then
sudo cat /etc/clickhouse-server/config.d/zookeeper.xml \
| sed "/<use_compression>1<\/use_compression>/d" \
> /etc/clickhouse-server/config.d/zookeeper.xml.tmp
sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
fi
# For flaky check we also enable thread fuzzer
if [ "$NUM_TRIES" -gt "1" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000

View File

@ -191,6 +191,12 @@ sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \
> /etc/clickhouse-server/config.d/logger_trace.xml.tmp
mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml
# Randomize async_load_databases
if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
sudo echo "<clickhouse><async_load_databases>true</async_load_databases></clickhouse>" \
> /etc/clickhouse-server/config.d/enable_async_load_databases.xml
fi
start
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \

View File

@ -79,6 +79,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
start
stop
@ -116,6 +117,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
start

View File

@ -56,7 +56,7 @@ On Linux, macOS and FreeBSD:
./clickhouse client
ClickHouse client version 23.2.1.1501 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 23.2.1 revision 54461.
Connected to ClickHouse server version 23.2.1.
local-host :)
```

View File

@ -16,7 +16,7 @@ ClickHouse provides a native command-line client: `clickhouse-client`. The clien
$ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 20.13.1 revision 54442.
Connected to ClickHouse server version 20.13.1.
:)
```

View File

@ -15,6 +15,27 @@ You can monitor:
- Utilization of hardware resources.
- ClickHouse server metrics.
## Built-in observability dashboard
<img width="400" alt="Screenshot 2023-11-12 at 6 08 58 PM" src="https://github.com/ClickHouse/ClickHouse/assets/3936029/2bd10011-4a47-4b94-b836-d44557c7fdc1" />
ClickHouse comes with a built-in observability dashboard feature which can be accessed by `$HOST:$PORT/dashboard` (requires user and password) that shows the following metrics:
- Queries/second
- CPU usage (cores)
- Queries running
- Merges running
- Selected bytes/second
- IO wait
- CPU wait
- OS CPU Usage (userspace)
- OS CPU Usage (kernel)
- Read from disk
- Read from filesystem
- Memory (tracked)
- Inserted rows/second
- Total MergeTree parts
- Max parts for partition
## Resource Utilization {#resource-utilization}
ClickHouse also monitors the state of hardware resources by itself such as:

View File

@ -1646,6 +1646,45 @@ Default value: `0.5`.
## async_load_databases {#async_load_databases}
Asynchronous loading of databases and tables.
If `true` all non-system databases with `Ordinary`, `Atomic` and `Replicated` engine will be loaded asynchronously after the ClickHouse server start up. See `system.async_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a table, that is not yet loaded, will wait for exactly this table to be started up. If load job fails, query will rethrow an error (instead of shutting down the whole server in case of `async_load_databases = false`). The table that is waited for by at least one query will be loaded with higher priority. DDL queries on a database will wait for exactly that database to be started up.
If `false`, all databases are loaded when the server starts.
The default is `false`.
**Example**
``` xml
<async_load_databases>true</async_load_databases>
```
## tables_loader_foreground_pool_size {#tables_loader_foreground_pool_size}
Sets the number of threads performing load jobs in foreground pool. The foreground pool is used for loading table synchronously before server start listening on a port and for loading tables that are waited for. Foreground pool has higher priority than background pool. It means that no job starts in background pool while there are jobs running in foreground pool.
Possible values:
- Any positive integer.
- Zero. Use all available CPUs.
Default value: 0.
## tables_loader_background_pool_size {#tables_loader_background_pool_size}
Sets the number of threads performing asynchronous load jobs in background pool. The background pool is used for loading tables asynchronously after server start in case there are no queries waiting for the table. It could be beneficial to keep low number of threads in background pool if there are a lot of tables. It will reserve CPU resources for concurrent query execution.
Possible values:
- Any positive integer.
- Zero. Use all available CPUs.
Default value: 0.
## merge_tree {#merge_tree}

View File

@ -149,7 +149,7 @@ Possible values:
- Any positive integer.
- 0 (disable deduplication)
Default value: 100.
Default value: 1000.
The `Insert` command creates one or more blocks (parts). For [insert deduplication](../../engines/table-engines/mergetree-family/replication.md), when writing into replicated tables, ClickHouse writes the hash sums of the created parts into ClickHouse Keeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ClickHouse Keeper.
A large number of `replicated_deduplication_window` slows down `Inserts` because it needs to compare more entries.

View File

@ -0,0 +1,54 @@
---
slug: /en/operations/system-tables/async_loader
---
# async_loader
Contains information and status for recent asynchronous jobs (e.g. for tables loading). The table contains a row for every job. There is a tool for visualizing information from this table `utils/async_loader_graph`.
Example:
``` sql
SELECT *
FROM system.async_loader
FORMAT Vertical
LIMIT 1
```
``` text
```
Columns:
- `job` (`String`) - Job name (may be not unique).
- `job_id` (`UInt64`) - Unique ID of the job.
- `dependencies` (`Array(UInt64)`) - List of IDs of jobs that should be done before this job.
- `dependencies_left` (`UInt64`) - Current number of dependencies left to be done.
- `status` (`Enum`) - Current load status of a job:
`PENDING`: Load job is not started yet.
`OK`: Load job executed and was successful.
`FAILED`: Load job executed and failed.
`CANCELED`: Load job is not going to be executed due to removal or dependency failure.
A pending job might be in one of the following states:
- `is_executing` (`UInt8`) - The job is currently being executed by a worker.
- `is_blocked` (`UInt8`) - The job waits for its dependencies to be done.
- `is_ready` (`UInt8`) - The job is ready to be executed and waits for a worker.
- `elapsed` (`Float64`) - Seconds elapsed since start of execution. Zero if job is not started. Total execution time if job finished.
Every job has a pool associated with it and is started in this pool. Each pool has a constant priority and a mutable maximum number of workers. Higher priority (lower `priority` value) jobs are run first. No job with lower priority is started while there is at least one higher priority job ready or executing. Job priority can be elevated (but cannot be lowered) by prioritizing it. For example jobs for a table loading and startup will be prioritized if incoming query required this table. It is possible to prioritize a job during its execution, but the job is not moved from its `execution_pool` to the newly assigned `pool`. The job uses `pool` for creating new jobs to avoid priority inversion. Already started jobs are not preempted by higher priority jobs and always run to completion after start.
- `pool_id` (`UInt64`) - ID of a pool currently assigned to the job.
- `pool` (`String`) - Name of `pool_id` pool.
- `priority` (`Int64`) - Priority of `pool_id` pool.
- `execution_pool_id` (`UInt64`) - ID of a pool the job is executed in. Equals initially assigned pool before execution starts.
- `execution_pool` (`String`) - Name of `execution_pool_id` pool.
- `execution_priority` (`Int64`) - Priority of `execution_pool_id` pool.
- `ready_seqno` (`Nullable(UInt64)`) - Not null for ready jobs. Worker pulls the next job to be executed from a ready queue of its pool. If there are multiple ready jobs, then job with the lowest value of `ready_seqno` is picked.
- `waiters` (`UInt64`) - The number of threads waiting on this job.
- `exception` (`Nullable(String)`) - Not null for failed and canceled jobs. Holds error message raised during query execution or error leading to cancelling of this job along with dependency failure chain of job names.
Time instants during job lifetime:
- `schedule_time` (`DateTime64`) - Time when job was created and scheduled to be executed (usually with all its dependencies).
- `enqueue_time` (`Nullable(DateTime64)`) - Time when job became ready and was enqueued into a ready queue of its pool. Null if the job is not ready yet.
- `start_time` (`Nullable(DateTime64)`) - Time when worker dequeues the job from ready queue and starts its execution. Null if the job is not started yet.
- `finish_time` (`Nullable(DateTime64)`) - Time when job execution is finished. Null if the job is not finished yet.

View File

@ -13,6 +13,7 @@ ClickHouse does not delete data from the table automatically. See [Introduction]
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the async insert happened.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the async insert finished execution.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time when the async insert finished execution with microseconds precision.
@ -42,6 +43,7 @@ SELECT * FROM system.asynchronous_insert_log LIMIT 1 \G;
Result:
``` text
hostname: clickhouse.eu-central1.internal
event_date: 2023-06-08
event_time: 2023-06-08 10:08:53
event_time_microseconds: 2023-06-08 10:08:53.199516

View File

@ -7,6 +7,7 @@ Contains the historical values for `system.asynchronous_metrics`, which are save
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `name` ([String](../../sql-reference/data-types/string.md)) — Metric name.
@ -15,22 +16,33 @@ Columns:
**Example**
``` sql
SELECT * FROM system.asynchronous_metric_log LIMIT 10
SELECT * FROM system.asynchronous_metric_log LIMIT 3 \G
```
``` text
┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐
│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │
│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │
└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-11-14
event_time: 2023-11-14 14:39:07
metric: AsynchronousHeavyMetricsCalculationTimeSpent
value: 0.001
Row 2:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-11-14
event_time: 2023-11-14 14:39:08
metric: AsynchronousHeavyMetricsCalculationTimeSpent
value: 0
Row 3:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-11-14
event_time: 2023-11-14 14:39:09
metric: AsynchronousHeavyMetricsCalculationTimeSpent
value: 0
```
**See Also**

View File

@ -7,6 +7,7 @@ Contains logging entries with the information about `BACKUP` and `RESTORE` opera
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision.
- `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation.
@ -45,6 +46,7 @@ SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885
```response
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-08-19
event_time_microseconds: 2023-08-19 11:05:21.998566
id: e5b74ecb-f6f1-426a-80be-872f90043885
@ -63,6 +65,7 @@ bytes_read: 0
Row 2:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-08-19
event_time_microseconds: 2023-08-19 11:08:56.916192
id: e5b74ecb-f6f1-426a-80be-872f90043885
@ -93,6 +96,7 @@ SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90
```response
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-08-19
event_time_microseconds: 2023-08-19 11:09:19.718077
id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90
@ -111,6 +115,7 @@ bytes_read: 0
Row 2:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2023-08-19
event_time_microseconds: 2023-08-19 11:09:29.334234
id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90

View File

@ -7,6 +7,7 @@ Contains information about stack traces for fatal errors. The table does not exi
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds.
@ -32,6 +33,7 @@ Result (not full):
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2020-10-14
event_time: 2020-10-14 15:47:40
timestamp_ns: 1602679660271312710

View File

@ -6,6 +6,7 @@ slug: /en/operations/system-tables/metric_log
Contains history of metrics values from tables `system.metrics` and `system.events`, periodically flushed to disk.
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution.
@ -19,6 +20,7 @@ SELECT * FROM system.metric_log LIMIT 1 FORMAT Vertical;
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2020-09-05
event_time: 2020-09-05 16:22:33
event_time_microseconds: 2020-09-05 16:22:33.196807

View File

@ -45,6 +45,22 @@ Number of threads in the Aggregator thread pool.
Number of threads in the Aggregator thread pool running a task.
### TablesLoaderForegroundThreads
Number of threads in the async loader foreground thread pool.
### TablesLoaderForegroundThreadsActive
Number of threads in the async loader foreground thread pool running a task.
### TablesLoaderBackgroundThreads
Number of threads in the async loader background thread pool.
### TablesLoaderBackgroundThreadsActive
Number of threads in the async loader background thread pool running a task.
### AsyncInsertCacheSize
Number of async insert hash ids in the cache
@ -197,14 +213,6 @@ Number of threads in the DatabaseOnDisk thread pool.
Number of threads in the DatabaseOnDisk thread pool running a task.
### DatabaseOrdinaryThreads
Number of threads in the Ordinary database thread pool.
### DatabaseOrdinaryThreadsActive
Number of threads in the Ordinary database thread pool running a task.
### DelayedInserts
Number of INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree table.
@ -625,14 +633,6 @@ Number of connections that are sending data for external tables to remote server
Number of connections that are sending data for scalars to remote servers.
### StartupSystemTablesThreads
Number of threads in the StartupSystemTables thread pool.
### StartupSystemTablesThreadsActive
Number of threads in the StartupSystemTables thread pool running a task.
### StorageBufferBytes
Number of bytes in buffers of Buffer tables
@ -677,14 +677,6 @@ Number of threads in the system.replicas thread pool running a task.
Number of connections to TCP server (clients with native interface), also included server-server distributed query connections
### TablesLoaderThreads
Number of threads in the tables loader thread pool.
### TablesLoaderThreadsActive
Number of threads in the tables loader thread pool running a task.
### TablesToDropQueueSize
Number of dropped tables, that are waiting for background data removal.

View File

@ -31,3 +31,26 @@ SELECT * FROM system.numbers LIMIT 10;
10 rows in set. Elapsed: 0.001 sec.
```
You can also limit the output by predicates.
```sql
SELECT * FROM system.numbers WHERE number < 10;
```
```response
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
10 rows in set. Elapsed: 0.001 sec.
```

View File

@ -8,28 +8,19 @@ Contains information about [trace spans](https://opentracing.io/docs/overview/sp
Columns:
- `trace_id` ([UUID](../../sql-reference/data-types/uuid.md)) — ID of the trace for executed query.
- `span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the `trace span`.
- `parent_span_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ID of the parent `trace span`.
- `operation_name` ([String](../../sql-reference/data-types/string.md)) — The name of the operation.
- `kind` ([Enum8](../../sql-reference/data-types/enum.md)) — The [SpanKind](https://opentelemetry.io/docs/reference/specification/trace/api/#spankind) of the span.
- `INTERNAL` — Indicates that the span represents an internal operation within an application.
- `SERVER` — Indicates that the span covers server-side handling of a synchronous RPC or other remote request.
- `CLIENT` — Indicates that the span describes a request to some remote service.
- `PRODUCER` — Indicates that the span describes the initiators of an asynchronous request. This parent span will often end before the corresponding child CONSUMER span, possibly even before the child span starts.
- `CONSUMER` - Indicates that the span describes a child of an asynchronous PRODUCER request.
- `start_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The start time of the `trace span` (in microseconds).
- `finish_time_us` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The finish time of the `trace span` (in microseconds).
- `finish_date` ([Date](../../sql-reference/data-types/date.md)) — The finish date of the `trace span`.
- `attribute.names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — [Attribute](https://opentelemetry.io/docs/go/instrumentation/#attributes) names depending on the `trace span`. They are filled in according to the recommendations in the [OpenTelemetry](https://opentelemetry.io/) standard.
- `attribute.values` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Attribute values depending on the `trace span`. They are filled in according to the recommendations in the `OpenTelemetry` standard.
**Example**

View File

@ -9,6 +9,7 @@ This table contains information about events that occurred with [data parts](../
The `system.part_log` table contains the following columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the `INSERT` query that created this data part.
- `event_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the event that occurred with the data part. Can have one of the following values:
- `NewPart` — Inserting of a new data part.
@ -56,13 +57,14 @@ SELECT * FROM system.part_log LIMIT 1 FORMAT Vertical;
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
query_id: 983ad9c7-28d5-4ae1-844e-603116b7de31
event_type: NewPart
merge_reason: NotAMerge
merge_algorithm: Undecided
event_date: 2021-02-02
event_time: 2021-02-02 11:14:28
event_time_microseconds: 2021-02-02 11:14:28.861919
event_time_microseconds: 2021-02-02 11:14:28.861919
duration_ms: 35
database: default
table: log_mt_2

View File

@ -4,6 +4,7 @@ This table contains profiling on processors level (that you can find in [`EXPLAI
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the event happened.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the event happened.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — The date and time with microseconds precision when the event happened.

View File

@ -34,6 +34,7 @@ You can use the [log_formatted_queries](../../operations/settings/settings.md#se
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of an event that occurred when executing the query. Values:
- `'QueryStart' = 1` — Successful start of query execution.
- `'QueryFinish' = 2` — Successful end of query execution.
@ -127,6 +128,7 @@ SELECT * FROM system.query_log WHERE type = 'QueryFinish' ORDER BY query_start_t
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
type: QueryFinish
event_date: 2021-11-03
event_time: 2021-11-03 16:13:54
@ -167,7 +169,7 @@ initial_query_start_time: 2021-11-03 16:13:54
initial_query_start_time_microseconds: 2021-11-03 16:13:54.952325
interface: 1
os_user: sevirov
client_hostname: clickhouse.ru-central1.internal
client_hostname: clickhouse.eu-central1.internal
client_name: ClickHouse
client_revision: 54449
client_version_major: 21

View File

@ -18,6 +18,7 @@ You can use the [log_queries_probability](../../operations/settings/settings.md#
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the thread has finished execution of the query.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query.
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the thread has finished execution of the query with microseconds precision.
@ -74,6 +75,7 @@ Columns:
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2020-09-11
event_time: 2020-09-11 10:08:17
event_time_microseconds: 2020-09-11 10:08:17.134042

View File

@ -18,6 +18,7 @@ You can use the [log_queries_probability](../../operations/settings/settings.md#
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — The date when the last event of the view happened.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution.
- `event_time_microseconds` ([DateTime](../../sql-reference/data-types/datetime.md)) — The date and time when the view finished execution with microseconds precision.
@ -59,6 +60,7 @@ Result:
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2021-06-22
event_time: 2021-06-22 13:23:07
event_time_microseconds: 2021-06-22 13:23:07.738221

View File

@ -7,6 +7,7 @@ Contains information about all successful and failed login and logout events.
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `type` ([Enum8](../../sql-reference/data-types/enum.md)) — Login/logout result. Possible values:
- `LoginFailure` — Login error.
- `LoginSuccess` — Successful login.
@ -57,6 +58,7 @@ Result:
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
type: LoginSuccess
auth_id: 45e6bd83-b4aa-4a23-85e6-bd83b4aa1a23
session_id:

View File

@ -7,6 +7,7 @@ Contains logging entries. The logging level which goes to this table can be limi
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` (Date) — Date of the entry.
- `event_time` (DateTime) — Time of the entry.
- `event_time_microseconds` (DateTime) — Time of the entry with microseconds precision.
@ -39,6 +40,7 @@ SELECT * FROM system.text_log LIMIT 1 \G
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2020-09-10
event_time: 2020-09-10 11:23:07
event_time_microseconds: 2020-09-10 11:23:07.871397

View File

@ -12,37 +12,27 @@ To analyze logs, use the `addressToLine`, `addressToLineWithInlines`, `addressTo
Columns:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of sampling moment.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Timestamp of the sampling moment.
- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Timestamp of the sampling moment with microseconds precision.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the sampling moment in nanoseconds.
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server.
When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the `revision`, but not the `version` of a server.
- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type:
- `Real` represents collecting stack traces by wall-clock time.
- `CPU` represents collecting stack traces by CPU time.
- `Memory` represents collecting allocations and deallocations when memory allocation exceeds the subsequent watermark.
- `MemorySample` represents collecting random allocations and deallocations.
- `MemoryPeak` represents collecting updates of peak memory usage.
- `ProfileEvent` represents collecting of increments of profile events.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
- `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table.
- `trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Stack trace at the moment of sampling. Each element is a virtual memory address inside ClickHouse server process.
- `size` ([Int64](../../sql-reference/data-types/int-uint.md)) - For trace types `Memory`, `MemorySample` or `MemoryPeak` is the amount of memory allocated, for other trace types is 0.
- `event` ([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md)) - For trace type `ProfileEvent` is the name of updated profile event, for other trace types is an empty string.
- `increment` ([UInt64](../../sql-reference/data-types/int-uint.md)) - For trace type `ProfileEvent` is the amount of increment of profile event, for other trace types is 0.
**Example**
@ -54,6 +44,7 @@ SELECT * FROM system.trace_log LIMIT 1 \G
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
event_date: 2020-09-10
event_time: 2020-09-10 11:23:09
event_time_microseconds: 2020-09-10 11:23:09.872924

View File

@ -9,6 +9,7 @@ For requests, only columns with request parameters are filled in, and the remain
Columns with request parameters:
- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query.
- `type` ([Enum](../../sql-reference/data-types/enum.md)) — Event type in the ZooKeeper client. Can have one of the following values:
- `Request` — The request has been sent.
- `Response` — The response was received.
@ -63,6 +64,7 @@ Result:
``` text
Row 1:
──────
hostname: clickhouse.eu-central1.internal
type: Request
event_date: 2021-08-09
event_time: 2021-08-09 21:38:30.291792

View File

@ -487,24 +487,23 @@ Where:
## uniqUpTo(N)(x)
Calculates the number of different argument values if it is less than or equal to N. If the number of different argument values is greater than N, it returns N + 1.
Calculates the number of different values of the argument up to a specified limit, `N`. If the number of different argument values is greater than `N`, this function returns `N` + 1, otherwise it calculates the exact value.
Recommended for use with small Ns, up to 10. The maximum value of N is 100.
Recommended for use with small `N`s, up to 10. The maximum value of `N` is 100.
For the state of an aggregate function, it uses the amount of memory equal to 1 + N \* the size of one value of bytes.
For strings, it stores a non-cryptographic hash of 8 bytes. That is, the calculation is approximated for strings.
For the state of an aggregate function, this function uses the amount of memory equal to 1 + `N` \* the size of one value of bytes.
When dealing with strings, this function stores a non-cryptographic hash of 8 bytes; the calculation is approximated for strings.
The function also works for several arguments.
For example, if you had a table that logs every search query made by users on your website. Each row in the table represents a single search query, with columns for the user ID, the search query, and the timestamp of the query. You can use `uniqUpTo` to generate a report that shows only the keywords that produced at least 5 unique users.
It works as fast as possible, except for cases when a large N value is used and the number of unique values is slightly less than N.
Usage example:
``` text
Problem: Generate a report that shows only keywords that produced at least 5 unique users.
Solution: Write in the GROUP BY query SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5
```sql
SELECT SearchPhrase
FROM SearchLog
GROUP BY SearchPhrase
HAVING uniqUpTo(4)(UserID) >= 5
```
`uniqUpTo(4)(UserID)` calculates the number of unique `UserID` values for each `SearchPhrase`, but it only counts up to 4 unique values. If there are more than 4 unique `UserID` values for a `SearchPhrase`, the function returns 5 (4 + 1). The `HAVING` clause then filters out the `SearchPhrase` values for which the number of unique `UserID` values is less than 5. This will give you a list of search keywords that were used by at least 5 unique users.
## sumMapFiltered(keys_to_keep)(keys, values)

View File

@ -5,7 +5,12 @@ sidebar_position: 6
# any
Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column.
Selects the first encountered value of a column.
By default, it ignores NULL values and returns the first NOT NULL value found in the column. Like [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md), it supports `RESPECT NULLS`, in which case it will select the first value passed, regardless of whether it is NULL or not.
The return type of the function is the same as the input, except for LowCardinality, which is discarded. This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) to modify this behaviour.
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the min or max function instead of any.
@ -13,4 +18,4 @@ In some cases, you can rely on the order of execution. This applies to cases whe
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
- Alias: `any_value`
- Alias: `any_value`, `first_value`.

View File

@ -5,9 +5,12 @@ sidebar_position: 7
# first_value
Selects the first encountered value, similar to `any`, but could accept NULL.
Mostly it should be used with [Window Functions](../../window-functions/index.md).
Without Window Functions the result will be random if the source stream is not ordered.
It is an alias for [`any`](../../../sql-reference/aggregate-functions/reference/any.md) but it was introduced for compatibility with [Window Functions](../../window-functions/index.md), where sometimes it's necessary to process `NULL` values (by default all ClickHouse aggregate functions ignore NULL values).
It supports declaring a modifier to respect nulls (`RESPECT NULLS`), both under [Window Functions](../../window-functions/index.md) and in normal aggregations.
As with `any`, without Window Functions the result will be random if the source stream is not ordered and the return type
matches the input type (Null is only returned if the input is Nullable or -OrNull combinator is added).
## examples
@ -23,15 +26,15 @@ INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null);
```
### example1
The NULL value is ignored at default.
By default, the NULL value is ignored.
```sql
select first_value(b) from test_data;
```
```text
┌─first_value_ignore_nulls(b)─┐
3 │
└─────────────────────────────
┌─any(b)─┐
│ 3 │
└────────┘
```
### example2
@ -41,9 +44,9 @@ select first_value(b) ignore nulls from test_data
```
```text
┌─first_value_ignore_nulls(b)─┐
3 │
└─────────────────────────────
┌─any(b) IGNORE NULLS ─┐
│ 3 │
└──────────────────────┘
```
### example3
@ -53,9 +56,9 @@ select first_value(b) respect nulls from test_data
```
```text
┌─first_value_respect_nulls(b)─┐
ᴺᵁᴸᴸ │
└──────────────────────────────
┌─any(b) RESPECT NULLS ─┐
│ ᴺᵁᴸᴸ │
└───────────────────────┘
```
### example4
@ -73,8 +76,8 @@ FROM
```
```text
┌─first_value_respect_nulls(b)─┬─first_value(b)─┐
ᴺᵁᴸᴸ │ 3 │
└──────────────────────────────────────────────┘
┌─any_respect_nulls(b)─┬─any(b)─┐
│ ᴺᵁᴸᴸ │ 3 │
└──────────────────────┴────────┘
```

View File

@ -1083,7 +1083,7 @@ Result:
**See also**
- [arrayFold](#arrayFold)
- [arrayFold](#arrayfold)
## arrayReduceInRanges
@ -1175,7 +1175,7 @@ FROM numbers(1,10);
**See also**
- [arrayReduce](#arrayReduce)
- [arrayReduce](#arrayreduce)
## arrayReverse(arr)

View File

@ -67,45 +67,7 @@ WHERE macro = 'test';
│ test │ Value │
└───────┴──────────────┘
```
## getClientHTTPHeader
Returns the value of the specified HTTP header. If there is no such header or the request method is not HTTP, it will throw an exception.
**Syntax**
```sql
getClientHTTPHeader(name);
```
**Arguments**
- `name` — HTTP header name. [String](../../sql-reference/data-types/string.md#string)
**Returned value**
Value of the specified header.
Type: [String](../../sql-reference/data-types/string.md#string).
When we use `clickhouse-client` to execute this function, we'll always get an empty string, because the client doesn't use the HTTP protocol.
```sql
SELECT getClientHTTPHeader('test')
```
result:
```text
┌─getClientHTTPHeader('test')─┐
│ │
└─────────────────────────────┘
```
Try to use http request:
```shell
echo "select getClientHTTPHeader('X-Clickhouse-User')" | curl -H 'X-ClickHouse-User: default' -H 'X-ClickHouse-Key: ' 'http://localhost:8123/' -d @-
#result
default
```
## FQDN
Returns the fully qualified domain name of the ClickHouse server.

View File

@ -5,7 +5,7 @@ slug: /en/sql-reference/operators/exists
The `EXISTS` operator checks how many records are in the result of a subquery. If it is empty, then the operator returns `0`. Otherwise, it returns `1`.
`EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
`EXISTS` can also be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
:::tip
References to main query tables and columns are not supported in a subquery.
@ -13,12 +13,26 @@ References to main query tables and columns are not supported in a subquery.
**Syntax**
```sql
WHERE EXISTS(subquery)
``` sql
EXISTS(subquery)
```
**Example**
Query checking existence of values in a subquery:
``` sql
SELECT EXISTS(SELECT * FROM numbers(10) WHERE number > 8), EXISTS(SELECT * FROM numbers(10) WHERE number > 11)
```
Result:
``` text
┌─in(1, _subquery1)─┬─in(1, _subquery2)─┐
│ 1 │ 0 │
└───────────────────┴───────────────────┘
```
Query with a subquery returning several rows:
``` sql

View File

@ -10,7 +10,7 @@ A set of queries that allow changing the table structure.
Syntax:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```
In the query, specify a list of one or more comma-separated actions.

View File

@ -415,7 +415,7 @@ ExpressionTransform
ExpressionTransform × 2
(SettingQuotaAndLimits)
(ReadFromStorage)
NumbersMt × 2 0 → 1
NumbersRange × 2 0 → 1
```
### EXPLAIN ESTIMATE

View File

@ -1,4 +1,4 @@
--
---
slug: /en/sql-reference/table-functions/file
sidebar_position: 60
sidebar_label: file

View File

@ -17,6 +17,8 @@ The following queries are equivalent:
SELECT * FROM numbers(10);
SELECT * FROM numbers(0, 10);
SELECT * FROM system.numbers LIMIT 10;
SELECT * FROM system.numbers WHERE number BETWEEN 0 AND 9;
SELECT * FROM system.numbers WHERE number IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
```
Examples:

View File

@ -14,7 +14,7 @@ ClickHouse предоставляет собственный клиент ком
$ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 20.13.1 revision 54442.
Connected to ClickHouse server version 20.13.1.
:)
```

View File

@ -119,7 +119,7 @@ Eсли суммарное число активных кусков во все
- Положительное целое число.
- 0 (без ограничений).
Значение по умолчанию: 100.
Значение по умолчанию: 1000.
Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся только последние `replicated_deduplication_window` хеш-сумм. Самые старые хеш-суммы удаляются из Zookeeper.
Большое значение `replicated_deduplication_window` замедляет `Insert`, так как приходится сравнивать большее количество хеш-сумм.

View File

@ -19,7 +19,7 @@ ClickHouse создает эту таблицу когда установлен
- `revision`([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия сборки сервера ClickHouse.
Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1 revision 54429.`. Это поле содержит номер после `revision`, но не содержит строку после `version`.
Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1.`. Это поле содержит номер после `revision`, но не содержит строку после `version`.
- `trace_type`([Enum8](../../sql-reference/data-types/enum.md)) — тип трассировки:

View File

@ -11,7 +11,7 @@ sidebar_label: "Манипуляции со столбцами"
Синтаксис:
``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
```
В запросе можно указать сразу несколько действий над одной таблицей через запятую.

View File

@ -371,7 +371,7 @@ ExpressionTransform
ExpressionTransform × 2
(SettingQuotaAndLimits)
(ReadFromStorage)
NumbersMt × 2 0 → 1
NumbersRange × 2 0 → 1
```
### EXPLAIN ESTIMATE {#explain-estimate}

View File

@ -14,7 +14,7 @@ ClickHouse提供了一个原生命令行客户端`clickhouse-client`客户端支
$ clickhouse-client
ClickHouse client version 19.17.1.1579 (official build).
Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 19.17.1 revision 54428.
Connected to ClickHouse server version 19.17.1.
:)
```

View File

@ -22,7 +22,7 @@ ClickHouse创建此表时 [trace_log](../../operations/server-configuration-para
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1 revision 54429.`. 该字段包含 `revision`,但不是 `version` 的服务器。
通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1.`. 该字段包含 `revision`,但不是 `version` 的服务器。
- `timer_type` ([枚举8](../../sql-reference/data-types/enum.md)) — Timer type:

View File

@ -493,8 +493,7 @@ void Client::connect()
if (is_interactive)
{
std::cout << "Connected to " << server_name << " server version " << server_version << " revision " << server_revision << "."
<< std::endl << std::endl;
std::cout << "Connected to " << server_name << " server version " << server_version << "." << std::endl << std::endl;
auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch);

View File

@ -23,6 +23,7 @@
#include <Common/scope_guard_safe.h>
#include <Interpreters/Session.h>
#include <Access/AccessControl.h>
#include <Common/PoolId.h>
#include <Common/Exception.h>
#include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h>
@ -742,16 +743,16 @@ void LocalServer::processConfig()
status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading metadata from {}", path);
loadMetadataSystem(global_context);
auto startup_system_tasks = loadMetadataSystem(global_context);
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
startupSystemTables();
waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
if (!config().has("only-system-tables"))
{
DatabaseCatalog::instance().createBackgroundTasks();
loadMetadata(global_context);
waitLoad(loadMetadata(global_context));
DatabaseCatalog::instance().startupBackgroundTasks();
}

View File

@ -20,6 +20,7 @@
#include <base/coverage.h>
#include <base/getFQDNOrHostName.h>
#include <base/safeExit.h>
#include <Common/PoolId.h>
#include <Common/MemoryTracker.h>
#include <Common/ClickHouseRevision.h>
#include <Common/DNSResolver.h>
@ -1279,8 +1280,6 @@ try
global_context->setHTTPHeaderFilter(*config);
global_context->setMaxTableSizeToDrop(server_settings_.max_table_size_to_drop);
global_context->setClientHTTPHeaderForbiddenHeaders(server_settings_.get_client_http_header_forbidden_headers);
global_context->setAllowGetHTTPHeaderFunction(server_settings_.allow_get_client_http_header);
global_context->setMaxPartitionSizeToDrop(server_settings_.max_partition_size_to_drop);
ConcurrencyControl::SlotCount concurrent_threads_soft_limit = ConcurrencyControl::Unlimited;
@ -1336,6 +1335,10 @@ try
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size);
global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, server_settings_.tables_loader_foreground_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, server_settings_.tables_loader_background_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, server_settings_.tables_loader_background_pool_size);
getIOThreadPool().reloadConfiguration(
server_settings.max_io_thread_pool_size,
server_settings.max_io_thread_pool_free_size,
@ -1676,17 +1679,18 @@ try
LOG_INFO(log, "Loading metadata from {}", path_str);
LoadTaskPtrs load_metadata_tasks;
try
{
auto & database_catalog = DatabaseCatalog::instance();
/// We load temporary database first, because projections need it.
database_catalog.initializeAndLoadTemporaryDatabase();
loadMetadataSystem(global_context);
maybeConvertSystemDatabase(global_context);
auto system_startup_tasks = loadMetadataSystem(global_context);
maybeConvertSystemDatabase(global_context, system_startup_tasks);
/// This has to be done before the initialization of system logs,
/// otherwise there is a race condition between the system database initialization
/// and creation of new tables in the database.
startupSystemTables();
waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks);
/// After attaching system databases we can initialize system log.
global_context->initializeSystemLogs();
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
@ -1702,9 +1706,10 @@ try
/// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap.
database_catalog.loadMarkedAsDroppedTables();
database_catalog.createBackgroundTasks();
/// Then, load remaining databases
loadMetadata(global_context, default_database);
convertDatabasesEnginesIfNeed(global_context);
/// Then, load remaining databases (some of them maybe be loaded asynchronously)
load_metadata_tasks = loadMetadata(global_context, default_database, server_settings.async_load_databases);
/// If we need to convert database engines, disable async tables loading
convertDatabasesEnginesIfNeed(load_metadata_tasks, global_context);
database_catalog.startupBackgroundTasks();
/// After loading validate that default database exists
database_catalog.assertDatabaseExists(default_database);
@ -1716,6 +1721,7 @@ try
tryLogCurrentException(log, "Caught exception while loading metadata");
throw;
}
LOG_DEBUG(log, "Loaded metadata.");
/// Init trace collector only after trace_log system table was created
@ -1871,9 +1877,14 @@ try
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0");
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, global_context, &config(),
"distributed_ddl", "DDLWorker",
&CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID));
&CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID),
load_metadata_tasks);
}
/// Do not keep tasks in server, they should be kept inside databases. Used here to make dependent tasks only.
load_metadata_tasks.clear();
load_metadata_tasks.shrink_to_fit();
{
std::lock_guard lock(servers_lock);
for (auto & server : servers)

View File

@ -364,8 +364,15 @@
<background_schedule_pool_size>128</background_schedule_pool_size>
<background_message_broker_schedule_pool_size>16</background_message_broker_schedule_pool_size>
<background_distributed_schedule_pool_size>16</background_distributed_schedule_pool_size>
<tables_loader_foreground_pool_size>0</tables_loader_foreground_pool_size>
<tables_loader_background_pool_size>0</tables_loader_background_pool_size>
-->
<!-- Enables asynchronous loading of databases and tables to speedup server startup.
Queries to not yet loaded entity will be blocked until load is finished.
-->
<!-- <async_load_databases>true</async_load_databases> -->
<!-- On memory constrained environments you may have to set this to value larger than 1.
-->
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>

View File

@ -108,7 +108,7 @@
filter: blur(1px);
}
.chart div { position: absolute; }
.chart > div { position: absolute; }
.inputs {
height: auto;
@ -215,8 +215,6 @@
color: var(--text-color);
}
.u-legend th { display: none; }
.themes {
float: right;
font-size: 20pt;
@ -433,6 +431,16 @@
display: none;
}
.u-series {
line-height: 0.8;
}
.u-series.footer {
font-size: 8px;
padding-top: 0;
margin-top: 0;
}
/* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css
* It is copy-pasted to lower the number of requests.
*/
@ -478,7 +486,6 @@
* - compress the state for URL's #hash;
* - footer with "about" or a link to source code;
* - allow to configure a table on a server to save the dashboards;
* - multiple lines on chart;
* - if a query returned one value, display this value instead of a diagram;
* - if a query returned something unusual, display the table;
*/
@ -520,10 +527,54 @@ let queries = [];
/// Query parameters with predefined default values.
/// All other parameters will be automatically found in the queries.
let params = {
"rounding": "60",
"seconds": "86400"
'rounding': '60',
'seconds': '86400'
};
/// Palette generation for charts
/// Build `numColors` evenly-spaced hues around the color wheel, starting from
/// the hue of `baseColor` (a '#rrggbb' string) while keeping its saturation
/// and lightness. Returns an array of 'hsl(h, s%, l%)' color strings.
function generatePalette(baseColor, numColors) {
    const { h, s, l } = hexToHsl(baseColor);
    const step = 360 / numColors;
    return Array.from({ length: numColors }, (_, index) =>
        `hsl(${Math.round((h + index * step) % 360)}, ${s}%, ${l}%)`);
}
/// Helper function to convert hex color to HSL
/// Convert a '#rrggbb' hex color into HSL components.
/// Returns { h: 0-359, s: 0-100, l: 0-100 } with s and l rounded to integer percents.
function hexToHsl(hex) {
    const packed = parseInt(hex.replace(/^#/, ''), 16);
    const r = ((packed >> 16) & 255) / 255;
    const g = ((packed >> 8) & 255) / 255;
    const b = (packed & 255) / 255;

    const max = Math.max(r, g, b);
    const min = Math.min(r, g, b);
    const l = (max + min) / 2;

    let h = 0;
    let s = 0;
    if (max !== min) {
        const d = max - min;
        s = l > 0.5 ? d / (2 - max - min) : d / (max + min);
        if (max === r) {
            h = (g - b) / d + (g < b ? 6 : 0);
        } else if (max === g) {
            h = (b - r) / d + 2;
        } else {
            h = (r - g) / d + 4;
        }
    }

    return { h: Math.round(h * 60), s: Math.round(s * 100), l: Math.round(l * 100) };
}
let theme = 'light';
function setTheme(new_theme) {
@ -913,6 +964,8 @@ document.getElementById('mass-editor-textarea').addEventListener('input', e => {
function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) {
let legendEl;
let showTop = false;
const showLimit = 5;
function init(u, opts) {
legendEl = u.root.querySelector(".u-legend");
@ -932,13 +985,28 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
...style
});
// hide series color markers
const idents = legendEl.querySelectorAll(".u-marker");
if (opts.series.length == 2) {
const nodes = legendEl.querySelectorAll("th");
for (let i = 0; i < nodes.length; i++)
nodes[i].style.display = "none";
} else {
legendEl.querySelector("th").remove();
legendEl.querySelector("td").setAttribute('colspan', '2');
legendEl.querySelector("td").style.textAlign = 'center';
}
for (let i = 0; i < idents.length; i++)
idents[i].style.display = "none";
if (opts.series.length - 1 > showLimit) {
showTop = true;
let footer = legendEl.insertRow().insertCell();
footer.setAttribute('colspan', '2');
footer.style.textAlign = 'center';
footer.classList.add('u-value');
footer.parentNode.classList.add('u-series','footer');
footer.textContent = ". . .";
}
const overEl = u.over;
overEl.style.overflow = "visible";
overEl.appendChild(legendEl);
@ -946,11 +1014,28 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
overEl.addEventListener("mouseleave", () => {legendEl.style.display = "none";});
}
/// Copy an array-like NodeList into a real Array so Array methods
/// (shift/pop/sort) can be used on it.
function nodeListToArray(nodeList) {
    const result = [];
    for (let i = 0; i < nodeList.length; i++) {
        result.push(nodeList[i]);
    }
    return result;
}
// Position the tooltip-style legend at the cursor and, when there are more
// series than `showLimit`, keep only the highest-valued series rows visible.
// Reads closure state: `legendEl`, `showTop`, `showLimit`.
function update(u) {
let { left, top } = u.cursor;
// Center the legend box on the cursor position.
left -= legendEl.clientWidth / 2;
top -= legendEl.clientHeight / 2;
legendEl.style.transform = "translate(" + left + "px, " + top + "px)";
if (showTop) {
let nodes = nodeListToArray(legendEl.querySelectorAll("tr"));
// First row is the header, last row is the ". . ." footer; exclude both
// from sorting.
let header = nodes.shift();
let footer = nodes.pop();
// Sort the series rows by their current numeric value, descending.
nodes.forEach(function (node) { node._sort_key = +node.querySelector("td").textContent; });
nodes.sort((a, b) => +b._sort_key - +a._sort_key);
// Re-append each row in sorted order (appendChild moves existing nodes).
nodes.forEach(function (node) { node.parentNode.appendChild(node); });
// Show only the top `showLimit` rows; clean up the temporary sort key.
for (let i = 0; i < nodes.length; i++) {
nodes[i].style.display = i < showLimit ? null : "none";
delete nodes[i]._sort_key;
}
// Move the footer back to the bottom, after the re-ordered rows.
footer.parentNode.appendChild(footer);
}
}
return {
@ -961,12 +1046,13 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
};
}
async function doFetch(query, url_params = '') {
host = document.getElementById('url').value || host;
user = document.getElementById('user').value;
password = document.getElementById('password').value;
let url = `${host}?default_format=JSONCompactColumns&enable_http_compression=1`
let url = `${host}?default_format=JSONColumnsWithMetadata&enable_http_compression=1`
if (add_http_cors_header) {
// For debug purposes, you may set add_http_cors_header from a browser console
@ -980,14 +1066,17 @@ async function doFetch(query, url_params = '') {
url += `&password=${encodeURIComponent(password)}`;
}
let response, data, error;
let response, reply, error;
try {
response = await fetch(url + url_params, { method: "POST", body: query });
data = await response.text();
reply = await response.text();
if (response.ok) {
data = JSON.parse(data);
reply = JSON.parse(reply);
if (reply.exception) {
error = reply.exception;
}
} else {
error = data;
error = reply;
}
} catch (e) {
console.log(e);
@ -1006,7 +1095,7 @@ async function doFetch(query, url_params = '') {
}
}
return {data, error};
return {reply, error};
}
async function draw(idx, chart, url_params, query) {
@ -1015,17 +1104,76 @@ async function draw(idx, chart, url_params, query) {
plots[idx] = null;
}
let {data, error} = await doFetch(query, url_params);
let {reply, error} = await doFetch(query, url_params);
if (!error) {
if (reply.rows.length == 0) {
error = "Query returned empty result.";
} else if (reply.meta.length < 2) {
error = "Query should return at least two columns: unix timestamp and value.";
} else {
for (let i = 0; i < reply.meta.length; i++) {
let label = reply.meta[i].name;
let column = reply.data[label];
if (!Array.isArray(column) || column.length != reply.data[reply.meta[0].name].length) {
error = "Wrong data format of the query.";
break;
}
}
}
}
// Transform string-labeled data to multi-column data
// Pivot long-format query results (time, label, value) into wide-format
// columns: one '__time__' column plus one value column per distinct label.
// Rewrites the closure variables `reply` (meta/data/rows) in place, or sets
// `error` and returns early when a label collides with the reserved name.
// Assumes rows are ordered by time and each (time, label) pair is unique —
// TODO confirm against the queries that feed this.
function transformToColumns() {
    const x = reply.meta[0].name; // time; must be ordered
    const l = reply.meta[1].name; // string label column to distinguish series; must be ordered
    const y = reply.meta[2].name; // values; must have single value for (x, l) pair

    // Distinct labels become the new column names.
    const labels = [...new Set(reply.data[l])].sort((a, b) => a - b);
    // '__time__' is reserved for the pivoted time column, so a label with
    // that name would collide with it.
    if (labels.includes('__time__')) {
        error = "The second column is not allowed to contain '__time__' values.";
        return;
    }

    let new_meta = [{ name: '__time__', type: reply.meta[0].type }];
    let new_data = { __time__: [] };
    for (let label of labels) {
        new_meta.push({ name: label, type: reply.meta[2].type });
        new_data[label] = [];
    }

    let new_rows = 0;
    // Finish the output row for `row_time`: record the timestamp and pad
    // every label column that received no value for this timestamp with null.
    function row_done(row_time) {
        new_rows++;
        new_data.__time__.push(row_time);
        for (let label of labels) {
            if (new_data[label].length < new_rows) {
                new_data[label].push(null);
            }
        }
    }

    let prev_time = reply.data[x][0];
    const old_rows = reply.data[x].length;
    for (let i = 0; i < old_rows; i++) {
        const time = reply.data[x][i];
        const label = reply.data[l][i];
        const value = reply.data[y][i];
        // A change in the time column closes the current output row.
        if (prev_time != time) {
            row_done(prev_time);
            prev_time = time;
        }
        new_data[label].push(value);
    }
    row_done(prev_time);

    reply.meta = new_meta;
    reply.data = new_data;
    reply.rows = new_rows;
}
/// True for the ClickHouse column types treated as string labels here.
function isStringColumn(type) {
    return ['String', 'LowCardinality(String)'].includes(type);
}
if (!error) {
if (!Array.isArray(data)) {
error = "Query should return an array.";
} else if (data.length == 0) {
error = "Query returned empty result.";
} else if (data.length != 2) {
error = "Query should return exactly two columns: unix timestamp and value.";
} else if (!Array.isArray(data[0]) || !Array.isArray(data[1]) || data[0].length != data[1].length) {
error = "Wrong data format of the query.";
if (reply.meta.length == 3 && isStringColumn(reply.meta[1].type)) {
transformToColumns();
}
}
@ -1043,24 +1191,38 @@ async function draw(idx, chart, url_params, query) {
}
const [line_color, fill_color, grid_color, axes_color] = theme != 'dark'
? ["#F88", "#FEE", "#EED", "#2c3235"]
: ["#864", "#045", "#2c3235", "#c7d0d9"];
? ["#ff8888", "#ffeeee", "#eeeedd", "#2c3235"]
: ["#886644", "#004455", "#2c3235", "#c7d0d9"];
let sync = uPlot.sync("sync");
const max_value = Math.max(...data[1]);
let axis = {
stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color }
};
let axes = [axis, axis];
let series = [{ label: "x" }];
let data = [reply.data[reply.meta[0].name]];
// Treat every column as series
const series_count = reply.meta.length;
const fill = series_count == 2 ? fill_color : undefined;
const palette = generatePalette(line_color, series_count);
let max_value = Number.NEGATIVE_INFINITY;
for (let i = 1; i < series_count; i++) {
let label = reply.meta[i].name;
series.push({ label, stroke: palette[i - 1], fill });
data.push(reply.data[label]);
max_value = Math.max(max_value, ...reply.data[label]);
}
const opts = {
width: chart.clientWidth,
height: chart.clientHeight,
axes: [ { stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color } },
{ stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color } } ],
series: [ { label: "x" },
{ label: "y", stroke: line_color, fill: fill_color } ],
axes,
series,
padding: [ null, null, null, (Math.round(max_value * 100) / 100).toString().length * 6 - 10 ],
plugins: [ legendAsTooltipPlugin() ],
cursor: {
@ -1216,22 +1378,21 @@ function saveState() {
}
async function searchQueries() {
let {data, error} = await doFetch(search_query);
let {reply, error} = await doFetch(search_query);
if (error) {
throw new Error(error);
}
if (!Array.isArray(data)) {
throw new Error("Search query should return an array.");
} else if (data.length == 0) {
let data = reply.data;
if (reply.rows == 0) {
throw new Error("Search query returned empty result.");
} else if (data.length != 2) {
} else if (reply.meta.length != 2 || reply.meta[0].name != "title" || reply.meta[1].name != "query") {
throw new Error("Search query should return exactly two columns: title and query.");
} else if (!Array.isArray(data[0]) || !Array.isArray(data[1]) || data[0].length != data[1].length) {
} else if (!Array.isArray(data.title) || !Array.isArray(data.query) || data.title.length != data.query.length) {
throw new Error("Wrong data format of the search query.");
}
for (let i = 0; i < data[0].length; i++) {
queries.push({title: data[0][i], query: data[1][i]});
for (let i = 0; i < data.title.length; i++) {
queries.push({title: data.title[i], query: data.query[i]});
}
regenerate();

View File

@ -1,26 +1,213 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/defines.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
namespace
{
/// State for any/anyLast with RESPECT NULLS: remembers whether anything was seen,
/// and distinguishes "saw a NULL" from "saw a concrete value" (stored in `value`).
struct AggregateFunctionAnyRespectNullsData
{
/// Explicit UInt8 values: this enum is serialized as-is in serialize()/deserialize().
enum Status : UInt8
{
NotSet = 1,
SetNull = 2,
SetOther = 3
};
Status status = Status::NotSet;
/// Holds the captured value; only meaningful when status == SetOther.
Field value;
/// True once any row (NULL or not) has been recorded.
bool isSet() const { return status != Status::NotSet; }
void setNull() { status = Status::SetNull; }
void setOther() { status = Status::SetOther; }
};
/// Implementation of any_respect_nulls (First = true) and anyLast_respect_nulls (First = false).
/// Unlike the plain any/anyLast, NULL inputs count as observed values and can be the result.
template <bool First>
class AggregateFunctionAnyRespectNulls final
: public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
{
public:
using Data = AggregateFunctionAnyRespectNullsData;
/// Serialization of the argument type; used for the stored Field in (de)serialize.
SerializationPtr serialization;
/// Whether the argument (and hence result) type is Nullable; NULLs can only be recorded then.
const bool returns_nullable_type = false;
explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
, serialization(type->getDefaultSerialization())
, returns_nullable_type(type->isNullable())
{
}
String getName() const override
{
if constexpr (First)
return "any_respect_nulls";
else
return "anyLast_respect_nulls";
}
bool allocatesMemoryInArena() const override { return false; }
/// Record a NULL observation. For `any` (First) the first observation wins, so later calls are ignored.
void addNull(AggregateDataPtr __restrict place) const
{
chassert(returns_nullable_type);
auto & d = this->data(place);
if (First && d.isSet())
return;
d.setNull();
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
if (columns[0]->isNullable())
{
if (columns[0]->isNullAt(row_num))
return addNull(place);
}
auto & d = this->data(place);
/// `any` keeps the first observed value; `anyLast` overwrites with the latest.
if (First && d.isSet())
return;
d.setOther();
columns[0]->get(row_num, d.value);
}
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
{
/// The default of a Nullable column is NULL; otherwise take the value at row 0.
if (columns[0]->isNullable())
addNull(place);
else
add(place, columns, 0, arena);
}
void addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
const override
{
if (if_argument_pos >= 0)
{
/// With an -If condition, scan from the relevant end and take the first row whose flag is set.
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
size_t size = row_end - row_begin;
for (size_t i = 0; i < size; ++i)
{
size_t pos = First ? row_begin + i : row_end - 1 - i;
if (flags[pos])
{
add(place, columns, pos, arena);
break;
}
}
}
else
{
/// Without a condition only one row of the batch can matter: the first or the last.
size_t pos = First ? row_begin : row_end - 1;
add(place, columns, pos, arena);
}
}
void addBatchSinglePlaceNotNull(
size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
{
/// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
/// have discarded values that we need (NULLs)
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto & d = this->data(place);
/// For `any`, an already-set state wins over anything merged in later.
if (First && d.isSet())
return;
auto & other = this->data(rhs);
if (other.isSet())
{
d.status = other.status;
d.value = other.value;
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & d = this->data(place);
/// Layout: 1 byte status, followed by the serialized value only when a concrete value was set.
UInt8 k = d.status;
writeBinaryLittleEndian<UInt8>(k, buf);
if (k == Data::Status::SetOther)
serialization->serializeBinary(d.value, buf, {});
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
auto & d = this->data(place);
UInt8 k = Data::Status::NotSet;
readBinaryLittleEndian<UInt8>(k, buf);
d.status = static_cast<Data::Status>(k);
if (d.status == Data::Status::NotSet)
return;
else if (d.status == Data::Status::SetNull)
{
/// A serialized NULL state is only valid for Nullable argument types.
if (!returns_nullable_type)
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());
return;
}
else if (d.status == Data::Status::SetOther)
serialization->deserializeBinary(d.value, buf, {});
else
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<Int8>(k), getName());
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & d = this->data(place);
/// SetNull and NotSet both produce the default (NULL for Nullable result types).
if (d.status == Data::Status::SetOther)
to.insert(d.value);
else
to.insertDefault();
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & original_function,
const DataTypes & /*arguments*/,
const Array & /*params*/,
const AggregateFunctionProperties & /*properties*/) const override
{
/// No Null combinator wrapping: this function handles NULLs itself.
return original_function;
}
};
/// Factory helper for any_respect_nulls / anyLast_respect_nulls.
/// Validates that the function is called with exactly one argument and no parameters.
template <bool First>
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
return new AggregateFunctionAnyRespectNulls<First>(argument_types[0]);
}
/// Creates the plain `any` aggregate function (NULLs are ignored by default).
AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAny(
AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(
createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData, RespectNulls>(
name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -28,13 +215,10 @@ AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, co
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
}
template <bool RespectNulls = false>
AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue<
AggregateFunctionsSingleValue,
AggregateFunctionAnyLastData,
RespectNulls>(name, argument_types, parameters, settings));
return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings));
}
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -46,26 +230,28 @@ AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, c
void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
AggregateFunctionProperties default_properties_for_respect_nulls
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
factory.registerFunction("any", { createAggregateFunctionAny, properties });
factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);
// Synonyms for use as window functions.
factory.registerFunction("first_value",
{ createAggregateFunctionAny, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("first_value_respect_nulls",
{ createAggregateFunctionNullableAny<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value",
{ createAggregateFunctionAnyLast, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value_respect_nulls",
{ createAggregateFunctionNullableAnyLast<true>, properties },
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, default_properties});
factory.registerNullsActionTransformation("any", "any_respect_nulls");
factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
}
}

View File

@ -116,7 +116,7 @@ public:
/// Return normalized state type: count()
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
@ -267,7 +267,7 @@ public:
/// Return normalized state type: count()
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{});
AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override

View File

@ -1,23 +1,11 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>
#include <Poco/String.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
@ -28,10 +16,11 @@ struct Settings;
namespace ErrorCodes
{
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_AGGREGATION;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int TOO_LARGE_STRING_SIZE;
extern const int UNKNOWN_AGGREGATE_FUNCTION;
}
const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
@ -59,6 +48,23 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
}
}
/// Registers a two-way mapping between an aggregate function that ignores NULLs (the default)
/// and its RESPECT NULLS counterpart, e.g. "any" <-> "any_respect_nulls":
///   SOURCE + RESPECT NULLS resolves to TARGET; TARGET + IGNORE NULLS resolves to SOURCE.
/// Both names must already be registered, and each side may participate in at most one mapping.
/// Throws LOGICAL_ERROR on any violation (programming error at registration time).
void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls)
{
    /// Bug fix: the '{}' placeholders previously had no corresponding argument,
    /// so the function names were missing from (or broke) the error messages.
    if (!aggregate_functions.contains(source_ignores_nulls))
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found", source_ignores_nulls);

    if (!aggregate_functions.contains(target_respect_nulls))
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found", target_respect_nulls);

    /// RESPECT NULLS direction: source -> target. emplace() fails if the key is already mapped.
    if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", source_ignores_nulls);

    /// IGNORE NULLS direction: target -> source.
    if (!ignore_nulls.emplace(target_respect_nulls, source_ignores_nulls).second)
        throw Exception(
            ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", target_respect_nulls);
}
static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
{
DataTypes res_types;
@ -70,7 +76,11 @@ static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
}
AggregateFunctionPtr AggregateFunctionFactory::get(
const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
const String & name,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties) const
{
/// This to prevent costly string manipulation in parsing the aggregate function combinators.
/// Example: avgArrayArrayArrayArray...(1000 times)...Array
@ -81,8 +91,9 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
/// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function.
/// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them,
/// they must handle the nullability themselves
auto properties = tryGetProperties(name);
/// they must handle the nullability themselves.
/// Aggregate functions such as any_value_respect_nulls are considered window functions in that sense
auto properties = tryGetProperties(name, action);
bool is_window_function = properties.has_value() && properties->is_window_function;
if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
[](const auto & type) { return type->isNullable(); }))
@ -98,8 +109,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
[](const auto & type) { return type->onlyNull(); });
AggregateFunctionPtr nested_function = getImpl(
name, nested_types, nested_parameters, out_properties, has_null_arguments);
AggregateFunctionPtr nested_function = getImpl(name, action, nested_types, nested_parameters, out_properties, has_null_arguments);
// Pure window functions are not real aggregate functions. Applying
// combinators doesn't make sense for them, they must handle the
@ -110,22 +120,54 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters);
}
auto with_original_arguments = getImpl(name, types_without_low_cardinality, parameters, out_properties, false);
auto with_original_arguments = getImpl(name, action, types_without_low_cardinality, parameters, out_properties, false);
if (!with_original_arguments)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr");
return with_original_arguments;
}
/// Resolves `name` combined with a RESPECT NULLS / IGNORE NULLS modifier to the actual
/// registered function that implements that behavior.
/// Returns the associated function, or an empty optional when no substitution is needed
/// (NullsAction::EMPTY, or IGNORE NULLS on a function with no registered counterpart).
/// Throws NOT_IMPLEMENTED when RESPECT NULLS is requested for a function without a mapping.
std::optional<AggregateFunctionWithProperties>
AggregateFunctionFactory::getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const
{
if (action == NullsAction::RESPECT_NULLS)
{
if (auto it = respect_nulls.find(name); it == respect_nulls.end())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} does not support RESPECT NULLS", name);
else if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end())
return {associated_it->second};
else
/// Mapping exists but the target was never registered — registration-time invariant broken.
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} RESPECT NULLS')", it->second, name);
}
if (action == NullsAction::IGNORE_NULLS)
{
if (auto it = ignore_nulls.find(name); it != ignore_nulls.end())
{
if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end())
return {associated_it->second};
else
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} IGNORE NULLS')", it->second, name);
}
/// We don't throw for IGNORE NULLS of other functions because that's the default in CH
}
return {};
}
AggregateFunctionPtr AggregateFunctionFactory::getImpl(
const String & name_param,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
bool has_null_arguments) const
{
String name = getAliasToOrName(name_param);
String case_insensitive_name;
bool is_case_insensitive = false;
Value found;
@ -135,10 +177,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
found = it->second;
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
if (!found.creator)
{
found = jt->second;
is_case_insensitive = true;
case_insensitive_name = Poco::toLower(name);
if (auto jt = case_insensitive_aggregate_functions.find(case_insensitive_name); jt != case_insensitive_aggregate_functions.end())
{
found = jt->second;
is_case_insensitive = true;
}
}
ContextPtr query_context;
@ -147,11 +193,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
if (found.creator)
{
out_properties = found.properties;
auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? case_insensitive_name : name, action);
if (opt)
found = *opt;
out_properties = found.properties;
if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo(
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name);
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? case_insensitive_name : name);
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
if (!out_properties.returns_default_when_only_null && has_null_arguments)
@ -196,7 +245,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
DataTypes nested_types = combinator->transformArguments(argument_types);
Array nested_parameters = combinator->transformParameters(parameters);
AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties);
AggregateFunctionPtr nested_function = get(nested_name, action, nested_types, nested_parameters, out_properties);
return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
}
@ -213,16 +262,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info);
}
AggregateFunctionPtr AggregateFunctionFactory::tryGet(
const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
{
return isAggregateFunctionName(name)
? get(name, argument_types, parameters, out_properties)
: nullptr;
}
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name, NullsAction action) const
{
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
@ -231,6 +271,8 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
{
name = getAliasToOrName(name);
Value found;
String lower_case_name;
bool is_case_insensitive = false;
/// Find by exact match.
if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
@ -238,11 +280,23 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
found = it->second;
}
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end())
found = jt->second;
if (!found.creator)
{
lower_case_name = Poco::toLower(name);
if (auto jt = case_insensitive_aggregate_functions.find(lower_case_name); jt != case_insensitive_aggregate_functions.end())
{
is_case_insensitive = true;
found = jt->second;
}
}
if (found.creator)
{
auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? lower_case_name : name, action);
if (opt)
return opt->properties;
return found.properties;
}
/// Combinators of aggregate functions.
/// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb',
@ -262,27 +316,29 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
}
bool AggregateFunctionFactory::isAggregateFunctionName(String name) const
bool AggregateFunctionFactory::isAggregateFunctionName(const String & name_) const
{
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
if (name_.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
while (true)
if (aggregate_functions.contains(name_) || isAlias(name_))
return true;
String name_lowercase = Poco::toLower(name_);
if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
return true;
String name = name_;
while (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{
if (aggregate_functions.contains(name) || isAlias(name))
return true;
name = name.substr(0, name.size() - combinator->getName().size());
name_lowercase = name_lowercase.substr(0, name_lowercase.size() - combinator->getName().size());
String name_lowercase = Poco::toLower(name);
if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
if (aggregate_functions.contains(name) || isAlias(name) || case_insensitive_aggregate_functions.contains(name_lowercase)
|| isAlias(name_lowercase))
return true;
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{
name = name.substr(0, name.size() - combinator->getName().size());
}
else
return false;
}
return false;
}
AggregateFunctionFactory & AggregateFunctionFactory::instance()

View File

@ -1,9 +1,9 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Common/IFactoryWithAliases.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/IFactoryWithAliases.h>
#include <functional>
#include <memory>
@ -62,36 +62,44 @@ public:
Value creator,
CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Register how to transform from one aggregate function to other based on NullsAction
/// Registers them both ways:
/// SOURCE + RESPECT NULLS will be transformed to TARGET
/// TARGET + IGNORE NULLS will be transformed to SOURCE
void registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls);
/// Throws an exception if not found.
AggregateFunctionPtr
get(const String & name,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties) const;
/// Returns nullptr if not found.
AggregateFunctionPtr tryGet(
const String & name,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties) const;
/// Get properties if the aggregate function exists.
std::optional<AggregateFunctionProperties> tryGetProperties(String name) const;
std::optional<AggregateFunctionProperties> tryGetProperties(String name, NullsAction action) const;
bool isAggregateFunctionName(String name) const;
bool isAggregateFunctionName(const String & name) const;
private:
AggregateFunctionPtr getImpl(
const String & name,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties,
bool has_null_arguments) const;
using AggregateFunctions = std::unordered_map<String, Value>;
using ActionMap = std::unordered_map<String, String>;
AggregateFunctions aggregate_functions;
/// Mapping from functions with `RESPECT NULLS` modifier to actual aggregate function names
/// Example: `any(x) RESPECT NULLS` should be executed as function `any_respect_nulls`
ActionMap respect_nulls;
/// Same as above for `IGNORE NULLS` modifier
ActionMap ignore_nulls;
std::optional<AggregateFunctionWithProperties> getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const;
/// Case insensitive aggregate functions will be additionally added here with lowercased name.
AggregateFunctions case_insensitive_aggregate_functions;

View File

@ -771,26 +771,18 @@ static_assert(
/// For any other value types.
template <bool RESULT_IS_NULLABLE = false>
struct SingleValueDataGeneric
{
private:
using Self = SingleValueDataGeneric;
Field value;
bool has_value = false;
public:
static constexpr bool result_is_nullable = RESULT_IS_NULLABLE;
static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE;
static constexpr bool result_is_nullable = false;
static constexpr bool should_skip_null_arguments = true;
static constexpr bool is_any = false;
bool has() const
{
if constexpr (result_is_nullable)
return has_value;
return !value.isNull();
}
bool has() const { return !value.isNull(); }
void insertResultInto(IColumn & to) const
{
@ -820,19 +812,9 @@ public:
serialization.deserializeBinary(value, buf, {});
}
void change(const IColumn & column, size_t row_num, Arena *)
{
column.get(row_num, value);
if constexpr (result_is_nullable)
has_value = true;
}
void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); }
void change(const Self & to, Arena *)
{
value = to.value;
if constexpr (result_is_nullable)
has_value = true;
}
void change(const Self & to, Arena *) { value = to.value; }
bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena)
{
@ -847,7 +829,7 @@ public:
bool changeFirstTime(const Self & to, Arena * arena)
{
if (!has() && (result_is_nullable || to.has()))
if (!has() && to.has())
{
change(to, arena);
return true;
@ -882,30 +864,15 @@ public:
}
else
{
if constexpr (result_is_nullable)
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || new_value < value))
{
value = new_value;
return true;
}
else
return false;
value = new_value;
return true;
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
value = new_value;
return true;
}
else
return false;
}
return false;
}
}
@ -913,30 +880,13 @@ public:
{
if (!to.has())
return false;
if constexpr (result_is_nullable)
if (!has() || to.value < value)
{
if (!has())
{
change(to, arena);
return true;
}
if (to.value.isNull() || (!value.isNull() && to.value < value))
{
value = to.value;
return true;
}
return false;
change(to, arena);
return true;
}
else
{
if (!has() || to.value < value)
{
change(to, arena);
return true;
}
else
return false;
}
return false;
}
bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
@ -948,29 +898,15 @@ public:
}
else
{
if constexpr (result_is_nullable)
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
Field new_value;
column.get(row_num, new_value);
if (!value.isNull() && (new_value.isNull() || value < new_value))
{
value = new_value;
return true;
}
return false;
value = new_value;
return true;
}
else
{
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
value = new_value;
return true;
}
else
return false;
}
return false;
}
}
@ -978,36 +914,18 @@ public:
{
if (!to.has())
return false;
if constexpr (result_is_nullable)
if (!has() || to.value > value)
{
if (!value.isNull() && (to.value.isNull() || value < to.value))
{
value = to.value;
return true;
}
return false;
change(to, arena);
return true;
}
else
{
if (!has() || to.value > value)
{
change(to, arena);
return true;
}
else
return false;
}
return false;
}
bool isEqualTo(const IColumn & column, size_t row_num) const
{
return has() && value == column[row_num];
}
bool isEqualTo(const IColumn & column, size_t row_num) const { return has() && value == column[row_num]; }
bool isEqualTo(const Self & to) const
{
return has() && to.value == value;
}
bool isEqualTo(const Self & to) const { return has() && to.value == value; }
static bool allocatesMemoryInArena()
{

View File

@ -150,7 +150,7 @@ public:
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), this->argument_types, params, properties),
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), NullsAction::EMPTY, this->argument_types, params, properties),
this->argument_types,
params);
}

View File

@ -20,7 +20,7 @@ template <template <typename> class Data>
class AggregateFunctionCombinatorArgMinMax final : public IAggregateFunctionCombinator
{
public:
String getName() const override { return Data<SingleValueDataGeneric<>>::name(); }
String getName() const override { return Data<SingleValueDataGeneric>::name(); }
DataTypes transformArguments(const DataTypes & arguments) const override
{
@ -66,7 +66,7 @@ public:
if (which.idx == TypeIndex::String)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataString>>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric<>>>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric>>>(nested_function, arguments, params);
}
};

View File

@ -33,6 +33,8 @@ class AggregateFunctionIf final : public IAggregateFunctionHelper<AggregateFunct
private:
AggregateFunctionPtr nested_func;
size_t num_arguments;
/// We accept Nullable(Nothing) as condition, but callees always expect UInt8 so we need to avoid calling them
bool only_null_condition = false;
public:
AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types, const Array & params_)
@ -42,7 +44,9 @@ public:
if (num_arguments == 0)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require at least one argument", getName());
if (!isUInt8(types.back()) && !types.back()->onlyNull())
only_null_condition = types.back()->onlyNull();
if (!isUInt8(types.back()) && !only_null_condition)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Last argument for aggregate function {} must be UInt8", getName());
}
@ -108,6 +112,8 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
if (only_null_condition)
return;
if (assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num])
nested_func->add(place, columns, row_num, arena);
}
@ -121,6 +127,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatch(row_begin, row_end, places, place_offset, columns, arena, num_arguments - 1);
}
@ -132,6 +140,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatchSinglePlace(row_begin, row_end, place, columns, arena, num_arguments - 1);
}
@ -144,6 +154,8 @@ public:
Arena * arena,
ssize_t) const override
{
if (only_null_condition)
return;
nested_func->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, num_arguments - 1);
}

View File

@ -447,7 +447,8 @@ public:
{
AggregateFunctionProperties out_properties;
auto & aggr_func_factory = AggregateFunctionFactory::instance();
return aggr_func_factory.get(nested_func_name + "MappedArrays", arguments, params, out_properties);
auto action = NullsAction::EMPTY;
return aggr_func_factory.get(nested_func_name + "MappedArrays", action, arguments, params, out_properties);
}
else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregation '{}Map' is not implemented for mapped arrays",

View File

@ -35,8 +35,8 @@ public:
auto storage_type_out = DataTypeFactory::instance().get(nested_->getResultType()->getName());
// Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type.
AggregateFunctionProperties properties;
auto function
= AggregateFunctionFactory::instance().get(nested_->getName(), {storage_type_out}, nested_->getParameters(), properties);
auto function = AggregateFunctionFactory::instance().get(
nested_->getName(), NullsAction::EMPTY, {storage_type_out}, nested_->getParameters(), properties);
// Need to make a clone because it'll be customized.
auto storage_type_arg = DataTypeFactory::instance().get(nested_->getResultType()->getName());

View File

@ -14,8 +14,9 @@ namespace DB
struct Settings;
/// min, max, any, anyLast, anyHeavy, etc...
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data>
static IAggregateFunction * createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
template <template <typename> class AggregateFunctionTemplate, template <typename, bool...> class Data>
static IAggregateFunction *
createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
@ -44,31 +45,9 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<>>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
}
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data, bool RespectNulls = false>
static IAggregateFunction * createAggregateFunctionSingleNullableValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
// If the result value could be null (excluding the case that no row is matched),
// use SingleValueDataGeneric.
if constexpr (!RespectNulls)
{
return createAggregateFunctionSingleValue<AggregateFunctionTemplate, Data>(name, argument_types, Array(), settings);
}
else
{
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<true>>>(argument_type);
}
UNREACHABLE();
}
/// argMin, argMax
template <template <typename> class MinMaxData, typename ResData>
static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTypePtr & res_type, const DataTypePtr & val_type)
@ -98,7 +77,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTyp
if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric<>>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
}
template <template <typename> class MinMaxData>
@ -134,7 +113,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name
if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric<>>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
}
}

View File

@ -113,6 +113,11 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
buffer << ", function_type: " << function_type;
if (nulls_action == NullsAction::RESPECT_NULLS)
buffer << ", nulls_action : RESPECT_NULLS";
else if (nulls_action == NullsAction::IGNORE_NULLS)
buffer << ", nulls_action : IGNORE_NULLS";
if (function)
buffer << ", result_type: " + getResultType()->getName();
@ -140,10 +145,9 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
{
const auto & rhs_typed = assert_cast<const FunctionNode &>(rhs);
if (function_name != rhs_typed.function_name ||
isAggregateFunction() != rhs_typed.isAggregateFunction() ||
isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() ||
isWindowFunction() != rhs_typed.isWindowFunction())
if (function_name != rhs_typed.function_name || isAggregateFunction() != rhs_typed.isAggregateFunction()
|| isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() || isWindowFunction() != rhs_typed.isWindowFunction()
|| nulls_action != rhs_typed.nulls_action)
return false;
if (isResolved() != rhs_typed.isResolved())
@ -171,6 +175,7 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const
hash_state.update(isOrdinaryFunction());
hash_state.update(isAggregateFunction());
hash_state.update(isWindowFunction());
hash_state.update(nulls_action);
if (!isResolved())
return;
@ -192,6 +197,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
*/
result_function->function = function;
result_function->kind = kind;
result_function->nulls_action = nulls_action;
result_function->wrap_with_nullable = wrap_with_nullable;
return result_function;
@ -202,6 +208,7 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
auto function_ast = std::make_shared<ASTFunction>();
function_ast->name = function_name;
function_ast->nulls_action = nulls_action;
if (function_name == "nothing")
{

View File

@ -5,11 +5,12 @@
#include <Analyzer/ConstantValue.h>
#include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h>
#include <Common/typeid_cast.h>
#include <Core/ColumnsWithTypeAndName.h>
#include <Core/IResolvedFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <Functions/IFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/typeid_cast.h>
namespace DB
{
@ -63,6 +64,10 @@ public:
/// Get function name
const String & getFunctionName() const { return function_name; }
/// Get NullAction modifier
NullsAction getNullsAction() const { return nulls_action; }
void setNullsAction(NullsAction action) { nulls_action = action; }
/// Get parameters
const ListNode & getParameters() const { return children[parameters_child_index]->as<const ListNode &>(); }
@ -214,6 +219,7 @@ protected:
private:
String function_name;
FunctionKind kind = FunctionKind::UNKNOWN;
NullsAction nulls_action = NullsAction::EMPTY;
IResolvedFunctionPtr function;
bool wrap_with_nullable = false;

View File

@ -184,10 +184,9 @@ private:
auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name,
{ argument->getResultType() },
function_aggregate_function->getParameters(),
properties);
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get(
aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

View File

@ -76,7 +76,8 @@ public:
/// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType();
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear();
}

View File

@ -78,9 +78,11 @@ QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String
return function_node;
}
FunctionNodePtr createResolvedAggregateFunction(const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {})
FunctionNodePtr createResolvedAggregateFunction(
const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {}, NullsAction action = NullsAction::EMPTY)
{
auto function_node = std::make_shared<FunctionNode>(name);
function_node->setNullsAction(action);
if (!parameters.empty())
{
@ -92,11 +94,7 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query
function_node->getArguments().getNodes() = { argument };
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
name,
{ argument->getResultType() },
parameters,
properties);
auto aggregate_function = AggregateFunctionFactory::instance().get(name, action, {argument->getResultType()}, parameters, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function));
return function_node;

View File

@ -56,7 +56,7 @@ private:
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

View File

@ -118,6 +118,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -1208,7 +1209,8 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context);
static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node)
{
@ -2310,7 +2312,8 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
}
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context)
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context)
{
std::string result_aggregate_function_name = aggregate_function_name;
auto aggregate_function_name_lowercase = Poco::toLower(aggregate_function_name);
@ -2337,7 +2340,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull");
if (need_add_or_null)
{
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (!properties->returns_default_when_only_null)
result_aggregate_function_name += "OrNull";
}
@ -2349,7 +2352,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
*/
if (result_aggregate_function_name.ends_with("OrNull"))
{
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name);
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (function_properies && !function_properies->returns_default_when_only_null)
{
size_t function_name_size = result_aggregate_function_name.size();
@ -4591,6 +4594,19 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod
return result_projection_names;
}
namespace
{
void checkFunctionNodeHasEmptyNullsAction(FunctionNode const & node)
{
if (node.getNullsAction() != NullsAction::EMPTY)
throw Exception(
ErrorCodes::SYNTAX_ERROR,
"Function with name '{}' cannot use {} NULLS",
node.getFunctionName(),
node.getNullsAction() == NullsAction::IGNORE_NULLS ? "IGNORE" : "RESPECT");
}
}
/** Resolve function node in scope.
* During function node resolve, function node can be replaced with another expression (if it match lambda or sql user defined function),
* with constant (if it allow constant folding), or with expression list. It is caller responsibility to handle such cases appropriately.
@ -4749,6 +4765,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
if (is_special_function_exists)
{
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/// Rewrite EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1).
auto & exists_subquery_argument = function_node_ptr->getArguments().getNodes().at(0);
@ -4769,6 +4786,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
if (is_special_function_if && !function_node_ptr->getArguments().getNodes().empty())
{
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/** Handle special case with constant If function, even if some of the arguments are invalid.
*
* SELECT if(hasColumnInTable('system', 'numbers', 'not_existing_column'), not_existing_column, 5) FROM system.numbers;
@ -4834,6 +4852,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
/// Replace right IN function argument if it is table or table function with subquery that read ordinary columns
if (is_special_function_in)
{
checkFunctionNodeHasEmptyNullsAction(function_node);
if (scope.context->getSettingsRef().transform_null_in)
{
static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> in_function_to_replace_null_in_function_map =
@ -5012,6 +5031,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
lambda_expression_untyped->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
checkFunctionNodeHasEmptyNullsAction(function_node);
if (!parameters.empty())
{
throw Exception(
@ -5041,6 +5062,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Function 'untuple' must have 1 argument. In scope {}",
scope.scope_node->formatASTForErrorMessage());
checkFunctionNodeHasEmptyNullsAction(function_node);
const auto & untuple_argument = function_arguments[0];
auto result_type = untuple_argument->getResultType();
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
@ -5091,6 +5114,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Function GROUPING can have up to 64 arguments, but {} provided",
function_arguments_size);
checkFunctionNodeHasEmptyNullsAction(function_node);
bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility;
auto grouping_function = std::make_shared<FunctionGrouping>(force_grouping_standard_compatibility);
@ -5115,10 +5139,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Window function '{}' does not support lambda arguments",
function_name);
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);
function_node.resolveAsWindowFunction(std::move(aggregate_function));
@ -5142,7 +5168,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
is_executable_udf = false;
}
if (!function)
if (function)
{
checkFunctionNodeHasEmptyNullsAction(function_node);
}
else
{
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
{
@ -5181,10 +5211,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Aggregate function '{}' does not support lambda arguments",
function_name);
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context);
auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties);
auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));

View File

@ -97,6 +97,7 @@ private:
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + suffix,
function_node.getNullsAction(),
argument_types,
function_node.getAggregateFunction()->getParameters(),
properties);

View File

@ -157,10 +157,8 @@ private:
static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("countIf",
{argument_type},
function_node.getAggregateFunction()->getParameters(),
properties);
auto aggregate_function = AggregateFunctionFactory::instance().get(
"countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function));
}

View File

@ -76,7 +76,9 @@ public:
argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_node->getFunctionName(),
auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
NullsAction::EMPTY,
argument_types,
function_node->getAggregateFunction()->getParameters(),
properties);

View File

@ -176,7 +176,7 @@ public:
if (match_subquery_with_distinct() || match_subquery_with_group_by())
{
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties);
auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function));

View File

@ -607,6 +607,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
else
{
auto function_node = std::make_shared<FunctionNode>(function->name);
function_node->setNullsAction(function->nulls_action);
if (function->parameters)
{

View File

@ -544,11 +544,8 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties;
return AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
argument_types,
parameters,
properties);
auto action = NullsAction::EMPTY;
return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties);
}
}

View File

@ -451,17 +451,25 @@ void BackupEntriesCollector::gatherDatabaseMetadata(
}
catch (...)
{
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for database {}", database_name);
/// Probably the database has been just removed.
if (throw_if_database_not_found)
throw;
LOG_WARNING(log, "Couldn't get a create query for database {}", backQuoteIfNeed(database_name));
return;
}
auto * create = create_database_query->as<ASTCreateQuery>();
if (create->getDatabase() != database_name)
{
/// Probably the database has been just renamed. Use the older name for backup to keep the backup consistent.
LOG_WARNING(log, "Got a create query with unexpected name {} for database {}",
backQuoteIfNeed(create->getDatabase()), backQuoteIfNeed(database_name));
create_database_query = create_database_query->clone();
create = create_database_query->as<ASTCreateQuery>();
create->setDatabase(database_name);
}
database_info.create_database_query = create_database_query;
const auto & create = create_database_query->as<const ASTCreateQuery &>();
if (create.getDatabase() != database_name)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP,
"Got a create query with unexpected name {} for database {}",
backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(database_name));
String new_database_name = renaming_map.getNewDatabaseName(database_name);
database_info.metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql");
}
@ -582,26 +590,34 @@ std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInD
}
std::unordered_set<String> found_table_names;
for (const auto & db_table : db_tables)
for (auto & db_table : db_tables)
{
const auto & create_table_query = db_table.first;
const auto & create = create_table_query->as<const ASTCreateQuery &>();
found_table_names.emplace(create.getTable());
auto create_table_query = db_table.first;
auto * create = create_table_query->as<ASTCreateQuery>();
found_table_names.emplace(create->getTable());
if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
{
if (!create.temporary)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP,
if (!create->temporary)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Got a non-temporary create query for {}",
tableNameWithTypeToString(database_name, create.getTable(), false));
tableNameWithTypeToString(database_name, create->getTable(), false));
}
}
else
{
if (create.getDatabase() != database_name)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP,
"Got a create query with unexpected database name {} for {}",
backQuoteIfNeed(create.getDatabase()),
tableNameWithTypeToString(database_name, create.getTable(), false));
if (create->getDatabase() != database_name)
{
/// Probably the table has been just renamed. Use the older name for backup to keep the backup consistent.
LOG_WARNING(log, "Got a create query with unexpected database name {} for {}",
backQuoteIfNeed(create->getDatabase()),
tableNameWithTypeToString(database_name, create->getTable(), false));
create_table_query = create_table_query->clone();
create = create_table_query->as<ASTCreateQuery>();
create->setDatabase(database_name);
db_table.first = create_table_query;
}
}
}

View File

@ -48,20 +48,22 @@ namespace
}
const auto & request_settings = settings.request_settings;
const Settings & global_settings = context->getGlobalContext()->getSettingsRef();
const Settings & local_settings = context->getSettingsRef();
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
settings.auth_settings.region,
context->getRemoteHostFilter(),
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_max_redirects),
static_cast<unsigned>(context->getGlobalContext()->getSettingsRef().s3_retry_attempts),
context->getGlobalContext()->getSettingsRef().enable_s3_requests_logging,
static_cast<unsigned>(global_settings.s3_max_redirects),
static_cast<unsigned>(global_settings.s3_retry_attempts),
global_settings.enable_s3_requests_logging,
/* for_disk_s3 = */ false,
request_settings.get_request_throttler,
request_settings.put_request_throttler,
s3_uri.uri.getScheme());
client_configuration.endpointOverride = s3_uri.endpoint;
client_configuration.maxConnections = static_cast<unsigned>(context->getSettingsRef().s3_max_connections);
client_configuration.maxConnections = static_cast<unsigned>(global_settings.s3_max_connections);
/// Increase connect timeout
client_configuration.connectTimeoutMs = 10 * 1000;
/// Requests in backups can be extremely long, set to one hour
@ -71,6 +73,7 @@ namespace
return S3::ClientFactory::instance().create(
client_configuration,
s3_uri.is_virtual_hosted_style,
local_settings.s3_disable_checksum,
credentials.GetAWSAccessKeyId(),
credentials.GetAWSSecretKey(),
settings.auth_settings.server_side_encryption_customer_key_base64,

View File

@ -46,6 +46,7 @@
#include <Common/assert_cast.h>
#include <Common/typeid_cast.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB
{
@ -384,6 +385,39 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
// the generic recursion into IAST.children.
}
void QueryFuzzer::fuzzNullsAction(NullsAction & action)
{
/// If it's not using actions, then it's a high change it doesn't support it to begin with
if ((action == NullsAction::EMPTY) && (fuzz_rand() % 100 == 0))
{
if (fuzz_rand() % 2 == 0)
action = NullsAction::RESPECT_NULLS;
else
action = NullsAction::IGNORE_NULLS;
}
else if (fuzz_rand() % 20 == 0)
{
switch (fuzz_rand() % 3)
{
case 0:
{
action = NullsAction::EMPTY;
break;
}
case 1:
{
action = NullsAction::RESPECT_NULLS;
break;
}
default:
{
action = NullsAction::IGNORE_NULLS;
break;
}
}
}
}
void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
{
switch (fuzz_rand() % 40)
@ -966,6 +1000,9 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
fuzzColumnLikeExpressionList(fn->arguments.get());
fuzzColumnLikeExpressionList(fn->parameters.get());
if (AggregateUtils::isAggregateFunction(*fn))
fuzzNullsAction(fn->nulls_action);
if (fn->is_window_function && fn->window_definition)
{
auto & def = fn->window_definition->as<ASTWindowDefinition &>();

View File

@ -10,6 +10,7 @@
#include <Core/Field.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/IAST.h>
#include <Parsers/NullsAction.h>
#include <Common/randomSeed.h>
#include "Parsers/IAST_fwd.h"
@ -86,6 +87,7 @@ struct QueryFuzzer
void fuzzOrderByElement(ASTOrderByElement * elem);
void fuzzOrderByList(IAST * ast);
void fuzzColumnLikeExpressionList(IAST * ast);
void fuzzNullsAction(NullsAction & action);
void fuzzWindowFrame(ASTWindowDefinition & def);
void fuzzCreateQuery(ASTCreateQuery & create);
void fuzzExplainQuery(ASTExplainQuery & explain);

View File

@ -32,21 +32,23 @@ namespace ErrorCodes
Suggest::Suggest()
{
/// Keywords may be not up to date with ClickHouse parser.
addWords({
"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT",
"MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP",
"RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT",
"PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
"OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE",
"END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES",
"SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER",
"LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC",
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE",
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED",
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND"
});
addWords({"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON",
"CLUSTER", "DEFAULT", "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE",
"SETTINGS", "ATTACH", "DETACH", "DROP", "RENAME", "TO", "ALTER", "ADD",
"MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", "PRIMARY", "KEY",
"CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
"OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN",
"THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE",
"FINAL", "DEDUPLICATE", "INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY",
"JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT",
"FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND",
"OR", "ASC", "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST",
"BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW",
"GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST",
"NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL",
"LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND",
"IGNORE NULLS", "RESPECT NULLS", "OVER"});
}
static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion)

View File

@ -3,11 +3,11 @@
#include <cstring>
#include <memory>
#include <vector>
#include <boost/noncopyable.hpp>
#include <Core/Defines.h>
#include <Common/memcpySmall.h>
#include <Common/ProfileEvents.h>
#include <boost/noncopyable.hpp>
#include <Common/Allocator.h>
#include <Common/ProfileEvents.h>
#include <Common/memcpySmall.h>
#if __has_include(<sanitizer/asan_interface.h>) && defined(ADDRESS_SANITIZER)
# include <sanitizer/asan_interface.h>
@ -180,7 +180,7 @@ public:
char * alloc(size_t size)
{
used_bytes += size;
if (unlikely(head.empty() || static_cast<std::ptrdiff_t>(size) > head.end - head.pos))
if (unlikely(head.empty() || size > head.remaining()))
addMemoryChunk(size);
char * res = head.pos;
@ -193,6 +193,9 @@ public:
char * alignedAlloc(size_t size, size_t alignment)
{
used_bytes += size;
if (unlikely(head.empty() || size > head.remaining()))
addMemoryChunk(size + alignment);
do
{
void * head_pos = head.pos;

View File

@ -1,12 +1,24 @@
#include <Common/AsyncLoader.h>
#include <limits>
#include <optional>
#include <base/defines.h>
#include <base/scope_guard.h>
#include <Common/ErrorCodes.h>
#include <Common/Exception.h>
#include <Common/noexcept_scope.h>
#include <Common/setThreadName.h>
#include <Common/logger_useful.h>
#include <Common/ThreadPool.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/ProfileEvents.h>
#include <Common/Stopwatch.h>
namespace ProfileEvents
{
extern const Event AsyncLoaderWaitMicroseconds;
}
namespace DB
{
@ -16,6 +28,7 @@ namespace ErrorCodes
extern const int ASYNC_LOAD_CYCLE;
extern const int ASYNC_LOAD_FAILED;
extern const int ASYNC_LOAD_CANCELED;
extern const int LOGICAL_ERROR;
}
static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
@ -52,63 +65,48 @@ size_t LoadJob::pool() const
return pool_id;
}
void LoadJob::wait() const
{
std::unique_lock lock{mutex};
waiters++;
finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
waiters--;
if (load_exception)
std::rethrow_exception(load_exception);
}
void LoadJob::waitNoThrow() const noexcept
{
std::unique_lock lock{mutex};
waiters++;
finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
waiters--;
}
size_t LoadJob::waitersCount() const
{
std::unique_lock lock{mutex};
return waiters;
}
void LoadJob::ok()
size_t LoadJob::ok()
{
std::unique_lock lock{mutex};
load_status = LoadStatus::OK;
finish();
return finish();
}
void LoadJob::failed(const std::exception_ptr & ptr)
size_t LoadJob::failed(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::FAILED;
load_exception = ptr;
finish();
return finish();
}
void LoadJob::canceled(const std::exception_ptr & ptr)
size_t LoadJob::canceled(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::CANCELED;
load_exception = ptr;
finish();
return finish();
}
void LoadJob::finish()
size_t LoadJob::finish()
{
func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` and `LoadJob::wait()` return
func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` return
finish_time = std::chrono::system_clock::now();
if (waiters > 0)
finished.notify_all();
return std::exchange(suspended_waiters, 0);
}
void LoadJob::scheduled()
void LoadJob::scheduled(UInt64 job_id_)
{
chassert(job_id == 0); // Job cannot be scheduled twice
job_id = job_id_;
schedule_time = std::chrono::system_clock::now();
}
@ -118,11 +116,11 @@ void LoadJob::enqueued()
enqueue_time = std::chrono::system_clock::now();
}
void LoadJob::execute(size_t pool, const LoadJobPtr & self)
void LoadJob::execute(AsyncLoader & loader, size_t pool, const LoadJobPtr & self)
{
execution_pool_id = pool;
start_time = std::chrono::system_clock::now();
func(self);
func(loader, self);
}
@ -180,11 +178,11 @@ AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool lo
init.metric_threads,
init.metric_active_threads,
init.metric_scheduled_threads,
init.max_threads,
/* max_free_threads = */ 0,
init.max_threads),
/* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
/* max_free_threads = */ 0, // We do not require free threads
/* queue_size = */0), // Unlimited queue to avoid blocking during worker spawning
.ready_queue = {},
.max_threads = init.max_threads
.max_threads = init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()
});
}
@ -228,16 +226,16 @@ void AsyncLoader::stop()
void AsyncLoader::schedule(LoadTask & task)
{
chassert(this == &task.loader);
scheduleImpl(task.jobs);
schedule(task.jobs);
}
void AsyncLoader::schedule(const LoadTaskPtr & task)
{
chassert(this == &task->loader);
scheduleImpl(task->jobs);
schedule(task->jobs);
}
void AsyncLoader::schedule(const std::vector<LoadTaskPtr> & tasks)
void AsyncLoader::schedule(const LoadTaskPtrs & tasks)
{
LoadJobSet all_jobs;
for (const auto & task : tasks)
@ -245,10 +243,10 @@ void AsyncLoader::schedule(const std::vector<LoadTaskPtr> & tasks)
chassert(this == &task->loader);
all_jobs.insert(task->jobs.begin(), task->jobs.end());
}
scheduleImpl(all_jobs);
schedule(all_jobs);
}
void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
void AsyncLoader::schedule(const LoadJobSet & jobs_to_schedule)
{
std::unique_lock lock{mutex};
@ -264,7 +262,7 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
// 1) exclude already scheduled or finished jobs
// 2) include assigned job dependencies (that are not yet scheduled)
LoadJobSet jobs;
for (const auto & job : input_jobs)
for (const auto & job : jobs_to_schedule)
gatherNotScheduled(job, jobs, lock);
// Ensure scheduled_jobs graph will have no cycles. The only way to get a cycle is to add a cycle, assuming old jobs cannot reference new ones.
@ -280,7 +278,7 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE;
scheduled_jobs.try_emplace(job);
job->scheduled();
job->scheduled(++last_job_id);
});
}
@ -365,11 +363,20 @@ void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool)
if (!job)
return;
chassert(new_pool < pools.size());
DENY_ALLOCATIONS_IN_SCOPE;
std::unique_lock lock{mutex};
prioritize(job, new_pool, lock);
}
void AsyncLoader::wait(const LoadJobPtr & job, bool no_throw)
{
std::unique_lock job_lock{job->mutex};
wait(job_lock, job);
if (!no_throw && job->load_exception)
std::rethrow_exception(job->load_exception);
}
void AsyncLoader::remove(const LoadJobSet & jobs)
{
DENY_ALLOCATIONS_IN_SCOPE;
@ -397,9 +404,10 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
// Job is currently executing
ALLOW_ALLOCATIONS_IN_SCOPE;
chassert(info->second.isExecuting());
lock.unlock();
job->waitNoThrow(); // Wait for job to finish
wait(job, /* no_throw = */ true); // Wait for job to finish
lock.lock();
}
}
@ -415,10 +423,12 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
void AsyncLoader::setMaxThreads(size_t pool, size_t value)
{
if (value == 0)
value = getNumberOfPhysicalCPUCores();
std::unique_lock lock{mutex};
auto & p = pools[pool];
p.thread_pool->setMaxThreads(value);
p.thread_pool->setQueueSize(value); // Keep queue size equal max threads count to avoid blocking during spawning
// Note that underlying `ThreadPool` always has unlimited `queue_size` and `max_threads`.
// Worker management is done by `AsyncLoader` based on `Pool::max_threads + Pool::suspended_workers` instead.
p.max_threads = value;
if (!is_running)
return;
@ -442,7 +452,6 @@ Priority AsyncLoader::getPoolPriority(size_t pool) const
return pools[pool].priority; // NOTE: lock is not needed because `priority` is const and `pools` are immutable
}
size_t AsyncLoader::getScheduledJobCount() const
{
std::unique_lock lock{mutex};
@ -479,11 +488,11 @@ void AsyncLoader::checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mute
while (!left.empty())
{
LoadJobPtr job = *left.begin();
checkCycleImpl(job, left, visited, lock);
checkCycle(job, left, visited, lock);
}
}
String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock)
String AsyncLoader::checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock)
{
if (!left.contains(job))
return {}; // Do not consider external dependencies and already processed jobs
@ -494,7 +503,7 @@ String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, Lo
}
for (const auto & dep : job->dependencies)
{
if (auto chain = checkCycleImpl(dep, left, visited, lock); !chain.empty())
if (auto chain = checkCycle(dep, left, visited, lock); !chain.empty())
{
if (!visited.contains(job)) // Check for cycle end
throw Exception(ErrorCodes::ASYNC_LOAD_CYCLE, "Load job dependency cycle detected: {} -> {}", job->name, chain);
@ -509,10 +518,11 @@ String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, Lo
void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock)
{
chassert(scheduled_jobs.contains(job)); // Job was pending
size_t resumed_workers = 0; // Number of workers resumed in the execution pool of the job
if (status == LoadStatus::OK)
{
// Notify waiters
job->ok();
resumed_workers += job->ok();
// Update dependent jobs and enqueue if ready
for (const auto & dep : scheduled_jobs[job].dependent_jobs)
@ -528,9 +538,9 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
{
// Notify waiters
if (status == LoadStatus::FAILED)
job->failed(exception_from_job);
resumed_workers += job->failed(exception_from_job);
else if (status == LoadStatus::CANCELED)
job->canceled(exception_from_job);
resumed_workers += job->canceled(exception_from_job);
Info & info = scheduled_jobs[job];
if (info.isReady())
@ -572,35 +582,40 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
if (log_progress)
logAboutProgress(log, finished_jobs.size() - old_jobs, finished_jobs.size() + scheduled_jobs.size() - old_jobs, stopwatch);
});
if (resumed_workers)
{
Pool & pool = pools[job->executionPool()];
pool.suspended_workers -= resumed_workers;
}
}
void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock)
{
Pool & old_pool = pools[job->pool_id];
Pool & new_pool = pools[new_pool_id];
if (old_pool.priority <= new_pool.priority)
return; // Never lower priority or change pool leaving the same priority
// Note that there is no point in prioritizing finished jobs, but because we do not lock `job.mutex` here (due to recursion),
// Races are inevitable, so we prioritize all job unconditionally: both finished and pending.
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{
Pool & old_pool = pools[job->pool_id];
Pool & new_pool = pools[new_pool_id];
if (old_pool.priority <= new_pool.priority)
return; // Never lower priority or change pool leaving the same priority
// Update priority and push job forward through ready queue if needed
UInt64 ready_seqno = info->second.ready_seqno;
// Requeue job into the new pool queue without allocations
if (ready_seqno)
if (UInt64 ready_seqno = info->second.ready_seqno)
{
new_pool.ready_queue.insert(old_pool.ready_queue.extract(ready_seqno));
if (canSpawnWorker(new_pool, lock))
spawn(new_pool, lock);
}
// Set user-facing pool (may affect executing jobs)
job->pool_id.store(new_pool_id);
// Recurse into dependencies
for (const auto & dep : job->dependencies)
prioritize(dep, new_pool_id, lock);
}
job->pool_id.store(new_pool_id);
// Recurse into dependencies
for (const auto & dep : job->dependencies)
prioritize(dep, new_pool_id, lock);
}
void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock)
@ -620,11 +635,102 @@ void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<
spawn(pool, lock);
}
// Keep track of currently executing load jobs to be able to:
// 1) Detect "wait dependent" deadlocks -- throw LOGICAL_ERROR
// (when job A function waits for job B that depends on job A)
// 2) Detect "wait not scheduled" deadlocks -- throw LOGICAL_ERROR
// (thread T is waiting on an assigned job A, but job A is not yet scheduled)
// 3) Resolve "priority inversion" deadlocks -- apply priority inheritance
// (when high-priority job A function waits for a lower-priority job B, and B never starts due to its priority)
// 4) Resolve "blocked pool" deadlocks -- spawn more workers
// (when job A in pool P waits for another ready job B in P, but B never starts because there are no free workers in P)
thread_local LoadJob * current_load_job = nullptr;
size_t currentPoolOr(size_t pool)
{
return current_load_job ? current_load_job->executionPool() : pool;
}
bool detectWaitDependentDeadlock(const LoadJobPtr & waited)
{
if (waited.get() == current_load_job)
return true;
for (const auto & dep : waited->dependencies)
{
if (detectWaitDependentDeadlock(dep))
return true;
}
return false;
}
void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr & job)
{
// Ensure job we are going to wait was scheduled to avoid "wait not scheduled" deadlocks
if (job->job_id == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' waits for not scheduled load job '{}'", current_load_job->name, job->name);
// Deadlock detection and resolution
if (current_load_job && job->load_status == LoadStatus::PENDING)
{
if (detectWaitDependentDeadlock(job))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' waits for dependent load job '{}'", current_load_job->name, job->name);
auto worker_pool = current_load_job->executionPool();
auto worker_priority = getPoolPriority(worker_pool);
auto job_priority = getPoolPriority(job->pool_id);
// Waiting for a lower-priority job ("priority inversion" deadlock) is resolved using priority inheritance.
if (worker_priority < job_priority)
{
job_lock.unlock(); // Avoid reverse locking order
prioritize(job, worker_pool);
job_lock.lock();
}
// Spawn more workers to avoid exhaustion of worker pool ("blocked pool" deadlock)
if (worker_pool == job->pool_id)
{
job_lock.unlock(); // Avoid reverse locking order
workerIsSuspendedByWait(worker_pool, job);
job_lock.lock();
}
}
Stopwatch watch;
job->waiters++;
job->finished.wait(job_lock, [&] { return job->load_status != LoadStatus::PENDING; });
job->waiters--;
ProfileEvents::increment(ProfileEvents::AsyncLoaderWaitMicroseconds, watch.elapsedMicroseconds());
}
void AsyncLoader::workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job)
{
std::unique_lock lock{mutex};
std::unique_lock job_lock{job->mutex};
if (job->load_status != LoadStatus::PENDING)
return; // Job is already done, worker can continue execution
// To resolve "blocked pool" deadlocks we spawn a new worker for every suspended worker, if required
// This can lead to a visible excess of `max_threads` specified for a pool,
// but actual number of NOT suspended workers may exceed `max_threads` ONLY in intermittent state.
Pool & pool = pools[pool_id];
pool.suspended_workers++;
job->suspended_waiters++;
if (canSpawnWorker(pool, lock))
spawn(pool, lock);
// TODO(serxa): it is a good idea to propagate `job` and all its dependencies in `pool.ready_queue` by introducing
// key {suspended_waiters, ready_seqno} instead of plain `ready_seqno`, to force newly spawn workers to work on jobs
// that are being waited. But it doesn't affect correctness. So let's not complicate it for time being.
}
bool AsyncLoader::canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &)
{
// TODO(serxa): optimization: we should not spawn new worker on the first enqueue during `finish()` because current worker will take this job.
return is_running
&& !pool.ready_queue.empty()
&& pool.workers < pool.max_threads
&& pool.workers < pool.max_threads + pool.suspended_workers
&& (!current_priority || *current_priority >= pool.priority);
}
@ -632,7 +738,7 @@ bool AsyncLoader::canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &)
{
return is_running
&& !pool.ready_queue.empty()
&& pool.workers <= pool.max_threads
&& pool.workers <= pool.max_threads + pool.suspended_workers
&& (!current_priority || *current_priority >= pool.priority);
}
@ -705,7 +811,9 @@ void AsyncLoader::worker(Pool & pool)
try
{
job->execute(pool_id, job);
current_load_job = job.get();
SCOPE_EXIT({ current_load_job = nullptr; }); // Note that recursive job execution is not supported
job->execute(*this, pool_id, job);
exception_from_job = {};
}
catch (...)

View File

@ -21,6 +21,16 @@ namespace Poco { class Logger; }
namespace DB
{
// TERMINOLOGY:
// Job (`LoadJob`) - The smallest part of loading process, executed by worker. Job can depend on the other jobs. Jobs are grouped in tasks.
// Task (`LoadTask`) - Owning holder of a set of jobs. Should be held during the whole job lifetime. Cancels all jobs on destruction.
// Goal jobs (goals) - a subset of "final" jobs of a task (usually no job in task depend on a goal job).
// By default all jobs in task are included in goal jobs.
// Goals should used if you need to create a job that depends on a task (to avoid placing all jobs of the task in dependencies).
// Pool (worker pool) - A set of workers with specific priority. Every job is assigned to a pool. Job can change its pool dynamically.
// Priority (pool priority) - Constant integer value showing relative priority of a pool. Lower value means higher priority.
// AsyncLoader - scheduling system responsible for job dependency tracking and worker management respecting pool priorities.
class LoadJob;
using LoadJobPtr = std::shared_ptr<LoadJob>;
using LoadJobSet = std::unordered_set<LoadJobPtr>;
@ -43,6 +53,7 @@ enum class LoadStatus
// Smallest indivisible part of a loading process. Load job can have multiple dependencies, thus jobs constitute a direct acyclic graph (DAG).
// Job encapsulates a function to be executed by `AsyncLoader` as soon as job functions of all dependencies are successfully executed.
// Job can be waited for by an arbitrary number of threads. See `AsyncLoader` class description for more details.
// WARNING: jobs are usually held with ownership by tasks (see `LoadTask`). You are encouraged to add jobs into a tasks as soon as the are created.
class LoadJob : private boost::noncopyable
{
public:
@ -50,6 +61,7 @@ public:
LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, Func && func_)
: dependencies(std::forward<LoadJobSetType>(dependencies_))
, name(std::move(name_))
, execution_pool_id(pool_id_)
, pool_id(pool_id_)
, func(std::forward<Func>(func_))
{}
@ -67,18 +79,12 @@ public:
// Value may change during job execution by `prioritize()`.
size_t pool() const;
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
// Throws if job is FAILED or CANCELED. Returns or throws immediately if called on non-pending job.
void wait() const;
// Wait for a job to reach any non PENDING status.
void waitNoThrow() const noexcept;
// Returns number of threads blocked by `wait()` or `waitNoThrow()` calls.
// Returns number of threads blocked by `wait()` calls.
size_t waitersCount() const;
// Introspection
using TimePoint = std::chrono::system_clock::time_point;
UInt64 jobId() const { return job_id; }
TimePoint scheduleTime() const { return schedule_time; }
TimePoint enqueueTime() const { return enqueue_time; }
TimePoint startTime() const { return start_time; }
@ -90,22 +96,24 @@ public:
private:
friend class AsyncLoader;
void ok();
void failed(const std::exception_ptr & ptr);
void canceled(const std::exception_ptr & ptr);
void finish();
[[nodiscard]] size_t ok();
[[nodiscard]] size_t failed(const std::exception_ptr & ptr);
[[nodiscard]] size_t canceled(const std::exception_ptr & ptr);
[[nodiscard]] size_t finish();
void scheduled();
void scheduled(UInt64 job_id_);
void enqueued();
void execute(size_t pool, const LoadJobPtr & self);
void execute(AsyncLoader & loader, size_t pool, const LoadJobPtr & self);
std::atomic<UInt64> job_id{0};
std::atomic<size_t> execution_pool_id;
std::atomic<size_t> pool_id;
std::function<void(const LoadJobPtr & self)> func;
std::function<void(AsyncLoader & loader, const LoadJobPtr & self)> func;
mutable std::mutex mutex;
mutable std::condition_variable finished;
mutable size_t waiters = 0;
mutable size_t waiters = 0; // All waiters, including suspended
mutable size_t suspended_waiters = 0;
LoadStatus load_status{LoadStatus::PENDING};
std::exception_ptr load_exception;
@ -117,7 +125,7 @@ private:
struct EmptyJobFunc
{
void operator()(const LoadJobPtr &) {}
void operator()(AsyncLoader &, const LoadJobPtr &) {}
};
template <class Func = EmptyJobFunc>
@ -144,6 +152,7 @@ LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String n
return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<Func>(func));
}
// Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set).
class LoadTask : private boost::noncopyable
{
@ -168,10 +177,11 @@ public:
// auto load_task = loadSomethingAsync(async_loader, load_after_task.goals(), something);
const LoadJobSet & goals() const { return goal_jobs.empty() ? jobs : goal_jobs; }
AsyncLoader & loader;
private:
friend class AsyncLoader;
AsyncLoader & loader;
LoadJobSet jobs;
LoadJobSet goal_jobs;
};
@ -181,91 +191,6 @@ inline LoadTaskPtr makeLoadTask(AsyncLoader & loader, LoadJobSet && jobs, LoadJo
return std::make_shared<LoadTask>(loader, std::move(jobs), std::move(goals));
}
inline void scheduleLoad(const LoadTaskPtr & task)
{
task->schedule();
}
inline void scheduleLoad(const LoadTaskPtrs & tasks)
{
for (const auto & task : tasks)
task->schedule();
}
template <class... Args>
inline void scheduleLoadAll(Args && ... args)
{
(scheduleLoad(std::forward<Args>(args)), ...);
}
inline void waitLoad(const LoadJobSet & jobs)
{
for (const auto & job : jobs)
job->wait();
}
inline void waitLoad(const LoadTaskPtr & task)
{
waitLoad(task->goals());
}
inline void waitLoad(const LoadTaskPtrs & tasks)
{
for (const auto & task : tasks)
waitLoad(task->goals());
}
template <class... Args>
inline void waitLoadAll(Args && ... args)
{
(waitLoad(std::forward<Args>(args)), ...);
}
template <class... Args>
inline void scheduleAndWaitLoadAll(Args && ... args)
{
scheduleLoadAll(std::forward<Args>(args)...);
waitLoadAll(std::forward<Args>(args)...);
}
inline LoadJobSet getGoals(const LoadTaskPtrs & tasks)
{
LoadJobSet result;
for (const auto & task : tasks)
result.insert(task->goals().begin(), task->goals().end());
return result;
}
inline LoadJobSet getGoalsOr(const LoadTaskPtrs & tasks, const LoadJobSet & alternative)
{
LoadJobSet result;
for (const auto & task : tasks)
result.insert(task->goals().begin(), task->goals().end());
return result.empty() ? alternative : result;
}
inline LoadJobSet joinJobs(const LoadJobSet & jobs1, const LoadJobSet & jobs2)
{
LoadJobSet result;
if (!jobs1.empty())
result.insert(jobs1.begin(), jobs1.end());
if (!jobs2.empty())
result.insert(jobs2.begin(), jobs2.end());
return result;
}
inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs & tasks2)
{
if (tasks1.empty())
return tasks2;
if (tasks2.empty())
return tasks1;
LoadTaskPtrs result;
result.reserve(tasks1.size() + tasks2.size());
result.insert(result.end(), tasks1.begin(), tasks1.end());
result.insert(result.end(), tasks2.begin(), tasks2.end());
return result;
}
// `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks job dependencies and priorities.
// Basic usage example:
@ -277,8 +202,8 @@ inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs &
//
// // Create and schedule a task consisting of three jobs. Job1 has no dependencies and is run first.
// // Job2 and job3 depend on job1 and are run only after job1 completion.
// auto job_func = [&] (const LoadJobPtr & self) {
// LOG_TRACE(log, "Executing load job '{}' in pool '{}'", self->name, async_loader->getPoolName(self->pool()));
// auto job_func = [&] (AsyncLoader & loader, const LoadJobPtr & self) {
// LOG_TRACE(log, "Executing load job '{}' in pool '{}'", self->name, loader->getPoolName(self->pool()));
// };
// auto job1 = makeLoadJob({}, "job1", /* pool_id = */ 1, job_func);
// auto job2 = makeLoadJob({ job1 }, "job2", /* pool_id = */ 1, job_func);
@ -287,8 +212,8 @@ inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs &
// task.schedule();
//
// // Another thread may prioritize a job by changing its pool and wait for it:
// async_loader->prioritize(job3, /* pool_id = */ 0); // Increase priority: 1 -> 0 (lower is better)
// job3->wait(); // Blocks until job completion or cancellation and rethrow an exception (if any)
// async_loader.prioritize(job3, /* pool_id = */ 0); // Increase priority: 1 -> 0 (lower is better)
// async_loader.wait(job3); // Blocks until job completion or cancellation and rethrow an exception (if any)
//
// Every job has a pool associated with it. AsyncLoader starts every job in its thread pool.
// Each pool has a constant priority and a mutable maximum number of threads.
@ -341,7 +266,8 @@ private:
std::unique_ptr<ThreadPool> thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools.
std::map<UInt64, LoadJobPtr> ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. Key is `ready_seqno`
size_t max_threads; // Max number of workers to be spawn
size_t workers = 0; // Number of currently execution workers
size_t workers = 0; // Number of currently executing workers
size_t suspended_workers = 0; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution)
bool isActive() const { return workers > 0 || !ready_queue.empty(); }
};
@ -369,7 +295,7 @@ public:
Metric metric_threads;
Metric metric_active_threads;
Metric metric_scheduled_threads;
size_t max_threads;
size_t max_threads; // Zero means use all CPU cores
Priority priority;
};
@ -399,6 +325,7 @@ public:
// and are removed from AsyncLoader, so it is thread-safe to destroy them.
void schedule(LoadTask & task);
void schedule(const LoadTaskPtr & task);
void schedule(const LoadJobSet & jobs_to_schedule);
// Schedule all tasks atomically. To ensure only highest priority jobs among all tasks are run first.
void schedule(const LoadTaskPtrs & tasks);
@ -407,6 +334,11 @@ public:
// Jobs from higher (than `new_pool`) priority pools are not changed.
void prioritize(const LoadJobPtr & job, size_t new_pool);
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
// Throws if job is FAILED or CANCELED unless `no_throw` is set. Returns or throws immediately if called on non-pending job.
// If job was not scheduled, it will be implicitly scheduled before the wait (deadlock auto-resolution).
void wait(const LoadJobPtr & job, bool no_throw = false);
// Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them.
void remove(const LoadJobSet & jobs);
@ -430,23 +362,26 @@ public:
bool is_executing = false;
};
// For introspection and debug only, see `system.async_loader` table
// For introspection and debug only, see `system.async_loader` table.
std::vector<JobState> getJobStates() const;
// For deadlock resolution. Should not be used directly.
void workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job);
private:
void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
String checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock);
String checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock);
void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock);
void scheduleImpl(const LoadJobSet & input_jobs);
void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
void prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock);
void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock);
bool canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &);
bool canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &);
void updateCurrentPriorityAndSpawn(std::unique_lock<std::mutex> &);
void spawn(Pool & pool, std::unique_lock<std::mutex> &);
void wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr & job);
bool canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> & lock);
bool canWorkerLive(Pool & pool, std::unique_lock<std::mutex> & lock);
void updateCurrentPriorityAndSpawn(std::unique_lock<std::mutex> & lock);
void spawn(Pool & pool, std::unique_lock<std::mutex> & lock);
void worker(Pool & pool);
bool hasWorker(std::unique_lock<std::mutex> &) const;
bool hasWorker(std::unique_lock<std::mutex> & lock) const;
// Logging
const bool log_failures; // Worker should log all exceptions caught from job functions.
@ -457,6 +392,7 @@ private:
bool is_running = true;
std::optional<Priority> current_priority; // highest priority among active pools
UInt64 last_ready_seqno = 0; // Increasing counter for ready queue keys.
UInt64 last_job_id = 0; // Increasing counter for job IDs
std::unordered_map<LoadJobPtr, Info> scheduled_jobs; // Full set of scheduled pending jobs along with scheduling info.
std::vector<Pool> pools; // Thread pools for job execution and ready queues
LoadJobSet finished_jobs; // Set of finished jobs (for introspection only, until jobs are removed).
@ -465,4 +401,136 @@ private:
std::chrono::system_clock::time_point busy_period_start_time;
};
// === HELPER FUNCTIONS ===
// There are three types of helper functions:
// schedulerLoad([loader], {jobs|task|tasks}):
// Just schedule jobs for async loading.
// Note that normally function `doSomethingAsync()` returns you a task which is NOT scheduled.
// This is done to allow you:
// (1) construct complex dependency graph offline.
// (2) schedule tasks simultaneously to respect their relative priorities.
// (3) do prioritization independently, before scheduling.
// prioritizeLoad([loader], pool_id, {jobs|task|tasks}):
// Prioritize jobs w/o waiting for it.
// Note that prioritization may be done
// (1) before scheduling (to ensure all jobs are started in the correct pools)
// (2) after scheduling (for dynamic prioritization, e.g. when new query arrives)
// waitLoad([loader], pool_id, {jobs|task|tasks}, [no_throw]):
// Prioritize and wait for jobs.
// Note that to avoid deadlocks it implicitly schedules all the jobs before waiting for them.
// Also to avoid priority inversion you should never wait for a job that has lower priority.
// So it prioritizes all jobs, then schedules all jobs and waits every job.
// IMPORTANT: Any load error will be rethrown, unless `no_throw` is set.
// Common usage pattern is:
// waitLoad(currentPoolOr(foreground_pool_id), tasks);
// Returns current execution pool if it is called from load job, or `pool` otherwise
// It should be used for waiting other load jobs in places that can be executed from load jobs
size_t currentPoolOr(size_t pool);
inline void scheduleLoad(AsyncLoader & loader, const LoadJobSet & jobs)
{
loader.schedule(jobs);
}
inline void scheduleLoad(const LoadTaskPtr & task)
{
task->schedule();
}
inline void scheduleLoad(const LoadTaskPtrs & tasks)
{
if (tasks.empty())
return;
// NOTE: it is assumed that all tasks use the same `AsyncLoader`
AsyncLoader & loader = tasks.front()->loader;
loader.schedule(tasks);
}
inline void waitLoad(AsyncLoader & loader, const LoadJobSet & jobs, bool no_throw = false)
{
scheduleLoad(loader, jobs);
for (const auto & job : jobs)
loader.wait(job, no_throw);
}
inline void waitLoad(const LoadTaskPtr & task, bool no_throw = false)
{
scheduleLoad(task);
waitLoad(task->loader, task->goals(), no_throw);
}
inline void waitLoad(const LoadTaskPtrs & tasks, bool no_throw = false)
{
scheduleLoad(tasks);
for (const auto & task : tasks)
waitLoad(task->loader, task->goals(), no_throw);
}
inline void prioritizeLoad(AsyncLoader & loader, size_t pool_id, const LoadJobSet & jobs)
{
for (const auto & job : jobs)
loader.prioritize(job, pool_id);
}
inline void prioritizeLoad(size_t pool_id, const LoadTaskPtr & task)
{
prioritizeLoad(task->loader, pool_id, task->goals());
}
inline void prioritizeLoad(size_t pool_id, const LoadTaskPtrs & tasks)
{
for (const auto & task : tasks)
prioritizeLoad(task->loader, pool_id, task->goals());
}
inline void waitLoad(AsyncLoader & loader, size_t pool_id, const LoadJobSet & jobs, bool no_throw = false)
{
prioritizeLoad(loader, pool_id, jobs);
waitLoad(loader, jobs, no_throw);
}
inline void waitLoad(size_t pool_id, const LoadTaskPtr & task, bool no_throw = false)
{
prioritizeLoad(task->loader, pool_id, task->goals());
waitLoad(task->loader, task->goals(), no_throw);
}
inline void waitLoad(size_t pool_id, const LoadTaskPtrs & tasks, bool no_throw = false)
{
prioritizeLoad(pool_id, tasks);
waitLoad(tasks, no_throw);
}
inline LoadJobSet getGoals(const LoadTaskPtrs & tasks, const LoadJobSet & alternative = {})
{
LoadJobSet result;
for (const auto & task : tasks)
result.insert(task->goals().begin(), task->goals().end());
return result.empty() ? alternative : result;
}
inline LoadJobSet joinJobs(const LoadJobSet & jobs1, const LoadJobSet & jobs2)
{
LoadJobSet result;
if (!jobs1.empty())
result.insert(jobs1.begin(), jobs1.end());
if (!jobs2.empty())
result.insert(jobs2.begin(), jobs2.end());
return result;
}
inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs & tasks2)
{
if (tasks1.empty())
return tasks2;
if (tasks2.empty())
return tasks1;
LoadTaskPtrs result;
result.reserve(tasks1.size() + tasks2.size());
result.insert(result.end(), tasks1.begin(), tasks1.end());
result.insert(result.end(), tasks2.begin(), tasks2.end());
return result;
}
}

View File

@ -110,12 +110,12 @@
M(StorageHiveThreads, "Number of threads in the StorageHive thread pool.") \
M(StorageHiveThreadsActive, "Number of threads in the StorageHive thread pool running a task.") \
M(StorageHiveThreadsScheduled, "Number of queued or active jobs in the StorageHive thread pool.") \
M(TablesLoaderThreads, "Number of threads in the tables loader thread pool.") \
M(TablesLoaderThreadsActive, "Number of threads in the tables loader thread pool running a task.") \
M(TablesLoaderThreadsScheduled, "Number of queued or active jobs in the tables loader thread pool.") \
M(DatabaseOrdinaryThreads, "Number of threads in the Ordinary database thread pool.") \
M(DatabaseOrdinaryThreadsActive, "Number of threads in the Ordinary database thread pool running a task.") \
M(DatabaseOrdinaryThreadsScheduled, "Number of queued or active jobs in the Ordinary database thread pool.") \
M(TablesLoaderBackgroundThreads, "Number of threads in the tables loader background thread pool.") \
M(TablesLoaderBackgroundThreadsActive, "Number of threads in the tables loader background thread pool running a task.") \
M(TablesLoaderBackgroundThreadsScheduled, "Number of queued or active jobs in the tables loader background thread pool.") \
M(TablesLoaderForegroundThreads, "Number of threads in the tables loader foreground thread pool.") \
M(TablesLoaderForegroundThreadsActive, "Number of threads in the tables loader foreground thread pool running a task.") \
M(TablesLoaderForegroundThreadsScheduled, "Number of queued or active jobs in the tables loader foreground thread pool.") \
M(DatabaseOnDiskThreads, "Number of threads in the DatabaseOnDisk thread pool.") \
M(DatabaseOnDiskThreadsActive, "Number of threads in the DatabaseOnDisk thread pool running a task.") \
M(DatabaseOnDiskThreadsScheduled, "Number of queued or active jobs in the DatabaseOnDisk thread pool.") \

View File

@ -588,6 +588,7 @@
M(706, LIBSSH_ERROR) \
M(707, GCP_ERROR) \
M(708, ILLEGAL_STATISTIC) \
M(709, CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

32
src/Common/PoolId.h Normal file
View File

@ -0,0 +1,32 @@
#pragma once

#include <Common/Priority.h>

namespace DB
{

/// Indices and priorities of `AsyncLoader` pools.
/// The most important difference from regular ThreadPools is priorities of pools:
///  * Pools that have different priorities do NOT run jobs simultaneously (with small exception due to dynamic prioritization).
///  * Pools with lower priority wait for all jobs in higher priority pools to be done.
/// Note that pools also have different configurable sizes not listed here. See `Context::getAsyncLoader()` for details.
/// WARNING: `*PoolId` values must be unique and sequential w/o gaps.

/// Used for executing load jobs that are waited for by queries or in case of synchronous table loading.
constexpr size_t TablesLoaderForegroundPoolId = 0;
constexpr Priority TablesLoaderForegroundPriority{0};

/// Has lower priority and is used by table load jobs.
constexpr size_t TablesLoaderBackgroundLoadPoolId = 1;
constexpr Priority TablesLoaderBackgroundLoadPriority{1};

/// Has even lower priority and is used by startup jobs.
/// NOTE: This pool is required to begin table startup only after all tables are loaded.
/// NOTE: Which is needed to prevent heavy merges/mutations from consuming all the resources, slowing table loading down.
constexpr size_t TablesLoaderBackgroundStartupPoolId = 2;
constexpr Priority TablesLoaderBackgroundStartupPriority{2};

}

View File

@ -444,8 +444,13 @@ The server successfully detected this situation and will download merged part fr
M(WaitPrefetchTaskMicroseconds, "Time spend waiting for prefetched reader") \
\
M(ThreadpoolReaderTaskMicroseconds, "Time spent getting the data in asynchronous reading") \
M(ThreadpoolReaderPrepareMicroseconds, "Time spent on preparation (e.g. call to reader seek() method)") \
M(ThreadpoolReaderReadBytes, "Bytes read from a threadpool task in asynchronous reading") \
M(ThreadpoolReaderSubmit, "Bytes read from a threadpool task in asynchronous reading") \
M(ThreadpoolReaderSubmitReadSynchronously, "How many times we haven't scheduled a task on the thread pool and read synchronously instead") \
M(ThreadpoolReaderSubmitReadSynchronouslyBytes, "How many bytes were read synchronously") \
M(ThreadpoolReaderSubmitReadSynchronouslyMicroseconds, "How much time we spent reading synchronously") \
M(AsynchronousReaderIgnoredBytes, "Number of bytes ignored during asynchronous reading") \
\
M(FileSegmentWaitReadBufferMicroseconds, "Metric per file segment. Time spend waiting for internal read buffer (includes cache waiting)") \
M(FileSegmentReadMicroseconds, "Metric per file segment. Time spend reading from file") \
@ -569,6 +574,8 @@ The server successfully detected this situation and will download merged part fr
\
M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.") \
\
M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.") \
\
M(LogTest, "Number of log messages with level Test") \
M(LogTrace, "Number of log messages with level Trace") \
M(LogDebug, "Number of log messages with level Debug") \

View File

@ -1,8 +1,12 @@
#include <boost/core/noncopyable.hpp>
#include <gtest/gtest.h>
#include <array>
#include <list>
#include <barrier>
#include <chrono>
#include <mutex>
#include <shared_mutex>
#include <stdexcept>
#include <string_view>
#include <vector>
@ -19,9 +23,9 @@ using namespace DB;
namespace CurrentMetrics
{
extern const Metric TablesLoaderThreads;
extern const Metric TablesLoaderThreadsActive;
extern const Metric TablesLoaderThreadsScheduled;
extern const Metric TablesLoaderBackgroundThreads;
extern const Metric TablesLoaderBackgroundThreadsActive;
extern const Metric TablesLoaderBackgroundThreadsScheduled;
}
namespace DB::ErrorCodes
@ -61,9 +65,9 @@ struct AsyncLoaderTest
{
result.push_back({
.name = fmt::format("Pool{}", pool_id),
.metric_threads = CurrentMetrics::TablesLoaderThreads,
.metric_active_threads = CurrentMetrics::TablesLoaderThreadsActive,
.metric_scheduled_threads = CurrentMetrics::TablesLoaderThreadsScheduled,
.metric_threads = CurrentMetrics::TablesLoaderBackgroundThreads,
.metric_active_threads = CurrentMetrics::TablesLoaderBackgroundThreadsActive,
.metric_scheduled_threads = CurrentMetrics::TablesLoaderBackgroundThreadsScheduled,
.max_threads = desc.max_threads,
.priority = desc.priority
});
@ -155,7 +159,7 @@ TEST(AsyncLoader, Smoke)
std::atomic<size_t> jobs_done{0};
std::atomic<size_t> low_priority_jobs_done{0};
auto job_func = [&] (const LoadJobPtr & self) {
auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self) {
jobs_done++;
if (self->pool() == low_priority_pool)
low_priority_jobs_done++;
@ -172,13 +176,13 @@ TEST(AsyncLoader, Smoke)
auto job5 = makeLoadJob({ job3, job4 }, low_priority_pool, "job5", job_func);
task2->merge(t.schedule({ job5 }));
std::thread waiter_thread([=] { job5->wait(); });
std::thread waiter_thread([&t, job5] { t.loader.wait(job5); });
t.loader.start();
job3->wait();
t.loader.wait(job3);
t.loader.wait();
job4->wait();
t.loader.wait(job4);
waiter_thread.join();
@ -196,7 +200,7 @@ TEST(AsyncLoader, CycleDetection)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
LoadJobPtr cycle_breaker; // To avoid memleak we introduce with a cycle
@ -241,7 +245,7 @@ TEST(AsyncLoader, CancelPendingJob)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job = makeLoadJob({}, "job", job_func);
auto task = t.schedule({ job });
@ -251,7 +255,7 @@ TEST(AsyncLoader, CancelPendingJob)
ASSERT_EQ(job->status(), LoadStatus::CANCELED);
try
{
job->wait();
t.loader.wait(job);
FAIL();
}
catch (Exception & e)
@ -264,7 +268,7 @@ TEST(AsyncLoader, CancelPendingTask)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
@ -277,7 +281,7 @@ TEST(AsyncLoader, CancelPendingTask)
try
{
job1->wait();
t.loader.wait(job1);
FAIL();
}
catch (Exception & e)
@ -287,7 +291,7 @@ TEST(AsyncLoader, CancelPendingTask)
try
{
job2->wait();
t.loader.wait(job2);
FAIL();
}
catch (Exception & e)
@ -300,7 +304,7 @@ TEST(AsyncLoader, CancelPendingDependency)
{
AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {};
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
@ -314,7 +318,7 @@ TEST(AsyncLoader, CancelPendingDependency)
try
{
job1->wait();
t.loader.wait(job1);
FAIL();
}
catch (Exception & e)
@ -324,7 +328,7 @@ TEST(AsyncLoader, CancelPendingDependency)
try
{
job2->wait();
t.loader.wait(job2);
FAIL();
}
catch (Exception & e)
@ -340,7 +344,7 @@ TEST(AsyncLoader, CancelExecutingJob)
std::barrier sync(2);
auto job_func = [&] (const LoadJobPtr &)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{
sync.arrive_and_wait(); // (A) sync with main thread
sync.arrive_and_wait(); // (B) wait for waiter
@ -362,7 +366,7 @@ TEST(AsyncLoader, CancelExecutingJob)
canceler.join();
ASSERT_EQ(job->status(), LoadStatus::OK);
job->wait();
t.loader.wait(job);
}
TEST(AsyncLoader, CancelExecutingTask)
@ -371,19 +375,19 @@ TEST(AsyncLoader, CancelExecutingTask)
t.loader.start();
std::barrier sync(2);
auto blocker_job_func = [&] (const LoadJobPtr &)
auto blocker_job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{
sync.arrive_and_wait(); // (A) sync with main thread
sync.arrive_and_wait(); // (B) wait for waiter
// signals (C)
};
auto job_to_cancel_func = [&] (const LoadJobPtr &)
auto job_to_cancel_func = [&] (AsyncLoader &, const LoadJobPtr &)
{
FAIL(); // this job should be canceled
};
auto job_to_succeed_func = [&] (const LoadJobPtr &)
auto job_to_succeed_func = [&] (AsyncLoader &, const LoadJobPtr &)
{
};
@ -430,7 +434,7 @@ TEST(AsyncLoader, DISABLED_JobFailure)
std::string error_message = "test job failure";
auto job_func = [&] (const LoadJobPtr &) {
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
throw std::runtime_error(error_message);
};
@ -442,7 +446,7 @@ TEST(AsyncLoader, DISABLED_JobFailure)
ASSERT_EQ(job->status(), LoadStatus::FAILED);
try
{
job->wait();
t.loader.wait(job);
FAIL();
}
catch (Exception & e)
@ -459,7 +463,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
std::string_view error_message = "test job failure";
auto failed_job_func = [&] (const LoadJobPtr &) {
auto failed_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message);
};
@ -468,7 +472,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
t.loader.wait();
auto job_func = [&] (const LoadJobPtr &) {};
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({ failed_job }, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
@ -480,7 +484,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try
{
job1->wait();
t.loader.wait(job1);
FAIL();
}
catch (Exception & e)
@ -490,7 +494,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
}
try
{
job2->wait();
t.loader.wait(job2);
FAIL();
}
catch (Exception & e)
@ -504,14 +508,14 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
{
AsyncLoaderTest t;
auto canceled_job_func = [&] (const LoadJobPtr &) {};
auto canceled_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto canceled_job = makeLoadJob({}, "canceled_job", canceled_job_func);
auto canceled_task = t.schedule({ canceled_job });
canceled_task->remove();
t.loader.start();
auto job_func = [&] (const LoadJobPtr &) {};
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({ canceled_job }, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func);
auto task = t.schedule({ job1, job2 });
@ -522,7 +526,7 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try
{
job1->wait();
t.loader.wait(job1);
FAIL();
}
catch (Exception & e)
@ -531,7 +535,7 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
}
try
{
job2->wait();
t.loader.wait(job2);
FAIL();
}
catch (Exception & e)
@ -550,7 +554,7 @@ TEST(AsyncLoader, TestConcurrency)
std::barrier sync(concurrency);
std::atomic<int> executing{0};
auto job_func = [&] (const LoadJobPtr &)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{
executing++;
ASSERT_LE(executing, concurrency);
@ -577,7 +581,7 @@ TEST(AsyncLoader, TestOverload)
for (int concurrency = 4; concurrency <= 8; concurrency++)
{
auto job_func = [&] (const LoadJobPtr &)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{
executing++;
t.randomSleepUs(100, 200, 100);
@ -613,7 +617,7 @@ TEST(AsyncLoader, StaticPriorities)
std::string schedule;
auto job_func = [&] (const LoadJobPtr & self)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{
schedule += fmt::format("{}{}", self->name, self->pool());
};
@ -656,18 +660,18 @@ TEST(AsyncLoader, SimplePrioritization)
std::atomic<int> executed{0}; // Number of previously executed jobs (to test execution order)
LoadJobPtr job_to_prioritize;
auto job_func_A_booster = [&] (const LoadJobPtr &)
auto job_func_A_booster = [&] (AsyncLoader &, const LoadJobPtr &)
{
ASSERT_EQ(executed++, 0);
t.loader.prioritize(job_to_prioritize, 2);
};
auto job_func_B_tester = [&] (const LoadJobPtr &)
auto job_func_B_tester = [&] (AsyncLoader &, const LoadJobPtr &)
{
ASSERT_EQ(executed++, 2);
};
auto job_func_C_boosted = [&] (const LoadJobPtr &)
auto job_func_C_boosted = [&] (AsyncLoader &, const LoadJobPtr &)
{
ASSERT_EQ(executed++, 1);
};
@ -680,7 +684,8 @@ TEST(AsyncLoader, SimplePrioritization)
job_to_prioritize = jobs[2]; // C
scheduleAndWaitLoadAll(task);
scheduleLoad(task);
waitLoad(task);
}
TEST(AsyncLoader, DynamicPriorities)
@ -714,7 +719,7 @@ TEST(AsyncLoader, DynamicPriorities)
UInt64 ready_seqno_D = 0;
UInt64 ready_seqno_E = 0;
auto job_func = [&] (const LoadJobPtr & self)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{
{
std::unique_lock lock{schedule_mutex};
@ -791,7 +796,7 @@ TEST(AsyncLoader, RandomIndependentTasks)
AsyncLoaderTest t(16);
t.loader.start();
auto job_func = [&] (const LoadJobPtr & self)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{
for (const auto & dep : self->dependencies)
ASSERT_EQ(dep->status(), LoadStatus::OK);
@ -818,7 +823,7 @@ TEST(AsyncLoader, RandomDependentTasks)
std::vector<LoadTaskPtr> tasks;
std::vector<LoadJobPtr> all_jobs;
auto job_func = [&] (const LoadJobPtr & self)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{
for (const auto & dep : self->dependencies)
ASSERT_EQ(dep->status(), LoadStatus::OK);
@ -860,7 +865,7 @@ TEST(AsyncLoader, SetMaxThreads)
syncs.push_back(std::make_unique<std::barrier<>>(max_threads + 1));
auto job_func = [&] (const LoadJobPtr &)
auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{
int idx = sync_index;
if (idx < syncs.size())
@ -914,10 +919,11 @@ TEST(AsyncLoader, DynamicPools)
{
std::atomic<bool> boosted{false}; // Visible concurrency was increased
std::atomic<int> left{concurrency * jobs_in_chain / 2}; // Number of jobs to start before `prioritize()` call
std::shared_mutex prioritization_mutex; // To slow down job execution during prioritization to avoid race condition
LoadJobSet jobs_to_prioritize;
auto job_func = [&] (const LoadJobPtr & self)
auto job_func = [&] (AsyncLoader & loader, const LoadJobPtr & self)
{
auto pool_id = self->executionPool();
executing[pool_id]++;
@ -928,10 +934,12 @@ TEST(AsyncLoader, DynamicPools)
// Dynamic prioritization
if (--left == 0)
{
std::unique_lock lock{prioritization_mutex};
for (const auto & job : jobs_to_prioritize)
t.loader.prioritize(job, 1);
loader.prioritize(job, 1);
}
std::shared_lock lock{prioritization_mutex};
t.randomSleepUs(100, 200, 100);
ASSERT_LE(executing[pool_id], max_threads[pool_id]);
@ -941,9 +949,10 @@ TEST(AsyncLoader, DynamicPools)
std::vector<LoadTaskPtr> tasks;
tasks.reserve(concurrency);
for (int i = 0; i < concurrency; i++)
tasks.push_back(makeLoadTask(t.loader, t.chainJobSet(jobs_in_chain, job_func)));
tasks.push_back(makeLoadTask(t.loader, t.chainJobSet(jobs_in_chain, job_func, fmt::format("c{}-j", i))));
jobs_to_prioritize = getGoals(tasks); // All jobs
scheduleAndWaitLoadAll(tasks);
scheduleLoad(tasks);
waitLoad(tasks);
ASSERT_EQ(executing[0], 0);
ASSERT_EQ(executing[1], 0);
@ -952,3 +961,136 @@ TEST(AsyncLoader, DynamicPools)
}
}
/// Verifies that a running job may dynamically schedule a nested task of sub jobs
/// and wait for it from inside its own job function, across varying thread counts.
TEST(AsyncLoader, SubJobs)
{
    AsyncLoaderTest t(1);
    t.loader.start();

    // An example of component with an asynchronous loading interface
    class MyComponent : boost::noncopyable {
    public:
        MyComponent(AsyncLoader & loader_, int jobs)
            : loader(loader_)
            , jobs_left(jobs)
        {}

        [[nodiscard]] LoadTaskPtr loadAsync()
        {
            // The main job spawns `jobs_left` sub jobs as a nested task and blocks
            // on them from within its own job function.
            auto job_func = [this] (AsyncLoader &, const LoadJobPtr &) {
                auto sub_job_func = [this] (AsyncLoader &, const LoadJobPtr &) {
                    --jobs_left;
                };
                LoadJobSet jobs;
                for (size_t j = 0; j < jobs_left; j++)
                    jobs.insert(makeLoadJob({}, fmt::format("sub job {}", j), sub_job_func));
                waitLoad(makeLoadTask(loader, std::move(jobs)));
            };
            auto job = makeLoadJob({}, "main job", job_func);
            return load_task = makeLoadTask(loader, { job });
        }

        bool isLoaded() const
        {
            return jobs_left == 0;
        }

    private:
        AsyncLoader & loader;
        std::atomic<int> jobs_left;
        // It is a good practice to keep load task inside the component:
        // 1) to make sure it outlives its load jobs;
        // 2) to avoid removing load jobs from `system.async_loader` while we use the component
        LoadTaskPtr load_task;
    };

    for (double jobs_per_thread : std::array{0.5, 1.0, 2.0})
    {
        for (size_t threads = 1; threads <= 32; threads *= 2)
        {
            t.loader.setMaxThreads(0, threads);
            std::list<MyComponent> components;
            LoadTaskPtrs tasks;
            size_t size = static_cast<size_t>(jobs_per_thread * threads);
            tasks.reserve(size);
            for (size_t j = 0; j < size; j++)
            {
                components.emplace_back(t.loader, 5);
                tasks.emplace_back(components.back().loadAsync());
            }
            waitLoad(tasks); // waits for all main jobs and, transitively, their sub jobs
            for (const auto & component: components)
                ASSERT_TRUE(component.isLoaded());
        }
    }
}
/// Verifies that a job may recursively schedule and wait for another job in the
/// same pool (a chain of depth `jobs_left`), across varying thread counts.
TEST(AsyncLoader, RecursiveJob)
{
    AsyncLoaderTest t(1);
    t.loader.start();

    // An example of component with an asynchronous loading interface (a complicated one)
    class MyComponent : boost::noncopyable {
    public:
        MyComponent(AsyncLoader & loader_, int jobs)
            : loader(loader_)
            , jobs_left(jobs)
        {}

        [[nodiscard]] LoadTaskPtr loadAsync()
        {
            return load_task = loadAsyncImpl(jobs_left);
        }

        bool isLoaded() const
        {
            return jobs_left == 0;
        }

    private:
        [[nodiscard]] LoadTaskPtr loadAsyncImpl(int id)
        {
            auto job_func = [this] (AsyncLoader &, const LoadJobPtr & self) {
                jobFunction(self);
            };
            auto job = makeLoadJob({}, fmt::format("job{}", id), job_func);
            auto task = makeLoadTask(loader, { job });
            return task;
        }

        // Each job decrements the counter, then recursively schedules and waits for the
        // next job in the same pool until the counter reaches zero.
        void jobFunction(const LoadJobPtr & self)
        {
            int next = --jobs_left;
            if (next > 0)
                waitLoad(self->pool(), loadAsyncImpl(next));
        }

        AsyncLoader & loader;
        std::atomic<int> jobs_left;
        // It is a good practice to keep load task inside the component:
        // 1) to make sure it outlives its load jobs;
        // 2) to avoid removing load jobs from `system.async_loader` while we use the component
        LoadTaskPtr load_task;
    };

    for (double jobs_per_thread : std::array{0.5, 1.0, 2.0})
    {
        for (size_t threads = 1; threads <= 32; threads *= 2)
        {
            t.loader.setMaxThreads(0, threads);
            std::list<MyComponent> components;
            LoadTaskPtrs tasks;
            size_t size = static_cast<size_t>(jobs_per_thread * threads);
            tasks.reserve(size);
            for (size_t j = 0; j < size; j++)
            {
                components.emplace_back(t.loader, 5);
                tasks.emplace_back(components.back().loadAsync());
            }
            waitLoad(tasks);
            for (const auto & component: components)
                ASSERT_TRUE(component.isLoaded());
        }
    }
}

View File

@ -139,9 +139,9 @@ void DeflateQplJobHWPool::unLockJob(UInt32 index)
hw_job_ptr_locks[index].store(false);
}
//HardwareCodecDeflateQpl
HardwareCodecDeflateQpl::HardwareCodecDeflateQpl()
:log(&Poco::Logger::get("HardwareCodecDeflateQpl"))
HardwareCodecDeflateQpl::HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_)
: log(&Poco::Logger::get("HardwareCodecDeflateQpl"))
, sw_codec(sw_codec_)
{
}
@ -169,7 +169,7 @@ Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source
UInt32 compressed_size = 0;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->acquireJob fail, probably job pool exhausted)");
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->acquireJob fail, probably job pool exhausted)");
return RET_ERROR;
}
@ -189,7 +189,7 @@ Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source
}
else
{
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
DeflateQplJobHWPool::instance().releaseJob(job_id);
return RET_ERROR;
}
@ -202,7 +202,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source,
UInt32 decompressed_size = 0;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)");
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)");
return RET_ERROR;
}
@ -214,17 +214,29 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source,
job_ptr->available_out = uncompressed_size;
job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST;
if (auto status = qpl_submit_job(job_ptr); status != QPL_STS_OK)
auto status = qpl_submit_job(job_ptr);
if (status != QPL_STS_OK)
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR;
}
/// Busy waiting till job complete.
UInt32 num_checks = 0;
do
{
_tpause(1, __rdtsc() + 1000);
} while (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED);
status = qpl_check_job(job_ptr);
++num_checks;
} while (status == QPL_STS_BEING_PROCESSED && num_checks < MAX_CHECKS);
if (status != QPL_STS_OK)
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR;
}
decompressed_size = job_ptr->total_out;
DeflateQplJobHWPool::instance().releaseJob(job_id);
@ -237,7 +249,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source,
qpl_job * job_ptr = nullptr;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)");
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)");
return RET_ERROR;
}
@ -257,7 +269,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source,
else
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataAsynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR;
}
}
@ -266,6 +278,7 @@ void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests()
{
auto n_jobs_processing = decomp_async_job_map.size();
std::map<UInt32, qpl_job *>::iterator it = decomp_async_job_map.begin();
UInt32 num_checks = 0;
while (n_jobs_processing)
{
@ -274,22 +287,34 @@ void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests()
job_id = it->first;
job_ptr = it->second;
if (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED)
auto status = qpl_check_job(job_ptr);
if ((status == QPL_STS_BEING_PROCESSED) && (num_checks < MAX_CHECKS))
{
it++;
}
else
{
if (status != QPL_STS_OK)
{
sw_codec.doDecompressData(
reinterpret_cast<const char * >(job_ptr->next_in_ptr),
job_ptr->available_in,
reinterpret_cast<char *>(job_ptr->next_out_ptr),
job_ptr->available_out);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: flushAsynchronousDecompressRequests with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
}
it = decomp_async_job_map.erase(it);
DeflateQplJobHWPool::instance().releaseJob(job_id);
n_jobs_processing--;
if (n_jobs_processing <= 0)
break;
}
if (it == decomp_async_job_map.end())
{
it = decomp_async_job_map.begin();
_tpause(1, __rdtsc() + 1000);
++num_checks;
}
}
}
@ -364,8 +389,8 @@ void SoftwareCodecDeflateQpl::doDecompressData(const char * source, UInt32 sourc
}
CompressionCodecDeflateQpl::CompressionCodecDeflateQpl()
: hw_codec(std::make_unique<HardwareCodecDeflateQpl>())
, sw_codec(std::make_unique<SoftwareCodecDeflateQpl>())
: sw_codec(std::make_unique<SoftwareCodecDeflateQpl>())
, hw_codec(std::make_unique<HardwareCodecDeflateQpl>(*sw_codec))
{
setCodecDescription("DEFLATE_QPL");
}

View File

@ -65,8 +65,10 @@ class HardwareCodecDeflateQpl
public:
/// RET_ERROR stands for hardware codec fail, needs fallback to software codec.
static constexpr Int32 RET_ERROR = -1;
/// Maximum times to check if hardware job complete, otherwise fallback to software codec.
static constexpr UInt32 MAX_CHECKS = UINT16_MAX;
HardwareCodecDeflateQpl();
HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_);
~HardwareCodecDeflateQpl();
Int32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const;
@ -87,6 +89,8 @@ private:
/// For flush, pop out job ID && job object from this map. Use job ID to release job lock and use job object to check job status till complete.
std::map<UInt32, qpl_job *> decomp_async_job_map;
Poco::Logger * log;
/// Provides a fallback in case of errors.
SoftwareCodecDeflateQpl & sw_codec;
};
class CompressionCodecDeflateQpl final : public ICompressionCodec
@ -110,8 +114,8 @@ protected:
private:
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
std::unique_ptr<HardwareCodecDeflateQpl> hw_codec;
std::unique_ptr<SoftwareCodecDeflateQpl> sw_codec;
std::unique_ptr<HardwareCodecDeflateQpl> hw_codec;
};
}

View File

@ -101,6 +101,7 @@ void KeeperSnapshotManagerS3::updateS3Configuration(const Poco::Util::AbstractCo
auto client = S3::ClientFactory::instance().create(
client_configuration,
new_uri.is_virtual_hosted_style,
/* disable_checksum= */ false,
credentials.GetAWSAccessKeyId(),
credentials.GetAWSSecretKey(),
auth_settings.server_side_encryption_customer_key_base64,

157
src/Core/PlainRanges.cpp Normal file
View File

@ -0,0 +1,157 @@
#include <Core/PlainRanges.h>
namespace DB
{
/// Wrap a single range; a one-element series is trivially plain.
PlainRanges::PlainRanges(const Range & range)
{
    ranges.emplace_back(range);
}
/// Build a plain series from `ranges_`.
/// When intersections are impossible the input is taken as-is (the caller
/// guarantees it is already plain); otherwise it is normalized, sorting first
/// unless the caller promises the input is ordered.
PlainRanges::PlainRanges(const Ranges & ranges_, bool may_have_intersection, bool ordered)
{
    if (!may_have_intersection)
    {
        ranges = ranges_;
        return;
    }
    ranges = ordered ? makePlainFromOrdered(ranges_) : makePlainFromUnordered(ranges_);
}
/// Normalize an already left-ordered series: consecutive overlapping ranges
/// are folded into their union, producing a plain (intersection-free) series.
Ranges PlainRanges::makePlainFromOrdered(const Ranges & ranges_)
{
    if (ranges_.size() <= 1)
        return ranges_;

    Ranges merged;
    merged.push_back(ranges_.front());
    for (size_t pos = 1; pos < ranges_.size(); ++pos)
    {
        const Range & next = ranges_[pos];
        if (merged.back().intersectsRange(next))
            merged.back() = *merged.back().unionWith(next);
        else
            merged.push_back(next);
    }
    return merged;
}
/// Normalize an arbitrary series: sort by left bound, then fold overlaps
/// exactly as in the ordered case.
Ranges PlainRanges::makePlainFromUnordered(Ranges ranges_)
{
    if (ranges_.size() > 1)
    {
        std::sort(ranges_.begin(), ranges_.end(), compareByLeftBound);
        return makePlainFromOrdered(ranges_);
    }
    return ranges_;
}
/// Merge-union of two plain series (both ordered and intersection-free).
/// Walks both lists like a merge sort: a range strictly left of the other side is
/// copied through; overlapping ranges are replaced by their union, and the side
/// whose range ends first advances.
PlainRanges PlainRanges::unionWith(const PlainRanges & other)
{
    Ranges merged;
    auto lhs = ranges.begin();
    auto rhs = other.ranges.begin();

    while (lhs != ranges.end() && rhs != other.ranges.end())
    {
        if (lhs->leftThan(*rhs))
            merged.push_back(*lhs++);
        else if (lhs->rightThan(*rhs))
            merged.push_back(*rhs++);
        else
        {
            /// Overlap: emit the union, then advance whichever range finishes first.
            merged.emplace_back(*(lhs->unionWith(*rhs)));
            if (compareByRightBound(*lhs, *rhs))
                ++lhs;
            else
                ++rhs;
        }
    }

    /// Copy whatever remains on either side.
    merged.insert(merged.end(), lhs, ranges.end());
    merged.insert(merged.end(), rhs, other.ranges.end());

    /// Unions of adjacent results may themselves overlap (e.g. [1, 4], [2, 5]),
    /// so normalize once more before wrapping.
    return PlainRanges(makePlainFromOrdered(merged));
}
/// Merge-intersection of two plain series (both ordered and intersection-free).
/// Ranges with no counterpart on the other side are dropped; overlapping pairs
/// contribute their intersection, and the side whose range ends first advances.
PlainRanges PlainRanges::intersectWith(const PlainRanges & other)
{
    Ranges intersection;
    auto lhs = ranges.begin();
    auto rhs = other.ranges.begin();

    while (lhs != ranges.end() && rhs != other.ranges.end())
    {
        if (lhs->leftThan(*rhs))
            ++lhs;
        else if (lhs->rightThan(*rhs))
            ++rhs;
        else
        {
            /// Overlap: keep the (possibly blank) intersection, skipping blanks.
            if (auto overlapped = lhs->intersectWith(*rhs))
                intersection.emplace_back(*overlapped);
            if (compareByRightBound(*lhs, *rhs))
                ++lhs;
            else
                ++rhs;
        }
    }
    return PlainRanges(intersection);
}
/// Strict-weak ordering of ranges by left bound.
/// Two left-unbounded ranges compare equal. At an equal left point, a range with an
/// excluded bound orders before one with an included bound.
/// NOTE(review): confirm that direction is intended — an included bound starts earlier.
bool PlainRanges::compareByLeftBound(const Range & lhs, const Range & rhs)
{
    if (lhs.left == NEGATIVE_INFINITY && rhs.left == NEGATIVE_INFINITY)
        return false;
    return Range::less(lhs.left, rhs.left) || ((!lhs.left_included && rhs.left_included) && Range::equals(lhs.left, rhs.left));
} /// (removed stray trailing semicolon after the function body)
/// Strict-weak ordering of ranges by right bound, mirroring `compareByLeftBound`.
/// Two right-unbounded ranges compare equal. At an equal right point, a range with an
/// excluded bound orders before one with an included bound.
bool PlainRanges::compareByRightBound(const Range & lhs, const Range & rhs)
{
    if (lhs.right == POSITIVE_INFINITY && rhs.right == POSITIVE_INFINITY)
        return false;
    return Range::less(lhs.right, rhs.right) || ((!lhs.right_included && rhs.right_included) && Range::equals(lhs.right, rhs.right));
} /// (removed stray trailing semicolon after the function body)
std::vector<Ranges> PlainRanges::invert(const Ranges & to_invert_ranges)
{
/// invert a blank ranges
if (to_invert_ranges.empty())
return {makeUniverse().ranges};
std::vector<Ranges> reverted_ranges;
for (const auto & range : to_invert_ranges)
{
if (range.isInfinite())
/// return a blank ranges
return {{}};
reverted_ranges.push_back(range.invertRange());
}
return reverted_ranges;
};
}

46
src/Core/PlainRanges.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <Core/Range.h>
namespace DB
{
/** A plain ranges is a series of ranges who
  * 1. have no intersection in any two of the ranges
  * 2. ordered by left side
  * 3. does not contain blank range
  *
  * Example:
  * query: (k > 1 and key < 5) or (k > 3 and k < 10) or key in (2, 12)
  * original ranges: (1, 5), (3, 10), [2, 2], [12, 12]
  * plain ranges: (1, 10), [12, 12]
  *
  * If it is blank, ranges is empty.
  */
struct PlainRanges
{
    Ranges ranges;

    explicit PlainRanges(const Range & range);

    /// `may_have_intersection = false` means the caller guarantees the input is already plain;
    /// `ordered = true` means it is sorted by left bound (skips the sort during normalization).
    explicit PlainRanges(const Ranges & ranges_, bool may_have_intersection = false, bool ordered = true);

    /// Merge-union / merge-intersection with another plain series; both inputs stay plain.
    PlainRanges unionWith(const PlainRanges & other);
    PlainRanges intersectWith(const PlainRanges & other);

    /// Union ranges and return a new plain(ordered and no intersection) ranges.
    /// Example:
    ///     [1, 3], [2, 4], [6, 8] -> [1, 4], [6, 8]
    ///     [1, 3], [2, 4], (4, 5] -> [1, 4], [5, 5]
    static Ranges makePlainFromUnordered(Ranges ranges_);
    static Ranges makePlainFromOrdered(const Ranges & ranges_);

    /// Strict-weak orderings by left/right bound used during normalization.
    static bool compareByLeftBound(const Range & lhs, const Range & rhs);
    static bool compareByRightBound(const Range & lhs, const Range & rhs);

    /// Invert each range independently: one complement Ranges per input range.
    static std::vector<Ranges> invert(const Ranges & to_invert_ranges);

    static PlainRanges makeBlank() { return PlainRanges({}); }
    static PlainRanges makeUniverse() { return PlainRanges({Range::createWholeUniverseWithoutNull()}); }
};
}

View File

@ -123,6 +123,27 @@ bool Range::leftThan(const FieldRef & x) const
return less(x, right) || (right_included && equals(x, right));
}
/// True if this range lies entirely to the right of `x` (no common point).
bool Range::rightThan(const Range & x) const
{
    if (less(x.right, left))
        return true;
    /// Touching endpoints share a point only when both bounds are included.
    return equals(left, x.right) && (!left_included || !x.right_included);
}
/// True if this range lies entirely to the left of `x` (no common point).
bool Range::leftThan(const Range & x) const
{
    if (less(right, x.left))
        return true;
    /// Touching endpoints share a point only when both bounds are included.
    return equals(right, x.left) && (!x.left_included || !right_included);
}
/// True for a range bounded on both sides, like [1, 3]
/// (a Null-typed bound represents an infinity).
bool Range::fullBounded() const
{
    const bool left_is_finite = left.getType() != Field::Types::Null;
    const bool right_is_finite = right.getType() != Field::Types::Null;
    return left_is_finite && right_is_finite;
}
/// (-inf, +inf)
/// True only for the whole line (-inf, +inf).
bool Range::isInfinite() const
{
    if (!left.isNegativeInfinity())
        return false;
    return right.isPositiveInfinity();
}
bool Range::intersectsRange(const Range & r) const
{
/// r to the left of me.
@ -159,6 +180,95 @@ void Range::invert()
std::swap(left_included, right_included);
}
/// Complement of the range on the whole line.
/// A fully bounded range inverts into two pieces; a half-line into one;
/// (-inf, +inf) into none. Bound inclusiveness is flipped at each cut point.
Ranges Range::invertRange() const
{
    Ranges result;

    if (isInfinite())
    {
        /// The complement of (-inf, +inf) is empty.
        return result;
    }

    if (fullBounded())
    {
        /// [a, b] -> (-inf, a), (b, +inf)
        result.push_back({NEGATIVE_INFINITY, false, left, !left_included});
        result.push_back({right, !right_included, POSITIVE_INFINITY, false});
        return result;
    }

    /// Exactly one side is unbounded: the complement is a single half-line.
    if (right.isPositiveInfinity())
    {
        /// [a, +inf) -> (-inf, a); (a, +inf) -> (-inf, a]
        result.push_back({NEGATIVE_INFINITY, false, left, !left_included});
    }
    else if (left.isNegativeInfinity())
    {
        /// (-inf, b] -> (b, +inf); (-inf, b) -> [b, +inf)
        result.push_back({right, !right_included, POSITIVE_INFINITY, false});
    }
    else
    {
        /// Degenerate bounds (e.g. a non-infinity Null): preserve the
        /// original fall-through behavior, which emits the swapped bounds.
        Range swapped = *this;
        std::swap(swapped.left, swapped.right);
        result.push_back(swapped);
    }
    return result;
}
/// Intersection of two ranges; std::nullopt if they do not intersect.
/// The intersection takes the tighter (larger) left bound and the
/// tighter (smaller) right bound of the two ranges.
std::optional<Range> Range::intersectWith(const Range & r) const
{
    if (!intersectsRange(r))
        return {};

    bool left_bound_use_mine = true;
    bool right_bound_use_mine = true;

    /// r's left bound is tighter if it is greater, or equal but excluded while ours is included.
    /// (Fixes an inverted tie-break: the original tested `!left_included && r.left_included`,
    /// so intersect((1, 5], [1, 3]) wrongly produced [1, 3] instead of (1, 3].)
    if (less(left, r.left) || ((left_included && !r.left_included) && equals(left, r.left)))
        left_bound_use_mine = false;

    /// r's right bound is tighter if it is smaller, or equal but excluded while ours is included.
    if (less(r.right, right) || ((!r.right_included && right_included) && equals(r.right, right)))
        right_bound_use_mine = false;

    return Range(
        left_bound_use_mine ? left : r.left,
        left_bound_use_mine ? left_included : r.left_included,
        right_bound_use_mine ? right : r.right,
        right_bound_use_mine ? right_included : r.right_included);
}
/// Union of two ranges as a single range; std::nullopt when they neither
/// intersect nor touch (the union would not be contiguous).
/// The union takes the wider (smaller) left bound and the wider (larger)
/// right bound of the two ranges.
std::optional<Range> Range::unionWith(const Range & r) const
{
    if (!intersectsRange(r) && !nearByWith(r))
        return {};

    bool left_bound_use_mine = false;
    bool right_bound_use_mine = false;

    /// Our left bound is wider if it is smaller, or equal but included while r's is excluded.
    /// (Fixes an inverted tie-break: the original tested `!left_included && r.left_included`,
    /// so union([1, 5], (1, 6)) wrongly dropped the point 1 and produced (1, 6).)
    if (less(left, r.left) || ((left_included && !r.left_included) && equals(left, r.left)))
        left_bound_use_mine = true;

    /// Our right bound is wider if it is greater, or equal but included while r's is excluded.
    if (less(r.right, right) || ((!r.right_included && right_included) && equals(r.right, right)))
        right_bound_use_mine = true;

    return Range(
        left_bound_use_mine ? left : r.left,
        left_bound_use_mine ? left_included : r.left_included,
        right_bound_use_mine ? right : r.right,
        right_bound_use_mine ? right_included : r.right_included);
}
/// True if the two ranges touch at a single point so that their union is one
/// contiguous range — i.e. the touching bounds are equal and exactly one of
/// them is included (both included would intersect; both excluded leaves a gap).
bool Range::nearByWith(const Range & r) const
{
    /// This range is on the left: its right endpoint touches r's left endpoint.
    if ((right_included != r.left_included) && equals(right, r.left))
        return true;

    /// r is on the left: its right endpoint touches our left endpoint.
    /// (Fixes a copy-paste bug: the original tested `r.right_included && !left_included`
    /// on both sides of `||`, so the `!r.right_included && left_included` case —
    /// e.g. (0, 1) adjacent to [1, 2] — was never accepted.)
    if ((r.right_included != left_included) && equals(r.right, left))
        return true;

    return false;
}
Range intersect(const Range & a, const Range & b)
{
Range res = Range::createWholeUniverse();

View File

@ -38,6 +38,13 @@ struct FieldRef : public Field
size_t column_idx = 0;
};
/** Range with open or closed ends; possibly unbounded.
*/
struct Range;
/** A series of ranges which may or may not overlap one another.
 */
using Ranges = std::vector<Range>;
/** Range with open or closed ends; possibly unbounded.
*/
struct Range
@ -79,12 +86,37 @@ public:
/// x is to the right
bool leftThan(const FieldRef & x) const;
/// completely right than x
bool rightThan(const Range & x) const;
/// completely left than x
bool leftThan(const Range & x) const;
/// range like [1, 2]
bool fullBounded() const;
/// (-inf, +inf)
bool isInfinite() const;
bool isBlank() const;
bool intersectsRange(const Range & r) const;
bool containsRange(const Range & r) const;
/// Invert left and right
void invert();
/// Invert the range.
/// Example:
/// [1, 3] -> (-inf, 1), (3, +inf)
Ranges invertRange() const;
std::optional<Range> intersectWith(const Range & r) const;
std::optional<Range> unionWith(const Range & r) const;
/// If near by r, they can be combined to a continuous range.
/// TODO If field is integer, case like [2, 3], [4, 5] is excluded.
bool nearByWith(const Range & r) const;
String toString() const;
};

View File

@ -92,14 +92,15 @@ namespace DB
M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
M(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
\
M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \
M(Double, total_memory_tracker_sample_probability, 0, "Collect random allocations and deallocations and write them into system.trace_log with 'MemorySample' trace_type. The probability is for every alloc/free regardless to the size of the allocation (can be changed with `memory_profiler_sample_min_allocation_size` and `memory_profiler_sample_max_allocation_size`). Note that sampling happens only when the amount of untracked memory exceeds 'max_untracked_memory'. You may want to set 'max_untracked_memory' to 0 for extra fine grained sampling.", 0) \
M(UInt64, total_memory_profiler_sample_min_allocation_size, 0, "Collect random allocations of size greater or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(UInt64, total_memory_profiler_sample_max_allocation_size, 0, "Collect random allocations of size less or equal than specified value with probability equal to `total_memory_profiler_sample_probability`. 0 means disabled. You may want to set 'max_untracked_memory' to 0 to make this threshold to work as expected.", 0) \
M(String, get_client_http_header_forbidden_headers, "", "Comma separated list of http header names that will not be returned by function getClientHTTPHeader.", 0) \
M(Bool, allow_get_client_http_header, false, "Allow function getClientHTTPHeader", 0) \
M(Bool, validate_tcp_client_information, false, "Validate client_information in the query packet over the native TCP protocol.", 0) \
M(Bool, storage_metadata_write_full_object_key, false, "Write disk metadata files with VERSION_FULL_OBJECT_KEY format", 0) \

View File

@ -104,9 +104,10 @@ class IColumn;
M(Bool, s3_check_objects_after_upload, false, "Check each uploaded object to s3 with head request to be sure that upload was successful", 0) \
M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
M(Bool, s3_disable_checksum, false, "Do not calculate a checksum when sending a file to S3. This speeds up writes by avoiding excessive processing passes on a file. It is mostly safe as the data of MergeTree tables is checksummed by ClickHouse anyway, and when S3 is accessed with HTTPS, the TLS layer already provides integrity while transferring through the network. While additional checksums on S3 give defense in depth.", 0) \
M(UInt64, s3_retry_attempts, 100, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
M(UInt64, s3_request_timeout_ms, 30000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. Only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \
M(UInt64, s3_http_connection_pool_size, 1000, "How many reusable open connections to keep per S3 endpoint. This only applies to the S3 table engine and table function, not to S3 disks (for disks, use disk config instead). Global setting, can only be set in config, overriding it per session or per query has no effect.", 0) \
M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
@ -122,10 +123,10 @@ class IColumn;
M(UInt64, max_remote_write_network_bandwidth, 0, "The maximum speed of data exchange over the network in bytes per second for write.", 0) \
M(UInt64, max_local_read_bandwidth, 0, "The maximum speed of local reads in bytes per second.", 0) \
M(UInt64, max_local_write_bandwidth, 0, "The maximum speed of local writes in bytes per second.", 0) \
M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \
M(Bool, stream_like_engine_allow_direct_select, false, "Allow direct SELECT query for Kafka, RabbitMQ, FileLog, Redis Streams, and NATS engines. In case there are attached materialized views, SELECT query is not allowed even if this setting is enabled.", 0) \
M(String, stream_like_engine_insert_queue, "", "When stream like engine reads from multiple queues, user will need to select one queue to insert into when writing. Used by Redis Streams and NATS.", 0) \
\
M(Bool, distributed_foreground_insert, false, "If setting is enabled, insert query into distributed waits until data will be sent to all nodes in cluster. \n\nEnables or disables synchronous data insertion into a `Distributed` table.\n\nBy default, when inserting data into a Distributed table, the ClickHouse server sends data to cluster nodes in background. When `distributed_foreground_insert` = 1, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).", 0) ALIAS(insert_distributed_sync) \
M(Bool, distributed_foreground_insert, false, "If setting is enabled, insert query into distributed waits until data are sent to all nodes in a cluster. \n\nEnables or disables synchronous data insertion into a `Distributed` table.\n\nBy default, when inserting data into a Distributed table, the ClickHouse server sends data to cluster nodes in the background. When `distributed_foreground_insert` = 1, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).", 0) ALIAS(insert_distributed_sync) \
M(UInt64, distributed_background_insert_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) ALIAS(insert_distributed_timeout) \
M(Milliseconds, distributed_background_insert_sleep_time_ms, 100, "Sleep time for background INSERTs into Distributed, in case of any errors delay grows exponentially.", 0) ALIAS(distributed_directory_monitor_sleep_time_ms) \
M(Milliseconds, distributed_background_insert_max_sleep_time_ms, 30000, "Maximum sleep time for background INSERTs into Distributed, it limits exponential growth too.", 0) ALIAS(distributed_directory_monitor_max_sleep_time_ms) \
@ -575,7 +576,6 @@ class IColumn;
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
@ -750,7 +750,7 @@ class IColumn;
M(UInt64, prefetch_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the prefetch buffer to read from the filesystem.", 0) \
M(UInt64, filesystem_prefetch_step_bytes, 0, "Prefetch step in bytes. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \
M(UInt64, filesystem_prefetch_step_marks, 0, "Prefetch step in marks. Zero means `auto` - approximately the best prefetch step will be auto deduced, but might not be 100% the best. The actual value might be different because of setting filesystem_prefetch_min_bytes_for_single_read_task", 0) \
M(UInt64, filesystem_prefetch_min_bytes_for_single_read_task, "8Mi", "Do not parallelize within one file read less than this amount of bytes. E.g. one reader will not receive a read task of size less than this amount. This setting is recommended to avoid spikes of time for aws getObject requests to aws", 0) \
M(UInt64, filesystem_prefetch_min_bytes_for_single_read_task, "2Mi", "Do not parallelize within one file read less than this amount of bytes. E.g. one reader will not receive a read task of size less than this amount. This setting is recommended to avoid spikes of time for aws getObject requests to aws", 0) \
M(UInt64, filesystem_prefetch_max_memory_usage, "1Gi", "Maximum memory usage for prefetches.", 0) \
M(UInt64, filesystem_prefetches_limit, 200, "Maximum number of prefetches. Zero means unlimited. A setting `filesystem_prefetches_max_memory_usage` is more recommended if you want to limit the number of prefetches", 0) \
\
@ -856,6 +856,7 @@ class IColumn;
MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_geo_types, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_alter_materialized_view_structure, true) \
\
MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \
MAKE_OBSOLETE(M, StreamingHandleErrorMode, handle_kafka_error_mode, StreamingHandleErrorMode::DEFAULT) \

Some files were not shown because too many files have changed in this diff Show More