Merge branch 'master' into feature/more_warnings

This commit is contained in:
凌涛 2023-12-05 11:50:05 +08:00 committed by GitHub
commit 9e05e79d66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
360 changed files with 8399 additions and 2938 deletions

View File

@ -532,6 +532,11 @@ jobs:
run_command: | run_command: |
cd "$REPO_COPY/tests/ci" cd "$REPO_COPY/tests/ci"
mkdir -p "${REPORTS_PATH}/integration"
mkdir -p "${REPORTS_PATH}/stateless"
cp -r ${REPORTS_PATH}/changed_images* ${REPORTS_PATH}/integration
cp -r ${REPORTS_PATH}/changed_images* ${REPORTS_PATH}/stateless
TEMP_PATH="${TEMP_PATH}/integration" \ TEMP_PATH="${TEMP_PATH}/integration" \
REPORTS_PATH="${REPORTS_PATH}/integration" \ REPORTS_PATH="${REPORTS_PATH}/integration" \
python3 integration_test_check.py "Integration $CHECK_NAME" \ python3 integration_test_check.py "Integration $CHECK_NAME" \

3
.gitmodules vendored
View File

@ -357,3 +357,6 @@
[submodule "contrib/pocketfft"] [submodule "contrib/pocketfft"]
path = contrib/pocketfft path = contrib/pocketfft
url = https://github.com/mreineck/pocketfft.git url = https://github.com/mreineck/pocketfft.git
[submodule "contrib/sqids-cpp"]
path = contrib/sqids-cpp
url = https://github.com/sqids/sqids-cpp.git

View File

@ -35,6 +35,7 @@ curl https://clickhouse.com/ | sh
* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30 * [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30
* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11 * [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11
* [**ClickHouse Meetup in Sydney**](https://www.meetup.com/clickhouse-sydney-user-group/events/297638812/) - Dec 12
* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12 * [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12
Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com. Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler <at> clickhouse <dot> com.

View File

@ -156,6 +156,7 @@ add_contrib (nuraft-cmake NuRaft)
add_contrib (fast_float-cmake fast_float) add_contrib (fast_float-cmake fast_float)
add_contrib (datasketches-cpp-cmake datasketches-cpp) add_contrib (datasketches-cpp-cmake datasketches-cpp)
add_contrib (incbin-cmake incbin) add_contrib (incbin-cmake incbin)
add_contrib (sqids-cpp-cmake sqids-cpp)
option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES}) option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
if (ENABLE_NLP) if (ENABLE_NLP)

View File

@ -33,7 +33,7 @@ target_include_directories(cxxabi SYSTEM BEFORE
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include> PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src> PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
) )
target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DHAS_THREAD_LOCAL)
target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
target_link_libraries(cxxabi PUBLIC unwind) target_link_libraries(cxxabi PUBLIC unwind)

2
contrib/qpl vendored

@ -1 +1 @@
Subproject commit faaf19350459c076e66bb5df11743c3fade59b73 Subproject commit a61bdd845fd7ca363b2bcc55454aa520dfcd8298

1
contrib/sqids-cpp vendored Submodule

@ -0,0 +1 @@
Subproject commit 3756e537d4d48cc0dd4176801fe19f99601439b0

View File

@ -0,0 +1,14 @@
option(ENABLE_SQIDS "Enable sqids support" ${ENABLE_LIBRARIES})
if ((NOT ENABLE_SQIDS))
message (STATUS "Not using sqids")
return()
endif()
set (SQIDS_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sqids-cpp")
set (SQIDS_INCLUDE_DIR "${SQIDS_SOURCE_DIR}/include")
add_library(_sqids INTERFACE)
target_include_directories(_sqids SYSTEM INTERFACE ${SQIDS_INCLUDE_DIR})
add_library(ch_contrib::sqids ALIAS _sqids)
target_compile_definitions(_sqids INTERFACE ENABLE_SQIDS)

View File

@ -19,10 +19,14 @@ dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb dpkg -i package_folder/clickhouse-client_*.deb
echo "$BUGFIX_VALIDATE_CHECK"
# Check that the tools are available under short names # Check that the tools are available under short names
ch --query "SELECT 1" || exit 1 if [[ -z "$BUGFIX_VALIDATE_CHECK" ]]; then
chl --query "SELECT 1" || exit 1 ch --query "SELECT 1" || exit 1
chc --version || exit 1 chl --query "SELECT 1" || exit 1
chc --version || exit 1
fi
ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
@ -46,6 +50,16 @@ fi
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; then
sudo cat /etc/clickhouse-server/config.d/zookeeper.xml \
| sed "/<use_compression>1<\/use_compression>/d" \
> /etc/clickhouse-server/config.d/zookeeper.xml.tmp
sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
fi
# For flaky check we also enable thread fuzzer # For flaky check we also enable thread fuzzer
if [ "$NUM_TRIES" -gt "1" ]; then if [ "$NUM_TRIES" -gt "1" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000

View File

@ -191,6 +191,12 @@ sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \
> /etc/clickhouse-server/config.d/logger_trace.xml.tmp > /etc/clickhouse-server/config.d/logger_trace.xml.tmp
mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml
# Randomize async_load_databases
if [ $(( $(date +%-d) % 2 )) -eq 1 ]; then
sudo echo "<clickhouse><async_load_databases>true</async_load_databases></clickhouse>" \
> /etc/clickhouse-server/config.d/enable_async_load_databases.xml
fi
start start
stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \

View File

@ -79,6 +79,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
start start
stop stop
@ -116,6 +117,7 @@ rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
start start

View File

@ -56,7 +56,7 @@ On Linux, macOS and FreeBSD:
./clickhouse client ./clickhouse client
ClickHouse client version 23.2.1.1501 (official build). ClickHouse client version 23.2.1.1501 (official build).
Connecting to localhost:9000 as user default. Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 23.2.1 revision 54461. Connected to ClickHouse server version 23.2.1.
local-host :) local-host :)
``` ```

View File

@ -16,7 +16,7 @@ ClickHouse provides a native command-line client: `clickhouse-client`. The clien
$ clickhouse-client $ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build). ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default. Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 20.13.1 revision 54442. Connected to ClickHouse server version 20.13.1.
:) :)
``` ```

View File

@ -16,9 +16,9 @@ More information about PGO in ClickHouse you can read in the corresponding GitHu
There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). In this guide is described the Instrumentation PGO with ClickHouse. There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). In this guide is described the Instrumentation PGO with ClickHouse.
1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-instr-generate` option to `CXXFLAGS`. 1. Build ClickHouse in Instrumented mode. In Clang it can be done via passing `-fprofile-generate` option to `CXXFLAGS`.
2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One of the approaches could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in the instrumentation mode could work slowly so be ready for that and do not run instrumented ClickHouse in performance-critical environments. 2. Run instrumented ClickHouse on a sample workload. Here you need to use your usual workload. One of the approaches could be using [ClickBench](https://github.com/ClickHouse/ClickBench) as a sample workload. ClickHouse in the instrumentation mode could work slowly so be ready for that and do not run instrumented ClickHouse in performance-critical environments.
3. Recompile ClickHouse once again with `-fprofile-instr-use` compiler flags and profiles that are collected from the previous step. 3. Recompile ClickHouse once again with `-fprofile-use` compiler flags and profiles that are collected from the previous step.
A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization). A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization).

View File

@ -1679,6 +1679,45 @@ Default value: `0.5`.
## async_load_databases {#async_load_databases}
Asynchronous loading of databases and tables.
If `true` all non-system databases with `Ordinary`, `Atomic` and `Replicated` engine will be loaded asynchronously after the ClickHouse server start up. See `system.async_loader` table, `tables_loader_background_pool_size` and `tables_loader_foreground_pool_size` server settings. Any query that tries to access a table, that is not yet loaded, will wait for exactly this table to be started up. If load job fails, query will rethrow an error (instead of shutting down the whole server in case of `async_load_databases = false`). The table that is waited for by at least one query will be loaded with higher priority. DDL queries on a database will wait for exactly that database to be started up.
If `false`, all databases are loaded when the server starts.
The default is `false`.
**Example**
``` xml
<async_load_databases>true</async_load_databases>
```
## tables_loader_foreground_pool_size {#tables_loader_foreground_pool_size}
Sets the number of threads performing load jobs in foreground pool. The foreground pool is used for loading table synchronously before server start listening on a port and for loading tables that are waited for. Foreground pool has higher priority than background pool. It means that no job starts in background pool while there are jobs running in foreground pool.
Possible values:
- Any positive integer.
- Zero. Use all available CPUs.
Default value: 0.
## tables_loader_background_pool_size {#tables_loader_background_pool_size}
Sets the number of threads performing asynchronous load jobs in background pool. The background pool is used for loading tables asynchronously after server start in case there are no queries waiting for the table. It could be beneficial to keep low number of threads in background pool if there are a lot of tables. It will reserve CPU resources for concurrent query execution.
Possible values:
- Any positive integer.
- Zero. Use all available CPUs.
Default value: 0.
## merge_tree {#merge_tree} ## merge_tree {#merge_tree}
@ -2385,7 +2424,7 @@ Path on the local filesystem to store temporary data for processing large querie
## user_files_path {#user_files_path} ## user_files_path {#user_files_path}
The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md). The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md), [fileCluster()](../../sql-reference/table-functions/fileCluster.md).
**Example** **Example**

View File

@ -149,7 +149,7 @@ Possible values:
- Any positive integer. - Any positive integer.
- 0 (disable deduplication) - 0 (disable deduplication)
Default value: 100. Default value: 1000.
The `Insert` command creates one or more blocks (parts). For [insert deduplication](../../engines/table-engines/mergetree-family/replication.md), when writing into replicated tables, ClickHouse writes the hash sums of the created parts into ClickHouse Keeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ClickHouse Keeper. The `Insert` command creates one or more blocks (parts). For [insert deduplication](../../engines/table-engines/mergetree-family/replication.md), when writing into replicated tables, ClickHouse writes the hash sums of the created parts into ClickHouse Keeper. Hash sums are stored only for the most recent `replicated_deduplication_window` blocks. The oldest hash sums are removed from ClickHouse Keeper.
A large number of `replicated_deduplication_window` slows down `Inserts` because it needs to compare more entries. A large number of `replicated_deduplication_window` slows down `Inserts` because it needs to compare more entries.

View File

@ -0,0 +1,54 @@
---
slug: /en/operations/system-tables/async_loader
---
# async_loader
Contains information and status for recent asynchronous jobs (e.g. for tables loading). The table contains a row for every job. There is a tool for visualizing information from this table `utils/async_loader_graph`.
Example:
``` sql
SELECT *
FROM system.async_loader
FORMAT Vertical
LIMIT 1
```
``` text
```
Columns:
- `job` (`String`) - Job name (may be not unique).
- `job_id` (`UInt64`) - Unique ID of the job.
- `dependencies` (`Array(UInt64)`) - List of IDs of jobs that should be done before this job.
- `dependencies_left` (`UInt64`) - Current number of dependencies left to be done.
- `status` (`Enum`) - Current load status of a job:
`PENDING`: Load job is not started yet.
`OK`: Load job executed and was successful.
`FAILED`: Load job executed and failed.
`CANCELED`: Load job is not going to be executed due to removal or dependency failure.
A pending job might be in one of the following states:
- `is_executing` (`UInt8`) - The job is currently being executed by a worker.
- `is_blocked` (`UInt8`) - The job waits for its dependencies to be done.
- `is_ready` (`UInt8`) - The job is ready to be executed and waits for a worker.
- `elapsed` (`Float64`) - Seconds elapsed since start of execution. Zero if job is not started. Total execution time if job finished.
Every job has a pool associated with it and is started in this pool. Each pool has a constant priority and a mutable maximum number of workers. Higher priority (lower `priority` value) jobs are run first. No job with lower priority is started while there is at least one higher priority job ready or executing. Job priority can be elevated (but cannot be lowered) by prioritizing it. For example jobs for a table loading and startup will be prioritized if incoming query required this table. It is possible prioritize a job during its execution, but job is not moved from its `execution_pool` to newly assigned `pool`. The job uses `pool` for creating new jobs to avoid priority inversion. Already started jobs are not preempted by higher priority jobs and always run to completion after start.
- `pool_id` (`UInt64`) - ID of a pool currently assigned to the job.
- `pool` (`String`) - Name of `pool_id` pool.
- `priority` (`Int64`) - Priority of `pool_id` pool.
- `execution_pool_id` (`UInt64`) - ID of a pool the job is executed in. Equals initially assigned pool before execution starts.
- `execution_pool` (`String`) - Name of `execution_pool_id` pool.
- `execution_priority` (`Int64`) - Priority of `execution_pool_id` pool.
- `ready_seqno` (`Nullable(UInt64)`) - Not null for ready jobs. Worker pulls the next job to be executed from a ready queue of its pool. If there are multiple ready jobs, then job with the lowest value of `ready_seqno` is picked.
- `waiters` (`UInt64`) - The number of threads waiting on this job.
- `exception` (`Nullable(String)`) - Not null for failed and canceled jobs. Holds error message raised during query execution or error leading to cancelling of this job along with dependency failure chain of job names.
Time instants during job lifetime:
- `schedule_time` (`DateTime64`) - Time when job was created and scheduled to be executed (usually with all its dependencies).
- `enqueue_time` (`Nullable(DateTime64)`) - Time when job became ready and was enqueued into a ready queue of it's pool. Null if the job is not ready yet.
- `start_time` (`Nullable(DateTime64)`) - Time when worker dequeues the job from ready queue and start its execution. Null if the job is not started yet.
- `finish_time` (`Nullable(DateTime64)`) - Time when job execution is finished. Null if the job is not finished yet.

View File

@ -45,6 +45,22 @@ Number of threads in the Aggregator thread pool.
Number of threads in the Aggregator thread pool running a task. Number of threads in the Aggregator thread pool running a task.
### TablesLoaderForegroundThreads
Number of threads in the async loader foreground thread pool.
### TablesLoaderForegroundThreadsActive
Number of threads in the async loader foreground thread pool running a task.
### TablesLoaderBackgroundThreads
Number of threads in the async loader background thread pool.
### TablesLoaderBackgroundThreadsActive
Number of threads in the async loader background thread pool running a task.
### AsyncInsertCacheSize ### AsyncInsertCacheSize
Number of async insert hash id in cache Number of async insert hash id in cache
@ -197,14 +213,6 @@ Number of threads in the DatabaseOnDisk thread pool.
Number of threads in the DatabaseOnDisk thread pool running a task. Number of threads in the DatabaseOnDisk thread pool running a task.
### DatabaseOrdinaryThreads
Number of threads in the Ordinary database thread pool.
### DatabaseOrdinaryThreadsActive
Number of threads in the Ordinary database thread pool running a task.
### DelayedInserts ### DelayedInserts
Number of INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree table. Number of INSERT queries that are throttled due to high number of active data parts for partition in a MergeTree table.
@ -625,14 +633,6 @@ Number of connections that are sending data for external tables to remote server
Number of connections that are sending data for scalars to remote servers. Number of connections that are sending data for scalars to remote servers.
### StartupSystemTablesThreads
Number of threads in the StartupSystemTables thread pool.
### StartupSystemTablesThreadsActive
Number of threads in the StartupSystemTables thread pool running a task.
### StorageBufferBytes ### StorageBufferBytes
Number of bytes in buffers of Buffer tables Number of bytes in buffers of Buffer tables
@ -677,14 +677,6 @@ Number of threads in the system.replicas thread pool running a task.
Number of connections to TCP server (clients with native interface), also included server-server distributed query connections Number of connections to TCP server (clients with native interface), also included server-server distributed query connections
### TablesLoaderThreads
Number of threads in the tables loader thread pool.
### TablesLoaderThreadsActive
Number of threads in the tables loader thread pool running a task.
### TablesToDropQueueSize ### TablesToDropQueueSize
Number of dropped tables, that are waiting for background data removal. Number of dropped tables, that are waiting for background data removal.

View File

@ -31,3 +31,26 @@ SELECT * FROM system.numbers LIMIT 10;
10 rows in set. Elapsed: 0.001 sec. 10 rows in set. Elapsed: 0.001 sec.
``` ```
You can also limit the output by predicates.
```sql
SELECT * FROM system.numbers < 10;
```
```response
┌─number─┐
│ 0 │
│ 1 │
│ 2 │
│ 3 │
│ 4 │
│ 5 │
│ 6 │
│ 7 │
│ 8 │
│ 9 │
└────────┘
10 rows in set. Elapsed: 0.001 sec.
```

View File

@ -22,7 +22,7 @@ Columns:
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1 revision 54429.`. This field contains the `revision`, but not the `version` of a server. When connecting to the server by `clickhouse-client`, you see the string similar to `Connected to ClickHouse server version 19.18.1.`. This field contains the `revision`, but not the `version` of a server.
- `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type: - `trace_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Trace type:

View File

@ -5,7 +5,12 @@ sidebar_position: 6
# any # any
Selects the first encountered (non-NULL) value, unless all rows have NULL values in that column. Selects the first encountered value of a column.
By default, it ignores NULL values and returns the first NOT NULL value found in the column. As [`first_value`](../../../sql-reference/aggregate-functions/reference/first_value.md) if supports `RESPECT NULLS`, in which case it will select the first value passed, independently on whether it's NULL or not.
The return type of the function is the same as the input, except for LowCardinality which is discarded). This means that given no rows as input it will return the default value of that type (0 for integers, or Null for a Nullable() column). You might use the `-OrNull` [combinator](../../../sql-reference/aggregate-functions/combinators.md) ) to modify this behaviour.
The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate. The query can be executed in any order and even in a different order each time, so the result of this function is indeterminate.
To get a determinate result, you can use the min or max function instead of any. To get a determinate result, you can use the min or max function instead of any.
@ -13,4 +18,4 @@ In some cases, you can rely on the order of execution. This applies to cases whe
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function. When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
- Alias: `any_value` - Alias: `any_value`, `first_value`.

View File

@ -5,9 +5,12 @@ sidebar_position: 7
# first_value # first_value
Selects the first encountered value, similar to `any`, but could accept NULL. It is an alias for [`any`](../../../sql-reference/aggregate-functions/reference/any.md) but it was introduced for compatibility with [Window Functions](../../window-functions/index.md), where sometimes it's necessary to process `NULL` values (by default all ClickHouse aggregate functions ignore NULL values).
Mostly it should be used with [Window Functions](../../window-functions/index.md).
Without Window Functions the result will be random if the source stream is not ordered. It supports declaring a modifier to respect nulls (`RESPECT NULLS`), both under [Window Functions](../../window-functions/index.md) and in normal aggregations.
As with `any`, without Window Functions the result will be random if the source stream is not ordered and the return type
matches the input type (Null is only returned if the input is Nullable or -OrNull combinator is added).
## examples ## examples
@ -23,15 +26,15 @@ INSERT INTO test_data (a, b) Values (1,null), (2,3), (4, 5), (6,null);
``` ```
### example1 ### example1
The NULL value is ignored at default. By default, the NULL value is ignored.
```sql ```sql
select first_value(b) from test_data; select first_value(b) from test_data;
``` ```
```text ```text
┌─first_value_ignore_nulls(b)─┐ ┌─any(b)─┐
3 │ │ 3 │
└───────────────────────────── └────────┘
``` ```
### example2 ### example2
@ -41,9 +44,9 @@ select first_value(b) ignore nulls from test_data
``` ```
```text ```text
┌─first_value_ignore_nulls(b)─┐ ┌─any(b) IGNORE NULLS ─┐
3 │ │ 3 │
└───────────────────────────── └──────────────────────┘
``` ```
### example3 ### example3
@ -53,9 +56,9 @@ select first_value(b) respect nulls from test_data
``` ```
```text ```text
┌─first_value_respect_nulls(b)─┐ ┌─any(b) RESPECT NULLS ─┐
ᴺᵁᴸᴸ │ │ ᴺᵁᴸᴸ │
└────────────────────────────── └───────────────────────┘
``` ```
### example4 ### example4
@ -73,8 +76,8 @@ FROM
``` ```
```text ```text
┌─first_value_respect_nulls(b)─┬─first_value(b)─┐ ┌─any_respect_nulls(b)─┬─any(b)─┐
ᴺᵁᴸᴸ │ 3 │ │ ᴺᵁᴸᴸ │ 3 │
└──────────────────────────────────────────────┘ └──────────────────────┴────────┘
``` ```

View File

@ -1083,7 +1083,7 @@ Result:
**See also** **See also**
- [arrayFold](#arrayFold) - [arrayFold](#arrayfold)
## arrayReduceInRanges ## arrayReduceInRanges
@ -1175,7 +1175,7 @@ FROM numbers(1,10);
**See also** **See also**
- [arrayReduce](#arrayReduce) - [arrayReduce](#arrayreduce)
## arrayReverse(arr) ## arrayReverse(arr)

View File

@ -1776,3 +1776,34 @@ Result:
│ (('queries','database','analytical'),('oriented','processing','DBMS')) │ │ (('queries','database','analytical'),('oriented','processing','DBMS')) │
└────────────────────────────────────────────────────────────────────────┘ └────────────────────────────────────────────────────────────────────────┘
``` ```
## sqid
Transforms numbers into YouTube-like short URL hash called [Sqid](https://sqids.org/).
To use this function, set setting `allow_experimental_hash_functions = 1`.
**Syntax**
```sql
sqid(number1,...)
```
**Arguments**
- A variable number of UInt8, UInt16, UInt32 or UInt64 numbers.
**Returned Value**
A hash id [String](/docs/en/sql-reference/data-types/string.md).
**Example**
```sql
SELECT sqid(1, 2, 3, 4, 5);
```
```response
┌─sqid(1, 2, 3, 4, 5)─┐
│ gXHfJ1C6dN │
└─────────────────────┘
```

View File

@ -5,7 +5,7 @@ slug: /en/sql-reference/operators/exists
The `EXISTS` operator checks how many records are in the result of a subquery. If it is empty, then the operator returns `0`. Otherwise, it returns `1`. The `EXISTS` operator checks how many records are in the result of a subquery. If it is empty, then the operator returns `0`. Otherwise, it returns `1`.
`EXISTS` can be used in a [WHERE](../../sql-reference/statements/select/where.md) clause. `EXISTS` can also be used in a [WHERE](../../sql-reference/statements/select/where.md) clause.
:::tip :::tip
References to main query tables and columns are not supported in a subquery. References to main query tables and columns are not supported in a subquery.
@ -13,12 +13,26 @@ References to main query tables and columns are not supported in a subquery.
**Syntax** **Syntax**
```sql ``` sql
WHERE EXISTS(subquery) EXISTS(subquery)
``` ```
**Example** **Example**
Query checking existence of values in a subquery:
``` sql
SELECT EXISTS(SELECT * FROM numbers(10) WHERE number > 8), EXISTS(SELECT * FROM numbers(10) WHERE number > 11)
```
Result:
``` text
┌─in(1, _subquery1)─┬─in(1, _subquery2)─┐
│ 1 │ 0 │
└───────────────────┴───────────────────┘
```
Query with a subquery returning several rows: Query with a subquery returning several rows:
``` sql ``` sql

View File

@ -10,7 +10,7 @@ A set of queries that allow changing the table structure.
Syntax: Syntax:
``` sql ``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ... ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
``` ```
In the query, specify a list of one or more comma-separated actions. In the query, specify a list of one or more comma-separated actions.

View File

@ -415,7 +415,7 @@ ExpressionTransform
ExpressionTransform × 2 ExpressionTransform × 2
(SettingQuotaAndLimits) (SettingQuotaAndLimits)
(ReadFromStorage) (ReadFromStorage)
NumbersMt × 2 0 → 1 NumbersRange × 2 0 → 1
``` ```
### EXPLAIN ESTIMATE ### EXPLAIN ESTIMATE

View File

@ -0,0 +1,85 @@
---
slug: /en/sql-reference/table-functions/fileCluster
sidebar_position: 61
sidebar_label: fileCluster
---
# fileCluster Table Function
Enables simultaneous processing of files matching a specified path across multiple nodes within a cluster. The initiator establishes connections to worker nodes, expands globs in the file path, and delegates file-reading tasks to worker nodes. Each worker node queries the initiator for the next file to process, repeating until all tasks are completed (all files are read).
:::note
This function will operate _correctly_ only in case the set of files matching the initially specified path is identical across all nodes, and their content is consistent among different nodes.
In case these files differ between nodes, the return value cannot be predetermined and depends on the order in which worker nodes request tasks from the initiator.
:::
**Syntax**
``` sql
fileCluster(cluster_name, path[, format, structure, compression_method])
```
**Arguments**
- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
- `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file also supports [globs](#globs_in_path).
- `format` — [Format](../../interfaces/formats.md#formats) of the files. Type: [String](../../sql-reference/data-types/string.md).
- `structure` — Table structure in `'UserID UInt64, Name String'` format. Determines column names and types. Type: [String](../../sql-reference/data-types/string.md).
- `compression_method` — Compression method. Supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`.
**Returned value**
A table with the specified format and structure and with data from files matching the specified path.
**Example**
Given a cluster named `my_cluster` and given the following value of setting `user_files_path`:
``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
Also, given there are files `file1.csv` and `file2.csv` inside `user_files_path` of each cluster node, and their content is identical across different nodes:
```bash
$ cat /var/lib/clickhouse/user_files/file1.csv
1,"file1"
11,"file11"
$ cat /var/lib/clickhouse/user_files/file2.csv
2,"file2"
22,"file22"
```
For example, one can create these files by executing these two queries on every cluster node:
```sql
INSERT INTO TABLE FUNCTION file('file1.csv', 'CSV', 'i UInt32, s String') VALUES (1,'file1'), (11,'file11');
INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES (2,'file2'), (22,'file22');
```
Now, read data contents of `file1.csv` and `file2.csv` via `fileCluster` table function:
```sql
SELECT * FROM fileCluster(
    'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String') ORDER BY (i, s)
```
```
┌──i─┬─s──────┐
│ 1 │ file1 │
│ 11 │ file11 │
└────┴────────┘
┌──i─┬─s──────┐
│ 2 │ file2 │
│ 22 │ file22 │
└────┴────────┘
```
## Globs in Path {#globs_in_path}
All patterns supported by [File](../../sql-reference/table-functions/file.md#globs-in-path) table function are supported by FileCluster.
**See Also**
- [File table function](../../sql-reference/table-functions/file.md)

View File

@ -17,6 +17,8 @@ The following queries are equivalent:
SELECT * FROM numbers(10); SELECT * FROM numbers(10);
SELECT * FROM numbers(0, 10); SELECT * FROM numbers(0, 10);
SELECT * FROM system.numbers LIMIT 10; SELECT * FROM system.numbers LIMIT 10;
SELECT * FROM system.numbers WHERE number BETWEEN 0 AND 9;
SELECT * FROM system.numbers WHERE number IN (0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
``` ```
Examples: Examples:

View File

@ -14,7 +14,7 @@ ClickHouse предоставляет собственный клиент ком
$ clickhouse-client $ clickhouse-client
ClickHouse client version 20.13.1.5273 (official build). ClickHouse client version 20.13.1.5273 (official build).
Connecting to localhost:9000 as user default. Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 20.13.1 revision 54442. Connected to ClickHouse server version 20.13.1.
:) :)
``` ```

View File

@ -1678,7 +1678,7 @@ TCP порт для защищённого обмена данными с кли
## user_files_path {#server_configuration_parameters-user_files_path} ## user_files_path {#server_configuration_parameters-user_files_path}
Каталог с пользовательскими файлами. Используется в табличной функции [file()](../../operations/server-configuration-parameters/settings.md). Каталог с пользовательскими файлами. Используется в табличных функциях [file()](../../sql-reference/table-functions/file.md) и [fileCluster()](../../sql-reference/table-functions/fileCluster.md).
**Пример** **Пример**

View File

@ -119,7 +119,7 @@ Eсли суммарное число активных кусков во все
- Положительное целое число. - Положительное целое число.
- 0 (без ограничений). - 0 (без ограничений).
Значение по умолчанию: 100. Значение по умолчанию: 1000.
Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся только последние `replicated_deduplication_window` хеш-сумм. Самые старые хеш-суммы удаляются из Zookeeper. Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся только последние `replicated_deduplication_window` хеш-сумм. Самые старые хеш-суммы удаляются из Zookeeper.
Большое значение `replicated_deduplication_window` замедляет `Insert`, так как приходится сравнивать большее количество хеш-сумм. Большое значение `replicated_deduplication_window` замедляет `Insert`, так как приходится сравнивать большее количество хеш-сумм.

View File

@ -19,7 +19,7 @@ ClickHouse создает эту таблицу когда установлен
- `revision`([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия сборки сервера ClickHouse. - `revision`([UInt32](../../sql-reference/data-types/int-uint.md)) — ревизия сборки сервера ClickHouse.
Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1 revision 54429.`. Это поле содержит номер после `revision`, но не содержит строку после `version`. Во время соединения с сервером через `clickhouse-client`, вы видите строку похожую на `Connected to ClickHouse server version 19.18.1.`. Это поле содержит номер после `revision`, но не содержит строку после `version`.
- `trace_type`([Enum8](../../sql-reference/data-types/enum.md)) — тип трассировки: - `trace_type`([Enum8](../../sql-reference/data-types/enum.md)) — тип трассировки:

View File

@ -11,7 +11,7 @@ sidebar_label: "Манипуляции со столбцами"
Синтаксис: Синтаксис:
``` sql ``` sql
ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ... ALTER [TEMPORARY] TABLE [db].name [ON CLUSTER cluster] ADD|DROP|RENAME|CLEAR|COMMENT|{MODIFY|ALTER}|MATERIALIZE COLUMN ...
``` ```
В запросе можно указать сразу несколько действий над одной таблицей через запятую. В запросе можно указать сразу несколько действий над одной таблицей через запятую.

View File

@ -371,7 +371,7 @@ ExpressionTransform
ExpressionTransform × 2 ExpressionTransform × 2
(SettingQuotaAndLimits) (SettingQuotaAndLimits)
(ReadFromStorage) (ReadFromStorage)
NumbersMt × 2 0 → 1 NumbersRange × 2 0 → 1
``` ```
### EXPLAIN ESTIMATE {#explain-estimate} ### EXPLAIN ESTIMATE {#explain-estimate}

View File

@ -13,7 +13,7 @@ sidebar_label: file
**Синтаксис** **Синтаксис**
``` sql ``` sql
file(path [,format] [,structure]) file(path [,format] [,structure] [,compression])
``` ```
**Параметры** **Параметры**
@ -21,6 +21,7 @@ file(path [,format] [,structure])
- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки. - `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает следующие шаблоны в режиме доступа только для чтения `*`, `?`, `{abc,def}` и `{N..M}`, где `N`, `M` — числа, `'abc', 'def'` — строки.
- `format` — [формат](../../interfaces/formats.md#formats) файла. - `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат: `'colunmn1_name column1_ype, column2_name column2_type, ...'`. - `structure` — структура таблицы. Формат: `'colunmn1_name column1_ype, column2_name column2_type, ...'`.
- `compression` — Используемый тип сжатия для запроса SELECT или желаемый тип сжатия для запроса INSERT. Поддерживаемые типы сжатия: `gz`, `br`, `xz`, `zst`, `lz4` и `bz2`.
**Возвращаемое значение** **Возвращаемое значение**

View File

@ -0,0 +1,84 @@
---
slug: /ru/sql-reference/table-functions/fileCluster
sidebar_position: 38
sidebar_label: fileCluster
---
# fileCluster
Позволяет одновременно обрабатывать файлы, находящиеся по указанному пути, на нескольких узлах внутри кластера. Узел-инициатор устанавливает соединения с рабочими узлами (worker nodes), раскрывает шаблоны в пути к файлам и отдаёт задачи по чтению файлов рабочим узлам. Рабочий узел запрашивает у инициатора путь к следующему файлу для обработки, повторяя до тех пор, пока не завершатся все задачи (то есть пока не будут обработаны все файлы).
:::note
Эта табличная функция будет работать _корректно_ только в случае, если набор файлов, соответствующих изначально указанному пути, одинаков на всех узлах и содержание этих файлов идентично на различных узлах. В случае, если эти файлы различаются между узлами, результат не предопределён и зависит от очерёдности, с которой рабочие узлы будут запрашивать задачи у инициатора.
:::
**Синтаксис**
``` sql
fileCluster(cluster_name, path[, format, structure, compression_method])
```
**Аргументы**
- `cluster_name` — имя кластера, используемое для создания набора адресов и параметров подключения к удаленным и локальным серверам.
- `path` — относительный путь до файла от [user_files_path](../../sql-reference/table-functions/file.md#server_configuration_parameters-user_files_path). Путь к файлу поддерживает [шаблоны поиска (globs)](#globs_in_path).
- `format` — [формат](../../interfaces/formats.md#formats) файла.
- `structure` — структура таблицы. Формат: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression_method` — Используемый тип сжатия. Поддерживаемые типы: `gz`, `br`, `xz`, `zst`, `lz4` и `bz2`.
**Возвращаемое значение**
Таблица с указанным форматом и структурой, содержащая данные из файлов, соответствующих указанному пути.
**Пример**
Пусть есть кластер с именем `my_cluster`, а также установлено нижеследующее значение параметра `user_files_path`:
``` bash
$ grep user_files_path /etc/clickhouse-server/config.xml
<user_files_path>/var/lib/clickhouse/user_files/</user_files_path>
```
Пусть также на каждом узле кластера в директории `user_files_path` находятся файлы `file1.csv` и `file2.csv`, и их содержимое идентично на разных узлах:
```bash
$ cat /var/lib/clickhouse/user_files/file1.csv
1,"file1"
11,"file11"
$ cat /var/lib/clickhouse/user_files/file2.csv
2,"file2"
22,"file22"
```
Например, эти файлы можно создать, выполнив на каждом узле два запроса:
```sql
INSERT INTO TABLE FUNCTION file('file1.csv', 'CSV', 'i UInt32, s String') VALUES (1,'file1'), (11,'file11');
INSERT INTO TABLE FUNCTION file('file2.csv', 'CSV', 'i UInt32, s String') VALUES (2,'file2'), (22,'file22');
```
Прочитаем содержимое файлов `file1.csv` и `file2.csv` с помощью табличной функции `fileCluster`:
```sql
SELECT * FROM fileCluster(
    'my_cluster', 'file{1,2}.csv', 'CSV', 'i UInt32, s String') ORDER BY (i, s)
```
```
┌──i─┬─s──────┐
│ 1 │ file1 │
│ 11 │ file11 │
└────┴────────┘
┌──i─┬─s──────┐
│ 2 │ file2 │
│ 22 │ file22 │
└────┴────────┘
```
## Шаблоны поиска в компонентах пути {#globs_in_path}
Поддерживаются все шаблоны поиска, что поддерживаются табличной функцией [File](../../sql-reference/table-functions/file.md#globs-in-path).
**Смотрите также**
- [File (табличная функция)](../../sql-reference/table-functions/file.md)

View File

@ -14,7 +14,7 @@ ClickHouse提供了一个原生命令行客户端`clickhouse-client`客户端支
$ clickhouse-client $ clickhouse-client
ClickHouse client version 19.17.1.1579 (official build). ClickHouse client version 19.17.1.1579 (official build).
Connecting to localhost:9000 as user default. Connecting to localhost:9000 as user default.
Connected to ClickHouse server version 19.17.1 revision 54428. Connected to ClickHouse server version 19.17.1.
:) :)
``` ```

View File

@ -22,7 +22,7 @@ ClickHouse创建此表时 [trace_log](../../operations/server-configuration-para
- `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision. - `revision` ([UInt32](../../sql-reference/data-types/int-uint.md)) — ClickHouse server build revision.
通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1 revision 54429.`. 该字段包含 `revision`,但不是 `version` 的服务器。 通过以下方式连接到服务器 `clickhouse-client`,你看到的字符串类似于 `Connected to ClickHouse server version 19.18.1.`. 该字段包含 `revision`,但不是 `version` 的服务器。
- `timer_type` ([枚举8](../../sql-reference/data-types/enum.md)) — Timer type: - `timer_type` ([枚举8](../../sql-reference/data-types/enum.md)) — Timer type:

View File

@ -493,8 +493,7 @@ void Client::connect()
if (is_interactive) if (is_interactive)
{ {
std::cout << "Connected to " << server_name << " server version " << server_version << " revision " << server_revision << "." std::cout << "Connected to " << server_name << " server version " << server_version << "." << std::endl << std::endl;
<< std::endl << std::endl;
auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH); auto client_version_tuple = std::make_tuple(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH);
auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch); auto server_version_tuple = std::make_tuple(server_version_major, server_version_minor, server_version_patch);

View File

@ -23,6 +23,7 @@
#include <Common/scope_guard_safe.h> #include <Common/scope_guard_safe.h>
#include <Interpreters/Session.h> #include <Interpreters/Session.h>
#include <Access/AccessControl.h> #include <Access/AccessControl.h>
#include <Common/PoolId.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/Macros.h> #include <Common/Macros.h>
#include <Common/Config/ConfigProcessor.h> #include <Common/Config/ConfigProcessor.h>
@ -742,16 +743,16 @@ void LocalServer::processConfig()
status.emplace(fs::path(path) / "status", StatusFile::write_full_info); status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading metadata from {}", path); LOG_DEBUG(log, "Loading metadata from {}", path);
loadMetadataSystem(global_context); auto startup_system_tasks = loadMetadataSystem(global_context);
attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE)); attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE)); attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
startupSystemTables(); waitLoad(TablesLoaderForegroundPoolId, startup_system_tasks);
if (!config().has("only-system-tables")) if (!config().has("only-system-tables"))
{ {
DatabaseCatalog::instance().createBackgroundTasks(); DatabaseCatalog::instance().createBackgroundTasks();
loadMetadata(global_context); waitLoad(loadMetadata(global_context));
DatabaseCatalog::instance().startupBackgroundTasks(); DatabaseCatalog::instance().startupBackgroundTasks();
} }

View File

@ -20,6 +20,7 @@
#include <base/coverage.h> #include <base/coverage.h>
#include <base/getFQDNOrHostName.h> #include <base/getFQDNOrHostName.h>
#include <base/safeExit.h> #include <base/safeExit.h>
#include <Common/PoolId.h>
#include <Common/MemoryTracker.h> #include <Common/MemoryTracker.h>
#include <Common/ClickHouseRevision.h> #include <Common/ClickHouseRevision.h>
#include <Common/DNSResolver.h> #include <Common/DNSResolver.h>
@ -1339,6 +1340,10 @@ try
global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size); global_context->getMessageBrokerSchedulePool().increaseThreadsCount(server_settings_.background_message_broker_schedule_pool_size);
global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size); global_context->getDistributedSchedulePool().increaseThreadsCount(server_settings_.background_distributed_schedule_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderForegroundPoolId, server_settings_.tables_loader_foreground_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundLoadPoolId, server_settings_.tables_loader_background_pool_size);
global_context->getAsyncLoader().setMaxThreads(TablesLoaderBackgroundStartupPoolId, server_settings_.tables_loader_background_pool_size);
getIOThreadPool().reloadConfiguration( getIOThreadPool().reloadConfiguration(
server_settings.max_io_thread_pool_size, server_settings.max_io_thread_pool_size,
server_settings.max_io_thread_pool_free_size, server_settings.max_io_thread_pool_free_size,
@ -1679,17 +1684,18 @@ try
LOG_INFO(log, "Loading metadata from {}", path_str); LOG_INFO(log, "Loading metadata from {}", path_str);
LoadTaskPtrs load_metadata_tasks;
try try
{ {
auto & database_catalog = DatabaseCatalog::instance(); auto & database_catalog = DatabaseCatalog::instance();
/// We load temporary database first, because projections need it. /// We load temporary database first, because projections need it.
database_catalog.initializeAndLoadTemporaryDatabase(); database_catalog.initializeAndLoadTemporaryDatabase();
loadMetadataSystem(global_context); auto system_startup_tasks = loadMetadataSystem(global_context);
maybeConvertSystemDatabase(global_context); maybeConvertSystemDatabase(global_context, system_startup_tasks);
/// This has to be done before the initialization of system logs, /// This has to be done before the initialization of system logs,
/// otherwise there is a race condition between the system database initialization /// otherwise there is a race condition between the system database initialization
/// and creation of new tables in the database. /// and creation of new tables in the database.
startupSystemTables(); waitLoad(TablesLoaderForegroundPoolId, system_startup_tasks);
/// After attaching system databases we can initialize system log. /// After attaching system databases we can initialize system log.
global_context->initializeSystemLogs(); global_context->initializeSystemLogs();
global_context->setSystemZooKeeperLogAfterInitializationIfNeeded(); global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
@ -1705,9 +1711,10 @@ try
/// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap. /// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap.
database_catalog.loadMarkedAsDroppedTables(); database_catalog.loadMarkedAsDroppedTables();
database_catalog.createBackgroundTasks(); database_catalog.createBackgroundTasks();
/// Then, load remaining databases /// Then, load remaining databases (some of them maybe be loaded asynchronously)
loadMetadata(global_context, default_database); load_metadata_tasks = loadMetadata(global_context, default_database, server_settings.async_load_databases);
convertDatabasesEnginesIfNeed(global_context); /// If we need to convert database engines, disable async tables loading
convertDatabasesEnginesIfNeed(load_metadata_tasks, global_context);
database_catalog.startupBackgroundTasks(); database_catalog.startupBackgroundTasks();
/// After loading validate that default database exists /// After loading validate that default database exists
database_catalog.assertDatabaseExists(default_database); database_catalog.assertDatabaseExists(default_database);
@ -1719,6 +1726,7 @@ try
tryLogCurrentException(log, "Caught exception while loading metadata"); tryLogCurrentException(log, "Caught exception while loading metadata");
throw; throw;
} }
LOG_DEBUG(log, "Loaded metadata."); LOG_DEBUG(log, "Loaded metadata.");
/// Init trace collector only after trace_log system table was created /// Init trace collector only after trace_log system table was created
@ -1874,9 +1882,14 @@ try
throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0"); throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0");
global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, global_context, &config(), global_context->setDDLWorker(std::make_unique<DDLWorker>(pool_size, ddl_zookeeper_path, global_context, &config(),
"distributed_ddl", "DDLWorker", "distributed_ddl", "DDLWorker",
&CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID)); &CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID),
load_metadata_tasks);
} }
/// Do not keep tasks in server, they should be kept inside databases. Used here to make dependent tasks only.
load_metadata_tasks.clear();
load_metadata_tasks.shrink_to_fit();
{ {
std::lock_guard lock(servers_lock); std::lock_guard lock(servers_lock);
for (auto & server : servers) for (auto & server : servers)

View File

@ -364,8 +364,15 @@
<background_schedule_pool_size>128</background_schedule_pool_size> <background_schedule_pool_size>128</background_schedule_pool_size>
<background_message_broker_schedule_pool_size>16</background_message_broker_schedule_pool_size> <background_message_broker_schedule_pool_size>16</background_message_broker_schedule_pool_size>
<background_distributed_schedule_pool_size>16</background_distributed_schedule_pool_size> <background_distributed_schedule_pool_size>16</background_distributed_schedule_pool_size>
<tables_loader_foreground_pool_size>0</tables_loader_foreground_pool_size>
<tables_loader_background_pool_size>0</tables_loader_background_pool_size>
--> -->
<!-- Enables asynchronous loading of databases and tables to speedup server startup.
Queries to not yet loaded entity will be blocked until load is finished.
-->
<!-- <async_load_databases>true</async_load_databases> -->
<!-- On memory constrained environments you may have to set this to value larger than 1. <!-- On memory constrained environments you may have to set this to value larger than 1.
--> -->
<max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio> <max_server_memory_usage_to_ram_ratio>0.9</max_server_memory_usage_to_ram_ratio>

View File

@ -108,7 +108,7 @@
filter: blur(1px); filter: blur(1px);
} }
.chart div { position: absolute; } .chart > div { position: absolute; }
.inputs { .inputs {
height: auto; height: auto;
@ -215,8 +215,6 @@
color: var(--text-color); color: var(--text-color);
} }
.u-legend th { display: none; }
.themes { .themes {
float: right; float: right;
font-size: 20pt; font-size: 20pt;
@ -433,6 +431,16 @@
display: none; display: none;
} }
.u-series {
line-height: 0.8;
}
.u-series.footer {
font-size: 8px;
padding-top: 0;
margin-top: 0;
}
/* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css /* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css
* It is copy-pasted to lower the number of requests. * It is copy-pasted to lower the number of requests.
*/ */
@ -478,7 +486,6 @@
* - compress the state for URL's #hash; * - compress the state for URL's #hash;
* - footer with "about" or a link to source code; * - footer with "about" or a link to source code;
* - allow to configure a table on a server to save the dashboards; * - allow to configure a table on a server to save the dashboards;
* - multiple lines on chart;
* - if a query returned one value, display this value instead of a diagram; * - if a query returned one value, display this value instead of a diagram;
* - if a query returned something unusual, display the table; * - if a query returned something unusual, display the table;
*/ */
@ -520,10 +527,54 @@ let queries = [];
/// Query parameters with predefined default values. /// Query parameters with predefined default values.
/// All other parameters will be automatically found in the queries. /// All other parameters will be automatically found in the queries.
let params = { let params = {
"rounding": "60", 'rounding': '60',
"seconds": "86400" 'seconds': '86400'
}; };
/// Build `numColors` visually distinct chart colors by rotating the hue of
/// `baseColor` (a hex string such as "#ff0000") around the color wheel in
/// equal steps, keeping the base color's saturation and lightness.
/// Returns an array of CSS `hsl(...)` color strings.
function generatePalette(baseColor, numColors) {
    const { h, s, l } = hexToHsl(baseColor);
    const hueStep = 360 / numColors;
    return Array.from({ length: numColors }, (_, i) => {
        const hue = Math.round((h + i * hueStep) % 360);
        return `hsl(${hue}, ${s}%, ${l}%)`;
    });
}
/// Convert a hex color string ("#rrggbb" or shorthand "#rgb", leading '#'
/// optional) to HSL components.
/// Returns { h: 0..360, s: 0..100, l: 0..100 } with rounded integer values.
function hexToHsl(hex) {
    hex = hex.replace(/^#/, '');
    // Expand 3-digit shorthand ("f80" -> "ff8800") so the parsing below is uniform.
    if (hex.length === 3) {
        hex = hex.split('').map(c => c + c).join('');
    }
    const bigint = parseInt(hex, 16);
    const r = (bigint >> 16) & 255;
    const g = (bigint >> 8) & 255;
    const b = bigint & 255;

    const r_norm = r / 255;
    const g_norm = g / 255;
    const b_norm = b / 255;
    const max = Math.max(r_norm, g_norm, b_norm);
    const min = Math.min(r_norm, g_norm, b_norm);

    // Lightness is the midpoint of the extreme channels.
    const l = (max + min) / 2;

    // Achromatic colors (max == min) have zero saturation and hue 0.
    let s = 0;
    let h = 0;
    if (max !== min) {
        s = l > 0.5 ? (max - min) / (2 - max - min) : (max - min) / (max + min);
        if (max === r_norm) {
            h = (g_norm - b_norm) / (max - min) + (g_norm < b_norm ? 6 : 0);
        } else if (max === g_norm) {
            h = (b_norm - r_norm) / (max - min) + 2;
        } else {
            h = (r_norm - g_norm) / (max - min) + 4;
        }
    }
    h = Math.round(h * 60);

    return { h, s: Math.round(s * 100), l: Math.round(l * 100) };
}
let theme = 'light'; let theme = 'light';
function setTheme(new_theme) { function setTheme(new_theme) {
@ -913,6 +964,8 @@ document.getElementById('mass-editor-textarea').addEventListener('input', e => {
function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) { function legendAsTooltipPlugin({ className, style = { background: "var(--legend-background)" } } = {}) {
let legendEl; let legendEl;
let showTop = false;
const showLimit = 5;
function init(u, opts) { function init(u, opts) {
legendEl = u.root.querySelector(".u-legend"); legendEl = u.root.querySelector(".u-legend");
@ -932,13 +985,28 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
...style ...style
}); });
// hide series color markers if (opts.series.length == 2) {
const idents = legendEl.querySelectorAll(".u-marker"); const nodes = legendEl.querySelectorAll("th");
for (let i = 0; i < nodes.length; i++)
nodes[i].style.display = "none";
} else {
legendEl.querySelector("th").remove();
legendEl.querySelector("td").setAttribute('colspan', '2');
legendEl.querySelector("td").style.textAlign = 'center';
}
for (let i = 0; i < idents.length; i++) if (opts.series.length - 1 > showLimit) {
idents[i].style.display = "none"; showTop = true;
let footer = legendEl.insertRow().insertCell();
footer.setAttribute('colspan', '2');
footer.style.textAlign = 'center';
footer.classList.add('u-value');
footer.parentNode.classList.add('u-series','footer');
footer.textContent = ". . .";
}
const overEl = u.over; const overEl = u.over;
overEl.style.overflow = "visible";
overEl.appendChild(legendEl); overEl.appendChild(legendEl);
@ -946,11 +1014,28 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
overEl.addEventListener("mouseleave", () => {legendEl.style.display = "none";}); overEl.addEventListener("mouseleave", () => {legendEl.style.display = "none";});
} }
/// Copy an array-like collection (e.g. a NodeList from querySelectorAll)
/// into a real Array so that Array methods like sort/forEach can be used.
function nodeListToArray(nodeList) {
    return Array.from(nodeList);
}
function update(u) { function update(u) {
let { left, top } = u.cursor; let { left, top } = u.cursor;
left -= legendEl.clientWidth / 2; left -= legendEl.clientWidth / 2;
top -= legendEl.clientHeight / 2; top -= legendEl.clientHeight / 2;
legendEl.style.transform = "translate(" + left + "px, " + top + "px)"; legendEl.style.transform = "translate(" + left + "px, " + top + "px)";
if (showTop) {
let nodes = nodeListToArray(legendEl.querySelectorAll("tr"));
let header = nodes.shift();
let footer = nodes.pop();
nodes.forEach(function (node) { node._sort_key = +node.querySelector("td").textContent; });
nodes.sort((a, b) => +b._sort_key - +a._sort_key);
nodes.forEach(function (node) { node.parentNode.appendChild(node); });
for (let i = 0; i < nodes.length; i++) {
nodes[i].style.display = i < showLimit ? null : "none";
delete nodes[i]._sort_key;
}
footer.parentNode.appendChild(footer);
}
} }
return { return {
@ -961,12 +1046,13 @@ function legendAsTooltipPlugin({ className, style = { background: "var(--legend-
}; };
} }
async function doFetch(query, url_params = '') { async function doFetch(query, url_params = '') {
host = document.getElementById('url').value || host; host = document.getElementById('url').value || host;
user = document.getElementById('user').value; user = document.getElementById('user').value;
password = document.getElementById('password').value; password = document.getElementById('password').value;
let url = `${host}?default_format=JSONCompactColumns&enable_http_compression=1` let url = `${host}?default_format=JSONColumnsWithMetadata&enable_http_compression=1`
if (add_http_cors_header) { if (add_http_cors_header) {
// For debug purposes, you may set add_http_cors_header from a browser console // For debug purposes, you may set add_http_cors_header from a browser console
@ -980,14 +1066,17 @@ async function doFetch(query, url_params = '') {
url += `&password=${encodeURIComponent(password)}`; url += `&password=${encodeURIComponent(password)}`;
} }
let response, data, error; let response, reply, error;
try { try {
response = await fetch(url + url_params, { method: "POST", body: query }); response = await fetch(url + url_params, { method: "POST", body: query });
data = await response.text(); reply = await response.text();
if (response.ok) { if (response.ok) {
data = JSON.parse(data); reply = JSON.parse(reply);
if (reply.exception) {
error = reply.exception;
}
} else { } else {
error = data; error = reply;
} }
} catch (e) { } catch (e) {
console.log(e); console.log(e);
@ -1006,7 +1095,7 @@ async function doFetch(query, url_params = '') {
} }
} }
return {data, error}; return {reply, error};
} }
async function draw(idx, chart, url_params, query) { async function draw(idx, chart, url_params, query) {
@ -1015,17 +1104,76 @@ async function draw(idx, chart, url_params, query) {
plots[idx] = null; plots[idx] = null;
} }
let {data, error} = await doFetch(query, url_params); let {reply, error} = await doFetch(query, url_params);
if (!error) {
if (reply.rows.length == 0) {
error = "Query returned empty result.";
} else if (reply.meta.length < 2) {
error = "Query should return at least two columns: unix timestamp and value.";
} else {
for (let i = 0; i < reply.meta.length; i++) {
let label = reply.meta[i].name;
let column = reply.data[label];
if (!Array.isArray(column) || column.length != reply.data[reply.meta[0].name].length) {
error = "Wrong data format of the query.";
break;
}
}
}
}
// Transform string-labeled data to multi-column data
function transformToColumns() {
const x = reply.meta[0].name; // time; must be ordered
const l = reply.meta[1].name; // string label column to distinguish series; must be ordered
const y = reply.meta[2].name; // values; must have single value for (x, l) pair
const labels = [...new Set(reply.data[l])].sort((a, b) => a - b);
if (labels.includes('__time__')) {
error = "The second column is not allowed to contain '__time__' values.";
return;
}
const times = [...new Set(reply.data[x])].sort((a, b) => a - b);
let new_meta = [{ name: '__time__', type: reply.meta[0].type }];
let new_data = { __time__: [] };
for (let label of labels) {
new_meta.push({ name: label, type: reply.meta[2].type });
new_data[label] = [];
}
let new_rows = 0;
function row_done(row_time) {
new_rows++;
new_data.__time__.push(row_time);
for (let label of labels) {
if (new_data[label].length < new_rows) {
new_data[label].push(null);
}
}
}
let prev_time = reply.data[x][0];
const old_rows = reply.data[x].length;
for (let i = 0; i < old_rows; i++) {
const time = reply.data[x][i];
const label = reply.data[l][i];
const value = reply.data[y][i];
if (prev_time != time) {
row_done(prev_time);
prev_time = time;
}
new_data[label].push(value);
}
row_done(prev_time);
reply.meta = new_meta;
reply.data = new_data;
reply.rows = new_rows;
}
function isStringColumn(type) {
return type === 'String' || type === 'LowCardinality(String)';
}
if (!error) { if (!error) {
if (!Array.isArray(data)) { if (reply.meta.length == 3 && isStringColumn(reply.meta[1].type)) {
error = "Query should return an array."; transformToColumns();
} else if (data.length == 0) {
error = "Query returned empty result.";
} else if (data.length != 2) {
error = "Query should return exactly two columns: unix timestamp and value.";
} else if (!Array.isArray(data[0]) || !Array.isArray(data[1]) || data[0].length != data[1].length) {
error = "Wrong data format of the query.";
} }
} }
@ -1043,24 +1191,38 @@ async function draw(idx, chart, url_params, query) {
} }
const [line_color, fill_color, grid_color, axes_color] = theme != 'dark' const [line_color, fill_color, grid_color, axes_color] = theme != 'dark'
? ["#F88", "#FEE", "#EED", "#2c3235"] ? ["#ff8888", "#ffeeee", "#eeeedd", "#2c3235"]
: ["#864", "#045", "#2c3235", "#c7d0d9"]; : ["#886644", "#004455", "#2c3235", "#c7d0d9"];
let sync = uPlot.sync("sync"); let sync = uPlot.sync("sync");
const max_value = Math.max(...data[1]); let axis = {
stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color }
};
let axes = [axis, axis];
let series = [{ label: "x" }];
let data = [reply.data[reply.meta[0].name]];
// Treat every column as series
const series_count = reply.meta.length;
const fill = series_count == 2 ? fill_color : undefined;
const palette = generatePalette(line_color, series_count);
let max_value = Number.NEGATIVE_INFINITY;
for (let i = 1; i < series_count; i++) {
let label = reply.meta[i].name;
series.push({ label, stroke: palette[i - 1], fill });
data.push(reply.data[label]);
max_value = Math.max(max_value, ...reply.data[label]);
}
const opts = { const opts = {
width: chart.clientWidth, width: chart.clientWidth,
height: chart.clientHeight, height: chart.clientHeight,
axes: [ { stroke: axes_color, axes,
grid: { width: 1 / devicePixelRatio, stroke: grid_color }, series,
ticks: { width: 1 / devicePixelRatio, stroke: grid_color } },
{ stroke: axes_color,
grid: { width: 1 / devicePixelRatio, stroke: grid_color },
ticks: { width: 1 / devicePixelRatio, stroke: grid_color } } ],
series: [ { label: "x" },
{ label: "y", stroke: line_color, fill: fill_color } ],
padding: [ null, null, null, (Math.round(max_value * 100) / 100).toString().length * 6 - 10 ], padding: [ null, null, null, (Math.round(max_value * 100) / 100).toString().length * 6 - 10 ],
plugins: [ legendAsTooltipPlugin() ], plugins: [ legendAsTooltipPlugin() ],
cursor: { cursor: {
@ -1216,22 +1378,21 @@ function saveState() {
} }
async function searchQueries() { async function searchQueries() {
let {data, error} = await doFetch(search_query); let {reply, error} = await doFetch(search_query);
if (error) { if (error) {
throw new Error(error); throw new Error(error);
} }
if (!Array.isArray(data)) { let data = reply.data;
throw new Error("Search query should return an array."); if (reply.rows == 0) {
} else if (data.length == 0) {
throw new Error("Search query returned empty result."); throw new Error("Search query returned empty result.");
} else if (data.length != 2) { } else if (reply.meta.length != 2 || reply.meta[0].name != "title" || reply.meta[1].name != "query") {
throw new Error("Search query should return exactly two columns: title and query."); throw new Error("Search query should return exactly two columns: title and query.");
} else if (!Array.isArray(data[0]) || !Array.isArray(data[1]) || data[0].length != data[1].length) { } else if (!Array.isArray(data.title) || !Array.isArray(data.query) || data.title.length != data.query.length) {
throw new Error("Wrong data format of the search query."); throw new Error("Wrong data format of the search query.");
} }
for (let i = 0; i < data[0].length; i++) { for (let i = 0; i < data.title.length; i++) {
queries.push({title: data[0][i], query: data[1][i]}); queries.push({title: data.title[i], query: data.query[i]});
} }
regenerate(); regenerate();

View File

@ -1,26 +1,213 @@
#include <AggregateFunctions/AggregateFunctionFactory.h> #include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/HelpersMinMaxAny.h> #include <AggregateFunctions/HelpersMinMaxAny.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <base/defines.h>
namespace DB namespace DB
{ {
struct Settings; struct Settings;
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
extern const int LOGICAL_ERROR;
}
namespace namespace
{ {
struct AggregateFunctionAnyRespectNullsData
{
enum Status : UInt8
{
NotSet = 1,
SetNull = 2,
SetOther = 3
};
Status status = Status::NotSet;
Field value;
bool isSet() const { return status != Status::NotSet; }
void setNull() { status = Status::SetNull; }
void setOther() { status = Status::SetOther; }
};
template <bool First>
class AggregateFunctionAnyRespectNulls final
: public IAggregateFunctionDataHelper<AggregateFunctionAnyRespectNullsData, AggregateFunctionAnyRespectNulls<First>>
{
public:
using Data = AggregateFunctionAnyRespectNullsData;
SerializationPtr serialization;
const bool returns_nullable_type = false;
explicit AggregateFunctionAnyRespectNulls(const DataTypePtr & type)
: IAggregateFunctionDataHelper<Data, AggregateFunctionAnyRespectNulls<First>>({type}, {}, type)
, serialization(type->getDefaultSerialization())
, returns_nullable_type(type->isNullable())
{
}
String getName() const override
{
if constexpr (First)
return "any_respect_nulls";
else
return "anyLast_respect_nulls";
}
bool allocatesMemoryInArena() const override { return false; }
void addNull(AggregateDataPtr __restrict place) const
{
chassert(returns_nullable_type);
auto & d = this->data(place);
if (First && d.isSet())
return;
d.setNull();
}
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override
{
if (columns[0]->isNullable())
{
if (columns[0]->isNullAt(row_num))
return addNull(place);
}
auto & d = this->data(place);
if (First && d.isSet())
return;
d.setOther();
columns[0]->get(row_num, d.value);
}
void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override
{
if (columns[0]->isNullable())
addNull(place);
else
add(place, columns, 0, arena);
}
void addBatchSinglePlace(
size_t row_begin, size_t row_end, AggregateDataPtr place, const IColumn ** columns, Arena * arena, ssize_t if_argument_pos)
const override
{
if (if_argument_pos >= 0)
{
const auto & flags = assert_cast<const ColumnUInt8 &>(*columns[if_argument_pos]).getData();
size_t size = row_end - row_begin;
for (size_t i = 0; i < size; ++i)
{
size_t pos = First ? row_begin + i : row_end - 1 - i;
if (flags[pos])
{
add(place, columns, pos, arena);
break;
}
}
}
else
{
size_t pos = First ? row_begin : row_end - 1;
add(place, columns, pos, arena);
}
}
void addBatchSinglePlaceNotNull(
size_t, size_t, AggregateDataPtr __restrict, const IColumn **, const UInt8 *, Arena *, ssize_t) const override
{
/// This should not happen since it means somebody else has preprocessed the data (NULLs or IFs) and might
/// have discarded values that we need (NULLs)
throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "AggregateFunctionAnyRespectNulls::addBatchSinglePlaceNotNull called");
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
{
auto & d = this->data(place);
if (First && d.isSet())
return;
auto & other = this->data(rhs);
if (other.isSet())
{
d.status = other.status;
d.value = other.value;
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & d = this->data(place);
UInt8 k = d.status;
writeBinaryLittleEndian<UInt8>(k, buf);
if (k == Data::Status::SetOther)
serialization->serializeBinary(d.value, buf, {});
}
void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena *) const override
{
auto & d = this->data(place);
UInt8 k = Data::Status::NotSet;
readBinaryLittleEndian<UInt8>(k, buf);
d.status = static_cast<Data::Status>(k);
if (d.status == Data::Status::NotSet)
return;
else if (d.status == Data::Status::SetNull)
{
if (!returns_nullable_type)
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type (NULL) in non-nullable {}State", getName());
return;
}
else if (d.status == Data::Status::SetOther)
serialization->deserializeBinary(d.value, buf, {});
else
throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect type ({}) in {}State", static_cast<Int8>(k), getName());
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & d = this->data(place);
if (d.status == Data::Status::SetOther)
to.insert(d.value);
else
to.insertDefault();
}
AggregateFunctionPtr getOwnNullAdapter(
const AggregateFunctionPtr & original_function,
const DataTypes & /*arguments*/,
const Array & /*params*/,
const AggregateFunctionProperties & /*properties*/) const override
{
return original_function;
}
};
template <bool First>
IAggregateFunction * createAggregateFunctionSingleValueRespectNulls(
const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
return new AggregateFunctionAnyRespectNulls<First>(argument_types[0]);
}
AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) AggregateFunctionPtr createAggregateFunctionAny(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{ {
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings)); return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData>(name, argument_types, parameters, settings));
} }
template <bool RespectNulls = false> AggregateFunctionPtr createAggregateFunctionAnyRespectNulls(
AggregateFunctionPtr createAggregateFunctionNullableAny(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{ {
return AggregateFunctionPtr( return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<true>(name, argument_types, parameters, settings));
createAggregateFunctionSingleNullableValue<AggregateFunctionsSingleValue, AggregateFunctionAnyData, RespectNulls>(
name, argument_types, parameters, settings));
} }
AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -28,13 +215,10 @@ AggregateFunctionPtr createAggregateFunctionAnyLast(const std::string & name, co
return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings)); return AggregateFunctionPtr(createAggregateFunctionSingleValue<AggregateFunctionsSingleValue, AggregateFunctionAnyLastData>(name, argument_types, parameters, settings));
} }
template <bool RespectNulls = false> AggregateFunctionPtr createAggregateFunctionAnyLastRespectNulls(
AggregateFunctionPtr createAggregateFunctionNullableAnyLast(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{ {
return AggregateFunctionPtr(createAggregateFunctionSingleNullableValue< return AggregateFunctionPtr(createAggregateFunctionSingleValueRespectNulls<false>(name, argument_types, parameters, settings));
AggregateFunctionsSingleValue,
AggregateFunctionAnyLastData,
RespectNulls>(name, argument_types, parameters, settings));
} }
AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
@ -46,26 +230,28 @@ AggregateFunctionPtr createAggregateFunctionAnyHeavy(const std::string & name, c
void registerAggregateFunctionsAny(AggregateFunctionFactory & factory) void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
{ {
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true }; AggregateFunctionProperties default_properties = {.returns_default_when_only_null = false, .is_order_dependent = true};
AggregateFunctionProperties default_properties_for_respect_nulls
= {.returns_default_when_only_null = false, .is_order_dependent = true, .is_window_function = true};
factory.registerFunction("any", { createAggregateFunctionAny, properties }); factory.registerFunction("any", {createAggregateFunctionAny, default_properties});
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive); factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties }); factory.registerAlias("first_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });
// Synonyms for use as window functions. factory.registerFunction("any_respect_nulls", {createAggregateFunctionAnyRespectNulls, default_properties_for_respect_nulls});
factory.registerFunction("first_value", factory.registerAlias("any_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
{ createAggregateFunctionAny, properties }, factory.registerAlias("first_value_respect_nulls", "any_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("first_value_respect_nulls", factory.registerFunction("anyLast", {createAggregateFunctionAnyLast, default_properties});
{ createAggregateFunctionNullableAny<true>, properties }, factory.registerAlias("last_value", "anyLast", AggregateFunctionFactory::CaseInsensitive);
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value", factory.registerFunction("anyLast_respect_nulls", {createAggregateFunctionAnyLastRespectNulls, default_properties_for_respect_nulls});
{ createAggregateFunctionAnyLast, properties }, factory.registerAlias("last_value_respect_nulls", "anyLast_respect_nulls", AggregateFunctionFactory::CaseInsensitive);
AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("last_value_respect_nulls", factory.registerFunction("anyHeavy", {createAggregateFunctionAnyHeavy, default_properties});
{ createAggregateFunctionNullableAnyLast<true>, properties },
AggregateFunctionFactory::CaseInsensitive); factory.registerNullsActionTransformation("any", "any_respect_nulls");
factory.registerNullsActionTransformation("anyLast", "anyLast_respect_nulls");
} }
} }

View File

@ -116,7 +116,7 @@ public:
/// Return normalized state type: count() /// Return normalized state type: count()
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>( return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{}); AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
} }
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override
@ -267,7 +267,7 @@ public:
/// Return normalized state type: count() /// Return normalized state type: count()
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>( return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(getName(), {}, {}, properties), DataTypes{}, Array{}); AggregateFunctionFactory::instance().get(getName(), NullsAction::EMPTY, {}, {}, properties), DataTypes{}, Array{});
} }
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override

View File

@ -1,23 +1,11 @@
#include <AggregateFunctions/AggregateFunctionFactory.h> #include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h> #include <AggregateFunctions/Combinators/AggregateFunctionCombinatorFactory.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeLowCardinality.h> #include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/typeid_cast.h>
#include <Common/CurrentThread.h>
#include <Poco/String.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context.h>
static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000; static constexpr size_t MAX_AGGREGATE_FUNCTION_NAME_LENGTH = 1000;
@ -28,10 +16,11 @@ struct Settings;
namespace ErrorCodes namespace ErrorCodes
{ {
extern const int UNKNOWN_AGGREGATE_FUNCTION;
extern const int LOGICAL_ERROR;
extern const int ILLEGAL_AGGREGATION; extern const int ILLEGAL_AGGREGATION;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int TOO_LARGE_STRING_SIZE; extern const int TOO_LARGE_STRING_SIZE;
extern const int UNKNOWN_AGGREGATE_FUNCTION;
} }
const String & getAggregateFunctionCanonicalNameIfAny(const String & name) const String & getAggregateFunctionCanonicalNameIfAny(const String & name)
@ -59,6 +48,23 @@ void AggregateFunctionFactory::registerFunction(const String & name, Value creat
} }
} }
void AggregateFunctionFactory::registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls)
{
if (!aggregate_functions.contains(source_ignores_nulls))
throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Source aggregation '{}' not found");
if (!aggregate_functions.contains(target_respect_nulls))
throw Exception(ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Target aggregation '{}' not found");
if (!respect_nulls.emplace(source_ignores_nulls, target_respect_nulls).second)
throw Exception(
ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", source_ignores_nulls);
if (!ignore_nulls.emplace(target_respect_nulls, source_ignores_nulls).second)
throw Exception(
ErrorCodes::LOGICAL_ERROR, "registerNullsActionTransformation: Assignment from '{}' is not unique", target_respect_nulls);
}
static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types) static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
{ {
DataTypes res_types; DataTypes res_types;
@ -70,7 +76,11 @@ static DataTypes convertLowCardinalityTypesToNested(const DataTypes & types)
} }
AggregateFunctionPtr AggregateFunctionFactory::get( AggregateFunctionPtr AggregateFunctionFactory::get(
const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const const String & name,
NullsAction action,
const DataTypes & argument_types,
const Array & parameters,
AggregateFunctionProperties & out_properties) const
{ {
/// This to prevent costly string manipulation in parsing the aggregate function combinators. /// This to prevent costly string manipulation in parsing the aggregate function combinators.
/// Example: avgArrayArrayArrayArray...(1000 times)...Array /// Example: avgArrayArrayArrayArray...(1000 times)...Array
@ -81,8 +91,9 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
/// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function. /// If one of the types is Nullable, we apply aggregate function combinator "Null" if it's not window function.
/// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them, /// Window functions are not real aggregate functions. Applying combinators doesn't make sense for them,
/// they must handle the nullability themselves /// they must handle the nullability themselves.
auto properties = tryGetProperties(name); /// Aggregate functions such as any_value_respect_nulls are considered window functions in that sense
auto properties = tryGetProperties(name, action);
bool is_window_function = properties.has_value() && properties->is_window_function; bool is_window_function = properties.has_value() && properties->is_window_function;
if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), if (!is_window_function && std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
[](const auto & type) { return type->isNullable(); })) [](const auto & type) { return type->isNullable(); }))
@ -98,8 +109,7 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(), bool has_null_arguments = std::any_of(types_without_low_cardinality.begin(), types_without_low_cardinality.end(),
[](const auto & type) { return type->onlyNull(); }); [](const auto & type) { return type->onlyNull(); });
AggregateFunctionPtr nested_function = getImpl( AggregateFunctionPtr nested_function = getImpl(name, action, nested_types, nested_parameters, out_properties, has_null_arguments);
name, nested_types, nested_parameters, out_properties, has_null_arguments);
// Pure window functions are not real aggregate functions. Applying // Pure window functions are not real aggregate functions. Applying
// combinators doesn't make sense for them, they must handle the // combinators doesn't make sense for them, they must handle the
@ -110,22 +120,54 @@ AggregateFunctionPtr AggregateFunctionFactory::get(
return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters); return combinator->transformAggregateFunction(nested_function, out_properties, types_without_low_cardinality, parameters);
} }
auto with_original_arguments = getImpl(name, types_without_low_cardinality, parameters, out_properties, false); auto with_original_arguments = getImpl(name, action, types_without_low_cardinality, parameters, out_properties, false);
if (!with_original_arguments) if (!with_original_arguments)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: AggregateFunctionFactory returned nullptr");
return with_original_arguments; return with_original_arguments;
} }
std::optional<AggregateFunctionWithProperties>
AggregateFunctionFactory::getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const
{
if (action == NullsAction::RESPECT_NULLS)
{
if (auto it = respect_nulls.find(name); it == respect_nulls.end())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Function {} does not support RESPECT NULLS", name);
else if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end())
return {associated_it->second};
else
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} RESPECT NULLS')", it->second, name);
}
if (action == NullsAction::IGNORE_NULLS)
{
if (auto it = ignore_nulls.find(name); it != ignore_nulls.end())
{
if (auto associated_it = aggregate_functions.find(it->second); associated_it != aggregate_functions.end())
return {associated_it->second};
else
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Unable to find the function {} (equivalent to '{} IGNORE NULLS')", it->second, name);
}
/// We don't throw for IGNORE NULLS of other functions because that's the default in CH
}
return {};
}
AggregateFunctionPtr AggregateFunctionFactory::getImpl( AggregateFunctionPtr AggregateFunctionFactory::getImpl(
const String & name_param, const String & name_param,
NullsAction action,
const DataTypes & argument_types, const DataTypes & argument_types,
const Array & parameters, const Array & parameters,
AggregateFunctionProperties & out_properties, AggregateFunctionProperties & out_properties,
bool has_null_arguments) const bool has_null_arguments) const
{ {
String name = getAliasToOrName(name_param); String name = getAliasToOrName(name_param);
String case_insensitive_name;
bool is_case_insensitive = false; bool is_case_insensitive = false;
Value found; Value found;
@ -135,10 +177,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
found = it->second; found = it->second;
} }
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) if (!found.creator)
{ {
found = jt->second; case_insensitive_name = Poco::toLower(name);
is_case_insensitive = true; if (auto jt = case_insensitive_aggregate_functions.find(case_insensitive_name); jt != case_insensitive_aggregate_functions.end())
{
found = jt->second;
is_case_insensitive = true;
}
} }
ContextPtr query_context; ContextPtr query_context;
@ -147,11 +193,14 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
if (found.creator) if (found.creator)
{ {
out_properties = found.properties; auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? case_insensitive_name : name, action);
if (opt)
found = *opt;
out_properties = found.properties;
if (query_context && query_context->getSettingsRef().log_queries) if (query_context && query_context->getSettingsRef().log_queries)
query_context->addQueryFactoriesInfo( query_context->addQueryFactoriesInfo(
Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? Poco::toLower(name) : name); Context::QueryLogFactories::AggregateFunction, is_case_insensitive ? case_insensitive_name : name);
/// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method. /// The case when aggregate function should return NULL on NULL arguments. This case is handled in "get" method.
if (!out_properties.returns_default_when_only_null && has_null_arguments) if (!out_properties.returns_default_when_only_null && has_null_arguments)
@ -196,7 +245,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
DataTypes nested_types = combinator->transformArguments(argument_types); DataTypes nested_types = combinator->transformArguments(argument_types);
Array nested_parameters = combinator->transformParameters(parameters); Array nested_parameters = combinator->transformParameters(parameters);
AggregateFunctionPtr nested_function = get(nested_name, nested_types, nested_parameters, out_properties); AggregateFunctionPtr nested_function = get(nested_name, action, nested_types, nested_parameters, out_properties);
return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters); return combinator->transformAggregateFunction(nested_function, out_properties, argument_types, parameters);
} }
@ -213,16 +262,7 @@ AggregateFunctionPtr AggregateFunctionFactory::getImpl(
throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info); throw Exception(ErrorCodes::UNKNOWN_AGGREGATE_FUNCTION, "Unknown aggregate function {}{}", name, extra_info);
} }
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name, NullsAction action) const
AggregateFunctionPtr AggregateFunctionFactory::tryGet(
const String & name, const DataTypes & argument_types, const Array & parameters, AggregateFunctionProperties & out_properties) const
{
return isAggregateFunctionName(name)
? get(name, argument_types, parameters, out_properties)
: nullptr;
}
std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetProperties(String name) const
{ {
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH) if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH); throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
@ -231,6 +271,8 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
{ {
name = getAliasToOrName(name); name = getAliasToOrName(name);
Value found; Value found;
String lower_case_name;
bool is_case_insensitive = false;
/// Find by exact match. /// Find by exact match.
if (auto it = aggregate_functions.find(name); it != aggregate_functions.end()) if (auto it = aggregate_functions.find(name); it != aggregate_functions.end())
@ -238,11 +280,23 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
found = it->second; found = it->second;
} }
if (auto jt = case_insensitive_aggregate_functions.find(Poco::toLower(name)); jt != case_insensitive_aggregate_functions.end()) if (!found.creator)
found = jt->second; {
lower_case_name = Poco::toLower(name);
if (auto jt = case_insensitive_aggregate_functions.find(lower_case_name); jt != case_insensitive_aggregate_functions.end())
{
is_case_insensitive = true;
found = jt->second;
}
}
if (found.creator) if (found.creator)
{
auto opt = getAssociatedFunctionByNullsAction(is_case_insensitive ? lower_case_name : name, action);
if (opt)
return opt->properties;
return found.properties; return found.properties;
}
/// Combinators of aggregate functions. /// Combinators of aggregate functions.
/// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb', /// For every aggregate function 'agg' and combiner '-Comb' there is a combined aggregate function with the name 'aggComb',
@ -262,27 +316,29 @@ std::optional<AggregateFunctionProperties> AggregateFunctionFactory::tryGetPrope
} }
bool AggregateFunctionFactory::isAggregateFunctionName(String name) const bool AggregateFunctionFactory::isAggregateFunctionName(const String & name_) const
{ {
if (name.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH) if (name_.size() > MAX_AGGREGATE_FUNCTION_NAME_LENGTH)
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH); throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too long name of aggregate function, maximum: {}", MAX_AGGREGATE_FUNCTION_NAME_LENGTH);
while (true) if (aggregate_functions.contains(name_) || isAlias(name_))
return true;
String name_lowercase = Poco::toLower(name_);
if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase))
return true;
String name = name_;
while (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{ {
if (aggregate_functions.contains(name) || isAlias(name)) name = name.substr(0, name.size() - combinator->getName().size());
return true; name_lowercase = name_lowercase.substr(0, name_lowercase.size() - combinator->getName().size());
String name_lowercase = Poco::toLower(name); if (aggregate_functions.contains(name) || isAlias(name) || case_insensitive_aggregate_functions.contains(name_lowercase)
if (case_insensitive_aggregate_functions.contains(name_lowercase) || isAlias(name_lowercase)) || isAlias(name_lowercase))
return true; return true;
if (AggregateFunctionCombinatorPtr combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(name))
{
name = name.substr(0, name.size() - combinator->getName().size());
}
else
return false;
} }
return false;
} }
AggregateFunctionFactory & AggregateFunctionFactory::instance() AggregateFunctionFactory & AggregateFunctionFactory::instance()

View File

@ -1,9 +1,9 @@
#pragma once #pragma once
#include <AggregateFunctions/IAggregateFunction.h> #include <AggregateFunctions/IAggregateFunction.h>
#include <Common/IFactoryWithAliases.h>
#include <Parsers/ASTFunction.h> #include <Parsers/ASTFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/IFactoryWithAliases.h>
#include <functional> #include <functional>
#include <memory> #include <memory>
@ -62,36 +62,44 @@ public:
Value creator, Value creator,
CaseSensitiveness case_sensitiveness = CaseSensitive); CaseSensitiveness case_sensitiveness = CaseSensitive);
/// Register how to transform from one aggregate function to other based on NullsAction
/// Registers them both ways:
/// SOURCE + RESPECT NULLS will be transformed to TARGET
/// TARGET + IGNORE NULLS will be transformed to SOURCE
void registerNullsActionTransformation(const String & source_ignores_nulls, const String & target_respect_nulls);
/// Throws an exception if not found. /// Throws an exception if not found.
AggregateFunctionPtr AggregateFunctionPtr
get(const String & name, get(const String & name,
const DataTypes & argument_types, NullsAction action,
const Array & parameters,
AggregateFunctionProperties & out_properties) const;
/// Returns nullptr if not found.
AggregateFunctionPtr tryGet(
const String & name,
const DataTypes & argument_types, const DataTypes & argument_types,
const Array & parameters, const Array & parameters,
AggregateFunctionProperties & out_properties) const; AggregateFunctionProperties & out_properties) const;
/// Get properties if the aggregate function exists. /// Get properties if the aggregate function exists.
std::optional<AggregateFunctionProperties> tryGetProperties(String name) const; std::optional<AggregateFunctionProperties> tryGetProperties(String name, NullsAction action) const;
bool isAggregateFunctionName(String name) const; bool isAggregateFunctionName(const String & name) const;
private: private:
AggregateFunctionPtr getImpl( AggregateFunctionPtr getImpl(
const String & name, const String & name,
NullsAction action,
const DataTypes & argument_types, const DataTypes & argument_types,
const Array & parameters, const Array & parameters,
AggregateFunctionProperties & out_properties, AggregateFunctionProperties & out_properties,
bool has_null_arguments) const; bool has_null_arguments) const;
using AggregateFunctions = std::unordered_map<String, Value>; using AggregateFunctions = std::unordered_map<String, Value>;
using ActionMap = std::unordered_map<String, String>;
AggregateFunctions aggregate_functions; AggregateFunctions aggregate_functions;
/// Mapping from functions with `RESPECT NULLS` modifier to actual aggregate function names
/// Example: `any(x) RESPECT NULLS` should be executed as function `any_respect_nulls`
ActionMap respect_nulls;
/// Same as above for `IGNORE NULLS` modifier
ActionMap ignore_nulls;
std::optional<AggregateFunctionWithProperties> getAssociatedFunctionByNullsAction(const String & name, NullsAction action) const;
/// Case insensitive aggregate functions will be additionally added here with lowercased name. /// Case insensitive aggregate functions will be additionally added here with lowercased name.
AggregateFunctions case_insensitive_aggregate_functions; AggregateFunctions case_insensitive_aggregate_functions;

View File

@ -771,26 +771,18 @@ static_assert(
/// For any other value types. /// For any other value types.
template <bool RESULT_IS_NULLABLE = false>
struct SingleValueDataGeneric struct SingleValueDataGeneric
{ {
private: private:
using Self = SingleValueDataGeneric; using Self = SingleValueDataGeneric;
Field value; Field value;
bool has_value = false;
public: public:
static constexpr bool result_is_nullable = RESULT_IS_NULLABLE; static constexpr bool result_is_nullable = false;
static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE; static constexpr bool should_skip_null_arguments = true;
static constexpr bool is_any = false; static constexpr bool is_any = false;
bool has() const bool has() const { return !value.isNull(); }
{
if constexpr (result_is_nullable)
return has_value;
return !value.isNull();
}
void insertResultInto(IColumn & to) const void insertResultInto(IColumn & to) const
{ {
@ -820,19 +812,9 @@ public:
serialization.deserializeBinary(value, buf, {}); serialization.deserializeBinary(value, buf, {});
} }
void change(const IColumn & column, size_t row_num, Arena *) void change(const IColumn & column, size_t row_num, Arena *) { column.get(row_num, value); }
{
column.get(row_num, value);
if constexpr (result_is_nullable)
has_value = true;
}
void change(const Self & to, Arena *) void change(const Self & to, Arena *) { value = to.value; }
{
value = to.value;
if constexpr (result_is_nullable)
has_value = true;
}
bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena) bool changeFirstTime(const IColumn & column, size_t row_num, Arena * arena)
{ {
@ -847,7 +829,7 @@ public:
bool changeFirstTime(const Self & to, Arena * arena) bool changeFirstTime(const Self & to, Arena * arena)
{ {
if (!has() && (result_is_nullable || to.has())) if (!has() && to.has())
{ {
change(to, arena); change(to, arena);
return true; return true;
@ -882,30 +864,15 @@ public:
} }
else else
{ {
if constexpr (result_is_nullable) Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{ {
Field new_value; value = new_value;
column.get(row_num, new_value); return true;
if (!value.isNull() && (new_value.isNull() || new_value < value))
{
value = new_value;
return true;
}
else
return false;
} }
else else
{ return false;
Field new_value;
column.get(row_num, new_value);
if (new_value < value)
{
value = new_value;
return true;
}
else
return false;
}
} }
} }
@ -913,30 +880,13 @@ public:
{ {
if (!to.has()) if (!to.has())
return false; return false;
if constexpr (result_is_nullable) if (!has() || to.value < value)
{ {
if (!has()) change(to, arena);
{ return true;
change(to, arena);
return true;
}
if (to.value.isNull() || (!value.isNull() && to.value < value))
{
value = to.value;
return true;
}
return false;
} }
else else
{ return false;
if (!has() || to.value < value)
{
change(to, arena);
return true;
}
else
return false;
}
} }
bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena) bool changeIfGreater(const IColumn & column, size_t row_num, Arena * arena)
@ -948,29 +898,15 @@ public:
} }
else else
{ {
if constexpr (result_is_nullable) Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{ {
Field new_value; value = new_value;
column.get(row_num, new_value); return true;
if (!value.isNull() && (new_value.isNull() || value < new_value))
{
value = new_value;
return true;
}
return false;
} }
else else
{ return false;
Field new_value;
column.get(row_num, new_value);
if (new_value > value)
{
value = new_value;
return true;
}
else
return false;
}
} }
} }
@ -978,36 +914,18 @@ public:
{ {
if (!to.has()) if (!to.has())
return false; return false;
if constexpr (result_is_nullable) if (!has() || to.value > value)
{ {
if (!value.isNull() && (to.value.isNull() || value < to.value)) change(to, arena);
{ return true;
value = to.value;
return true;
}
return false;
} }
else else
{ return false;
if (!has() || to.value > value)
{
change(to, arena);
return true;
}
else
return false;
}
} }
bool isEqualTo(const IColumn & column, size_t row_num) const bool isEqualTo(const IColumn & column, size_t row_num) const { return has() && value == column[row_num]; }
{
return has() && value == column[row_num];
}
bool isEqualTo(const Self & to) const bool isEqualTo(const Self & to) const { return has() && to.value == value; }
{
return has() && to.value == value;
}
static bool allocatesMemoryInArena() static bool allocatesMemoryInArena()
{ {

View File

@ -150,7 +150,7 @@ public:
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>( return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get( AggregateFunctionFactory::instance().get(
GatherFunctionQuantileData::toFusedNameOrSelf(getName()), this->argument_types, params, properties), GatherFunctionQuantileData::toFusedNameOrSelf(getName()), NullsAction::EMPTY, this->argument_types, params, properties),
this->argument_types, this->argument_types,
params); params);
} }

View File

@ -20,7 +20,7 @@ template <template <typename> class Data>
class AggregateFunctionCombinatorArgMinMax final : public IAggregateFunctionCombinator class AggregateFunctionCombinatorArgMinMax final : public IAggregateFunctionCombinator
{ {
public: public:
String getName() const override { return Data<SingleValueDataGeneric<>>::name(); } String getName() const override { return Data<SingleValueDataGeneric>::name(); }
DataTypes transformArguments(const DataTypes & arguments) const override DataTypes transformArguments(const DataTypes & arguments) const override
{ {
@ -66,7 +66,7 @@ public:
if (which.idx == TypeIndex::String) if (which.idx == TypeIndex::String)
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataString>>>(nested_function, arguments, params); return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataString>>>(nested_function, arguments, params);
return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric<>>>>(nested_function, arguments, params); return std::make_shared<AggregateFunctionArgMinMax<Data<SingleValueDataGeneric>>>(nested_function, arguments, params);
} }
}; };

View File

@ -33,6 +33,8 @@ class AggregateFunctionIf final : public IAggregateFunctionHelper<AggregateFunct
private: private:
AggregateFunctionPtr nested_func; AggregateFunctionPtr nested_func;
size_t num_arguments; size_t num_arguments;
/// We accept Nullable(Nothing) as condition, but callees always expect UInt8 so we need to avoid calling them
bool only_null_condition = false;
public: public:
AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types, const Array & params_) AggregateFunctionIf(AggregateFunctionPtr nested, const DataTypes & types, const Array & params_)
@ -42,7 +44,9 @@ public:
if (num_arguments == 0) if (num_arguments == 0)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require at least one argument", getName()); throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} require at least one argument", getName());
if (!isUInt8(types.back()) && !types.back()->onlyNull()) only_null_condition = types.back()->onlyNull();
if (!isUInt8(types.back()) && !only_null_condition)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Last argument for aggregate function {} must be UInt8", getName()); throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Last argument for aggregate function {} must be UInt8", getName());
} }
@ -108,6 +112,8 @@ public:
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{ {
if (only_null_condition)
return;
if (assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num]) if (assert_cast<const ColumnUInt8 &>(*columns[num_arguments - 1]).getData()[row_num])
nested_func->add(place, columns, row_num, arena); nested_func->add(place, columns, row_num, arena);
} }
@ -121,6 +127,8 @@ public:
Arena * arena, Arena * arena,
ssize_t) const override ssize_t) const override
{ {
if (only_null_condition)
return;
nested_func->addBatch(row_begin, row_end, places, place_offset, columns, arena, num_arguments - 1); nested_func->addBatch(row_begin, row_end, places, place_offset, columns, arena, num_arguments - 1);
} }
@ -132,6 +140,8 @@ public:
Arena * arena, Arena * arena,
ssize_t) const override ssize_t) const override
{ {
if (only_null_condition)
return;
nested_func->addBatchSinglePlace(row_begin, row_end, place, columns, arena, num_arguments - 1); nested_func->addBatchSinglePlace(row_begin, row_end, place, columns, arena, num_arguments - 1);
} }
@ -144,6 +154,8 @@ public:
Arena * arena, Arena * arena,
ssize_t) const override ssize_t) const override
{ {
if (only_null_condition)
return;
nested_func->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, num_arguments - 1); nested_func->addBatchSinglePlaceNotNull(row_begin, row_end, place, columns, null_map, arena, num_arguments - 1);
} }

View File

@ -447,7 +447,8 @@ public:
{ {
AggregateFunctionProperties out_properties; AggregateFunctionProperties out_properties;
auto & aggr_func_factory = AggregateFunctionFactory::instance(); auto & aggr_func_factory = AggregateFunctionFactory::instance();
return aggr_func_factory.get(nested_func_name + "MappedArrays", arguments, params, out_properties); auto action = NullsAction::EMPTY;
return aggr_func_factory.get(nested_func_name + "MappedArrays", action, arguments, params, out_properties);
} }
else else
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregation '{}Map' is not implemented for mapped arrays", throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregation '{}Map' is not implemented for mapped arrays",

View File

@ -35,8 +35,8 @@ public:
auto storage_type_out = DataTypeFactory::instance().get(nested_->getResultType()->getName()); auto storage_type_out = DataTypeFactory::instance().get(nested_->getResultType()->getName());
// Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type. // Need to make a new function with promoted argument types because SimpleAggregates requires arg_type = return_type.
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto function auto function = AggregateFunctionFactory::instance().get(
= AggregateFunctionFactory::instance().get(nested_->getName(), {storage_type_out}, nested_->getParameters(), properties); nested_->getName(), NullsAction::EMPTY, {storage_type_out}, nested_->getParameters(), properties);
// Need to make a clone because it'll be customized. // Need to make a clone because it'll be customized.
auto storage_type_arg = DataTypeFactory::instance().get(nested_->getResultType()->getName()); auto storage_type_arg = DataTypeFactory::instance().get(nested_->getResultType()->getName());

View File

@ -14,8 +14,9 @@ namespace DB
struct Settings; struct Settings;
/// min, max, any, anyLast, anyHeavy, etc... /// min, max, any, anyLast, anyHeavy, etc...
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data> template <template <typename> class AggregateFunctionTemplate, template <typename, bool...> class Data>
static IAggregateFunction * createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *) static IAggregateFunction *
createAggregateFunctionSingleValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{ {
assertNoParameters(name, parameters); assertNoParameters(name, parameters);
assertUnary(name, argument_types); assertUnary(name, argument_types);
@ -44,31 +45,9 @@ static IAggregateFunction * createAggregateFunctionSingleValue(const String & na
if (which.idx == TypeIndex::String) if (which.idx == TypeIndex::String)
return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type); return new AggregateFunctionTemplate<Data<SingleValueDataString>>(argument_type);
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<>>>(argument_type); return new AggregateFunctionTemplate<Data<SingleValueDataGeneric>>(argument_type);
} }
template <template <typename> class AggregateFunctionTemplate, template <typename> class Data, bool RespectNulls = false>
static IAggregateFunction * createAggregateFunctionSingleNullableValue(const String & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings)
{
assertNoParameters(name, parameters);
assertUnary(name, argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
// If the result value could be null (excluding the case that no row is matched),
// use SingleValueDataGeneric.
if constexpr (!RespectNulls)
{
return createAggregateFunctionSingleValue<AggregateFunctionTemplate, Data>(name, argument_types, Array(), settings);
}
else
{
return new AggregateFunctionTemplate<Data<SingleValueDataGeneric<true>>>(argument_type);
}
UNREACHABLE();
}
/// argMin, argMax /// argMin, argMax
template <template <typename> class MinMaxData, typename ResData> template <template <typename> class MinMaxData, typename ResData>
static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTypePtr & res_type, const DataTypePtr & val_type) static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTypePtr & res_type, const DataTypePtr & val_type)
@ -98,7 +77,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMaxSecond(const DataTyp
if (which.idx == TypeIndex::String) if (which.idx == TypeIndex::String)
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type); return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataString>>>(res_type, val_type);
return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric<>>>>(res_type, val_type); return new AggregateFunctionArgMinMax<AggregateFunctionArgMinMaxData<ResData, MinMaxData<SingleValueDataGeneric>>>(res_type, val_type);
} }
template <template <typename> class MinMaxData> template <template <typename> class MinMaxData>
@ -134,7 +113,7 @@ static IAggregateFunction * createAggregateFunctionArgMinMax(const String & name
if (which.idx == TypeIndex::String) if (which.idx == TypeIndex::String)
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type); return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataString>(res_type, val_type);
return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric<>>(res_type, val_type); return createAggregateFunctionArgMinMaxSecond<MinMaxData, SingleValueDataGeneric>(res_type, val_type);
} }
} }

View File

@ -113,6 +113,11 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
buffer << ", function_type: " << function_type; buffer << ", function_type: " << function_type;
if (nulls_action == NullsAction::RESPECT_NULLS)
buffer << ", nulls_action : RESPECT_NULLS";
else if (nulls_action == NullsAction::IGNORE_NULLS)
buffer << ", nulls_action : IGNORE_NULLS";
if (function) if (function)
buffer << ", result_type: " + getResultType()->getName(); buffer << ", result_type: " + getResultType()->getName();
@ -140,10 +145,9 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state
bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
{ {
const auto & rhs_typed = assert_cast<const FunctionNode &>(rhs); const auto & rhs_typed = assert_cast<const FunctionNode &>(rhs);
if (function_name != rhs_typed.function_name || if (function_name != rhs_typed.function_name || isAggregateFunction() != rhs_typed.isAggregateFunction()
isAggregateFunction() != rhs_typed.isAggregateFunction() || || isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() || isWindowFunction() != rhs_typed.isWindowFunction()
isOrdinaryFunction() != rhs_typed.isOrdinaryFunction() || || nulls_action != rhs_typed.nulls_action)
isWindowFunction() != rhs_typed.isWindowFunction())
return false; return false;
if (isResolved() != rhs_typed.isResolved()) if (isResolved() != rhs_typed.isResolved())
@ -171,6 +175,7 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const
hash_state.update(isOrdinaryFunction()); hash_state.update(isOrdinaryFunction());
hash_state.update(isAggregateFunction()); hash_state.update(isAggregateFunction());
hash_state.update(isWindowFunction()); hash_state.update(isWindowFunction());
hash_state.update(nulls_action);
if (!isResolved()) if (!isResolved())
return; return;
@ -192,6 +197,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
*/ */
result_function->function = function; result_function->function = function;
result_function->kind = kind; result_function->kind = kind;
result_function->nulls_action = nulls_action;
result_function->wrap_with_nullable = wrap_with_nullable; result_function->wrap_with_nullable = wrap_with_nullable;
return result_function; return result_function;
@ -202,6 +208,7 @@ ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
auto function_ast = std::make_shared<ASTFunction>(); auto function_ast = std::make_shared<ASTFunction>();
function_ast->name = function_name; function_ast->name = function_name;
function_ast->nulls_action = nulls_action;
if (function_name == "nothing") if (function_name == "nothing")
{ {

View File

@ -5,11 +5,12 @@
#include <Analyzer/ConstantValue.h> #include <Analyzer/ConstantValue.h>
#include <Analyzer/IQueryTreeNode.h> #include <Analyzer/IQueryTreeNode.h>
#include <Analyzer/ListNode.h> #include <Analyzer/ListNode.h>
#include <Common/typeid_cast.h>
#include <Core/ColumnsWithTypeAndName.h> #include <Core/ColumnsWithTypeAndName.h>
#include <Core/IResolvedFunction.h> #include <Core/IResolvedFunction.h>
#include <DataTypes/DataTypeNullable.h> #include <DataTypes/DataTypeNullable.h>
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Parsers/NullsAction.h>
#include <Common/typeid_cast.h>
namespace DB namespace DB
{ {
@ -63,6 +64,10 @@ public:
/// Get function name /// Get function name
const String & getFunctionName() const { return function_name; } const String & getFunctionName() const { return function_name; }
/// Get NullAction modifier
NullsAction getNullsAction() const { return nulls_action; }
void setNullsAction(NullsAction action) { nulls_action = action; }
/// Get parameters /// Get parameters
const ListNode & getParameters() const { return children[parameters_child_index]->as<const ListNode &>(); } const ListNode & getParameters() const { return children[parameters_child_index]->as<const ListNode &>(); }
@ -214,6 +219,7 @@ protected:
private: private:
String function_name; String function_name;
FunctionKind kind = FunctionKind::UNKNOWN; FunctionKind kind = FunctionKind::UNKNOWN;
NullsAction nulls_action = NullsAction::EMPTY;
IResolvedFunctionPtr function; IResolvedFunctionPtr function;
bool wrap_with_nullable = false; bool wrap_with_nullable = false;

View File

@ -184,10 +184,9 @@ private:
auto function_aggregate_function = function_node.getAggregateFunction(); auto function_aggregate_function = function_node.getAggregateFunction();
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, auto action = NullsAction::EMPTY;
{ argument->getResultType() }, auto aggregate_function = AggregateFunctionFactory::instance().get(
function_aggregate_function->getParameters(), aggregate_function_name, action, {argument->getResultType()}, function_aggregate_function->getParameters(), properties);
properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function)); function_node.resolveAsAggregateFunction(std::move(aggregate_function));
} }

View File

@ -76,7 +76,8 @@ public:
/// Replace `countDistinct` of initial query into `count` /// Replace `countDistinct` of initial query into `count`
auto result_type = function_node->getResultType(); auto result_type = function_node->getResultType();
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); auto action = NullsAction::EMPTY;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", action, {}, {}, properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->resolveAsAggregateFunction(std::move(aggregate_function));
function_node->getArguments().getNodes().clear(); function_node->getArguments().getNodes().clear();
} }

View File

@ -78,9 +78,11 @@ QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String
return function_node; return function_node;
} }
FunctionNodePtr createResolvedAggregateFunction(const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {}) FunctionNodePtr createResolvedAggregateFunction(
const String & name, const QueryTreeNodePtr & argument, const Array & parameters = {}, NullsAction action = NullsAction::EMPTY)
{ {
auto function_node = std::make_shared<FunctionNode>(name); auto function_node = std::make_shared<FunctionNode>(name);
function_node->setNullsAction(action);
if (!parameters.empty()) if (!parameters.empty())
{ {
@ -92,11 +94,7 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query
function_node->getArguments().getNodes() = { argument }; function_node->getArguments().getNodes() = { argument };
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get( auto aggregate_function = AggregateFunctionFactory::instance().get(name, action, {argument->getResultType()}, parameters, properties);
name,
{ argument->getResultType() },
parameters,
properties);
function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->resolveAsAggregateFunction(std::move(aggregate_function));
return function_node; return function_node;

View File

@ -56,7 +56,7 @@ private:
static inline void resolveAsCountAggregateFunction(FunctionNode & function_node) static inline void resolveAsCountAggregateFunction(FunctionNode & function_node)
{ {
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function)); function_node.resolveAsAggregateFunction(std::move(aggregate_function));
} }

View File

@ -118,6 +118,7 @@ namespace ErrorCodes
extern const int ILLEGAL_COLUMN; extern const int ILLEGAL_COLUMN;
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH; extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS; extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
} }
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first. /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -1208,7 +1209,8 @@ private:
static void expandGroupByAll(QueryNode & query_tree_node_typed); static void expandGroupByAll(QueryNode & query_tree_node_typed);
static std::string rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context); static std::string
rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context);
static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node) static std::optional<JoinTableSide> getColumnSideFromJoinTree(const QueryTreeNodePtr & resolved_identifier, const JoinNode & join_node)
{ {
@ -2310,7 +2312,8 @@ void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes); recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
} }
std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::string & aggregate_function_name, const ContextPtr & context) std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(
const std::string & aggregate_function_name, NullsAction action, const ContextPtr & context)
{ {
std::string result_aggregate_function_name = aggregate_function_name; std::string result_aggregate_function_name = aggregate_function_name;
auto aggregate_function_name_lowercase = Poco::toLower(aggregate_function_name); auto aggregate_function_name_lowercase = Poco::toLower(aggregate_function_name);
@ -2337,7 +2340,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull"); bool need_add_or_null = settings.aggregate_functions_null_for_empty && !result_aggregate_function_name.ends_with("OrNull");
if (need_add_or_null) if (need_add_or_null)
{ {
auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name); auto properties = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (!properties->returns_default_when_only_null) if (!properties->returns_default_when_only_null)
result_aggregate_function_name += "OrNull"; result_aggregate_function_name += "OrNull";
} }
@ -2349,7 +2352,7 @@ std::string QueryAnalyzer::rewriteAggregateFunctionNameIfNeeded(const std::strin
*/ */
if (result_aggregate_function_name.ends_with("OrNull")) if (result_aggregate_function_name.ends_with("OrNull"))
{ {
auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name); auto function_properies = AggregateFunctionFactory::instance().tryGetProperties(result_aggregate_function_name, action);
if (function_properies && !function_properies->returns_default_when_only_null) if (function_properies && !function_properies->returns_default_when_only_null)
{ {
size_t function_name_size = result_aggregate_function_name.size(); size_t function_name_size = result_aggregate_function_name.size();
@ -4591,6 +4594,19 @@ ProjectionNames QueryAnalyzer::resolveLambda(const QueryTreeNodePtr & lambda_nod
return result_projection_names; return result_projection_names;
} }
namespace
{
void checkFunctionNodeHasEmptyNullsAction(FunctionNode const & node)
{
if (node.getNullsAction() != NullsAction::EMPTY)
throw Exception(
ErrorCodes::SYNTAX_ERROR,
"Function with name '{}' cannot use {} NULLS",
node.getFunctionName(),
node.getNullsAction() == NullsAction::IGNORE_NULLS ? "IGNORE" : "RESPECT");
}
}
/** Resolve function node in scope. /** Resolve function node in scope.
* During function node resolve, function node can be replaced with another expression (if it match lambda or sql user defined function), * During function node resolve, function node can be replaced with another expression (if it match lambda or sql user defined function),
* with constant (if it allow constant folding), or with expression list. It is caller responsibility to handle such cases appropriately. * with constant (if it allow constant folding), or with expression list. It is caller responsibility to handle such cases appropriately.
@ -4749,6 +4765,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
if (is_special_function_exists) if (is_special_function_exists)
{ {
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/// Rewrite EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1). /// Rewrite EXISTS (subquery) into 1 IN (SELECT 1 FROM (subquery) LIMIT 1).
auto & exists_subquery_argument = function_node_ptr->getArguments().getNodes().at(0); auto & exists_subquery_argument = function_node_ptr->getArguments().getNodes().at(0);
@ -4769,6 +4786,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
if (is_special_function_if && !function_node_ptr->getArguments().getNodes().empty()) if (is_special_function_if && !function_node_ptr->getArguments().getNodes().empty())
{ {
checkFunctionNodeHasEmptyNullsAction(*function_node_ptr);
/** Handle special case with constant If function, even if some of the arguments are invalid. /** Handle special case with constant If function, even if some of the arguments are invalid.
* *
* SELECT if(hasColumnInTable('system', 'numbers', 'not_existing_column'), not_existing_column, 5) FROM system.numbers; * SELECT if(hasColumnInTable('system', 'numbers', 'not_existing_column'), not_existing_column, 5) FROM system.numbers;
@ -4834,6 +4852,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
/// Replace right IN function argument if it is table or table function with subquery that read ordinary columns /// Replace right IN function argument if it is table or table function with subquery that read ordinary columns
if (is_special_function_in) if (is_special_function_in)
{ {
checkFunctionNodeHasEmptyNullsAction(function_node);
if (scope.context->getSettingsRef().transform_null_in) if (scope.context->getSettingsRef().transform_null_in)
{ {
static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> in_function_to_replace_null_in_function_map = static constexpr std::array<std::pair<std::string_view, std::string_view>, 4> in_function_to_replace_null_in_function_map =
@ -5012,6 +5031,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
lambda_expression_untyped->formatASTForErrorMessage(), lambda_expression_untyped->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage()); scope.scope_node->formatASTForErrorMessage());
checkFunctionNodeHasEmptyNullsAction(function_node);
if (!parameters.empty()) if (!parameters.empty())
{ {
throw Exception( throw Exception(
@ -5041,6 +5062,8 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Function 'untuple' must have 1 argument. In scope {}", "Function 'untuple' must have 1 argument. In scope {}",
scope.scope_node->formatASTForErrorMessage()); scope.scope_node->formatASTForErrorMessage());
checkFunctionNodeHasEmptyNullsAction(function_node);
const auto & untuple_argument = function_arguments[0]; const auto & untuple_argument = function_arguments[0];
auto result_type = untuple_argument->getResultType(); auto result_type = untuple_argument->getResultType();
const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get()); const auto * tuple_data_type = typeid_cast<const DataTypeTuple *>(result_type.get());
@ -5091,6 +5114,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Function GROUPING can have up to 64 arguments, but {} provided", "Function GROUPING can have up to 64 arguments, but {} provided",
function_arguments_size); function_arguments_size);
checkFunctionNodeHasEmptyNullsAction(function_node);
bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility; bool force_grouping_standard_compatibility = scope.context->getSettingsRef().force_grouping_standard_compatibility;
auto grouping_function = std::make_shared<FunctionGrouping>(force_grouping_standard_compatibility); auto grouping_function = std::make_shared<FunctionGrouping>(force_grouping_standard_compatibility);
@ -5115,10 +5139,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Window function '{}' does not support lambda arguments", "Window function '{}' does not support lambda arguments",
function_name); function_name);
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context); auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties); auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);
function_node.resolveAsWindowFunction(std::move(aggregate_function)); function_node.resolveAsWindowFunction(std::move(aggregate_function));
@ -5142,7 +5168,11 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
is_executable_udf = false; is_executable_udf = false;
} }
if (!function) if (function)
{
checkFunctionNodeHasEmptyNullsAction(function_node);
}
else
{ {
if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name)) if (!AggregateFunctionFactory::instance().isAggregateFunctionName(function_name))
{ {
@ -5181,10 +5211,12 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
"Aggregate function '{}' does not support lambda arguments", "Aggregate function '{}' does not support lambda arguments",
function_name); function_name);
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, scope.context); auto action = function_node_ptr->getNullsAction();
std::string aggregate_function_name = rewriteAggregateFunctionNameIfNeeded(function_name, action, scope.context);
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, argument_types, parameters, properties); auto aggregate_function
= AggregateFunctionFactory::instance().get(aggregate_function_name, action, argument_types, parameters, properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function)); function_node.resolveAsAggregateFunction(std::move(aggregate_function));

View File

@ -97,6 +97,7 @@ private:
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get( auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node.getFunctionName() + suffix, function_node.getFunctionName() + suffix,
function_node.getNullsAction(),
argument_types, argument_types,
function_node.getAggregateFunction()->getParameters(), function_node.getAggregateFunction()->getParameters(),
properties); properties);

View File

@ -157,10 +157,8 @@ private:
static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type) static inline void resolveAsCountIfAggregateFunction(FunctionNode & function_node, const DataTypePtr & argument_type)
{ {
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("countIf", auto aggregate_function = AggregateFunctionFactory::instance().get(
{argument_type}, "countIf", NullsAction::EMPTY, {argument_type}, function_node.getAggregateFunction()->getParameters(), properties);
function_node.getAggregateFunction()->getParameters(),
properties);
function_node.resolveAsAggregateFunction(std::move(aggregate_function)); function_node.resolveAsAggregateFunction(std::move(aggregate_function));
} }

View File

@ -76,7 +76,9 @@ public:
argument_types.emplace_back(function_node_argument->getResultType()); argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(function_node->getFunctionName(), auto aggregate_function = AggregateFunctionFactory::instance().get(
function_node->getFunctionName(),
NullsAction::EMPTY,
argument_types, argument_types,
function_node->getAggregateFunction()->getParameters(), function_node->getAggregateFunction()->getParameters(),
properties); properties);

View File

@ -176,7 +176,7 @@ public:
if (match_subquery_with_distinct() || match_subquery_with_group_by()) if (match_subquery_with_distinct() || match_subquery_with_group_by())
{ {
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get("count", {}, {}, properties); auto aggregate_function = AggregateFunctionFactory::instance().get("count", NullsAction::EMPTY, {}, {}, properties);
function_node->getArguments().getNodes().clear(); function_node->getArguments().getNodes().clear();
function_node->resolveAsAggregateFunction(std::move(aggregate_function)); function_node->resolveAsAggregateFunction(std::move(aggregate_function));

View File

@ -607,6 +607,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co
else else
{ {
auto function_node = std::make_shared<FunctionNode>(function->name); auto function_node = std::make_shared<FunctionNode>(function->name);
function_node->setNullsAction(function->nulls_action);
if (function->parameters) if (function->parameters)
{ {

View File

@ -544,11 +544,8 @@ inline AggregateFunctionPtr resolveAggregateFunction(FunctionNode * function_nod
argument_types.emplace_back(function_node_argument->getResultType()); argument_types.emplace_back(function_node_argument->getResultType());
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
return AggregateFunctionFactory::instance().get( auto action = NullsAction::EMPTY;
function_node->getFunctionName(), return AggregateFunctionFactory::instance().get(function_node->getFunctionName(), action, argument_types, parameters, properties);
argument_types,
parameters,
properties);
} }
} }

View File

@ -451,17 +451,25 @@ void BackupEntriesCollector::gatherDatabaseMetadata(
} }
catch (...) catch (...)
{ {
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for database {}", database_name); /// Probably the database has been just removed.
if (throw_if_database_not_found)
throw;
LOG_WARNING(log, "Couldn't get a create query for database {}", backQuoteIfNeed(database_name));
return;
}
auto * create = create_database_query->as<ASTCreateQuery>();
if (create->getDatabase() != database_name)
{
/// Probably the database has been just renamed. Use the older name for backup to keep the backup consistent.
LOG_WARNING(log, "Got a create query with unexpected name {} for database {}",
backQuoteIfNeed(create->getDatabase()), backQuoteIfNeed(database_name));
create_database_query = create_database_query->clone();
create = create_database_query->as<ASTCreateQuery>();
create->setDatabase(database_name);
} }
database_info.create_database_query = create_database_query; database_info.create_database_query = create_database_query;
const auto & create = create_database_query->as<const ASTCreateQuery &>();
if (create.getDatabase() != database_name)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP,
"Got a create query with unexpected name {} for database {}",
backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(database_name));
String new_database_name = renaming_map.getNewDatabaseName(database_name); String new_database_name = renaming_map.getNewDatabaseName(database_name);
database_info.metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql"); database_info.metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql");
} }
@ -582,26 +590,34 @@ std::vector<std::pair<ASTPtr, StoragePtr>> BackupEntriesCollector::findTablesInD
} }
std::unordered_set<String> found_table_names; std::unordered_set<String> found_table_names;
for (const auto & db_table : db_tables) for (auto & db_table : db_tables)
{ {
const auto & create_table_query = db_table.first; auto create_table_query = db_table.first;
const auto & create = create_table_query->as<const ASTCreateQuery &>(); auto * create = create_table_query->as<ASTCreateQuery>();
found_table_names.emplace(create.getTable()); found_table_names.emplace(create->getTable());
if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
{ {
if (!create.temporary) if (!create->temporary)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, {
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Got a non-temporary create query for {}", "Got a non-temporary create query for {}",
tableNameWithTypeToString(database_name, create.getTable(), false)); tableNameWithTypeToString(database_name, create->getTable(), false));
}
} }
else else
{ {
if (create.getDatabase() != database_name) if (create->getDatabase() != database_name)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, {
"Got a create query with unexpected database name {} for {}", /// Probably the table has been just renamed. Use the older name for backup to keep the backup consistent.
backQuoteIfNeed(create.getDatabase()), LOG_WARNING(log, "Got a create query with unexpected database name {} for {}",
tableNameWithTypeToString(database_name, create.getTable(), false)); backQuoteIfNeed(create->getDatabase()),
tableNameWithTypeToString(database_name, create->getTable(), false));
create_table_query = create_table_query->clone();
create = create_table_query->as<ASTCreateQuery>();
create->setDatabase(database_name);
db_table.first = create_table_query;
}
} }
} }

View File

@ -55,6 +55,10 @@ void WithRetries::renewZooKeeper(FaultyKeeper my_faulty_zookeeper) const
callback(my_faulty_zookeeper); callback(my_faulty_zookeeper);
} }
else
{
my_faulty_zookeeper->setKeeper(zookeeper);
}
} }
const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const const WithRetries::KeeperSettings & WithRetries::getKeeperSettings() const

View File

@ -46,6 +46,7 @@
#include <Common/assert_cast.h> #include <Common/assert_cast.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
namespace DB namespace DB
{ {
@ -384,6 +385,39 @@ void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast)
// the generic recursion into IAST.children. // the generic recursion into IAST.children.
} }
void QueryFuzzer::fuzzNullsAction(NullsAction & action)
{
/// If it's not using actions, then it's a high change it doesn't support it to begin with
if ((action == NullsAction::EMPTY) && (fuzz_rand() % 100 == 0))
{
if (fuzz_rand() % 2 == 0)
action = NullsAction::RESPECT_NULLS;
else
action = NullsAction::IGNORE_NULLS;
}
else if (fuzz_rand() % 20 == 0)
{
switch (fuzz_rand() % 3)
{
case 0:
{
action = NullsAction::EMPTY;
break;
}
case 1:
{
action = NullsAction::RESPECT_NULLS;
break;
}
default:
{
action = NullsAction::IGNORE_NULLS;
break;
}
}
}
}
void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def) void QueryFuzzer::fuzzWindowFrame(ASTWindowDefinition & def)
{ {
switch (fuzz_rand() % 40) switch (fuzz_rand() % 40)
@ -966,6 +1000,9 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
fuzzColumnLikeExpressionList(fn->arguments.get()); fuzzColumnLikeExpressionList(fn->arguments.get());
fuzzColumnLikeExpressionList(fn->parameters.get()); fuzzColumnLikeExpressionList(fn->parameters.get());
if (AggregateUtils::isAggregateFunction(*fn))
fuzzNullsAction(fn->nulls_action);
if (fn->is_window_function && fn->window_definition) if (fn->is_window_function && fn->window_definition)
{ {
auto & def = fn->window_definition->as<ASTWindowDefinition &>(); auto & def = fn->window_definition->as<ASTWindowDefinition &>();

View File

@ -10,6 +10,7 @@
#include <Core/Field.h> #include <Core/Field.h>
#include <Parsers/ASTExplainQuery.h> #include <Parsers/ASTExplainQuery.h>
#include <Parsers/IAST.h> #include <Parsers/IAST.h>
#include <Parsers/NullsAction.h>
#include <Common/randomSeed.h> #include <Common/randomSeed.h>
#include "Parsers/IAST_fwd.h" #include "Parsers/IAST_fwd.h"
@ -86,6 +87,7 @@ struct QueryFuzzer
void fuzzOrderByElement(ASTOrderByElement * elem); void fuzzOrderByElement(ASTOrderByElement * elem);
void fuzzOrderByList(IAST * ast); void fuzzOrderByList(IAST * ast);
void fuzzColumnLikeExpressionList(IAST * ast); void fuzzColumnLikeExpressionList(IAST * ast);
void fuzzNullsAction(NullsAction & action);
void fuzzWindowFrame(ASTWindowDefinition & def); void fuzzWindowFrame(ASTWindowDefinition & def);
void fuzzCreateQuery(ASTCreateQuery & create); void fuzzCreateQuery(ASTCreateQuery & create);
void fuzzExplainQuery(ASTExplainQuery & explain); void fuzzExplainQuery(ASTExplainQuery & explain);

View File

@ -32,21 +32,23 @@ namespace ErrorCodes
Suggest::Suggest() Suggest::Suggest()
{ {
/// Keywords may be not up to date with ClickHouse parser. /// Keywords may be not up to date with ClickHouse parser.
addWords({ addWords({"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON",
"CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", "CLUSTER", "DEFAULT", "MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE",
"MATERIALIZED", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", "DROP", "SETTINGS", "ATTACH", "DETACH", "DROP", "RENAME", "TO", "ALTER", "ADD",
"RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", "PROJECT", "PRIMARY", "KEY",
"PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", "INTO",
"OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", "ELSE", "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN",
"END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", "THEN", "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE",
"SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "FINAL", "DEDUPLICATE", "INSERT", "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY",
"LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", "INNER", "LEFT", "RIGHT",
"WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", "ASC", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", "BY",
"IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", "ROLE", "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND",
"PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "OR", "ASC", "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST",
"IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "BETWEEN", "TRUNCATE", "USER", "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW",
"INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND" "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", "REPLACE", "IDENTIFIED", "HOST",
}); "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", "INTERVAL",
"LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "CLEANUP", "APPEND",
"IGNORE NULLS", "RESPECT NULLS", "OVER"});
} }
static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion) static String getLoadSuggestionQuery(Int32 suggestion_limit, bool basic_suggestion)

View File

@ -3,11 +3,11 @@
#include <cstring> #include <cstring>
#include <memory> #include <memory>
#include <vector> #include <vector>
#include <boost/noncopyable.hpp>
#include <Core/Defines.h> #include <Core/Defines.h>
#include <Common/memcpySmall.h> #include <boost/noncopyable.hpp>
#include <Common/ProfileEvents.h>
#include <Common/Allocator.h> #include <Common/Allocator.h>
#include <Common/ProfileEvents.h>
#include <Common/memcpySmall.h>
#if __has_include(<sanitizer/asan_interface.h>) && defined(ADDRESS_SANITIZER) #if __has_include(<sanitizer/asan_interface.h>) && defined(ADDRESS_SANITIZER)
# include <sanitizer/asan_interface.h> # include <sanitizer/asan_interface.h>
@ -180,7 +180,7 @@ public:
char * alloc(size_t size) char * alloc(size_t size)
{ {
used_bytes += size; used_bytes += size;
if (unlikely(head.empty() || static_cast<std::ptrdiff_t>(size) > head.end - head.pos)) if (unlikely(head.empty() || size > head.remaining()))
addMemoryChunk(size); addMemoryChunk(size);
char * res = head.pos; char * res = head.pos;
@ -193,6 +193,9 @@ public:
char * alignedAlloc(size_t size, size_t alignment) char * alignedAlloc(size_t size, size_t alignment)
{ {
used_bytes += size; used_bytes += size;
if (unlikely(head.empty() || size > head.remaining()))
addMemoryChunk(size + alignment);
do do
{ {
void * head_pos = head.pos; void * head_pos = head.pos;

View File

@ -1,12 +1,24 @@
#include <Common/AsyncLoader.h> #include <Common/AsyncLoader.h>
#include <limits>
#include <optional>
#include <base/defines.h> #include <base/defines.h>
#include <base/scope_guard.h>
#include <Common/ErrorCodes.h> #include <Common/ErrorCodes.h>
#include <Common/Exception.h> #include <Common/Exception.h>
#include <Common/noexcept_scope.h> #include <Common/noexcept_scope.h>
#include <Common/setThreadName.h> #include <Common/setThreadName.h>
#include <Common/logger_useful.h> #include <Common/logger_useful.h>
#include <Common/ThreadPool.h> #include <Common/ThreadPool.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/ProfileEvents.h>
#include <Common/Stopwatch.h>
namespace ProfileEvents
{
extern const Event AsyncLoaderWaitMicroseconds;
}
namespace DB namespace DB
{ {
@ -16,6 +28,7 @@ namespace ErrorCodes
extern const int ASYNC_LOAD_CYCLE; extern const int ASYNC_LOAD_CYCLE;
extern const int ASYNC_LOAD_FAILED; extern const int ASYNC_LOAD_FAILED;
extern const int ASYNC_LOAD_CANCELED; extern const int ASYNC_LOAD_CANCELED;
extern const int LOGICAL_ERROR;
} }
static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256; static constexpr size_t PRINT_MESSAGE_EACH_N_OBJECTS = 256;
@ -52,63 +65,48 @@ size_t LoadJob::pool() const
return pool_id; return pool_id;
} }
void LoadJob::wait() const
{
std::unique_lock lock{mutex};
waiters++;
finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
waiters--;
if (load_exception)
std::rethrow_exception(load_exception);
}
void LoadJob::waitNoThrow() const noexcept
{
std::unique_lock lock{mutex};
waiters++;
finished.wait(lock, [this] { return load_status != LoadStatus::PENDING; });
waiters--;
}
size_t LoadJob::waitersCount() const size_t LoadJob::waitersCount() const
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
return waiters; return waiters;
} }
void LoadJob::ok() size_t LoadJob::ok()
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
load_status = LoadStatus::OK; load_status = LoadStatus::OK;
finish(); return finish();
} }
void LoadJob::failed(const std::exception_ptr & ptr) size_t LoadJob::failed(const std::exception_ptr & ptr)
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
load_status = LoadStatus::FAILED; load_status = LoadStatus::FAILED;
load_exception = ptr; load_exception = ptr;
finish(); return finish();
} }
void LoadJob::canceled(const std::exception_ptr & ptr) size_t LoadJob::canceled(const std::exception_ptr & ptr)
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
load_status = LoadStatus::CANCELED; load_status = LoadStatus::CANCELED;
load_exception = ptr; load_exception = ptr;
finish(); return finish();
} }
void LoadJob::finish() size_t LoadJob::finish()
{ {
func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` and `LoadJob::wait()` return func = {}; // To ensure job function is destructed before `AsyncLoader::wait()` return
finish_time = std::chrono::system_clock::now(); finish_time = std::chrono::system_clock::now();
if (waiters > 0) if (waiters > 0)
finished.notify_all(); finished.notify_all();
return std::exchange(suspended_waiters, 0);
} }
void LoadJob::scheduled() void LoadJob::scheduled(UInt64 job_id_)
{ {
chassert(job_id == 0); // Job cannot be scheduled twice
job_id = job_id_;
schedule_time = std::chrono::system_clock::now(); schedule_time = std::chrono::system_clock::now();
} }
@ -118,11 +116,11 @@ void LoadJob::enqueued()
enqueue_time = std::chrono::system_clock::now(); enqueue_time = std::chrono::system_clock::now();
} }
void LoadJob::execute(size_t pool, const LoadJobPtr & self) void LoadJob::execute(AsyncLoader & loader, size_t pool, const LoadJobPtr & self)
{ {
execution_pool_id = pool; execution_pool_id = pool;
start_time = std::chrono::system_clock::now(); start_time = std::chrono::system_clock::now();
func(self); func(loader, self);
} }
@ -180,11 +178,11 @@ AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool lo
init.metric_threads, init.metric_threads,
init.metric_active_threads, init.metric_active_threads,
init.metric_scheduled_threads, init.metric_scheduled_threads,
init.max_threads, /* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
/* max_free_threads = */ 0, /* max_free_threads = */ 0, // We do not require free threads
init.max_threads), /* queue_size = */0), // Unlimited queue to avoid blocking during worker spawning
.ready_queue = {}, .ready_queue = {},
.max_threads = init.max_threads .max_threads = init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()
}); });
} }
@ -228,16 +226,16 @@ void AsyncLoader::stop()
void AsyncLoader::schedule(LoadTask & task) void AsyncLoader::schedule(LoadTask & task)
{ {
chassert(this == &task.loader); chassert(this == &task.loader);
scheduleImpl(task.jobs); schedule(task.jobs);
} }
void AsyncLoader::schedule(const LoadTaskPtr & task) void AsyncLoader::schedule(const LoadTaskPtr & task)
{ {
chassert(this == &task->loader); chassert(this == &task->loader);
scheduleImpl(task->jobs); schedule(task->jobs);
} }
void AsyncLoader::schedule(const std::vector<LoadTaskPtr> & tasks) void AsyncLoader::schedule(const LoadTaskPtrs & tasks)
{ {
LoadJobSet all_jobs; LoadJobSet all_jobs;
for (const auto & task : tasks) for (const auto & task : tasks)
@ -245,10 +243,10 @@ void AsyncLoader::schedule(const std::vector<LoadTaskPtr> & tasks)
chassert(this == &task->loader); chassert(this == &task->loader);
all_jobs.insert(task->jobs.begin(), task->jobs.end()); all_jobs.insert(task->jobs.begin(), task->jobs.end());
} }
scheduleImpl(all_jobs); schedule(all_jobs);
} }
void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs) void AsyncLoader::schedule(const LoadJobSet & jobs_to_schedule)
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
@ -264,7 +262,7 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
// 1) exclude already scheduled or finished jobs // 1) exclude already scheduled or finished jobs
// 2) include assigned job dependencies (that are not yet scheduled) // 2) include assigned job dependencies (that are not yet scheduled)
LoadJobSet jobs; LoadJobSet jobs;
for (const auto & job : input_jobs) for (const auto & job : jobs_to_schedule)
gatherNotScheduled(job, jobs, lock); gatherNotScheduled(job, jobs, lock);
// Ensure scheduled_jobs graph will have no cycles. The only way to get a cycle is to add a cycle, assuming old jobs cannot reference new ones. // Ensure scheduled_jobs graph will have no cycles. The only way to get a cycle is to add a cycle, assuming old jobs cannot reference new ones.
@ -280,7 +278,7 @@ void AsyncLoader::scheduleImpl(const LoadJobSet & input_jobs)
NOEXCEPT_SCOPE({ NOEXCEPT_SCOPE({
ALLOW_ALLOCATIONS_IN_SCOPE; ALLOW_ALLOCATIONS_IN_SCOPE;
scheduled_jobs.try_emplace(job); scheduled_jobs.try_emplace(job);
job->scheduled(); job->scheduled(++last_job_id);
}); });
} }
@ -365,11 +363,20 @@ void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool)
if (!job) if (!job)
return; return;
chassert(new_pool < pools.size()); chassert(new_pool < pools.size());
DENY_ALLOCATIONS_IN_SCOPE; DENY_ALLOCATIONS_IN_SCOPE;
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
prioritize(job, new_pool, lock); prioritize(job, new_pool, lock);
} }
void AsyncLoader::wait(const LoadJobPtr & job, bool no_throw)
{
std::unique_lock job_lock{job->mutex};
wait(job_lock, job);
if (!no_throw && job->load_exception)
std::rethrow_exception(job->load_exception);
}
void AsyncLoader::remove(const LoadJobSet & jobs) void AsyncLoader::remove(const LoadJobSet & jobs)
{ {
DENY_ALLOCATIONS_IN_SCOPE; DENY_ALLOCATIONS_IN_SCOPE;
@ -397,9 +404,10 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{ {
// Job is currently executing // Job is currently executing
ALLOW_ALLOCATIONS_IN_SCOPE;
chassert(info->second.isExecuting()); chassert(info->second.isExecuting());
lock.unlock(); lock.unlock();
job->waitNoThrow(); // Wait for job to finish wait(job, /* no_throw = */ true); // Wait for job to finish
lock.lock(); lock.lock();
} }
} }
@ -415,10 +423,12 @@ void AsyncLoader::remove(const LoadJobSet & jobs)
void AsyncLoader::setMaxThreads(size_t pool, size_t value) void AsyncLoader::setMaxThreads(size_t pool, size_t value)
{ {
if (value == 0)
value = getNumberOfPhysicalCPUCores();
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
auto & p = pools[pool]; auto & p = pools[pool];
p.thread_pool->setMaxThreads(value); // Note that underlying `ThreadPool` always has unlimited `queue_size` and `max_threads`.
p.thread_pool->setQueueSize(value); // Keep queue size equal max threads count to avoid blocking during spawning // Worker management is done by `AsyncLoader` based on `Pool::max_threads + Pool::suspended_workers` instead.
p.max_threads = value; p.max_threads = value;
if (!is_running) if (!is_running)
return; return;
@ -442,7 +452,6 @@ Priority AsyncLoader::getPoolPriority(size_t pool) const
return pools[pool].priority; // NOTE: lock is not needed because `priority` is const and `pools` are immutable return pools[pool].priority; // NOTE: lock is not needed because `priority` is const and `pools` are immutable
} }
size_t AsyncLoader::getScheduledJobCount() const size_t AsyncLoader::getScheduledJobCount() const
{ {
std::unique_lock lock{mutex}; std::unique_lock lock{mutex};
@ -479,11 +488,11 @@ void AsyncLoader::checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mute
while (!left.empty()) while (!left.empty())
{ {
LoadJobPtr job = *left.begin(); LoadJobPtr job = *left.begin();
checkCycleImpl(job, left, visited, lock); checkCycle(job, left, visited, lock);
} }
} }
String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock) String AsyncLoader::checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock)
{ {
if (!left.contains(job)) if (!left.contains(job))
return {}; // Do not consider external dependencies and already processed jobs return {}; // Do not consider external dependencies and already processed jobs
@ -494,7 +503,7 @@ String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, Lo
} }
for (const auto & dep : job->dependencies) for (const auto & dep : job->dependencies)
{ {
if (auto chain = checkCycleImpl(dep, left, visited, lock); !chain.empty()) if (auto chain = checkCycle(dep, left, visited, lock); !chain.empty())
{ {
if (!visited.contains(job)) // Check for cycle end if (!visited.contains(job)) // Check for cycle end
throw Exception(ErrorCodes::ASYNC_LOAD_CYCLE, "Load job dependency cycle detected: {} -> {}", job->name, chain); throw Exception(ErrorCodes::ASYNC_LOAD_CYCLE, "Load job dependency cycle detected: {} -> {}", job->name, chain);
@ -509,10 +518,11 @@ String AsyncLoader::checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, Lo
void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock) void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock)
{ {
chassert(scheduled_jobs.contains(job)); // Job was pending chassert(scheduled_jobs.contains(job)); // Job was pending
size_t resumed_workers = 0; // Number of workers resumed in the execution pool of the job
if (status == LoadStatus::OK) if (status == LoadStatus::OK)
{ {
// Notify waiters // Notify waiters
job->ok(); resumed_workers += job->ok();
// Update dependent jobs and enqueue if ready // Update dependent jobs and enqueue if ready
for (const auto & dep : scheduled_jobs[job].dependent_jobs) for (const auto & dep : scheduled_jobs[job].dependent_jobs)
@ -528,9 +538,9 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
{ {
// Notify waiters // Notify waiters
if (status == LoadStatus::FAILED) if (status == LoadStatus::FAILED)
job->failed(exception_from_job); resumed_workers += job->failed(exception_from_job);
else if (status == LoadStatus::CANCELED) else if (status == LoadStatus::CANCELED)
job->canceled(exception_from_job); resumed_workers += job->canceled(exception_from_job);
Info & info = scheduled_jobs[job]; Info & info = scheduled_jobs[job];
if (info.isReady()) if (info.isReady())
@ -572,35 +582,40 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
if (log_progress) if (log_progress)
logAboutProgress(log, finished_jobs.size() - old_jobs, finished_jobs.size() + scheduled_jobs.size() - old_jobs, stopwatch); logAboutProgress(log, finished_jobs.size() - old_jobs, finished_jobs.size() + scheduled_jobs.size() - old_jobs, stopwatch);
}); });
if (resumed_workers)
{
Pool & pool = pools[job->executionPool()];
pool.suspended_workers -= resumed_workers;
}
} }
void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock) void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock)
{ {
Pool & old_pool = pools[job->pool_id];
Pool & new_pool = pools[new_pool_id];
if (old_pool.priority <= new_pool.priority)
return; // Never lower priority or change pool leaving the same priority
// Note that there is no point in prioritizing finished jobs, but because we do not lock `job.mutex` here (due to recursion),
// Races are inevitable, so we prioritize all job unconditionally: both finished and pending.
if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end()) if (auto info = scheduled_jobs.find(job); info != scheduled_jobs.end())
{ {
Pool & old_pool = pools[job->pool_id];
Pool & new_pool = pools[new_pool_id];
if (old_pool.priority <= new_pool.priority)
return; // Never lower priority or change pool leaving the same priority
// Update priority and push job forward through ready queue if needed
UInt64 ready_seqno = info->second.ready_seqno;
// Requeue job into the new pool queue without allocations // Requeue job into the new pool queue without allocations
if (ready_seqno) if (UInt64 ready_seqno = info->second.ready_seqno)
{ {
new_pool.ready_queue.insert(old_pool.ready_queue.extract(ready_seqno)); new_pool.ready_queue.insert(old_pool.ready_queue.extract(ready_seqno));
if (canSpawnWorker(new_pool, lock)) if (canSpawnWorker(new_pool, lock))
spawn(new_pool, lock); spawn(new_pool, lock);
} }
// Set user-facing pool (may affect executing jobs)
job->pool_id.store(new_pool_id);
// Recurse into dependencies
for (const auto & dep : job->dependencies)
prioritize(dep, new_pool_id, lock);
} }
job->pool_id.store(new_pool_id);
// Recurse into dependencies
for (const auto & dep : job->dependencies)
prioritize(dep, new_pool_id, lock);
} }
void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock) void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock)
@ -620,11 +635,102 @@ void AsyncLoader::enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<
spawn(pool, lock); spawn(pool, lock);
} }
// Keep track of currently executing load jobs to be able to:
// 1) Detect "wait dependent" deadlocks -- throw LOGICAL_ERROR
// (when job A function waits for job B that depends on job A)
// 2) Detect "wait not scheduled" deadlocks -- throw LOGICAL_ERROR
// (thread T is waiting on an assigned job A, but job A is not yet scheduled)
// 3) Resolve "priority inversion" deadlocks -- apply priority inheritance
// (when high-priority job A function waits for a lower-priority job B, and B never starts due to its priority)
// 4) Resolve "blocked pool" deadlocks -- spawn more workers
// (when job A in pool P waits for another ready job B in P, but B never starts because there are no free workers in P)
thread_local LoadJob * current_load_job = nullptr;
size_t currentPoolOr(size_t pool)
{
return current_load_job ? current_load_job->executionPool() : pool;
}
bool detectWaitDependentDeadlock(const LoadJobPtr & waited)
{
if (waited.get() == current_load_job)
return true;
for (const auto & dep : waited->dependencies)
{
if (detectWaitDependentDeadlock(dep))
return true;
}
return false;
}
void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr & job)
{
// Ensure job we are going to wait was scheduled to avoid "wait not scheduled" deadlocks
if (job->job_id == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' waits for not scheduled load job '{}'", current_load_job->name, job->name);
// Deadlock detection and resolution
if (current_load_job && job->load_status == LoadStatus::PENDING)
{
if (detectWaitDependentDeadlock(job))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' waits for dependent load job '{}'", current_load_job->name, job->name);
auto worker_pool = current_load_job->executionPool();
auto worker_priority = getPoolPriority(worker_pool);
auto job_priority = getPoolPriority(job->pool_id);
// Waiting for a lower-priority job ("priority inversion" deadlock) is resolved using priority inheritance.
if (worker_priority < job_priority)
{
job_lock.unlock(); // Avoid reverse locking order
prioritize(job, worker_pool);
job_lock.lock();
}
// Spawn more workers to avoid exhaustion of worker pool ("blocked pool" deadlock)
if (worker_pool == job->pool_id)
{
job_lock.unlock(); // Avoid reverse locking order
workerIsSuspendedByWait(worker_pool, job);
job_lock.lock();
}
}
Stopwatch watch;
job->waiters++;
job->finished.wait(job_lock, [&] { return job->load_status != LoadStatus::PENDING; });
job->waiters--;
ProfileEvents::increment(ProfileEvents::AsyncLoaderWaitMicroseconds, watch.elapsedMicroseconds());
}
void AsyncLoader::workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job)
{
std::unique_lock lock{mutex};
std::unique_lock job_lock{job->mutex};
if (job->load_status != LoadStatus::PENDING)
return; // Job is already done, worker can continue execution
// To resolve "blocked pool" deadlocks we spawn a new worker for every suspended worker, if required
// This can lead to a visible excess of `max_threads` specified for a pool,
// but actual number of NOT suspended workers may exceed `max_threads` ONLY in intermittent state.
Pool & pool = pools[pool_id];
pool.suspended_workers++;
job->suspended_waiters++;
if (canSpawnWorker(pool, lock))
spawn(pool, lock);
// TODO(serxa): it is a good idea to propagate `job` and all its dependencies in `pool.ready_queue` by introducing
// key {suspended_waiters, ready_seqno} instead of plain `ready_seqno`, to force newly spawn workers to work on jobs
// that are being waited. But it doesn't affect correctness. So let's not complicate it for time being.
}
bool AsyncLoader::canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &) bool AsyncLoader::canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &)
{ {
// TODO(serxa): optimization: we should not spawn new worker on the first enqueue during `finish()` because current worker will take this job.
return is_running return is_running
&& !pool.ready_queue.empty() && !pool.ready_queue.empty()
&& pool.workers < pool.max_threads && pool.workers < pool.max_threads + pool.suspended_workers
&& (!current_priority || *current_priority >= pool.priority); && (!current_priority || *current_priority >= pool.priority);
} }
@ -632,7 +738,7 @@ bool AsyncLoader::canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &)
{ {
return is_running return is_running
&& !pool.ready_queue.empty() && !pool.ready_queue.empty()
&& pool.workers <= pool.max_threads && pool.workers <= pool.max_threads + pool.suspended_workers
&& (!current_priority || *current_priority >= pool.priority); && (!current_priority || *current_priority >= pool.priority);
} }
@ -705,7 +811,9 @@ void AsyncLoader::worker(Pool & pool)
try try
{ {
job->execute(pool_id, job); current_load_job = job.get();
SCOPE_EXIT({ current_load_job = nullptr; }); // Note that recursive job execution is not supported
job->execute(*this, pool_id, job);
exception_from_job = {}; exception_from_job = {};
} }
catch (...) catch (...)

View File

@ -21,6 +21,16 @@ namespace Poco { class Logger; }
namespace DB namespace DB
{ {
// TERMINOLOGY:
// Job (`LoadJob`) - The smallest part of loading process, executed by worker. Job can depend on the other jobs. Jobs are grouped in tasks.
// Task (`LoadTask`) - Owning holder of a set of jobs. Should be held during the whole job lifetime. Cancels all jobs on destruction.
// Goal jobs (goals) - a subset of "final" jobs of a task (usually no job in task depend on a goal job).
// By default all jobs in task are included in goal jobs.
// Goals should used if you need to create a job that depends on a task (to avoid placing all jobs of the task in dependencies).
// Pool (worker pool) - A set of workers with specific priority. Every job is assigned to a pool. Job can change its pool dynamically.
// Priority (pool priority) - Constant integer value showing relative priority of a pool. Lower value means higher priority.
// AsyncLoader - scheduling system responsible for job dependency tracking and worker management respecting pool priorities.
class LoadJob; class LoadJob;
using LoadJobPtr = std::shared_ptr<LoadJob>; using LoadJobPtr = std::shared_ptr<LoadJob>;
using LoadJobSet = std::unordered_set<LoadJobPtr>; using LoadJobSet = std::unordered_set<LoadJobPtr>;
@ -43,6 +53,7 @@ enum class LoadStatus
// Smallest indivisible part of a loading process. Load job can have multiple dependencies, thus jobs constitute a direct acyclic graph (DAG). // Smallest indivisible part of a loading process. Load job can have multiple dependencies, thus jobs constitute a direct acyclic graph (DAG).
// Job encapsulates a function to be executed by `AsyncLoader` as soon as job functions of all dependencies are successfully executed. // Job encapsulates a function to be executed by `AsyncLoader` as soon as job functions of all dependencies are successfully executed.
// Job can be waited for by an arbitrary number of threads. See `AsyncLoader` class description for more details. // Job can be waited for by an arbitrary number of threads. See `AsyncLoader` class description for more details.
// WARNING: jobs are usually held with ownership by tasks (see `LoadTask`). You are encouraged to add jobs into a tasks as soon as the are created.
class LoadJob : private boost::noncopyable class LoadJob : private boost::noncopyable
{ {
public: public:
@ -50,6 +61,7 @@ public:
LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, Func && func_) LoadJob(LoadJobSetType && dependencies_, String name_, size_t pool_id_, Func && func_)
: dependencies(std::forward<LoadJobSetType>(dependencies_)) : dependencies(std::forward<LoadJobSetType>(dependencies_))
, name(std::move(name_)) , name(std::move(name_))
, execution_pool_id(pool_id_)
, pool_id(pool_id_) , pool_id(pool_id_)
, func(std::forward<Func>(func_)) , func(std::forward<Func>(func_))
{} {}
@ -67,18 +79,12 @@ public:
// Value may change during job execution by `prioritize()`. // Value may change during job execution by `prioritize()`.
size_t pool() const; size_t pool() const;
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status. // Returns number of threads blocked by `wait()` calls.
// Throws if job is FAILED or CANCELED. Returns or throws immediately if called on non-pending job.
void wait() const;
// Wait for a job to reach any non PENDING status.
void waitNoThrow() const noexcept;
// Returns number of threads blocked by `wait()` or `waitNoThrow()` calls.
size_t waitersCount() const; size_t waitersCount() const;
// Introspection // Introspection
using TimePoint = std::chrono::system_clock::time_point; using TimePoint = std::chrono::system_clock::time_point;
UInt64 jobId() const { return job_id; }
TimePoint scheduleTime() const { return schedule_time; } TimePoint scheduleTime() const { return schedule_time; }
TimePoint enqueueTime() const { return enqueue_time; } TimePoint enqueueTime() const { return enqueue_time; }
TimePoint startTime() const { return start_time; } TimePoint startTime() const { return start_time; }
@ -90,22 +96,24 @@ public:
private: private:
friend class AsyncLoader; friend class AsyncLoader;
void ok(); [[nodiscard]] size_t ok();
void failed(const std::exception_ptr & ptr); [[nodiscard]] size_t failed(const std::exception_ptr & ptr);
void canceled(const std::exception_ptr & ptr); [[nodiscard]] size_t canceled(const std::exception_ptr & ptr);
void finish(); [[nodiscard]] size_t finish();
void scheduled(); void scheduled(UInt64 job_id_);
void enqueued(); void enqueued();
void execute(size_t pool, const LoadJobPtr & self); void execute(AsyncLoader & loader, size_t pool, const LoadJobPtr & self);
std::atomic<UInt64> job_id{0};
std::atomic<size_t> execution_pool_id; std::atomic<size_t> execution_pool_id;
std::atomic<size_t> pool_id; std::atomic<size_t> pool_id;
std::function<void(const LoadJobPtr & self)> func; std::function<void(AsyncLoader & loader, const LoadJobPtr & self)> func;
mutable std::mutex mutex; mutable std::mutex mutex;
mutable std::condition_variable finished; mutable std::condition_variable finished;
mutable size_t waiters = 0; mutable size_t waiters = 0; // All waiters, including suspended
mutable size_t suspended_waiters = 0;
LoadStatus load_status{LoadStatus::PENDING}; LoadStatus load_status{LoadStatus::PENDING};
std::exception_ptr load_exception; std::exception_ptr load_exception;
@ -117,7 +125,7 @@ private:
struct EmptyJobFunc struct EmptyJobFunc
{ {
void operator()(const LoadJobPtr &) {} void operator()(AsyncLoader &, const LoadJobPtr &) {}
}; };
template <class Func = EmptyJobFunc> template <class Func = EmptyJobFunc>
@ -144,6 +152,7 @@ LoadJobPtr makeLoadJob(const LoadJobSet & dependencies, size_t pool_id, String n
return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<Func>(func)); return std::make_shared<LoadJob>(dependencies, std::move(name), pool_id, std::forward<Func>(func));
} }
// Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set). // Represents a logically connected set of LoadJobs required to achieve some goals (final LoadJob in the set).
class LoadTask : private boost::noncopyable class LoadTask : private boost::noncopyable
{ {
@ -168,10 +177,11 @@ public:
// auto load_task = loadSomethingAsync(async_loader, load_after_task.goals(), something); // auto load_task = loadSomethingAsync(async_loader, load_after_task.goals(), something);
const LoadJobSet & goals() const { return goal_jobs.empty() ? jobs : goal_jobs; } const LoadJobSet & goals() const { return goal_jobs.empty() ? jobs : goal_jobs; }
AsyncLoader & loader;
private: private:
friend class AsyncLoader; friend class AsyncLoader;
AsyncLoader & loader;
LoadJobSet jobs; LoadJobSet jobs;
LoadJobSet goal_jobs; LoadJobSet goal_jobs;
}; };
@ -181,91 +191,6 @@ inline LoadTaskPtr makeLoadTask(AsyncLoader & loader, LoadJobSet && jobs, LoadJo
return std::make_shared<LoadTask>(loader, std::move(jobs), std::move(goals)); return std::make_shared<LoadTask>(loader, std::move(jobs), std::move(goals));
} }
inline void scheduleLoad(const LoadTaskPtr & task)
{
task->schedule();
}
inline void scheduleLoad(const LoadTaskPtrs & tasks)
{
for (const auto & task : tasks)
task->schedule();
}
template <class... Args>
inline void scheduleLoadAll(Args && ... args)
{
(scheduleLoad(std::forward<Args>(args)), ...);
}
inline void waitLoad(const LoadJobSet & jobs)
{
for (const auto & job : jobs)
job->wait();
}
inline void waitLoad(const LoadTaskPtr & task)
{
waitLoad(task->goals());
}
inline void waitLoad(const LoadTaskPtrs & tasks)
{
for (const auto & task : tasks)
waitLoad(task->goals());
}
template <class... Args>
inline void waitLoadAll(Args && ... args)
{
(waitLoad(std::forward<Args>(args)), ...);
}
template <class... Args>
inline void scheduleAndWaitLoadAll(Args && ... args)
{
scheduleLoadAll(std::forward<Args>(args)...);
waitLoadAll(std::forward<Args>(args)...);
}
inline LoadJobSet getGoals(const LoadTaskPtrs & tasks)
{
LoadJobSet result;
for (const auto & task : tasks)
result.insert(task->goals().begin(), task->goals().end());
return result;
}
inline LoadJobSet getGoalsOr(const LoadTaskPtrs & tasks, const LoadJobSet & alternative)
{
LoadJobSet result;
for (const auto & task : tasks)
result.insert(task->goals().begin(), task->goals().end());
return result.empty() ? alternative : result;
}
inline LoadJobSet joinJobs(const LoadJobSet & jobs1, const LoadJobSet & jobs2)
{
LoadJobSet result;
if (!jobs1.empty())
result.insert(jobs1.begin(), jobs1.end());
if (!jobs2.empty())
result.insert(jobs2.begin(), jobs2.end());
return result;
}
inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs & tasks2)
{
if (tasks1.empty())
return tasks2;
if (tasks2.empty())
return tasks1;
LoadTaskPtrs result;
result.reserve(tasks1.size() + tasks2.size());
result.insert(result.end(), tasks1.begin(), tasks1.end());
result.insert(result.end(), tasks2.begin(), tasks2.end());
return result;
}
// `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks job dependencies and priorities. // `AsyncLoader` is a scheduler for DAG of `LoadJob`s. It tracks job dependencies and priorities.
// Basic usage example: // Basic usage example:
@ -277,8 +202,8 @@ inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs &
// //
// // Create and schedule a task consisting of three jobs. Job1 has no dependencies and is run first. // // Create and schedule a task consisting of three jobs. Job1 has no dependencies and is run first.
// // Job2 and job3 depend on job1 and are run only after job1 completion. // // Job2 and job3 depend on job1 and are run only after job1 completion.
// auto job_func = [&] (const LoadJobPtr & self) { // auto job_func = [&] (AsyncLoader & loader, const LoadJobPtr & self) {
// LOG_TRACE(log, "Executing load job '{}' in pool '{}'", self->name, async_loader->getPoolName(self->pool())); // LOG_TRACE(log, "Executing load job '{}' in pool '{}'", self->name, loader->getPoolName(self->pool()));
// }; // };
// auto job1 = makeLoadJob({}, "job1", /* pool_id = */ 1, job_func); // auto job1 = makeLoadJob({}, "job1", /* pool_id = */ 1, job_func);
// auto job2 = makeLoadJob({ job1 }, "job2", /* pool_id = */ 1, job_func); // auto job2 = makeLoadJob({ job1 }, "job2", /* pool_id = */ 1, job_func);
@ -287,8 +212,8 @@ inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs &
// task.schedule(); // task.schedule();
// //
// // Another thread may prioritize a job by changing its pool and wait for it: // // Another thread may prioritize a job by changing its pool and wait for it:
// async_loader->prioritize(job3, /* pool_id = */ 0); // Increase priority: 1 -> 0 (lower is better) // async_loader.prioritize(job3, /* pool_id = */ 0); // Increase priority: 1 -> 0 (lower is better)
// job3->wait(); // Blocks until job completion or cancellation and rethrow an exception (if any) // async_loader.wait(job3); // Blocks until job completion or cancellation and rethrow an exception (if any)
// //
// Every job has a pool associated with it. AsyncLoader starts every job in its thread pool. // Every job has a pool associated with it. AsyncLoader starts every job in its thread pool.
// Each pool has a constant priority and a mutable maximum number of threads. // Each pool has a constant priority and a mutable maximum number of threads.
@ -341,7 +266,8 @@ private:
std::unique_ptr<ThreadPool> thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools. std::unique_ptr<ThreadPool> thread_pool; // NOTE: we avoid using a `ThreadPool` queue to be able to move jobs between pools.
std::map<UInt64, LoadJobPtr> ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. Key is `ready_seqno` std::map<UInt64, LoadJobPtr> ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. Key is `ready_seqno`
size_t max_threads; // Max number of workers to be spawn size_t max_threads; // Max number of workers to be spawn
size_t workers = 0; // Number of currently execution workers size_t workers = 0; // Number of currently executing workers
size_t suspended_workers = 0; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution)
bool isActive() const { return workers > 0 || !ready_queue.empty(); } bool isActive() const { return workers > 0 || !ready_queue.empty(); }
}; };
@ -369,7 +295,7 @@ public:
Metric metric_threads; Metric metric_threads;
Metric metric_active_threads; Metric metric_active_threads;
Metric metric_scheduled_threads; Metric metric_scheduled_threads;
size_t max_threads; size_t max_threads; // Zero means use all CPU cores
Priority priority; Priority priority;
}; };
@ -399,6 +325,7 @@ public:
// and are removed from AsyncLoader, so it is thread-safe to destroy them. // and are removed from AsyncLoader, so it is thread-safe to destroy them.
void schedule(LoadTask & task); void schedule(LoadTask & task);
void schedule(const LoadTaskPtr & task); void schedule(const LoadTaskPtr & task);
void schedule(const LoadJobSet & jobs_to_schedule);
// Schedule all tasks atomically. To ensure only highest priority jobs among all tasks are run first. // Schedule all tasks atomically. To ensure only highest priority jobs among all tasks are run first.
void schedule(const LoadTaskPtrs & tasks); void schedule(const LoadTaskPtrs & tasks);
@ -407,6 +334,11 @@ public:
// Jobs from higher (than `new_pool`) priority pools are not changed. // Jobs from higher (than `new_pool`) priority pools are not changed.
void prioritize(const LoadJobPtr & job, size_t new_pool); void prioritize(const LoadJobPtr & job, size_t new_pool);
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
// Throws if job is FAILED or CANCELED unless `no_throw` is set. Returns or throws immediately if called on non-pending job.
// If job was not scheduled, it will be implicitly scheduled before the wait (deadlock auto-resolution).
void wait(const LoadJobPtr & job, bool no_throw = false);
// Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them. // Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them.
void remove(const LoadJobSet & jobs); void remove(const LoadJobSet & jobs);
@ -430,23 +362,26 @@ public:
bool is_executing = false; bool is_executing = false;
}; };
// For introspection and debug only, see `system.async_loader` table // For introspection and debug only, see `system.async_loader` table.
std::vector<JobState> getJobStates() const; std::vector<JobState> getJobStates() const;
// For deadlock resolution. Should not be used directly.
void workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job);
private: private:
void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock); void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
String checkCycleImpl(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock); String checkCycle(const LoadJobPtr & job, LoadJobSet & left, LoadJobSet & visited, std::unique_lock<std::mutex> & lock);
void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock); void finish(const LoadJobPtr & job, LoadStatus status, std::exception_ptr exception_from_job, std::unique_lock<std::mutex> & lock);
void scheduleImpl(const LoadJobSet & input_jobs);
void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock); void gatherNotScheduled(const LoadJobPtr & job, LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
void prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock); void prioritize(const LoadJobPtr & job, size_t new_pool_id, std::unique_lock<std::mutex> & lock);
void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock); void enqueue(Info & info, const LoadJobPtr & job, std::unique_lock<std::mutex> & lock);
bool canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &); void wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr & job);
bool canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &); bool canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> & lock);
void updateCurrentPriorityAndSpawn(std::unique_lock<std::mutex> &); bool canWorkerLive(Pool & pool, std::unique_lock<std::mutex> & lock);
void spawn(Pool & pool, std::unique_lock<std::mutex> &); void updateCurrentPriorityAndSpawn(std::unique_lock<std::mutex> & lock);
void spawn(Pool & pool, std::unique_lock<std::mutex> & lock);
void worker(Pool & pool); void worker(Pool & pool);
bool hasWorker(std::unique_lock<std::mutex> &) const; bool hasWorker(std::unique_lock<std::mutex> & lock) const;
// Logging // Logging
const bool log_failures; // Worker should log all exceptions caught from job functions. const bool log_failures; // Worker should log all exceptions caught from job functions.
@ -457,6 +392,7 @@ private:
bool is_running = true; bool is_running = true;
std::optional<Priority> current_priority; // highest priority among active pools std::optional<Priority> current_priority; // highest priority among active pools
UInt64 last_ready_seqno = 0; // Increasing counter for ready queue keys. UInt64 last_ready_seqno = 0; // Increasing counter for ready queue keys.
UInt64 last_job_id = 0; // Increasing counter for job IDs
std::unordered_map<LoadJobPtr, Info> scheduled_jobs; // Full set of scheduled pending jobs along with scheduling info. std::unordered_map<LoadJobPtr, Info> scheduled_jobs; // Full set of scheduled pending jobs along with scheduling info.
std::vector<Pool> pools; // Thread pools for job execution and ready queues std::vector<Pool> pools; // Thread pools for job execution and ready queues
LoadJobSet finished_jobs; // Set of finished jobs (for introspection only, until jobs are removed). LoadJobSet finished_jobs; // Set of finished jobs (for introspection only, until jobs are removed).
@ -465,4 +401,136 @@ private:
std::chrono::system_clock::time_point busy_period_start_time; std::chrono::system_clock::time_point busy_period_start_time;
}; };
// === HELPER FUNCTIONS ===
// There are three types of helper functions:
// schedulerLoad([loader], {jobs|task|tasks}):
// Just schedule jobs for async loading.
// Note that normally function `doSomethingAsync()` returns you a task which is NOT scheduled.
// This is done to allow you:
// (1) construct complex dependency graph offline.
// (2) schedule tasks simultaneously to respect their relative priorities.
// (3) do prioritization independently, before scheduling.
// prioritizeLoad([loader], pool_id, {jobs|task|tasks}):
// Prioritize jobs w/o waiting for it.
// Note that prioritization may be done
// (1) before scheduling (to ensure all jobs are started in the correct pools)
// (2) after scheduling (for dynamic prioritization, e.g. when new query arrives)
// waitLoad([loader], pool_id, {jobs|task|tasks}, [no_throw]):
// Prioritize and wait for jobs.
// Note that to avoid deadlocks it implicitly schedules all the jobs before waiting for them.
// Also to avoid priority inversion you should never wait for a job that has lower priority.
// So it prioritizes all jobs, then schedules all jobs and waits every job.
// IMPORTANT: Any load error will be rethrown, unless `no_throw` is set.
// Common usage pattern is:
// waitLoad(currentPoolOr(foreground_pool_id), tasks);
// Returns current execution pool if it is called from load job, or `pool` otherwise
// It should be used for waiting other load jobs in places that can be executed from load jobs
size_t currentPoolOr(size_t pool);
inline void scheduleLoad(AsyncLoader & loader, const LoadJobSet & jobs)
{
loader.schedule(jobs);
}
inline void scheduleLoad(const LoadTaskPtr & task)
{
task->schedule();
}
/// Schedule a whole set of tasks for async loading; does not wait for them.
inline void scheduleLoad(const LoadTaskPtrs & tasks)
{
    // Nothing to schedule for an empty set.
    if (tasks.empty())
        return;
    // NOTE: all tasks are assumed to share a single `AsyncLoader` instance
    tasks.front()->loader.schedule(tasks);
}
/// Wait for all `jobs`; they are implicitly scheduled first to avoid deadlocks.
/// Load errors are rethrown unless `no_throw` is set.
inline void waitLoad(AsyncLoader & loader, const LoadJobSet & jobs, bool no_throw = false)
{
    scheduleLoad(loader, jobs);
    for (const auto & j : jobs)
        loader.wait(j, no_throw);
}
/// Schedule `task` and wait for its goal jobs.
/// Load errors are rethrown unless `no_throw` is set.
inline void waitLoad(const LoadTaskPtr & task, bool no_throw = false)
{
    scheduleLoad(task);
    waitLoad(task->loader, task->goals(), no_throw);
}
/// Schedule every task in `tasks` and wait for all their goal jobs.
/// Load errors are rethrown unless `no_throw` is set.
inline void waitLoad(const LoadTaskPtrs & tasks, bool no_throw = false)
{
    scheduleLoad(tasks);
    for (const auto & t : tasks)
        waitLoad(t->loader, t->goals(), no_throw);
}
/// Move every job in `jobs` into pool `pool_id`; no scheduling, no waiting.
inline void prioritizeLoad(AsyncLoader & loader, size_t pool_id, const LoadJobSet & jobs)
{
    for (const auto & j : jobs)
        loader.prioritize(j, pool_id);
}
/// Prioritize all goal jobs of `task` into pool `pool_id`.
inline void prioritizeLoad(size_t pool_id, const LoadTaskPtr & task)
{
    prioritizeLoad(task->loader, pool_id, task->goals());
}
/// Prioritize the goal jobs of every task in `tasks` into pool `pool_id`.
inline void prioritizeLoad(size_t pool_id, const LoadTaskPtrs & tasks)
{
    for (const auto & t : tasks)
        prioritizeLoad(t->loader, pool_id, t->goals());
}
/// Prioritize `jobs` into pool `pool_id`, then (implicitly) schedule and wait for them.
/// Load errors are rethrown unless `no_throw` is set.
inline void waitLoad(AsyncLoader & loader, size_t pool_id, const LoadJobSet & jobs, bool no_throw = false)
{
    prioritizeLoad(loader, pool_id, jobs);
    waitLoad(loader, jobs, no_throw);
}
/// Prioritize `task`'s goal jobs into pool `pool_id`, then schedule the task and wait.
/// Load errors are rethrown unless `no_throw` is set.
inline void waitLoad(size_t pool_id, const LoadTaskPtr & task, bool no_throw = false)
{
    prioritizeLoad(task->loader, pool_id, task->goals());
    waitLoad(task->loader, task->goals(), no_throw);
}
/// Prioritize all `tasks` into pool `pool_id`, then schedule them and wait for all goal jobs.
/// Load errors are rethrown unless `no_throw` is set.
inline void waitLoad(size_t pool_id, const LoadTaskPtrs & tasks, bool no_throw = false)
{
    prioritizeLoad(pool_id, tasks);
    waitLoad(tasks, no_throw);
}
/// Collect the union of goal jobs of all `tasks`; if it turns out empty, return `alternative`.
inline LoadJobSet getGoals(const LoadTaskPtrs & tasks, const LoadJobSet & alternative = {})
{
    LoadJobSet goals;
    for (const auto & task : tasks)
    {
        const auto & task_goals = task->goals();
        goals.insert(task_goals.begin(), task_goals.end());
    }
    return goals.empty() ? alternative : goals;
}
/// Union of two job sets.
inline LoadJobSet joinJobs(const LoadJobSet & jobs1, const LoadJobSet & jobs2)
{
    LoadJobSet joined;
    // Range-inserting an empty set is a no-op, so no emptiness checks are required.
    joined.insert(jobs1.begin(), jobs1.end());
    joined.insert(jobs2.begin(), jobs2.end());
    return joined;
}
/// Concatenation of two task lists (order preserved: `tasks1` first, then `tasks2`).
inline LoadTaskPtrs joinTasks(const LoadTaskPtrs & tasks1, const LoadTaskPtrs & tasks2)
{
    // Fast paths: reuse one input when the other contributes nothing.
    if (tasks2.empty())
        return tasks1;
    if (tasks1.empty())
        return tasks2;
    LoadTaskPtrs joined;
    joined.reserve(tasks1.size() + tasks2.size());
    joined.insert(joined.end(), tasks1.begin(), tasks1.end());
    joined.insert(joined.end(), tasks2.begin(), tasks2.end());
    return joined;
}
} }

View File

@ -110,12 +110,12 @@
M(StorageHiveThreads, "Number of threads in the StorageHive thread pool.") \ M(StorageHiveThreads, "Number of threads in the StorageHive thread pool.") \
M(StorageHiveThreadsActive, "Number of threads in the StorageHive thread pool running a task.") \ M(StorageHiveThreadsActive, "Number of threads in the StorageHive thread pool running a task.") \
M(StorageHiveThreadsScheduled, "Number of queued or active jobs in the StorageHive thread pool.") \ M(StorageHiveThreadsScheduled, "Number of queued or active jobs in the StorageHive thread pool.") \
M(TablesLoaderThreads, "Number of threads in the tables loader thread pool.") \ M(TablesLoaderBackgroundThreads, "Number of threads in the tables loader background thread pool.") \
M(TablesLoaderThreadsActive, "Number of threads in the tables loader thread pool running a task.") \ M(TablesLoaderBackgroundThreadsActive, "Number of threads in the tables loader background thread pool running a task.") \
M(TablesLoaderThreadsScheduled, "Number of queued or active jobs in the tables loader thread pool.") \ M(TablesLoaderBackgroundThreadsScheduled, "Number of queued or active jobs in the tables loader background thread pool.") \
M(DatabaseOrdinaryThreads, "Number of threads in the Ordinary database thread pool.") \ M(TablesLoaderForegroundThreads, "Number of threads in the tables loader foreground thread pool.") \
M(DatabaseOrdinaryThreadsActive, "Number of threads in the Ordinary database thread pool running a task.") \ M(TablesLoaderForegroundThreadsActive, "Number of threads in the tables loader foreground thread pool running a task.") \
M(DatabaseOrdinaryThreadsScheduled, "Number of queued or active jobs in the Ordinary database thread pool.") \ M(TablesLoaderForegroundThreadsScheduled, "Number of queued or active jobs in the tables loader foreground thread pool.") \
M(DatabaseOnDiskThreads, "Number of threads in the DatabaseOnDisk thread pool.") \ M(DatabaseOnDiskThreads, "Number of threads in the DatabaseOnDisk thread pool.") \
M(DatabaseOnDiskThreadsActive, "Number of threads in the DatabaseOnDisk thread pool running a task.") \ M(DatabaseOnDiskThreadsActive, "Number of threads in the DatabaseOnDisk thread pool running a task.") \
M(DatabaseOnDiskThreadsScheduled, "Number of queued or active jobs in the DatabaseOnDisk thread pool.") \ M(DatabaseOnDiskThreadsScheduled, "Number of queued or active jobs in the DatabaseOnDisk thread pool.") \

View File

@ -588,6 +588,7 @@
M(706, LIBSSH_ERROR) \ M(706, LIBSSH_ERROR) \
M(707, GCP_ERROR) \ M(707, GCP_ERROR) \
M(708, ILLEGAL_STATISTIC) \ M(708, ILLEGAL_STATISTIC) \
M(709, CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT) \
\ \
M(999, KEEPER_EXCEPTION) \ M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \ M(1000, POCO_EXCEPTION) \

32
src/Common/PoolId.h Normal file
View File

@ -0,0 +1,32 @@
#pragma once

#include <Common/Priority.h>

namespace DB
{

/// Indices and priorities of `AsyncLoader` pools.
/// The most important difference from regular ThreadPools is the priorities of pools:
///  * Pools that have different priorities do NOT run jobs simultaneously (with a small exception due to dynamic prioritization).
///  * Pools with lower priority wait for all jobs in higher priority pools to be done.

/// Note that pools also have different configurable sizes not listed here. See `Context::getAsyncLoader()` for details.

/// WARNING: `*PoolId` values must be unique and sequential, without gaps.

/// Used for executing load jobs that are waited for by queries or in case of synchronous table loading.
constexpr size_t TablesLoaderForegroundPoolId = 0;
constexpr Priority TablesLoaderForegroundPriority{0};

/// Has lower priority and is used by table load jobs.
constexpr size_t TablesLoaderBackgroundLoadPoolId = 1;
constexpr Priority TablesLoaderBackgroundLoadPriority{1};

/// Has even lower priority and is used by startup jobs.
/// NOTE: This pool is required to begin table startup only after all tables are loaded.
/// NOTE: This is needed to prevent heavy merges/mutations from consuming all the resources, slowing table loading down.
constexpr size_t TablesLoaderBackgroundStartupPoolId = 2;
constexpr Priority TablesLoaderBackgroundStartupPriority{2};

}

View File

@ -444,8 +444,13 @@ The server successfully detected this situation and will download merged part fr
M(WaitPrefetchTaskMicroseconds, "Time spend waiting for prefetched reader") \ M(WaitPrefetchTaskMicroseconds, "Time spend waiting for prefetched reader") \
\ \
M(ThreadpoolReaderTaskMicroseconds, "Time spent getting the data in asynchronous reading") \ M(ThreadpoolReaderTaskMicroseconds, "Time spent getting the data in asynchronous reading") \
M(ThreadpoolReaderPrepareMicroseconds, "Time spent on preparation (e.g. call to reader seek() method)") \
M(ThreadpoolReaderReadBytes, "Bytes read from a threadpool task in asynchronous reading") \ M(ThreadpoolReaderReadBytes, "Bytes read from a threadpool task in asynchronous reading") \
M(ThreadpoolReaderSubmit, "Bytes read from a threadpool task in asynchronous reading") \ M(ThreadpoolReaderSubmit, "Bytes read from a threadpool task in asynchronous reading") \
M(ThreadpoolReaderSubmitReadSynchronously, "How many times we haven't scheduled a task on the thread pool and read synchronously instead") \
M(ThreadpoolReaderSubmitReadSynchronouslyBytes, "How many bytes were read synchronously") \
M(ThreadpoolReaderSubmitReadSynchronouslyMicroseconds, "How much time we spent reading synchronously") \
M(AsynchronousReaderIgnoredBytes, "Number of bytes ignored during asynchronous reading") \
\ \
M(FileSegmentWaitReadBufferMicroseconds, "Metric per file segment. Time spend waiting for internal read buffer (includes cache waiting)") \ M(FileSegmentWaitReadBufferMicroseconds, "Metric per file segment. Time spend waiting for internal read buffer (includes cache waiting)") \
M(FileSegmentReadMicroseconds, "Metric per file segment. Time spend reading from file") \ M(FileSegmentReadMicroseconds, "Metric per file segment. Time spend reading from file") \
@ -569,6 +574,8 @@ The server successfully detected this situation and will download merged part fr
\ \
M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.") \ M(ConnectionPoolIsFullMicroseconds, "Total time spent waiting for a slot in connection pool.") \
\ \
M(AsyncLoaderWaitMicroseconds, "Total time a query was waiting for async loader jobs.") \
\
M(LogTest, "Number of log messages with level Test") \ M(LogTest, "Number of log messages with level Test") \
M(LogTrace, "Number of log messages with level Trace") \ M(LogTrace, "Number of log messages with level Trace") \
M(LogDebug, "Number of log messages with level Debug") \ M(LogDebug, "Number of log messages with level Debug") \

View File

@ -27,6 +27,7 @@
#cmakedefine01 USE_H3 #cmakedefine01 USE_H3
#cmakedefine01 USE_S2_GEOMETRY #cmakedefine01 USE_S2_GEOMETRY
#cmakedefine01 USE_FASTOPS #cmakedefine01 USE_FASTOPS
#cmakedefine01 USE_SQIDS
#cmakedefine01 USE_NLP #cmakedefine01 USE_NLP
#cmakedefine01 USE_VECTORSCAN #cmakedefine01 USE_VECTORSCAN
#cmakedefine01 USE_LIBURING #cmakedefine01 USE_LIBURING

View File

@ -1,8 +1,12 @@
#include <boost/core/noncopyable.hpp>
#include <gtest/gtest.h> #include <gtest/gtest.h>
#include <array>
#include <list>
#include <barrier> #include <barrier>
#include <chrono> #include <chrono>
#include <mutex> #include <mutex>
#include <shared_mutex>
#include <stdexcept> #include <stdexcept>
#include <string_view> #include <string_view>
#include <vector> #include <vector>
@ -19,9 +23,9 @@ using namespace DB;
namespace CurrentMetrics namespace CurrentMetrics
{ {
extern const Metric TablesLoaderThreads; extern const Metric TablesLoaderBackgroundThreads;
extern const Metric TablesLoaderThreadsActive; extern const Metric TablesLoaderBackgroundThreadsActive;
extern const Metric TablesLoaderThreadsScheduled; extern const Metric TablesLoaderBackgroundThreadsScheduled;
} }
namespace DB::ErrorCodes namespace DB::ErrorCodes
@ -61,9 +65,9 @@ struct AsyncLoaderTest
{ {
result.push_back({ result.push_back({
.name = fmt::format("Pool{}", pool_id), .name = fmt::format("Pool{}", pool_id),
.metric_threads = CurrentMetrics::TablesLoaderThreads, .metric_threads = CurrentMetrics::TablesLoaderBackgroundThreads,
.metric_active_threads = CurrentMetrics::TablesLoaderThreadsActive, .metric_active_threads = CurrentMetrics::TablesLoaderBackgroundThreadsActive,
.metric_scheduled_threads = CurrentMetrics::TablesLoaderThreadsScheduled, .metric_scheduled_threads = CurrentMetrics::TablesLoaderBackgroundThreadsScheduled,
.max_threads = desc.max_threads, .max_threads = desc.max_threads,
.priority = desc.priority .priority = desc.priority
}); });
@ -155,7 +159,7 @@ TEST(AsyncLoader, Smoke)
std::atomic<size_t> jobs_done{0}; std::atomic<size_t> jobs_done{0};
std::atomic<size_t> low_priority_jobs_done{0}; std::atomic<size_t> low_priority_jobs_done{0};
auto job_func = [&] (const LoadJobPtr & self) { auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self) {
jobs_done++; jobs_done++;
if (self->pool() == low_priority_pool) if (self->pool() == low_priority_pool)
low_priority_jobs_done++; low_priority_jobs_done++;
@ -172,13 +176,13 @@ TEST(AsyncLoader, Smoke)
auto job5 = makeLoadJob({ job3, job4 }, low_priority_pool, "job5", job_func); auto job5 = makeLoadJob({ job3, job4 }, low_priority_pool, "job5", job_func);
task2->merge(t.schedule({ job5 })); task2->merge(t.schedule({ job5 }));
std::thread waiter_thread([=] { job5->wait(); }); std::thread waiter_thread([&t, job5] { t.loader.wait(job5); });
t.loader.start(); t.loader.start();
job3->wait(); t.loader.wait(job3);
t.loader.wait(); t.loader.wait();
job4->wait(); t.loader.wait(job4);
waiter_thread.join(); waiter_thread.join();
@ -196,7 +200,7 @@ TEST(AsyncLoader, CycleDetection)
{ {
AsyncLoaderTest t; AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {}; auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
LoadJobPtr cycle_breaker; // To avoid memleak we introduce with a cycle LoadJobPtr cycle_breaker; // To avoid memleak we introduce with a cycle
@ -241,7 +245,7 @@ TEST(AsyncLoader, CancelPendingJob)
{ {
AsyncLoaderTest t; AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {}; auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job = makeLoadJob({}, "job", job_func); auto job = makeLoadJob({}, "job", job_func);
auto task = t.schedule({ job }); auto task = t.schedule({ job });
@ -251,7 +255,7 @@ TEST(AsyncLoader, CancelPendingJob)
ASSERT_EQ(job->status(), LoadStatus::CANCELED); ASSERT_EQ(job->status(), LoadStatus::CANCELED);
try try
{ {
job->wait(); t.loader.wait(job);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -264,7 +268,7 @@ TEST(AsyncLoader, CancelPendingTask)
{ {
AsyncLoaderTest t; AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {}; auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({}, "job1", job_func); auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func); auto job2 = makeLoadJob({ job1 }, "job2", job_func);
@ -277,7 +281,7 @@ TEST(AsyncLoader, CancelPendingTask)
try try
{ {
job1->wait(); t.loader.wait(job1);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -287,7 +291,7 @@ TEST(AsyncLoader, CancelPendingTask)
try try
{ {
job2->wait(); t.loader.wait(job2);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -300,7 +304,7 @@ TEST(AsyncLoader, CancelPendingDependency)
{ {
AsyncLoaderTest t; AsyncLoaderTest t;
auto job_func = [&] (const LoadJobPtr &) {}; auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({}, "job1", job_func); auto job1 = makeLoadJob({}, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func); auto job2 = makeLoadJob({ job1 }, "job2", job_func);
@ -314,7 +318,7 @@ TEST(AsyncLoader, CancelPendingDependency)
try try
{ {
job1->wait(); t.loader.wait(job1);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -324,7 +328,7 @@ TEST(AsyncLoader, CancelPendingDependency)
try try
{ {
job2->wait(); t.loader.wait(job2);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -340,7 +344,7 @@ TEST(AsyncLoader, CancelExecutingJob)
std::barrier sync(2); std::barrier sync(2);
auto job_func = [&] (const LoadJobPtr &) auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
sync.arrive_and_wait(); // (A) sync with main thread sync.arrive_and_wait(); // (A) sync with main thread
sync.arrive_and_wait(); // (B) wait for waiter sync.arrive_and_wait(); // (B) wait for waiter
@ -362,7 +366,7 @@ TEST(AsyncLoader, CancelExecutingJob)
canceler.join(); canceler.join();
ASSERT_EQ(job->status(), LoadStatus::OK); ASSERT_EQ(job->status(), LoadStatus::OK);
job->wait(); t.loader.wait(job);
} }
TEST(AsyncLoader, CancelExecutingTask) TEST(AsyncLoader, CancelExecutingTask)
@ -371,19 +375,19 @@ TEST(AsyncLoader, CancelExecutingTask)
t.loader.start(); t.loader.start();
std::barrier sync(2); std::barrier sync(2);
auto blocker_job_func = [&] (const LoadJobPtr &) auto blocker_job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
sync.arrive_and_wait(); // (A) sync with main thread sync.arrive_and_wait(); // (A) sync with main thread
sync.arrive_and_wait(); // (B) wait for waiter sync.arrive_and_wait(); // (B) wait for waiter
// signals (C) // signals (C)
}; };
auto job_to_cancel_func = [&] (const LoadJobPtr &) auto job_to_cancel_func = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
FAIL(); // this job should be canceled FAIL(); // this job should be canceled
}; };
auto job_to_succeed_func = [&] (const LoadJobPtr &) auto job_to_succeed_func = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
}; };
@ -430,7 +434,7 @@ TEST(AsyncLoader, DISABLED_JobFailure)
std::string error_message = "test job failure"; std::string error_message = "test job failure";
auto job_func = [&] (const LoadJobPtr &) { auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
throw std::runtime_error(error_message); throw std::runtime_error(error_message);
}; };
@ -442,7 +446,7 @@ TEST(AsyncLoader, DISABLED_JobFailure)
ASSERT_EQ(job->status(), LoadStatus::FAILED); ASSERT_EQ(job->status(), LoadStatus::FAILED);
try try
{ {
job->wait(); t.loader.wait(job);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -459,7 +463,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
std::string_view error_message = "test job failure"; std::string_view error_message = "test job failure";
auto failed_job_func = [&] (const LoadJobPtr &) { auto failed_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {
throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message); throw Exception(ErrorCodes::ASYNC_LOAD_FAILED, "{}", error_message);
}; };
@ -468,7 +472,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
t.loader.wait(); t.loader.wait();
auto job_func = [&] (const LoadJobPtr &) {}; auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({ failed_job }, "job1", job_func); auto job1 = makeLoadJob({ failed_job }, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func); auto job2 = makeLoadJob({ job1 }, "job2", job_func);
@ -480,7 +484,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
ASSERT_EQ(job2->status(), LoadStatus::CANCELED); ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try try
{ {
job1->wait(); t.loader.wait(job1);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -490,7 +494,7 @@ TEST(AsyncLoader, ScheduleJobWithFailedDependencies)
} }
try try
{ {
job2->wait(); t.loader.wait(job2);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -504,14 +508,14 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
{ {
AsyncLoaderTest t; AsyncLoaderTest t;
auto canceled_job_func = [&] (const LoadJobPtr &) {}; auto canceled_job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto canceled_job = makeLoadJob({}, "canceled_job", canceled_job_func); auto canceled_job = makeLoadJob({}, "canceled_job", canceled_job_func);
auto canceled_task = t.schedule({ canceled_job }); auto canceled_task = t.schedule({ canceled_job });
canceled_task->remove(); canceled_task->remove();
t.loader.start(); t.loader.start();
auto job_func = [&] (const LoadJobPtr &) {}; auto job_func = [&] (AsyncLoader &, const LoadJobPtr &) {};
auto job1 = makeLoadJob({ canceled_job }, "job1", job_func); auto job1 = makeLoadJob({ canceled_job }, "job1", job_func);
auto job2 = makeLoadJob({ job1 }, "job2", job_func); auto job2 = makeLoadJob({ job1 }, "job2", job_func);
auto task = t.schedule({ job1, job2 }); auto task = t.schedule({ job1, job2 });
@ -522,7 +526,7 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
ASSERT_EQ(job2->status(), LoadStatus::CANCELED); ASSERT_EQ(job2->status(), LoadStatus::CANCELED);
try try
{ {
job1->wait(); t.loader.wait(job1);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -531,7 +535,7 @@ TEST(AsyncLoader, ScheduleJobWithCanceledDependencies)
} }
try try
{ {
job2->wait(); t.loader.wait(job2);
FAIL(); FAIL();
} }
catch (Exception & e) catch (Exception & e)
@ -550,7 +554,7 @@ TEST(AsyncLoader, TestConcurrency)
std::barrier sync(concurrency); std::barrier sync(concurrency);
std::atomic<int> executing{0}; std::atomic<int> executing{0};
auto job_func = [&] (const LoadJobPtr &) auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
executing++; executing++;
ASSERT_LE(executing, concurrency); ASSERT_LE(executing, concurrency);
@ -577,7 +581,7 @@ TEST(AsyncLoader, TestOverload)
for (int concurrency = 4; concurrency <= 8; concurrency++) for (int concurrency = 4; concurrency <= 8; concurrency++)
{ {
auto job_func = [&] (const LoadJobPtr &) auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
executing++; executing++;
t.randomSleepUs(100, 200, 100); t.randomSleepUs(100, 200, 100);
@ -613,7 +617,7 @@ TEST(AsyncLoader, StaticPriorities)
std::string schedule; std::string schedule;
auto job_func = [&] (const LoadJobPtr & self) auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{ {
schedule += fmt::format("{}{}", self->name, self->pool()); schedule += fmt::format("{}{}", self->name, self->pool());
}; };
@ -656,18 +660,18 @@ TEST(AsyncLoader, SimplePrioritization)
std::atomic<int> executed{0}; // Number of previously executed jobs (to test execution order) std::atomic<int> executed{0}; // Number of previously executed jobs (to test execution order)
LoadJobPtr job_to_prioritize; LoadJobPtr job_to_prioritize;
auto job_func_A_booster = [&] (const LoadJobPtr &) auto job_func_A_booster = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
ASSERT_EQ(executed++, 0); ASSERT_EQ(executed++, 0);
t.loader.prioritize(job_to_prioritize, 2); t.loader.prioritize(job_to_prioritize, 2);
}; };
auto job_func_B_tester = [&] (const LoadJobPtr &) auto job_func_B_tester = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
ASSERT_EQ(executed++, 2); ASSERT_EQ(executed++, 2);
}; };
auto job_func_C_boosted = [&] (const LoadJobPtr &) auto job_func_C_boosted = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
ASSERT_EQ(executed++, 1); ASSERT_EQ(executed++, 1);
}; };
@ -680,7 +684,8 @@ TEST(AsyncLoader, SimplePrioritization)
job_to_prioritize = jobs[2]; // C job_to_prioritize = jobs[2]; // C
scheduleAndWaitLoadAll(task); scheduleLoad(task);
waitLoad(task);
} }
TEST(AsyncLoader, DynamicPriorities) TEST(AsyncLoader, DynamicPriorities)
@ -714,7 +719,7 @@ TEST(AsyncLoader, DynamicPriorities)
UInt64 ready_seqno_D = 0; UInt64 ready_seqno_D = 0;
UInt64 ready_seqno_E = 0; UInt64 ready_seqno_E = 0;
auto job_func = [&] (const LoadJobPtr & self) auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{ {
{ {
std::unique_lock lock{schedule_mutex}; std::unique_lock lock{schedule_mutex};
@ -791,7 +796,7 @@ TEST(AsyncLoader, RandomIndependentTasks)
AsyncLoaderTest t(16); AsyncLoaderTest t(16);
t.loader.start(); t.loader.start();
auto job_func = [&] (const LoadJobPtr & self) auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{ {
for (const auto & dep : self->dependencies) for (const auto & dep : self->dependencies)
ASSERT_EQ(dep->status(), LoadStatus::OK); ASSERT_EQ(dep->status(), LoadStatus::OK);
@ -818,7 +823,7 @@ TEST(AsyncLoader, RandomDependentTasks)
std::vector<LoadTaskPtr> tasks; std::vector<LoadTaskPtr> tasks;
std::vector<LoadJobPtr> all_jobs; std::vector<LoadJobPtr> all_jobs;
auto job_func = [&] (const LoadJobPtr & self) auto job_func = [&] (AsyncLoader &, const LoadJobPtr & self)
{ {
for (const auto & dep : self->dependencies) for (const auto & dep : self->dependencies)
ASSERT_EQ(dep->status(), LoadStatus::OK); ASSERT_EQ(dep->status(), LoadStatus::OK);
@ -860,7 +865,7 @@ TEST(AsyncLoader, SetMaxThreads)
syncs.push_back(std::make_unique<std::barrier<>>(max_threads + 1)); syncs.push_back(std::make_unique<std::barrier<>>(max_threads + 1));
auto job_func = [&] (const LoadJobPtr &) auto job_func = [&] (AsyncLoader &, const LoadJobPtr &)
{ {
int idx = sync_index; int idx = sync_index;
if (idx < syncs.size()) if (idx < syncs.size())
@ -914,10 +919,11 @@ TEST(AsyncLoader, DynamicPools)
{ {
std::atomic<bool> boosted{false}; // Visible concurrency was increased std::atomic<bool> boosted{false}; // Visible concurrency was increased
std::atomic<int> left{concurrency * jobs_in_chain / 2}; // Number of jobs to start before `prioritize()` call std::atomic<int> left{concurrency * jobs_in_chain / 2}; // Number of jobs to start before `prioritize()` call
std::shared_mutex prioritization_mutex; // To slow down job execution during prioritization to avoid race condition
LoadJobSet jobs_to_prioritize; LoadJobSet jobs_to_prioritize;
auto job_func = [&] (const LoadJobPtr & self) auto job_func = [&] (AsyncLoader & loader, const LoadJobPtr & self)
{ {
auto pool_id = self->executionPool(); auto pool_id = self->executionPool();
executing[pool_id]++; executing[pool_id]++;
@ -928,10 +934,12 @@ TEST(AsyncLoader, DynamicPools)
// Dynamic prioritization // Dynamic prioritization
if (--left == 0) if (--left == 0)
{ {
std::unique_lock lock{prioritization_mutex};
for (const auto & job : jobs_to_prioritize) for (const auto & job : jobs_to_prioritize)
t.loader.prioritize(job, 1); loader.prioritize(job, 1);
} }
std::shared_lock lock{prioritization_mutex};
t.randomSleepUs(100, 200, 100); t.randomSleepUs(100, 200, 100);
ASSERT_LE(executing[pool_id], max_threads[pool_id]); ASSERT_LE(executing[pool_id], max_threads[pool_id]);
@ -941,9 +949,10 @@ TEST(AsyncLoader, DynamicPools)
std::vector<LoadTaskPtr> tasks; std::vector<LoadTaskPtr> tasks;
tasks.reserve(concurrency); tasks.reserve(concurrency);
for (int i = 0; i < concurrency; i++) for (int i = 0; i < concurrency; i++)
tasks.push_back(makeLoadTask(t.loader, t.chainJobSet(jobs_in_chain, job_func))); tasks.push_back(makeLoadTask(t.loader, t.chainJobSet(jobs_in_chain, job_func, fmt::format("c{}-j", i))));
jobs_to_prioritize = getGoals(tasks); // All jobs jobs_to_prioritize = getGoals(tasks); // All jobs
scheduleAndWaitLoadAll(tasks); scheduleLoad(tasks);
waitLoad(tasks);
ASSERT_EQ(executing[0], 0); ASSERT_EQ(executing[0], 0);
ASSERT_EQ(executing[1], 0); ASSERT_EQ(executing[1], 0);
@ -952,3 +961,136 @@ TEST(AsyncLoader, DynamicPools)
} }
} }
TEST(AsyncLoader, SubJobs)
{
    AsyncLoaderTest t(1);
    t.loader.start();

    // An example of component with an asynchronous loading interface
    class MyComponent : boost::noncopyable {
    public:
        MyComponent(AsyncLoader & loader_, int jobs)
            : loader(loader_)
            , jobs_left(jobs)
        {}

        [[nodiscard]] LoadTaskPtr loadAsync()
        {
            auto job_func = [this] (AsyncLoader &, const LoadJobPtr &) {
                auto sub_job_func = [this] (AsyncLoader &, const LoadJobPtr &) {
                    --jobs_left;
                };
                LoadJobSet jobs;
                // Snapshot the atomic once: avoids a signed/unsigned comparison in the
                // loop condition and a repeated atomic load per iteration. No sub job
                // can run before `waitLoad` below, so the snapshot cannot go stale.
                const int sub_jobs = jobs_left;
                for (int j = 0; j < sub_jobs; j++)
                    jobs.insert(makeLoadJob({}, fmt::format("sub job {}", j), sub_job_func));
                waitLoad(makeLoadTask(loader, std::move(jobs)));
            };
            auto job = makeLoadJob({}, "main job", job_func);
            return load_task = makeLoadTask(loader, { job });
        }

        bool isLoaded() const
        {
            return jobs_left == 0;
        }

    private:
        AsyncLoader & loader;
        std::atomic<int> jobs_left;
        // It is a good practice to keep load task inside the component:
        // 1) to make sure it outlives its load jobs;
        // 2) to avoid removing load jobs from `system.async_loader` while we use the component
        LoadTaskPtr load_task;
    };

    for (double jobs_per_thread : std::array{0.5, 1.0, 2.0})
    {
        for (size_t threads = 1; threads <= 32; threads *= 2)
        {
            t.loader.setMaxThreads(0, threads);
            std::list<MyComponent> components;
            LoadTaskPtrs tasks;
            size_t size = static_cast<size_t>(jobs_per_thread * threads);
            tasks.reserve(size);
            for (size_t j = 0; j < size; j++)
            {
                components.emplace_back(t.loader, 5);
                tasks.emplace_back(components.back().loadAsync());
            }
            waitLoad(tasks);
            for (const auto & component: components)
                ASSERT_TRUE(component.isLoaded());
        }
    }
}
TEST(AsyncLoader, RecursiveJob)
{
    AsyncLoaderTest t(1);
    t.loader.start();

    // An example of component with an asynchronous loading interface (a complicated one)
    class MyComponent : boost::noncopyable {
    public:
        MyComponent(AsyncLoader & loader_, int jobs)
            : loader(loader_)
            , jobs_left(jobs)
        {}

        [[nodiscard]] LoadTaskPtr loadAsync()
        {
            return load_task = loadAsyncImpl(jobs_left);
        }

        bool isLoaded() const
        {
            return jobs_left == 0;
        }

    private:
        // Builds a single-job task; the job delegates to `jobFunction`, which may recurse.
        [[nodiscard]] LoadTaskPtr loadAsyncImpl(int id)
        {
            auto job_func = [this] (AsyncLoader &, const LoadJobPtr & self) {
                jobFunction(self);
            };
            auto job = makeLoadJob({}, fmt::format("job{}", id), job_func);
            auto task = makeLoadTask(loader, { job });
            return task;
        }

        // Each job spawns and waits for the next one in the same pool until `jobs_left` hits zero.
        void jobFunction(const LoadJobPtr & self)
        {
            int next = --jobs_left;
            if (next > 0)
                waitLoad(self->pool(), loadAsyncImpl(next));
        }

        AsyncLoader & loader;
        std::atomic<int> jobs_left;
        // It is a good practice to keep load task inside the component:
        // 1) to make sure it outlives its load jobs;
        // 2) to avoid removing load jobs from `system.async_loader` while we use the component
        LoadTaskPtr load_task;
    };

    for (double jobs_per_thread : std::array{0.5, 1.0, 2.0})
    {
        for (size_t threads = 1; threads <= 32; threads *= 2)
        {
            t.loader.setMaxThreads(0, threads);
            std::list<MyComponent> components;
            LoadTaskPtrs tasks;
            size_t size = static_cast<size_t>(jobs_per_thread * threads);
            tasks.reserve(size);
            for (size_t j = 0; j < size; j++)
            {
                components.emplace_back(t.loader, 5);
                tasks.emplace_back(components.back().loadAsync());
            }
            waitLoad(tasks);
            for (const auto & component: components)
                ASSERT_TRUE(component.isLoaded());
        }
    }
}

View File

@ -139,9 +139,9 @@ void DeflateQplJobHWPool::unLockJob(UInt32 index)
hw_job_ptr_locks[index].store(false); hw_job_ptr_locks[index].store(false);
} }
//HardwareCodecDeflateQpl HardwareCodecDeflateQpl::HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_)
HardwareCodecDeflateQpl::HardwareCodecDeflateQpl() : log(&Poco::Logger::get("HardwareCodecDeflateQpl"))
:log(&Poco::Logger::get("HardwareCodecDeflateQpl")) , sw_codec(sw_codec_)
{ {
} }
@ -169,7 +169,7 @@ Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source
UInt32 compressed_size = 0; UInt32 compressed_size = 0;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{ {
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->acquireJob fail, probably job pool exhausted)"); LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->acquireJob fail, probably job pool exhausted)");
return RET_ERROR; return RET_ERROR;
} }
@ -189,7 +189,7 @@ Int32 HardwareCodecDeflateQpl::doCompressData(const char * source, UInt32 source
} }
else else
{ {
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status)); LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doCompressData->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
DeflateQplJobHWPool::instance().releaseJob(job_id); DeflateQplJobHWPool::instance().releaseJob(job_id);
return RET_ERROR; return RET_ERROR;
} }
@ -202,7 +202,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source,
UInt32 decompressed_size = 0; UInt32 decompressed_size = 0;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{ {
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)"); LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->acquireJob fail, probably job pool exhausted)");
return RET_ERROR; return RET_ERROR;
} }
@ -214,17 +214,29 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataSynchronous(const char * source,
job_ptr->available_out = uncompressed_size; job_ptr->available_out = uncompressed_size;
job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST; job_ptr->flags = QPL_FLAG_FIRST | QPL_FLAG_LAST;
if (auto status = qpl_submit_job(job_ptr); status != QPL_STS_OK) auto status = qpl_submit_job(job_ptr);
if (status != QPL_STS_OK)
{ {
DeflateQplJobHWPool::instance().releaseJob(job_id); DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataSynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status)); LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR; return RET_ERROR;
} }
/// Busy waiting till job complete. /// Busy waiting till job complete.
UInt32 num_checks = 0;
do do
{ {
_tpause(1, __rdtsc() + 1000); _tpause(1, __rdtsc() + 1000);
} while (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED); status = qpl_check_job(job_ptr);
++num_checks;
} while (status == QPL_STS_BEING_PROCESSED && num_checks < MAX_CHECKS);
if (status != QPL_STS_OK)
{
DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataSynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR;
}
decompressed_size = job_ptr->total_out; decompressed_size = job_ptr->total_out;
DeflateQplJobHWPool::instance().releaseJob(job_id); DeflateQplJobHWPool::instance().releaseJob(job_id);
@ -237,7 +249,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source,
qpl_job * job_ptr = nullptr; qpl_job * job_ptr = nullptr;
if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id))) if (!(job_ptr = DeflateQplJobHWPool::instance().acquireJob(job_id)))
{ {
LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)"); LOG_INFO(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataAsynchronous->acquireJob fail, probably job pool exhausted)");
return RET_ERROR; return RET_ERROR;
} }
@ -257,7 +269,7 @@ Int32 HardwareCodecDeflateQpl::doDecompressDataAsynchronous(const char * source,
else else
{ {
DeflateQplJobHWPool::instance().releaseJob(job_id); DeflateQplJobHWPool::instance().releaseJob(job_id);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec.(Details: doDecompressDataAsynchronous->qpl_execute_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status)); LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: doDecompressDataAsynchronous->qpl_submit_job with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
return RET_ERROR; return RET_ERROR;
} }
} }
@ -266,6 +278,7 @@ void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests()
{ {
auto n_jobs_processing = decomp_async_job_map.size(); auto n_jobs_processing = decomp_async_job_map.size();
std::map<UInt32, qpl_job *>::iterator it = decomp_async_job_map.begin(); std::map<UInt32, qpl_job *>::iterator it = decomp_async_job_map.begin();
UInt32 num_checks = 0;
while (n_jobs_processing) while (n_jobs_processing)
{ {
@ -274,22 +287,34 @@ void HardwareCodecDeflateQpl::flushAsynchronousDecompressRequests()
job_id = it->first; job_id = it->first;
job_ptr = it->second; job_ptr = it->second;
if (qpl_check_job(job_ptr) == QPL_STS_BEING_PROCESSED) auto status = qpl_check_job(job_ptr);
if ((status == QPL_STS_BEING_PROCESSED) && (num_checks < MAX_CHECKS))
{ {
it++; it++;
} }
else else
{ {
if (status != QPL_STS_OK)
{
sw_codec.doDecompressData(
reinterpret_cast<const char * >(job_ptr->next_in_ptr),
job_ptr->available_in,
reinterpret_cast<char *>(job_ptr->next_out_ptr),
job_ptr->available_out);
LOG_WARNING(log, "DeflateQpl HW codec failed, falling back to SW codec. (Details: flushAsynchronousDecompressRequests with error code: {} - please refer to qpl_status in ./contrib/qpl/include/qpl/c_api/status.h)", static_cast<UInt32>(status));
}
it = decomp_async_job_map.erase(it); it = decomp_async_job_map.erase(it);
DeflateQplJobHWPool::instance().releaseJob(job_id); DeflateQplJobHWPool::instance().releaseJob(job_id);
n_jobs_processing--; n_jobs_processing--;
if (n_jobs_processing <= 0) if (n_jobs_processing <= 0)
break; break;
} }
if (it == decomp_async_job_map.end()) if (it == decomp_async_job_map.end())
{ {
it = decomp_async_job_map.begin(); it = decomp_async_job_map.begin();
_tpause(1, __rdtsc() + 1000); _tpause(1, __rdtsc() + 1000);
++num_checks;
} }
} }
} }
@ -364,8 +389,8 @@ void SoftwareCodecDeflateQpl::doDecompressData(const char * source, UInt32 sourc
} }
CompressionCodecDeflateQpl::CompressionCodecDeflateQpl() CompressionCodecDeflateQpl::CompressionCodecDeflateQpl()
: hw_codec(std::make_unique<HardwareCodecDeflateQpl>()) : sw_codec(std::make_unique<SoftwareCodecDeflateQpl>())
, sw_codec(std::make_unique<SoftwareCodecDeflateQpl>()) , hw_codec(std::make_unique<HardwareCodecDeflateQpl>(*sw_codec))
{ {
setCodecDescription("DEFLATE_QPL"); setCodecDescription("DEFLATE_QPL");
} }

View File

@ -65,8 +65,10 @@ class HardwareCodecDeflateQpl
public: public:
/// RET_ERROR stands for hardware codec fail, needs fallback to software codec. /// RET_ERROR stands for hardware codec fail, needs fallback to software codec.
static constexpr Int32 RET_ERROR = -1; static constexpr Int32 RET_ERROR = -1;
/// Maximum times to check if hardware job complete, otherwise fallback to software codec.
static constexpr UInt32 MAX_CHECKS = UINT16_MAX;
HardwareCodecDeflateQpl(); HardwareCodecDeflateQpl(SoftwareCodecDeflateQpl & sw_codec_);
~HardwareCodecDeflateQpl(); ~HardwareCodecDeflateQpl();
Int32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const; Int32 doCompressData(const char * source, UInt32 source_size, char * dest, UInt32 dest_size) const;
@ -87,6 +89,8 @@ private:
/// For flush, pop out job ID && job object from this map. Use job ID to release job lock and use job object to check job status till complete. /// For flush, pop out job ID && job object from this map. Use job ID to release job lock and use job object to check job status till complete.
std::map<UInt32, qpl_job *> decomp_async_job_map; std::map<UInt32, qpl_job *> decomp_async_job_map;
Poco::Logger * log; Poco::Logger * log;
/// Provides a fallback in case of errors.
SoftwareCodecDeflateQpl & sw_codec;
}; };
class CompressionCodecDeflateQpl final : public ICompressionCodec class CompressionCodecDeflateQpl final : public ICompressionCodec
@ -110,8 +114,8 @@ protected:
private: private:
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override; UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
std::unique_ptr<HardwareCodecDeflateQpl> hw_codec;
std::unique_ptr<SoftwareCodecDeflateQpl> sw_codec; std::unique_ptr<SoftwareCodecDeflateQpl> sw_codec;
std::unique_ptr<HardwareCodecDeflateQpl> hw_codec;
}; };
} }

157
src/Core/PlainRanges.cpp Normal file
View File

@ -0,0 +1,157 @@
#include <Core/PlainRanges.h>
namespace DB
{
/// Wrap a single range into a plain range set.
PlainRanges::PlainRanges(const Range & range)
{
    ranges.emplace_back(range);
}
/// Build a plain range set from `ranges_`.
/// When the input may contain intersections it is normalized first; `ordered`
/// selects the cheaper merge path for input already sorted by left bound.
PlainRanges::PlainRanges(const Ranges & ranges_, bool may_have_intersection, bool ordered)
{
    if (!may_have_intersection)
    {
        ranges = ranges_;
        return;
    }
    ranges = ordered ? makePlainFromOrdered(ranges_) : makePlainFromUnordered(ranges_);
}
/// Merge a left-bound-ordered list of ranges into a plain (pairwise disjoint)
/// list: each range either extends the last merged range or starts a new one.
Ranges PlainRanges::makePlainFromOrdered(const Ranges & ranges_)
{
    if (ranges_.size() <= 1)
        return ranges_;

    Ranges merged;
    merged.push_back(ranges_.front());
    for (auto it = ranges_.begin() + 1; it != ranges_.end(); ++it)
    {
        if (merged.back().intersectsRange(*it))
            merged.back() = *merged.back().unionWith(*it);
        else
            merged.push_back(*it);
    }
    return merged;
}
/// Normalize an arbitrary list of ranges: sort by left bound, then merge.
/// Takes the argument by value because sorting mutates it.
Ranges PlainRanges::makePlainFromUnordered(Ranges ranges_)
{
    if (ranges_.size() < 2)
        return ranges_;

    std::sort(ranges_.begin(), ranges_.end(), compareByLeftBound);
    return makePlainFromOrdered(ranges_);
}
/// Union of two plain range sets, implemented as a two-pointer merge walk.
/// Both inputs are ordered by left bound, so at each step we either copy the
/// strictly-smaller range or fuse two overlapping ranges.
PlainRanges PlainRanges::unionWith(const PlainRanges & other)
{
    auto left_itr = ranges.begin();
    auto right_itr = other.ranges.begin();
    Ranges new_range;
    for (; left_itr != ranges.end() && right_itr != other.ranges.end();)
    {
        /// Left range ends before the right one begins: emit it as-is.
        if (left_itr->leftThan(*right_itr))
        {
            new_range.push_back(*left_itr);
            left_itr++;
        }
        /// Right range ends before the left one begins: emit it as-is.
        else if (left_itr->rightThan(*right_itr))
        {
            new_range.push_back(*right_itr);
            right_itr++;
        }
        else /// union
        {
            /// Overlapping: emit the fused range, then advance whichever side
            /// finishes first (its remainder is already covered by the fusion).
            new_range.emplace_back(*(left_itr->unionWith(*right_itr)));
            if (compareByRightBound(*left_itr, *right_itr))
                left_itr++;
            else
                right_itr++;
        }
    }
    /// Drain whichever side still has ranges left.
    while (left_itr != ranges.end())
    {
        new_range.push_back(*left_itr);
        left_itr++;
    }
    while (right_itr != other.ranges.end())
    {
        new_range.push_back(*right_itr);
        right_itr++;
    }
    /// After union two PlainRanges, new ranges may like: [1, 4], [2, 5]
    /// We must make them plain.
    return PlainRanges(makePlainFromOrdered(new_range));
}
/// Intersection of two plain range sets via a two-pointer merge walk over the
/// ordered inputs. Disjoint ranges are skipped; overlapping pairs contribute
/// their intersection.
PlainRanges PlainRanges::intersectWith(const PlainRanges & other)
{
    auto left_itr = ranges.begin();
    auto right_itr = other.ranges.begin();
    Ranges new_ranges;
    for (; left_itr != ranges.end() && right_itr != other.ranges.end();)
    {
        /// Left range is entirely before the right one: it cannot intersect
        /// this or any later right range? No — only this right range; advance left.
        if (left_itr->leftThan(*right_itr))
        {
            left_itr++;
        }
        else if (left_itr->rightThan(*right_itr))
        {
            right_itr++;
        }
        else /// intersection
        {
            auto intersected = left_itr->intersectWith(*right_itr);
            if (intersected) /// skip blank range
                new_ranges.emplace_back(*intersected);
            /// Advance the side that ends first; the other may still overlap
            /// the next range from the advanced side.
            if (compareByRightBound(*left_itr, *right_itr))
                left_itr++;
            else
                right_itr++;
        }
    }
    return PlainRanges(new_ranges);
}
/// Strict weak ordering of ranges by left bound, used to sort before merging.
/// Two left-unbounded ranges compare equal. On equal bound values the range
/// with the excluded (open) bound is ordered first.
/// NOTE(review): verify the open-before-closed tie-break matches what
/// makePlainFromOrdered expects — confirm against its merge logic.
bool PlainRanges::compareByLeftBound(const Range & lhs, const Range & rhs)
{
    if (lhs.left == NEGATIVE_INFINITY && rhs.left == NEGATIVE_INFINITY)
        return false;
    return Range::less(lhs.left, rhs.left) || ((!lhs.left_included && rhs.left_included) && Range::equals(lhs.left, rhs.left));
}
/// Strict weak ordering of ranges by right bound; mirror of compareByLeftBound.
/// Two right-unbounded ranges compare equal. On equal bound values the range
/// with the excluded (open) bound is ordered first.
bool PlainRanges::compareByRightBound(const Range & lhs, const Range & rhs)
{
    if (lhs.right == POSITIVE_INFINITY && rhs.right == POSITIVE_INFINITY)
        return false;
    return Range::less(lhs.right, rhs.right) || ((!lhs.right_included && rhs.right_included) && Range::equals(lhs.right, rhs.right));
}
/// Invert every range in `to_invert_ranges` independently.
/// Returns one Ranges entry per input range (Range::invertRange yields up to
/// two ranges for a fully bounded input), not a single intersected result.
/// Special cases: an empty input inverts to the whole universe; any infinite
/// input range makes the whole result blank.
std::vector<Ranges> PlainRanges::invert(const Ranges & to_invert_ranges)
{
    /// Inverting an empty (blank) set yields the universe.
    if (to_invert_ranges.empty())
        return {makeUniverse().ranges};

    std::vector<Ranges> reverted_ranges;
    for (const auto & range : to_invert_ranges)
    {
        if (range.isInfinite())
            /// The complement of (-inf, +inf) is empty: return a blank result.
            return {{}};

        reverted_ranges.push_back(range.invertRange());
    }
    return reverted_ranges;
}
}

46
src/Core/PlainRanges.h Normal file
View File

@ -0,0 +1,46 @@
#pragma once
#include <Core/Range.h>
namespace DB
{
/** A PlainRanges is a series of ranges such that:
 * 1. no two of the ranges intersect;
 * 2. the ranges are ordered by their left bound;
 * 3. no range is blank (empty).
 *
 * Example:
 * query: (k > 1 and key < 5) or (k > 3 and k < 10) or key in (2, 12)
 * original ranges: (1, 5), (3, 10), [2, 2], [12, 12]
 * plain ranges: (1, 10), [12, 12]
 *
 * A blank (empty) PlainRanges has an empty `ranges` vector.
 */
struct PlainRanges
{
    /// Invariant: ordered by left bound, pairwise disjoint, no blank ranges.
    Ranges ranges;
    explicit PlainRanges(const Range & range);
    /// `may_have_intersection` requests normalization; `ordered` promises the
    /// input is already sorted by left bound so the cheaper merge can be used.
    explicit PlainRanges(const Ranges & ranges_, bool may_have_intersection = false, bool ordered = true);
    /// Set union / set intersection of two plain range sets.
    PlainRanges unionWith(const PlainRanges & other);
    PlainRanges intersectWith(const PlainRanges & other);
    /// Merge ranges and return new plain (ordered, non-intersecting) ranges.
    /// Example:
    /// [1, 3], [2, 4], [6, 8] -> [1, 4], [6, 8]
    /// [1, 3], [2, 4], (4, 5] -> [1, 4], [5, 5]
    static Ranges makePlainFromUnordered(Ranges ranges_);
    static Ranges makePlainFromOrdered(const Ranges & ranges_);
    /// Strict weak orderings by left / right bound (see definitions for the
    /// tie-break rule on equal bound values).
    static bool compareByLeftBound(const Range & lhs, const Range & rhs);
    static bool compareByRightBound(const Range & lhs, const Range & rhs);
    /// Invert each range independently; one Ranges entry per input range.
    static std::vector<Ranges> invert(const Ranges & to_invert_ranges);
    static PlainRanges makeBlank() { return PlainRanges({}); }
    static PlainRanges makeUniverse() { return PlainRanges({Range::createWholeUniverseWithoutNull()}); }
};
}

View File

@ -123,6 +123,27 @@ bool Range::leftThan(const FieldRef & x) const
return less(x, right) || (right_included && equals(x, right)); return less(x, right) || (right_included && equals(x, right));
} }
/// True if this range lies entirely to the right of `x` (no common point).
/// Touching bounds (left == x.right) count as disjoint unless both sides
/// include the shared point.
bool Range::rightThan(const Range & x) const
{
    return less(x.right, left) || (!(left_included && x.right_included) && equals(left, x.right));
}
/// True if this range lies entirely to the left of `x` (no common point).
/// Touching bounds (right == x.left) count as disjoint unless both sides
/// include the shared point.
bool Range::leftThan(const Range & x) const
{
    return less(right, x.left) || (!(x.left_included && right_included) && equals(right, x.left));
}
/// A range like [1, 2]: both bounds are finite.
/// (A Null-typed bound represents an unbounded side — TODO confirm against
/// the NEGATIVE_INFINITY / POSITIVE_INFINITY definitions.)
bool Range::fullBounded() const
{
    const bool left_bounded = left.getType() != Field::Types::Null;
    const bool right_bounded = right.getType() != Field::Types::Null;
    return left_bounded && right_bounded;
}
/// True only for the whole line (-inf, +inf).
bool Range::isInfinite() const
{
    if (!left.isNegativeInfinity())
        return false;
    return right.isPositiveInfinity();
}
bool Range::intersectsRange(const Range & r) const bool Range::intersectsRange(const Range & r) const
{ {
/// r to the left of me. /// r to the left of me.
@ -159,6 +180,95 @@ void Range::invert()
std::swap(left_included, right_included); std::swap(left_included, right_included);
} }
/// Complement of this range over the whole line, as zero, one, or two ranges.
///   [1, 3]       -> (-inf, 1), (3, +inf)
///   (-inf, +inf) -> {} (empty)
///   [1, +inf)    -> (-inf, 1)
Ranges Range::invertRange() const
{
    Ranges ranges;
    /// For full bounded range will generate two ranges.
    if (fullBounded()) /// case: [1, 3] -> (-inf, 1), (3, +inf)
    {
        /// Each complement piece flips the inclusion of the shared bound.
        ranges.push_back({NEGATIVE_INFINITY, false, left, !left_included});
        ranges.push_back({right, !right_included, POSITIVE_INFINITY, false});
    }
    else if (isInfinite())
    {
        /// blank ranges
    }
    else /// case: (-inf, 1] or [1, +inf)
    {
        /// Exactly one side is infinite: mirror the range around its finite
        /// bound. Note: swap() exchanges only the bound values, not the
        /// *_included flags, which is what the flag reshuffling below relies on.
        Range r = *this;
        std::swap(r.left, r.right);
        if (r.left.isPositiveInfinity()) /// [1, +inf)
        {
            /// Finite bound moved to the right side; invert its inclusion.
            r.left = NEGATIVE_INFINITY;
            r.right_included = !r.left_included;
            r.left_included = false;
        }
        else if (r.right.isNegativeInfinity()) /// (-inf, 1]
        {
            /// Finite bound moved to the left side; invert its inclusion.
            r.right = POSITIVE_INFINITY;
            r.left_included = !r.right_included;
            r.right_included = false;
        }
        ranges.push_back(r);
    }
    return ranges;
}
/// Intersection of two overlapping ranges; empty optional when disjoint.
/// The result takes the larger left bound and the smaller right bound.
std::optional<Range> Range::intersectWith(const Range & r) const
{
    if (!intersectsRange(r))
        return {};
    bool left_bound_use_mine = true;
    bool right_bound_use_mine = true;
    /// Use r's left bound when it starts later.
    /// NOTE(review): on equal bound values where only `r` includes the point,
    /// this picks r's *included* bound, but the excluded bound looks tighter
    /// for an intersection — verify the tie-break is intended.
    if (less(left, r.left) || ((!left_included && r.left_included) && equals(left, r.left)))
        left_bound_use_mine = false;
    /// Symmetric choice for the right bound.
    if (less(r.right, right) || ((!r.right_included && right_included) && equals(r.right, right)))
        right_bound_use_mine = false;
    return Range(
        left_bound_use_mine ? left : r.left,
        left_bound_use_mine ? left_included : r.left_included,
        right_bound_use_mine ? right : r.right,
        right_bound_use_mine ? right_included : r.right_included);
}
/// Union of two ranges into one continuous range; empty optional when the
/// ranges neither intersect nor touch (the union would not be continuous).
/// The result takes the smaller left bound and the larger right bound.
std::optional<Range> Range::unionWith(const Range & r) const
{
    if (!intersectsRange(r) && !nearByWith(r))
        return {};
    bool left_bound_use_mine = false;
    bool right_bound_use_mine = false;
    /// Use my left bound when it starts earlier.
    /// NOTE(review): on equal bound values where only `r` includes the point,
    /// this picks my *excluded* bound, but the included bound looks looser
    /// for a union — verify the tie-break is intended.
    if (less(left, r.left) || ((!left_included && r.left_included) && equals(left, r.left)))
        left_bound_use_mine = true;
    /// Symmetric choice for the right bound.
    if (less(r.right, right) || ((!r.right_included && right_included) && equals(r.right, right)))
        right_bound_use_mine = true;
    return Range(
        left_bound_use_mine ? left : r.left,
        left_bound_use_mine ? left_included : r.left_included,
        right_bound_use_mine ? right : r.right,
        right_bound_use_mine ? right_included : r.right_included);
}
/// True if `r` is adjacent to this range so their union is one continuous
/// range: the ranges touch at a shared bound value and exactly one of the two
/// sides includes that point (both included means they intersect; both
/// excluded leaves a one-point gap).
/// TODO If field is integer, case like [2, 3], [4, 5] is excluded.
bool Range::nearByWith(const Range & r) const
{
    /// me locates at left: touch at my `right` == r's `left`.
    if (((right_included && !r.left_included) || (!right_included && r.left_included)) && equals(right, r.left))
        return true;
    /// r locates at left: touch at r's `right` == my `left`.
    /// (Fixed: the second disjunct duplicated the first instead of covering
    /// the mirrored case !r.right_included && left_included.)
    if (((r.right_included && !left_included) || (!r.right_included && left_included)) && equals(r.right, left))
        return true;
    return false;
}
Range intersect(const Range & a, const Range & b) Range intersect(const Range & a, const Range & b)
{ {
Range res = Range::createWholeUniverse(); Range res = Range::createWholeUniverse();

View File

@ -38,6 +38,13 @@ struct FieldRef : public Field
size_t column_idx = 0; size_t column_idx = 0;
}; };
/** Range with open or closed ends; possibly unbounded.
*/
struct Range;
/** A series of ranges which may or may not overlap.
 */
using Ranges = std::vector<Range>;
/** Range with open or closed ends; possibly unbounded. /** Range with open or closed ends; possibly unbounded.
*/ */
struct Range struct Range
@ -79,12 +86,37 @@ public:
/// x is to the right /// x is to the right
bool leftThan(const FieldRef & x) const; bool leftThan(const FieldRef & x) const;
/// completely right than x
bool rightThan(const Range & x) const;
/// completely left than x
bool leftThan(const Range & x) const;
/// range like [1, 2]
bool fullBounded() const;
/// (-inf, +inf)
bool isInfinite() const;
bool isBlank() const;
bool intersectsRange(const Range & r) const; bool intersectsRange(const Range & r) const;
bool containsRange(const Range & r) const; bool containsRange(const Range & r) const;
/// Invert left and right
void invert(); void invert();
/// Invert the range.
/// Example:
/// [1, 3] -> (-inf, 1), (3, +inf)
Ranges invertRange() const;
std::optional<Range> intersectWith(const Range & r) const;
std::optional<Range> unionWith(const Range & r) const;
/// If near by r, they can be combined to a continuous range.
/// TODO If field is integer, case like [2, 3], [4, 5] is excluded.
bool nearByWith(const Range & r) const;
String toString() const; String toString() const;
}; };

View File

@ -95,6 +95,9 @@ namespace DB
M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \ M(UInt64, background_schedule_pool_size, 512, "The maximum number of threads that will be used for constantly executing some lightweight periodic operations.", 0) \
M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \ M(UInt64, background_message_broker_schedule_pool_size, 16, "The maximum number of threads that will be used for executing background operations for message streaming.", 0) \
M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \ M(UInt64, background_distributed_schedule_pool_size, 16, "The maximum number of threads that will be used for executing distributed sends.", 0) \
M(UInt64, tables_loader_foreground_pool_size, 0, "The maximum number of threads that will be used for foreground (that is being waited for by a query) loading of tables. Also used for synchronous loading of tables before the server start. Zero means use all CPUs.", 0) \
M(UInt64, tables_loader_background_pool_size, 0, "The maximum number of threads that will be used for background async loading of tables. Zero means use all CPUs.", 0) \
M(Bool, async_load_databases, false, "Enable asynchronous loading of databases and tables to speedup server startup. Queries to not yet loaded entity will be blocked until load is finished.", 0) \
M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \ M(Bool, display_secrets_in_show_and_select, false, "Allow showing secrets in SHOW and SELECT queries via a format setting and a grant", 0) \
\ \
M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \ M(UInt64, total_memory_profiler_step, 0, "Whenever server memory usage becomes larger than every next step in number of bytes the memory profiler will collect the allocating stack trace. Zero means disabled memory profiler. Values lower than a few megabytes will slow down server.", 0) \

View File

@ -575,7 +575,6 @@ class IColumn;
M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \ M(Bool, optimize_substitute_columns, false, "Use constraints for column substitution", 0) \
M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \ M(Bool, optimize_append_index, false, "Use constraints in order to append index condition (indexHint)", 0) \
M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \ M(Bool, normalize_function_names, true, "Normalize function names to their canonical names", 0) \
M(Bool, allow_experimental_alter_materialized_view_structure, false, "Allow atomic alter on Materialized views. Work in progress.", 0) \
M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \ M(Bool, enable_early_constant_folding, true, "Enable query optimization where we analyze function and subqueries results and rewrite query if there're constants there", 0) \
M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \ M(Bool, deduplicate_blocks_in_dependent_materialized_views, false, "Should deduplicate blocks for materialized views if the block is not a duplicate for the table. Use true to always deduplicate in dependent tables.", 0) \
M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \ M(Bool, materialized_views_ignore_errors, false, "Allows to ignore errors for MATERIALIZED VIEW, and deliver original block to the table regardless of MVs", 0) \
@ -856,6 +855,7 @@ class IColumn;
MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_window_functions, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_geo_types, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_geo_types, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \ MAKE_OBSOLETE(M, Bool, allow_experimental_query_cache, true) \
MAKE_OBSOLETE(M, Bool, allow_experimental_alter_materialized_view_structure, true) \
\ \
MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \ MAKE_OBSOLETE(M, Milliseconds, async_insert_stale_timeout_ms, 0) \
MAKE_OBSOLETE(M, StreamingHandleErrorMode, handle_kafka_error_mode, StreamingHandleErrorMode::DEFAULT) \ MAKE_OBSOLETE(M, StreamingHandleErrorMode, handle_kafka_error_mode, StreamingHandleErrorMode::DEFAULT) \

View File

@ -166,7 +166,6 @@ SerializationPtr DataTypeAggregateFunction::doGetDefaultSerialization() const
static DataTypePtr create(const ASTPtr & arguments) static DataTypePtr create(const ASTPtr & arguments)
{ {
String function_name; String function_name;
AggregateFunctionPtr function;
DataTypes argument_types; DataTypes argument_types;
Array params_row; Array params_row;
std::optional<size_t> version; std::optional<size_t> version;
@ -193,12 +192,14 @@ static DataTypePtr create(const ASTPtr & arguments)
argument_types_start_idx = 2; argument_types_start_idx = 2;
} }
auto action = NullsAction::EMPTY;
if (const auto * parametric = data_type_ast->as<ASTFunction>()) if (const auto * parametric = data_type_ast->as<ASTFunction>())
{ {
if (parametric->parameters) if (parametric->parameters)
throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected level of parameters to aggregate function"); throw Exception(ErrorCodes::SYNTAX_ERROR, "Unexpected level of parameters to aggregate function");
function_name = parametric->name; function_name = parametric->name;
action = parametric->nulls_action;
if (parametric->arguments) if (parametric->arguments)
{ {
@ -241,7 +242,7 @@ static DataTypePtr create(const ASTPtr & arguments)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: empty name of aggregate function passed"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: empty name of aggregate function passed");
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties); AggregateFunctionPtr function = AggregateFunctionFactory::instance().get(function_name, action, argument_types, params_row, properties);
return std::make_shared<DataTypeAggregateFunction>(function, argument_types, params_row, version); return std::make_shared<DataTypeAggregateFunction>(function, argument_types, params_row, version);
} }

View File

@ -144,7 +144,9 @@ static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & argum
throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: empty name of aggregate function passed"); throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: empty name of aggregate function passed");
AggregateFunctionProperties properties; AggregateFunctionProperties properties;
function = AggregateFunctionFactory::instance().get(function_name, argument_types, params_row, properties); /// NullsAction is not part of the type definition, instead it will have transformed the function into a different one
auto action = NullsAction::EMPTY;
function = AggregateFunctionFactory::instance().get(function_name, action, argument_types, params_row, properties);
DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function); DataTypeCustomSimpleAggregateFunction::checkSupportedFunctions(function);

View File

@ -5,6 +5,7 @@
#include <IO/WriteHelpers.h> #include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromFile.h> #include <IO/ReadBufferFromFile.h>
#include <Parsers/formatAST.h> #include <Parsers/formatAST.h>
#include <Common/PoolId.h>
#include <Common/atomicRename.h> #include <Common/atomicRename.h>
#include <Common/filesystemHelpers.h> #include <Common/filesystemHelpers.h>
#include <Storages/StorageMaterializedView.h> #include <Storages/StorageMaterializedView.h>
@ -74,6 +75,7 @@ String DatabaseAtomic::getTableDataPath(const ASTCreateQuery & query) const
void DatabaseAtomic::drop(ContextPtr) void DatabaseAtomic::drop(ContextPtr)
{ {
waitDatabaseStarted(false);
assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty()); assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty());
try try
{ {
@ -112,6 +114,7 @@ StoragePtr DatabaseAtomic::detachTable(ContextPtr /* context */, const String &
void DatabaseAtomic::dropTable(ContextPtr local_context, const String & table_name, bool sync) void DatabaseAtomic::dropTable(ContextPtr local_context, const String & table_name, bool sync)
{ {
waitDatabaseStarted(false);
auto table = tryGetTable(table_name, local_context); auto table = tryGetTable(table_name, local_context);
/// Remove the inner table (if any) to avoid deadlock /// Remove the inner table (if any) to avoid deadlock
/// (due to attempt to execute DROP from the worker thread) /// (due to attempt to execute DROP from the worker thread)
@ -175,6 +178,8 @@ void DatabaseAtomic::renameTable(ContextPtr local_context, const String & table_
if (exchange && !supportsAtomicRename()) if (exchange && !supportsAtomicRename())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "RENAME EXCHANGE is not supported");
waitDatabaseStarted(false);
auto & other_db = dynamic_cast<DatabaseAtomic &>(to_database); auto & other_db = dynamic_cast<DatabaseAtomic &>(to_database);
bool inside_database = this == &other_db; bool inside_database = this == &other_db;
@ -412,7 +417,7 @@ void DatabaseAtomic::assertCanBeDetached(bool cleanup)
DatabaseTablesIteratorPtr DatabaseTablesIteratorPtr
DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name) const DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name) const
{ {
auto base_iter = DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name); auto base_iter = DatabaseOrdinary::getTablesIterator(local_context, filter_by_table_name);
return std::make_unique<AtomicDatabaseTablesSnapshotIterator>(std::move(typeid_cast<DatabaseTablesSnapshotIterator &>(*base_iter))); return std::make_unique<AtomicDatabaseTablesSnapshotIterator>(std::move(typeid_cast<DatabaseTablesSnapshotIterator &>(*base_iter)));
} }
@ -441,28 +446,34 @@ void DatabaseAtomic::beforeLoadingMetadata(ContextMutablePtr /*context*/, Loadin
} }
} }
void DatabaseAtomic::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) LoadTaskPtr DatabaseAtomic::startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode)
{ {
beforeLoadingMetadata(local_context, mode); auto base = DatabaseOrdinary::startupDatabaseAsync(async_loader, std::move(startup_after), mode);
DatabaseOrdinary::loadStoredObjects(local_context, mode); auto job = makeLoadJob(
base->goals(),
TablesLoaderBackgroundStartupPoolId,
fmt::format("startup Atomic database {}", getDatabaseName()),
[this, mode] (AsyncLoader &, const LoadJobPtr &)
{
if (mode < LoadingStrictnessLevel::FORCE_RESTORE)
return;
NameToPathMap table_names;
{
std::lock_guard lock{mutex};
table_names = table_name_to_path;
}
fs::create_directories(path_to_table_symlinks);
for (const auto & table : table_names)
tryCreateSymlink(table.first, table.second, true);
});
return startup_atomic_database_task = makeLoadTask(async_loader, {job});
} }
void DatabaseAtomic::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) void DatabaseAtomic::waitDatabaseStarted(bool no_throw) const
{ {
DatabaseOrdinary::startupTables(thread_pool, mode); if (startup_atomic_database_task)
waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), startup_atomic_database_task, no_throw);
if (mode < LoadingStrictnessLevel::FORCE_RESTORE)
return;
NameToPathMap table_names;
{
std::lock_guard lock{mutex};
table_names = table_name_to_path;
}
fs::create_directories(path_to_table_symlinks);
for (const auto & table : table_names)
tryCreateSymlink(table.first, table.second, true);
} }
void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & actual_data_path, bool if_data_path_exist) void DatabaseAtomic::tryCreateSymlink(const String & table_name, const String & actual_data_path, bool if_data_path_exist)
@ -532,6 +543,8 @@ void DatabaseAtomic::renameDatabase(ContextPtr query_context, const String & new
{ {
/// CREATE, ATTACH, DROP, DETACH and RENAME DATABASE must hold DDLGuard /// CREATE, ATTACH, DROP, DETACH and RENAME DATABASE must hold DDLGuard
waitDatabaseStarted(false);
bool check_ref_deps = query_context->getSettingsRef().check_referential_table_dependencies; bool check_ref_deps = query_context->getSettingsRef().check_referential_table_dependencies;
bool check_loading_deps = !check_ref_deps && query_context->getSettingsRef().check_table_dependencies; bool check_loading_deps = !check_ref_deps && query_context->getSettingsRef().check_table_dependencies;
if (check_ref_deps || check_loading_deps) if (check_ref_deps || check_loading_deps)

View File

@ -48,11 +48,10 @@ public:
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override;
void loadStoredObjects(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override;
void startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) override; LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override;
void waitDatabaseStarted(bool no_throw) const override;
/// Atomic database cannot be detached if there is detached table which still in use /// Atomic database cannot be detached if there is detached table which still in use
void assertCanBeDetached(bool cleanup) override; void assertCanBeDetached(bool cleanup) override;
@ -87,6 +86,8 @@ protected:
String path_to_table_symlinks; String path_to_table_symlinks;
String path_to_metadata_symlink; String path_to_metadata_symlink;
const UUID db_uuid; const UUID db_uuid;
LoadTaskPtr startup_atomic_database_task;
}; };
} }

View File

@ -20,7 +20,6 @@ namespace ErrorCodes
{ {
extern const int UNKNOWN_TABLE; extern const int UNKNOWN_TABLE;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int INCONSISTENT_METADATA_FOR_BACKUP;
} }
DatabaseMemory::DatabaseMemory(const String & name_, ContextPtr context_) DatabaseMemory::DatabaseMemory(const String & name_, ContextPtr context_)
@ -177,21 +176,30 @@ std::vector<std::pair<ASTPtr, StoragePtr>> DatabaseMemory::getTablesForBackup(co
auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal); auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal);
if (!storage_id) if (!storage_id)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, {
"Couldn't resolve the name of temporary table {}", backQuoteIfNeed(table_name)); LOG_WARNING(log, "Couldn't resolve the name of temporary table {}", backQuoteIfNeed(table_name));
continue;
}
/// Here `storage_id.table_name` looks like looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1" /// Here `storage_id.table_name` looks like looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1"
/// it's not the real name of the table. /// it's not the real name of the table.
auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context); auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context);
if (!create_table_query) if (!create_table_query)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, {
"Couldn't get a create query for temporary table {}", backQuoteIfNeed(table_name)); LOG_WARNING(log, "Couldn't get a create query for temporary table {}", backQuoteIfNeed(table_name));
continue;
}
const auto & create = create_table_query->as<const ASTCreateQuery &>(); auto * create = create_table_query->as<ASTCreateQuery>();
if (create.getTable() != table_name) if (create->getTable() != table_name)
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, {
"Got a create query with unexpected name {} for temporary table {}", /// Probably the database has been just renamed. Use the older name for backup to keep the backup consistent.
backQuoteIfNeed(create.getTable()), backQuoteIfNeed(table_name)); LOG_WARNING(log, "Got a create query with unexpected name {} for temporary table {}",
backQuoteIfNeed(create->getTable()), backQuoteIfNeed(table_name));
create_table_query = create_table_query->clone();
create = create_table_query->as<ASTCreateQuery>();
create->setTable(table_name);
}
chassert(storage); chassert(storage);
storage->adjustCreateQueryForBackup(create_table_query); storage->adjustCreateQueryForBackup(create_table_query);

View File

@ -163,6 +163,13 @@ DatabaseOnDisk::DatabaseOnDisk(
} }
void DatabaseOnDisk::shutdown()
{
waitDatabaseStarted(/* no_throw = */ true);
DatabaseWithOwnTablesBase::shutdown();
}
void DatabaseOnDisk::createTable( void DatabaseOnDisk::createTable(
ContextPtr local_context, ContextPtr local_context,
const String & table_name, const String & table_name,
@ -189,6 +196,8 @@ void DatabaseOnDisk::createTable(
throw Exception( throw Exception(
ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists", backQuote(getDatabaseName()), backQuote(table_name)); ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists", backQuote(getDatabaseName()), backQuote(table_name));
waitDatabaseStarted(false);
String table_metadata_path = getObjectMetadataPath(table_name); String table_metadata_path = getObjectMetadataPath(table_name);
if (create.attach_short_syntax) if (create.attach_short_syntax)
@ -278,6 +287,8 @@ void DatabaseOnDisk::commitCreateTable(const ASTCreateQuery & query, const Stora
void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const String & table_name) void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const String & table_name)
{ {
waitDatabaseStarted(false);
auto table = detachTable(query_context, table_name); auto table = detachTable(query_context, table_name);
fs::path detached_permanently_flag(getObjectMetadataPath(table_name) + detached_suffix); fs::path detached_permanently_flag(getObjectMetadataPath(table_name) + detached_suffix);
@ -294,6 +305,8 @@ void DatabaseOnDisk::detachTablePermanently(ContextPtr query_context, const Stri
void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_name, bool /*sync*/) void DatabaseOnDisk::dropTable(ContextPtr local_context, const String & table_name, bool /*sync*/)
{ {
waitDatabaseStarted(false);
String table_metadata_path = getObjectMetadataPath(table_name); String table_metadata_path = getObjectMetadataPath(table_name);
String table_metadata_path_drop = table_metadata_path + drop_suffix; String table_metadata_path_drop = table_metadata_path + drop_suffix;
String table_data_path_relative = getTableDataPath(table_name); String table_data_path_relative = getTableDataPath(table_name);
@ -378,6 +391,8 @@ void DatabaseOnDisk::renameTable(
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases of different engines is not supported"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Moving tables between databases of different engines is not supported");
} }
waitDatabaseStarted(false);
auto table_data_relative_path = getTableDataPath(table_name); auto table_data_relative_path = getTableDataPath(table_name);
TableExclusiveLockHolder table_lock; TableExclusiveLockHolder table_lock;
String table_metadata_path; String table_metadata_path;
@ -519,6 +534,8 @@ ASTPtr DatabaseOnDisk::getCreateDatabaseQuery() const
void DatabaseOnDisk::drop(ContextPtr local_context) void DatabaseOnDisk::drop(ContextPtr local_context)
{ {
waitDatabaseStarted(false);
assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty()); assert(TSA_SUPPRESS_WARNING_FOR_READ(tables).empty());
if (local_context->getSettingsRef().force_remove_data_recursively_on_drop) if (local_context->getSettingsRef().force_remove_data_recursively_on_drop)
{ {

View File

@ -32,6 +32,8 @@ class DatabaseOnDisk : public DatabaseWithOwnTablesBase
public: public:
DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context); DatabaseOnDisk(const String & name, const String & metadata_path_, const String & data_path_, const String & logger, ContextPtr context);
void shutdown() override;
void createTable( void createTable(
ContextPtr context, ContextPtr context,
const String & table_name, const String & table_name,

View File

@ -22,6 +22,7 @@
#include <Parsers/queryToString.h> #include <Parsers/queryToString.h>
#include <Common/Stopwatch.h> #include <Common/Stopwatch.h>
#include <Common/ThreadPool.h> #include <Common/ThreadPool.h>
#include <Common/PoolId.h>
#include <Common/escapeForFileName.h> #include <Common/escapeForFileName.h>
#include <Common/quoteString.h> #include <Common/quoteString.h>
#include <Common/typeid_cast.h> #include <Common/typeid_cast.h>
@ -30,13 +31,6 @@
namespace fs = std::filesystem; namespace fs = std::filesystem;
namespace CurrentMetrics
{
extern const Metric DatabaseOrdinaryThreads;
extern const Metric DatabaseOrdinaryThreadsActive;
extern const Metric DatabaseOrdinaryThreadsScheduled;
}
namespace DB namespace DB
{ {
@ -47,38 +41,6 @@ namespace ErrorCodes
static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768;
namespace
{
void tryAttachTable(
ContextMutablePtr context,
const ASTCreateQuery & query,
DatabaseOrdinary & database,
const String & database_name,
const String & metadata_path,
bool force_restore)
{
try
{
auto [table_name, table] = createTableFromAST(
query,
database_name,
database.getTableDataPath(query),
context,
force_restore);
database.attachTable(context, table_name, table, database.getTableDataPath(query));
}
catch (Exception & e)
{
e.addMessage(
"Cannot attach table " + backQuote(database_name) + "." + backQuote(query.getTable()) + " from metadata file " + metadata_path
+ " from query " + serializeAST(query));
throw;
}
}
}
DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_) DatabaseOrdinary::DatabaseOrdinary(const String & name_, const String & metadata_path_, ContextPtr context_)
: DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_) : DatabaseOrdinary(name_, metadata_path_, "data/" + escapeForFileName(name_) + "/", "DatabaseOrdinary (" + name_ + ")", context_)
{ {
@ -90,75 +52,10 @@ DatabaseOrdinary::DatabaseOrdinary(
{ {
} }
void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode) void DatabaseOrdinary::loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel)
{ {
/** Tables load faster if they are loaded in sorted (by name) order. // Because it supportsLoadingInTopologicalOrder, we don't need this loading method.
* Otherwise (for the ext4 filesystem), `DirectoryIterator` iterates through them in some order, throw Exception(ErrorCodes::LOGICAL_ERROR, "Not implemented");
* which does not correspond to order tables creation and does not correspond to order of their location on disk.
*/
ParsedTablesMetadata metadata;
bool force_attach = LoadingStrictnessLevel::FORCE_ATTACH <= mode;
loadTablesMetadata(local_context, metadata, force_attach);
size_t total_tables = metadata.parsed_tables.size() - metadata.total_dictionaries;
AtomicStopwatch watch;
std::atomic<size_t> dictionaries_processed{0};
std::atomic<size_t> tables_processed{0};
ThreadPool pool(CurrentMetrics::DatabaseOrdinaryThreads, CurrentMetrics::DatabaseOrdinaryThreadsActive, CurrentMetrics::DatabaseOrdinaryThreadsScheduled);
/// We must attach dictionaries before attaching tables
/// because while we're attaching tables we may need to have some dictionaries attached
/// (for example, dictionaries can be used in the default expressions for some tables).
/// On the other hand we can attach any dictionary (even sourced from ClickHouse table)
/// without having any tables attached. It is so because attaching of a dictionary means
/// loading of its config only, it doesn't involve loading the dictionary itself.
/// Attach dictionaries.
for (const auto & name_with_path_and_query : metadata.parsed_tables)
{
const auto & name = name_with_path_and_query.first;
const auto & path = name_with_path_and_query.second.path;
const auto & ast = name_with_path_and_query.second.ast;
const auto & create_query = ast->as<const ASTCreateQuery &>();
if (create_query.is_dictionary)
{
pool.scheduleOrThrowOnError([&]()
{
loadTableFromMetadata(local_context, path, name, ast, mode);
/// Messages, so that it's not boring to wait for the server to load for a long time.
logAboutProgress(log, ++dictionaries_processed, metadata.total_dictionaries, watch);
});
}
}
pool.wait();
/// Attach tables.
for (const auto & name_with_path_and_query : metadata.parsed_tables)
{
const auto & name = name_with_path_and_query.first;
const auto & path = name_with_path_and_query.second.path;
const auto & ast = name_with_path_and_query.second.ast;
const auto & create_query = ast->as<const ASTCreateQuery &>();
if (!create_query.is_dictionary)
{
pool.scheduleOrThrowOnError([&]()
{
loadTableFromMetadata(local_context, path, name, ast, mode);
/// Messages, so that it's not boring to wait for the server to load for a long time.
logAboutProgress(log, ++tables_processed, total_tables, watch);
});
}
}
pool.wait();
} }
void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata, bool is_startup) void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTablesMetadata & metadata, bool is_startup)
@ -232,59 +129,143 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
TSA_SUPPRESS_WARNING_FOR_READ(database_name), tables_in_database, dictionaries_in_database); TSA_SUPPRESS_WARNING_FOR_READ(database_name), tables_in_database, dictionaries_in_database);
} }
void DatabaseOrdinary::loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, void DatabaseOrdinary::loadTableFromMetadata(
ContextMutablePtr local_context,
const String & file_path,
const QualifiedTableName & name,
const ASTPtr & ast,
LoadingStrictnessLevel mode) LoadingStrictnessLevel mode)
{ {
assert(name.database == TSA_SUPPRESS_WARNING_FOR_READ(database_name)); assert(name.database == TSA_SUPPRESS_WARNING_FOR_READ(database_name));
const auto & create_query = ast->as<const ASTCreateQuery &>(); const auto & query = ast->as<const ASTCreateQuery &>();
tryAttachTable(
local_context,
create_query,
*this,
name.database,
file_path, LoadingStrictnessLevel::FORCE_RESTORE <= mode);
}
void DatabaseOrdinary::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel /*mode*/)
{
LOG_INFO(log, "Starting up tables.");
/// NOTE No concurrent writes are possible during database loading
const size_t total_tables = TSA_SUPPRESS_WARNING_FOR_READ(tables).size();
if (!total_tables)
return;
AtomicStopwatch watch;
std::atomic<size_t> tables_processed{0};
auto startup_one_table = [&](const StoragePtr & table)
{
/// Since startup() method can use physical paths on disk we don't allow any exclusive actions (rename, drop so on)
/// until startup finished.
auto table_lock_holder = table->lockForShare(RWLockImpl::NO_QUERY, getContext()->getSettingsRef().lock_acquire_timeout);
table->startup();
logAboutProgress(log, ++tables_processed, total_tables, watch);
};
try try
{ {
for (const auto & table : TSA_SUPPRESS_WARNING_FOR_READ(tables)) auto [table_name, table] = createTableFromAST(
thread_pool.scheduleOrThrowOnError([&]() { startup_one_table(table.second); }); query,
name.database,
getTableDataPath(query),
local_context,
LoadingStrictnessLevel::FORCE_RESTORE <= mode);
attachTable(local_context, table_name, table, getTableDataPath(query));
} }
catch (...) catch (Exception & e)
{ {
/// We have to wait for jobs to finish here, because job function has reference to variables on the stack of current thread. e.addMessage(
thread_pool.wait(); "Cannot attach table " + backQuote(name.database) + "." + backQuote(query.getTable()) + " from metadata file " + file_path
+ " from query " + serializeAST(query));
throw; throw;
} }
thread_pool.wait(); }
LoadTaskPtr DatabaseOrdinary::loadTableFromMetadataAsync(
AsyncLoader & async_loader,
LoadJobSet load_after,
ContextMutablePtr local_context,
const String & file_path,
const QualifiedTableName & name,
const ASTPtr & ast,
LoadingStrictnessLevel mode)
{
std::scoped_lock lock(mutex);
auto job = makeLoadJob(
std::move(load_after),
TablesLoaderBackgroundLoadPoolId,
fmt::format("load table {}", name.getFullName()),
[this, local_context, file_path, name, ast, mode] (AsyncLoader &, const LoadJobPtr &)
{
loadTableFromMetadata(local_context, file_path, name, ast, mode);
});
return load_table[name.table] = makeLoadTask(async_loader, {job});
}
LoadTaskPtr DatabaseOrdinary::startupTableAsync(
AsyncLoader & async_loader,
LoadJobSet startup_after,
const QualifiedTableName & name,
LoadingStrictnessLevel /*mode*/)
{
std::scoped_lock lock(mutex);
/// Initialize progress indication on the first call
if (total_tables_to_startup == 0)
{
total_tables_to_startup = tables.size();
startup_watch.restart();
}
auto job = makeLoadJob(
std::move(startup_after),
TablesLoaderBackgroundStartupPoolId,
fmt::format("startup table {}", name.getFullName()),
[this, name] (AsyncLoader &, const LoadJobPtr &)
{
if (auto table = tryGetTableNoWait(name.table))
{
/// Since startup() method can use physical paths on disk we don't allow any exclusive actions (rename, drop so on)
/// until startup finished.
auto table_lock_holder = table->lockForShare(RWLockImpl::NO_QUERY, getContext()->getSettingsRef().lock_acquire_timeout);
table->startup();
logAboutProgress(log, ++tables_started, total_tables_to_startup, startup_watch);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {}.{} doesn't exist during startup",
backQuote(name.database), backQuote(name.table));
});
return startup_table[name.table] = makeLoadTask(async_loader, {job});
}
LoadTaskPtr DatabaseOrdinary::startupDatabaseAsync(
AsyncLoader & async_loader,
LoadJobSet startup_after,
LoadingStrictnessLevel /*mode*/)
{
// NOTE: this task is empty, but it is required for correct dependency handling (startup should be done after tables loading)
auto job = makeLoadJob(
std::move(startup_after),
TablesLoaderBackgroundStartupPoolId,
fmt::format("startup Ordinary database {}", getDatabaseName()));
return startup_database_task = makeLoadTask(async_loader, {job});
}
void DatabaseOrdinary::waitTableStarted(const String & name) const
{
/// Prioritize jobs (load and startup the table) to be executed in foreground pool and wait for them synchronously
LoadTaskPtr task;
{
std::scoped_lock lock(mutex);
if (auto it = startup_table.find(name); it != startup_table.end())
task = it->second;
}
if (task)
waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), task);
}
void DatabaseOrdinary::waitDatabaseStarted(bool no_throw) const
{
/// Prioritize load and startup of all tables and database itself and wait for them synchronously
if (startup_database_task)
waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), startup_database_task, no_throw);
}
DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const
{
auto result = DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name);
std::scoped_lock lock(mutex);
typeid_cast<DatabaseTablesSnapshotIterator &>(*result).setLoadTasks(startup_table);
return result;
} }
void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata)
{ {
waitDatabaseStarted(false);
String table_name = table_id.table_name; String table_name = table_id.table_name;
/// Read the definition of the table and replace the necessary parts with new ones. /// Read the definition of the table and replace the necessary parts with new ones.
String table_metadata_path = getObjectMetadataPath(table_name); String table_metadata_path = getObjectMetadataPath(table_name);
String table_metadata_tmp_path = table_metadata_path + ".tmp"; String table_metadata_tmp_path = table_metadata_path + ".tmp";

View File

@ -27,10 +27,35 @@ public:
void loadTablesMetadata(ContextPtr context, ParsedTablesMetadata & metadata, bool is_startup) override; void loadTablesMetadata(ContextPtr context, ParsedTablesMetadata & metadata, bool is_startup) override;
void loadTableFromMetadata(ContextMutablePtr local_context, const String & file_path, const QualifiedTableName & name, const ASTPtr & ast, void loadTableFromMetadata(
ContextMutablePtr local_context,
const String & file_path,
const QualifiedTableName & name,
const ASTPtr & ast,
LoadingStrictnessLevel mode) override; LoadingStrictnessLevel mode) override;
void startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) override; LoadTaskPtr loadTableFromMetadataAsync(
AsyncLoader & async_loader,
LoadJobSet load_after,
ContextMutablePtr local_context,
const String & file_path,
const QualifiedTableName & name,
const ASTPtr & ast,
LoadingStrictnessLevel mode) override;
LoadTaskPtr startupTableAsync(
AsyncLoader & async_loader,
LoadJobSet startup_after,
const QualifiedTableName & name,
LoadingStrictnessLevel mode) override;
void waitTableStarted(const String & name) const override;
void waitDatabaseStarted(bool no_throw) const override;
LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override;
DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override;
void alterTable( void alterTable(
ContextPtr context, ContextPtr context,
@ -48,6 +73,13 @@ protected:
ContextPtr query_context); ContextPtr query_context);
Strings permanently_detached_tables; Strings permanently_detached_tables;
std::unordered_map<String, LoadTaskPtr> load_table TSA_GUARDED_BY(mutex);
std::unordered_map<String, LoadTaskPtr> startup_table TSA_GUARDED_BY(mutex);
LoadTaskPtr startup_database_task;
std::atomic<size_t> total_tables_to_startup{0};
std::atomic<size_t> tables_started{0};
AtomicStopwatch startup_watch;
}; };
} }

View File

@ -12,6 +12,7 @@
#include <Common/ZooKeeper/KeeperException.h> #include <Common/ZooKeeper/KeeperException.h>
#include <Common/ZooKeeper/Types.h> #include <Common/ZooKeeper/Types.h>
#include <Common/ZooKeeper/ZooKeeper.h> #include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/PoolId.h>
#include <Databases/DatabaseReplicated.h> #include <Databases/DatabaseReplicated.h>
#include <Databases/DatabaseReplicatedWorker.h> #include <Databases/DatabaseReplicatedWorker.h>
#include <Databases/DDLDependencyVisitor.h> #include <Databases/DDLDependencyVisitor.h>
@ -53,7 +54,7 @@ namespace ErrorCodes
extern const int INCORRECT_QUERY; extern const int INCORRECT_QUERY;
extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_CONNECTION_TRIES_FAILED;
extern const int NO_ACTIVE_REPLICAS; extern const int NO_ACTIVE_REPLICAS;
extern const int INCONSISTENT_METADATA_FOR_BACKUP; extern const int CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT;
extern const int CANNOT_RESTORE_TABLE; extern const int CANNOT_RESTORE_TABLE;
} }
@ -533,41 +534,54 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
createEmptyLogEntry(current_zookeeper); createEmptyLogEntry(current_zookeeper);
} }
void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr /*context*/, LoadingStrictnessLevel mode) void DatabaseReplicated::beforeLoadingMetadata(ContextMutablePtr context_, LoadingStrictnessLevel mode)
{ {
DatabaseAtomic::beforeLoadingMetadata(context_, mode);
tryConnectToZooKeeperAndInitDatabase(mode); tryConnectToZooKeeperAndInitDatabase(mode);
} }
void DatabaseReplicated::loadStoredObjects(ContextMutablePtr local_context, LoadingStrictnessLevel mode)
{
beforeLoadingMetadata(local_context, mode);
DatabaseAtomic::loadStoredObjects(local_context, mode);
}
UInt64 DatabaseReplicated::getMetadataHash(const String & table_name) const UInt64 DatabaseReplicated::getMetadataHash(const String & table_name) const
{ {
return DB::getMetadataHash(table_name, readMetadataFile(table_name)); return DB::getMetadataHash(table_name, readMetadataFile(table_name));
} }
void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictnessLevel mode) LoadTaskPtr DatabaseReplicated::startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode)
{ {
DatabaseAtomic::startupTables(thread_pool, mode); auto base = DatabaseAtomic::startupDatabaseAsync(async_loader, std::move(startup_after), mode);
auto job = makeLoadJob(
base->goals(),
TablesLoaderBackgroundStartupPoolId,
fmt::format("startup Replicated database {}", getDatabaseName()),
[this] (AsyncLoader &, const LoadJobPtr &)
{
UInt64 digest = 0;
{
std::lock_guard lock{mutex};
for (const auto & table : tables)
digest += getMetadataHash(table.first);
LOG_DEBUG(log, "Calculated metadata digest of {} tables: {}", tables.size(), digest);
}
/// TSA: No concurrent writes are possible during loading {
UInt64 digest = 0; std::lock_guard lock{metadata_mutex};
for (const auto & table : TSA_SUPPRESS_WARNING_FOR_READ(tables)) chassert(!tables_metadata_digest);
digest += getMetadataHash(table.first); tables_metadata_digest = digest;
}
LOG_DEBUG(log, "Calculated metadata digest of {} tables: {}", TSA_SUPPRESS_WARNING_FOR_READ(tables).size(), digest); if (is_probably_dropped)
chassert(!TSA_SUPPRESS_WARNING_FOR_READ(tables_metadata_digest)); return;
TSA_SUPPRESS_WARNING_FOR_WRITE(tables_metadata_digest) = digest;
if (is_probably_dropped) ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext());
return; ddl_worker->startup();
ddl_worker_initialized = true;
});
return startup_replicated_database_task = makeLoadTask(async_loader, {job});
}
ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext()); void DatabaseReplicated::waitDatabaseStarted(bool no_throw) const
ddl_worker->startup(); {
ddl_worker_initialized = true; if (startup_replicated_database_task)
waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), startup_replicated_database_task, no_throw);
} }
bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const
@ -728,6 +742,7 @@ void DatabaseReplicated::checkQueryValid(const ASTPtr & query, ContextPtr query_
BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags) BlockIO DatabaseReplicated::tryEnqueueReplicatedDDL(const ASTPtr & query, ContextPtr query_context, QueryFlags flags)
{ {
waitDatabaseStarted(false);
if (query_context->getCurrentTransaction() && query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction) if (query_context->getCurrentTransaction() && query_context->getSettingsRef().throw_on_unsupported_query_inside_transaction)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed DDL queries inside transactions are not supported"); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed DDL queries inside transactions are not supported");
@ -791,6 +806,8 @@ static UUID getTableUUIDIfReplicated(const String & metadata, ContextPtr context
void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr) void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 & max_log_ptr)
{ {
waitDatabaseStarted(false);
is_recovering = true; is_recovering = true;
SCOPE_EXIT({ is_recovering = false; }); SCOPE_EXIT({ is_recovering = false; });
@ -1107,31 +1124,43 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep
} }
std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr) std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(const ZooKeeperPtr & zookeeper, UInt32 & max_log_ptr)
{
return getConsistentMetadataSnapshotImpl(zookeeper, {}, /* max_retries= */ 10, max_log_ptr);
}
std::map<String, String> DatabaseReplicated::getConsistentMetadataSnapshotImpl(
const ZooKeeperPtr & zookeeper,
const FilterByNameFunction & filter_by_table_name,
size_t max_retries,
UInt32 & max_log_ptr) const
{ {
std::map<String, String> table_name_to_metadata; std::map<String, String> table_name_to_metadata;
constexpr int max_retries = 10; size_t iteration = 0;
int iteration = 0;
while (++iteration <= max_retries) while (++iteration <= max_retries)
{ {
table_name_to_metadata.clear(); table_name_to_metadata.clear();
LOG_DEBUG(log, "Trying to get consistent metadata snapshot for log pointer {}", max_log_ptr); LOG_DEBUG(log, "Trying to get consistent metadata snapshot for log pointer {}", max_log_ptr);
Strings table_names = zookeeper->getChildren(zookeeper_path + "/metadata");
Strings escaped_table_names;
escaped_table_names = zookeeper->getChildren(zookeeper_path + "/metadata");
if (filter_by_table_name)
std::erase_if(escaped_table_names, [&](const String & table) { return !filter_by_table_name(unescapeForFileName(table)); });
std::vector<zkutil::ZooKeeper::FutureGet> futures; std::vector<zkutil::ZooKeeper::FutureGet> futures;
futures.reserve(table_names.size()); futures.reserve(escaped_table_names.size());
for (const auto & table : table_names) for (const auto & table : escaped_table_names)
futures.emplace_back(zookeeper->asyncTryGet(zookeeper_path + "/metadata/" + table)); futures.emplace_back(zookeeper->asyncTryGet(zookeeper_path + "/metadata/" + table));
for (size_t i = 0; i < table_names.size(); ++i) for (size_t i = 0; i < escaped_table_names.size(); ++i)
{ {
auto res = futures[i].get(); auto res = futures[i].get();
if (res.error != Coordination::Error::ZOK) if (res.error != Coordination::Error::ZOK)
break; break;
table_name_to_metadata.emplace(unescapeForFileName(table_names[i]), res.data); table_name_to_metadata.emplace(unescapeForFileName(escaped_table_names[i]), res.data);
} }
UInt32 new_max_log_ptr = parse<UInt32>(zookeeper->get(zookeeper_path + "/max_log_ptr")); UInt32 new_max_log_ptr = parse<UInt32>(zookeeper->get(zookeeper_path + "/max_log_ptr"));
if (new_max_log_ptr == max_log_ptr && table_names.size() == table_name_to_metadata.size()) if (new_max_log_ptr == max_log_ptr && escaped_table_names.size() == table_name_to_metadata.size())
break; break;
if (max_log_ptr < new_max_log_ptr) if (max_log_ptr < new_max_log_ptr)
@ -1142,13 +1171,13 @@ std::map<String, String> DatabaseReplicated::tryGetConsistentMetadataSnapshot(co
else else
{ {
chassert(max_log_ptr == new_max_log_ptr); chassert(max_log_ptr == new_max_log_ptr);
chassert(table_names.size() != table_name_to_metadata.size()); chassert(escaped_table_names.size() != table_name_to_metadata.size());
LOG_DEBUG(log, "Cannot get metadata of some tables due to ZooKeeper error, will retry"); LOG_DEBUG(log, "Cannot get metadata of some tables due to ZooKeeper error, will retry");
} }
} }
if (max_retries < iteration) if (max_retries < iteration)
throw Exception(ErrorCodes::DATABASE_REPLICATION_FAILED, "Cannot get consistent metadata snapshot"); throw Exception(ErrorCodes::CANNOT_GET_REPLICATED_DATABASE_SNAPSHOT, "Cannot get consistent metadata snapshot");
LOG_DEBUG(log, "Got consistent metadata snapshot for log pointer {}", max_log_ptr); LOG_DEBUG(log, "Got consistent metadata snapshot for log pointer {}", max_log_ptr);
@ -1221,6 +1250,8 @@ void DatabaseReplicated::drop(ContextPtr context_)
return; return;
} }
waitDatabaseStarted(false);
auto current_zookeeper = getZooKeeper(); auto current_zookeeper = getZooKeeper();
current_zookeeper->set(replica_path, DROPPED_MARK, -1); current_zookeeper->set(replica_path, DROPPED_MARK, -1);
createEmptyLogEntry(current_zookeeper); createEmptyLogEntry(current_zookeeper);
@ -1238,6 +1269,7 @@ void DatabaseReplicated::drop(ContextPtr context_)
void DatabaseReplicated::stopReplication() void DatabaseReplicated::stopReplication()
{ {
waitDatabaseStarted(/* no_throw = */ true);
if (ddl_worker) if (ddl_worker)
ddl_worker->shutdown(); ddl_worker->shutdown();
} }
@ -1253,6 +1285,8 @@ void DatabaseReplicated::shutdown()
void DatabaseReplicated::dropTable(ContextPtr local_context, const String & table_name, bool sync) void DatabaseReplicated::dropTable(ContextPtr local_context, const String & table_name, bool sync)
{ {
waitDatabaseStarted(false);
auto txn = local_context->getZooKeeperMetadataTransaction(); auto txn = local_context->getZooKeeperMetadataTransaction();
assert(!ddl_worker || !ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id.")); assert(!ddl_worker || !ddl_worker->isCurrentlyActive() || txn || startsWith(table_name, ".inner_id."));
if (txn && txn->isInitialQuery() && !txn->isCreateOrReplaceQuery()) if (txn && txn->isInitialQuery() && !txn->isCreateOrReplaceQuery())
@ -1295,6 +1329,8 @@ void DatabaseReplicated::renameTable(ContextPtr local_context, const String & ta
if (exchange && !to_database.isTableExist(to_table_name, local_context)) if (exchange && !to_database.isTableExist(to_table_name, local_context))
throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", to_table_name); throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {} does not exist", to_table_name);
waitDatabaseStarted(false);
String statement = readMetadataFile(table_name); String statement = readMetadataFile(table_name);
String statement_to; String statement_to;
if (exchange) if (exchange)
@ -1395,6 +1431,8 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const
void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name) void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name)
{ {
waitDatabaseStarted(false);
auto txn = local_context->getZooKeeperMetadataTransaction(); auto txn = local_context->getZooKeeperMetadataTransaction();
assert(!ddl_worker->isCurrentlyActive() || txn); assert(!ddl_worker->isCurrentlyActive() || txn);
if (txn && txn->isInitialQuery()) if (txn && txn->isInitialQuery())
@ -1418,6 +1456,8 @@ void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const
void DatabaseReplicated::removeDetachedPermanentlyFlag(ContextPtr local_context, const String & table_name, const String & table_metadata_path, bool attach) void DatabaseReplicated::removeDetachedPermanentlyFlag(ContextPtr local_context, const String & table_name, const String & table_metadata_path, bool attach)
{ {
waitDatabaseStarted(false);
auto txn = local_context->getZooKeeperMetadataTransaction(); auto txn = local_context->getZooKeeperMetadataTransaction();
assert(!ddl_worker->isCurrentlyActive() || txn); assert(!ddl_worker->isCurrentlyActive() || txn);
if (txn && txn->isInitialQuery() && attach) if (txn && txn->isInitialQuery() && attach)
@ -1454,23 +1494,19 @@ String DatabaseReplicated::readMetadataFile(const String & table_name) const
std::vector<std::pair<ASTPtr, StoragePtr>> std::vector<std::pair<ASTPtr, StoragePtr>>
DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr &) const DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr &) const
{ {
waitDatabaseStarted(false);
/// Here we read metadata from ZooKeeper. We could do that by simple call of DatabaseAtomic::getTablesForBackup() however /// Here we read metadata from ZooKeeper. We could do that by simple call of DatabaseAtomic::getTablesForBackup() however
/// reading from ZooKeeper is better because thus we won't be dependent on how fast the replication queue of this database is. /// reading from ZooKeeper is better because thus we won't be dependent on how fast the replication queue of this database is.
std::vector<std::pair<ASTPtr, StoragePtr>> res;
auto zookeeper = getContext()->getZooKeeper(); auto zookeeper = getContext()->getZooKeeper();
auto escaped_table_names = zookeeper->getChildren(zookeeper_path + "/metadata"); UInt32 snapshot_version = parse<UInt32>(zookeeper->get(zookeeper_path + "/max_log_ptr"));
for (const auto & escaped_table_name : escaped_table_names) auto snapshot = getConsistentMetadataSnapshotImpl(zookeeper, filter, /* max_retries= */ 20, snapshot_version);
std::vector<std::pair<ASTPtr, StoragePtr>> res;
for (const auto & [table_name, metadata] : snapshot)
{ {
String table_name = unescapeForFileName(escaped_table_name);
if (!filter(table_name))
continue;
String zk_metadata;
if (!zookeeper->tryGet(zookeeper_path + "/metadata/" + escaped_table_name, zk_metadata))
throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Metadata for table {} was not found in ZooKeeper", table_name);
ParserCreateQuery parser; ParserCreateQuery parser;
auto create_table_query = parseQuery(parser, zk_metadata, 0, getContext()->getSettingsRef().max_parser_depth); auto create_table_query = parseQuery(parser, metadata, 0, getContext()->getSettingsRef().max_parser_depth);
auto & create = create_table_query->as<ASTCreateQuery &>(); auto & create = create_table_query->as<ASTCreateQuery &>();
create.attach = false; create.attach = false;
@ -1501,6 +1537,8 @@ void DatabaseReplicated::createTableRestoredFromBackup(
std::shared_ptr<IRestoreCoordination> restore_coordination, std::shared_ptr<IRestoreCoordination> restore_coordination,
UInt64 timeout_ms) UInt64 timeout_ms)
{ {
waitDatabaseStarted(false);
/// Because of the replication multiple nodes can try to restore the same tables again and failed with "Table already exists" /// Because of the replication multiple nodes can try to restore the same tables again and failed with "Table already exists"
/// because of some table could be restored already on other node and then replicated to this node. /// because of some table could be restored already on other node and then replicated to this node.
/// To solve this problem we use the restore coordination: the first node calls /// To solve this problem we use the restore coordination: the first node calls

Some files were not shown because too many files have changed in this diff Show More