Merge branch 'master' into keeper-dont-rollback-session-id

mergify[bot] 2022-07-01 07:53:50 +00:00 committed by GitHub
commit 6e75d47f51
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
101 changed files with 788 additions and 648 deletions

View File

@ -34,7 +34,6 @@
* Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping a specified number of lines at the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)).
* `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
* Added a new window function `nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL x SECOND])`; a usage sketch follows this list. [#37628](https://github.com/ClickHouse/ClickHouse/pull/37628) ([Andrey Zvonov](https://github.com/zvonand)).
* Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)).
* Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)).
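A minimal usage sketch for the `nonNegativeDerivative` entry above, assuming a hypothetical `metrics` table with `value` and `ts` columns (table and column names are illustrative, not part of the changelog; exact window-frame requirements are not covered by the entry):
```sql
-- Hypothetical table/columns: per-second rate of a monotonic counter, clamped at zero.
SELECT
    ts,
    nonNegativeDerivative(value, ts, INTERVAL 1 SECOND) OVER (ORDER BY ts ASC) AS rate_per_second
FROM metrics
ORDER BY ts;
```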

View File

@ -252,10 +252,10 @@ else ()
endif ()
# Optionally split binaries and debug symbols.
option(INSTALL_STRIPPED_BINARIES "Split binaries and debug symbols" OFF)
if (INSTALL_STRIPPED_BINARIES)
option(SPLIT_DEBUG_SYMBOLS "Split binaries and debug symbols" OFF)
if (SPLIT_DEBUG_SYMBOLS)
message(STATUS "Will split binaries and debug symbols")
set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
set(SPLITTED_DEBUG_SYMBOLS_DIR "stripped" CACHE STRING "A separate directory for stripped information")
endif()
cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

View File

@ -15,5 +15,8 @@ ClickHouse® is an open-source column-oriented database management system that a
* [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any.
## Upcoming events
* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/286304312/) Please join us for an evening of talks (in English), food, and discussion. Featuring talks on ClickHouse in production and at least one on the deep internals of ClickHouse itself.
* [v22.7 Release Webinar](https://clickhouse.com/company/events/v22-7-release-webinar/) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share a vision of what is coming on the roadmap.
* [ClickHouse Meetup at the Cloudflare office in London](https://www.meetup.com/clickhouse-london-user-group/events/286891586/) ClickHouse meetup at the Cloudflare office space in central London
* [ClickHouse Meetup at the Metoda office in Munich](https://www.meetup.com/clickhouse-meetup-munich/events/286891667/) ClickHouse meetup at the Metoda office in Munich

View File

@ -1,4 +1,4 @@
macro(clickhouse_strip_binary)
macro(clickhouse_split_debug_symbols)
set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH)
cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN})

View File

@ -67,7 +67,7 @@ RUN arch=${TARGETARCH:-amd64} \
&& chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper
EXPOSE 2181 10181 44444
EXPOSE 2181 10181 44444 9181
VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper

View File

@ -31,7 +31,7 @@ else
DO_CHOWN=0
fi
KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}"
KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/keeper_config.xml}"
if [ -f "$KEEPER_CONFIG" ] && ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then
echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'"

View File

@ -202,7 +202,7 @@ def parse_env_variables(
cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
if is_release_build(build_type, package_type, sanitizer, split_binary):
cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
result.append("WITH_PERFORMANCE=1")
if is_cross_arm:
cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1")

View File

@ -42,6 +42,7 @@ function install_packages()
function configure()
{
# install test configs
export USE_DATABASE_ORDINARY=1
/usr/share/clickhouse-test/config/install.sh
# we mount tests folder from repo to /usr/share

View File

@ -0,0 +1,22 @@
---
sidebar_position: 1
sidebar_label: 2022
---
# 2022 Changelog
### ClickHouse release v22.6.2.12-stable FIXME as compared to v22.6.1.1985-stable
#### Improvement
* Backported in [#38484](https://github.com/ClickHouse/ClickHouse/issues/38484): Improve the stability for hive storage integration test. Move the data prepare step into test.py. [#38260](https://github.com/ClickHouse/ClickHouse/pull/38260) ([lgbo](https://github.com/lgbo-ustc)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Backported in [#38404](https://github.com/ClickHouse/ClickHouse/issues/38404): Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Remove processor description from span attributes - it is not working [#38157](https://github.com/ClickHouse/ClickHouse/pull/38157) ([Ilya Yatsishin](https://github.com/qoega)).
* Checkout full repositories for performance tests [#38327](https://github.com/ClickHouse/ClickHouse/pull/38327) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Try to fix 02305_schema_inference_with_globs [#38337](https://github.com/ClickHouse/ClickHouse/pull/38337) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -349,7 +349,7 @@ Note that ClickHouse uses forks of these libraries, see https://github.com/Click
<td>Only for Linux, x86_64 or aarch64.</td>
</tr>
<tr>
<td><a name="install-stripped-binaries"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L270" rel="external nofollow noreferrer" target="_blank"><code class="syntax">INSTALL_STRIPPED_BINARIES</code></a></td>
<td><a name="build-debug-symbols"></a><a href="https://github.com/clickhouse/clickhouse/blob/master/CMakeLists.txt#L270" rel="external nofollow noreferrer" target="_blank"><code class="syntax">SPLIT_DEBUG_SYMBOLS</code></a></td>
<td><code class="syntax">OFF</code></td>
<td>Build stripped binaries with debug info in separate directory</td>
<td></td>

View File

@ -136,4 +136,3 @@ DESCRIBE TABLE test_database.test_table;
└────────┴───────────────────┘
```
[Original article](https://clickhouse.com/docs/en/database-engines/postgresql/) <!--hide-->

View File

@ -43,4 +43,3 @@ The `TinyLog` engine is the simplest in the family and provides the poorest func
The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer file descriptors, but the `Log` engine provides higher efficiency when reading data.
[Original article](https://clickhouse.com/docs/en/operations/table_engines/log_family/) <!--hide-->
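For illustration only, a minimal pair of tables on the two engines compared above (table and column names are hypothetical):
```sql
CREATE TABLE t_log    (id UInt64, s String) ENGINE = Log;
CREATE TABLE t_stripe (id UInt64, s String) ENGINE = StripeLog;
```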

View File

@ -68,40 +68,42 @@ For a description of parameters, see the [CREATE query description](../../../sql
`ORDER BY` — The sorting key.
A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`.
ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause.
Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key).
#### PARTITION BY
`PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases you don't need a partition key, and when you do, you rarely need one more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use overly granular partitioning. Don't partition your data by client identifiers or names (instead, make the client identifier or name the first column in the ORDER BY expression).
For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format.
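A short sketch of monthly partitioning as described above, assuming a hypothetical `visits` table (all names are illustrative):
```sql
CREATE TABLE visits
(
    EventDate Date,
    CounterID UInt32,
    UserID UInt64
)
ENGINE = MergeTree()
PARTITION BY toYYYYMM(EventDate)   -- partitions get names like '202207'
ORDER BY (CounterID, EventDate);
```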
#### PRIMARY KEY
`PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional.
By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause.
#### SAMPLE BY
`SAMPLE BY` — An expression for sampling. Optional.
If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`.
#### TTL
`TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional.
Expression must have one `Date` or `DateTime` column as a result. Example:
`TTL date + INTERVAL 1 DAY`
Expression must have one `Date` or `DateTime` column as a result. Example:
```
TTL date + INTERVAL 1 DAY
```
The type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if the expression is satisfied for all rows in a part) to the specified disk (`TO DISK 'xxx'`) or volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. The default type of the rule is removal (`DELETE`). A list of multiple rules can be specified, but there should be no more than one `DELETE` rule.
For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)
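A hedged sketch of a table combining a move rule and a `DELETE` rule, assuming a hypothetical storage policy with a volume named 'cold' (the table, columns, and policy name are illustrative):
```sql
CREATE TABLE events
(
    d Date,
    message String
)
ENGINE = MergeTree()
ORDER BY d
TTL d + INTERVAL 1 MONTH TO VOLUME 'cold',   -- move older parts to the 'cold' volume
    d + INTERVAL 1 YEAR DELETE               -- drop rows older than a year
SETTINGS storage_policy = 'tiered';          -- hypothetical policy providing the 'cold' volume
```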
### SETTINGS
Additional parameters that control the behavior of the `MergeTree` (optional):
@ -129,7 +131,6 @@ Additional parameters that control the behavior of the `MergeTree` (optional):
#### min_merge_bytes_to_use_direct_io
`min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
<a name="mergetree_setting-merge_with_ttl_timeout"></a>
#### merge_with_ttl_timeout
@ -305,15 +306,29 @@ For `SELECT` queries, ClickHouse analyzes whether an index can be used. An index
Thus, it is possible to quickly run queries on one or many ranges of the primary key. In this example, queries will be fast when run for a specific tracking tag, for a specific tag and date range, for a specific tag and date, for multiple tags with a date range, and so on.
Let's look at the engine configured as follows:
ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192
```sql
ENGINE MergeTree()
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate)
SETTINGS index_granularity=8192
```
In this case, in queries:
``` sql
SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34
SELECT count() FROM table WHERE EventDate = toDate(now()) AND (CounterID = 34 OR CounterID = 42)
SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) AND CounterID IN (101500, 731962, 160656) AND (CounterID = 101500 OR EventDate != toDate('2014-05-01'))
SELECT count() FROM table
WHERE EventDate = toDate(now())
AND CounterID = 34
SELECT count() FROM table
WHERE EventDate = toDate(now())
AND (CounterID = 34 OR CounterID = 42)
SELECT count() FROM table
WHERE ((EventDate >= toDate('2014-01-01')
AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01'))
AND CounterID IN (101500, 731962, 160656)
AND (CounterID = 101500 OR EventDate != toDate('2014-05-01'))
```
ClickHouse will use the primary key index to trim improper data and the monthly partitioning key to trim partitions that are in improper date ranges.
@ -376,36 +391,36 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234
#### `minmax`
Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like the primary key.
#### `set(max_rows)`
Stores unique values of the specified expression (no more than `max_rows` rows, `max_rows=0` means “no limits”). Uses the values to check if the `WHERE` expression is not satisfiable on a block of data.
#### `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)`
Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with datatypes: [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) and [Map](../../../sql-reference/data-types/map.md). Can be used for optimization of `EQUALS`, `LIKE` and `IN` expressions; a creation sketch follows the parameter list.
- `n` — ngram size,
- `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here, for example, 256 or 512, because it can be compressed well).
- `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
- `random_seed` — The seed for Bloom filter hash functions.
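A minimal sketch of adding such an index to an existing table (the table, column, and parameter values are illustrative):
```sql
ALTER TABLE logs ADD INDEX message_ngrams message TYPE ngrambf_v1(4, 1024, 3, 0) GRANULARITY 4;
```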
#### `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)`
The same as `ngrambf_v1`, but stores tokens instead of ngrams. Tokens are sequences separated by non-alphanumeric characters.
#### `bloom_filter([false_positive])` — Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) for the specified columns.
The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025.
Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`, `Map`.
For the `Map` data type, the client can specify whether the index should be created for keys or values using the [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function.
The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions#hasany), [hasAll](../../../sql-reference/functions/array-functions#hasall).
Example of index creation for `Map` data type
```
INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1

View File

@ -86,4 +86,3 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64
- Indices
- Replication
[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/file/) <!--hide-->

View File

@ -151,4 +151,3 @@ ALTER TABLE id_val_join DELETE WHERE id = 3;
└────┴─────┘
```
[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/join/) <!--hide-->

View File

@ -86,4 +86,3 @@ SELECT * FROM WatchLog;
- [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns)
- [merge](../../../sql-reference/table-functions/merge.md) table function
[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/merge/) <!--hide-->

View File

@ -10,6 +10,3 @@ When writing to a `Null` table, data is ignored. When reading from a `Null` tabl
:::note
If you are wondering why this is useful, note that you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded.
:::
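A small sketch of that pattern, with hypothetical table and view names:
```sql
CREATE TABLE null_sink (x UInt64) ENGINE = Null;

CREATE MATERIALIZED VIEW x_counts
ENGINE = SummingMergeTree() ORDER BY x AS
SELECT x, count() AS c FROM null_sink GROUP BY x;

INSERT INTO null_sink VALUES (1), (1), (2);  -- rows feed the view; the raw rows are discarded
```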
[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/null/) <!--hide-->

View File

@ -20,4 +20,3 @@ When creating a table, the following settings are applied:
- [persistent](../../../operations/settings/settings.md#persistent)
[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/set/) <!--hide-->

View File

@ -89,4 +89,3 @@ SELECT * FROM url_engine_table
- Indexes.
- Replication.
[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/url/) <!--hide-->

View File

@ -13,4 +13,3 @@ The following external authenticators and directories are supported:
- Kerberos [Authenticator](./kerberos.md#external-authenticators-kerberos)
- [SSL X.509 authentication](./ssl-x509.md#ssl-external-authentication)
[Original article](https://clickhouse.com/docs/en/operations/external-authenticators/index/) <!--hide-->

View File

@ -61,4 +61,3 @@ exception_code: ZOK
2 rows in set. Elapsed: 0.025 sec.
```
[Original article](https://clickhouse.com/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) <!--hide-->

View File

@ -47,4 +47,3 @@ last_exception:
- [Distributed table engine](../../engines/table-engines/special/distributed.md)
[Original article](https://clickhouse.com/docs/en/operations/system_tables/distribution_queue) <!--hide-->

View File

@ -50,4 +50,3 @@ attribute.values: []
- [OpenTelemetry](../../operations/opentelemetry.md)
[Original article](https://clickhouse.com/docs/en/operations/system_tables/opentelemetry_span_log) <!--hide-->

View File

@ -145,4 +145,3 @@ column_marks_bytes: 48
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
[Original article](https://clickhouse.com/docs/en/operations/system_tables/parts_columns) <!--hide-->

View File

@ -88,4 +88,3 @@ last_postpone_time: 1970-01-01 03:00:00
- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md#query-language-system-replicated)
[Original article](https://clickhouse.com/docs/en/operations/system_tables/replication_queue) <!--hide-->

View File

@ -66,5 +66,3 @@ Result:
└──────────────────────────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/meanZTest/) <!--hide-->

View File

@ -69,4 +69,3 @@ Result:
- [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test)
- [studentTTest function](studentttest.md#studentttest)
[Original article](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) <!--hide-->

View File

@ -27,4 +27,3 @@ You can use domains anywhere the corresponding base type can be used, for example:
- Can't implicitly convert string values into domain values when inserting data from another column or table.
- Domain adds no constraints on stored values.
[Original article](https://clickhouse.com/docs/en/data_types/domains/) <!--hide-->

View File

@ -104,4 +104,3 @@ Result:
└─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘
```
[Original article](https://clickhouse.com/docs/en/data-types/geo/) <!--hide-->

View File

@ -108,4 +108,3 @@ Result:
- [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function
- [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
[Original article](https://clickhouse.com/docs/en/data-types/map/) <!--hide-->

View File

@ -39,4 +39,3 @@ Values of the `SimpleAggregateFunction(func, Type)` look and are stored the same way
CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id;
```
[Original article](https://clickhouse.com/docs/en/data_types/simpleaggregatefunction/) <!--hide-->

View File

@ -355,4 +355,3 @@ Result:
└───────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/encryption_functions/) <!--hide-->

View File

@ -312,11 +312,11 @@ The aggregation can be performed more effectively, if a table is sorted by some
### GROUP BY in External Memory
You can enable dumping temporary data to the disk to restrict memory usage during `GROUP BY`.
The [max_bytes_before_external_group_by](../../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) setting determines the threshold RAM consumption for dumping `GROUP BY` temporary data to the file system. If set to 0 (the default), it is disabled.
The [max_bytes_before_external_group_by](../../../operations/settings/query-complexity.md#settings-max_bytes_before_external_group_by) setting determines the threshold RAM consumption for dumping `GROUP BY` temporary data to the file system. If set to 0 (the default), it is disabled.
When using `max_bytes_before_external_group_by`, we recommend that you set `max_memory_usage` about twice as high. This is necessary because there are two stages to aggregation: reading the data and forming intermediate data (1) and merging the intermediate data (2). Dumping data to the file system can only occur during stage 1. If the temporary data wasn't dumped, then stage 2 might require up to the same amount of memory as in stage 1.
For example, if [max_memory_usage](../../../operations/settings/settings.md#settings_max_memory_usage) was set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000, and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data), maximum consumption of RAM is only slightly more than `max_bytes_before_external_group_by`.
For example, if [max_memory_usage](../../../operations/settings/query-complexity.md#settings_max_memory_usage) was set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000, and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data), maximum consumption of RAM is only slightly more than `max_bytes_before_external_group_by`.
With distributed query processing, external aggregation is performed on remote servers. In order for the requester server to use only a small amount of RAM, set `distributed_aggregation_memory_efficient` to 1.
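Following the recommendation above, a hedged example of the two settings at roughly a 1:2 ratio for one session (the values and table name are illustrative):
```sql
SET max_bytes_before_external_group_by = 10000000000;
SET max_memory_usage = 20000000000;

SELECT key, count() FROM big_table GROUP BY key;  -- big_table is a hypothetical table
```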

View File

@ -111,4 +111,3 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
- [The MySQL table engine](../../engines/table-engines/integrations/mysql.md)
- [Using MySQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
[Original article](https://clickhouse.com/docs/en/sql-reference/table_functions/mysql/) <!--hide-->

View File

@ -264,10 +264,10 @@ GROUP BY computes for each encountered
### GROUP BY in External Memory {#select-group-by-in-external-memory}
You can enable dumping temporary data to disk to limit RAM consumption during `GROUP BY`.
The [max_bytes_before_external_group_by](../../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) setting determines the threshold of RAM consumption at which `GROUP BY` temporary data is dumped to the file system. If it is 0 (the default), this is disabled.
The [max_bytes_before_external_group_by](../../../operations/settings/query-complexity.md#settings-max_bytes_before_external_group_by) setting determines the threshold of RAM consumption at which `GROUP BY` temporary data is dumped to the file system. If it is 0 (the default), this is disabled.
When using `max_bytes_before_external_group_by`, we recommend setting `max_memory_usage` approximately twice as high. This is because aggregation is performed in two stages: reading and forming intermediate data (1) and merging the intermediate data (2). Dumping data to the file system can only happen during stage 1. If no temporary data was dumped, stage 2 may consume as much memory as stage 1.
For example, if [max_memory_usage](../../../operations/settings/settings.md#settings_max_memory_usage) was set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000 and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data to the file system), the maximum RAM consumption is only slightly higher than `max_bytes_before_external_group_by`.
For example, if [max_memory_usage](../../../operations/settings/query-complexity.md#settings_max_memory_usage) was set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000 and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data to the file system), the maximum RAM consumption is only slightly higher than `max_bytes_before_external_group_by`.
With distributed query processing, external aggregation is performed on remote servers. To have the query initiator server use only a small amount of RAM, set `distributed_aggregation_memory_efficient` to 1.

View File

@ -116,11 +116,11 @@ GROUP BY domain
### Grouping in External Memory {#select-group-by-in-external-memory}
You can enable dumping temporary data to disk to limit memory usage during `GROUP BY`.
The [max_bytes_before_external_group_by](../../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) setting determines the threshold of RAM consumption at which `GROUP BY` temporary data is dumped to the file system. If set to 0 (the default), it is disabled.
The [max_bytes_before_external_group_by](../../../operations/settings/query-complexity.md#settings-max_bytes_before_external_group_by) setting determines the threshold of RAM consumption at which `GROUP BY` temporary data is dumped to the file system. If set to 0 (the default), it is disabled.
When using `max_bytes_before_external_group_by`, we recommend setting `max_memory_usage` about twice as high. This is necessary because aggregation has two stages: reading the data and forming intermediate data (1), and merging the intermediate data (2). Dumping data to the file system can only happen during stage 1. If the temporary data was not dumped, stage 2 may require as much memory as stage 1.
For example, if [max_memory_usage](../../../operations/settings/settings.md#settings_max_memory_usage) is set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000 and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data), the maximum RAM consumption is only slightly higher than `max_bytes_before_external_group_by`.
For example, if [max_memory_usage](../../../operations/settings/query-complexity.md#settings_max_memory_usage) is set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000 and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data), the maximum RAM consumption is only slightly higher than `max_bytes_before_external_group_by`.
With distributed query processing, external aggregation is performed on remote servers. To have the requester server use only a small amount of RAM, set `distributed_aggregation_memory_efficient` to 1.

View File

@ -2,7 +2,7 @@ if (USE_CLANG_TIDY)
set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
endif ()
include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake)
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
# The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.),
# each of them may be built and linked as a separate library.
@ -18,6 +18,12 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A
option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)"
${ENABLE_CLICKHOUSE_ALL})
if (CLICKHOUSE_SPLIT_BINARY OR NOT ENABLE_UTILS)
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF)
else ()
option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)
endif ()
# https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/
option (ENABLE_CLICKHOUSE_LOCAL "Local files fast processing mode" ${ENABLE_CLICKHOUSE_ALL})
@ -101,6 +107,12 @@ else()
message(STATUS "Local mode: OFF")
endif()
if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
message(STATUS "Self-extracting executable: ON")
else()
message(STATUS "Self-extracting executable: OFF")
endif()
if (ENABLE_CLICKHOUSE_BENCHMARK)
message(STATUS "Benchmark mode: ON")
else()
@ -266,6 +278,10 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE)
add_subdirectory (library-bridge)
endif ()
if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
add_subdirectory (self-extracting)
endif ()
if (CLICKHOUSE_ONE_SHARED)
add_library(clickhouse-lib SHARED
${CLICKHOUSE_SERVER_SOURCES}
@ -511,10 +527,10 @@ else ()
add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM)
endif()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH clickhouse)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR})
install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()

View File

@ -131,10 +131,10 @@ if (BUILD_STANDALONE_KEEPER)
add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR})
install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()
endif()

View File

@ -1,4 +1,4 @@
include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake)
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
library-bridge.cpp
@ -24,9 +24,9 @@ target_link_libraries(clickhouse-library-bridge PRIVATE
set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-library-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR})
install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -1,4 +1,4 @@
include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake)
include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)
set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
ColumnInfoHandler.cpp
@ -39,10 +39,10 @@ if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)
endif()
if (INSTALL_STRIPPED_BINARIES)
clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
if (SPLIT_DEBUG_SYMBOLS)
clickhouse_split_debug_symbols(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-odbc-bridge)
else()
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR})
install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
endif()

View File

@ -0,0 +1,6 @@
add_custom_target (self-extracting ALL
${CMAKE_COMMAND} -E remove clickhouse
COMMAND ${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor clickhouse ../clickhouse
DEPENDS clickhouse compressor
)

View File

@ -3,34 +3,6 @@
#include <DataTypes/IDataType.h>
#include <AggregateFunctions/IAggregateFunction.h>
#define FOR_BASIC_NUMERIC_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Float32) \
M(Float64)
#define FOR_NUMERIC_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(UInt128) \
M(UInt256) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256) \
M(Float32) \
M(Float64)
namespace DB
{
struct Settings;

View File

@ -138,7 +138,7 @@ public:
type_indexes.insert(TypeToTypeIndex<NearestFieldType<T>>);
}
DataTypePtr getScalarType() const { return getLeastSupertype(type_indexes, true); }
DataTypePtr getScalarType() const { return getLeastSupertypeOrString(type_indexes); }
bool haveNulls() const { return have_nulls; }
bool needConvertField() const { return field_types.size() > 1; }
@ -167,6 +167,7 @@ FieldInfo getFieldInfo(const Field & field)
ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr && data_, bool is_nullable_)
: least_common_type(getDataTypeByColumn(*data_))
, is_nullable(is_nullable_)
, num_rows(data_->size())
{
data.push_back(std::move(data_));
}
@ -176,15 +177,13 @@ ColumnObject::Subcolumn::Subcolumn(
: least_common_type(std::make_shared<DataTypeNothing>())
, is_nullable(is_nullable_)
, num_of_defaults_in_prefix(size_)
, num_rows(size_)
{
}
size_t ColumnObject::Subcolumn::size() const
{
size_t res = num_of_defaults_in_prefix;
for (const auto & part : data)
res += part->size();
return res;
return num_rows;
}
size_t ColumnObject::Subcolumn::byteSize() const
@ -321,7 +320,7 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
{
if (isConversionRequiredBetweenIntegers(*base_type, *least_common_base_type))
{
base_type = getLeastSupertype(DataTypes{std::move(base_type), least_common_base_type}, true);
base_type = getLeastSupertypeOrString(DataTypes{std::move(base_type), least_common_base_type});
type_changed = true;
if (!least_common_base_type->equals(*base_type))
addNewColumnPart(createArrayOfType(std::move(base_type), value_dim));
@ -332,12 +331,14 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info)
field = convertFieldToTypeOrThrow(field, *least_common_type.get());
data.back()->insert(field);
++num_rows;
}
void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length)
{
assert(start + length <= src.size());
size_t end = start + length;
num_rows += length;
if (data.empty())
{
@ -345,7 +346,7 @@ void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t star
}
else if (!least_common_type.get()->equals(*src.getLeastCommonType()))
{
auto new_least_common_type = getLeastSupertype(DataTypes{least_common_type.get(), src.getLeastCommonType()}, true);
auto new_least_common_type = getLeastSupertypeOrString(DataTypes{least_common_type.get(), src.getLeastCommonType()});
if (!new_least_common_type->equals(*least_common_type.get()))
addNewColumnPart(std::move(new_least_common_type));
}
@ -487,6 +488,8 @@ void ColumnObject::Subcolumn::insertDefault()
++num_of_defaults_in_prefix;
else
data.back()->insertDefault();
++num_rows;
}
void ColumnObject::Subcolumn::insertManyDefaults(size_t length)
@ -495,12 +498,15 @@ void ColumnObject::Subcolumn::insertManyDefaults(size_t length)
num_of_defaults_in_prefix += length;
else
data.back()->insertManyDefaults(length);
num_rows += length;
}
void ColumnObject::Subcolumn::popBack(size_t n)
{
assert(n <= size());
num_rows -= n;
size_t num_removed = 0;
for (auto it = data.rbegin(); it != data.rend(); ++it)
{
@ -559,15 +565,11 @@ ColumnObject::Subcolumn ColumnObject::Subcolumn::recreateWithDefaultValues(const
if (is_nullable)
scalar_type = makeNullable(scalar_type);
Subcolumn new_subcolumn;
Subcolumn new_subcolumn(*this);
new_subcolumn.least_common_type = LeastCommonType{createArrayOfType(scalar_type, field_info.num_dimensions)};
new_subcolumn.is_nullable = is_nullable;
new_subcolumn.num_of_defaults_in_prefix = num_of_defaults_in_prefix;
new_subcolumn.data.reserve(data.size());
for (const auto & part : data)
new_subcolumn.data.push_back(recreateColumnWithDefaultValues(
part, scalar_type, field_info.num_dimensions));
for (auto & part : new_subcolumn.data)
part = recreateColumnWithDefaultValues(part, scalar_type, field_info.num_dimensions);
return new_subcolumn;
}

View File

@ -146,6 +146,8 @@ public:
/// least common type and we count number of defaults in prefix,
/// which will be converted to the default type of final common type.
size_t num_of_defaults_in_prefix = 0;
size_t num_rows = 0;
};
using Subcolumns = SubcolumnsTree<Subcolumn>;

View File

@ -489,10 +489,10 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter & filt, ssize_t result_s
const T * data_pos = data.data();
/** A slightly more optimized version.
* Based on the assumption that often pieces of consecutive values
* completely pass or do not pass the filter.
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
*/
static constexpr size_t SIMD_BYTES = 64;
const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES;
@ -577,6 +577,115 @@ ColumnPtr ColumnVector<T>::index(const IColumn & indexes, size_t limit) const
return selectIndexImpl(*this, indexes, limit);
}
#ifdef __SSE2__
namespace
{
/** Optimization for ColumnVector replicate using SIMD instructions.
* For such optimization it is important that data is right padded with 15 bytes.
*
* Replicate span size is offsets[i] - offsets[i - 1].
*
* Split spans into 3 categories.
* 1. Span with 0 size. Continue iteration.
*
* 2. Span with 1 size. Update pointer from which data must be copied into result.
* Then if we see span with size 1 or greater than 1 copy data directly into result data and reset pointer.
* Example:
* Data: 1 2 3 4
* Offsets: 1 2 3 4
* Result data: 1 2 3 4
*
* 3. Span with size greater than 1. Save single data element into register and copy it into result data.
* Example:
* Data: 1 2 3 4
* Offsets: 4 4 4 4
* Result data: 1 1 1 1
*
* Additional handling for tail is needed if pointer from which data must be copied from span with size 1 is not null.
*/
template<typename IntType>
requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
{
const IntType * data_copy_begin_ptr = nullptr;
size_t offsets_size = offsets.size();
for (size_t offset_index = 0; offset_index < offsets_size; ++offset_index)
{
size_t span = offsets[offset_index] - offsets[offset_index - 1];
if (span == 1)
{
if (!data_copy_begin_ptr)
data_copy_begin_ptr = data + offset_index;
continue;
}
/// Copy data
if (data_copy_begin_ptr)
{
size_t copy_size = (data + offset_index) - data_copy_begin_ptr;
bool remainder = copy_size % 4;
size_t sse_copy_counter = (copy_size / 4) + remainder;
auto * result_data_copy = result_data;
while (sse_copy_counter)
{
__m128i copy_batch = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data_copy_begin_ptr));
_mm_storeu_si128(reinterpret_cast<__m128i *>(result_data_copy), copy_batch);
result_data_copy += 4;
data_copy_begin_ptr += 4;
--sse_copy_counter;
}
result_data += copy_size;
data_copy_begin_ptr = nullptr;
}
if (span == 0)
continue;
/// Copy single data element into result data
bool span_remainder = span % 4;
size_t copy_counter = (span / 4) + span_remainder;
auto * result_data_tmp = result_data;
__m128i copy_element_data = _mm_set1_epi32(data[offset_index]);
while (copy_counter)
{
_mm_storeu_si128(reinterpret_cast<__m128i *>(result_data_tmp), copy_element_data);
result_data_tmp += 4;
--copy_counter;
}
result_data += span;
}
/// Copy tail if needed
if (data_copy_begin_ptr)
{
size_t copy_size = (data + offsets_size) - data_copy_begin_ptr;
bool remainder = copy_size % 4;
size_t sse_copy_counter = (copy_size / 4) + remainder;
while (sse_copy_counter)
{
__m128i copy_batch = _mm_loadu_si128(reinterpret_cast<const __m128i *>(data_copy_begin_ptr));
_mm_storeu_si128(reinterpret_cast<__m128i *>(result_data), copy_batch);
result_data += 4;
data_copy_begin_ptr += 4;
--sse_copy_counter;
}
}
}
}
#endif
template <typename T>
ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
{
@ -589,6 +698,14 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
auto res = this->create(offsets.back());
#ifdef __SSE2__
if constexpr (std::is_same_v<T, UInt32>)
{
replicateSSE42Int32(getData().data(), res->getData().data(), offsets);
return res;
}
#endif
auto it = res->getData().begin(); // NOLINT
for (size_t i = 0; i < size; ++i)
{

View File

@ -89,7 +89,7 @@ TEST(ColumnObject, InsertRangeFrom)
const auto & type_dst = subcolumn_dst.getLeastCommonType();
const auto & type_src = subcolumn_src.getLeastCommonType();
auto type_res = getLeastSupertype(DataTypes{type_dst, type_src}, true);
auto type_res = getLeastSupertypeOrString(DataTypes{type_dst, type_src});
size_t from = rng() % subcolumn_src.size();
size_t to = rng() % subcolumn_src.size();

View File

@ -9,13 +9,13 @@ namespace ErrorCodes
extern const int SYNTAX_ERROR;
}
Float64 IntervalKind::toAvgSeconds() const
Int32 IntervalKind::toAvgSeconds() const
{
switch (kind)
{
case IntervalKind::Nanosecond: return 0.000000001;
case IntervalKind::Microsecond: return 0.000001;
case IntervalKind::Millisecond: return 0.001;
case IntervalKind::Nanosecond:
case IntervalKind::Microsecond:
case IntervalKind::Millisecond: return 0; /// fractional parts of seconds have 0 seconds
case IntervalKind::Second: return 1;
case IntervalKind::Minute: return 60;
case IntervalKind::Hour: return 3600;
@ -28,25 +28,6 @@ Float64 IntervalKind::toAvgSeconds() const
__builtin_unreachable();
}
bool IntervalKind::isFixedLength() const
{
switch (kind)
{
case IntervalKind::Nanosecond:
case IntervalKind::Microsecond:
case IntervalKind::Millisecond:
case IntervalKind::Second:
case IntervalKind::Minute:
case IntervalKind::Hour:
case IntervalKind::Day:
case IntervalKind::Week: return true;
case IntervalKind::Month:
case IntervalKind::Quarter:
case IntervalKind::Year: return false;
}
__builtin_unreachable();
}
IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds)
{
if (num_seconds)

View File

@ -31,15 +31,12 @@ struct IntervalKind
/// Returns number of seconds in one interval.
/// For `Month`, `Quarter` and `Year` the function returns an average number of seconds.
Float64 toAvgSeconds() const;
Int32 toAvgSeconds() const;
/// Chooses an interval kind based on number of seconds.
/// For example, `IntervalKind::fromAvgSeconds(3600)` returns `IntervalKind::Hour`.
static IntervalKind fromAvgSeconds(Int64 num_seconds);
/// Returns whether IntervalKind has a fixed number of seconds (e.g. Day) or non-fixed(e.g. Month)
bool isFixedLength() const;
/// Returns an uppercased version of what `toString()` returns.
const char * toKeyword() const;

View File

@ -22,13 +22,14 @@ namespace ErrorCodes
extern const int EMPTY_DATA_PASSED;
}
DataTypePtr FieldToDataType::operator() (const Null &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Null &) const
{
return std::make_shared<DataTypeNullable>(std::make_shared<DataTypeNothing>());
}
DataTypePtr FieldToDataType::operator() (const UInt64 & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const UInt64 & x) const
{
if (x <= std::numeric_limits<UInt8>::max()) return std::make_shared<DataTypeUInt8>();
if (x <= std::numeric_limits<UInt16>::max()) return std::make_shared<DataTypeUInt16>();
@ -36,7 +37,8 @@ DataTypePtr FieldToDataType::operator() (const UInt64 & x) const
return std::make_shared<DataTypeUInt64>();
}
DataTypePtr FieldToDataType::operator() (const Int64 & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Int64 & x) const
{
if (x <= std::numeric_limits<Int8>::max() && x >= std::numeric_limits<Int8>::min()) return std::make_shared<DataTypeInt8>();
if (x <= std::numeric_limits<Int16>::max() && x >= std::numeric_limits<Int16>::min()) return std::make_shared<DataTypeInt16>();
@ -44,77 +46,90 @@ DataTypePtr FieldToDataType::operator() (const Int64 & x) const
return std::make_shared<DataTypeInt64>();
}
DataTypePtr FieldToDataType::operator() (const Float64 &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Float64 &) const
{
return std::make_shared<DataTypeFloat64>();
}
DataTypePtr FieldToDataType::operator() (const UInt128 &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const UInt128 &) const
{
return std::make_shared<DataTypeUInt128>();
}
DataTypePtr FieldToDataType::operator() (const Int128 &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Int128 &) const
{
return std::make_shared<DataTypeInt128>();
}
DataTypePtr FieldToDataType::operator() (const UInt256 &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const UInt256 &) const
{
return std::make_shared<DataTypeUInt256>();
}
DataTypePtr FieldToDataType::operator() (const Int256 &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Int256 &) const
{
return std::make_shared<DataTypeInt256>();
}
DataTypePtr FieldToDataType::operator() (const UUID &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const UUID &) const
{
return std::make_shared<DataTypeUUID>();
}
DataTypePtr FieldToDataType::operator() (const String &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const String &) const
{
return std::make_shared<DataTypeString>();
}
DataTypePtr FieldToDataType::operator() (const DecimalField<Decimal32> & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal32> & x) const
{
using Type = DataTypeDecimal<Decimal32>;
return std::make_shared<Type>(Type::maxPrecision(), x.getScale());
}
DataTypePtr FieldToDataType::operator() (const DecimalField<Decimal64> & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal64> & x) const
{
using Type = DataTypeDecimal<Decimal64>;
return std::make_shared<Type>(Type::maxPrecision(), x.getScale());
}
DataTypePtr FieldToDataType::operator() (const DecimalField<Decimal128> & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal128> & x) const
{
using Type = DataTypeDecimal<Decimal128>;
return std::make_shared<Type>(Type::maxPrecision(), x.getScale());
}
DataTypePtr FieldToDataType::operator() (const DecimalField<Decimal256> & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const DecimalField<Decimal256> & x) const
{
using Type = DataTypeDecimal<Decimal256>;
return std::make_shared<Type>(Type::maxPrecision(), x.getScale());
}
DataTypePtr FieldToDataType::operator() (const Array & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Array & x) const
{
DataTypes element_types;
element_types.reserve(x.size());
for (const Field & elem : x)
element_types.emplace_back(applyVisitor(FieldToDataType(allow_convertion_to_string), elem));
element_types.emplace_back(applyVisitor(*this, elem));
return std::make_shared<DataTypeArray>(getLeastSupertype(element_types, allow_convertion_to_string));
return std::make_shared<DataTypeArray>(getLeastSupertype<on_error>(element_types));
}
DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Tuple & tuple) const
{
if (tuple.empty())
throw Exception("Cannot infer type of an empty tuple", ErrorCodes::EMPTY_DATA_PASSED);
@ -123,12 +138,13 @@ DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const
element_types.reserve(tuple.size());
for (const auto & element : tuple)
element_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), element));
element_types.push_back(applyVisitor(*this, element));
return std::make_shared<DataTypeTuple>(element_types);
}
DataTypePtr FieldToDataType::operator() (const Map & map) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Map & map) const
{
DataTypes key_types;
DataTypes value_types;
@ -139,30 +155,37 @@ DataTypePtr FieldToDataType::operator() (const Map & map) const
{
const auto & tuple = elem.safeGet<const Tuple &>();
assert(tuple.size() == 2);
key_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), tuple[0]));
value_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), tuple[1]));
key_types.push_back(applyVisitor(*this, tuple[0]));
value_types.push_back(applyVisitor(*this, tuple[1]));
}
return std::make_shared<DataTypeMap>(
getLeastSupertype(key_types, allow_convertion_to_string),
getLeastSupertype(value_types, allow_convertion_to_string));
getLeastSupertype<on_error>(key_types),
getLeastSupertype<on_error>(value_types));
}
DataTypePtr FieldToDataType::operator() (const Object &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const Object &) const
{
/// TODO: Do we need different parameters for type Object?
return std::make_shared<DataTypeObject>("json", false);
}
DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator() (const AggregateFunctionStateData & x) const
{
const auto & name = static_cast<const AggregateFunctionStateData &>(x).name;
return DataTypeFactory::instance().get(name);
}
DataTypePtr FieldToDataType::operator()(const bool &) const
template <LeastSupertypeOnError on_error>
DataTypePtr FieldToDataType<on_error>::operator()(const bool &) const
{
return DataTypeFactory::instance().get("Bool");
}
template class FieldToDataType<LeastSupertypeOnError::Throw>;
template class FieldToDataType<LeastSupertypeOnError::String>;
template class FieldToDataType<LeastSupertypeOnError::Null>;
}

View File

@ -4,6 +4,7 @@
#include <Core/Types.h>
#include <Core/Field.h>
#include <Common/FieldVisitors.h>
#include <DataTypes/getLeastSupertype.h>
namespace DB
@ -17,14 +18,10 @@ using DataTypePtr = std::shared_ptr<const IDataType>;
* Note that you still have to convert Field to corresponding data type before inserting to columns
* (for example, this is necessary to convert elements of Array to common type).
*/
template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
class FieldToDataType : public StaticVisitor<DataTypePtr>
{
public:
FieldToDataType(bool allow_convertion_to_string_ = false)
: allow_convertion_to_string(allow_convertion_to_string_)
{
}
DataTypePtr operator() (const Null & x) const;
DataTypePtr operator() (const UInt64 & x) const;
DataTypePtr operator() (const UInt128 & x) const;
@ -45,9 +42,6 @@ public:
DataTypePtr operator() (const UInt256 & x) const;
DataTypePtr operator() (const Int256 & x) const;
DataTypePtr operator() (const bool & x) const;
private:
bool allow_convertion_to_string;
};
}
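
A minimal usage sketch (not part of this change): the error-handling behaviour is now selected at compile time through the template parameter rather than via the removed constructor flag; header paths and namespace are assumptions.

#include <Core/Field.h>
#include <Common/FieldVisitors.h>
#include <Interpreters/FieldToDataType.h>

using namespace DB;

/// Infer a type for a literal, returning nullptr if nested elements have no common type.
DataTypePtr inferTypeOrNull(const Field & field)
{
    return applyVisitor(FieldToDataType<LeastSupertypeOnError::Null>(), field);
}

/// Infer a type for a literal, falling back to String instead of throwing NO_COMMON_TYPE.
DataTypePtr inferTypeOrString(const Field & field)
{
    return applyVisitor(FieldToDataType<LeastSupertypeOnError::String>(), field);
}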

View File

@ -565,4 +565,31 @@ class DataTypeEnum;
template <typename T> inline constexpr bool IsDataTypeEnum<DataTypeEnum<T>> = true;
#define FOR_BASIC_NUMERIC_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Float32) \
M(Float64)
#define FOR_NUMERIC_TYPES(M) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(UInt128) \
M(UInt256) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256) \
M(Float32) \
M(Float64)
}
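
As a sketch of what these X-macros are for (the same DISPATCH idiom is used below in getLeastSupertype over a TypeIndexSet), a helper built on FOR_NUMERIC_TYPES could look like this; the function name is illustrative only.

DataTypePtr makeNumericDataType(TypeIndex idx)
{
#define DISPATCH(TYPE) \
    if (idx == TypeIndex::TYPE) \
        return std::make_shared<DataTypeNumber<TYPE>>(); /// NOLINT
    FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
    return nullptr; /// not one of the basic numeric types
}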

View File

@ -261,7 +261,7 @@ DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambi
key.getPath(), subtypes[0]->getName(), subtypes[i]->getName());
tuple_paths.emplace_back(key);
tuple_types.emplace_back(getLeastSupertype(subtypes, /*allow_conversion_to_string=*/ true));
tuple_types.emplace_back(getLeastSupertypeOrString(subtypes));
}
if (tuple_paths.empty())

View File

@ -61,29 +61,23 @@ void SerializationObject<Parser>::deserializeTextImpl(IColumn & column, Reader &
auto & [paths, values] = *result;
assert(paths.size() == values.size());
HashSet<StringRef, StringRefHash> paths_set;
size_t column_size = column_object.size();
size_t old_column_size = column_object.size();
for (size_t i = 0; i < paths.size(); ++i)
{
auto field_info = getFieldInfo(values[i]);
if (isNothing(field_info.scalar_type))
continue;
if (!paths_set.insert(paths[i].getPath()).second)
throw Exception(ErrorCodes::INCORRECT_DATA,
"Object has ambiguous path: {}", paths[i].getPath());
if (!column_object.hasSubcolumn(paths[i]))
{
if (paths[i].hasNested())
column_object.addNestedSubcolumn(paths[i], field_info, column_size);
column_object.addNestedSubcolumn(paths[i], field_info, old_column_size);
else
column_object.addSubcolumn(paths[i], column_size);
column_object.addSubcolumn(paths[i], old_column_size);
}
auto & subcolumn = column_object.getSubcolumn(paths[i]);
assert(subcolumn.size() == column_size);
assert(subcolumn.size() == old_column_size);
subcolumn.insert(std::move(values[i]), std::move(field_info));
}
@ -92,7 +86,7 @@ void SerializationObject<Parser>::deserializeTextImpl(IColumn & column, Reader &
const auto & subcolumns = column_object.getSubcolumns();
for (const auto & entry : subcolumns)
{
if (!paths_set.has(entry->path.getPath()))
if (entry->data.size() == old_column_size)
{
bool inserted = column_object.tryInsertDefaultFromNested(entry);
if (!inserted)

View File

@ -55,16 +55,24 @@ String getExceptionMessagePrefix(const DataTypes & types)
return res.str();
}
DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_string)
template <LeastSupertypeOnError on_error, typename DataTypes>
DataTypePtr throwOrReturn(const DataTypes & types, std::string_view message_suffix, int error_code)
{
auto throw_or_return = [&](std::string_view message, int error_code)
{
if (allow_conversion_to_string)
return std::make_shared<DataTypeString>();
if constexpr (on_error == LeastSupertypeOnError::String)
return std::make_shared<DataTypeString>();
throw Exception(String(message), error_code);
};
if constexpr (on_error == LeastSupertypeOnError::Null)
return nullptr;
if (message_suffix.empty())
throw Exception(error_code, getExceptionMessagePrefix(types));
throw Exception(error_code, "{} {}", getExceptionMessagePrefix(types), message_suffix);
}
template <LeastSupertypeOnError on_error>
DataTypePtr getNumericType(const TypeIndexSet & types)
{
bool all_numbers = true;
size_t max_bits_of_signed_integer = 0;
@ -107,14 +115,14 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_
maximize(max_mantissa_bits_of_floating, 24);
else if (type == TypeIndex::Float64)
maximize(max_mantissa_bits_of_floating, 53);
else
else if (type != TypeIndex::Nothing)
all_numbers = false;
}
if (max_bits_of_signed_integer || max_bits_of_unsigned_integer || max_mantissa_bits_of_floating)
{
if (!all_numbers)
return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are numbers and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "because some of them are numbers and some of them are not", ErrorCodes::NO_COMMON_TYPE);
/// If there are signed and unsigned types of same bit-width, the result must be signed number with at least one more bit.
/// Example: the common type of Int32 and UInt32 is Int64.
@ -129,10 +137,9 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_
if (min_bit_width_of_integer != 64)
++min_bit_width_of_integer;
else
return throw_or_return(
getExceptionMessagePrefix(types)
+ " because some of them are signed integers and some are unsigned integers,"
" but there is no signed integer type, that can exactly represent all required unsigned integer values",
return throwOrReturn<on_error>(types,
"because some of them are signed integers and some are unsigned integers,"
" but there is no signed integer type, that can exactly represent all required unsigned integer values",
ErrorCodes::NO_COMMON_TYPE);
}
@ -145,8 +152,8 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_
else if (min_mantissa_bits <= 53)
return std::make_shared<DataTypeFloat64>();
else
return throw_or_return(getExceptionMessagePrefix(types)
+ " because some of them are integers and some are floating point,"
return throwOrReturn<on_error>(types,
" because some of them are integers and some are floating point,"
" but there is no floating point type, that can exactly represent all required integers", ErrorCodes::NO_COMMON_TYPE);
}
@ -166,8 +173,8 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_
else if (min_bit_width_of_integer <= 256)
return std::make_shared<DataTypeInt256>();
else
return throw_or_return(getExceptionMessagePrefix(types)
+ " because some of them are signed integers and some are unsigned integers,"
return throwOrReturn<on_error>(types,
" because some of them are signed integers and some are unsigned integers,"
" but there is no signed integer type, that can exactly represent all required unsigned integer values", ErrorCodes::NO_COMMON_TYPE);
}
@ -186,9 +193,8 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_
else if (min_bit_width_of_integer <= 256)
return std::make_shared<DataTypeUInt256>();
else
return throw_or_return("Logical error: " + getExceptionMessagePrefix(types)
+ " but as all data types are unsigned integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types,
" but as all data types are unsigned integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE);
}
}
@ -197,16 +203,9 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_
}
DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_string)
template <LeastSupertypeOnError on_error>
DataTypePtr getLeastSupertype(const DataTypes & types)
{
auto throw_or_return = [&](std::string_view message, int error_code)
{
if (allow_conversion_to_string)
return std::make_shared<DataTypeString>();
throw Exception(String(message), error_code);
};
/// Trivial cases
if (types.empty())
@ -243,7 +242,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
non_nothing_types.emplace_back(type);
if (non_nothing_types.size() < types.size())
return getLeastSupertype(non_nothing_types, allow_conversion_to_string);
return getLeastSupertype<on_error>(non_nothing_types);
}
/// For Arrays
@ -268,9 +267,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
if (have_array)
{
if (!all_arrays)
return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return std::make_shared<DataTypeArray>(getLeastSupertype(nested_types, allow_conversion_to_string));
return std::make_shared<DataTypeArray>(getLeastSupertype<on_error>(nested_types));
}
}
@ -294,7 +293,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
nested_types[elem_idx].reserve(types.size());
}
else if (tuple_size != type_tuple->getElements().size())
return throw_or_return(getExceptionMessagePrefix(types) + " because Tuples have different sizes", ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "because Tuples have different sizes", ErrorCodes::NO_COMMON_TYPE);
have_tuple = true;
@ -308,11 +307,11 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
if (have_tuple)
{
if (!all_tuples)
return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are Tuple and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "because some of them are Tuple and some of them are not", ErrorCodes::NO_COMMON_TYPE);
DataTypes common_tuple_types(tuple_size);
for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
common_tuple_types[elem_idx] = getLeastSupertype(nested_types[elem_idx], allow_conversion_to_string);
common_tuple_types[elem_idx] = getLeastSupertype<on_error>(nested_types[elem_idx]);
return std::make_shared<DataTypeTuple>(common_tuple_types);
}
@ -342,11 +341,11 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
if (have_maps)
{
if (!all_maps)
return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return std::make_shared<DataTypeMap>(
getLeastSupertype(key_types, allow_conversion_to_string),
getLeastSupertype(value_types, allow_conversion_to_string));
getLeastSupertype<on_error>(key_types),
getLeastSupertype<on_error>(value_types));
}
}
@ -377,9 +376,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
if (have_low_cardinality)
{
if (have_not_low_cardinality)
return getLeastSupertype(nested_types, allow_conversion_to_string);
return getLeastSupertype<on_error>(nested_types);
else
return std::make_shared<DataTypeLowCardinality>(getLeastSupertype(nested_types, allow_conversion_to_string));
return std::make_shared<DataTypeLowCardinality>(getLeastSupertype<on_error>(nested_types));
}
}
@ -405,7 +404,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
if (have_nullable)
{
return std::make_shared<DataTypeNullable>(getLeastSupertype(nested_types, allow_conversion_to_string));
return std::make_shared<DataTypeNullable>(getLeastSupertype<on_error>(nested_types));
}
}
@ -425,7 +424,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
{
bool all_strings = type_ids.size() == (have_string + have_fixed_string);
if (!all_strings)
return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return std::make_shared<DataTypeString>();
}
@ -442,8 +441,8 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
{
bool all_date_or_datetime = type_ids.size() == (have_date + have_date32 + have_datetime + have_datetime64);
if (!all_date_or_datetime)
return throw_or_return(getExceptionMessagePrefix(types)
+ " because some of them are Date/Date32/DateTime/DateTime64 and some of them are not",
return throwOrReturn<on_error>(types,
"because some of them are Date/Date32/DateTime/DateTime64 and some of them are not",
ErrorCodes::NO_COMMON_TYPE);
if (have_datetime64 == 0 && have_date32 == 0)
@ -520,8 +519,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
}
if (num_supported != type_ids.size())
return throw_or_return(getExceptionMessagePrefix(types) + " because some of them have no lossless conversion to Decimal",
ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "because some of them have no lossless conversion to Decimal", ErrorCodes::NO_COMMON_TYPE);
UInt32 max_scale = 0;
for (const auto & type : types)
@ -543,7 +541,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
}
if (min_precision > DataTypeDecimal<Decimal128>::maxPrecision())
return throw_or_return(getExceptionMessagePrefix(types) + " because the least supertype is Decimal("
return throwOrReturn<on_error>(types, "because the least supertype is Decimal("
+ toString(min_precision) + ',' + toString(max_scale) + ')',
ErrorCodes::NO_COMMON_TYPE);
@ -557,68 +555,77 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
/// For numeric types, the most complicated part.
{
auto numeric_type = getNumericType(type_ids, allow_conversion_to_string);
auto numeric_type = getNumericType<on_error>(type_ids);
if (numeric_type)
return numeric_type;
}
/// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
return throw_or_return(getExceptionMessagePrefix(types), ErrorCodes::NO_COMMON_TYPE);
return throwOrReturn<on_error>(types, "", ErrorCodes::NO_COMMON_TYPE);
}
DataTypePtr getLeastSupertype(const TypeIndexSet & types, bool allow_conversion_to_string)
DataTypePtr getLeastSupertypeOrString(const DataTypes & types)
{
auto throw_or_return = [&](std::string_view message, int error_code)
{
if (allow_conversion_to_string)
return std::make_shared<DataTypeString>();
throw Exception(String(message), error_code);
};
TypeIndexSet types_set;
for (const auto & type : types)
{
if (WhichDataType(type).isNothing())
continue;
if (!WhichDataType(type).isSimple())
throw Exception(ErrorCodes::NO_COMMON_TYPE,
"Cannot get common type by type ids with parametric type {}", typeToString(type));
types_set.insert(type);
}
if (types_set.empty())
return std::make_shared<DataTypeNothing>();
if (types.contains(TypeIndex::String))
{
if (types.size() != 1)
return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are String and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return std::make_shared<DataTypeString>();
}
/// For numeric types, the most complicated part.
auto numeric_type = getNumericType(types, allow_conversion_to_string);
if (numeric_type)
return numeric_type;
/// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
return throw_or_return(getExceptionMessagePrefix(types), ErrorCodes::NO_COMMON_TYPE);
return getLeastSupertype<LeastSupertypeOnError::String>(types);
}
DataTypePtr tryGetLeastSupertype(const DataTypes & types)
{
try
{
return getLeastSupertype(types);
}
catch (...)
{
return nullptr;
}
return getLeastSupertype<LeastSupertypeOnError::Null>(types);
}
template <LeastSupertypeOnError on_error>
DataTypePtr getLeastSupertype(const TypeIndexSet & types)
{
if (types.empty())
return std::make_shared<DataTypeNothing>();
if (types.size() == 1)
{
WhichDataType which(*types.begin());
if (which.isNothing())
return std::make_shared<DataTypeNothing>();
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) \
return std::make_shared<DataTypeNumber<TYPE>>(); /// NOLINT
FOR_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.isString())
return std::make_shared<DataTypeString>();
return throwOrReturn<on_error>(types, "because cannot get common type by type indexes with non-simple types", ErrorCodes::NO_COMMON_TYPE);
}
if (types.contains(TypeIndex::String))
{
bool only_string = types.size() == 2 && types.contains(TypeIndex::Nothing);
if (!only_string)
return throwOrReturn<on_error>(types, "because some of them are String and some of them are not", ErrorCodes::NO_COMMON_TYPE);
return std::make_shared<DataTypeString>();
}
auto numeric_type = getNumericType<on_error>(types);
if (numeric_type)
return numeric_type;
return throwOrReturn<on_error>(types, "", ErrorCodes::NO_COMMON_TYPE);
}
DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types)
{
return getLeastSupertype<LeastSupertypeOnError::String>(types);
}
DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types)
{
return getLeastSupertype<LeastSupertypeOnError::Null>(types);
}
template DataTypePtr getLeastSupertype<LeastSupertypeOnError::Throw>(const DataTypes & types);
template DataTypePtr getLeastSupertype<LeastSupertypeOnError::Throw>(const TypeIndexSet & types);
}
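
To make the three error policies concrete, an illustrative sketch (types follow the examples in the comments: Int32 and UInt32 promote to Int64, while String mixed with a number has no common type):

DataTypes numbers{std::make_shared<DataTypeInt32>(), std::make_shared<DataTypeUInt32>()};
DataTypePtr promoted = getLeastSupertype(numbers);        /// Int64; default Throw policy, but a supertype exists

DataTypes mixed{std::make_shared<DataTypeString>(), std::make_shared<DataTypeUInt8>()};
DataTypePtr or_null = tryGetLeastSupertype(mixed);        /// nullptr instead of a NO_COMMON_TYPE exception
DataTypePtr or_string = getLeastSupertypeOrString(mixed); /// falls back to String
/// getLeastSupertype(mixed) would throw an Exception with code NO_COMMON_TYPE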

View File

@ -1,24 +1,39 @@
#pragma once
#include <DataTypes/IDataType.h>
namespace DB
{
enum class LeastSupertypeOnError
{
Throw,
String,
Null,
};
/** Get data type that covers all possible values of passed data types.
* If there is no such data type, throws an exception
* or if 'allow_conversion_to_string' is true returns String as common type.
* If there is no such data type, throws an exception.
*
* Examples: least common supertype for UInt8, Int8 - Int16.
* Examples: there is no least common supertype for Array(UInt8), Int8.
*/
DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_string = false);
template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
DataTypePtr getLeastSupertype(const DataTypes & types);
using TypeIndexSet = std::unordered_set<TypeIndex>;
DataTypePtr getLeastSupertype(const TypeIndexSet & types, bool allow_conversion_to_string = false);
/// Same as above but return String type instead of throwing exception.
/// All types can be cast to String, because they can be serialized to String.
DataTypePtr getLeastSupertypeOrString(const DataTypes & types);
/// Same as above but return nullptr instead of throwing exception.
DataTypePtr tryGetLeastSupertype(const DataTypes & types);
using TypeIndexSet = std::unordered_set<TypeIndex>;
template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
DataTypePtr getLeastSupertype(const TypeIndexSet & types);
DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types);
DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types);
}

View File

@ -171,6 +171,11 @@ void TablesLoader::removeUnresolvableDependencies(bool remove_loaded)
void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool)
{
/// Compatibility setting which should be enabled by default on attach
/// Otherwise the server will be unable to start with some old-format IPv6/IPv4 column types
ContextMutablePtr load_context = Context::createCopy(global_context);
load_context->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1);
/// Load independent tables in parallel.
/// Then remove loaded tables from dependency graph, find tables/dictionaries that do not have unresolved dependencies anymore,
/// move them to the list of independent tables and load.
@ -183,7 +188,7 @@ void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool)
assert(metadata.parsed_tables.size() == tables_processed + metadata.independent_database_objects.size() + getNumberOfTablesWithDependencies());
logDependencyGraph();
startLoadingIndependentTables(pool, level);
startLoadingIndependentTables(pool, level, load_context);
TableNames new_independent_database_objects;
for (const auto & table_name : metadata.independent_database_objects)
@ -237,7 +242,7 @@ DependenciesInfosIter TablesLoader::removeResolvedDependency(const DependenciesI
return metadata.dependencies_info.erase(info_it);
}
void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level)
void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level, ContextMutablePtr load_context)
{
size_t total_tables = metadata.parsed_tables.size();
@ -245,10 +250,10 @@ void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level
for (const auto & table_name : metadata.independent_database_objects)
{
pool.scheduleOrThrowOnError([this, total_tables, &table_name]()
pool.scheduleOrThrowOnError([this, load_context, total_tables, &table_name]()
{
const auto & path_and_query = metadata.parsed_tables[table_name];
databases[table_name.database]->loadTableFromMetadata(global_context, path_and_query.path, table_name, path_and_query.ast, force_restore);
databases[table_name.database]->loadTableFromMetadata(load_context, path_and_query.path, table_name, path_and_query.ast, force_restore);
logAboutProgress(log, ++tables_processed, total_tables, stopwatch);
});
}
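
Pieced together from the hunks above, the new loading flow is roughly: copy the global context once, enable the compatibility setting on the copy, and hand that copy to every worker task (a condensed sketch, not a verbatim excerpt):

ContextMutablePtr load_context = Context::createCopy(global_context);
load_context->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1);

for (const auto & table_name : metadata.independent_database_objects)
{
    pool.scheduleOrThrowOnError([this, load_context, total_tables, &table_name]()
    {
        const auto & path_and_query = metadata.parsed_tables[table_name];
        /// Each table is attached with the relaxed IPv4/IPv6 cast behaviour enabled.
        databases[table_name.database]->loadTableFromMetadata(
            load_context, path_and_query.path, table_name, path_and_query.ast, force_restore);
        logAboutProgress(log, ++tables_processed, total_tables, stopwatch);
    });
}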

View File

@ -104,7 +104,7 @@ private:
DependenciesInfosIter removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_database_objects);
void startLoadingIndependentTables(ThreadPool & pool, size_t level);
void startLoadingIndependentTables(ThreadPool & pool, size_t level, ContextMutablePtr load_context);
void checkCyclicDependencies() const;

View File

@ -190,7 +190,7 @@ private:
/// Dictionary source should be used with mutex
mutable std::mutex source_mutex;
mutable DictionarySourcePtr source_ptr;
mutable DictionarySourcePtr source_ptr TSA_GUARDED_BY(source_mutex);
CacheDictionaryStoragePtr cache_storage_ptr;
mutable CacheDictionaryUpdateQueue<dictionary_key_type> update_queue;

View File

@ -68,9 +68,9 @@ void CacheDictionaryUpdateQueue<dictionary_key_type>::waitForCurrentUpdateFinish
if (update_queue.isFinished())
throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "CacheDictionaryUpdateQueue finished");
std::unique_lock<std::mutex> update_lock(update_mutex);
std::unique_lock<std::mutex> update_lock(update_unit_ptr->update_mutex);
bool result = is_update_finished.wait_for(
bool result = update_unit_ptr->is_update_finished.wait_for(
update_lock,
std::chrono::milliseconds(configuration.query_wait_timeout_milliseconds),
[&]
@ -133,19 +133,23 @@ void CacheDictionaryUpdateQueue<dictionary_key_type>::updateThreadFunction()
/// Update
update_func(unit_to_update);
/// Notify thread about finished updating the bunch of ids
/// where their own ids were included.
std::lock_guard lock(update_mutex);
{
/// Notify thread about finished updating the bunch of ids
/// where their own ids were included.
std::lock_guard lock(unit_to_update->update_mutex);
unit_to_update->is_done = true;
}
unit_to_update->is_done = true;
is_update_finished.notify_all();
unit_to_update->is_update_finished.notify_all();
}
catch (...)
{
std::lock_guard lock(update_mutex);
{
std::lock_guard lock(unit_to_update->update_mutex);
unit_to_update->current_exception = std::current_exception(); // NOLINT(bugprone-throw-keyword-missing)
}
unit_to_update->current_exception = std::current_exception(); // NOLINT(bugprone-throw-keyword-missing)
is_update_finished.notify_all();
unit_to_update->is_update_finished.notify_all();
}
}
}
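
The essence of this change is that each UpdateUnit now owns its own mutex and condition variable, so finishing one update no longer wakes or contends with waiters of unrelated units. A self-contained sketch of the pattern (illustrative names, not the real classes):

#include <condition_variable>
#include <exception>
#include <mutex>

struct UpdateUnitSketch
{
    mutable std::mutex update_mutex;
    mutable std::condition_variable is_update_finished;
    bool is_done = false;                  /// guarded by update_mutex
    std::exception_ptr current_exception;  /// guarded by update_mutex
};

void markDone(UpdateUnitSketch & unit)
{
    {
        std::lock_guard lock(unit.update_mutex);
        unit.is_done = true;
    }
    unit.is_update_finished.notify_all();  /// notify outside the lock, as in the hunk above
}

void waitDone(UpdateUnitSketch & unit)
{
    std::unique_lock lock(unit.update_mutex);
    unit.is_update_finished.wait(lock, [&] { return unit.is_done; });
}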

View File

@ -74,7 +74,10 @@ private:
template <DictionaryKeyType>
friend class CacheDictionaryUpdateQueue;
std::atomic<bool> is_done{false};
mutable std::mutex update_mutex;
mutable std::condition_variable is_update_finished;
bool is_done{false};
std::exception_ptr current_exception{nullptr}; /// NOLINT
/// While UpdateUnit is alive, it is accounted in update_queue size.
@ -159,9 +162,6 @@ private:
UpdateQueue update_queue;
ThreadPool update_pool;
mutable std::mutex update_mutex;
mutable std::condition_variable is_update_finished;
};
extern template class CacheDictionaryUpdateQueue<DictionaryKeyType::Simple>;

View File

@ -194,12 +194,8 @@ QueryPipeline CassandraDictionarySource::loadUpdatedAll()
CassSessionShared CassandraDictionarySource::getSession()
{
/// Reuse connection if exists, create new one if not
auto session = maybe_session.lock();
if (session)
return session;
std::lock_guard lock(connect_mutex);
session = maybe_session.lock();
auto session = maybe_session.lock();
if (session)
return session;

View File

@ -82,9 +82,10 @@ private:
Block sample_block;
ExternalQueryBuilder query_builder;
std::mutex connect_mutex;
CassClusterPtr cluster;
CassSessionWeak maybe_session;
std::mutex connect_mutex;
CassSessionWeak maybe_session TSA_GUARDED_BY(connect_mutex);
};
}

View File

@ -62,26 +62,26 @@ public:
std::string getFullName() const
{
std::lock_guard lock{name_mutex};
std::lock_guard lock{mutex};
return dictionary_id.getNameForLogs();
}
StorageID getDictionaryID() const
{
std::lock_guard lock{name_mutex};
std::lock_guard lock{mutex};
return dictionary_id;
}
void updateDictionaryName(const StorageID & new_name) const
{
std::lock_guard lock{name_mutex};
std::lock_guard lock{mutex};
assert(new_name.uuid == dictionary_id.uuid && dictionary_id.uuid != UUIDHelpers::Nil);
dictionary_id = new_name;
}
std::string getLoadableName() const final
{
std::lock_guard lock{name_mutex};
std::lock_guard lock{mutex};
return dictionary_id.getInternalDictionaryName();
}
@ -92,6 +92,8 @@ public:
std::string getDatabaseOrNoDatabaseTag() const
{
std::lock_guard lock{mutex};
if (!dictionary_id.database_name.empty())
return dictionary_id.database_name;
@ -278,22 +280,20 @@ public:
void setDictionaryComment(String new_comment)
{
std::lock_guard lock{name_mutex};
std::lock_guard lock{mutex};
dictionary_comment = std::move(new_comment);
}
String getDictionaryComment() const
{
std::lock_guard lock{name_mutex};
std::lock_guard lock{mutex};
return dictionary_comment;
}
private:
mutable std::mutex name_mutex;
mutable StorageID dictionary_id;
protected:
String dictionary_comment;
mutable std::mutex mutex;
mutable StorageID dictionary_id TSA_GUARDED_BY(mutex);
String dictionary_comment TSA_GUARDED_BY(mutex);
};
}
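
The TSA_GUARDED_BY annotations used here and in the dictionary sources above hook into clang's thread-safety analysis (the macro is assumed to wrap clang's guarded_by attribute), so unguarded access is diagnosed at compile time when building with -Wthread-safety. A minimal sketch:

#include <mutex>
#include <string>

struct GuardedNameSketch
{
    std::string getName() const
    {
        std::lock_guard lock{mutex};
        return name;        /// OK: mutex is held
    }

    std::string getNameUnsafe() const
    {
        return name;        /// warning: reading variable 'name' requires holding 'mutex'
    }

    mutable std::mutex mutex;
    std::string name TSA_GUARDED_BY(mutex);
};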

View File

@ -40,6 +40,7 @@ void registerFunctionIsNaN(FunctionFactory &);
void registerFunctionIfNotFinite(FunctionFactory &);
void registerFunctionThrowIf(FunctionFactory &);
void registerFunctionVersion(FunctionFactory &);
void registerFunctionRevision(FunctionFactory &);
void registerFunctionBuildId(FunctionFactory &);
void registerFunctionUptime(FunctionFactory &);
void registerFunctionTimezone(FunctionFactory &);
@ -129,6 +130,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory)
registerFunctionIfNotFinite(factory);
registerFunctionThrowIf(factory);
registerFunctionVersion(factory);
registerFunctionRevision(factory);
registerFunctionBuildId(factory);
registerFunctionUptime(factory);
registerFunctionTimezone(factory);

View File

@ -6,6 +6,7 @@
#include <Common/SymbolIndex.h>
#include <Common/DNSResolver.h>
#include <Common/DateLUT.h>
#include <Common/ClickHouseRevision.h>
#if defined(OS_LINUX)
# include <Poco/Environment.h>
@ -88,6 +89,15 @@ namespace
explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(VERSION_STRING, context->isDistributed()) {}
};
/// revision() - returns the current revision.
class FunctionRevision : public FunctionConstantBase<FunctionRevision, UInt32, DataTypeUInt32>
{
public:
static constexpr auto name = "revision";
static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionRevision>(context); }
explicit FunctionRevision(ContextPtr context) : FunctionConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {}
};
class FunctionZooKeeperSessionUptime : public FunctionConstantBase<FunctionZooKeeperSessionUptime, UInt32, DataTypeUInt32>
{
public:
@ -151,6 +161,11 @@ void registerFunctionVersion(FunctionFactory & factory)
factory.registerFunction<FunctionVersion>(FunctionFactory::CaseInsensitive);
}
void registerFunctionRevision(FunctionFactory & factory)
{
factory.registerFunction<FunctionRevision>(FunctionFactory::CaseInsensitive);
}
void registerFunctionZooKeeperSessionUptime(FunctionFactory & factory)
{
factory.registerFunction<FunctionZooKeeperSessionUptime>();

View File

@ -20,6 +20,7 @@ namespace ErrorCodes
BlockIO InterpreterCreateIndexQuery::execute()
{
auto current_context = getContext();
const auto & create_index = query_ptr->as<ASTCreateIndexQuery &>();
AccessRightsElements required_access;
@ -29,23 +30,23 @@ BlockIO InterpreterCreateIndexQuery::execute()
{
DDLQueryOnClusterParams params;
params.access_to_check = std::move(required_access);
return executeDDLQueryOnCluster(query_ptr, getContext(), params);
return executeDDLQueryOnCluster(query_ptr, current_context, params);
}
getContext()->checkAccess(required_access);
auto table_id = getContext()->resolveStorageID(create_index, Context::ResolveOrdinary);
current_context->checkAccess(required_access);
auto table_id = current_context->resolveStorageID(create_index, Context::ResolveOrdinary);
query_ptr->as<ASTCreateIndexQuery &>().setDatabase(table_id.database_name);
DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
if (typeid_cast<DatabaseReplicated *>(database.get())
&& !getContext()->getClientInfo().is_replicated_database_internal)
&& !current_context->getClientInfo().is_replicated_database_internal)
{
auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name);
guard->releaseTableLock();
return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, getContext());
return assert_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, current_context);
}
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, current_context);
if (table->isStaticStorage())
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only");
@ -53,23 +54,20 @@ BlockIO InterpreterCreateIndexQuery::execute()
AlterCommands alter_commands;
AlterCommand command;
command.ast = create_index.convertToASTAlterCommand();
command.index_decl = create_index.index_decl;
command.type = AlterCommand::ADD_INDEX;
command.index_name = create_index.index_name->as<ASTIdentifier &>().name();
command.if_not_exists = create_index.if_not_exists;
/// Fill name in ASTIndexDeclaration
auto & ast_index_decl = command.index_decl->as<ASTIndexDeclaration &>();
ast_index_decl.name = command.index_name;
alter_commands.emplace_back(std::move(command));
auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout);
auto alter_lock = table->lockForAlter(current_context->getSettingsRef().lock_acquire_timeout);
StorageInMemoryMetadata metadata = table->getInMemoryMetadata();
alter_commands.validate(table, getContext());
alter_commands.validate(table, current_context);
alter_commands.prepare(metadata);
table->checkAlterIsPossible(alter_commands, getContext());
table->alter(alter_commands, getContext(), alter_lock);
table->checkAlterIsPossible(alter_commands, current_context);
table->alter(alter_commands, current_context, alter_lock);
return {};
}

View File

@ -1038,6 +1038,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
create.attach = true;
create.attach_short_syntax = true;
create.if_not_exists = if_not_exists;
/// Compatibility setting which should be enabled by default on attach
/// Otherwise the server will be unable to start with some old-format IPv6/IPv4 column types
getContext()->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1);
}
/// TODO throw exception if !create.attach_short_syntax && !create.attach_from_path && !internal

View File

@ -18,6 +18,7 @@ namespace ErrorCodes
BlockIO InterpreterDropIndexQuery::execute()
{
auto current_context = getContext();
const auto & drop_index = query_ptr->as<ASTDropIndexQuery &>();
AccessRightsElements required_access;
@ -27,23 +28,23 @@ BlockIO InterpreterDropIndexQuery::execute()
{
DDLQueryOnClusterParams params;
params.access_to_check = std::move(required_access);
return executeDDLQueryOnCluster(query_ptr, getContext(), params);
return executeDDLQueryOnCluster(query_ptr, current_context, params);
}
getContext()->checkAccess(required_access);
auto table_id = getContext()->resolveStorageID(drop_index, Context::ResolveOrdinary);
current_context->checkAccess(required_access);
auto table_id = current_context->resolveStorageID(drop_index, Context::ResolveOrdinary);
query_ptr->as<ASTDropIndexQuery &>().setDatabase(table_id.database_name);
DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name);
if (typeid_cast<DatabaseReplicated *>(database.get())
&& !getContext()->getClientInfo().is_replicated_database_internal)
&& !current_context->getClientInfo().is_replicated_database_internal)
{
auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name);
guard->releaseTableLock();
return typeid_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, getContext());
return assert_cast<DatabaseReplicated *>(database.get())->tryEnqueueReplicatedDDL(query_ptr, current_context);
}
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext());
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, current_context);
if (table->isStaticStorage())
throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only");
@ -58,12 +59,12 @@ BlockIO InterpreterDropIndexQuery::execute()
alter_commands.emplace_back(std::move(command));
auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout);
auto alter_lock = table->lockForAlter(current_context->getSettingsRef().lock_acquire_timeout);
StorageInMemoryMetadata metadata = table->getInMemoryMetadata();
alter_commands.validate(table, getContext());
alter_commands.validate(table, current_context);
alter_commands.prepare(metadata);
table->checkAlterIsPossible(alter_commands, getContext());
table->alter(alter_commands, getContext(), alter_lock);
table->checkAlterIsPossible(alter_commands, current_context);
table->alter(alter_commands, current_context, alter_lock);
return {};
}

View File

@ -1,7 +1,6 @@
#include <Interpreters/RowRefs.h>
#include <Common/RadixSort.h>
#include <AggregateFunctions/Helpers.h>
#include <Columns/IColumn.h>
#include <DataTypes/IDataType.h>
#include <base/types.h>

View File

@ -1,4 +1,3 @@
#include <Parsers/ASTIndexDeclaration.h>
#include <iomanip>
#include <IO/Operators.h>
#include <Parsers/ASTAlterQuery.h>
@ -557,7 +556,6 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState
frame.need_parens = false;
std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' ');
settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str;
switch (alter_object)

View File

@ -2,6 +2,7 @@
#include <IO/Operators.h>
#include <Parsers/ASTCreateIndexQuery.h>
#include <Parsers/ASTIndexDeclaration.h>
#include <Parsers/ASTAlterQuery.h>
namespace DB
@ -23,6 +24,9 @@ ASTPtr ASTCreateIndexQuery::clone() const
res->index_decl = index_decl->clone();
res->children.push_back(res->index_decl);
cloneTableOptions(*res);
return res;
}
@ -58,4 +62,15 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma
index_decl->formatImpl(settings, state, frame);
}
ASTPtr ASTCreateIndexQuery::convertToASTAlterCommand() const
{
auto command = std::make_shared<ASTAlterCommand>();
command->type = ASTAlterCommand::ADD_INDEX;
command->index = index_name->clone();
command->index_decl = index_decl->clone();
command->if_not_exists = if_not_exists;
return command;
}
}
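
Read together with the interpreter hunk above, CREATE INDEX is now desugared into the existing ALTER machinery; roughly (a condensed sketch of the flow, not a verbatim excerpt):

const auto & create_index = query_ptr->as<ASTCreateIndexQuery &>();

AlterCommand command;
command.ast = create_index.convertToASTAlterCommand();   /// ASTAlterCommand with type ADD_INDEX
command.index_decl = create_index.index_decl;
command.type = AlterCommand::ADD_INDEX;
command.index_name = create_index.index_name->as<ASTIdentifier &>().name();
command.if_not_exists = create_index.if_not_exists;

/// Fill the name in the ASTIndexDeclaration, then run the ordinary ALTER path.
command.index_decl->as<ASTIndexDeclaration &>().name = command.index_name;

AlterCommands alter_commands;
alter_commands.emplace_back(std::move(command));
alter_commands.validate(table, current_context);
alter_commands.prepare(metadata);
table->alter(alter_commands, current_context, alter_lock);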

View File

@ -14,13 +14,13 @@ namespace DB
class ASTCreateIndexQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
{
public:
bool if_not_exists{false};
ASTPtr index_name;
/// Stores the IndexDeclaration here.
ASTPtr index_decl;
bool if_not_exists{false};
String getID(char delim) const override;
ASTPtr clone() const override;
@ -30,7 +30,10 @@ public:
return removeOnCluster<ASTCreateIndexQuery>(clone(), params.default_database);
}
virtual QueryKind getQueryKind() const override { return QueryKind::Create; }
QueryKind getQueryKind() const override { return QueryKind::Create; }
/// Convert ASTCreateIndexQuery to ASTAlterCommand
ASTPtr convertToASTAlterCommand() const;
protected:
void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;

View File

@ -20,6 +20,8 @@ ASTPtr ASTDropIndexQuery::clone() const
res->index_name = index_name->clone();
res->children.push_back(res->index_name);
cloneTableOptions(*res);
return res;
}
@ -53,9 +55,9 @@ void ASTDropIndexQuery::formatQueryImpl(const FormatSettings & settings, FormatS
ASTPtr ASTDropIndexQuery::convertToASTAlterCommand() const
{
auto command = std::make_shared<ASTAlterCommand>();
command->type = ASTAlterCommand::DROP_INDEX;
command->index = index_name->clone();
command->if_exists = if_exists;
command->type = ASTAlterCommand::DROP_INDEX;
return command;
}

View File

@ -17,10 +17,11 @@ namespace DB
class ASTDropIndexQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster
{
public:
bool if_exists{false};
ASTPtr index_name;
bool if_exists{false};
String getID(char delim) const override;
ASTPtr clone() const override;
@ -30,9 +31,9 @@ public:
return removeOnCluster<ASTDropIndexQuery>(clone(), params.default_database);
}
virtual QueryKind getQueryKind() const override { return QueryKind::Drop; }
QueryKind getQueryKind() const override { return QueryKind::Drop; }
/// Convert ASTDropIndexQuery to ASTAlterCommand.
/// Convert ASTDropIndexQuery to ASTAlterCommand
ASTPtr convertToASTAlterCommand() const;
protected:

View File

@ -25,7 +25,7 @@ ASTPtr ASTIndexDeclaration::clone() const
void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const
{
if (from_create_index)
if (part_of_create_index_query)
{
s.ostr << "(";
expr->formatImpl(s, state, frame);

View File

@ -16,7 +16,7 @@ public:
IAST * expr;
ASTFunction * type;
UInt64 granularity;
bool from_create_index = false;
bool part_of_create_index_query = false;
/** Get the text that identifies this element. */
String getID(char) const override { return "Index"; }

View File

@ -43,7 +43,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected
return false;
auto index = std::make_shared<ASTIndexDeclaration>();
index->from_create_index = true;
index->part_of_create_index_query = true;
index->granularity = granularity->as<ASTLiteral &>().value.safeGet<UInt64>();
index->set(index->expr, expr);
index->set(index->type, type);
@ -87,18 +87,21 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect
return false;
if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table))
return false;
return false;
/// [ON cluster_name]
if (s_on.ignore(pos, expected))
{
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
return false;
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
return false;
}
if (!parser_create_idx_decl.parse(pos, index_decl, expected))
return false;
auto & ast_index_decl = index_decl->as<ASTIndexDeclaration &>();
ast_index_decl.name = index_name->as<ASTIdentifier &>().name();
query->index_name = index_name;
query->children.push_back(index_name);

View File

@ -12,7 +12,7 @@ namespace DB
class ParserCreateIndexQuery : public IParserBase
{
protected:
const char * getName() const override{ return "CREATE INDEX query"; }
const char * getName() const override { return "CREATE INDEX query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
@ -21,7 +21,7 @@ protected:
class ParserCreateIndexDeclaration : public IParserBase
{
public:
ParserCreateIndexDeclaration() {}
ParserCreateIndexDeclaration() = default;
protected:
const char * getName() const override { return "index declaration in create index"; }

View File

@ -39,13 +39,13 @@ bool ParserDropIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected
return false;
if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table))
return false;
return false;
/// [ON cluster_name]
if (s_on.ignore(pos, expected))
{
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
return false;
if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected))
return false;
query->cluster = std::move(cluster_str);
}

View File

@ -12,7 +12,7 @@ namespace DB
class ParserDropIndexQuery : public IParserBase
{
protected:
const char * getName() const override{ return "DROP INDEX query"; }
const char * getName() const override { return "DROP INDEX query"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};

View File

@ -71,7 +71,7 @@ bool compareWithThreshold(const ColumnRawPtrs & raw_block_columns, size_t min_bl
size_t raw_block_columns_size = raw_block_columns.size();
for (size_t i = 0; i < raw_block_columns_size; ++i)
{
int res = sort_description[i].direction * raw_block_columns[i]->compareAt(min_block_index, 0, *threshold_columns[0], sort_description[i].nulls_direction);
int res = sort_description[i].direction * raw_block_columns[i]->compareAt(min_block_index, 0, *threshold_columns[i], sort_description[i].nulls_direction);
if (res < 0)
return true;
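
For clarity, the intent of the fixed loop is a plain lexicographic comparison: block column i is compared against the threshold value of the same column i (previously every column was compared against threshold column 0). A standalone version with a hypothetical helper name:

bool lessThanThreshold(const ColumnRawPtrs & block_columns, size_t row,
                       const ColumnRawPtrs & threshold_columns, const SortDescription & sort_description)
{
    for (size_t i = 0; i < block_columns.size(); ++i)
    {
        int res = sort_description[i].direction
            * block_columns[i]->compareAt(row, 0, *threshold_columns[i], sort_description[i].nulls_direction);
        if (res < 0)
            return true;
        if (res > 0)
            return false;
        /// res == 0: tie on this sort key, compare the next one
    }
    return false;
}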

View File

@ -13,7 +13,6 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypeInterval.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/convertFieldToType.h>
@ -28,7 +27,6 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
extern const int NOT_IMPLEMENTED;
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
// Interface for true window functions. It's not much of an interface, they just
@ -2202,109 +2200,6 @@ struct WindowFunctionNthValue final : public WindowFunction
}
};
struct NonNegativeDerivativeState
{
Float64 previous_metric = 0;
Float64 previous_timestamp = 0;
};
// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND])
struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction<NonNegativeDerivativeState>
{
static constexpr size_t ARGUMENT_METRIC = 0;
static constexpr size_t ARGUMENT_TIMESTAMP = 1;
static constexpr size_t ARGUMENT_INTERVAL = 2;
WindowFunctionNonNegativeDerivative(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_)
: StatefulWindowFunction(name_, argument_types_, parameters_)
{
if (!parameters.empty())
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} cannot be parameterized", name_);
}
if (argument_types.size() != 2 && argument_types.size() != 3)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Function {} takes 2 or 3 arguments", name_);
}
if (!isNumber(argument_types[ARGUMENT_METRIC]))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Argument {} must be a number, '{}' given",
ARGUMENT_METRIC,
argument_types[ARGUMENT_METRIC]->getName());
}
if (!isDateTime(argument_types[ARGUMENT_TIMESTAMP]) && !isDateTime64(argument_types[ARGUMENT_TIMESTAMP]))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Argument {} must be DateTime or DateTime64, '{}' given",
ARGUMENT_TIMESTAMP,
argument_types[ARGUMENT_TIMESTAMP]->getName());
}
if (argument_types.size() == 3)
{
const DataTypeInterval * interval_datatype = checkAndGetDataType<DataTypeInterval>(argument_types[ARGUMENT_INTERVAL].get());
if (!interval_datatype)
{
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Argument {} must be an INTERVAL, '{}' given",
ARGUMENT_INTERVAL,
argument_types[ARGUMENT_INTERVAL]->getName());
}
if (!interval_datatype->getKind().isFixedLength())
{
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"The INTERVAL must be a week or shorter, '{}' given",
argument_types[ARGUMENT_INTERVAL]->getName());
}
interval_length = interval_datatype->getKind().toAvgSeconds();
interval_specified = true;
}
}
DataTypePtr getReturnType() const override { return argument_types[0]; }
bool allocatesMemoryInArena() const override { return false; }
void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) override
{
const auto & current_block = transform->blockAt(transform->current_row);
const auto & workspace = transform->workspaces[function_index];
auto & state = getState(workspace);
auto interval_duration = interval_specified ? interval_length *
(*current_block.input_columns[workspace.argument_column_indices[ARGUMENT_INTERVAL]]).getFloat64(0) : 1;
Float64 last_metric = state.previous_metric;
Float64 last_timestamp = state.previous_timestamp;
Float64 curr_metric = WindowFunctionHelpers::getValue<Float64>(transform, function_index, ARGUMENT_METRIC, transform->current_row);
Float64 curr_timestamp = WindowFunctionHelpers::getValue<Float64>(transform, function_index, ARGUMENT_TIMESTAMP, transform->current_row);
Float64 time_elapsed = curr_timestamp - last_timestamp;
Float64 metric_diff = curr_metric - last_metric;
Float64 result = (time_elapsed != 0) ? (metric_diff / time_elapsed * interval_duration) : 0;
state.previous_metric = curr_metric;
state.previous_timestamp = curr_timestamp;
WindowFunctionHelpers::setValueToOutputColumn<Float64>(transform, function_index, result >= 0 ? result : 0);
}
private:
Float64 interval_length = 1;
bool interval_specified = false;
};
void registerWindowFunctions(AggregateFunctionFactory & factory)
{
@ -2404,13 +2299,6 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
return std::make_shared<WindowFunctionExponentialTimeDecayedAvg>(
name, argument_types, parameters);
}, properties});
factory.registerFunction("nonNegativeDerivative", {[](const std::string & name,
const DataTypes & argument_types, const Array & parameters, const Settings *)
{
return std::make_shared<WindowFunctionNonNegativeDerivative>(
name, argument_types, parameters);
}, properties});
}
}

View File

@ -328,7 +328,7 @@ StorageLiveView::StorageLiveView(
blocks_metadata_ptr = std::make_shared<BlocksMetadataPtr>();
active_ptr = std::make_shared<bool>(true);
periodic_refresh_task = getContext()->getSchedulePool().createTask("LieViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); });
periodic_refresh_task = getContext()->getSchedulePool().createTask("LiveViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); });
periodic_refresh_task->deactivate();
}

View File

@ -1300,7 +1300,7 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context,
}
else
{
DataTypePtr common_type = tryGetLeastSupertype({key_expr_type_not_null, const_type});
DataTypePtr common_type = tryGetLeastSupertype(DataTypes{key_expr_type_not_null, const_type});
if (!common_type)
return false;
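
A note on this change: tryGetLeastSupertype now has overloads for both DataTypes and TypeIndexSet (see the getLeastSupertype.h hunk above), so spelling out the container type keeps the call explicit about which overload is meant. Sketch, with types taken from the header's own example (UInt8 and Int8 give Int16):

DataTypePtr a = std::make_shared<DataTypeUInt8>();
DataTypePtr b = std::make_shared<DataTypeInt8>();

DataTypePtr from_types = tryGetLeastSupertype(DataTypes{a, b});                                /// Int16, DataTypes overload
DataTypePtr from_ids = tryGetLeastSupertype(TypeIndexSet{TypeIndex::UInt8, TypeIndex::Int8});  /// Int16, TypeIndexSet overload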

View File

@ -1103,7 +1103,9 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry
continue;
/// Parts are not disjoint, so new_part_name either contains or covers future_part.
chassert(future_part.contains(result_part) || result_part.contains(future_part));
if (!(future_part.contains(result_part) || result_part.contains(future_part)))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected non-disjoint parts: {} and {}", future_part_elem.first, new_part_name);
/// We cannot execute `entry` (or upgrade its actual_part_name to `new_part_name`)
/// while any covered or covering parts are processed.
/// But we also cannot simply return true and postpone entry processing, because it may lead to kind of livelock.
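
The pattern here: a debug-time chassert becomes an unconditional runtime check that throws LOGICAL_ERROR with diagnostic context (chassert is assumed to be a no-op in ordinary release builds). As a standalone sketch with an illustrative helper name:

void checkPartsAreNested(const MergeTreePartInfo & future_part, const MergeTreePartInfo & result_part,
                         const String & future_part_name, const String & new_part_name)
{
    /// Before: chassert(future_part.contains(result_part) || result_part.contains(future_part));
    if (!(future_part.contains(result_part) || result_part.contains(future_part)))
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "Got unexpected non-disjoint parts: {} and {}", future_part_name, new_part_name);
}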

View File

@ -598,11 +598,19 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes()
auto zookeeper = getZooKeeper();
std::vector<zkutil::ZooKeeper::FutureCreate> futures;
/// We need to confirm /quorum exists here although it's called under createTableIfNotExists because in older CH releases (pre 22.4)
/// it was created here, so if metadata creation is done by an older replica the node might not exist when reaching this call
futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum", String(), zkutil::CreateMode::Persistent));
futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum/parallel", String(), zkutil::CreateMode::Persistent));
/// These 4 nodes used to be created in createNewZookeeperNodes() and they were moved to createTable()
/// This means that if the first replica creating the table metadata has an older version of CH (22.3 or previous)
/// there will be a time between its calls to `createTable` and `createNewZookeeperNodes` where the nodes won't exist
/// and that will cause issues in newer replicas
/// See https://github.com/ClickHouse/ClickHouse/issues/38600 for example
futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum", String(), zkutil::CreateMode::Persistent));
futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum/last_part", String(), zkutil::CreateMode::Persistent));
futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum/failed_parts", String(), zkutil::CreateMode::Persistent));
futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/mutations", String(), zkutil::CreateMode::Persistent));
futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum/parallel", String(), zkutil::CreateMode::Persistent));
/// Nodes for remote fs zero-copy replication
const auto settings = getSettings();
if (settings->allow_remote_fs_zero_copy_replication)

View File

@ -346,6 +346,17 @@ if __name__ == "__main__":
)
except Exception as ex:
print("Volume creationg failed, probably it already exists, exception", ex)
# TODO: this part cleans out stale volumes produced by container name
# randomizer, we should remove it after Sep 2022
try:
subprocess.check_call(
"docker volume rm $(docker volume ls -q | "
f"grep '{VOLUME_NAME}_.*_volume')",
shell=True,
)
except Exception as ex:
print("Probably, some stale volumes still there, just continue:", ex)
# TODO END
dockerd_internal_volume = f"--volume={VOLUME_NAME}_volume:/var/lib/docker"
# If enabled we kill and remove containers before pytest session run.
@ -392,7 +403,11 @@ if __name__ == "__main__":
command=args.command,
)
containers = subprocess.check_output(f"docker ps -a -q --filter name={CONTAINER_NAME} --format={{{{.ID}}}}", shell=True, universal_newlines=True).splitlines()
containers = subprocess.check_output(
f"docker ps -a -q --filter name={CONTAINER_NAME} --format={{{{.ID}}}}",
shell=True,
universal_newlines=True,
).splitlines()
if containers:
print(f"Trying to kill containers name={CONTAINER_NAME} ids={containers}")
subprocess.check_call(f"docker kill {' '.join(containers)}", shell=True)

View File

@ -0,0 +1 @@
#!/usr/bin/env python3

View File

@ -0,0 +1,58 @@
#!/usr/bin/env python3
import logging
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node", stay_alive=True)
@pytest.fixture(scope="module", autouse=True)
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_restart_success_ipv4():
node.query(
"""
CREATE TABLE ipv4_test
(
id UInt64,
value String
) ENGINE=MergeTree ORDER BY id""",
settings={"cast_ipv4_ipv6_default_on_conversion_error": 1},
)
node.query(
"ALTER TABLE ipv4_test MODIFY COLUMN value IPv4 DEFAULT ''",
settings={"cast_ipv4_ipv6_default_on_conversion_error": 1},
)
node.restart_clickhouse()
assert node.query("SELECT 1") == "1\n"
def test_restart_success_ipv6():
node.query(
"""
CREATE TABLE ipv6_test
(
id UInt64,
value String
) ENGINE=MergeTree ORDER BY id""",
settings={"cast_ipv4_ipv6_default_on_conversion_error": 1},
)
node.query(
"ALTER TABLE ipv6_test MODIFY COLUMN value IPv6 DEFAULT ''",
settings={"cast_ipv4_ipv6_default_on_conversion_error": 1},
)
node.restart_clickhouse()
assert node.query("SELECT 1") == "1\n"

View File

@ -0,0 +1,41 @@
<test>
<settings>
<allow_experimental_object_type>1</allow_experimental_object_type>
</settings>
<!-- Looks like the only way to use json in test's queries is to put them to substitution.
Otherwise jsons are interpreted as substitutions themselves -->
<substitutions>
<substitution>
<name>json1</name>
<values>
<value>'{"k1":1, "k2": "some"}'</value>
</values>
</substitution>
<substitution>
<name>json2</name>
<values>
<value>'{"col' || toString(number % 100) || '":' || toString(number) || '}'</value>
</values>
</substitution>
<substitution>
<name>json3</name>
<values>
<value>'{"k1":[{"k2":"aaa","k3":[{"k4":"bbb"},{"k4":"ccc"}]},{"k2":"ddd","k3":[{"k4":"eee"},{"k4":"fff"}]}]}'</value>
</values>
</substitution>
</substitutions>
<create_query>CREATE TABLE t_json_1(data JSON) ENGINE = MergeTree ORDER BY tuple()</create_query>
<create_query>CREATE TABLE t_json_2(data JSON) ENGINE = MergeTree ORDER BY tuple()</create_query>
<create_query>CREATE TABLE t_json_3(data JSON) ENGINE = MergeTree ORDER BY tuple()</create_query>
<query>INSERT INTO t_json_1 SELECT materialize({json1}) FROM numbers(200000)</query>
<query>INSERT INTO t_json_2 SELECT {json2} FROM numbers(100000)</query>
<query>INSERT INTO t_json_3 SELECT materialize({json3}) FROM numbers_mt(100000)</query>
<drop_query>DROP TABLE IF EXISTS t_json_1</drop_query>
<drop_query>DROP TABLE IF EXISTS t_json_2</drop_query>
<drop_query>DROP TABLE IF EXISTS t_json_3</drop_query>
</test>

View File

@ -0,0 +1 @@
1 1 1

View File

@ -0,0 +1 @@
SELECT revision()=Revision(), REVISION()=Revision(), revisiON()=reVision();

View File

@ -1 +1 @@
Tuple(k1 Int8, k2 String) 3000000
Tuple(k1 Int8, k2 String) 500000

View File

@ -1,10 +1,10 @@
-- Tags: long, no-backward-compatibility-check:22.3.2.1
DROP TABLE IF EXISTS t_json_parallel;
SET allow_experimental_object_type = 1, max_insert_threads = 20, max_threads = 20;
SET allow_experimental_object_type = 1, max_insert_threads = 20, max_threads = 20, min_insert_block_size_rows = 65536;
CREATE TABLE t_json_parallel (data JSON) ENGINE = MergeTree ORDER BY tuple();
INSERT INTO t_json_parallel SELECT materialize('{"k1":1, "k2": "some"}') FROM numbers_mt(3000000);
INSERT INTO t_json_parallel SELECT materialize('{"k1":1, "k2": "some"}') FROM numbers_mt(500000);
SELECT any(toTypeName(data)), count() FROM t_json_parallel;
DROP TABLE t_json_parallel;

View File

@ -1,4 +1,4 @@
-- Tags: no-backward-compatibility-check:22.5.1
-- Tags: no-backward-compatibility-check
DROP TABLE IF EXISTS partslost_0;
DROP TABLE IF EXISTS partslost_1;

View File

@ -1,64 +0,0 @@
1
1979-12-12 21:21:21.123 1.1 3.5045052519931732e-9
1979-12-12 21:21:22.000 1.3345 0.26738883339230357
1979-12-12 21:21:23.000 1.54 0.20550000000000002
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:21.124 2.34 0
1979-12-12 21:21:21.127 3.7 453.33916989529325
1979-12-12 21:21:21.123 1.1 1.0513515755979521e-17
1979-12-12 21:21:22.000 1.3345 8.021665001769108e-10
1979-12-12 21:21:23.000 1.54 6.165000000000001e-10
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:21.124 2.34 0
1979-12-12 21:21:21.127 3.7 0.0000013600175096858798
1979-12-12 21:21:21.123 1.1 1.4018021007972692e-14
1979-12-12 21:21:22.000 1.3345 0.0000010695553335692141
1979-12-12 21:21:23.000 1.54 8.22e-7
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:21.124 2.34 0
1979-12-12 21:21:21.127 3.7 0.001813356679581173
1979-12-12 21:21:21.123 1.1 1.7522526259965866e-11
1979-12-12 21:21:22.000 1.3345 0.0013369441669615178
1979-12-12 21:21:23.000 1.54 0.0010275000000000002
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:21.124 2.34 0
1979-12-12 21:21:21.127 3.7 2.2666958494764664
1979-12-12 21:21:21.123 1.1 2.102703151195904e-8
1979-12-12 21:21:22.000 1.3345 1.6043330003538214
1979-12-12 21:21:23.000 1.54 1.233
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:21.124 2.34 0
1979-12-12 21:21:21.127 3.7 2720.0350193717595
1979-12-12 21:21:21.123 1.1 0.0000014718922058371327
1979-12-12 21:21:22.000 1.3345 112.3033100247675
1979-12-12 21:21:23.000 1.54 86.31
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:21.124 2.34 0
1979-12-12 21:21:21.127 3.7 190402.45135602317
1979-12-12 21:21:21.123 1.1 0.0001009297512574034
1979-12-12 21:21:21.124 2.34 35712459.78375156
1979-12-12 21:21:21.127 3.7 13056168.092984445
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:22.000 1.3345 0
1979-12-12 21:21:23.000 1.54 5918.400000000001
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.123 1.1 0.0027251032839498914
1979-12-12 21:21:21.124 2.34 964236414.1612921
1979-12-12 21:21:21.127 3.7 352516538.51058006
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:22.000 1.3345 0
1979-12-12 21:21:23.000 1.54 159796.80000000002
1979-12-12 21:21:23.000 1.54 0
1979-12-12 21:21:21.123 1.1 0.021195247764054712
1979-12-12 21:21:21.124 2.34 7499616554.587828
1979-12-12 21:21:21.127 3.7 2741795299.5267334
1979-12-12 21:21:21.129 2.1 0
1979-12-12 21:21:22.000 1.3345 0
1979-12-12 21:21:23.000 1.54 1242864
1979-12-12 21:21:23.000 1.54 0

View File

@ -1,63 +0,0 @@
DROP TABLE IF EXISTS nnd;
CREATE TABLE nnd
(
id Int8, ts DateTime64(3, 'UTC'), metric Float64
)
ENGINE=MergeTree()
ORDER BY id;
INSERT INTO nnd VALUES (1, toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1.1), (2, toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2.34), (3, toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3.7);
INSERT INTO nnd VALUES (4, toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2.1), (5, toDateTime('1979-12-12 21:21:22', 'UTC'), 1.3345), (6, toDateTime('1979-12-12 21:21:23', 'UTC'), 1.54), (7, toDateTime('1979-12-12 21:21:23', 'UTC'), 1.54);
-- shall work for precise intervals
-- INTERVAL 1 SECOND shall be default
SELECT (
SELECT
ts,
metric,
nonNegativeDerivative(metric, ts) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv
FROM nnd
LIMIT 5, 1
) = (
SELECT
ts,
metric,
nonNegativeDerivative(metric, ts, toIntervalSecond(1)) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv
FROM nnd
LIMIT 5, 1
);
SELECT ts, metric, nonNegativeDerivative(metric, ts) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Nanosecond
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Microsecond
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 4 MICROSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Millisecond
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 5 MILLISECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Second
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 6 SECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Minute
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 7 MINUTE) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Hour
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 8 HOUR) OVER (ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Day
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 9 DAY) OVER (ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- Week
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 10 WEEK) OVER (ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd;
-- shall not work for month, quarter, year (intervals with floating number of seconds)
-- Month
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 11 MONTH) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- Quarter
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 12 QUARTER) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- Year
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 13 YEAR) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
-- test against wrong arguments/types
SELECT ts, metric, nonNegativeDerivative(metric, 1, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS }
SELECT ts, metric, nonNegativeDerivative('string not datetime', ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS }
SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND, id) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS }
SELECT ts, metric, nonNegativeDerivative(metric) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS }
-- cleanup
DROP TABLE IF EXISTS nnd;
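The test removed above documents the call shape of the nonNegativeDerivative window function: the third argument is an optional fixed-length interval, it defaults to INTERVAL 1 SECOND, and month/quarter/year intervals are rejected. A condensed sketch of the basic form, reusing the test's own nnd table and columns:

SELECT ts, metric,
    nonNegativeDerivative(metric, ts, INTERVAL 1 SECOND)
        OVER (PARTITION BY metric ORDER BY ts ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv
FROM nnd;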

View File

@ -1,4 +1,5 @@
-- Tags: no-backward-compatibility-check
-- TODO: remove no-backward-compatibility-check after new 22.6 release
SET cast_ipv4_ipv6_default_on_conversion_error = 1;
@ -11,6 +12,13 @@ CREATE TABLE ipv4_test
ALTER TABLE ipv4_test MODIFY COLUMN value IPv4 DEFAULT '';
SET cast_ipv4_ipv6_default_on_conversion_error = 0;
DETACH TABLE ipv4_test;
ATTACH TABLE ipv4_test;
SET cast_ipv4_ipv6_default_on_conversion_error = 1;
DROP TABLE ipv4_test;
DROP TABLE IF EXISTS ipv6_test;
@ -20,7 +28,15 @@ CREATE TABLE ipv6_test
value String
) ENGINE=MergeTree ORDER BY id;
ALTER TABLE ipv6_test MODIFY COLUMN value IPv4 DEFAULT '';
ALTER TABLE ipv6_test MODIFY COLUMN value IPv6 DEFAULT '';
SET cast_ipv4_ipv6_default_on_conversion_error = 0;
DETACH TABLE ipv6_test;
ATTACH TABLE ipv6_test;
SET cast_ipv4_ipv6_default_on_conversion_error = 1;
SELECT * FROM ipv6_test;
DROP TABLE ipv6_test;
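The DETACH/ATTACH cycle added above appears to check that the DEFAULT '' expression, created while cast_ipv4_ipv6_default_on_conversion_error = 1, can still be loaded after the setting is turned off. As a rough sketch of the setting's effect (my understanding, not part of the diff): with it enabled, an unparsable address casts to the type's default value instead of raising an error.

SET cast_ipv4_ipv6_default_on_conversion_error = 1;
SELECT CAST('' AS IPv4); -- 0.0.0.0 (the type's default) rather than an exception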

View File

@ -0,0 +1,15 @@
0 999999 999999
0 999998 999998
0 999997 999997
0 999996 999996
0 999995 999995
0 999994 999994
0 999993 999993
0 999992 999992
0 999991 999991
0 999990 999990
98974
98973
98972
98971
98970

View File

@ -0,0 +1,5 @@
-- Tags: no-backward-compatibility-check
-- Regression for PartialSortingTransform optimization that requires at least 1500 rows.
SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10;
SELECT number FROM (SELECT number, 1 AS k FROM numbers(100000) ORDER BY k ASC, number DESC LIMIT 1025, 1023) LIMIT 5;

Some files were not shown because too many files have changed in this diff.