Merge branch 'master' into better-progress-bar-2

Kruglov Pavel 2023-07-26 13:12:24 +02:00 committed by GitHub
commit 15cc046883
255 changed files with 3884 additions and 1470 deletions

View File

@ -57,7 +57,7 @@ public:
URI();
/// Creates an empty URI.
explicit URI(const std::string & uri);
explicit URI(const std::string & uri, bool disable_url_encoding = false);
/// Parses an URI from the given string. Throws a
/// SyntaxException if the uri is not valid.
@ -350,6 +350,10 @@ protected:
static const std::string ILLEGAL;
private:
void encodePath(std::string & encodedStr) const;
void decodePath(const std::string & encodedStr);
std::string _scheme;
std::string _userInfo;
std::string _host;
@ -357,6 +361,8 @@ private:
std::string _path;
std::string _query;
std::string _fragment;
bool _disable_url_encoding = false;
};

View File

@ -36,8 +36,8 @@ URI::URI():
}
URI::URI(const std::string& uri):
_port(0)
URI::URI(const std::string& uri, bool decode_and_encode_path):
_port(0), _disable_url_encoding(decode_and_encode_path)
{
parse(uri);
}
@ -107,7 +107,8 @@ URI::URI(const URI& uri):
_port(uri._port),
_path(uri._path),
_query(uri._query),
_fragment(uri._fragment)
_fragment(uri._fragment),
_disable_url_encoding(uri._disable_url_encoding)
{
}
@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
_port(baseURI._port),
_path(baseURI._path),
_query(baseURI._query),
_fragment(baseURI._fragment)
_fragment(baseURI._fragment),
_disable_url_encoding(baseURI._disable_url_encoding)
{
resolve(relativeURI);
}
@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
_path = uri._path;
_query = uri._query;
_fragment = uri._fragment;
_disable_url_encoding = uri._disable_url_encoding;
}
return *this;
}
@ -181,6 +184,7 @@ void URI::swap(URI& uri)
std::swap(_path, uri._path);
std::swap(_query, uri._query);
std::swap(_fragment, uri._fragment);
std::swap(_disable_url_encoding, uri._disable_url_encoding);
}
@ -201,7 +205,7 @@ std::string URI::toString() const
std::string uri;
if (isRelative())
{
encode(_path, RESERVED_PATH, uri);
encodePath(uri);
}
else
{
@ -217,7 +221,7 @@ std::string URI::toString() const
{
if (!auth.empty() && _path[0] != '/')
uri += '/';
encode(_path, RESERVED_PATH, uri);
encodePath(uri);
}
else if (!_query.empty() || !_fragment.empty())
{
@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority)
void URI::setPath(const std::string& path)
{
_path.clear();
decode(path, _path);
decodePath(path);
}
@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc)
std::string URI::getPathEtc() const
{
std::string pathEtc;
encode(_path, RESERVED_PATH, pathEtc);
encodePath(pathEtc);
if (!_query.empty())
{
pathEtc += '?';
@ -436,7 +440,7 @@ std::string URI::getPathEtc() const
std::string URI::getPathAndQuery() const
{
std::string pathAndQuery;
encode(_path, RESERVED_PATH, pathAndQuery);
encodePath(pathAndQuery);
if (!_query.empty())
{
pathAndQuery += '?';
@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
}
}
void URI::encodePath(std::string & encodedStr) const
{
if (_disable_url_encoding)
encodedStr = _path;
else
encode(_path, RESERVED_PATH, encodedStr);
}
void URI::decodePath(const std::string & encodedStr)
{
if (_disable_url_encoding)
_path = encodedStr;
else
decode(encodedStr, _path);
}
bool URI::isWellKnownPort() const
{
@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
{
std::string path;
while (it != end && *it != '?' && *it != '#') path += *it++;
decode(path, _path);
decodePath(path);
}
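
The flag threads through parsing and serialization: `decodePath` and `encodePath` become pass-throughs when `_disable_url_encoding` is set. A minimal usage sketch, assuming a Poco build with this patch applied:

```cpp
#include <Poco/URI.h>
#include <iostream>

int main()
{
    // Default behavior: the path is percent-decoded on parse
    // and re-encoded by toString().
    Poco::URI encoded("http://example.com/a%20b");
    std::cout << encoded.getPath() << '\n';  // "a b"

    // With the new flag the path is kept verbatim in both directions.
    Poco::URI raw("http://example.com/a%20b", /*disable_url_encoding=*/ true);
    std::cout << raw.toString() << '\n';     // "http://example.com/a%20b"
    return 0;
}
```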

contrib/idxd-config vendored

@ -1 +1 @@
Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1

contrib/qpl vendored

@ -1 +1 @@
Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679
Subproject commit faaf19350459c076e66bb5df11743c3fade59b73

View File

@ -4,6 +4,9 @@
set -e -x -a
# Choose random timezone for this test run.
#
# NOTE: clickhouse-test will randomize session_timezone by itself as well
# (it will choose between default server timezone and something specific).
TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)"
echo "Choosen random timezone $TZ"
ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone

View File

@ -14,6 +14,7 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test
# Stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
install_packages package_folder
@ -52,7 +53,7 @@ azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
start
shellcheck disable=SC2086 # No quotes because I want to split it into words.
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"

View File

@ -16,6 +16,7 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre
# Stress tests and the upgrade check use similar code that was placed
# in a separate bash library. See tests/ci/stress_tests.lib
source /usr/share/clickhouse-test/ci/attach_gdb.lib
source /usr/share/clickhouse-test/ci/stress_tests.lib
azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log &
@ -61,6 +62,7 @@ configure
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start
@ -90,6 +92,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
start

View File

@ -7,12 +7,8 @@ description: How to build ClickHouse and run benchmarks with the DEFLATE_QPL codec
# Build ClickHouse with DEFLATE_QPL
- Make sure your target machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- Pass the following flag to CMake when building ClickHouse:
``` bash
cmake -DENABLE_QPL=1 ..
```
- Make sure your host machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
- DEFLATE_QPL is enabled by default during the CMake build. If you accidentally changed it, please double-check the build flag: ENABLE_QPL=1
- For generic requirements, please refer to the ClickHouse generic [build instructions](/docs/en/development/build.md)

View File

@ -57,7 +57,8 @@ Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter tok
:::note
As shown in the example, querying from S3 tables that are partitioned is
not directly supported at this time, but can be accomplished by querying the bucket contents with a wildcard.
not directly supported at this time, but can be accomplished by querying the individual partitions
using the S3 table function.
The primary use-case for writing
partitioned data in S3 is to enable transferring that data into another
@ -127,23 +128,7 @@ FROM s3('http://minio:10000/clickhouse//test_45.csv', 'minioadmin', 'minioadminp
└────┴────┴────┘
```
#### Select from all partitions
```sql
SELECT *
FROM s3('http://minio:10000/clickhouse//**', 'minioadmin', 'minioadminpassword', 'CSV')
```
```response
┌─c1─┬─c2─┬─c3─┐
│ 3 │ 2 │ 1 │
└────┴────┴────┘
┌─c1─┬─c2─┬─c3─┐
│ 1 │ 2 │ 3 │
└────┴────┴────┘
┌─c1─┬─c2─┬─c3─┐
│ 78 │ 43 │ 45 │
└────┴────┴────┘
```
#### Limitation
You may naturally try to `SELECT * FROM p`, but as noted above, this query will fail; use the preceding query.

View File

@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows disabling decoding/encoding of the path in the URI. Disabled by default.

View File

@ -56,7 +56,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@ -286,9 +286,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you
You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
Possible header fields:
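
For illustration, a tiny standalone parser for these header values; the field names follow the examples above, and the regex approach is a sketch rather than anything ClickHouse ships:

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <regex>
#include <string>

// Parse one X-ClickHouse-Progress header value of the form
// {"read_rows":"...","read_bytes":"...",...} into name -> number.
std::map<std::string, uint64_t> parseProgress(const std::string & value)
{
    std::map<std::string, uint64_t> fields;
    static const std::regex kv(R"re("([a-z_]+)":"([0-9]+)")re");
    for (auto it = std::sregex_iterator(value.begin(), value.end(), kv);
         it != std::sregex_iterator(); ++it)
        fields[(*it)[1].str()] = std::stoull((*it)[2].str());
    return fields;
}

int main()
{
    auto p = parseProgress(
        R"({"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"})");
    std::cout << 100.0 * p["read_rows"] / p["total_rows_to_read"] << "% read\n";
}
```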
@ -416,7 +416,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -581,7 +581,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -621,7 +621,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -673,7 +673,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -692,7 +692,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -65,6 +65,40 @@ XML substitution example:
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
## Encrypting Configuration {#encryption}
You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encryption_codec` with the name of the encryption codec as value to the element to encrypt.
Unlike the attributes `from_zk`, `from_env` and `incl` (or the element `include`), no substitution (i.e. decryption of the encrypted value) is performed in the preprocessed file. Decryption happens only at runtime in the server process.
Example:
```xml
<clickhouse>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
<interserver_http_credentials>
<user>admin</user>
<password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
</interserver_http_credentials>
</clickhouse>
```
To get the encrypted value, the `encrypt_decrypt` example application may be used.
Example:
``` bash
./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
```
``` text
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```
## User Settings {#user-settings}
The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.

View File

@ -3468,6 +3468,12 @@ Possible values:
Default value: `0`.
## disable_url_encoding {#disable_url_encoding}
Allows disabling decoding/encoding of the path in the URI for [URL](../../engines/table-engines/special/url.md) engine tables.
Disabled by default.
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}
Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.

View File

@ -12,3 +12,5 @@ To get a determinate result, you can use the min or max function instead
In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY.
When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
- Alias: `any_value`

View File

@ -575,6 +575,42 @@ Alias:
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
## substringIndex(s, delim, count)
Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL.
**Syntax**
```sql
substringIndex(s, delim, count)
```
Alias: `SUBSTRING_INDEX`
**Arguments**
- s: The string to extract the substring from. [String](../../sql-reference/data-types/string.md).
- delim: The delimiter character to split on. [String](../../sql-reference/data-types/string.md).
- count: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Example**
``` sql
SELECT substringIndex('www.clickhouse.com', '.', 2)
```
Result:
```
┌─substringIndex('www.clickhouse.com', '.', 2)─┐
│ www.clickhouse │
└──────────────────────────────────────────────┘
```
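
To spell out the counting rule, here is an illustrative reimplementation (single-character delimiter for brevity; it mirrors the documented semantics and is not the server's implementation):

```cpp
#include <iostream>
#include <string>

// Illustrative only; mirrors the documented semantics of substringIndex.
// count > 0: everything left of the count-th delimiter, counting from the left.
// count < 0: everything right of the |count|-th delimiter, counting from the right.
std::string substringIndex(const std::string & s, char delim, int count)
{
    if (count == 0)
        return "";
    if (count > 0)
    {
        size_t pos = 0, from = 0;
        for (int i = 0; i < count; ++i)
        {
            pos = s.find(delim, from);
            if (pos == std::string::npos)
                return s; // fewer delimiters than count: whole string
            from = pos + 1;
        }
        return s.substr(0, pos);
    }
    size_t pos = std::string::npos;
    for (int i = 0; i < -count; ++i)
    {
        if (i > 0 && pos == 0)
            return s;
        pos = s.rfind(delim, i == 0 ? std::string::npos : pos - 1);
        if (pos == std::string::npos)
            return s;
    }
    return s.substr(pos + 1);
}

int main()
{
    std::cout << substringIndex("www.clickhouse.com", '.', 2) << '\n';  // www.clickhouse
    std::cout << substringIndex("www.clickhouse.com", '.', -2) << '\n'; // clickhouse.com
}
```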
## substringIndexUTF8(s, delim, count)
Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
## appendTrailingCharIfAbsent
Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`.

View File

@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC
Syntax:
```sql
ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**

View File

@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it
## Storage Settings {#storage-settings}
- [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows skipping empty files while reading. Disabled by default.
- [disable_url_encoding](/docs/en/operations/settings/settings.md#disable_url_encoding) - allows disabling decoding/encoding of the path in the URI. Disabled by default.
**See Also**

View File

@ -3,23 +3,46 @@ slug: /en/guides/developer/transactional
---
# Transactional (ACID) support
INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID):
- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed.
## Case 1: INSERT into one partition, of one table, of the MergeTree* family
This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes):
- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted.
- Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted.
- Isolated: concurrent clients observe a consistent snapshot of the table: the state of the table either as if before INSERT or after successful INSERT; no partial state is seen;
- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
* If table has many partitions and INSERT covers many partitions, then insertion into every partition is transactional on its own;
* INSERT into multiple tables with one statement is possible if materialized views are involved;
* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional;
* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable;
* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting;
* max_insert_block_size is 1 000 000 by default and can be adjusted as needed;
* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties;
* ClickHouse is using MVCC with snapshot isolation internally;
* all ACID properties are valid even in case of server kill / crash;
* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup;
* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc.
- Isolated: concurrent clients observe a consistent snapshot of the table: the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen
- Durable: a successful INSERT is written to the filesystem before answering to the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting).
- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views).
## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family
Same as Case 1 above, with this detail:
- If table has many partitions and INSERT covers many partitions, then insertion into every partition is transactional on its own
## Case 3: INSERT into one distributed table of the MergeTree* family
Same as Case 1 above, with this detail:
- INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional
## Case 4: Using a Buffer table
- insert into Buffer tables is neither atomic nor isolated nor consistent nor durable
## Case 5: Using async_insert
Same as Case 1 above, with this detail:
- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
## Notes
- rows inserted from the client in some data format are packed into a single block when:
- the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains less than `max_insert_block_size` rows (~1 000 000 by default) or less than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) in case parallel parsing is used (enabled by default)
- the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
- ClickHouse is using MVCC with snapshot isolation internally
- all ACID properties are valid even in the case of server kill/crash
- either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in the typical setup
- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency)
- this explanation does not cover a new transactions feature that allows full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback)
## Transactions, Commit, and Rollback

View File

@ -50,7 +50,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@ -266,9 +266,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
Query execution progress can be tracked via the `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example of the header sequence:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
Possible header fields:
@ -529,7 +529,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -569,7 +569,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -621,7 +621,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -640,7 +640,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -85,6 +85,40 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
The server tracks changes to configuration files, as well as to files and ZooKeeper nodes that were used in substitutions and overrides, and reloads user and cluster settings on the fly. That is, clusters, users, and their settings can be changed without restarting the server.
## Encryption {#encryption}
You can use symmetric encryption to encrypt a configuration element, for example, a password field. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add the attribute `encryption_codec` with the name of the encryption codec as its value to the element to encrypt.
Unlike the attributes `from_zk`, `from_env` and `incl` (or the element `include`), no substitution (i.e. decryption of the encrypted value) is performed in the preprocessed file. Decryption happens only at runtime in the server process.
Example:
```xml
<clickhouse>
<encryption_codecs>
<aes_128_gcm_siv>
<key_hex>00112233445566778899aabbccddeeff</key_hex>
</aes_128_gcm_siv>
</encryption_codecs>
<interserver_http_credentials>
<user>admin</user>
<password encryption_codec="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
</interserver_http_credentials>
</clickhouse>
```
To get the encrypted value, the `encrypt_decrypt` example application may be used.
Example:
``` bash
./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
```
``` text
961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
```
## Examples of YAML configuration {#example}
You can see an example of a real configuration written in YAML here: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).

View File

@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Syntax:
```sql
ALTER TABLE table_name MODIFY column_name REMOVE property;
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**

View File

@ -53,7 +53,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
1
```
@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812
You can receive information about query progress in the `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). Example:
``` text
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","peak_memory_usage":"4371480"}
X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","peak_memory_usage":"13621616"}
X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","peak_memory_usage":"23155600"}
```
Possible header fields:
@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query'
< X-ClickHouse-Format: Template
< X-ClickHouse-Timezone: Asia/Shanghai
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
# HELP "Query" "Number of executing queries"
# TYPE "Query" counter
@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
Say Hi!%
@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
* Connection #0 to host localhost left intact
<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>%
@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
<html><body>Absolute Path File</body></html>
* Connection #0 to host localhost left intact
@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","peak_memory_usage":"0"}
<
<html><body>Relative Path File</body></html>
* Connection #0 to host localhost left intact

View File

@ -80,6 +80,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedReadBufferFromFile.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressedWriteBuffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecEncrypted.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecLZ4.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecMultiple.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Compression/CompressionCodecNone.cpp

View File

@ -747,6 +747,7 @@ try
std::lock_guard lock(servers_lock);
metrics.reserve(servers_to_start_before_tables.size() + servers.size());
for (const auto & server : servers_to_start_before_tables)
metrics.emplace_back(ProtocolServerMetrics{server.getPortName(), server.currentThreads()});
@ -1476,16 +1477,18 @@ try
/// Load global settings from default_profile and system_profile.
global_context->setDefaultProfiles(config());
const Settings & settings = global_context->getSettingsRef();
/// Initialize background executors after we load default_profile config.
/// This is needed to load proper values of background_pool_size etc.
global_context->initializeBackgroundExecutorsIfNeeded();
if (settings.async_insert_threads)
if (server_settings.async_insert_threads)
{
global_context->setAsynchronousInsertQueue(std::make_shared<AsynchronousInsertQueue>(
global_context,
settings.async_insert_threads));
server_settings.async_insert_threads,
server_settings.async_insert_queue_flush_on_shutdown));
}
size_t mark_cache_size = server_settings.mark_cache_size;
String mark_cache_policy = server_settings.mark_cache_policy;

View File

@ -182,6 +182,7 @@ enum class AccessType
M(SYSTEM_SYNC_FILE_CACHE, "SYNC FILE CACHE", GLOBAL, SYSTEM) \
M(SYSTEM_FLUSH_DISTRIBUTED, "FLUSH DISTRIBUTED", TABLE, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH_LOGS, "FLUSH LOGS", GLOBAL, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH_ASYNC_INSERT_QUEUE, "FLUSH ASYNC INSERT QUEUE", GLOBAL, SYSTEM_FLUSH) \
M(SYSTEM_FLUSH, "", GROUP, SYSTEM) \
M(SYSTEM_THREAD_FUZZER, "SYSTEM START THREAD FUZZER, SYSTEM STOP THREAD FUZZER, START THREAD FUZZER, STOP THREAD FUZZER", GLOBAL, SYSTEM) \
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \

View File

@ -49,6 +49,7 @@ void registerAggregateFunctionsAny(AggregateFunctionFactory & factory)
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("any", { createAggregateFunctionAny, properties });
factory.registerAlias("any_value", "any", AggregateFunctionFactory::CaseInsensitive);
factory.registerFunction("anyLast", { createAggregateFunctionAnyLast, properties });
factory.registerFunction("anyHeavy", { createAggregateFunctionAnyHeavy, properties });

View File

@ -66,7 +66,7 @@ AggregateFunctionPtr createAggregateFunctionSimpleLinearRegression(
#undef FOR_LEASTSQR_TYPES
#undef DISPATCH
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT ,
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal types ({}, {}) of arguments of aggregate function {}, must "
"be Native Ints, Native UInts or Floats", x_arg->getName(), y_arg->getName(), name);
}

View File

@ -1195,6 +1195,8 @@ void ClientBase::onProfileEvents(Block & block)
thread_times[host_name].system_ms = value;
else if (event_name == MemoryTracker::USAGE_EVENT_NAME)
thread_times[host_name].memory_usage = value;
else if (event_name == MemoryTracker::PEAK_USAGE_EVENT_NAME)
thread_times[host_name].peak_memory_usage = value;
}
progress_indication.updateThreadEventData(thread_times);

View File

@ -110,7 +110,7 @@ public:
/// Returns false if queue is finished
[[nodiscard]] bool pushFront(const T & x)
{
return emplaceImpl</* back= */ false>(/* timeout_milliseconds= */ std::nullopt , x);
return emplaceImpl</* back= */ false>(/* timeout_milliseconds= */ std::nullopt, x);
}
/// Returns false if queue is finished

View File

@ -26,6 +26,14 @@
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#if USE_SSL
#include <format>
#include <IO/BufferWithOwnMemory.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <boost/algorithm/hex.hpp>
#endif
#define PREPROCESSED_SUFFIX "-preprocessed"
namespace fs = std::filesystem;
@ -39,6 +47,9 @@ namespace ErrorCodes
{
extern const int FILE_DOESNT_EXIST;
extern const int CANNOT_LOAD_CONFIG;
#if USE_SSL
extern const int BAD_ARGUMENTS;
#endif
}
/// For cutting preprocessed path to this base
@ -177,6 +188,72 @@ static void mergeAttributes(Element & config_element, Element & with_element)
with_element_attributes->release();
}
#if USE_SSL
std::string ConfigProcessor::encryptValue(const std::string & codec_name, const std::string & value)
{
EncryptionMethod method = getEncryptionMethod(codec_name);
CompressionCodecEncrypted codec(method);
Memory<> memory;
memory.resize(codec.getCompressedReserveSize(static_cast<UInt32>(value.size())));
auto bytes_written = codec.compress(value.data(), static_cast<UInt32>(value.size()), memory.data());
auto encrypted_value = std::string(memory.data(), bytes_written);
std::string hex_value;
boost::algorithm::hex(encrypted_value.begin(), encrypted_value.end(), std::back_inserter(hex_value));
return hex_value;
}
std::string ConfigProcessor::decryptValue(const std::string & codec_name, const std::string & value)
{
EncryptionMethod method = getEncryptionMethod(codec_name);
CompressionCodecEncrypted codec(method);
Memory<> memory;
std::string encrypted_value;
try
{
boost::algorithm::unhex(value, std::back_inserter(encrypted_value));
}
catch (const std::exception &)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read encrypted text, check for valid characters [0-9a-fA-F] and length");
}
memory.resize(codec.readDecompressedBlockSize(encrypted_value.data()));
codec.decompress(encrypted_value.data(), static_cast<UInt32>(encrypted_value.size()), memory.data());
std::string decrypted_value = std::string(memory.data(), memory.size());
return decrypted_value;
}
void ConfigProcessor::decryptRecursive(Poco::XML::Node * config_root)
{
for (Node * node = config_root->firstChild(); node; node = node->nextSibling())
{
if (node->nodeType() == Node::ELEMENT_NODE)
{
Element & element = dynamic_cast<Element &>(*node);
if (element.hasAttribute("encryption_codec"))
{
const NodeListPtr children = element.childNodes();
if (children->length() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} cannot contain nested elements", node->nodeName());
Node * text_node = node->firstChild();
if (text_node->nodeType() != Node::TEXT_NODE)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Encrypted node {} should have text node", node->nodeName());
auto encryption_codec = element.getAttribute("encryption_codec");
text_node->setNodeValue(decryptValue(encryption_codec, text_node->getNodeValue()));
}
decryptRecursive(node);
}
}
}
#endif
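
A round-trip sketch of how `encryptValue` and `decryptValue` compose, assuming a config whose `<encryption_codecs>` section defines the key (loaded the same way `decryptEncryptedElements` below does):

```cpp
#include <cassert>
#include <string>
#include <Common/Config/ConfigProcessor.h>
#include <Compression/CompressionCodecEncrypted.h>

// Hypothetical snippet, not part of the patch. Assumes loaded_config came
// from a config file whose <encryption_codecs> section defines the key.
void roundTrip(DB::ConfigProcessor & processor, DB::ConfigProcessor::LoadedConfig & loaded_config)
{
    DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(
        *loaded_config.configuration, "encryption_codecs");
    std::string hex = processor.encryptValue("AES_128_GCM_SIV", "abcd");
    // hex now looks like the value stored in the XML element above.
    assert(processor.decryptValue("AES_128_GCM_SIV", hex) == "abcd");
}
```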
void ConfigProcessor::mergeRecursive(XMLDocumentPtr config, Node * config_root, const Node * with_root)
{
const NodeListPtr with_nodes = with_root->childNodes();
@ -694,7 +771,19 @@ ConfigProcessor::LoadedConfig ConfigProcessor::loadConfigWithZooKeeperIncludes(
return LoadedConfig{configuration, has_zk_includes, !processed_successfully, config_xml, path};
}
void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir)
#if USE_SSL
void ConfigProcessor::decryptEncryptedElements(LoadedConfig & loaded_config)
{
CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
Node * config_root = getRootNode(loaded_config.preprocessed_xml.get());
decryptRecursive(config_root);
loaded_config.configuration = new Poco::Util::XMLConfiguration(loaded_config.preprocessed_xml);
}
#endif
void ConfigProcessor::savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir)
{
try
{
@ -749,6 +838,12 @@ void ConfigProcessor::savePreprocessedConfig(const LoadedConfig & loaded_config,
{
LOG_WARNING(log, "Couldn't save preprocessed config to {}: {}", preprocessed_path, e.displayText());
}
#if USE_SSL
std::string preprocessed_file_name = fs::path(preprocessed_path).filename();
if (preprocessed_file_name == "config.xml" || preprocessed_file_name == std::format("config{}.xml", PREPROCESSED_SUFFIX))
decryptEncryptedElements(loaded_config);
#endif
}
void ConfigProcessor::setConfigPath(const std::string & config_path)

View File

@ -97,7 +97,7 @@ public:
/// Save preprocessed config to specified directory.
/// If preprocessed_dir is empty - calculate from loaded_config.path + /preprocessed_configs/
void savePreprocessedConfig(const LoadedConfig & loaded_config, std::string preprocessed_dir);
void savePreprocessedConfig(LoadedConfig & loaded_config, std::string preprocessed_dir);
/// Set path of main config.xml. It will be cut from all configs placed to preprocessed_configs/
static void setConfigPath(const std::string & config_path);
@ -109,6 +109,14 @@ public:
/// Is the file named as result of config preprocessing, not as original files.
static bool isPreprocessedFile(const std::string & config_path);
#if USE_SSL
/// Encrypt text value
static std::string encryptValue(const std::string & codec_name, const std::string & value);
/// Decrypt value
static std::string decryptValue(const std::string & codec_name, const std::string & value);
#endif
static inline const auto SUBSTITUTION_ATTRS = {"incl", "from_zk", "from_env"};
private:
@ -127,6 +135,13 @@ private:
using NodePtr = Poco::AutoPtr<Poco::XML::Node>;
#if USE_SSL
void decryptRecursive(Poco::XML::Node * config_root);
/// Decrypt elements in config with specified encryption attributes
void decryptEncryptedElements(LoadedConfig & loaded_config);
#endif
void mergeRecursive(XMLDocumentPtr config, Poco::XML::Node * config_root, const Poco::XML::Node * with_root);
void merge(XMLDocumentPtr config, XMLDocumentPtr with);

View File

@ -110,9 +110,23 @@ namespace
}
else
{
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
parent_xml_node.appendChild(xml_key);
processNode(value_node, *xml_key);
if (key == "#text" && value_node.IsScalar())
{
for (Node * child_node = parent_xml_node.firstChild(); child_node; child_node = child_node->nextSibling())
if (child_node->nodeType() == Node::TEXT_NODE)
throw Exception(ErrorCodes::CANNOT_PARSE_YAML,
"YAMLParser has encountered node with several text nodes "
"and cannot continue parsing of the file");
std::string value = value_node.as<std::string>();
Poco::AutoPtr<Poco::XML::Text> xml_value = xml_document->createTextNode(value);
parent_xml_node.appendChild(xml_value);
}
else
{
Poco::AutoPtr<Poco::XML::Element> xml_key = xml_document->createElement(key);
parent_xml_node.appendChild(xml_key);
processNode(value_node, *xml_key);
}
}
}
break;

View File

@ -113,13 +113,19 @@ public:
if ((reinterpret_cast<uintptr_t>(p) & 2048) == 0)
{
memcpy(&n[0], p, 8);
n[0] &= -1ULL >> s;
if constexpr (std::endian::native == std::endian::little)
n[0] &= -1ULL >> s;
else
n[0] &= -1ULL << s;
}
else
{
const char * lp = x.data + x.size - 8;
memcpy(&n[0], lp, 8);
n[0] >>= s;
if constexpr (std::endian::native == std::endian::little)
n[0] >>= s;
else
n[0] <<= s;
}
auto res = hash(k8);
auto buck = getBucketFromHash(res);
@ -131,7 +137,10 @@ public:
memcpy(&n[0], p, 8);
const char * lp = x.data + x.size - 8;
memcpy(&n[1], lp, 8);
n[1] >>= s;
if constexpr (std::endian::native == std::endian::little)
n[1] >>= s;
else
n[1] <<= s;
auto res = hash(k16);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
@ -142,7 +151,10 @@ public:
memcpy(&n[0], p, 16);
const char * lp = x.data + x.size - 8;
memcpy(&n[2], lp, 8);
n[2] >>= s;
if constexpr (std::endian::native == std::endian::little)
n[2] >>= s;
else
n[2] <<= s;
auto res = hash(k24);
auto buck = getBucketFromHash(res);
keyHolderDiscardKey(key_holder);
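
What the endian-dependent shifts accomplish, as a self-contained sketch (the shift amount `s` is computed elsewhere in the patched function; here it is derived from the key length):

```cpp
#include <bit>
#include <cstdint>
#include <cstring>
#include <iostream>

// Sketch of the masking in the patch. An 8-byte word is memcpy'd from a
// short key, so it may contain trailing garbage bytes. On a little-endian
// host the key occupies the low bytes, so garbage is cleared by right-shifting
// the all-ones mask; a big-endian host needs the mirrored left shift.
// Precondition (as in the original): 8 bytes are readable at 'data'.
uint64_t maskTail(const char * data, size_t len) // 0 < len <= 8
{
    uint64_t word;
    std::memcpy(&word, data, 8);
    const unsigned s = static_cast<unsigned>((8 - len) * 8); // bits to discard
    if (s == 0)
        return word;
    if constexpr (std::endian::native == std::endian::little)
        return word & (~0ULL >> s);
    else
        return word & (~0ULL << s);
}

int main()
{
    const char buf[8] = {'k', 'e', 'y', 'X', 'X', 'X', 'X', 'X'};
    std::cout << std::hex << maskTail(buf, 3) << '\n'; // 0x79656b on little-endian
}
```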

View File

@ -95,6 +95,7 @@ private:
public:
static constexpr auto USAGE_EVENT_NAME = "MemoryTrackerUsage";
static constexpr auto PEAK_USAGE_EVENT_NAME = "MemoryTrackerPeakUsage";
explicit MemoryTracker(VariableContext level_ = VariableContext::Thread);
explicit MemoryTracker(MemoryTracker * parent_, VariableContext level_ = VariableContext::Thread);

View File

@ -58,8 +58,8 @@
M(TableFunctionExecute, "Number of table function calls.") \
M(MarkCacheHits, "Number of times an entry has been found in the mark cache, so we didn't have to load a mark file.") \
M(MarkCacheMisses, "Number of times an entry has not been found in the mark cache, so we had to load a mark file in memory, which is a costly operation, adding to query latency.") \
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided).") \
M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation).") \
M(QueryCacheHits, "Number of times a query result has been found in the query cache (and query computation was avoided). Only updated for SELECT queries with SETTING use_query_cache = 1.") \
M(QueryCacheMisses, "Number of times a query result has not been found in the query cache (and required query computation). Only updated for SELECT queries with SETTING use_query_cache = 1.") \
M(CreatedReadBufferOrdinary, "Number of times ordinary read buffer was created for reading data (while choosing among other read methods).") \
M(CreatedReadBufferDirectIO, "Number of times a read buffer with O_DIRECT was created for reading data (while choosing among other read methods).") \
M(CreatedReadBufferDirectIOFailed, "Number of times a read buffer with O_DIRECT was attempted to be created for reading data (while choosing among other read methods), but the OS did not allow it (due to lack of filesystem support or other reasons) and we fallen back to the ordinary reading method.") \

View File

@ -83,7 +83,7 @@ ProgressIndication::MemoryUsage ProgressIndication::getMemoryUsage() const
[](MemoryUsage const & acc, auto const & host_data)
{
UInt64 host_usage = host_data.second.memory_usage;
return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage)};
return MemoryUsage{.total = acc.total + host_usage, .max = std::max(acc.max, host_usage), .peak = std::max(acc.peak, host_data.second.peak_memory_usage)};
});
}
@ -101,6 +101,9 @@ void ProgressIndication::writeFinalProgress()
<< formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
else
std::cout << ". ";
auto peak_memory_usage = getMemoryUsage().peak;
if (peak_memory_usage >= 0)
std::cout << "\nPeak memory usage (for query) " << formatReadableSizeWithBinarySuffix(peak_memory_usage) << ".";
}
void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
@ -152,7 +155,7 @@ void ProgressIndication::writeProgress(WriteBufferFromFileDescriptor & message)
std::string profiling_msg;
double cpu_usage = getCPUUsage();
auto [memory_usage, max_host_usage] = getMemoryUsage();
auto [memory_usage, max_host_usage, peak_usage] = getMemoryUsage();
if (cpu_usage > 0 || memory_usage > 0)
{

View File

@ -22,6 +22,9 @@ struct ThreadEventData
UInt64 user_ms = 0;
UInt64 system_ms = 0;
UInt64 memory_usage = 0;
// -1 is used as a flag meaning 'not reported by old servers'
Int64 peak_memory_usage = -1;
};
using HostToTimesMap = std::unordered_map<String, ThreadEventData>;
@ -64,6 +67,7 @@ private:
{
UInt64 total = 0;
UInt64 max = 0;
Int64 peak = -1;
};
MemoryUsage getMemoryUsage() const;

View File

@ -82,3 +82,8 @@ endif()
clickhouse_add_executable (interval_tree interval_tree.cpp)
target_link_libraries (interval_tree PRIVATE dbms)
if (ENABLE_SSL)
clickhouse_add_executable (encrypt_decrypt encrypt_decrypt.cpp)
target_link_libraries (encrypt_decrypt PRIVATE dbms)
endif()

View File

@ -0,0 +1,61 @@
#include <Common/Config/ConfigProcessor.h>
#include <Compression/ICompressionCodec.h>
#include <Compression/CompressionCodecEncrypted.h>
#include <iostream>
/** This test program encrypts or decrypts text values using a symmetric encryption codec like AES_128_GCM_SIV or AES_256_GCM_SIV.
* Keys for codecs are loaded from <encryption_codecs> section of configuration file.
*
* How to use:
* ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt
*/
int main(int argc, char ** argv)
{
try
{
if (argc != 5)
{
std::cerr << "Usage:" << std::endl
<< " " << argv[0] << " path action codec value" << std::endl
<< "path: path to configuration file." << std::endl
<< "action: -e for encryption and -d for decryption." << std::endl
<< "codec: AES_128_GCM_SIV or AES_256_GCM_SIV." << std::endl << std::endl
<< "Example:" << std::endl
<< " ./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV text_to_encrypt";
return 3;
}
std::string action = argv[2];
std::string codec_name = argv[3];
std::string value = argv[4];
DB::ConfigProcessor processor(argv[1], false, true);
auto loaded_config = processor.loadConfig();
DB::CompressionCodecEncrypted::Configuration::instance().tryLoad(*loaded_config.configuration, "encryption_codecs");
if (action == "-e")
std::cout << processor.encryptValue(codec_name, value) << std::endl;
else if (action == "-d")
std::cout << processor.decryptValue(codec_name, value) << std::endl;
else
std::cerr << "Unknown action: " << action << std::endl;
}
catch (Poco::Exception & e)
{
std::cerr << "Exception: " << e.displayText() << std::endl;
return 1;
}
catch (std::exception & e)
{
std::cerr << "std::exception: " << e.what() << std::endl;
return 3;
}
catch (...)
{
std::cerr << "Some exception" << std::endl;
return 2;
}
return 0;
}

View File

@ -52,20 +52,8 @@ static bool parseNumber(const String & description, size_t l, size_t r, size_t &
}
/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
* depending on whether shards or replicas are generated.
* For example:
* host1,host2,... - generates set of shards from host1, host2, ...
* host1|host2|... - generates set of replicas from host1, host2, ...
* abc{8..10}def - generates set of shards abc8def, abc9def, abc10def.
* abc{08..10}def - generates set of shards abc08def, abc09def, abc10def.
* abc{x,yy,z}def - generates set of shards abcxdef, abcyydef, abczdef.
* abc{x|yy|z} def - generates set of replicas abcxdef, abcyydef, abczdef.
* abc{1..9}de{f,g,h} - is a direct product, 27 shards.
* abc{1..9}de{0|1} - is a direct product, 9 shards, in each 2 replicas.
*/
std::vector<String>
parseRemoteDescription(const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name)
std::vector<String> parseRemoteDescription(
const String & description, size_t l, size_t r, char separator, size_t max_addresses, const String & func_name)
{
std::vector<String> res;
std::vector<String> cur;

View File

@ -3,7 +3,7 @@
#include <vector>
namespace DB
{
/* Parse a string that generates shards and replicas. Separator - one of two characters | or ,
/* Parse a string that generates shards and replicas. Separator - one of two characters '|' or ','
* depending on whether shards or replicas are generated.
* For example:
* host1,host2,... - generates set of shards from host1, host2, ...

View File

@ -44,4 +44,15 @@ String backQuoteIfNeed(StringRef x)
return res;
}
String backQuoteMySQL(StringRef x)
{
String res(x.size, '\0');
{
WriteBufferFromString wb(res);
writeBackQuotedStringMySQL(x, wb);
}
return res;
}
}

View File

@ -24,4 +24,7 @@ String backQuote(StringRef x);
/// Quote the identifier with backquotes, if required.
String backQuoteIfNeed(StringRef x);
/// Quote the identifier with backquotes, for use in MySQL queries.
String backQuoteMySQL(StringRef x);
}
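
A usage sketch for the new helper (the include path and the doubled-backtick output are assumptions based on `writeBackQuotedStringMySQL`):

```cpp
#include <iostream>
#include <Common/quoteString.h>  // include path assumed

int main()
{
    // Quote an identifier for interpolation into a MySQL query;
    // embedded backticks are expected to be escaped by doubling.
    std::cout << DB::backQuoteMySQL("weird`name") << '\n';  // `weird``name`
    return 0;
}
```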

View File

@ -27,7 +27,7 @@ TEST(Common, SensitiveDataMasker)
{
Poco::AutoPtr<Poco::Util::XMLConfiguration> empty_xml_config = new Poco::Util::XMLConfiguration();
DB::SensitiveDataMasker masker(*empty_xml_config , "");
DB::SensitiveDataMasker masker(*empty_xml_config, "");
masker.addMaskingRule("all a letters", "a+", "--a--");
masker.addMaskingRule("all b letters", "b+", "--b--");
masker.addMaskingRule("all d letters", "d+", "--d--");
@ -45,7 +45,7 @@ TEST(Common, SensitiveDataMasker)
masker.printStats();
#endif
DB::SensitiveDataMasker masker2(*empty_xml_config , "");
DB::SensitiveDataMasker masker2(*empty_xml_config, "");
masker2.addMaskingRule("hide root password", "qwerty123", "******");
masker2.addMaskingRule("hide SSN", "[0-9]{3}-[0-9]{2}-[0-9]{4}", "000-00-0000");
masker2.addMaskingRule("hide email", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}", "hidden@hidden.test");
@ -58,7 +58,7 @@ TEST(Common, SensitiveDataMasker)
"SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', '******') WHERE "
"ssn='000-00-0000' or email='hidden@hidden.test'");
DB::SensitiveDataMasker maskerbad(*empty_xml_config , "");
DB::SensitiveDataMasker maskerbad(*empty_xml_config, "");
// gtest has not good way to check exception content, so just do it manually (see https://github.com/google/googletest/issues/952 )
try

View File

@ -398,6 +398,14 @@ UInt32 CompressionCodecDeflateQpl::doCompressData(const char * source, UInt32 so
return res;
}
inline void touchBufferWithZeroFilling(char * buffer, UInt32 buffer_size)
{
for (char * p = buffer; p < buffer + buffer_size; p += ::getPageSize()/(sizeof(*p)))
{
*p = 0;
}
}
void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 source_size, char * dest, UInt32 uncompressed_size) const
{
/// QPL library is using AVX-512 with some shuffle operations.
@ -405,6 +413,10 @@ void CompressionCodecDeflateQpl::doDecompressData(const char * source, UInt32 so
#if defined(MEMORY_SANITIZER)
__msan_unpoison(dest, uncompressed_size);
#endif
/// Device IOTLB miss has big perf. impact for IAA accelerators.
/// To avoid page fault, we need touch buffers related to accelerator in advance.
touchBufferWithZeroFilling(dest, uncompressed_size);
switch (getDecompressMode())
{
case CodecMode::Synchronous:
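
The pre-touch idea in isolation, as a hedged sketch of the same loop:

```cpp
#include <cstddef>
#include <unistd.h>

// Sketch: write one byte per page so the kernel maps the pages (and the
// device IOTLB can be warmed) before the accelerator writes into the buffer.
static void touchPages(char * buffer, std::size_t size)
{
    const std::size_t page_size = static_cast<std::size_t>(sysconf(_SC_PAGESIZE));
    for (char * p = buffer; p < buffer + size; p += page_size)
        *p = 0;
}
```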

View File

@ -28,6 +28,17 @@ namespace DB
namespace ErrorCodes
{
extern const int OPENSSL_ERROR;
extern const int BAD_ARGUMENTS;
}
EncryptionMethod getEncryptionMethod(const std::string & name)
{
if (name == "AES_128_GCM_SIV")
return AES_128_GCM_SIV;
else if (name == "AES_256_GCM_SIV")
return AES_256_GCM_SIV;
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", name);
}
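/// Usage sketch (illustrative):
///     getEncryptionMethod("AES_128_GCM_SIV"); /// returns AES_128_GCM_SIV
///     getEncryptionMethod("AES_999");         /// throws BAD_ARGUMENTS: "Wrong encryption method. Got AES_999"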
namespace
@ -63,7 +74,7 @@ uint8_t getMethodCode(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -79,7 +90,6 @@ namespace ErrorCodes
{
extern const int ILLEGAL_SYNTAX_FOR_CODEC_TYPE;
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
@ -104,7 +114,7 @@ UInt64 methodKeySize(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -129,7 +139,7 @@ auto getMethod(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -205,7 +215,7 @@ auto getMethod(EncryptionMethod Method)
}
else
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption Method. Got {}", getMethodName(Method));
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong encryption method. Got {}", getMethodName(Method));
}
}
@ -578,7 +588,7 @@ String CompressionCodecEncrypted::Configuration::getKey(EncryptionMethod method,
if (current_params->keys_storage[method].contains(key_id))
key = current_params->keys_storage[method].at(key_id);
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config", key_id);
throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no key {} in config for {} encryption codec", key_id, getMethodName(method));
return key;
}

View File

@ -18,6 +18,9 @@ enum EncryptionMethod
MAX_ENCRYPTION_METHOD
};
/// Get method for string name. Throw exception for wrong name.
EncryptionMethod getEncryptionMethod(const std::string & name);
/** This codec encrypts and decrypts blocks with AES-128 in
* GCM-SIV mode (RFC-8452), which is the only cipher currently
* supported. Although it is implemented as a compression codec

View File

@ -40,7 +40,7 @@ void deserializeSnapshotMagic(ReadBuffer & in)
Coordination::read(dbid, in);
static constexpr int32_t SNP_HEADER = 1514885966; /// "ZKSN"
if (magic_header != SNP_HEADER)
throw Exception(ErrorCodes::CORRUPTED_DATA ,"Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header);
throw Exception(ErrorCodes::CORRUPTED_DATA, "Incorrect magic header in file, expected {}, got {}", SNP_HEADER, magic_header);
}
int64_t deserializeSessionAndTimeout(KeeperStorage & storage, ReadBuffer & in)

View File

@ -48,6 +48,8 @@ namespace DB
M(UInt64, merges_mutations_memory_usage_soft_limit, 0, "Limit on total memory usage for merges and mutations. Zero means Unlimited.", 0) \
M(Double, merges_mutations_memory_usage_to_ram_ratio, 0.5, "Same as merges_mutations_memory_usage_soft_limit but as a ratio to available RAM. Allows to lower the memory limit on low-memory systems.", 0) \
M(Bool, allow_use_jemalloc_memory, true, "Allows to use jemalloc memory.", 0) \
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert_queue_flush_on_shutdown, true, "If true, the queue of asynchronous inserts is flushed on graceful shutdown", 0) \
\
M(UInt64, max_concurrent_queries, 0, "Limit on total number of concurrently executed queries. Zero means Unlimited.", 0) \
M(UInt64, max_concurrent_insert_queries, 0, "Limit on total number of concurrently insert queries. Zero means Unlimited.", 0) \

View File

@ -534,7 +534,6 @@ class IColumn;
M(Bool, convert_query_to_cnf, false, "Convert SELECT query to CNF", 0) \
M(Bool, optimize_or_like_chain, false, "Optimize multiple OR LIKE into multiMatchAny. This optimization should not be enabled by default, because it defies index analysis in some cases.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, false, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
M(Bool, optimize_redundant_functions_in_order_by, true, "Remove functions from ORDER BY if its argument is also in ORDER BY", 0) \
M(Bool, optimize_if_chain_to_multiif, false, "Replace if(cond1, then1, if(cond2, ...)) chains to multiIf. Currently it's not beneficial for numeric types.", 0) \
M(Bool, optimize_multiif_to_if, true, "Replace 'multiIf' with only one condition to 'if'.", 0) \
@ -623,6 +622,7 @@ class IColumn;
M(Bool, engine_file_allow_create_multiple_files, false, "Enables or disables creating a new file on each insert in file engine tables if format has suffix.", 0) \
M(Bool, engine_file_skip_empty_files, false, "Allows to skip empty files in file table engine", 0) \
M(Bool, engine_url_skip_empty_files, false, "Allows to skip empty files in url table engine", 0) \
M(Bool, disable_url_encoding, false, "Allows to disable decoding/encoding of the path in the URI of the URL table engine", 0) \
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to precess previous DDL queue entries", 0) \
M(Bool, database_replicated_enforce_synchronous_settings, false, "Enforces synchronous waiting for some queries (see also database_atomic_wait_for_drop_and_detach_synchronously, mutation_sync, alter_sync). Not recommended to enable these settings.", 0) \
@ -659,7 +659,8 @@ class IColumn;
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \
\
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function `range` per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
M(UInt64, function_sleep_max_microseconds_per_block, 3000000, "Maximum number of microseconds the function `sleep` is allowed to sleep for each block. If a user called it with a larger value, it throws an exception. It is a safety threshold.", 0) \
M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
\
M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
@ -674,7 +675,6 @@ class IColumn;
M(UInt64, merge_tree_min_bytes_per_task_for_remote_reading, 4 * DBMS_DEFAULT_BUFFER_SIZE, "Min bytes to read per task.", 0) \
M(Bool, merge_tree_use_const_size_tasks_for_remote_reading, true, "Whether to use constant size tasks for reading from a remote table.", 0) \
\
M(UInt64, async_insert_threads, 16, "Maximum number of threads to actually parse and insert data in background. Zero means asynchronous mode is disabled", 0) \
M(Bool, async_insert, false, "If true, data from INSERT query is stored in queue and later flushed to table in background. If wait_for_async_insert is false, INSERT query is processed almost instantly, otherwise client will wait until data will be flushed to table", 0) \
M(Bool, wait_for_async_insert, true, "If true wait for processing of asynchronous insertion", 0) \
M(Seconds, wait_for_async_insert_timeout, DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC, "Timeout for waiting for processing asynchronous insertion", 0) \
@ -820,6 +820,7 @@ class IColumn;
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, background_distributed_schedule_pool_size, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_read_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_remote_write_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, async_insert_threads, 16) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_fetches_network_bandwidth_for_server, 0) \
MAKE_DEPRECATED_BY_SERVER_CONFIG(M, UInt64, max_replicated_sends_network_bandwidth_for_server, 0) \
/* ---- */ \
@ -831,6 +832,7 @@ class IColumn;
MAKE_OBSOLETE(M, Seconds, drain_timeout, 3) \
MAKE_OBSOLETE(M, UInt64, backup_threads, 16) \
MAKE_OBSOLETE(M, UInt64, restore_threads, 16) \
MAKE_OBSOLETE(M, Bool, optimize_duplicate_order_by_and_distinct, false) \
/** The section above is for obsolete settings. Do not add anything there. */

View File

@ -80,6 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.7", {{"function_sleep_max_microseconds_per_block", 0, 3000000, "In previous versions, the maximum sleep time of 3 seconds was applied only for `sleep`, but not for `sleepEachRow` function. In the new version, we introduce this setting. If you set compatibility with the previous versions, we will disable the limit altogether."}}},
{"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
{"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
{"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},

View File

@ -121,7 +121,7 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetString)
ASSERT_EQ(Field("decimal,datetime64"), setting);
// comma with spaces
setting = " datetime64 , decimal ";
setting = " datetime64 , decimal "; /// bad punctuation is ok here
ASSERT_TRUE(setting.changed);
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DECIMAL));
ASSERT_TRUE(setting.value.isSet(MySQLDataTypesSupport::DATETIME64));
@ -166,4 +166,3 @@ GTEST_TEST(SettingMySQLDataTypesSupport, SetInvalidString)
ASSERT_TRUE(setting.changed);
ASSERT_EQ(0, setting.value.getValue());
}

View File

@ -174,7 +174,7 @@ template <typename A> struct ResultOfBitNot
* Float<x>, [U]Int<y> -> Float<max(x, y*2)>
* Decimal<x>, Decimal<y> -> Decimal<max(x,y)>
* UUID, UUID -> UUID
* UInt64 , Int<x> -> Error
* UInt64, Int<x> -> Error
* Float<x>, [U]Int64 -> Error
*/
template <typename A, typename B>

View File

@ -524,6 +524,7 @@ void DatabaseReplicated::startupTables(ThreadPool & thread_pool, LoadingStrictne
ddl_worker = std::make_unique<DatabaseReplicatedDDLWorker>(this, getContext());
ddl_worker->startup();
ddl_worker_initialized = true;
}
bool DatabaseReplicated::checkDigestValid(const ContextPtr & local_context, bool debug_check /* = true */) const
@ -1155,6 +1156,7 @@ void DatabaseReplicated::stopReplication()
void DatabaseReplicated::shutdown()
{
stopReplication();
ddl_worker_initialized = false;
ddl_worker = nullptr;
DatabaseAtomic::shutdown();
}
@ -1299,7 +1301,7 @@ bool DatabaseReplicated::canExecuteReplicatedMetadataAlter() const
/// It may update the metadata digest (both locally and in ZooKeeper)
/// before DatabaseReplicatedDDLWorker::initializeReplication() has finished.
/// We should not update metadata until the database is initialized.
return ddl_worker && ddl_worker->isCurrentlyActive();
return ddl_worker_initialized && ddl_worker->isCurrentlyActive();
}
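/// Two-phase initialization: `ddl_worker` becomes non-null before startup()
/// finishes, so checking the pointer alone races with initializeReplication().
/// `ddl_worker_initialized` is set strictly after startup (and cleared before
/// the worker is destroyed in shutdown()), so a reader that sees the flag set
/// never observes a half-initialized worker.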
void DatabaseReplicated::detachTablePermanently(ContextPtr local_context, const String & table_name)

View File

@ -134,6 +134,7 @@ private:
std::atomic_bool is_readonly = true;
std::atomic_bool is_probably_dropped = false;
std::atomic_bool is_recovering = false;
std::atomic_bool ddl_worker_initialized = false;
std::unique_ptr<DatabaseReplicatedDDLWorker> ddl_worker;
UInt32 max_log_ptr_at_creation = 0;

View File

@ -292,7 +292,7 @@ void DatabaseWithOwnTablesBase::shutdown()
for (const auto & kv : tables_snapshot)
{
kv.second->flush();
kv.second->flushAndPrepareForShutdown();
}
for (const auto & kv : tables_snapshot)

View File

@ -4,6 +4,7 @@
#include <Databases/MySQL/MaterializedMySQLSyncThread.h>
#include <Databases/MySQL/tryParseTableIDFromDDL.h>
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <cstdlib>
#include <random>
#include <string_view>
@ -342,9 +343,8 @@ static inline String rewriteMysqlQueryColumn(mysqlxx::Pool::Entry & connection,
{ std::make_shared<DataTypeString>(), "column_type" }
};
const String & query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
" WHERE TABLE_SCHEMA = '" + backQuoteIfNeed(database_name) +
"' AND TABLE_NAME = '" + backQuoteIfNeed(table_name) + "' ORDER BY ORDINAL_POSITION";
String query = "SELECT COLUMN_NAME AS column_name, COLUMN_TYPE AS column_type FROM INFORMATION_SCHEMA.COLUMNS"
" WHERE TABLE_SCHEMA = '" + database_name + "' AND TABLE_NAME = '" + table_name + "' ORDER BY ORDINAL_POSITION";
StreamSettings mysql_input_stream_settings(global_settings, false, true);
auto mysql_source = std::make_unique<MySQLSource>(connection, query, tables_columns_sample_block, mysql_input_stream_settings);
@ -812,6 +812,7 @@ void MaterializedMySQLSyncThread::executeDDLAtomic(const QueryEvent & query_even
CurrentThread::QueryScope query_scope(query_context);
String query = query_event.query;
tryQuoteUnrecognizedTokens(query, query);
if (!materialized_tables_list.empty())
{
auto table_id = tryParseTableIDFromDDL(query, query_event.schema);

View File

@ -0,0 +1,289 @@
#include <gtest/gtest.h>
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
using namespace DB;
struct TestCase
{
String query;
String res;
bool ok;
TestCase(
const String & query_,
const String & res_,
bool ok_)
: query(query_)
, res(res_)
, ok(ok_)
{
}
};
std::ostream & operator<<(std::ostream & ostr, const TestCase & test_case)
{
return ostr << '"' << test_case.query << "\" -> \"" << test_case.res << "\" ok:" << test_case.ok;
}
class QuoteUnrecognizedTokensTest : public ::testing::TestWithParam<TestCase>
{
};
TEST_P(QuoteUnrecognizedTokensTest, escape)
{
const auto & [query, expected, ok] = GetParam();
String actual;
bool res = tryQuoteUnrecognizedTokens(query, actual);
EXPECT_EQ(ok, res);
EXPECT_EQ(expected, actual);
}
INSTANTIATE_TEST_SUITE_P(MaterializedMySQL, QuoteUnrecognizedTokensTest, ::testing::ValuesIn(std::initializer_list<TestCase>{
{
"",
"",
false
},
{
"test '\"`",
"",
false
},
{
"SELECT * FROM db.`table`",
"",
false
},
{
"道渠",
"`道渠`",
true
},
{
"",
"`道`",
true
},
{
"道道(skip) 道(",
"`道道`(skip) `道`(",
true
},
{
"`道渠`",
"",
false
},
{
"'道'",
"",
false
},
{
"\"\"",
"",
false
},
{
"` 道 test 渠 `",
"",
false
},
{
"skip 道 skip 123",
"skip `道` skip 123",
true
},
{
"skip 123 `道` skip",
"",
false
},
{
"skip `道 skip 123",
"",
false
},
{
"skip test道 skip",
"skip `test道` skip",
true
},
{
"test道2test",
"`test道2test`",
true
},
{
"skip test道2test 123",
"skip `test道2test` 123",
true
},
{
"skip 您a您a您a a您a您a您a 1您2您3您4 skip",
"skip `您a您a您a` `a您a您a您a` `1您2您3您4` skip",
true
},
{
"skip 您a 您a您a b您2您c您4 skip",
"skip `您a` `您a您a` `b您2您c您4` skip",
true
},
{
"123您a skip 56_您a 您a2 b_您2_您c123您_a4 skip",
"`123您a` skip `56_您a` `您a2` `b_您2_您c123您_a4` skip",
true
},
{
"_您_ 123 skip 56_您_您_您_您_您_您_您_您_您_a 您a2 abc 123_您_您_321 a1b2c3 aaaaa您您_a4 skip",
"`_您_` 123 skip `56_您_您_您_您_您_您_您_您_您_a` `您a2` abc `123_您_您_321` a1b2c3 `aaaaa您您_a4` skip",
true
},
{
"TABLE 您2 您(",
"TABLE `您2` `您`(",
true
},
{
"TABLE 您.a您2(日2日2 INT",
"TABLE `您`.`a您2`(`日2日2` INT",
true
},
{
"TABLE 您$.a_您2a_($日2日_2 INT, 您Hi好 a您b好c)",
"TABLE `您`$.`a_您2a_`($`日2日_2` INT, `您Hi好` `a您b好c`)",
true
},
{
"TABLE 您a日.您a您a您a(test INT",
"TABLE `您a日`.`您a您a您a`(test INT",
true
},
{
"TABLE 您a日.您a您a您a(Hi您Hi好Hi INT",
"TABLE `您a日`.`您a您a您a`(`Hi您Hi好Hi` INT",
true
},
{
"--TABLE 您a日.您a您a您a(test INT",
"",
false
},
{
"--您a日.您a您a您a(\n您Hi好",
"--您a日.您a您a您a(\n`您Hi好`",
true
},
{
" /* TABLE 您a日.您a您a您a(test INT",
"",
false
},
{
"/*您a日.您a您a您a(*/\n您Hi好",
"/*您a日.您a您a您a(*/\n`您Hi好`",
true
},
{
" 您a日.您您aa您a /* 您a日.您a您a您a */ a您a日a.a您您您a",
" `您a日`.`您您aa您a` /* 您a日.您a您a您a */ `a您a日a`.`a您您您a`",
true
},
//{ TODO
// "TABLE 您2.您a您a您a(test INT",
// "TABLE `您2`.`您a您a您a`(test INT",
// true
//},
{
"skip 您a您a您a skip",
"skip `您a您a您a` skip",
true
},
{
"test 您a2您3a您a 4 again",
"test `您a2您3a您a` 4 again",
true
},
{
"CREATE TABLE db.`道渠`",
"",
false
},
{
"CREATE TABLE db.`道渠",
"",
false
},
{
"CREATE TABLE db.道渠",
"CREATE TABLE db.`道渠`",
true
},
{
"CREATE TABLE db. 道渠",
"CREATE TABLE db. `道渠`",
true
},
{
R"sql(
CREATE TABLE gb2312.`` ( `id` int NOT NULL,
INT,
DATETIME,
test INT, test您 INT, test您test INT,
test INT, test道渠 INT, test道渠test INT,
_ INT, _您 INT, _您_ INT,
__ INT, __您您 INT, __您您__ INT,
2 INT, 2 INT, 22 INT,
22 INT, 22 INT, 2222 INT,
_2 INT, _2您 INT, _2您_2 INT, _2您2_ INT, 2_您_2 INT,
__22 INT, __22您您 INT, __22您您__22 INT, __22您您22__ INT, 22__您您__22 INT,
2_ INT, 2_您 INT, 2_您2_ INT,
22__ INT, 22__您您 INT, 22__您您22__ INT,
_test INT, _test您 INT, _test您_test INT, _test您test_ INT, test_您test_ INT, test_您_test INT,
_test INT, _test您您 INT, _test您您_test INT, _test您您test_ INT, test_您您test_ INT, test_您您_test INT,
test3 INT, test3您 INT, test3您test3 INT, test3您3test INT,
test3 INT, test3您您 INT, test3您您test3 INT, test3您您3test INT,
3test INT, 3test您 INT, 3test您3test INT, 3test您test3 INT,
3test INT, 3test您您 INT, 3test您您3test INT, 3test您您test3 INT,
_test4 INT, _test4您 INT, _test4您_test4 INT, test4_您_test4 INT, _test4您4test_ INT, _test4您test4_ INT,
_test4 INT, _test4您您 INT, _test4您您_test4 INT, test4_您您_test4 INT, _test4您您4test_ INT, _test4您您test4_ INT,
_5test INT, _5test您 INT, _5test您_5test INT, 5test_您_test5 INT, _4test您test4_ INT,
test_日期 varchar(256), test_道_2 varchar(256) NOT NULL ,
test_道渠您_3
BIGINT NOT NULL,
3_test INT,
PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
)sql",
R"sql(
CREATE TABLE gb2312.`` ( `id` int NOT NULL,
`` INT,
`` DATETIME,
`test` INT, `test您` INT, `test您test` INT,
`test` INT, `test道渠` INT, `test道渠test` INT,
`_` INT, `_您` INT, `_您_` INT,
`__` INT, `__您您` INT, `__您您__` INT,
`2` INT, `2` INT, `22` INT,
`22` INT, `22` INT, `2222` INT,
`_2` INT, `_2您` INT, `_2您_2` INT, `_2您2_` INT, `2_您_2` INT,
`__22` INT, `__22您您` INT, `__22您您__22` INT, `__22您您22__` INT, `22__您您__22` INT,
`2_` INT, `2_您` INT, `2_您2_` INT,
`22__` INT, `22__您您` INT, `22__您您22__` INT,
`_test` INT, `_test您` INT, `_test您_test` INT, `_test您test_` INT, `test_您test_` INT, `test_您_test` INT,
`_test` INT, `_test您您` INT, `_test您您_test` INT, `_test您您test_` INT, `test_您您test_` INT, `test_您您_test` INT,
`test3` INT, `test3您` INT, `test3您test3` INT, `test3您3test` INT,
`test3` INT, `test3您您` INT, `test3您您test3` INT, `test3您您3test` INT,
`3test` INT, `3test您` INT, `3test您3test` INT, `3test您test3` INT,
`3test` INT, `3test您您` INT, `3test您您3test` INT, `3test您您test3` INT,
`_test4` INT, `_test4您` INT, `_test4您_test4` INT, `test4_您_test4` INT, `_test4您4test_` INT, `_test4您test4_` INT,
`_test4` INT, `_test4您您` INT, `_test4您您_test4` INT, `test4_您您_test4` INT, `_test4您您4test_` INT, `_test4您您test4_` INT,
`_5test` INT, `_5test您` INT, `_5test您_5test` INT, `5test_您_test5` INT, `_4test您test4_` INT,
`test_日期` varchar(256), `test_道_2` varchar(256) NOT NULL ,
`test_道渠您_3`
BIGINT NOT NULL,
`3_test` INT,
PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=gb2312;
)sql",
true
},
}));

View File

@ -0,0 +1,96 @@
#include <Databases/MySQL/tryQuoteUnrecognizedTokens.h>
#include <Parsers/CommonParsers.h>
#include <Common/quoteString.h>
namespace DB
{
/// Checks that there are no characters (e.g. whitespace) between the two positions
static bool noWhitespaces(const char * to, const char * from)
{
return static_cast<size_t>(from - to) == 0;
}
/// Checks if the token should be quoted together with the unrecognized one
static bool isWordOrNumber(TokenType type)
{
return type == TokenType::BareWord || type == TokenType::Number;
}
static void quoteLiteral(
IParser::Pos & pos,
IParser::Pos & pos_prev,
const char *& pos_unrecognized,
const char *& copy_from,
String & rewritten_query)
{
/// Extend the literal over the current token if it is a word/number glued to the previous one (no whitespace in between)
const auto * end =
isWordOrNumber(pos->type) && noWhitespaces(pos_prev->end, pos->begin)
? pos->end
: pos_prev->end;
String literal(pos_unrecognized, static_cast<size_t>(end - pos_unrecognized));
rewritten_query.append(copy_from, pos_unrecognized - copy_from).append(backQuoteMySQL(literal));
copy_from = end;
}
bool tryQuoteUnrecognizedTokens(const String & query, String & res)
{
Tokens tokens(query.data(), query.data() + query.size());
IParser::Pos pos(tokens, 0);
Expected expected;
String rewritten_query;
const char * copy_from = query.data();
auto pos_prev = pos;
const char * pos_unrecognized = nullptr;
for (; pos->type != TokenType::EndOfStream; ++pos)
{
/// Commit the pending quote if whitespace was found or the token is not a word/number
bool commit = !noWhitespaces(pos_prev->end, pos->begin) || (pos->type != TokenType::Error && !isWordOrNumber(pos->type));
if (pos_unrecognized && commit)
{
quoteLiteral(
pos,
pos_prev,
pos_unrecognized,
copy_from,
rewritten_query);
pos_unrecognized = nullptr;
}
if (pos->type == TokenType::Error)
{
/// Find first appearance of the error token
if (!pos_unrecognized)
{
pos_unrecognized =
isWordOrNumber(pos_prev->type) && noWhitespaces(pos_prev->end, pos->begin)
? pos_prev->begin
: pos->begin;
}
}
pos_prev = pos;
}
/// EndOfStream was reached while an unrecognized token was still uncommitted
if (pos_unrecognized)
{
quoteLiteral(
pos,
pos_prev,
pos_unrecognized,
copy_from,
rewritten_query);
pos_unrecognized = nullptr;
}
/// If no Error tokens were found, there is nothing to rewrite
if (copy_from == query.data())
return false;
auto size = static_cast<size_t>(pos->end - copy_from);
rewritten_query.append(copy_from, size);
res = rewritten_query;
return true;
}
}
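A usage sketch (illustrative), matching the gtest cases earlier in this change:
String rewritten;
bool changed = tryQuoteUnrecognizedTokens("CREATE TABLE db.道渠", rewritten);
/// changed == true, rewritten == "CREATE TABLE db.`道渠`".
/// For input the tokenizer already accepts, e.g. "SELECT * FROM db.`table`",
/// it returns false and leaves `rewritten` untouched.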

View File

@ -0,0 +1,10 @@
#pragma once
#include <base/types.h>
namespace DB
{
bool tryQuoteUnrecognizedTokens(const String & query, String & res);
}

View File

@ -322,7 +322,7 @@ void buildSingleAttribute(
/** Transforms
* PRIMARY KEY Attr1 ,..., AttrN
* PRIMARY KEY Attr1, ..., AttrN
* to the next configuration
* <id><name>Attr1</name></id>
* or

View File

@ -1183,15 +1183,9 @@ public:
|| (left_tuple && right_tuple && left_tuple->getElements().size() == right_tuple->getElements().size())
|| (arguments[0]->equals(*arguments[1]))))
{
try
{
getLeastSupertype(arguments);
}
catch (const Exception &)
{
if (!tryGetLeastSupertype(arguments))
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types of arguments ({}, {})"
" of function {}", arguments[0]->getName(), arguments[1]->getName(), getName());
}
}
if (left_tuple && right_tuple)

View File

@ -292,8 +292,8 @@ struct SimHashImpl
continue;
// we need to store the new word hash value to the oldest location.
// for example, N = 5, array |a0|a1|a2|a3|a4|, now , a0 is the oldest location,
// so we need to store new word hash into location of a0, then ,this array become
// for example, N = 5, array |a0|a1|a2|a3|a4|, now, a0 is the oldest location,
// so we need to store new word hash into location of a0, then this array become
// |a5|a1|a2|a3|a4|, next time, a1 become the oldest location, we need to store new
// word hash value into location of a1, then array become |a5|a6|a2|a3|a4|
words[offset] = BytesRef{word_start, length};
@ -793,4 +793,3 @@ REGISTER_FUNCTION(StringHash)
factory.registerFunction<FunctionWordShingleMinHashArgCaseInsensitiveUTF8>();
}
}

View File

@ -375,14 +375,14 @@ bool sliceHasImplAnyAllImplInt16(
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(7,6,5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(5,4,3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6))))),
_mm256_or_si256(
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data ,first_data ,1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(3,2,1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4)))),
_mm256_andnot_si256(
_mm256_shuffle_epi8(_mm256_permute2x128_si256(first_nm_mask, first_nm_mask, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2)),
_mm256_cmpeq_epi16(second_data, _mm256_shuffle_epi8(_mm256_permute2x128_si256(first_data, first_data, 1), _mm256_set_epi8(1,0,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2))))))

View File

@ -0,0 +1,376 @@
#include <Functions/GregorianDate.h>
#include <Common/Exception.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_FORMAT_DATETIME;
extern const int LOGICAL_ERROR;
}
namespace
{
inline constexpr bool is_leap_year(int32_t year)
{
return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
}
inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
{
switch (month)
{
case 1: return 31;
case 2: return is_leap_year ? 29 : 28;
case 3: return 31;
case 4: return 30;
case 5: return 31;
case 6: return 30;
case 7: return 31;
case 8: return 31;
case 9: return 30;
case 10: return 31;
case 11: return 30;
case 12: return 31;
default:
std::terminate();
}
}
/** Integer division truncated toward negative infinity.
*/
template <typename I, typename J>
inline constexpr I div(I x, J y)
{
const auto y_cast = static_cast<I>(y);
if (x > 0 && y_cast < 0)
return ((x - 1) / y_cast) - 1;
else if (x < 0 && y_cast > 0)
return ((x + 1) / y_cast) - 1;
else
return x / y_cast;
}
/** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
*/
template <typename I, typename J>
inline constexpr I mod(I x, J y)
{
const auto y_cast = static_cast<I>(y);
const auto r = x % y_cast;
if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
return r == 0 ? static_cast<I>(0) : r + y_cast;
else
return r;
}
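/// Worked example: unlike C++'s truncation toward zero, div(-7, 2) == -4 and
/// mod(-7, 2) == 1, so div(-7, 2) * 2 + mod(-7, 2) == -8 + 1 == -7 as required.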
/** Like std::min(), but the type of operands may differ.
*/
template <typename I, typename J>
inline constexpr I min(I x, J y)
{
const auto y_cast = static_cast<I>(y);
return x < y_cast ? x : y_cast;
}
inline char readDigit(ReadBuffer & in)
{
char c;
if (!in.read(c))
throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
else if (c < '0' || c > '9')
throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
else
return c - '0';
}
inline bool tryReadDigit(ReadBuffer & in, char & c)
{
if (in.read(c) && c >= '0' && c <= '9')
{
c -= '0';
return true;
}
return false;
}
}
void GregorianDate::init(ReadBuffer & in)
{
year_ = readDigit(in) * 1000
+ readDigit(in) * 100
+ readDigit(in) * 10
+ readDigit(in);
assertChar('-', in);
month_ = readDigit(in) * 10
+ readDigit(in);
assertChar('-', in);
day_of_month_ = readDigit(in) * 10
+ readDigit(in);
assertEOF(in);
if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date, out of range (year: {}, month: {}, day_of_month: {}).", year_, month_, day_of_month_);
}
bool GregorianDate::tryInit(ReadBuffer & in)
{
char c[8];
if ( !tryReadDigit(in, c[0])
|| !tryReadDigit(in, c[1])
|| !tryReadDigit(in, c[2])
|| !tryReadDigit(in, c[3])
|| !checkChar('-', in)
|| !tryReadDigit(in, c[4])
|| !tryReadDigit(in, c[5])
|| !checkChar('-', in)
|| !tryReadDigit(in, c[6])
|| !tryReadDigit(in, c[7])
|| !in.eof())
{
return false;
}
year_ = c[0] * 1000 + c[1] * 100 + c[2] * 10 + c[3];
month_ = c[4] * 10 + c[5];
day_of_month_ = c[6] * 10 + c[7];
if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year(year_), month_))
return false;
return true;
}
GregorianDate::GregorianDate(ReadBuffer & in)
{
init(in);
}
void GregorianDate::init(int64_t modified_julian_day)
{
const OrdinalDate ord(modified_julian_day);
const MonthDay md(is_leap_year(ord.year()), ord.dayOfYear());
year_ = ord.year();
month_ = md.month();
day_of_month_ = md.dayOfMonth();
}
bool GregorianDate::tryInit(int64_t modified_julian_day)
{
OrdinalDate ord;
if (!ord.tryInit(modified_julian_day))
return false;
MonthDay md(is_leap_year(ord.year()), ord.dayOfYear());
year_ = ord.year();
month_ = md.month();
day_of_month_ = md.dayOfMonth();
return true;
}
GregorianDate::GregorianDate(int64_t modified_julian_day)
{
init(modified_julian_day);
}
int64_t GregorianDate::toModifiedJulianDay() const
{
const MonthDay md(month_, day_of_month_);
const auto day_of_year = md.dayOfYear(is_leap_year(year_));
const OrdinalDate ord(year_, day_of_year);
return ord.toModifiedJulianDay();
}
bool GregorianDate::tryToModifiedJulianDay(int64_t & res) const
{
const MonthDay md(month_, day_of_month_);
const auto day_of_year = md.dayOfYear(is_leap_year(year_));
OrdinalDate ord;
if (!ord.tryInit(year_, day_of_year))
return false;
res = ord.toModifiedJulianDay();
return true;
}
template <typename ReturnType>
ReturnType GregorianDate::writeImpl(WriteBuffer & buf) const
{
if (year_ < 0 || year_ > 9999)
{
if constexpr (std::is_same_v<ReturnType, void>)
throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
"Impossible to stringify: year too big or small: {}", year_);
else
return false;
}
else
{
auto y = year_;
writeChar('0' + y / 1000, buf); y %= 1000;
writeChar('0' + y / 100, buf); y %= 100;
writeChar('0' + y / 10, buf); y %= 10;
writeChar('0' + y , buf);
writeChar('-', buf);
auto m = month_;
writeChar('0' + m / 10, buf); m %= 10;
writeChar('0' + m , buf);
writeChar('-', buf);
auto d = day_of_month_;
writeChar('0' + d / 10, buf); d %= 10;
writeChar('0' + d , buf);
}
return ReturnType(true);
}
std::string GregorianDate::toString() const
{
WriteBufferFromOwnString buf;
write(buf);
return buf.str();
}
void OrdinalDate::init(int32_t year, uint16_t day_of_year)
{
year_ = year;
day_of_year_ = day_of_year;
if (day_of_year < 1 || day_of_year > (is_leap_year(year) ? 366 : 365))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", year, day_of_year);
}
bool OrdinalDate::tryInit(int32_t year, uint16_t day_of_year)
{
year_ = year;
day_of_year_ = day_of_year;
return !(day_of_year < 1 || day_of_year > (is_leap_year(year) ? 366 : 365));
}
void OrdinalDate::init(int64_t modified_julian_day)
{
if (!tryInit(modified_julian_day))
throw Exception(
ErrorCodes::CANNOT_FORMAT_DATETIME,
"Value cannot be represented as date because it's out of range");
}
bool OrdinalDate::tryInit(int64_t modified_julian_day)
{
/// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
if (modified_julian_day < -678941)
return false;
if (modified_julian_day > 2973119)
return false;
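/// The constants are the Gregorian cycle lengths: 146097 days per 400 years,
/// 36524 days per century (min(..., 3) absorbs the 400th year's extra leap
/// day), 1461 days per 4-year quad and 365 days per year (capped at 3 again);
/// the offset 678575 makes a == 0 correspond to 0001-01-01.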
const auto a = modified_julian_day + 678575;
const auto quad_cent = div(a, 146097);
const auto b = mod(a, 146097);
const auto cent = min(div(b, 36524), 3);
const auto c = b - cent * 36524;
const auto quad = div(c, 1461);
const auto d = mod(c, 1461);
const auto y = min(div(d, 365), 3);
day_of_year_ = d - y * 365 + 1;
year_ = static_cast<int32_t>(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
return true;
}
OrdinalDate::OrdinalDate(int32_t year, uint16_t day_of_year)
{
init(year, day_of_year);
}
OrdinalDate::OrdinalDate(int64_t modified_julian_day)
{
init(modified_julian_day);
}
int64_t OrdinalDate::toModifiedJulianDay() const noexcept
{
const auto y = year_ - 1;
return day_of_year_
+ 365 * y
+ div(y, 4)
- div(y, 100)
+ div(y, 400)
- 678576;
}
MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
: month_(month)
, day_of_month_(day_of_month)
{
if (month < 1 || month > 12)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", month);
/* We can't validate day_of_month here, because we don't know if
* it's a leap year. */
}
MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
{
if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
(is_leap_year ? "leap, " : "non-leap, "), day_of_year);
month_ = 1;
uint16_t d = day_of_year;
while (true)
{
const auto len = monthLength(is_leap_year, month_);
if (d <= len)
break;
++month_;
d -= len;
}
day_of_month_ = d;
}
uint16_t MonthDay::dayOfYear(bool is_leap_year) const
{
if (day_of_month_ < 1 || day_of_month_ > monthLength(is_leap_year, month_))
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
(is_leap_year ? "leap, " : "non-leap, "), month_, day_of_month_);
}
const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 : -2;
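/// Worked example: non-leap March 1 gives (367 * 3 - 362) / 12 + (-2) + 1
/// == 61 - 2 + 1 == 60, i.e. the 60th day of the year (31 + 28 + 1).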
return (367 * month_ - 362) / 12 + k + day_of_month_;
}
template void GregorianDate::writeImpl<void>(WriteBuffer & buf) const;
template bool GregorianDate::writeImpl<bool>(WriteBuffer & buf) const;
}
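A round-trip sketch (illustrative), using the fact that Modified Julian Day 0 is 1858-11-17:
DB::GregorianDate gd(/*modified_julian_day=*/ 0);
/// gd.toString() == "1858-11-17"
/// gd.toModifiedJulianDay() == 0 -- the conversions round-trip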

View File

@ -1,408 +1,155 @@
#pragma once
#include <base/extended_types.h>
#include <Common/Exception.h>
#include <Core/Types.h>
#include <IO/ReadBuffer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>
#include <cstdint>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_DATE;
extern const int CANNOT_FORMAT_DATETIME;
extern const int LOGICAL_ERROR;
}
/** Proleptic Gregorian calendar date. YearT is an integral type
class ReadBuffer;
class WriteBuffer;
/// Proleptic Gregorian calendar date.
class GregorianDate
{
public:
GregorianDate() {}
void init(ReadBuffer & in);
bool tryInit(ReadBuffer & in);
/** Construct from date in text form 'YYYY-MM-DD' by reading from
* ReadBuffer.
*/
explicit GregorianDate(ReadBuffer & in);
void init(int64_t modified_julian_day);
bool tryInit(int64_t modified_julian_day);
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably be signed.
*/
explicit GregorianDate(int64_t modified_julian_day);
/** Convert to Modified Julian Day. The type T is an integral type
* which should be at least 32 bits wide, and should preferably
* be signed.
*/
template <typename YearT = int32_t>
class GregorianDate
* be signed.
*/
int64_t toModifiedJulianDay() const;
bool tryToModifiedJulianDay(int64_t & res) const;
/** Write the date in text form 'YYYY-MM-DD' to a buffer.
*/
void write(WriteBuffer & buf) const
{
public:
/** Construct from date in text form 'YYYY-MM-DD' by reading from
* ReadBuffer.
*/
explicit GregorianDate(ReadBuffer & in);
writeImpl<void>(buf);
}
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably be signed.
*/
explicit GregorianDate(is_integer auto modified_julian_day);
/** Convert to Modified Julian Day. The type T is an integral type
* which should be at least 32 bits wide, and should preferably
* be signed.
*/
template <is_integer T>
T toModifiedJulianDay() const;
/** Write the date in text form 'YYYY-MM-DD' to a buffer.
*/
void write(WriteBuffer & buf) const;
/** Convert to a string in text form 'YYYY-MM-DD'.
*/
std::string toString() const;
YearT year() const noexcept
{
return year_;
}
uint8_t month() const noexcept
{
return month_;
}
uint8_t day_of_month() const noexcept /// NOLINT
{
return day_of_month_;
}
private:
YearT year_; /// NOLINT
uint8_t month_; /// NOLINT
uint8_t day_of_month_; /// NOLINT
};
/** ISO 8601 Ordinal Date. YearT is an integral type which should
* be at least 32 bits wide, and should preferably signed.
*/
template <typename YearT = int32_t>
class OrdinalDate
bool tryWrite(WriteBuffer & buf) const
{
public:
OrdinalDate(YearT year, uint16_t day_of_year);
return writeImpl<bool>(buf);
}
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably be signed.
*/
template <is_integer DayT>
explicit OrdinalDate(DayT modified_julian_day);
/** Convert to a string in text form 'YYYY-MM-DD'.
*/
std::string toString() const;
/** Convert to Modified Julian Day. The type T is an integral
* type which should be at least 32 bits wide, and should
* preferably be signed.
*/
template <is_integer T>
T toModifiedJulianDay() const noexcept;
YearT year() const noexcept
{
return year_;
}
uint16_t dayOfYear() const noexcept
{
return day_of_year_;
}
private:
YearT year_; /// NOLINT
uint16_t day_of_year_; /// NOLINT
};
class MonthDay
int32_t year() const noexcept
{
public:
/** Construct from month and day. */
MonthDay(uint8_t month, uint8_t day_of_month);
return year_;
}
/** Construct from day of year in Gregorian or Julian
* calendars to month and day.
*/
MonthDay(bool is_leap_year, uint16_t day_of_year);
uint8_t month() const noexcept
{
return month_;
}
/** Convert month and day in Gregorian or Julian calendars to
* day of year.
*/
uint16_t dayOfYear(bool is_leap_year) const;
uint8_t dayOfMonth() const noexcept
{
return day_of_month_;
}
uint8_t month() const noexcept
{
return month_;
}
private:
int32_t year_ = 0;
uint8_t month_ = 0;
uint8_t day_of_month_ = 0;
uint8_t day_of_month() const noexcept /// NOLINT
{
return day_of_month_;
}
template <typename ReturnType>
ReturnType writeImpl(WriteBuffer & buf) const;
};
private:
uint8_t month_; /// NOLINT
uint8_t day_of_month_; /// NOLINT
};
}
/* Implementation */
namespace gd
/** ISO 8601 Ordinal Date.
*/
class OrdinalDate
{
using namespace DB;
public:
OrdinalDate() {}
template <typename YearT>
static inline constexpr bool is_leap_year(YearT year)
{
return (year % 4 == 0) && ((year % 400 == 0) || (year % 100 != 0));
}
void init(int32_t year, uint16_t day_of_year);
bool tryInit(int32_t year, uint16_t day_of_year);
static inline constexpr uint8_t monthLength(bool is_leap_year, uint8_t month)
{
switch (month)
{
case 1: return 31;
case 2: return is_leap_year ? 29 : 28;
case 3: return 31;
case 4: return 30;
case 5: return 31;
case 6: return 30;
case 7: return 31;
case 8: return 31;
case 9: return 30;
case 10: return 31;
case 11: return 30;
case 12: return 31;
default:
std::terminate();
}
}
void init(int64_t modified_julian_day);
bool tryInit(int64_t modified_julian_day);
/** Integer division truncated toward negative infinity.
OrdinalDate(int32_t year, uint16_t day_of_year);
/** Construct from Modified Julian Day. The type T is an
* integral type which should be at least 32 bits wide, and
* should preferably be signed.
*/
template <typename I, typename J>
static inline constexpr I div(I x, J y)
{
const auto y_cast = static_cast<I>(y);
if (x > 0 && y_cast < 0)
return ((x - 1) / y_cast) - 1;
else if (x < 0 && y_cast > 0)
return ((x + 1) / y_cast) - 1;
else
return x / y_cast;
}
explicit OrdinalDate(int64_t modified_julian_day);
/** Integer modulus, satisfying div(x, y)*y + mod(x, y) == x.
/** Convert to Modified Julian Day. The type T is an integral
* type which should be at least 32 bits wide, and should
* preferably be signed.
*/
template <typename I, typename J>
static inline constexpr I mod(I x, J y)
int64_t toModifiedJulianDay() const noexcept;
int32_t year() const noexcept
{
const auto y_cast = static_cast<I>(y);
const auto r = x % y_cast;
if ((x > 0 && y_cast < 0) || (x < 0 && y_cast > 0))
return r == 0 ? static_cast<I>(0) : r + y_cast;
else
return r;
return year_;
}
/** Like std::min(), but the type of operands may differ.
*/
template <typename I, typename J>
static inline constexpr I min(I x, J y)
uint16_t dayOfYear() const noexcept
{
const auto y_cast = static_cast<I>(y);
return x < y_cast ? x : y_cast;
return day_of_year_;
}
static inline char readDigit(ReadBuffer & in)
{
char c;
if (!in.read(c))
throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse input: expected a digit at the end of stream");
else if (c < '0' || c > '9')
throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot read input: expected a digit but got something else");
else
return c - '0';
}
}
private:
int32_t year_ = 0;
uint16_t day_of_year_ = 0;
};
namespace DB
class MonthDay
{
template <typename YearT>
GregorianDate<YearT>::GregorianDate(ReadBuffer & in)
public:
/** Construct from month and day. */
MonthDay(uint8_t month, uint8_t day_of_month);
/** Construct from day of year in Gregorian or Julian
* calendars to month and day.
*/
MonthDay(bool is_leap_year, uint16_t day_of_year);
/** Convert month and day in Gregorian or Julian calendars to
* day of year.
*/
uint16_t dayOfYear(bool is_leap_year) const;
uint8_t month() const noexcept
{
year_ = gd::readDigit(in) * 1000
+ gd::readDigit(in) * 100
+ gd::readDigit(in) * 10
+ gd::readDigit(in);
assertChar('-', in);
month_ = gd::readDigit(in) * 10
+ gd::readDigit(in);
assertChar('-', in);
day_of_month_ = gd::readDigit(in) * 10
+ gd::readDigit(in);
assertEOF(in);
if (month_ < 1 || month_ > 12 || day_of_month_ < 1 || day_of_month_ > gd::monthLength(gd::is_leap_year(year_), month_))
throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Invalid date: {}", toString());
return month_;
}
template <typename YearT>
GregorianDate<YearT>::GregorianDate(is_integer auto modified_julian_day)
uint8_t dayOfMonth() const noexcept
{
const OrdinalDate<YearT> ord(modified_julian_day);
const MonthDay md(gd::is_leap_year(ord.year()), ord.dayOfYear());
year_ = ord.year();
month_ = md.month();
day_of_month_ = md.day_of_month();
return day_of_month_;
}
template <typename YearT>
template <is_integer T>
T GregorianDate<YearT>::toModifiedJulianDay() const
{
const MonthDay md(month_, day_of_month_);
const auto day_of_year = md.dayOfYear(gd::is_leap_year(year_));
const OrdinalDate<YearT> ord(year_, day_of_year);
return ord.template toModifiedJulianDay<T>();
}
private:
uint8_t month_ = 0;
uint8_t day_of_month_ = 0;
};
template <typename YearT>
void GregorianDate<YearT>::write(WriteBuffer & buf) const
{
if (year_ < 0 || year_ > 9999)
{
throw Exception(ErrorCodes::CANNOT_FORMAT_DATETIME,
"Impossible to stringify: year too big or small: {}", DB::toString(year_));
}
else
{
auto y = year_;
writeChar('0' + y / 1000, buf); y %= 1000;
writeChar('0' + y / 100, buf); y %= 100;
writeChar('0' + y / 10, buf); y %= 10;
writeChar('0' + y , buf);
writeChar('-', buf);
auto m = month_;
writeChar('0' + m / 10, buf); m %= 10;
writeChar('0' + m , buf);
writeChar('-', buf);
auto d = day_of_month_;
writeChar('0' + d / 10, buf); d %= 10;
writeChar('0' + d , buf);
}
}
template <typename YearT>
std::string GregorianDate<YearT>::toString() const
{
WriteBufferFromOwnString buf;
write(buf);
return buf.str();
}
template <typename YearT>
OrdinalDate<YearT>::OrdinalDate(YearT year, uint16_t day_of_year)
: year_(year)
, day_of_year_(day_of_year)
{
if (day_of_year < 1 || day_of_year > (gd::is_leap_year(year) ? 366 : 365))
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ordinal date: {}-{}", toString(year), toString(day_of_year));
}
}
template <typename YearT>
template <is_integer DayT>
OrdinalDate<YearT>::OrdinalDate(DayT modified_julian_day)
{
/// This function supports day number from -678941 to 2973119 (which represent 0000-01-01 and 9999-12-31 respectively).
if constexpr (is_signed_v<DayT> && std::numeric_limits<DayT>::lowest() < -678941)
if (modified_julian_day < -678941)
throw Exception(
ErrorCodes::CANNOT_FORMAT_DATETIME,
"Value cannot be represented as date because it's out of range");
if constexpr (std::numeric_limits<DayT>::max() > 2973119)
if (modified_julian_day > 2973119)
throw Exception(
ErrorCodes::CANNOT_FORMAT_DATETIME,
"Value cannot be represented as date because it's out of range");
const auto a = modified_julian_day + 678575;
const auto quad_cent = gd::div(a, 146097);
const auto b = gd::mod(a, 146097);
const auto cent = gd::min(gd::div(b, 36524), 3);
const auto c = b - cent * 36524;
const auto quad = gd::div(c, 1461);
const auto d = gd::mod(c, 1461);
const auto y = gd::min(gd::div(d, 365), 3);
day_of_year_ = d - y * 365 + 1;
year_ = static_cast<YearT>(quad_cent * 400 + cent * 100 + quad * 4 + y + 1);
}
template <typename YearT>
template <is_integer T>
T OrdinalDate<YearT>::toModifiedJulianDay() const noexcept
{
const auto y = year_ - 1;
return day_of_year_
+ 365 * y
+ gd::div(y, 4)
- gd::div(y, 100)
+ gd::div(y, 400)
- 678576;
}
inline MonthDay::MonthDay(uint8_t month, uint8_t day_of_month)
: month_(month)
, day_of_month_(day_of_month)
{
if (month < 1 || month > 12)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid month: {}", DB::toString(month));
/* We can't validate day_of_month here, because we don't know if
* it's a leap year. */
}
inline MonthDay::MonthDay(bool is_leap_year, uint16_t day_of_year)
{
if (day_of_year < 1 || day_of_year > (is_leap_year ? 366 : 365))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of year: {}{}",
(is_leap_year ? "leap, " : "non-leap, "), DB::toString(day_of_year));
month_ = 1;
uint16_t d = day_of_year;
while (true)
{
const auto len = gd::monthLength(is_leap_year, month_);
if (d <= len)
break;
month_++;
d -= len;
}
day_of_month_ = d;
}
inline uint16_t MonthDay::dayOfYear(bool is_leap_year) const
{
if (day_of_month_ < 1 || day_of_month_ > gd::monthLength(is_leap_year, month_))
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid day of month: {}{}-{}",
(is_leap_year ? "leap, " : "non-leap, "), DB::toString(month_), DB::toString(day_of_month_));
}
const auto k = month_ <= 2 ? 0 : is_leap_year ? -1 :-2;
return (367 * month_ - 362) / 12 + k + day_of_month_;
}
}

View File

@ -54,7 +54,8 @@ public:
REGISTER_FUNCTION(CurrentDatabase)
{
factory.registerFunction<FunctionCurrentDatabase>();
factory.registerAlias("DATABASE", "currentDatabase", FunctionFactory::CaseInsensitive);
factory.registerAlias("DATABASE", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive);
factory.registerAlias("current_database", FunctionCurrentDatabase::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -0,0 +1,88 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
class FunctionCurrentSchemas : public IFunction
{
const String db_name;
public:
static constexpr auto name = "currentSchemas";
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionCurrentSchemas>(context->getCurrentDatabase());
}
explicit FunctionCurrentSchemas(const String & db_name_) :
db_name{db_name_}
{
}
String getName() const override
{
return name;
}
size_t getNumberOfArguments() const override
{
return 1;
}
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
// For compatibility, function implements the same signature as Postgres'
const bool argument_is_valid = arguments.size() == 1 && isBool(arguments.front());
if (!argument_is_valid)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be bool", getName());
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>());
}
bool isDeterministic() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
return DataTypeArray(std::make_shared<DataTypeString>())
.createColumnConst(input_rows_count, Array { db_name });
}
};
}
REGISTER_FUNCTION(CurrentSchema)
{
factory.registerFunction<FunctionCurrentSchemas>(FunctionDocumentation
{
.description=R"(
Returns a single-element array with the name of the current database.
Requires a boolean parameter, but it is actually ignored. It exists only for compatibility with the implementations of this function in other DB engines.
[example:common]
)",
.examples{
{"common", "SELECT current_schemas(true);", "['default']"}
}
},
FunctionFactory::CaseInsensitive);
factory.registerAlias("current_schemas", FunctionCurrentSchemas::name, FunctionFactory::CaseInsensitive);
}
}

View File

@ -13,12 +13,12 @@
#include <IO/WriteBufferFromVector.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_FORMAT_DATETIME;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -56,25 +56,14 @@ namespace DB
{
if constexpr (nullOnErrors)
{
try
{
const GregorianDate<> gd(vec_from[i]);
gd.write(write_buffer);
(*vec_null_map_to)[i] = false;
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_FORMAT_DATETIME)
(*vec_null_map_to)[i] = true;
else
throw;
}
GregorianDate gd;
(*vec_null_map_to)[i] = !(gd.tryInit(vec_from[i]) && gd.tryWrite(write_buffer));
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();
}
else
{
const GregorianDate<> gd(vec_from[i]);
GregorianDate gd(vec_from[i]);
gd.write(write_buffer);
writeChar(0, write_buffer);
offsets_to[i] = write_buffer.count();

View File

@ -65,15 +65,7 @@ private:
if (!arg_string)
return argument.type;
try
{
DataTypePtr type = DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
return type;
}
catch (const DB::Exception &)
{
return argument.type;
}
return DataTypeFactory::instance().get(arg_string->getDataAt(0).toString());
}
};

View File

@ -398,7 +398,7 @@ namespace
static Int32 daysSinceEpochFromDayOfYear(Int32 year_, Int32 day_of_year_)
{
if (!isDayOfYearValid(year_, day_of_year_))
throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, year:{} day of year:{}", year_, day_of_year_);
throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid day of year, out of range (year: {} day of year: {})", year_, day_of_year_);
Int32 res = daysSinceEpochFromDate(year_, 1, 1);
res += day_of_year_ - 1;
@ -408,7 +408,7 @@ namespace
static Int32 daysSinceEpochFromDate(Int32 year_, Int32 month_, Int32 day_)
{
if (!isDateValid(year_, month_, day_))
throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, year:{} month:{} day:{}", year_, month_, day_);
throw Exception(ErrorCodes::CANNOT_PARSE_DATETIME, "Invalid date, out of range (year: {} month: {} day_of_month: {})", year_, month_, day_);
Int32 res = cumulativeYearDays[year_ - 1970];
res += isLeapYear(year_) ? cumulativeLeapDays[month_ - 1] : cumulativeDays[month_ - 1];

View File

@ -9,7 +9,8 @@
#include <Common/assert_cast.h>
#include <base/sleep.h>
#include <IO/WriteHelpers.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Context.h>
namespace ProfileEvents
{
@ -40,11 +41,17 @@ enum class FunctionSleepVariant
template <FunctionSleepVariant variant>
class FunctionSleep : public IFunction
{
private:
UInt64 max_microseconds;
public:
static constexpr auto name = variant == FunctionSleepVariant::PerBlock ? "sleep" : "sleepEachRow";
static FunctionPtr create(ContextPtr)
static FunctionPtr create(ContextPtr context)
{
return std::make_shared<FunctionSleep<variant>>(context->getSettingsRef().function_sleep_max_microseconds_per_block);
}
FunctionSleep(UInt64 max_microseconds_) : max_microseconds(max_microseconds_)
{
return std::make_shared<FunctionSleep<variant>>();
}
/// Get the name of the function.
@ -105,13 +112,19 @@ public:
if (size > 0)
{
/// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time.
if (seconds > 3.0) /// The choice is arbitrary
throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds));
if (max_microseconds && seconds * 1e6 > max_microseconds)
throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is {} microseconds. Requested: {}", max_microseconds, seconds);
if (!dry_run)
{
UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size);
UInt64 microseconds = static_cast<UInt64>(seconds * count * 1e6);
if (max_microseconds && microseconds > max_microseconds)
throw Exception(ErrorCodes::TOO_SLOW,
"The maximum sleep time is {} microseconds. Requested: {} microseconds per block (of size {})",
max_microseconds, microseconds, size);
sleepForMicroseconds(microseconds);
ProfileEvents::increment(ProfileEvents::SleepFunctionCalls, count);
ProfileEvents::increment(ProfileEvents::SleepFunctionMicroseconds, microseconds);

View File

@ -0,0 +1,302 @@
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <Functions/IFunction.h>
#include <Functions/PositionImpl.h>
#include <Interpreters/Context_fwd.h>
#include <base/find_symbols.h>
#include <Common/UTF8Helpers.h>
#include <Common/register_objects.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
namespace
{
template <bool is_utf8>
class FunctionSubstringIndex : public IFunction
{
public:
static constexpr auto name = is_utf8 ? "substringIndexUTF8" : "substringIndex";
static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionSubstringIndex>(); }
String getName() const override { return name; }
size_t getNumberOfArguments() const override { return 3; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
bool useDefaultImplementationForConstants() const override { return true; }
ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1}; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isString(arguments[0]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of first argument of function {}, String expected",
arguments[0]->getName(),
getName());
if (!isString(arguments[1]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of second argument of function {}, String expected",
arguments[1]->getName(),
getName());
if (!isNativeInteger(arguments[2]))
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Illegal type {} of third argument of function {}, Integer expected",
arguments[2]->getName(),
getName());
return std::make_shared<DataTypeString>();
}
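/// Semantics sketch (illustrative, mirroring MySQL's SUBSTRING_INDEX):
///     substringIndex('www.clickhouse.com', '.', 2)  -> 'www.clickhouse'
///     substringIndex('www.clickhouse.com', '.', -2) -> 'clickhouse.com'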
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{
ColumnPtr column_string = arguments[0].column;
ColumnPtr column_delim = arguments[1].column;
ColumnPtr column_count = arguments[2].column;
const ColumnConst * column_delim_const = checkAndGetColumnConst<ColumnString>(column_delim.get());
if (!column_delim_const)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument to {} must be a constant String", getName());
String delim = column_delim_const->getValue<String>();
if constexpr (!is_utf8)
{
if (delim.size() != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single character", getName());
}
else
{
if (UTF8::countCodePoints(reinterpret_cast<const UInt8 *>(delim.data()), delim.size()) != 1)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument to {} must be a single UTF-8 character", getName());
}
auto column_res = ColumnString::create();
ColumnString::Chars & vec_res = column_res->getChars();
ColumnString::Offsets & offsets_res = column_res->getOffsets();
const ColumnConst * column_string_const = checkAndGetColumnConst<ColumnString>(column_string.get());
if (column_string_const)
{
String str = column_string_const->getValue<String>();
constantVector(str, delim, column_count.get(), vec_res, offsets_res);
}
else
{
const auto * col_str = checkAndGetColumn<ColumnString>(column_string.get());
if (!col_str)
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument to {} must be a String", getName());
bool is_count_const = isColumnConst(*column_count);
if (is_count_const)
{
Int64 count = column_count->getInt(0);
vectorConstant(col_str, delim, count, vec_res, offsets_res);
}
else
vectorVector(col_str, delim, column_count.get(), vec_res, offsets_res);
}
return column_res;
}
protected:
static void vectorVector(
const ColumnString * str_column,
const String & delim,
const IColumn * count_column,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = str_column->size();
res_data.reserve(str_column->getChars().size() / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
for (size_t i = 0; i < rows; ++i)
{
StringRef str_ref = str_column->getDataAt(i);
Int64 count = count_column->getInt(i);
StringRef res_ref;
if constexpr (!is_utf8)
res_ref = substringIndex(str_ref, delim[0], count);
else
res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void vectorConstant(
const ColumnString * str_column,
const String & delim,
Int64 count,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = str_column->size();
res_data.reserve(str_column->getChars().size() / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
for (size_t i = 0; i < rows; ++i)
{
StringRef str_ref = str_column->getDataAt(i);
StringRef res_ref;
if constexpr (!is_utf8)
res_ref = substringIndex(str_ref, delim[0], count);
else
res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void constantVector(
const String & str,
const String & delim,
const IColumn * count_column,
ColumnString::Chars & res_data,
ColumnString::Offsets & res_offsets)
{
size_t rows = count_column->size();
res_data.reserve(str.size() * rows / 2);
res_offsets.reserve(rows);
std::unique_ptr<PositionCaseSensitiveUTF8::SearcherInBigHaystack> searcher
= !is_utf8 ? nullptr : std::make_unique<PositionCaseSensitiveUTF8::SearcherInBigHaystack>(delim.data(), delim.size());
StringRef str_ref{str.data(), str.size()};
for (size_t i = 0; i < rows; ++i)
{
Int64 count = count_column->getInt(i);
StringRef res_ref;
if constexpr (!is_utf8)
res_ref = substringIndex(str_ref, delim[0], count);
else
res_ref = substringIndexUTF8(searcher.get(), str_ref, delim, count);
appendToResultColumn(res_ref, res_data, res_offsets);
}
}
static void appendToResultColumn(const StringRef & res_ref, ColumnString::Chars & res_data, ColumnString::Offsets & res_offsets)
{
size_t res_offset = res_data.size();
res_data.resize(res_offset + res_ref.size + 1);
memcpy(&res_data[res_offset], res_ref.data, res_ref.size);
res_offset += res_ref.size;
res_data[res_offset] = 0;
++res_offset;
res_offsets.emplace_back(res_offset);
}
static StringRef substringIndexUTF8(
const PositionCaseSensitiveUTF8::SearcherInBigHaystack * searcher, const StringRef & str_ref, const String & delim, Int64 count)
{
if (count == 0)
return {str_ref.data, 0};
const auto * begin = reinterpret_cast<const UInt8 *>(str_ref.data);
const auto * end = reinterpret_cast<const UInt8 *>(str_ref.data + str_ref.size);
const auto * pos = begin;
if (count > 0)
{
Int64 i = 0;
while (i < count)
{
pos = searcher->search(pos, end - pos);
if (pos != end)
{
pos += delim.size();
++i;
}
else
return str_ref;
}
return {begin, static_cast<size_t>(pos - begin - delim.size())};
}
else
{
Int64 total = 0;
while (pos < end && end != (pos = searcher->search(pos, end - pos)))
{
pos += delim.size();
++total;
}
if (total + count < 0)
return str_ref;
pos = begin;
Int64 i = 0;
Int64 count_from_left = total + 1 + count;
while (i < count_from_left && pos < end && end != (pos = searcher->search(pos, end - pos)))
{
pos += delim.size();
++i;
}
return {pos, static_cast<size_t>(end - pos)};
}
}
static StringRef substringIndex(const StringRef & str_ref, char delim, Int64 count)
{
if (count == 0)
return {str_ref.data, 0};
const auto * pos = count > 0 ? str_ref.data : str_ref.data + str_ref.size - 1;
const auto * end = count > 0 ? str_ref.data + str_ref.size : str_ref.data - 1;
int d = count > 0 ? 1 : -1;
for (; count; pos += d)
{
if (pos == end)
return str_ref;
if (*pos == delim)
count -= d;
}
pos -= d;
return {
d > 0 ? str_ref.data : pos + 1, static_cast<size_t>(d > 0 ? pos - str_ref.data : str_ref.data + str_ref.size - pos - 1)};
}
};
}
REGISTER_FUNCTION(SubstringIndex)
{
factory.registerFunction<FunctionSubstringIndex<false>>(); /// substringIndex
factory.registerFunction<FunctionSubstringIndex<true>>(); /// substringIndexUTF8
factory.registerAlias("SUBSTRING_INDEX", "substringIndex", FunctionFactory::CaseInsensitive);
}
}
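For the single-byte path, the bidirectional scan in substringIndex follows MySQL's SUBSTRING_INDEX semantics: a positive count keeps everything before the count-th delimiter from the left, a negative count keeps everything after the |count|-th delimiter from the right, and the whole string is returned when there are not enough delimiters. A standalone sketch of the same algorithm (std::string_view only, not the classes above):

#include <cassert>
#include <string_view>

std::string_view substring_index(std::string_view str, char delim, long count)
{
    if (count == 0)
        return str.substr(0, 0);
    const char * pos = count > 0 ? str.data() : str.data() + str.size() - 1;
    const char * end = count > 0 ? str.data() + str.size() : str.data() - 1;
    long d = count > 0 ? 1 : -1;
    for (; count; pos += d)
    {
        if (pos == end)
            return str;                 // fewer delimiters than requested
        if (*pos == delim)
            count -= d;
    }
    pos -= d;                           // step back onto the matched delimiter
    return d > 0 ? std::string_view(str.data(), static_cast<size_t>(pos - str.data()))
                 : std::string_view(pos + 1, static_cast<size_t>(str.data() + str.size() - pos - 1));
}

int main()
{
    assert(substring_index("www.clickhouse.com", '.', 2) == "www.clickhouse");
    assert(substring_index("www.clickhouse.com", '.', -1) == "com");
    assert(substring_index("www.clickhouse.com", '.', 5) == "www.clickhouse.com");
}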

View File

@ -17,8 +17,6 @@ namespace DB
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int CANNOT_PARSE_INPUT_ASSERTION_FAILED;
extern const int CANNOT_PARSE_DATE;
}
template <typename Name, typename ToDataType, bool nullOnErrors>
@ -78,27 +76,18 @@ namespace DB
if constexpr (nullOnErrors)
{
try
{
const GregorianDate<> date(read_buffer);
vec_to[i] = date.toModifiedJulianDay<typename ToDataType::FieldType>();
vec_null_map_to[i] = false;
}
catch (const Exception & e)
{
if (e.code() == ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED || e.code() == ErrorCodes::CANNOT_PARSE_DATE)
{
vec_to[i] = static_cast<Int32>(0);
vec_null_map_to[i] = true;
}
else
throw;
}
GregorianDate date;
int64_t res = 0;
bool success = date.tryInit(read_buffer) && date.tryToModifiedJulianDay(res);
vec_to[i] = static_cast<typename ToDataType::FieldType>(res);
vec_null_map_to[i] = !success;
}
else
{
const GregorianDate<> date(read_buffer);
vec_to[i] = date.toModifiedJulianDay<typename ToDataType::FieldType>();
const GregorianDate date(read_buffer);
vec_to[i] = static_cast<typename ToDataType::FieldType>(date.toModifiedJulianDay());
}
}
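The hunk above replaces per-row exception handling (catching CANNOT_PARSE_* to set the null map) with try* methods that report failure through their return value. A minimal sketch of that pattern with a hypothetical parser, not the real GregorianDate:

#include <charconv>
#include <iostream>
#include <optional>
#include <string_view>

std::optional<int> try_parse_int(std::string_view s)
{
    int value = 0;
    auto [ptr, ec] = std::from_chars(s.data(), s.data() + s.size(), value);
    if (ec != std::errc() || ptr != s.data() + s.size())
        return std::nullopt;            // no throw/catch on the per-row path
    return value;
}

int main()
{
    for (std::string_view s : {"2023", "20x3"})
    {
        auto res = try_parse_int(s);
        bool is_null = !res.has_value();               // feeds vec_null_map_to[i]
        std::cout << res.value_or(0) << ' ' << is_null << '\n';
    }
}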

View File

@ -156,7 +156,7 @@ namespace
{
initialize(arguments, result_type);
const auto * in = arguments.front().column.get();
const auto * in = arguments[0].column.get();
if (isColumnConst(*in))
return executeConst(arguments, result_type, input_rows_count);
@ -165,6 +165,10 @@ namespace
if (!cache.default_column && arguments.size() == 4)
default_non_const = castColumn(arguments[3], result_type);
ColumnPtr in_casted = arguments[0].column;
if (arguments.size() == 3)
in_casted = castColumn(arguments[0], result_type);
auto column_result = result_type->createColumn();
if (cache.is_empty)
{
@ -174,30 +178,30 @@ namespace
}
else if (cache.table_num_to_idx)
{
if (!executeNum<ColumnVector<UInt8>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<UInt16>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<UInt32>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<UInt64>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<Int8>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<Int16>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<Int32>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<Int64>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<Float32>>(in, *column_result, default_non_const)
&& !executeNum<ColumnVector<Float64>>(in, *column_result, default_non_const)
&& !executeNum<ColumnDecimal<Decimal32>>(in, *column_result, default_non_const)
&& !executeNum<ColumnDecimal<Decimal64>>(in, *column_result, default_non_const))
if (!executeNum<ColumnVector<UInt8>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<UInt16>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<UInt32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<UInt64>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int8>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int16>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Int64>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Float32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnVector<Float64>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnDecimal<Decimal32>>(in, *column_result, default_non_const, *in_casted)
&& !executeNum<ColumnDecimal<Decimal64>>(in, *column_result, default_non_const, *in_casted))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName());
}
}
else if (cache.table_string_to_idx)
{
if (!executeString(in, *column_result, default_non_const))
executeContiguous(in, *column_result, default_non_const);
if (!executeString(in, *column_result, default_non_const, *in_casted))
executeContiguous(in, *column_result, default_non_const, *in_casted);
}
else if (cache.table_anything_to_idx)
{
executeAnything(in, *column_result, default_non_const);
executeAnything(in, *column_result, default_non_const, *in_casted);
}
else
throw Exception(ErrorCodes::LOGICAL_ERROR, "State of the function `transform` is not initialized");
@ -218,7 +222,7 @@ namespace
return impl->execute(args, result_type, input_rows_count);
}
void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const
void executeAnything(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const size_t size = in->size();
const auto & table = *cache.table_anything_to_idx;
@ -236,11 +240,11 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, i);
else
column_result.insertFrom(*in, i);
column_result.insertFrom(in_casted, i);
}
}
void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const) const
void executeContiguous(const IColumn * in, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const size_t size = in->size();
const auto & table = *cache.table_string_to_idx;
@ -255,12 +259,12 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, i);
else
column_result.insertFrom(*in, i);
column_result.insertFrom(in_casted, i);
}
}
template <typename T>
bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const
bool executeNum(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const auto * const in = checkAndGetColumn<T>(in_untyped);
if (!in)
@ -297,7 +301,7 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, i);
else
column_result.insertFrom(*in, i);
column_result.insertFrom(in_casted, i);
}
}
return true;
@ -451,7 +455,7 @@ namespace
}
}
bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const) const
bool executeString(const IColumn * in_untyped, IColumn & column_result, const ColumnPtr default_non_const, const IColumn & in_casted) const
{
const auto * const in = checkAndGetColumn<ColumnString>(in_untyped);
if (!in)
@ -488,7 +492,7 @@ namespace
else if (default_non_const)
column_result.insertFrom(*default_non_const, 0);
else
column_result.insertFrom(*in, i);
column_result.insertFrom(in_casted, i);
}
}
return true;
@ -654,13 +658,13 @@ namespace
std::unique_ptr<StringToIdx> table_string_to_idx;
std::unique_ptr<AnythingToIdx> table_anything_to_idx;
bool is_empty = false;
ColumnPtr from_column;
ColumnPtr to_column;
ColumnPtr default_column;
std::atomic<bool> initialized{false};
bool is_empty = false;
bool initialized = false;
std::mutex mutex;
};
@ -693,13 +697,12 @@ namespace
/// Can be called from different threads. It works only on the first call.
void initialize(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type) const
{
std::lock_guard lock(cache.mutex);
if (cache.initialized)
return;
const DataTypePtr & from_type = arguments[0].type;
std::lock_guard lock(cache.mutex);
if (from_type->onlyNull())
{
cache.is_empty = true;
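The in_casted argument changes what a non-matching row falls back to when no explicit default is given: previously the raw input column was copied, which breaks when transform is called with three arguments and the result type differs from the input type; now the input is cast to the result type first. A standalone sketch of the fallback (hypothetical std types, not the ClickHouse columns):

#include <iostream>
#include <map>
#include <string>
#include <vector>

int main()
{
    std::vector<int> in = {1, 2, 3};                      // input column
    std::map<int, std::string> table = {{1, "one"}};      // transform pairs
    std::vector<std::string> result;                      // result type: String
    for (int v : in)
    {
        auto it = table.find(v);
        // insertFrom(in_casted, i): unmatched rows use the input value
        // converted to the result type, here via to_string.
        result.push_back(it != table.end() ? it->second : std::to_string(v));
    }
    for (const auto & s : result)
        std::cout << s << '\n';                           // one, 2, 3
}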

View File

@ -1,5 +1,4 @@
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnVector.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
@ -86,7 +85,7 @@ public:
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;

View File

@ -95,7 +95,7 @@ public:
auto elem_func = func->build(ColumnsWithTypeAndName{left, right});
types[i] = elem_func->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -181,7 +181,7 @@ public:
auto elem_negate = negate->build(ColumnsWithTypeAndName{cur});
types[i] = elem_negate->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -258,7 +258,7 @@ public:
auto elem_func = func->build(ColumnsWithTypeAndName{cur, p_column});
types[i] = elem_func->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -363,7 +363,7 @@ public:
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -467,7 +467,7 @@ public:
auto plus_elem = plus->build({left, right});
res_type = plus_elem->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -740,7 +740,7 @@ public:
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -842,7 +842,7 @@ public:
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -993,7 +993,7 @@ public:
auto max_elem = max->build({left_type, right_type});
res_type = max_elem->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;
@ -1103,7 +1103,7 @@ public:
auto plus_elem = plus->build({left_type, right_type});
res_type = plus_elem->getResultType();
}
catch (DB::Exception & e)
catch (Exception & e)
{
e.addMessage("While executing function {} for tuple element {}", getName(), i);
throw;

View File

@ -69,12 +69,14 @@ void ProgressValues::write(WriteBuffer & out, UInt64 client_revision) const
}
}
void ProgressValues::writeJSON(WriteBuffer & out) const
void ProgressValues::writeJSON(WriteBuffer & out, bool add_braces) const
{
/// Numbers are written in double quotes (as strings) to avoid loss of precision
/// of 64-bit integers after interpretation by JavaScript.
writeCString("{\"read_rows\":\"", out);
if (add_braces)
writeCString("{", out);
writeCString("\"read_rows\":\"", out);
writeText(read_rows, out);
writeCString("\",\"read_bytes\":\"", out);
writeText(read_bytes, out);
@ -88,7 +90,9 @@ void ProgressValues::writeJSON(WriteBuffer & out) const
writeText(result_rows, out);
writeCString("\",\"result_bytes\":\"", out);
writeText(result_bytes, out);
writeCString("\"}", out);
writeCString("\"", out);
if (add_braces)
writeCString("}", out);
}
bool Progress::incrementPiecewiseAtomically(const Progress & rhs)
@ -230,9 +234,9 @@ void Progress::write(WriteBuffer & out, UInt64 client_revision) const
getValues().write(out, client_revision);
}
void Progress::writeJSON(WriteBuffer & out) const
void Progress::writeJSON(WriteBuffer & out, bool add_braces) const
{
getValues().writeJSON(out);
getValues().writeJSON(out, add_braces);
}
}
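With add_braces the same field list can either stand alone, as in the X-ClickHouse-Progress HTTP header, or be spliced into an enclosing JSON object by a caller that writes the braces itself. A minimal sketch of the two shapes with hypothetical values (numbers stay quoted so 64-bit counters survive JavaScript's Number parsing):

#include <iostream>
#include <string>

std::string progress_json(bool add_braces)
{
    std::string out;
    if (add_braces)
        out += "{";
    out += "\"read_rows\":\"8192\",\"read_bytes\":\"65536\"";   // abridged field list
    if (add_braces)
        out += "}";
    return out;
}

int main()
{
    std::cout << progress_json(true) << '\n';   // standalone object for the header
    std::cout << progress_json(false) << '\n';  // embeddable key/value list
}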

View File

@ -32,7 +32,7 @@ struct ProgressValues
void read(ReadBuffer & in, UInt64 server_revision);
void write(WriteBuffer & out, UInt64 client_revision) const;
void writeJSON(WriteBuffer & out) const;
void writeJSON(WriteBuffer & out, bool add_braces = true) const;
};
struct ReadProgress
@ -119,7 +119,7 @@ struct Progress
void write(WriteBuffer & out, UInt64 client_revision) const;
/// Progress in JSON format (single line, without whitespaces) is used in HTTP headers.
void writeJSON(WriteBuffer & out) const;
void writeJSON(WriteBuffer & out, bool add_braces = true) const;
/// Each value separately is changed atomically (but not the whole object).
bool incrementPiecewiseAtomically(const Progress & rhs);

View File

@ -305,12 +305,12 @@ void ReadWriteBufferFromHTTPBase<UpdatableSessionPtr>::callWithRedirects(Poco::N
current_session = session;
call(current_session, response, method_, throw_on_all_errors, for_object_info);
Poco::URI prev_uri = uri;
saved_uri_redirect = uri;
while (isRedirect(response.getStatus()))
{
Poco::URI uri_redirect = getUriAfterRedirect(prev_uri, response);
prev_uri = uri_redirect;
Poco::URI uri_redirect = getUriAfterRedirect(*saved_uri_redirect, response);
saved_uri_redirect = uri_redirect;
if (remote_host_filter)
remote_host_filter->checkURL(uri_redirect);
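The loop now records each redirect target in saved_uri_redirect, which outlives the loop, evidently so that subsequent requests can reuse the final location; each relative Location header still resolves against the URI that produced the response. A minimal sketch of that resolution chain using Poco::URI's resolving constructor (the Location values are hypothetical):

#include <Poco/URI.h>
#include <iostream>
#include <string>

int main()
{
    Poco::URI saved_uri_redirect("http://example.com/a/b");
    for (const std::string & location : {"c/d", "e"})          // two consecutive redirects
    {
        Poco::URI uri_redirect(saved_uri_redirect, location);  // resolve like getUriAfterRedirect
        saved_uri_redirect = uri_redirect;
        std::cout << saved_uri_redirect.toString() << '\n';
    }
    // Prints http://example.com/a/c/d, then http://example.com/a/c/e;
    // resolving "e" against the original URI would wrongly give /a/e.
}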

View File

@ -258,7 +258,7 @@ void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricT
void PocoHTTPClient::makeRequestInternal(
Aws::Http::HttpRequest & request,
std::shared_ptr<PocoHTTPResponse> & response,
Aws::Utils::RateLimits::RateLimiterInterface * readLimiter ,
Aws::Utils::RateLimits::RateLimiterInterface * readLimiter,
Aws::Utils::RateLimits::RateLimiterInterface * writeLimiter) const
{
/// Most sessions in pool are already connected and it is not possible to set proxy host/port to a connected session.

View File

@ -292,7 +292,7 @@ struct AggregationMethodStringNoCache
{
}
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false, false ,nullable>;
using State = ColumnsHashing::HashMethodString<typename Data::value_type, Mapped, true, false, false, nullable>;
static const bool low_cardinality_optimization = false;
static const bool one_key_nullable_optimization = nullable;

View File

@ -149,9 +149,10 @@ void AsynchronousInsertQueue::InsertData::Entry::finish(std::exception_ptr excep
}
}
AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_)
AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_)
: WithContext(context_)
, pool_size(pool_size_)
, flush_on_shutdown(flush_on_shutdown_)
, queue_shards(pool_size)
, pool(CurrentMetrics::AsynchronousInsertThreads, CurrentMetrics::AsynchronousInsertThreadsActive, pool_size)
{
@ -164,8 +165,6 @@ AsynchronousInsertQueue::AsynchronousInsertQueue(ContextPtr context_, size_t poo
AsynchronousInsertQueue::~AsynchronousInsertQueue()
{
/// TODO: add a setting for graceful shutdown.
LOG_TRACE(log, "Shutting down the asynchronous insertion queue");
shutdown = true;
@ -177,17 +176,18 @@ AsynchronousInsertQueue::~AsynchronousInsertQueue()
assert(dump_by_first_update_threads[i].joinable());
dump_by_first_update_threads[i].join();
if (flush_on_shutdown)
{
for (auto & [_, elem] : shard.queue)
scheduleDataProcessingJob(elem.key, std::move(elem.data), getContext());
}
else
{
std::lock_guard lock(shard.mutex);
for (auto & [_, elem] : shard.queue)
{
for (const auto & entry : elem.data->entries)
{
entry->finish(std::make_exception_ptr(Exception(
ErrorCodes::TIMEOUT_EXCEEDED, "Wait for async insert timeout exceeded")));
}
}
}
}
@ -232,7 +232,10 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
/// to avoid buffering of huge amount of data in memory.
auto read_buf = getReadBufferFromASTInsertQuery(query);
LimitReadBuffer limit_buf(*read_buf, settings.async_insert_max_data_size, /* throw_exception */ false, /* exact_limit */ {});
LimitReadBuffer limit_buf(
*read_buf, settings.async_insert_max_data_size,
/*throw_exception=*/ false, /*exact_limit=*/ {});
WriteBufferFromString write_buf(bytes);
copyData(limit_buf, write_buf);
@ -284,18 +287,19 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
assert(data);
data->size_in_bytes += entry_data_size;
++data->query_number;
data->entries.emplace_back(entry);
insert_future = entry->getFuture();
LOG_TRACE(log, "Have {} pending inserts with total {} bytes of data for query '{}'",
data->entries.size(), data->size_in_bytes, key.query_str);
bool has_enough_bytes = data->size_in_bytes >= key.settings.async_insert_max_data_size;
bool has_enough_queries = data->entries.size() >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate;
/// Here we check whether we hit the limit on maximum data size in the buffer.
/// And use setting from query context.
/// It works, because queries with the same set of settings are already grouped together.
if (data->size_in_bytes >= key.settings.async_insert_max_data_size
|| (data->query_number >= key.settings.async_insert_max_query_number && key.settings.async_insert_deduplicate))
if (!flush_stopped && (has_enough_bytes || has_enough_queries))
{
data_to_process = std::move(data);
shard.iterators.erase(it);
@ -319,6 +323,51 @@ AsynchronousInsertQueue::push(ASTPtr query, ContextPtr query_context)
};
}
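The refactor names the two flush triggers explicitly and gates both behind flush_stopped, so a forced flushAll() can temporarily keep push() from scheduling its own jobs. A tiny sketch of the predicate with hypothetical numbers:

#include <cstddef>
#include <iostream>

int main()
{
    size_t size_in_bytes = 1 << 20, async_insert_max_data_size = 1 << 20;
    size_t entries = 3, async_insert_max_query_number = 16;
    bool async_insert_deduplicate = true, flush_stopped = false;

    bool has_enough_bytes = size_in_bytes >= async_insert_max_data_size;
    bool has_enough_queries = entries >= async_insert_max_query_number && async_insert_deduplicate;
    std::cout << (!flush_stopped && (has_enough_bytes || has_enough_queries)) << '\n';  // 1
}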
void AsynchronousInsertQueue::flushAll()
{
std::lock_guard flush_lock(flush_mutex);
LOG_DEBUG(log, "Requested to flush asynchronous insert queue");
/// Disable background flushes to avoid adding new elements to the queue.
flush_stopped = true;
std::vector<Queue> queues_to_flush(pool_size);
for (size_t i = 0; i < pool_size; ++i)
{
std::lock_guard lock(queue_shards[i].mutex);
queues_to_flush[i] = std::move(queue_shards[i].queue);
queue_shards[i].iterators.clear();
}
size_t total_queries = 0;
size_t total_bytes = 0;
size_t total_entries = 0;
for (auto & queue : queues_to_flush)
{
total_queries += queue.size();
for (auto & [_, entry] : queue)
{
total_bytes += entry.data->size_in_bytes;
total_entries += entry.data->entries.size();
scheduleDataProcessingJob(entry.key, std::move(entry.data), getContext());
}
}
/// Note that jobs scheduled before the call of 'flushAll' are not counted here.
LOG_DEBUG(log,
"Will wait for finishing of {} flushing jobs (about {} inserts, {} bytes, {} distinct queries)",
pool.active(), total_entries, total_bytes, total_queries);
/// Wait until all jobs are finished. That includes also jobs
/// that were scheduled before the call of 'flushAll'.
pool.wait();
LOG_DEBUG(log, "Finished flushing of asynchronous insert queue");
flush_stopped = false;
}
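flushAll() drains under per-shard locks and waits outside them: each shard's queue is moved out while its mutex is held, then jobs are scheduled and pool.wait() runs with no locks taken, so producers are blocked only for the move. A standalone sketch of that drain pattern (std types only):

#include <iostream>
#include <mutex>
#include <vector>

struct Shard
{
    std::mutex mutex;
    std::vector<int> queue;             // stand-in for the real entries
};

int main()
{
    std::vector<Shard> shards(2);
    shards[0].queue = {1, 2};
    shards[1].queue = {3};

    std::vector<std::vector<int>> queues_to_flush(shards.size());
    for (size_t i = 0; i < shards.size(); ++i)
    {
        std::lock_guard lock(shards[i].mutex);
        queues_to_flush[i] = std::move(shards[i].queue);   // drain under the lock
    }

    size_t total_entries = 0;
    for (const auto & q : queues_to_flush)
        total_entries += q.size();      // schedule jobs here, then wait, without locks
    std::cout << "scheduled " << total_entries << " entries\n";
}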
void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num)
{
auto & shard = queue_shards[shard_num];
@ -344,6 +393,9 @@ void AsynchronousInsertQueue::processBatchDeadlines(size_t shard_num)
if (shutdown)
return;
if (flush_stopped)
continue;
const auto now = std::chrono::steady_clock::now();
while (true)

View File

@ -19,7 +19,7 @@ class AsynchronousInsertQueue : public WithContext
public:
using Milliseconds = std::chrono::milliseconds;
AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_);
AsynchronousInsertQueue(ContextPtr context_, size_t pool_size_, bool flush_on_shutdown_);
~AsynchronousInsertQueue();
struct PushResult
@ -40,6 +40,8 @@ public:
std::unique_ptr<ReadBuffer> insert_data_buffer;
};
/// Force flush the whole queue.
void flushAll();
PushResult push(ASTPtr query, ContextPtr query_context);
size_t getPoolSize() const { return pool_size; }
@ -100,9 +102,7 @@ private:
using EntryPtr = std::shared_ptr<Entry>;
std::list<EntryPtr> entries;
size_t size_in_bytes = 0;
size_t query_number = 0;
};
using InsertDataPtr = std::unique_ptr<InsertData>;
@ -130,6 +130,8 @@ private:
};
const size_t pool_size;
const bool flush_on_shutdown;
std::vector<QueueShard> queue_shards;
/// Logic and events behind queue are as follows:
@ -141,6 +143,10 @@ private:
/// (async_insert_max_data_size setting). If so, then again we dump the data.
std::atomic<bool> shutdown{false};
std::atomic<bool> flush_stopped{false};
/// A mutex that prevents concurrent forced flushes of queue.
mutable std::mutex flush_mutex;
/// Dump the data only inside this pool.
ThreadPool pool;

View File

@ -144,12 +144,6 @@ public:
UInt32 shard_index_ = 0,
UInt32 replica_index_ = 0);
Address(
const String & host_port_,
const ClusterConnectionParameters & params,
UInt32 shard_index_,
UInt32 replica_index_);
Address(
const DatabaseReplicaInfo & info,
const ClusterConnectionParameters & params,

View File

@ -124,6 +124,7 @@ void SelectStreamFactory::createForShard(
{
remote_shards.emplace_back(Shard{
.query = query_ast,
.main_table = main_table,
.header = header,
.shard_info = shard_info,
.lazy = lazy,

View File

@ -50,6 +50,8 @@ public:
{
/// Query and header may be changed depending on shard.
ASTPtr query;
/// Used to check the table's existence on the remote node
StorageID main_table;
Block header;
Cluster::ShardInfo shard_info;

View File

@ -35,7 +35,12 @@ namespace ErrorCodes
namespace ClusterProxy
{
ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info, Poco::Logger * log)
ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
ContextPtr context,
const Settings & settings,
const StorageID & main_table,
const SelectQueryInfo * query_info,
Poco::Logger * log)
{
Settings new_settings = settings;
new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time);
@ -43,7 +48,7 @@ ContextMutablePtr updateSettingsForCluster(const Cluster & cluster, ContextPtr c
/// If "secret" (in remote_servers) is not in use,
/// user on the shard is not the same as the user on the initiator,
/// hence per-user limits should not be applied.
if (cluster.getSecret().empty())
if (!interserver_mode)
{
/// Does not matter on remote servers, because queries are sent under different user.
new_settings.max_concurrent_queries_for_user = 0;
@ -170,17 +175,15 @@ void executeQuery(
std::vector<QueryPlanPtr> plans;
SelectStreamFactory::Shards remote_shards;
auto new_context = updateSettingsForCluster(*query_info.getCluster(), context, settings, main_table, &query_info, log);
auto new_context = updateSettingsForCluster(!query_info.getCluster()->getSecret().empty(), context, settings, main_table, &query_info, log);
new_context->increaseDistributedDepth();
size_t shards = query_info.getCluster()->getShardCount();
for (const auto & shard_info : query_info.getCluster()->getShardsInfo())
{
ASTPtr query_ast_for_shard;
if (query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
ASTPtr query_ast_for_shard = query_ast->clone();
if (sharding_key_expr && query_info.optimized_cluster && settings.optimize_skip_unused_shards_rewrite_in && shards > 1)
{
query_ast_for_shard = query_ast->clone();
OptimizeShardingKeyRewriteInVisitor::Data visitor_data{
sharding_key_expr,
sharding_key_expr->getSampleBlock().getByPosition(0).type,
@ -191,8 +194,6 @@ void executeQuery(
OptimizeShardingKeyRewriteInVisitor visitor(visitor_data);
visitor.visit(query_ast_for_shard);
}
else
query_ast_for_shard = query_ast->clone();
if (shard_filter_generator)
{

View File

@ -34,8 +34,12 @@ class SelectStreamFactory;
/// - optimize_skip_unused_shards_nesting
///
/// @return new Context with adjusted settings
ContextMutablePtr updateSettingsForCluster(
const Cluster & cluster, ContextPtr context, const Settings & settings, const StorageID & main_table, const SelectQueryInfo * query_info = nullptr, Poco::Logger * log = nullptr);
ContextMutablePtr updateSettingsForCluster(bool interserver_mode,
ContextPtr context,
const Settings & settings,
const StorageID & main_table,
const SelectQueryInfo * query_info = nullptr,
Poco::Logger * log = nullptr);
using AdditionalShardFilterGenerator = std::function<ASTPtr(uint64_t)>;
/// Execute a distributed query, creating a query plan, from which the query pipeline can be built.

View File

@ -551,7 +551,7 @@ void DDLWorker::processTask(DDLTaskBase & task, const ZooKeeperPtr & zookeeper)
chassert(!task.completely_processed);
/// Setup tracing context on current thread for current DDL
OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__ ,
OpenTelemetry::TracingContextHolder tracing_ctx_holder(__PRETTY_FUNCTION__,
task.entry.tracing_context,
this->context->getOpenTelemetrySpanLog());
tracing_ctx_holder.root_span.kind = OpenTelemetry::CONSUMER;

View File

@ -361,7 +361,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query,
std::vector<std::pair<String, bool>> tables_to_drop;
for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next())
{
iterator->table()->flush();
iterator->table()->flushAndPrepareForShutdown();
tables_to_drop.push_back({iterator->name(), iterator->table()->isDictionary()});
}

View File

@ -193,7 +193,7 @@ AccessRightsElements InterpreterRenameQuery::getRequiredAccess(InterpreterRename
required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.to.getDatabase(), elem.to.getTable());
if (rename.exchange)
{
required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT , elem.from.getDatabase(), elem.from.getTable());
required_access.emplace_back(AccessType::CREATE_TABLE | AccessType::INSERT, elem.from.getDatabase(), elem.from.getTable());
required_access.emplace_back(AccessType::SELECT | AccessType::DROP_TABLE, elem.to.getDatabase(), elem.to.getTable());
}
}

View File

@ -2274,8 +2274,7 @@ std::optional<UInt64> InterpreterSelectQuery::getTrivialCount(UInt64 max_paralle
&& !settings.allow_experimental_query_deduplication
&& !settings.empty_result_for_aggregation_by_empty_set
&& storage
&& storage->getName() != "MaterializedMySQL"
&& !storage->hasLightweightDeletedMask()
&& storage->supportsTrivialCountOptimization()
&& query_info.filter_asts.empty()
&& query_analyzer->hasAggregation()
&& (query_analyzer->aggregates().size() == 1)
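The storage-specific conditions (the MaterializedMySQL name check and the lightweight-delete mask) move behind a single virtual hook, so each storage declares whether a metadata-only count is safe. A minimal sketch of that hook (the default and override shown are assumptions, not the exact ClickHouse signatures):

#include <iostream>

struct IStorageSketch
{
    virtual ~IStorageSketch() = default;
    virtual bool supportsTrivialCountOptimization() const { return false; }  // assumed default
};

struct MergeTreeSketch : IStorageSketch
{
    bool hasLightweightDeletedMask() const { return false; }
    bool supportsTrivialCountOptimization() const override
    {
        return !hasLightweightDeletedMask();   // rows hidden by DELETE forbid trivial count
    }
};

int main()
{
    MergeTreeSketch storage;
    std::cout << storage.supportsTrivialCountOptimization() << '\n';  // 1
}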

View File

@ -38,6 +38,7 @@
#include <Interpreters/AsynchronousInsertLog.h>
#include <Interpreters/JIT/CompiledExpressionCache.h>
#include <Interpreters/TransactionLog.h>
#include <Interpreters/AsynchronousInsertQueue.h>
#include <BridgeHelper/CatBoostLibraryBridgeHelper.h>
#include <Access/AccessControl.h>
#include <Access/ContextAccess.h>
@ -555,6 +556,17 @@ BlockIO InterpreterSystemQuery::execute()
);
break;
}
case Type::FLUSH_ASYNC_INSERT_QUEUE:
{
getContext()->checkAccess(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
auto * queue = getContext()->getAsynchronousInsertQueue();
if (!queue)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Cannot flush asynchronous insert queue because it is not initialized");
queue->flushAll();
break;
}
case Type::STOP_LISTEN_QUERIES:
case Type::START_LISTEN_QUERIES:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is not supported yet", query.type);
@ -1149,6 +1161,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster()
required_access.emplace_back(AccessType::SYSTEM_FLUSH_LOGS);
break;
}
case Type::FLUSH_ASYNC_INSERT_QUEUE:
{
required_access.emplace_back(AccessType::SYSTEM_FLUSH_ASYNC_INSERT_QUEUE);
break;
}
case Type::RESTART_DISK:
{
required_access.emplace_back(AccessType::SYSTEM_RESTART_DISK);

View File

@ -86,9 +86,16 @@ static void dumpMemoryTracker(ProfileEventsSnapshot const & snapshot, DB::Mutabl
columns[i++]->insert(static_cast<UInt64>(snapshot.current_time));
columns[i++]->insert(static_cast<UInt64>(snapshot.thread_id));
columns[i++]->insert(Type::GAUGE);
columns[i++]->insertData(MemoryTracker::USAGE_EVENT_NAME, strlen(MemoryTracker::USAGE_EVENT_NAME));
columns[i++]->insert(snapshot.memory_usage);
columns[i]->insert(snapshot.memory_usage);
i = 0;
columns[i++]->insertData(host_name.data(), host_name.size());
columns[i++]->insert(static_cast<UInt64>(snapshot.current_time));
columns[i++]->insert(static_cast<UInt64>(snapshot.thread_id));
columns[i++]->insert(Type::GAUGE);
columns[i++]->insertData(MemoryTracker::PEAK_USAGE_EVENT_NAME, strlen(MemoryTracker::PEAK_USAGE_EVENT_NAME));
columns[i]->insert(snapshot.peak_memory_usage);
}
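The snapshot now emits two GAUGE rows per thread group with the same column layout: one for current memory usage and one for the peak. A standalone sketch of the row shape (event names and values are hypothetical):

#include <iostream>
#include <vector>

struct Row
{
    const char * metric;
    long long value;
};

int main()
{
    long long memory_usage = 1'000'000;
    long long peak_memory_usage = 5'000'000;
    std::vector<Row> rows;
    rows.push_back({"MemoryTracking", memory_usage});          // USAGE_EVENT_NAME
    rows.push_back({"MemoryTrackingPeak", peak_memory_usage}); // PEAK_USAGE_EVENT_NAME (assumed)
    for (const auto & r : rows)
        std::cout << r.metric << ' ' << r.value << '\n';
}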
void getProfileEvents(
@ -121,6 +128,7 @@ void getProfileEvents(
group_snapshot.thread_id = 0;
group_snapshot.current_time = time(nullptr);
group_snapshot.memory_usage = thread_group->memory_tracker.get();
group_snapshot.peak_memory_usage = thread_group->memory_tracker.getPeak();
auto group_counters = thread_group->performance_counters.getPartiallyAtomicSnapshot();
auto prev_group_snapshot = last_sent_snapshots.find(0);
group_snapshot.counters =

View File

@ -16,6 +16,7 @@ struct ProfileEventsSnapshot
UInt64 thread_id;
CountersIncrement counters;
Int64 memory_usage;
Int64 peak_memory_usage;
time_t current_time;
};

View File

@ -482,7 +482,7 @@ CSN TransactionLog::finalizeCommittedTransaction(MergeTreeTransaction * txn, CSN
bool removed = running_list.erase(txn->tid.getHash());
if (!removed)
{
LOG_ERROR(log , "I's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid);
LOG_ERROR(log, "It's a bug: TID {} {} doesn't exist", txn->tid.getHash(), txn->tid);
abort();
}
}

View File

@ -289,13 +289,6 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query)
elems = std::move(unique_elems);
}
/// Optimize duplicate ORDER BY
void optimizeDuplicateOrderBy(ASTPtr & query, ContextPtr context)
{
DuplicateOrderByVisitor::Data order_by_data{context};
DuplicateOrderByVisitor(order_by_data).visit(query);
}
/// Return simple subselect (without UNIONs or JOINs or SETTINGS) if any
const ASTSelectQuery * getSimpleSubselect(const ASTSelectQuery & select)
{
@ -379,41 +372,6 @@ std::unordered_set<String> getDistinctNames(const ASTSelectQuery & select)
return names;
}
/// Remove DISTINCT from query if columns are known as DISTINCT from subquery
void optimizeDuplicateDistinct(ASTSelectQuery & select)
{
if (!select.select() || select.select()->children.empty())
return;
const ASTSelectQuery * subselect = getSimpleSubselect(select);
if (!subselect)
return;
std::unordered_set<String> distinct_names = getDistinctNames(*subselect);
std::unordered_set<std::string_view> selected_names;
/// Check source column names from select list (ignore aliases and table names)
for (const auto & id : select.select()->children)
{
const auto * identifier = id->as<ASTIdentifier>();
if (!identifier)
return;
const String & name = identifier->shortName();
if (!distinct_names.contains(name))
return; /// Not a distinct column, keep DISTINCT for it.
selected_names.emplace(name);
}
/// select columns list != distinct columns list
/// SELECT DISTINCT a FROM (SELECT DISTINCT a, b FROM ...)) -- cannot remove DISTINCT
if (selected_names.size() != distinct_names.size())
return;
select.distinct = false;
}
/// Replace monotonous functions in ORDER BY if they don't participate in GROUP BY expression,
/// has a single argument and not an aggregate functions.
void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, ContextPtr context,
@ -830,17 +788,6 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result,
&& !select_query->group_by_with_cube)
optimizeAggregateFunctionsOfGroupByKeys(select_query, query);
/// Remove duplicate ORDER BY and DISTINCT from subqueries.
if (settings.optimize_duplicate_order_by_and_distinct)
{
optimizeDuplicateOrderBy(query, context);
/// DISTINCT has special meaning in Distributed query with enabled distributed_group_by_no_merge
/// TODO: disable Distributed/remote() tables only
if (!settings.distributed_group_by_no_merge)
optimizeDuplicateDistinct(*select_query);
}
/// Remove functions from ORDER BY if its argument is also in ORDER BY
if (settings.optimize_redundant_functions_in_order_by)
optimizeRedundantFunctionsInOrderBy(select_query, context);

View File

@ -71,6 +71,7 @@ public:
START_REPLICATION_QUEUES,
FLUSH_LOGS,
FLUSH_DISTRIBUTED,
FLUSH_ASYNC_INSERT_QUEUE,
STOP_DISTRIBUTED_SENDS,
START_DISTRIBUTED_SENDS,
START_THREAD_FUZZER,

View File

@ -31,10 +31,10 @@ protected:
not_endswith,
endswith_cs,
not_endswith_cs,
equal, //=~
not_equal,//!~
equal_cs, //=
not_equal_cs,//!=
equal, /// =~
not_equal, /// !~
equal_cs, /// =
not_equal_cs, /// !=
has,
not_has,
has_all,
@ -49,10 +49,10 @@ protected:
not_hassuffix,
hassuffix_cs,
not_hassuffix_cs,
in_cs, //in
not_in_cs, //!in
in, //in~
not_in ,//!in~
in_cs, /// in
not_in_cs, /// !in
in, /// in~
not_in, /// !in~
matches_regex,
startswith,
not_startswith,

Some files were not shown because too many files have changed in this diff.